linkcheckerrc (9676B)
1 # Sample configuration file; see the linkcheckerrc(5) man page or 2 # execute linkchecker -h for help on these options. 3 # Commandline options override these settings. 4 5 ##################### output configuration ########################## 6 [output] 7 # enable debug messages; see 'linkchecker -h' for valid debug names, example: 8 #debug=all 9 # print status output 10 #status=1 11 # change the logging type 12 #log=text 13 # turn on/off --verbose 14 #verbose=0 15 # turn on/off --warnings 16 #warnings=1 17 # turn on/off --quiet 18 #quiet=0 19 # additional file output, example: 20 #fileoutput = text, html, gml, sql 21 # errors to ignore (URL regular expression, message regular expression) 22 ignoreerrors= 23 ^mailto 24 .*orders.* 25 ^https://donations.demo.taler.net/en/checkout 26 ^https://web.archive.org/web/20120118201902/http://www.gnu.org/ 27 ^https://www.researchgate.net/publication/4980956_The_Case_Against_Intellectual_Property 28 ^https://shop.fsf.org/.* 29 ^https://blog.fefe.de/.* 30 ^https://x.com/.* 31 ^https://news.ycombinator.com/.* 32 ^https://twitter.com.* 33 # ignore all errors for broken.example.com: 34 # ^https?://broken.example.com/ 35 # ignore SSL errors for dev.example.com: 36 # ^https://dev.example.com/ ^SSLError .* 37 38 39 ##################### logger configuration ########################## 40 # logger output part names: 41 # all For all parts 42 # realurl The full url link 43 # result Valid or invalid, with messages 44 # extern 1 or 0, only in some logger types reported 45 # base <base href=...> 46 # name <a href=...>name</a> and <img alt="name"> 47 # parenturl The referrer URL if there is any 48 # info Some additional info, e.g. FTP welcome messages 49 # warning Warnings 50 # dltime Download time 51 # checktime Check time 52 # url The original url name, can be relative 53 # intro The blurb at the beginning, "starting at ..." 54 # outro The blurb at the end, "found x errors ..." 55 # stats Statistics including URL lengths and contents. 
56 57 # each Logger can have separate configuration parameters 58 59 # standard text logger 60 [text] 61 #filename=linkchecker-out.txt 62 #parts=all 63 # colors for the various parts, syntax is <color> or <type>;<color> 64 # type can be bold, light, blink, invert 65 # color can be default, black, red, green, yellow, blue, purple, cyan, white, 66 # Black, Red, Green, Yellow, Blue, Purple, Cyan, White 67 #colorparent=default 68 #colorurl=default 69 #colorname=default 70 #colorreal=cyan 71 #colorbase=purple 72 #colorvalid=bold;green 73 #colorinvalid=bold;red 74 #colorinfo=default 75 #colorwarning=bold;yellow 76 #colordltime=default 77 #colorreset=default 78 79 # GML logger 80 [gml] 81 #filename=linkchecker-out.gml 82 #parts=all 83 # valid encodings are listed in http://docs.python.org/library/codecs.html#standard-encodings 84 # example: 85 #encoding=utf_16 86 87 # DOT logger 88 [dot] 89 #filename=linkchecker-out.dot 90 #parts=all 91 # default encoding is ascii since the original DOT format does not 92 # support other charsets, example: 93 #encoding=iso-8859-15 94 95 # CSV logger 96 [csv] 97 #filename=linkchecker-out.csv 98 #separator=; 99 #quotechar=" 100 #dialect=excel 101 #parts=all 102 103 # SQL logger 104 [sql] 105 #filename=linkchecker-out.sql 106 #dbname=linksdb 107 #separator=; 108 #parts=all 109 110 # HTML logger 111 [html] 112 #filename=linkchecker-out.html 113 # colors for the various parts 114 #colorbackground=#fff7e5 115 #colorurl=#dcd5cf 116 #colorborder=#000000 117 #colorlink=#191c83 118 #colorwarning=#e0954e 119 #colorerror=#db4930 120 #colorok=#3ba557 121 #parts=all 122 123 # failures logger 124 [failures] 125 #filename=$XDG_DATA_HOME/linkchecker/failures 126 127 # custom xml logger 128 [xml] 129 #filename=linkchecker-out.xml 130 # system encoding is used by default. Example: 131 #encoding=iso-8859-1 132 133 # GraphXML logger 134 [gxml] 135 #filename=linkchecker-out.gxml 136 # system encoding is used by default. 
Example: 137 #encoding=iso-8859-1 138 139 # Sitemap logger 140 [sitemap] 141 #filename=linkchecker-out.sitemap.xml 142 #encoding=utf-8 143 #priority=0.5 144 #frequency=daily 145 146 147 ##################### checking options ########################## 148 [checking] 149 # number of threads 150 #threads=10 151 # connection timeout in seconds 152 #timeout=60 153 # Time to wait for checks to finish after the user aborts the first time 154 # (with Ctrl-C or the abort button). 155 #aborttimeout=300 156 # The recursion level determines how many times links inside pages are followed. 157 #recursionlevel=-1 158 # Basic NNTP server. Overrides NNTP_SERVER environment variable. 159 #nntpserver= 160 # parse a cookiefile for initial cookie data, example: 161 #cookiefile=/path/to/cookies.txt 162 # User-Agent header string to send to HTTP web servers 163 # Note that robots.txt is always checked with the original User-Agent. Example: 164 #useragent=Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html) 165 # When checking finishes, write a memory dump to a temporary file. 166 # The memory dump is written both when checking finishes normally 167 # and when checking gets canceled. 168 # The memory dump only works if the python-meliae package is installed. 169 # Otherwise a warning is printed to install it. 170 #debugmemory=0 171 # When checking absolute URLs inside local files, the given root directory 172 # is used as base URL. 173 # Note that the given directory must have URL syntax, so it must use a slash 174 # to join directories instead of a backslash. 175 # And the given directory must end with a slash. 176 # Unix example: 177 #localwebroot=/var/www/ 178 # Windows example: 179 #localwebroot=/C|/public_html/ 180 # Check SSL certificates. Set to an absolute pathname for a custom 181 # CA cert bundle to use. Set to zero to disable SSL certificate verification. 182 #sslverify=1 183 # Stop checking new URLs after the given number of seconds. 
Same as if the 184 # user hits Ctrl-C after X seconds. Example: 185 #maxrunseconds=600 186 # Don't download files larger than the given number of bytes 187 #maxfilesizedownload=5242880 188 # Don't parse files larger than the given number of bytes 189 #maxfilesizeparse=1048576 190 # Maximum number of URLs to check. New URLs will not be queued after the 191 # given number of URLs is checked. Example: 192 #maxnumurls=153 193 # Maximum number of requests per second to one host. 194 #maxrequestspersecond=10 195 # Respect the instructions in any robots.txt files 196 #robotstxt=1 197 # Allowed URL schemes as a comma-separated list. Example: 198 #allowedschemes=http,https 199 # Size of the result cache. Checking more urls might increase memory usage during runtime 200 #resultcachesize=100000 201 202 ##################### filtering options ########################## 203 [filtering] 204 #ignore= 205 # ignore everything with 'lconline' in the URL name 206 # lconline 207 # and ignore everything with 'bookmark' in the URL name 208 # bookmark 209 # and ignore all mailto: URLs 210 # ^mailto: 211 # do not recurse into the following URLs 212 213 #nofollow= 214 # just an example 215 # http://www\.example\.com/bla 216 217 # Ignore specified warnings (see linkchecker -h for the list of 218 # recognized warnings). Add a comma-separated list of warnings here 219 # that prevent a valid URL from being logged. Note that the warning 220 # will be logged for invalid URLs. Example: 221 #ignorewarnings=url-unicode-domain 222 # Regular expression to add more URLs recognized as internal links. 223 # Default is that URLs given on the command line are internal. 224 #internlinks=^http://www\.example\.net/ 225 # Check external links 226 #checkextern=0 227 228 229 ##################### password authentication ########################## 230 [authentication] 231 # WARNING: if you store passwords in this configuration entry, make sure the 232 # configuration file is not readable by other users. 
233 # Different user/password pairs for different URLs can be provided. 234 # Entries are a triple (URL regular expression, username, password), 235 # separated by whitespace. 236 # If the regular expression matches, the given user/password pair is used 237 # for authentication. The commandline options -u,-p match every link 238 # and therefore override the entries given here. The first match wins. 239 # At the moment, authentication is used for http[s] and ftp links. 240 #entry= 241 # Note that passwords are optional. If any passwords are stored here, 242 # this file should not be readable by other users. 243 # ^https?://www\.example\.com/~calvin/ calvin mypass 244 # ^ftp://www\.example\.com/secret/ calvin 245 246 # if the website requires a login via a page with an HTML form, the URL of the 247 # page and optionally the username and password input element name attributes 248 # can be provided. 249 #loginurl=http://www.example.com/ 250 251 # The name attributes of the username and password HTML input elements 252 #loginuserfield=login 253 #loginpasswordfield=password 254 # Optionally the name attributes of any additional input elements and the values 255 # to populate them with. Note that these are submitted without checking 256 # whether matching input elements exist in the HTML form. Example: 257 #loginextrafields= 258 # name1:value1 259 # name 2:value 2 260 261 ############################ Plugins ################################### 262 # 263 # uncomment sections to enable plugins 264 265 # Check HTML anchors 266 #[AnchorCheck] 267 268 # Print HTTP header info 269 #[HttpHeaderInfo] 270 # Comma separated list of header prefixes to print. 271 # The names are case insensitive. 272 # The default list is empty, so it should be non-empty when activating 273 # this plugin. 
Example: 274 #prefixes=Server,X- 275 276 # Add country info to URLs 277 #[LocationInfo] 278 279 # Run W3C syntax checks 280 #[CssSyntaxCheck] 281 #[HtmlSyntaxCheck] 282 283 # Search for regular expression in page contents 284 #[RegexCheck] 285 # Example: 286 #warningregex=Oracle Error 287 288 # Search for viruses in page contents 289 #[VirusCheck] 290 #clamavconf=/etc/clamav/clamd.conf 291 292 # Check that SSL certificates have at least the given number of days validity. 293 #[SslCertificateCheck] 294 #sslcertwarndays=30 295 296 # Parse and check links in PDF files 297 #[PdfParser] 298 299 # Parse and check links in Word files 300 #[WordParser] 301 302 # Parse and check links in Markdown files. 303 # Supported links are: 304 # <http://autolink.com> 305 # [name](http://link.com "Optional title") 306 # [id]: http://link.com "Optional title" 307 #[MarkdownCheck] 308 # Regexp of filename 309 #filename_re=.*\.(markdown|md(own)?|mkdn?)$