linkcheckerrc (9705B)
1 # Sample configuration file; see the linkcheckerrc(5) man page or 2 # execute linkchecker -h for help on these options. 3 # Commandline options override these settings. 4 5 ##################### output configuration ########################## 6 [output] 7 # enable debug messages; see 'linkchecker -h' for valid debug names, example: 8 #debug=all 9 # print status output 10 #status=1 11 # change the logging type 12 #log=text 13 # turn on/off --verbose 14 #verbose=0 15 # turn on/off --warnings 16 #warnings=1 17 # turn on/off --quiet 18 #quiet=0 19 # additional file output, example: 20 #fileoutput = text, html, gml, sql 21 # errors to ignore (URL regular expression, message regular expression) 22 ignoreerrors= 23 ^mailto 24 .*orders.* 25 ^https://donations.demo.taler.net/en/checkout 26 ^https://web.archive.org/web/20120118201902/http://www.gnu.org/ 27 ^https://www.researchgate.net/publication/4980956_The_Case_Against_Intellectual_Property 28 ^https://shop.fsf.org/.* 29 ^https://blog.fefe.de/.* 30 ^https://x.com/.* 31 ^https://www.netzbon.ch/.* 32 ^https://news.ycombinator.com/.* 33 ^https://twitter.com.* 34 # ignore all errors for broken.example.com: 35 # ^https?://broken.example.com/ 36 # ignore SSL errors for dev.example.com: 37 # ^https://dev.example.com/ ^SSLError .* 38 39 40 ##################### logger configuration ########################## 41 # logger output part names: 42 # all For all parts 43 # realurl The full url link 44 # result Valid or invalid, with messages 45 # extern 1 or 0, only in some logger types reported 46 # base <base href=...> 47 # name <a href=...>name</a> and <img alt="name"> 48 # parenturl The referrer URL if there is any 49 # info Some additional info, e.g. FTP welcome messages 50 # warning Warnings 51 # dltime Download time 52 # checktime Check time 53 # url The original url name, can be relative 54 # intro The blurb at the beginning, "starting at ..." 55 # outro The blurb at the end, "found x errors ..." 
56 # stats Statistics including URL lengths and contents. 57 58 # each Logger can have separate configuration parameters 59 60 # standard text logger 61 [text] 62 #filename=linkchecker-out.txt 63 #parts=all 64 # colors for the various parts, syntax is <color> or <type>;<color> 65 # type can be bold, light, blink, invert 66 # color can be default, black, red, green, yellow, blue, purple, cyan, white, 67 # Black, Red, Green, Yellow, Blue, Purple, Cyan, White 68 #colorparent=default 69 #colorurl=default 70 #colorname=default 71 #colorreal=cyan 72 #colorbase=purple 73 #colorvalid=bold;green 74 #colorinvalid=bold;red 75 #colorinfo=default 76 #colorwarning=bold;yellow 77 #colordltime=default 78 #colorreset=default 79 80 # GML logger 81 [gml] 82 #filename=linkchecker-out.gml 83 #parts=all 84 # valid encodings are listed in http://docs.python.org/library/codecs.html#standard-encodings 85 # example: 86 #encoding=utf_16 87 88 # DOT logger 89 [dot] 90 #filename=linkchecker-out.dot 91 #parts=all 92 # default encoding is ascii since the original DOT format does not 93 # support other charsets, example: 94 #encoding=iso-8859-15 95 96 # CSV logger 97 [csv] 98 #filename=linkchecker-out.csv 99 #separator=; 100 #quotechar=" 101 #dialect=excel 102 #parts=all 103 104 # SQL logger 105 [sql] 106 #filename=linkchecker-out.sql 107 #dbname=linksdb 108 #separator=; 109 #parts=all 110 111 # HTML logger 112 [html] 113 #filename=linkchecker-out.html 114 # colors for the various parts 115 #colorbackground=#fff7e5 116 #colorurl=#dcd5cf 117 #colorborder=#000000 118 #colorlink=#191c83 119 #colorwarning=#e0954e 120 #colorerror=#db4930 121 #colorok=#3ba557 122 #parts=all 123 124 # failures logger 125 [failures] 126 #filename=$XDG_DATA_HOME/linkchecker/failures 127 128 # custom xml logger 129 [xml] 130 #filename=linkchecker-out.xml 131 # system encoding is used by default. 
Example: 132 #encoding=iso-8859-1 133 134 # GraphXML logger 135 [gxml] 136 #filename=linkchecker-out.gxml 137 # system encoding is used by default. Example: 138 #encoding=iso-8859-1 139 140 # Sitemap logger 141 [sitemap] 142 #filename=linkchecker-out.sitemap.xml 143 #encoding=utf-8 144 #priority=0.5 145 #frequency=daily 146 147 148 ##################### checking options ########################## 149 [checking] 150 # number of threads 151 #threads=10 152 # connection timeout in seconds 153 #timeout=60 154 # Time to wait for checks to finish after the user aborts the first time 155 # (with Ctrl-C or the abort button). 156 #aborttimeout=300 157 # The recursion level determines how many times links inside pages are followed. 158 #recursionlevel=-1 159 # Basic NNTP server. Overrides NNTP_SERVER environment variable. 160 #nntpserver= 161 # parse a cookiefile for initial cookie data, example: 162 #cookiefile=/path/to/cookies.txt 163 # User-Agent header string to send to HTTP web servers 164 # Note that robots.txt are always checked with the original User-Agent. Example: 165 #useragent=Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html) 166 # When checking finishes, write a memory dump to a temporary file. 167 # The memory dump is written both when checking finishes normally 168 # and when checking gets canceled. 169 # The memory dump only works if the python-meliae package is installed. 170 # Otherwise a warning is printed to install it. 171 #debugmemory=0 172 # When checking absolute URLs inside local files, the given root directory 173 # is used as base URL. 174 # Note that the given directory must have URL syntax, so it must use a slash 175 # to join directories instead of a backslash. 176 # And the given directory must end with a slash. 177 # Unix example: 178 #localwebroot=/var/www/ 179 # Windows example: 180 #localwebroot=/C|/public_html/ 181 # Check SSL certificates. Set to an absolute pathname for a custom 182 # CA cert bundle to use. 
Set to zero to disable SSL certificate verification. 183 #sslverify=1 184 # Stop checking new URLs after the given number of seconds. Same as if the 185 # user hits Ctrl-C after X seconds. Example: 186 #maxrunseconds=600 187 # Don't download files larger than the given number of bytes 188 #maxfilesizedownload=5242880 189 # Don't parse files larger than the given number of bytes 190 #maxfilesizeparse=1048576 191 # Maximum number of URLs to check. New URLs will not be queued after the 192 # given number of URLs is checked. Example: 193 #maxnumurls=153 194 # Maximum number of requests per second to one host. 195 #maxrequestspersecond=10 196 # Respect the instructions in any robots.txt files 197 #robotstxt=1 198 # Allowed URL schemes as a comma-separated list. Example: 199 #allowedschemes=http,https 200 # Size of the result cache. Checking more urls might increase memory usage during runtime 201 #resultcachesize=100000 202 203 ##################### filtering options ########################## 204 [filtering] 205 #ignore= 206 # ignore everything with 'lconline' in the URL name 207 # lconline 208 # and ignore everything with 'bookmark' in the URL name 209 # bookmark 210 # and ignore all mailto: URLs 211 # ^mailto: 212 # do not recurse into the following URLs 213 214 #nofollow= 215 # just an example 216 # http://www\.example\.com/bla 217 218 # Ignore specified warnings (see linkchecker -h for the list of 219 # recognized warnings). Add a comma-separated list of warnings here 220 # that prevent a valid URL from being logged. Note that the warning 221 # will be logged for invalid URLs. Example: 222 #ignorewarnings=url-unicode-domain 223 # Regular expression to add more URLs recognized as internal links. 224 # Default is that URLs given on the command line are internal. 
225 #internlinks=^http://www\.example\.net/ 226 # Check external links 227 #checkextern=0 228 229 230 ##################### password authentication ########################## 231 [authentication] 232 # WARNING: if you store passwords in this configuration entry, make sure the 233 # configuration file is not readable by other users. 234 # Different user/password pairs for different URLs can be provided. 235 # Entries are a triple (URL regular expression, username, password), 236 # separated by whitespace. 237 # If the regular expression matches, the given user/password pair is used 238 # for authentication. The commandline options -u,-p match every link 239 # and therefore override the entries given here. The first match wins. 240 # At the moment, authentication is used for http[s] and ftp links. 241 #entry= 242 # Note that passwords are optional. If any passwords are stored here, 243 # this file should not be readable by other users. 244 # ^https?://www\.example\.com/~calvin/ calvin mypass 245 # ^ftp://www\.example\.com/secret/ calvin 246 247 # if the website requires a login via a page with an HTML form the URL of the 248 # page and optionally the username and password input element name attributes 249 # can be provided. 250 #loginurl=http://www.example.com/ 251 252 # The name attributes of the username and password HTML input elements 253 #loginuserfield=login 254 #loginpasswordfield=password 255 # Optionally the name attributes of any additional input elements and the values 256 # to populate them with. Note that these are submitted without checking 257 # whether matching input elements exist in the HTML form. Example: 258 #loginextrafields= 259 # name1:value1 260 # name 2:value 2 261 262 ############################ Plugins ################################### 263 # 264 # uncomment sections to enable plugins 265 266 # Check HTML anchors 267 #[AnchorCheck] 268 269 # Print HTTP header info 270 #[HttpHeaderInfo] 271 # Comma-separated list of header prefixes to print.
272 # The names are case insensitive. 273 # The default list is empty, so it should be non-empty when activating 274 # this plugin. Example: 275 #prefixes=Server,X- 276 277 # Add country info to URLs 278 #[LocationInfo] 279 280 # Run W3C syntax checks 281 #[CssSyntaxCheck] 282 #[HtmlSyntaxCheck] 283 284 # Search for regular expression in page contents 285 #[RegexCheck] 286 # Example: 287 #warningregex=Oracle Error 288 289 # Search for viruses in page contents 290 #[VirusCheck] 291 #clamavconf=/etc/clamav/clamd.conf 292 293 # Check that SSL certificates have at least the given number of days validity. 294 #[SslCertificateCheck] 295 #sslcertwarndays=30 296 297 # Parse and check links in PDF files 298 #[PdfParser] 299 300 # Parse and check links in Word files 301 #[WordParser] 302 303 # Parse and check links in Markdown files. 304 # Supported links are: 305 # <http://autolink.com> 306 # [name](http://link.com "Optional title") 307 # [id]: http://link.com "Optional title" 308 #[MarkdownCheck] 309 # Regexp of filename 310 #filename_re=.*\.(markdown|md(own)?|mkdn?)$