diff options
author | Devan Carpenter <devan@taler.net> | 2024-02-18 15:12:40 -0500 |
---|---|---|
committer | Devan Carpenter <devan@taler.net> | 2024-02-18 15:12:40 -0500 |
commit | 5fe7b6ca3b535a2e08be3053d76117bea11f27e2 (patch) | |
tree | df3affcc06e7f3894d9c4ba0b07b05a883fc31a2 | |
parent | a8301b93e202427b507fa33ed787b7601ccbfb36 (diff) | |
download | deployment-5fe7b6ca3b535a2e08be3053d76117bea11f27e2.tar.gz deployment-5fe7b6ca3b535a2e08be3053d76117bea11f27e2.tar.bz2 deployment-5fe7b6ca3b535a2e08be3053d76117bea11f27e2.zip |
linkchecker: ignore errors from specific urls
-rw-r--r-- | buildbot/linkchecker.Containerfile | 2 | ||||
-rwxr-xr-x | buildbot/linkchecker.sh | 2 | ||||
-rw-r--r-- | buildbot/linkcheckerrc | 301 |
3 files changed, 304 insertions, 1 deletion
diff --git a/buildbot/linkchecker.Containerfile b/buildbot/linkchecker.Containerfile index 5c958f3..d80693c 100644 --- a/buildbot/linkchecker.Containerfile +++ b/buildbot/linkchecker.Containerfile @@ -6,3 +6,5 @@ RUN apt-get update && \ apt-get install -yqq \ linkchecker \ && rm -rf /var/lib/apt/lists/* + +COPY linkcheckerrc /root/.config/linkchecker/linkcheckerrc diff --git a/buildbot/linkchecker.sh b/buildbot/linkchecker.sh index e2a96c0..0bf0776 100755 --- a/buildbot/linkchecker.sh +++ b/buildbot/linkchecker.sh @@ -20,7 +20,7 @@ if [ -f "$logfile" ] echo "Info: existing log file '$logfile' not found." fi -podman build -t linkchecker:latest -f "$HOME/deployment/buildbot/linkchecker.Containerfile" +podman build -t linkchecker:latest -f "$HOME/deployment/buildbot/linkchecker.Containerfile" "$HOME/deployment/buildbot" # Use wget to scan hosts and save output for url in "https://www.taler.net/" "https://docs.taler.net/" "https://taler-systems.net/" "https://demo.taler.net/" "https://bank.demo.taler.net/" "https://shop.demo.taler.net/" "https://donations.demo.taler.net/" "https://survey.demo.taler.net/" ; do diff --git a/buildbot/linkcheckerrc b/buildbot/linkcheckerrc new file mode 100644 index 0000000..837277c --- /dev/null +++ b/buildbot/linkcheckerrc @@ -0,0 +1,301 @@ +# Sample configuration file; see the linkcheckerrc(5) man page or +# execute linkchecker -h for help on these options. +# Commandline options override these settings. 
+ +##################### output configuration ########################## +[output] +# enable debug messages; see 'linkchecker -h' for valid debug names, example: +#debug=all +# print status output +#status=1 +# change the logging type +#log=text +# turn on/off --verbose +#verbose=0 +# turn on/off --warnings +#warnings=1 +# turn on/off --quiet +#quiet=0 +# additional file output, example: +#fileoutput = text, html, gml, sql +# errors to ignore (URL regular expression, message regular expression) +ignoreerrors= + ^mailto + .*orders.* +# ignore all errors for broken.example.com: +# ^https?://broken.example.com/ +# ignore SSL errors for dev.example.com: +# ^https://dev.example.com/ ^SSLError .* + + +##################### logger configuration ########################## +# logger output part names: +# all For all parts +# realurl The full url link +# result Valid or invalid, with messages +# extern 1 or 0, only in some logger types reported +# base <base href=...> +# name <a href=...>name</a> and <img alt="name"> +# parenturl The referrer URL if there is any +# info Some additional info, e.g. FTP welcome messages +# warning Warnings +# dltime Download time +# checktime Check time +# url The original url name, can be relative +# intro The blurb at the beginning, "starting at ..." +# outro The blurb at the end, "found x errors ..." +# stats Statistics including URL lengths and contents. 
+ +# each Logger can have separate configuration parameters + +# standard text logger +[text] +#filename=linkchecker-out.txt +#parts=all +# colors for the various parts, syntax is <color> or <type>;<color> +# type can be bold, light, blink, invert +# color can be default, black, red, green, yellow, blue, purple, cyan, white, +# Black, Red, Green, Yellow, Blue, Purple, Cyan, White +#colorparent=default +#colorurl=default +#colorname=default +#colorreal=cyan +#colorbase=purple +#colorvalid=bold;green +#colorinvalid=bold;red +#colorinfo=default +#colorwarning=bold;yellow +#colordltime=default +#colorreset=default + +# GML logger +[gml] +#filename=linkchecker-out.gml +#parts=all +# valid encodings are listed in http://docs.python.org/library/codecs.html#standard-encodings +# example: +#encoding=utf_16 + +# DOT logger +[dot] +#filename=linkchecker-out.dot +#parts=all +# default encoding is ascii since the original DOT format does not +# support other charsets, example: +#encoding=iso-8859-15 + +# CSV logger +[csv] +#filename=linkchecker-out.csv +#separator=; +#quotechar=" +#dialect=excel +#parts=all + +# SQL logger +[sql] +#filename=linkchecker-out.sql +#dbname=linksdb +#separator=; +#parts=all + +# HTML logger +[html] +#filename=linkchecker-out.html +# colors for the various parts +#colorbackground=#fff7e5 +#colorurl=#dcd5cf +#colorborder=#000000 +#colorlink=#191c83 +#colorwarning=#e0954e +#colorerror=#db4930 +#colorok=#3ba557 +#parts=all + +# failures logger +[failures] +#filename=$XDG_DATA_HOME/linkchecker/failures + +# custom xml logger +[xml] +#filename=linkchecker-out.xml +# system encoding is used by default. Example: +#encoding=iso-8859-1 + +# GraphXML logger +[gxml] +#filename=linkchecker-out.gxml +# system encoding is used by default. 
Example: +#encoding=iso-8859-1 + +# Sitemap logger +[sitemap] +#filename=linkchecker-out.sitemap.xml +#encoding=utf-8 +#priority=0.5 +#frequency=daily + + +##################### checking options ########################## +[checking] +# number of threads +#threads=10 +# connection timeout in seconds +#timeout=60 +# Time to wait for checks to finish after the user aborts the first time +# (with Ctrl-C or the abort button). +#aborttimeout=300 +# The recursion level determines how many times links inside pages are followed. +#recursionlevel=-1 +# Basic NNTP server. Overrides NNTP_SERVER environment variable. +#nntpserver= +# parse a cookiefile for initial cookie data, example: +#cookiefile=/path/to/cookies.txt +# User-Agent header string to send to HTTP web servers +# Note that robots.txt are always checked with the original User-Agent. Example: +#useragent=Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html) +# When checking finishes, write a memory dump to a temporary file. +# The memory dump is written both when checking finishes normally +# and when checking gets canceled. +# The memory dump only works if the python-meliae package is installed. +# Otherwise a warning is printed to install it. +#debugmemory=0 +# When checking absolute URLs inside local files, the given root directory +# is used as base URL. +# Note that the given directory must have URL syntax, so it must use a slash +# to join directories instead of a backslash. +# And the given directory must end with a slash. +# Unix example: +#localwebroot=/var/www/ +# Windows example: +#localwebroot=/C|/public_html/ +# Check SSL certificates. Set to an absolute pathname for a custom +# CA cert bundle to use. Set to zero to disable SSL certificate verification. +#sslverify=1 +# Stop checking new URLs after the given number of seconds. Same as if the +# user hits Ctrl-C after X seconds. 
Example: +#maxrunseconds=600 +# Don't download files larger than the given number of bytes +#maxfilesizedownload=5242880 +# Don't parse files larger than the given number of bytes +#maxfilesizeparse=1048576 +# Maximum number of URLs to check. New URLs will not be queued after the +# given number of URLs is checked. Example: +#maxnumurls=153 +# Maximum number of requests per second to one host. +#maxrequestspersecond=10 +# Respect the instructions in any robots.txt files +#robotstxt=1 +# Allowed URL schemes as a comma-separated list. Example: +#allowedschemes=http,https +# Size of the result cache. Checking more urls might increase memory usage during runtime +#resultcachesize=100000 + +##################### filtering options ########################## +[filtering] +#ignore= +# ignore everything with 'lconline' in the URL name +# lconline +# and ignore everything with 'bookmark' in the URL name +# bookmark +# and ignore all mailto: URLs +# ^mailto: +# do not recurse into the following URLs + +#nofollow= +# just an example +# http://www\.example\.com/bla + +# Ignore specified warnings (see linkchecker -h for the list of +# recognized warnings). Add a comma-separated list of warnings here +# that prevent a valid URL from being logged. Note that the warning +# will be logged for invalid URLs. Example: +#ignorewarnings=url-unicode-domain +# Regular expression to add more URLs recognized as internal links. +# Default is that URLs given on the command line are internal. +#internlinks=^http://www\.example\.net/ +# Check external links +#checkextern=0 + + +##################### password authentication ########################## +[authentication] +# WARNING: if you store passwords in this configuration entry, make sure the +# configuration file is not readable by other users. +# Different user/password pairs for different URLs can be provided. +# Entries are a triple (URL regular expression, username, password), +# separated by whitespace. 
+# If the regular expression matches, the given user/password pair is used +# for authentication. The commandline options -u,-p match every link +# and therefore override the entries given here. The first match wins. +# At the moment, authentication is used for http[s] and ftp links. +#entry= +# Note that passwords are optional. If any passwords are stored here, +# this file should not be readable by other users. +# ^https?://www\.example\.com/~calvin/ calvin mypass +# ^ftp://www\.example\.com/secret/ calvin + +# if the website requires a login via a page with an HTML form the URL of the +# page and optionally the username and password input element name attributes +# can be provided. +#loginurl=http://www.example.com/ + +# The name attributes of the username and password HTML input elements +#loginuserfield=login +#loginpasswordfield=password +# Optionally the name attributes of any additional input elements and the values +# to populate them with. Note that these are submitted without checking +# whether matching input elements exist in the HTML form. Example: +#loginextrafields= +# name1:value1 +# name 2:value 2 + +############################ Plugins ################################### +# +# uncomment sections to enable plugins + +# Check HTML anchors +#[AnchorCheck] + +# Print HTTP header info +#[HttpHeaderInfo] +# Comma separated list of header prefixes to print. +# The names are case insensitive. +# The default list is empty, so it should be non-empty when activating +# this plugin. Example: +#prefixes=Server,X- + +# Add country info to URLs +#[LocationInfo] + +# Run W3C syntax checks +#[CssSyntaxCheck] +#[HtmlSyntaxCheck] + +# Search for regular expression in page contents +#[RegexCheck] +# Example: +#warningregex=Oracle Error + +# Search for viruses in page contents +#[VirusCheck] +#clamavconf=/etc/clamav/clamd.conf + +# Check that SSL certificates have at least the given number of days validity. 
+#[SslCertificateCheck] +#sslcertwarndays=30 + +# Parse and check links in PDF files +#[PdfParser] + +# Parse and check links in Word files +#[WordParser] + +# Parse and check links in Markdown files. +# Supported links are: +# <http://autolink.com> +# [name](http://link.com "Optional title") +# [id]: http://link.com "Optional title" +#[MarkdownCheck] +# Regexp of filename +#filename_re=.*\.(markdown|md(own)?|mkdn?)$ |