diff options
author | MS <ms@taler.net> | 2020-07-22 14:53:45 +0200 |
---|---|---|
committer | MS <ms@taler.net> | 2020-07-22 14:53:45 +0200 |
commit | 2d97ecc2c1ac605ca49e8a866b309daaeb7a831c (patch) | |
tree | 173f7917c5d0af822d2d51ed491c3cf2d8eaf23f /talermerchantdemos/blog/content.py | |
download | taler-merchant-demos-2d97ecc2c1ac605ca49e8a866b309daaeb7a831c.tar.gz taler-merchant-demos-2d97ecc2c1ac605ca49e8a866b309daaeb7a831c.tar.bz2 taler-merchant-demos-2d97ecc2c1ac605ca49e8a866b309daaeb7a831c.zip |
Installing the Blog
Diffstat (limited to 'talermerchantdemos/blog/content.py')
-rw-r--r-- | talermerchantdemos/blog/content.py | 166 |
1 file changed, 166 insertions, 0 deletions
diff --git a/talermerchantdemos/blog/content.py b/talermerchantdemos/blog/content.py new file mode 100644 index 0000000..0ecfa66 --- /dev/null +++ b/talermerchantdemos/blog/content.py @@ -0,0 +1,166 @@ +## +# This file is part of GNU TALER. +# Copyright (C) 2014-2016 INRIA +# +# TALER is free software; you can redistribute it and/or modify it under the +# terms of the GNU Lesser General Public License as published by the Free Software +# Foundation; either version 2.1, or (at your option) any later version. +# +# TALER is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License along with +# GNU TALER; see the file COPYING. If not, see <http://www.gnu.org/licenses/> +# +# @author Florian Dold +# @brief Define content and associated metadata that is served on the blog. + +from collections import OrderedDict, namedtuple +import logging +import os +import re +from bs4 import BeautifulSoup +from pkg_resources import resource_stream, resource_filename + +LOGGER = logging.getLogger(__name__) +NOISY_LOGGER = logging.getLogger("chardet.charsetprober") +NOISY_LOGGER.setLevel(logging.INFO) +Article = namedtuple("Article", "slug title teaser main_file extra_files") + +## +# @var if a article is added to this list, then it will +# be made available in the blog. +ARTICLES = OrderedDict() + + +## +# Add article to the list of the available articles. +# +# @param slug article's title with all the spaces converted to underscores. +# @param title article's title. +# @param teaser a short description of the main article's content. +# @param main_file path to the article's HTML file. +# @param extra_file collection of extra files associated with the +# article, like images and sounds. 
+def add_article(slug, title, teaser, main_file, extra_files): + ARTICLES[slug] = Article(slug, title, teaser, main_file, extra_files) + + +## +# Build the file path of a image. +# +# @param image the image filename. +# @return the path to the image file. +def get_image_file(image): + filex = resource_filename("talerblog", os.path.join("blog/data/", image)) + return os.path.abspath(filex) + + +## +# Build the file path of a article. +# +# @param article the article filename. +# @return the path to the article HTML file. +def get_article_file(article): + filex = resource_filename("talerblog", article.main_file) + return os.path.basename(filex) + + +## +# Extract information from HTML file, and use these informations +# to make the article available in the blog. +# +# @param resource_name path to the (HTML) article. +# @param teaser_paragraph position of the teaser paragraph in the +# article's list of all the P tags. Defaults to zero, as normally +# this information is found under the very first P tag. +# @param title article's title; normally, this bit is extracted from the +# HTML itself, so give it here if a explicit title needs to be +# specified. 
+def add_from_html(resource_name, teaser_paragraph=0, title=None): + res = resource_stream("talerblog", resource_name) + soup = BeautifulSoup(res, 'html.parser') + res.close() + if title is None: + title_el = soup.find("h1", attrs={"class": ["chapter", "unnumbered"]}) + if title_el is None: + LOGGER.warning("Can't extract title from '%s'", resource_name) + title = resource_name + else: + title = title_el.get_text().strip() + slug = title.replace(" ", "_") + paragraphs = soup.find_all("p") + + teaser = soup.find("p", attrs={"id": ["teaser"]}) + if teaser is None: + teaser = paragraphs[teaser_paragraph].get_text() + else: + teaser = teaser.get_text() + re_proc = re.compile("^/essay/[^/]+/data/[^/]+$") + imgs = soup.find_all("img") + extra_files = [] + for img in imgs: + # We require that any image whose access is regulated is src'd + # as "<slug>/data/img.png". We also need to check if the <slug> + # component actually matches the article's slug + if re_proc.match(img['src']): + if img['src'].split(os.sep)[2] == slug: + LOGGER.info( + "extra file for %s is %s" % + (slug, os.path.basename(img['src'])) + ) + extra_files.append(os.path.basename(img['src'])) + else: + LOGGER.warning("Image src and slug don't match: '%s' != '%s'" \ + % (img['src'].split(os.sep)[2], slug)) + add_article(slug, title, teaser, resource_name, extra_files) + + +add_from_html("blog/articles/scrap1_U.0.html", 0) +add_from_html("blog/articles/scrap1_U.1.html", 0) +add_from_html("blog/articles/scrap1_1.html", 1) +add_from_html("blog/articles/scrap1_2.html") +add_from_html("blog/articles/scrap1_3.html") +add_from_html("blog/articles/scrap1_4.html") +add_from_html("blog/articles/scrap1_5.html") +add_from_html("blog/articles/scrap1_6.html") +add_from_html("blog/articles/scrap1_7.html") +add_from_html("blog/articles/scrap1_8.html") +add_from_html("blog/articles/scrap1_9.html") +add_from_html("blog/articles/scrap1_10.html") +add_from_html("blog/articles/scrap1_11.html") 
+add_from_html("blog/articles/scrap1_12.html") +add_from_html("blog/articles/scrap1_13.html", 1) +add_from_html("blog/articles/scrap1_14.html") +add_from_html("blog/articles/scrap1_15.html") +add_from_html("blog/articles/scrap1_16.html") +add_from_html("blog/articles/scrap1_17.html") +add_from_html("blog/articles/scrap1_18.html") +add_from_html("blog/articles/scrap1_19.html") +add_from_html("blog/articles/scrap1_20.html", 1) +add_from_html("blog/articles/scrap1_21.html") +add_from_html("blog/articles/scrap1_22.html") +add_from_html("blog/articles/scrap1_23.html") +add_from_html("blog/articles/scrap1_24.html") +add_from_html("blog/articles/scrap1_25.html", 1) +add_from_html("blog/articles/scrap1_26.html", 1) +add_from_html("blog/articles/scrap1_27.html") +add_from_html("blog/articles/scrap1_28.html", 1) +add_from_html("blog/articles/scrap1_29.html") +add_from_html("blog/articles/scrap1_30.html", 1) +add_from_html("blog/articles/scrap1_31.html", 1) +add_from_html("blog/articles/scrap1_32.html") +add_from_html("blog/articles/scrap1_33.html") +add_from_html("blog/articles/scrap1_34.html") +add_from_html("blog/articles/scrap1_35.html") +add_from_html("blog/articles/scrap1_36.html") +add_from_html("blog/articles/scrap1_37.html") +add_from_html("blog/articles/scrap1_38.html") +add_from_html("blog/articles/scrap1_39.html") +add_from_html("blog/articles/scrap1_40.html") +add_from_html("blog/articles/scrap1_41.html") +add_from_html("blog/articles/scrap1_42.html") +add_from_html("blog/articles/scrap1_43.html", 2) +add_from_html("blog/articles/scrap1_46.html", 1) +add_from_html("blog/articles/scrap1_47.html") |