diff options
author | MS <ms@taler.net> | 2020-07-22 14:53:45 +0200 |
---|---|---|
committer | MS <ms@taler.net> | 2020-07-22 14:53:45 +0200 |
commit | 2d97ecc2c1ac605ca49e8a866b309daaeb7a831c (patch) | |
tree | 173f7917c5d0af822d2d51ed491c3cf2d8eaf23f /talermerchantdemos/blog/content.py | |
download | taler-merchant-demos-2d97ecc2c1ac605ca49e8a866b309daaeb7a831c.tar.gz taler-merchant-demos-2d97ecc2c1ac605ca49e8a866b309daaeb7a831c.tar.bz2 taler-merchant-demos-2d97ecc2c1ac605ca49e8a866b309daaeb7a831c.zip |
Installing the Blog
Diffstat (limited to 'talermerchantdemos/blog/content.py')
-rw-r--r-- | talermerchantdemos/blog/content.py | 166 |
1 file changed, 166 insertions, 0 deletions
diff --git a/talermerchantdemos/blog/content.py b/talermerchantdemos/blog/content.py new file mode 100644 index 0000000..0ecfa66 --- /dev/null +++ b/talermerchantdemos/blog/content.py @@ -0,0 +1,166 @@ +## +# This file is part of GNU TALER. +# Copyright (C) 2014-2016 INRIA +# +# TALER is free software; you can redistribute it and/or modify it under the +# terms of the GNU Lesser General Public License as published by the Free Software +# Foundation; either version 2.1, or (at your option) any later version. +# +# TALER is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License along with +# GNU TALER; see the file COPYING. If not, see <http://www.gnu.org/licenses/> +# +# @author Florian Dold +# @brief Define content and associated metadata that is served on the blog. + +from collections import OrderedDict, namedtuple +import logging +import os +import re +from bs4 import BeautifulSoup +from pkg_resources import resource_stream, resource_filename + +LOGGER = logging.getLogger(__name__) +NOISY_LOGGER = logging.getLogger("chardet.charsetprober") +NOISY_LOGGER.setLevel(logging.INFO) +Article = namedtuple("Article", "slug title teaser main_file extra_files") + +## +# @var if a article is added to this list, then it will +# be made available in the blog. +ARTICLES = OrderedDict() + + +## +# Add article to the list of the available articles. +# +# @param slug article's title with all the spaces converted to underscores. +# @param title article's title. +# @param teaser a short description of the main article's content. +# @param main_file path to the article's HTML file. +# @param extra_file collection of extra files associated with the +# article, like images and sounds. 
+def add_article(slug, title, teaser, main_file, extra_files): + ARTICLES[slug] = Article(slug, title, teaser, main_file, extra_files) + + +## +# Build the file path of a image. +# +# @param image the image filename. +# @return the path to the image file. +def get_image_file(image): + filex = resource_filename("talerblog", os.path.join("blog/data/", image)) + return os.path.abspath(filex) + + +## +# Build the file path of a article. +# +# @param article the article filename. +# @return the path to the article HTML file. +def get_article_file(article): + filex = resource_filename("talerblog", article.main_file) + return os.path.basename(filex) + + +## +# Extract information from HTML file, and use these informations +# to make the article available in the blog. +# +# @param resource_name path to the (HTML) article. +# @param teaser_paragraph position of the teaser paragraph in the +# article's list of all the P tags. Defaults to zero, as normally +# this information is found under the very first P tag. +# @param title article's title; normally, this bit is extracted from the +# HTML itself, so give it here if a explicit title needs to be +# specified. 
+def add_from_html(resource_name, teaser_paragraph=0, title=None): + res = resource_stream("talerblog", resource_name) + soup = BeautifulSoup(res, 'html.parser') + res.close() + if title is None: + title_el = soup.find("h1", attrs={"class": ["chapter", "unnumbered"]}) + if title_el is None: + LOGGER.warning("Can't extract title from '%s'", resource_name) + title = resource_name + else: + title = title_el.get_text().strip() + slug = title.replace(" ", "_") + paragraphs = soup.find_all("p") + + teaser = soup.find("p", attrs={"id": ["teaser"]}) + if teaser is None: + teaser = paragraphs[teaser_paragraph].get_text() + else: + teaser = teaser.get_text() + re_proc = re.compile("^/essay/[^/]+/data/[^/]+$") + imgs = soup.find_all("img") + extra_files = [] + for img in imgs: + # We require that any image whose access is regulated is src'd + # as "<slug>/data/img.png". We also need to check if the <slug> + # component actually matches the article's slug + if re_proc.match(img['src']): + if img['src'].split(os.sep)[2] == slug: + LOGGER.info( + "extra file for %s is %s" % + (slug, os.path.basename(img['src'])) + ) + extra_files.append(os.path.basename(img['src'])) + else: + LOGGER.warning("Image src and slug don't match: '%s' != '%s'" \ + % (img['src'].split(os.sep)[2], slug)) + add_article(slug, title, teaser, resource_name, extra_files) + + +add_from_html("blog/articles/scrap1_U.0.html", 0) +add_from_html("blog/articles/scrap1_U.1.html", 0) +add_from_html("blog/articles/scrap1_1.html", 1) +add_from_html("blog/articles/scrap1_2.html") +add_from_html("blog/articles/scrap1_3.html") +add_from_html("blog/articles/scrap1_4.html") +add_from_html("blog/articles/scrap1_5.html") +add_from_html("blog/articles/scrap1_6.html") +add_from_html("blog/articles/scrap1_7.html") +add_from_html("blog/articles/scrap1_8.html") +add_from_html("blog/articles/scrap1_9.html") +add_from_html("blog/articles/scrap1_10.html") +add_from_html("blog/articles/scrap1_11.html") 
+add_from_html("blog/articles/scrap1_12.html") +add_from_html("blog/articles/scrap1_13.html", 1) +add_from_html("blog/articles/scrap1_14.html") +add_from_html("blog/articles/scrap1_15.html") +add_from_html("blog/articles/scrap1_16.html") +add_from_html("blog/articles/scrap1_17.html") +add_from_html("blog/articles/scrap1_18.html") +add_from_html("blog/articles/scrap1_19.html") +add_from_html("blog/articles/scrap1_20.html", 1) +add_from_html("blog/articles/scrap1_21.html") +add_from_html("blog/articles/scrap1_22.html") +add_from_html("blog/articles/scrap1_23.html") +add_from_html("blog/articles/scrap1_24.html") +add_from_html("blog/articles/scrap1_25.html", 1) +add_from_html("blog/articles/scrap1_26.html", 1) +add_from_html("blog/articles/scrap1_27.html") +add_from_html("blog/articles/scrap1_28.html", 1) +add_from_html("blog/articles/scrap1_29.html") +add_from_html("blog/articles/scrap1_30.html", 1) +add_from_html("blog/articles/scrap1_31.html", 1) +add_from_html("blog/articles/scrap1_32.html") +add_from_html("blog/articles/scrap1_33.html") +add_from_html("blog/articles/scrap1_34.html") +add_from_html("blog/articles/scrap1_35.html") +add_from_html("blog/articles/scrap1_36.html") +add_from_html("blog/articles/scrap1_37.html") +add_from_html("blog/articles/scrap1_38.html") +add_from_html("blog/articles/scrap1_39.html") +add_from_html("blog/articles/scrap1_40.html") +add_from_html("blog/articles/scrap1_41.html") +add_from_html("blog/articles/scrap1_42.html") +add_from_html("blog/articles/scrap1_43.html", 2) +add_from_html("blog/articles/scrap1_46.html", 1) +add_from_html("blog/articles/scrap1_47.html") |