summaryrefslogtreecommitdiff
path: root/talermerchantdemos/blog/content.py
diff options
context:
space:
mode:
authorMS <ms@taler.net>2020-07-22 14:53:45 +0200
committerMS <ms@taler.net>2020-07-22 14:53:45 +0200
commit2d97ecc2c1ac605ca49e8a866b309daaeb7a831c (patch)
tree173f7917c5d0af822d2d51ed491c3cf2d8eaf23f /talermerchantdemos/blog/content.py
downloadtaler-merchant-demos-2d97ecc2c1ac605ca49e8a866b309daaeb7a831c.tar.gz
taler-merchant-demos-2d97ecc2c1ac605ca49e8a866b309daaeb7a831c.tar.bz2
taler-merchant-demos-2d97ecc2c1ac605ca49e8a866b309daaeb7a831c.zip
Installing the Blog
Diffstat (limited to 'talermerchantdemos/blog/content.py')
-rw-r--r--talermerchantdemos/blog/content.py166
1 files changed, 166 insertions, 0 deletions
diff --git a/talermerchantdemos/blog/content.py b/talermerchantdemos/blog/content.py
new file mode 100644
index 0000000..0ecfa66
--- /dev/null
+++ b/talermerchantdemos/blog/content.py
@@ -0,0 +1,166 @@
+##
+# This file is part of GNU TALER.
+# Copyright (C) 2014-2016 INRIA
+#
+# TALER is free software; you can redistribute it and/or modify it under the
+# terms of the GNU Lesser General Public License as published by the Free Software
+# Foundation; either version 2.1, or (at your option) any later version.
+#
+# TALER is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License along with
+# GNU TALER; see the file COPYING. If not, see <http://www.gnu.org/licenses/>
+#
+# @author Florian Dold
+# @brief Define content and associated metadata that is served on the blog.
+
+from collections import OrderedDict, namedtuple
+import logging
+import os
+import re
+from bs4 import BeautifulSoup
+from pkg_resources import resource_stream, resource_filename
+
# Module-level logger for this blog-content module.
LOGGER = logging.getLogger(__name__)
# chardet's charset prober logs very verbosely below INFO; raise its
# threshold so parsing HTML does not flood the logs.
NOISY_LOGGER = logging.getLogger("chardet.charsetprober")
NOISY_LOGGER.setLevel(logging.INFO)
# Immutable record describing one published blog article.
Article = namedtuple("Article", "slug title teaser main_file extra_files")

##
# @var if an article is added to this mapping, then it will
# be made available in the blog.  Ordered so listings keep
# insertion order.
ARTICLES = OrderedDict()
+
+
##
# Register an article so it becomes available in the blog.
#
# @param slug article's title with all spaces replaced by underscores.
# @param title article's title.
# @param teaser short description of the article's main content.
# @param main_file path to the article's HTML file.
# @param extra_files extra files associated with the article,
#        such as images and sounds.
def add_article(slug, title, teaser, main_file, extra_files):
    entry = Article(slug, title, teaser, main_file, extra_files)
    ARTICLES[slug] = entry
+
+
##
# Resolve the absolute filesystem path of a blog image.
#
# @param image the image filename.
# @return the absolute path to the image file.
# NOTE(review): the package name "talerblog" looks stale for a file
# living under talermerchantdemos/ -- confirm the installed package name.
def get_image_file(image):
    relative = os.path.join("blog/data/", image)
    resolved = resource_filename("talerblog", relative)
    return os.path.abspath(resolved)
+
+
##
# Locate an article's HTML file within the package.
#
# @param article the Article record whose main_file is resolved.
# @return the bare filename (basename) of the article's HTML file --
#         not a full path, even though the resource is resolved first.
def get_article_file(article):
    resolved = resource_filename("talerblog", article.main_file)
    return os.path.basename(resolved)
+
+
##
# Extract title, teaser and referenced media from an HTML article,
# and register the article in ARTICLES.
#
# @param resource_name path to the (HTML) article, relative to the
#        "talerblog" package.
# @param teaser_paragraph index into the article's list of <p> tags to
#        use as the teaser when no <p id="teaser"> exists.  Defaults to
#        zero, as the teaser is normally the very first paragraph.
# @param title explicit article title; when None, the title is taken
#        from the first <h1 class="chapter"/"unnumbered"> element,
#        falling back to the resource name itself.
def add_from_html(resource_name, teaser_paragraph=0, title=None):
    res = resource_stream("talerblog", resource_name)
    soup = BeautifulSoup(res, 'html.parser')
    res.close()
    if title is None:
        title_el = soup.find("h1", attrs={"class": ["chapter", "unnumbered"]})
        if title_el is None:
            LOGGER.warning("Can't extract title from '%s'", resource_name)
            title = resource_name
        else:
            title = title_el.get_text().strip()
    slug = title.replace(" ", "_")
    paragraphs = soup.find_all("p")

    teaser = soup.find("p", attrs={"id": ["teaser"]})
    if teaser is None:
        teaser = paragraphs[teaser_paragraph].get_text()
    else:
        teaser = teaser.get_text()
    re_proc = re.compile(r"^/essay/[^/]+/data/[^/]+$")
    extra_files = []
    for img in soup.find_all("img"):
        # An <img> without a src attribute would raise KeyError with
        # img['src']; treat it as non-matching instead.
        src = img.get('src', '')
        # We require that any image whose access is regulated is src'd
        # as "/essay/<slug>/data/img.png".  We also need to check that
        # the <slug> component actually matches the article's slug.
        if not re_proc.match(src):
            continue
        # BUGFIX: src is a URL path, always '/'-separated.  Splitting
        # on os.sep broke (IndexError) on platforms where os.sep != '/';
        # the regex above already guarantees the '/'-based shape.
        components = src.split('/')
        src_slug = components[2]
        filename = components[-1]
        if src_slug == slug:
            # Lazy %-style args: formatting happens only if emitted.
            LOGGER.info("extra file for %s is %s", slug, filename)
            extra_files.append(filename)
        else:
            LOGGER.warning(
                "Image src and slug don't match: '%s' != '%s'",
                src_slug, slug
            )
    add_article(slug, title, teaser, resource_name, extra_files)
+
+
+add_from_html("blog/articles/scrap1_U.0.html", 0)
+add_from_html("blog/articles/scrap1_U.1.html", 0)
+add_from_html("blog/articles/scrap1_1.html", 1)
+add_from_html("blog/articles/scrap1_2.html")
+add_from_html("blog/articles/scrap1_3.html")
+add_from_html("blog/articles/scrap1_4.html")
+add_from_html("blog/articles/scrap1_5.html")
+add_from_html("blog/articles/scrap1_6.html")
+add_from_html("blog/articles/scrap1_7.html")
+add_from_html("blog/articles/scrap1_8.html")
+add_from_html("blog/articles/scrap1_9.html")
+add_from_html("blog/articles/scrap1_10.html")
+add_from_html("blog/articles/scrap1_11.html")
+add_from_html("blog/articles/scrap1_12.html")
+add_from_html("blog/articles/scrap1_13.html", 1)
+add_from_html("blog/articles/scrap1_14.html")
+add_from_html("blog/articles/scrap1_15.html")
+add_from_html("blog/articles/scrap1_16.html")
+add_from_html("blog/articles/scrap1_17.html")
+add_from_html("blog/articles/scrap1_18.html")
+add_from_html("blog/articles/scrap1_19.html")
+add_from_html("blog/articles/scrap1_20.html", 1)
+add_from_html("blog/articles/scrap1_21.html")
+add_from_html("blog/articles/scrap1_22.html")
+add_from_html("blog/articles/scrap1_23.html")
+add_from_html("blog/articles/scrap1_24.html")
+add_from_html("blog/articles/scrap1_25.html", 1)
+add_from_html("blog/articles/scrap1_26.html", 1)
+add_from_html("blog/articles/scrap1_27.html")
+add_from_html("blog/articles/scrap1_28.html", 1)
+add_from_html("blog/articles/scrap1_29.html")
+add_from_html("blog/articles/scrap1_30.html", 1)
+add_from_html("blog/articles/scrap1_31.html", 1)
+add_from_html("blog/articles/scrap1_32.html")
+add_from_html("blog/articles/scrap1_33.html")
+add_from_html("blog/articles/scrap1_34.html")
+add_from_html("blog/articles/scrap1_35.html")
+add_from_html("blog/articles/scrap1_36.html")
+add_from_html("blog/articles/scrap1_37.html")
+add_from_html("blog/articles/scrap1_38.html")
+add_from_html("blog/articles/scrap1_39.html")
+add_from_html("blog/articles/scrap1_40.html")
+add_from_html("blog/articles/scrap1_41.html")
+add_from_html("blog/articles/scrap1_42.html")
+add_from_html("blog/articles/scrap1_43.html", 2)
+add_from_html("blog/articles/scrap1_46.html", 1)
+add_from_html("blog/articles/scrap1_47.html")