From 1ae0306a3cf2ea27f60b2d205789994d260c2cce Mon Sep 17 00:00:00 2001
From: Christian Grothoff <christian@grothoff.org>
Date: Sun, 11 Oct 2020 13:29:45 +0200
Subject: add i18n FSFS

---
 talermerchantdemos/blog/content.py | 96 ++++++++++++++------------------------
 1 file changed, 34 insertions(+), 62 deletions(-)

(limited to 'talermerchantdemos/blog/content.py')

diff --git a/talermerchantdemos/blog/content.py b/talermerchantdemos/blog/content.py
index fa9ace2..f4e37fe 100644
--- a/talermerchantdemos/blog/content.py
+++ b/talermerchantdemos/blog/content.py
@@ -1,6 +1,6 @@
 ##
 # This file is part of GNU TALER.
-# Copyright (C) 2014-2016 INRIA
+# Copyright (C) 2014-2020 Taler Systems SA
 #
 # TALER is free software; you can redistribute it and/or modify it under the
 # terms of the GNU Lesser General Public License as published by the Free Software
@@ -22,6 +22,9 @@ import os
 import re
 from bs4 import BeautifulSoup
 from pkg_resources import resource_stream, resource_filename
+from os import listdir
+from os.path import isfile, join
+
 
 LOGGER = logging.getLogger(__name__)
 NOISY_LOGGER = logging.getLogger("chardet.charsetprober")
@@ -31,7 +34,9 @@ Article = namedtuple("Article", "slug title teaser main_file extra_files lang")
 ##
 # @var if a article is added to this list, then it will
 #      be made available in the blog.
-ARTICLES = OrderedDict()
+#      ARTICLES is a dict mapping a language ('en') to an OrderedDict() of
+#      articles available in that language.
+ARTICLES = {}
 
 
 ##
@@ -45,7 +50,9 @@ ARTICLES = OrderedDict()
 #        article, like images and sounds.
 # @param lang language of the arcile
 def add_article(slug, title, teaser, main_file, extra_files, lang='en'):
-    ARTICLES[slug] = Article(slug, title, teaser, main_file, extra_files, lang)
+    # Fetch this language's table, creating it on the first article seen.
+    per_lang = ARTICLES.setdefault(lang, OrderedDict())
+    per_lang[slug] = Article(slug, title, teaser, main_file, extra_files, lang)
 
 
 ##
@@ -79,23 +86,30 @@ def get_article_file(article):
 # @param title article's title; normally, this bit is extracted from the
 #        HTML itself, so give it here if a explicit title needs to be
 #        specified.
-def add_from_html(resource_name, teaser_paragraph=0, title=None):
+def add_from_html(resource_name, lang):
     res = resource_stream("talermerchantdemos", resource_name)
     soup = BeautifulSoup(res, 'html.parser')
     res.close()
-    if title is None:
-        title_el = soup.find("h1", attrs={"class": ["chapter", "unnumbered"]})
-        if title_el is None:
-            LOGGER.warning("Cannot extract title from '%s'", resource_name)
-            title = resource_name
-        else:
-            title = title_el.get_text().strip()
+    title_el = soup.find("h2")
+    if title_el is None:
+        LOGGER.warning("Cannot extract title from '%s'", resource_name)
+        title = resource_name
+    else:
+        title = title_el.get_text().strip()
     slug = title.replace(" ", "_")
-    paragraphs = soup.find_all("p")
 
     teaser = soup.find("p", attrs={"id": ["teaser"]})
     if teaser is None:
-        teaser = paragraphs[teaser_paragraph].get_text()
+        paragraphs = soup.find_all("p")
+        if len(paragraphs) > 0:
+            teaser = paragraphs[0].get_text()
+            if (len(paragraphs) > 1) and (len (teaser) < 100):
+                teaser2 = paragraphs[1].get_text()
+                if (len(teaser2) > len(teaser)):
+                    teaser = teaser2
+        else:
+            LOGGER.warning("Cannot extract teaser from '%s'", resource_name)
+            teaser = ""
     else:
         teaser = teaser.get_text()
     re_proc = re.compile("^/[^/][^/]/essay/[^/]+/data/[^/]+$")
@@ -115,53 +129,11 @@ def add_from_html(resource_name, teaser_paragraph=0, title=None):
             else:
                 LOGGER.warning("Image src and slug don't match: '%s' != '%s'" \
                                % (img['src'].split(os.sep)[2], slug))
-    add_article(slug, title, teaser, resource_name, extra_files, 'en')
-
+    add_article(slug, title, teaser, resource_name, extra_files, lang)
 
-add_from_html("blog/articles/scrap1_U.0.html", 0)
-add_from_html("blog/articles/scrap1_U.1.html", 0)
-add_from_html("blog/articles/scrap1_1.html", 1)
-add_from_html("blog/articles/scrap1_2.html")
-add_from_html("blog/articles/scrap1_3.html")
-add_from_html("blog/articles/scrap1_4.html")
-add_from_html("blog/articles/scrap1_5.html")
-add_from_html("blog/articles/scrap1_6.html")
-add_from_html("blog/articles/scrap1_7.html")
-add_from_html("blog/articles/scrap1_8.html")
-add_from_html("blog/articles/scrap1_9.html")
-add_from_html("blog/articles/scrap1_10.html")
-add_from_html("blog/articles/scrap1_11.html")
-add_from_html("blog/articles/scrap1_12.html")
-add_from_html("blog/articles/scrap1_13.html", 1)
-add_from_html("blog/articles/scrap1_14.html")
-add_from_html("blog/articles/scrap1_15.html")
-add_from_html("blog/articles/scrap1_16.html")
-add_from_html("blog/articles/scrap1_17.html")
-add_from_html("blog/articles/scrap1_18.html")
-add_from_html("blog/articles/scrap1_19.html")
-add_from_html("blog/articles/scrap1_20.html", 1)
-add_from_html("blog/articles/scrap1_21.html")
-add_from_html("blog/articles/scrap1_22.html")
-add_from_html("blog/articles/scrap1_23.html")
-add_from_html("blog/articles/scrap1_24.html")
-add_from_html("blog/articles/scrap1_25.html", 1)
-add_from_html("blog/articles/scrap1_26.html", 1)
-add_from_html("blog/articles/scrap1_27.html")
-add_from_html("blog/articles/scrap1_28.html", 1)
-add_from_html("blog/articles/scrap1_29.html")
-add_from_html("blog/articles/scrap1_30.html", 1)
-add_from_html("blog/articles/scrap1_31.html", 1)
-add_from_html("blog/articles/scrap1_32.html")
-add_from_html("blog/articles/scrap1_33.html")
-add_from_html("blog/articles/scrap1_34.html")
-add_from_html("blog/articles/scrap1_35.html")
-add_from_html("blog/articles/scrap1_36.html")
-add_from_html("blog/articles/scrap1_37.html")
-add_from_html("blog/articles/scrap1_38.html")
-add_from_html("blog/articles/scrap1_39.html")
-add_from_html("blog/articles/scrap1_40.html")
-add_from_html("blog/articles/scrap1_41.html")
-add_from_html("blog/articles/scrap1_42.html")
-add_from_html("blog/articles/scrap1_43.html", 2)
-add_from_html("blog/articles/scrap1_46.html", 1)
-add_from_html("blog/articles/scrap1_47.html")
+for l in sorted(listdir(resource_filename("talermerchantdemos", "blog/articles/"))):
+    # Only active languages are loaded (all is slow); sorted() keeps article order stable.
+    if l in {"en", "de"}:
+        LOGGER.info("importing %s", l)
+        for a in sorted(listdir(resource_filename("talermerchantdemos", "blog/articles/" + l))):
+            add_from_html("blog/articles/" + l + "/" + a, l)
-- 
cgit v1.2.3