From 1ae0306a3cf2ea27f60b2d205789994d260c2cce Mon Sep 17 00:00:00 2001 From: Christian Grothoff Date: Sun, 11 Oct 2020 13:29:45 +0200 Subject: add i18n FSFS --- talermerchantdemos/blog/content.py | 96 ++++++++++++++------------------------ 1 file changed, 34 insertions(+), 62 deletions(-) (limited to 'talermerchantdemos/blog/content.py') diff --git a/talermerchantdemos/blog/content.py b/talermerchantdemos/blog/content.py index fa9ace2..f4e37fe 100644 --- a/talermerchantdemos/blog/content.py +++ b/talermerchantdemos/blog/content.py @@ -1,6 +1,6 @@ ## # This file is part of GNU TALER. -# Copyright (C) 2014-2016 INRIA +# Copyright (C) 2014-2020 Taler Systems SA # # TALER is free software; you can redistribute it and/or modify it under the # terms of the GNU Lesser General Public License as published by the Free Software @@ -22,6 +22,9 @@ import os import re from bs4 import BeautifulSoup from pkg_resources import resource_stream, resource_filename +from os import listdir +from os.path import isfile, join + LOGGER = logging.getLogger(__name__) NOISY_LOGGER = logging.getLogger("chardet.charsetprober") @@ -31,7 +34,9 @@ Article = namedtuple("Article", "slug title teaser main_file extra_files lang") ## # @var if a article is added to this list, then it will # be made available in the blog. -ARTICLES = OrderedDict() +# ARTICLES is a dict mapping a language ('en') to an OrderedDict() of +# articles available in that language. +ARTICLES = {} ## @@ -45,7 +50,9 @@ ARTICLES = OrderedDict() # article, like images and sounds. 
# @param lang language of the arcile def add_article(slug, title, teaser, main_file, extra_files, lang='en'): - ARTICLES[slug] = Article(slug, title, teaser, main_file, extra_files, lang) + if (not (lang in ARTICLES)): + ARTICLES[lang] = OrderedDict() + ARTICLES[lang][slug] = Article(slug, title, teaser, main_file, extra_files, lang) ## @@ -79,23 +86,30 @@ def get_article_file(article): # @param title article's title; normally, this bit is extracted from the # HTML itself, so give it here if a explicit title needs to be # specified. -def add_from_html(resource_name, teaser_paragraph=0, title=None): +def add_from_html(resource_name, lang): res = resource_stream("talermerchantdemos", resource_name) soup = BeautifulSoup(res, 'html.parser') res.close() - if title is None: - title_el = soup.find("h1", attrs={"class": ["chapter", "unnumbered"]}) - if title_el is None: - LOGGER.warning("Cannot extract title from '%s'", resource_name) - title = resource_name - else: - title = title_el.get_text().strip() + title_el = soup.find("h2") + if title_el is None: + LOGGER.warning("Cannot extract title from '%s'", resource_name) + title = resource_name + else: + title = title_el.get_text().strip() slug = title.replace(" ", "_") - paragraphs = soup.find_all("p") teaser = soup.find("p", attrs={"id": ["teaser"]}) if teaser is None: - teaser = paragraphs[teaser_paragraph].get_text() + paragraphs = soup.find_all("p") + if len(paragraphs) > 0: + teaser = paragraphs[0].get_text() + if (len(paragraphs) > 1) and (len (teaser) < 100): + teaser2 = paragraphs[1].get_text() + if (len(teaser2) > len(teaser)): + teaser = teaser2 + else: + LOGGER.warning("Cannot extract teaser from '%s'", resource_name) + teaser = "" else: teaser = teaser.get_text() re_proc = re.compile("^/[^/][^/]/essay/[^/]+/data/[^/]+$") @@ -115,53 +129,11 @@ def add_from_html(resource_name, teaser_paragraph=0, title=None): else: LOGGER.warning("Image src and slug don't match: '%s' != '%s'" \ % (img['src'].split(os.sep)[2], 
slug)) - add_article(slug, title, teaser, resource_name, extra_files, 'en') - + add_article(slug, title, teaser, resource_name, extra_files, lang) -add_from_html("blog/articles/scrap1_U.0.html", 0) -add_from_html("blog/articles/scrap1_U.1.html", 0) -add_from_html("blog/articles/scrap1_1.html", 1) -add_from_html("blog/articles/scrap1_2.html") -add_from_html("blog/articles/scrap1_3.html") -add_from_html("blog/articles/scrap1_4.html") -add_from_html("blog/articles/scrap1_5.html") -add_from_html("blog/articles/scrap1_6.html") -add_from_html("blog/articles/scrap1_7.html") -add_from_html("blog/articles/scrap1_8.html") -add_from_html("blog/articles/scrap1_9.html") -add_from_html("blog/articles/scrap1_10.html") -add_from_html("blog/articles/scrap1_11.html") -add_from_html("blog/articles/scrap1_12.html") -add_from_html("blog/articles/scrap1_13.html", 1) -add_from_html("blog/articles/scrap1_14.html") -add_from_html("blog/articles/scrap1_15.html") -add_from_html("blog/articles/scrap1_16.html") -add_from_html("blog/articles/scrap1_17.html") -add_from_html("blog/articles/scrap1_18.html") -add_from_html("blog/articles/scrap1_19.html") -add_from_html("blog/articles/scrap1_20.html", 1) -add_from_html("blog/articles/scrap1_21.html") -add_from_html("blog/articles/scrap1_22.html") -add_from_html("blog/articles/scrap1_23.html") -add_from_html("blog/articles/scrap1_24.html") -add_from_html("blog/articles/scrap1_25.html", 1) -add_from_html("blog/articles/scrap1_26.html", 1) -add_from_html("blog/articles/scrap1_27.html") -add_from_html("blog/articles/scrap1_28.html", 1) -add_from_html("blog/articles/scrap1_29.html") -add_from_html("blog/articles/scrap1_30.html", 1) -add_from_html("blog/articles/scrap1_31.html", 1) -add_from_html("blog/articles/scrap1_32.html") -add_from_html("blog/articles/scrap1_33.html") -add_from_html("blog/articles/scrap1_34.html") -add_from_html("blog/articles/scrap1_35.html") -add_from_html("blog/articles/scrap1_36.html") 
-add_from_html("blog/articles/scrap1_37.html") -add_from_html("blog/articles/scrap1_38.html") -add_from_html("blog/articles/scrap1_39.html") -add_from_html("blog/articles/scrap1_40.html") -add_from_html("blog/articles/scrap1_41.html") -add_from_html("blog/articles/scrap1_42.html") -add_from_html("blog/articles/scrap1_43.html", 2) -add_from_html("blog/articles/scrap1_46.html", 1) -add_from_html("blog/articles/scrap1_47.html") +for l in listdir(resource_filename("talermerchantdemos", "blog/articles/")): + # Filter by active languages, otherwise this takes quite a while to load... + if l in { "en", "de" }: + LOGGER.info("importing %s" % l) + for a in listdir(resource_filename ("talermerchantdemos", "blog/articles/" + l)): + add_from_html("blog/articles/" + l + "/" + a, l) -- cgit v1.2.3