diff options
Diffstat (limited to 'talermerchantdemos/blog/content.py')
-rw-r--r-- | talermerchantdemos/blog/content.py | 24 |
1 files changed, 12 insertions, 12 deletions
diff --git a/talermerchantdemos/blog/content.py b/talermerchantdemos/blog/content.py
index d049718..1b2a466 100644
--- a/talermerchantdemos/blog/content.py
+++ b/talermerchantdemos/blog/content.py
@@ -96,25 +96,24 @@ def add_from_html(resource_name, lang):
     title_el = soup.find("h2")
     if title_el is None:
         LOGGER.warning("Cannot extract title from '%s'", resource_name)
-        title = resource_name
-    else:
-        title = title_el.get_text().strip()
-    slug = quote(title.replace(" ", "_"), safe="")
+        return
+    title = title_el.get_text().strip()
+    slug = title.replace(" ", "_")
+    slug = re.sub(r'[^a-zA-Z0-9_]+', "-", slug)
 
     teaser = soup.find("p", attrs={"id": ["teaser"]})
     if teaser is None:
         paragraphs = soup.find_all("p")
         if len(paragraphs) > 0:
-            teaser = paragraphs[0].get_text()
-            if (len(paragraphs) > 1) and (len(teaser) < 100):
-                teaser2 = paragraphs[1].get_text()
-                if len(teaser2) > len(teaser):
-                    teaser = teaser2
+            teaser = paragraphs[0].prettify()
+            if len(teaser) < 100:
+                LOGGER.warning("Cannot extract adequate teaser from '%s'", resource_name)
+                return
         else:
             LOGGER.warning("Cannot extract teaser from '%s'", resource_name)
-            teaser = ""
+            return
     else:
-        teaser = teaser.get_text()
+        teaser = teaser.prettify()
     re_proc = re.compile("^/[^/][^/]/essay/[^/]+/data/[^/]+$")
     imgs = soup.find_all("img")
     extra_files = []
@@ -141,4 +140,5 @@ for l in listdir(resource_filename("talermerchantdemos", "blog/articles/")):
     if l in {"en", "ar", "zh", "fr", "hi", "it", "ja", "ko", "pt", "pt_BR", "ru", "tr", "de", "sv", "es"}:
         LOGGER.info("importing %s" % l)
         for a in listdir(resource_filename("talermerchantdemos", "blog/articles/" + l)):
-            add_from_html("blog/articles/" + l + "/" + a, l)
+            if os.path.isfile(resource_filename("talermerchantdemos", "blog/articles/" + l + "/" + a)):
+                add_from_html("blog/articles/" + l + "/" + a, l)