1 files changed, 12 insertions, 12 deletions
diff --git a/talermerchantdemos/blog/content.py b/talermerchantdemos/blog/content.py
index d049718..1b2a466 100644
--- a/talermerchantdemos/blog/content.py
+++ b/talermerchantdemos/blog/content.py
@@ -96,25 +96,24 @@ def add_from_html(resource_name, lang):
     title_el = soup.find("h2")
     if title_el is None:
         LOGGER.warning("Cannot extract title from '%s'", resource_name)
-        title = resource_name
-    else:
-        title = title_el.get_text().strip()
-    slug = quote(title.replace(" ", "_"), safe="")
+        return
+    title = title_el.get_text().strip()
+    slug = title.replace(" ", "_")
+    slug = re.sub(r'[^a-zA-Z0-9_]+', "-", slug)
 
     teaser = soup.find("p", attrs={"id": ["teaser"]})
     if teaser is None:
         paragraphs = soup.find_all("p")
         if len(paragraphs) > 0:
-            teaser = paragraphs[0].get_text()
-            if (len(paragraphs) > 1) and (len(teaser) < 100):
-                teaser2 = paragraphs[1].get_text()
-                if len(teaser2) > len(teaser):
-                    teaser = teaser2
+            teaser = paragraphs[0].prettify()
+            if len(teaser) < 100:
+                LOGGER.warning("Cannot extract adequate teaser from '%s'", resource_name)
+                return
         else:
             LOGGER.warning("Cannot extract teaser from '%s'", resource_name)
-            teaser = ""
+            return
     else:
-        teaser = teaser.get_text()
+        teaser = teaser.prettify()
     re_proc = re.compile("^/[^/][^/]/essay/[^/]+/data/[^/]+$")
     imgs = soup.find_all("img")
     extra_files = []
@@ -141,4 +140,5 @@ for l in listdir(resource_filename("talermerchantdemos", "blog/articles/")):
     if l in {"en", "ar", "zh", "fr", "hi", "it", "ja", "ko", "pt", "pt_BR", "ru", "tr", "de", "sv", "es"}:
         LOGGER.info("importing %s" % l)
         for a in listdir(resource_filename("talermerchantdemos", "blog/articles/" + l)):
-            add_from_html("blog/articles/" + l + "/" + a, l)
+            if os.path.isfile(resource_filename("talermerchantdemos", "blog/articles/" + l + "/" + a)):
+                add_from_html("blog/articles/" + l + "/" + a, l)