summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristian Grothoff <christian@grothoff.org>2022-10-20 23:52:09 +0200
committerChristian Grothoff <christian@grothoff.org>2022-10-20 23:52:09 +0200
commitea081314ab0a1b74e31f36fd2a163d6b84d2b9b0 (patch)
treee0d1de9aab5fb5cc66d51d3c32746947b594fc18
parentb0c67aa4aeaf03ec7deac4f879811646c7308071 (diff)
downloadtaler-merchant-demos-ea081314ab0a1b74e31f36fd2a163d6b84d2b9b0.tar.gz
taler-merchant-demos-ea081314ab0a1b74e31f36fd2a163d6b84d2b9b0.tar.bz2
taler-merchant-demos-ea081314ab0a1b74e31f36fd2a163d6b84d2b9b0.zip
simplify extraction
-rw-r--r--talermerchantdemos/blog/content.py30
1 file changed, 6 insertions(+), 24 deletions(-)
diff --git a/talermerchantdemos/blog/content.py b/talermerchantdemos/blog/content.py
index bb5fad6..875a5fa 100644
--- a/talermerchantdemos/blog/content.py
+++ b/talermerchantdemos/blog/content.py
@@ -104,34 +104,16 @@ def add_from_html(resource_name, lang):
teaser = soup.find("p", attrs={"id": ["teaser"]})
if teaser is None:
paragraphs = soup.find_all("p")
- lists = soup.find_all("li")
- if (len(paragraphs) > 0) and (len(lists) > 0):
- if (paragraphs[0].sourceline > lists[0].sourceline):
- titleat = lists
- else:
- titleat = paragraphs
- else:
- if (len(paragraphs) > 0):
- titleat = paragraphs
- else:
- titleat = lists
if len(titleat) > 0:
- if (titleat[0].tag == 'li'):
- teaser = titleat[0].contents[0].prettify()
- else:
- teaser = titleat[0].prettify()
- if (len(titleat) > 1) and (len(teaser) < 100):
- if (titleat[1].tag == 'li'):
- teaser2 = titleat[1].contents[0].prettify()
- else:
- teaser2 = titleat[1].prettify()
- if len(teaser2) > len(teaser):
- teaser = teaser2
+ teaser = paragraphs[0].prettify()
+ if len(teaser) < 100:
+ LOGGER.warning("Cannot extract adequate teaser from '%s'", resource_name)
+ return
else:
LOGGER.warning("Cannot extract teaser from '%s'", resource_name)
- teaser = ""
+ return
else:
- teaser = teaser.get_text()
+ teaser = teaser.prettify()
re_proc = re.compile("^/[^/][^/]/essay/[^/]+/data/[^/]+$")
imgs = soup.find_all("img")
extra_files = []