commit ea081314ab0a1b74e31f36fd2a163d6b84d2b9b0 parent b0c67aa4aeaf03ec7deac4f879811646c7308071 Author: Christian Grothoff <christian@grothoff.org> Date: Thu, 20 Oct 2022 23:52:09 +0200 simplify extraction Diffstat:
| M | talermerchantdemos/blog/content.py | | | 30 | ++++++------------------------ |
1 file changed, 6 insertions(+), 24 deletions(-)
diff --git a/talermerchantdemos/blog/content.py b/talermerchantdemos/blog/content.py @@ -104,34 +104,16 @@ def add_from_html(resource_name, lang): teaser = soup.find("p", attrs={"id": ["teaser"]}) if teaser is None: paragraphs = soup.find_all("p") - lists = soup.find_all("li") - if (len(paragraphs) > 0) and (len(lists) > 0): - if (paragraphs[0].sourceline > lists[0].sourceline): - titleat = lists - else: - titleat = paragraphs - else: - if (len(paragraphs) > 0): - titleat = paragraphs - else: - titleat = lists if len(titleat) > 0: - if (titleat[0].tag == 'li'): - teaser = titleat[0].contents[0].prettify() - else: - teaser = titleat[0].prettify() - if (len(titleat) > 1) and (len(teaser) < 100): - if (titleat[1].tag == 'li'): - teaser2 = titleat[1].contents[0].prettify() - else: - teaser2 = titleat[1].prettify() - if len(teaser2) > len(teaser): - teaser = teaser2 + teaser = paragraphs[0].prettify() + if len(teaser) < 100: + LOGGER.warning("Cannot extract adequate teaser from '%s'", resource_name) + return else: LOGGER.warning("Cannot extract teaser from '%s'", resource_name) - teaser = "" + return else: - teaser = teaser.get_text() + teaser = teaser.prettify() re_proc = re.compile("^/[^/][^/]/essay/[^/]+/data/[^/]+$") imgs = soup.find_all("img") extra_files = []