summary | refs | log | tree | commit | diff
path: root/talermerchantdemos
diff options
context:
space:
mode:
author: Christian Grothoff <christian@grothoff.org> 2022-10-19 08:27:10 +0200
committer: Christian Grothoff <christian@grothoff.org> 2022-10-19 08:27:21 +0200
commit: 25314dd613ecaed102567b2aa3745de34817aaf3 (patch)
tree: 7924e9e240c8159f75c648d920e713bbbcacad4a /talermerchantdemos
parent: a09b7bde1827b2f8c59e9f7a4227c3e204489d84 (diff)
download: taler-merchant-demos-25314dd613ecaed102567b2aa3745de34817aaf3.tar.gz
taler-merchant-demos-25314dd613ecaed102567b2aa3745de34817aaf3.tar.bz2
taler-merchant-demos-25314dd613ecaed102567b2aa3745de34817aaf3.zip
try to fix #7390: title extraction for 'Only the Free World Can Stand up to Microsoft' and related issues
Diffstat (limited to 'talermerchantdemos')
-rw-r--r-- talermerchantdemos/blog/content.py 19
1 files changed, 15 insertions, 4 deletions
diff --git a/talermerchantdemos/blog/content.py b/talermerchantdemos/blog/content.py
index 8de89f4..ba69248 100644
--- a/talermerchantdemos/blog/content.py
+++ b/talermerchantdemos/blog/content.py
@@ -104,10 +104,21 @@ def add_from_html(resource_name, lang):
teaser = soup.find("p", attrs={"id": ["teaser"]})
if teaser is None:
paragraphs = soup.find_all("p")
- if len(paragraphs) > 0:
- teaser = paragraphs[0].get_text()
- if (len(paragraphs) > 1) and (len(teaser) < 100):
- teaser2 = paragraphs[1].get_text()
+ lists = soup.find_all("li")
+ if (len(paragraphs) > 0) and (len(lists) > 0):
+ if (paragraphs[0].sourcepos > lists[0].sourcepos):
+ titleat = lists
+ else:
+ titleat = paragraphs
+ else:
+ if (len(paragraphs) > 0):
+ titleat = paragraphs
+ else:
+ titleat = lists
+ if len(titleat) > 0:
+ teaser = titelat[0].prettify()
+ if (len(titleat) > 1) and (len(teaser) < 100):
+ teaser2 = titleat[1].prettify()
if len(teaser2) > len(teaser):
teaser = teaser2
else: