diff options
author | Christian Grothoff <christian@grothoff.org> | 2022-10-19 08:27:10 +0200 |
---|---|---|
committer | Christian Grothoff <christian@grothoff.org> | 2022-10-19 08:27:21 +0200 |
commit | 25314dd613ecaed102567b2aa3745de34817aaf3 (patch) | |
tree | 7924e9e240c8159f75c648d920e713bbbcacad4a /talermerchantdemos | |
parent | a09b7bde1827b2f8c59e9f7a4227c3e204489d84 (diff) | |
download | taler-merchant-demos-25314dd613ecaed102567b2aa3745de34817aaf3.tar.gz taler-merchant-demos-25314dd613ecaed102567b2aa3745de34817aaf3.tar.bz2 taler-merchant-demos-25314dd613ecaed102567b2aa3745de34817aaf3.zip |
try to fix #7390: title extraction for 'Only the Free World Can Stand up to Microsoft' and related issues
Diffstat (limited to 'talermerchantdemos')
-rw-r--r-- | talermerchantdemos/blog/content.py | 19 |
1 files changed, 15 insertions, 4 deletions
diff --git a/talermerchantdemos/blog/content.py b/talermerchantdemos/blog/content.py
index 8de89f4..ba69248 100644
--- a/talermerchantdemos/blog/content.py
+++ b/talermerchantdemos/blog/content.py
@@ -104,10 +104,21 @@ def add_from_html(resource_name, lang):
     teaser = soup.find("p", attrs={"id": ["teaser"]})
     if teaser is None:
         paragraphs = soup.find_all("p")
-        if len(paragraphs) > 0:
-            teaser = paragraphs[0].get_text()
-            if (len(paragraphs) > 1) and (len(teaser) < 100):
-                teaser2 = paragraphs[1].get_text()
+        lists = soup.find_all("li")
+        if (len(paragraphs) > 0) and (len(lists) > 0):
+            if (paragraphs[0].sourcepos > lists[0].sourcepos):
+                titleat = lists
+            else:
+                titleat = paragraphs
+        else:
+            if (len(paragraphs) > 0):
+                titleat = paragraphs
+            else:
+                titleat = lists
+        if len(titleat) > 0:
+            teaser = titelat[0].prettify()
+            if (len(titleat) > 1) and (len(teaser) < 100):
+                teaser2 = titleat[1].prettify()
                 if len(teaser2) > len(teaser):
                     teaser = teaser2
         else: