summaryrefslogtreecommitdiff
path: root/talermerchantdemos/blog/content.py
diff options
context:
space:
mode:
Diffstat (limited to 'talermerchantdemos/blog/content.py')
-rw-r--r--talermerchantdemos/blog/content.py13
1 files changed, 7 insertions, 6 deletions
diff --git a/talermerchantdemos/blog/content.py b/talermerchantdemos/blog/content.py
index a0e90dd..fa9ace2 100644
--- a/talermerchantdemos/blog/content.py
+++ b/talermerchantdemos/blog/content.py
@@ -26,7 +26,7 @@ from pkg_resources import resource_stream, resource_filename
LOGGER = logging.getLogger(__name__)
NOISY_LOGGER = logging.getLogger("chardet.charsetprober")
NOISY_LOGGER.setLevel(logging.INFO)
-Article = namedtuple("Article", "slug title teaser main_file extra_files")
+Article = namedtuple("Article", "slug title teaser main_file extra_files lang")
##
# @var if a article is added to this list, then it will
@@ -43,8 +43,9 @@ ARTICLES = OrderedDict()
# @param main_file path to the article's HTML file.
# @param extra_file collection of extra files associated with the
# article, like images and sounds.
-def add_article(slug, title, teaser, main_file, extra_files):
- ARTICLES[slug] = Article(slug, title, teaser, main_file, extra_files)
+# @param lang language of the article
+def add_article(slug, title, teaser, main_file, extra_files, lang='en'):
+ ARTICLES[slug] = Article(slug, title, teaser, main_file, extra_files, lang)
##
@@ -85,7 +86,7 @@ def add_from_html(resource_name, teaser_paragraph=0, title=None):
if title is None:
title_el = soup.find("h1", attrs={"class": ["chapter", "unnumbered"]})
if title_el is None:
- LOGGER.warning("Can't extract title from '%s'", resource_name)
+ LOGGER.warning("Cannot extract title from '%s'", resource_name)
title = resource_name
else:
title = title_el.get_text().strip()
@@ -97,7 +98,7 @@ def add_from_html(resource_name, teaser_paragraph=0, title=None):
teaser = paragraphs[teaser_paragraph].get_text()
else:
teaser = teaser.get_text()
- re_proc = re.compile("^/essay/[^/]+/data/[^/]+$")
+ re_proc = re.compile("^/[^/][^/]/essay/[^/]+/data/[^/]+$")
imgs = soup.find_all("img")
extra_files = []
for img in imgs:
@@ -114,7 +115,7 @@ def add_from_html(resource_name, teaser_paragraph=0, title=None):
else:
LOGGER.warning("Image src and slug don't match: '%s' != '%s'" \
% (img['src'].split(os.sep)[2], slug))
- add_article(slug, title, teaser, resource_name, extra_files)
+ add_article(slug, title, teaser, resource_name, extra_files, 'en')
add_from_html("blog/articles/scrap1_U.0.html", 0)