##
# This file is part of GNU TALER.
# Copyright (C) 2014-2016 INRIA
#
# TALER is free software; you can redistribute it and/or modify it under the
# terms of the GNU Lesser General Public License as published by the Free Software
# Foundation; either version 2.1, or (at your option) any later version.
#
# TALER is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License along with
# GNU TALER; see the file COPYING. If not, see <http://www.gnu.org/licenses/>
#
# @author Florian Dold
# @brief Define content and associated metadata that is served on the blog.
from collections import OrderedDict, namedtuple
import logging
import os
import re
from bs4 import BeautifulSoup
from pkg_resources import resource_stream, resource_filename
LOGGER = logging.getLogger(__name__)
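# bs4 may hand encoding detection off to chardet, whose charsetprober
# logger is extremely chatty at DEBUG level; raise it to INFO so the
# blog's own logs stay readable.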
NOISY_LOGGER = logging.getLogger("chardet.charsetprober")
NOISY_LOGGER.setLevel(logging.INFO)
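##
# Record describing one blog article: its URL slug, human-readable title,
# teaser text, the resource path of its main HTML file, and the extra
# files (such as images and sounds) it references.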
Article = namedtuple("Article", "slug title teaser main_file extra_files")
##
# @var if an article is added to this dictionary, it will
#      be made available in the blog, keyed by its slug.
ARTICLES = OrderedDict()
##
# Add an article to the list of available articles.
#
# @param slug article's title with all the spaces converted to underscores.
# @param title article's title.
# @param teaser a short description of the main article's content.
# @param main_file path to the article's HTML file.
# @param extra_files collection of extra files associated with the
#        article, like images and sounds.
def add_article(slug, title, teaser, main_file, extra_files):
ARTICLES[slug] = Article(slug, title, teaser, main_file, extra_files)
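##
# A minimal usage sketch (the file name below is illustrative, not an
# article actually shipped with the demo); articles are normally
# registered through add_from_html() further down, but add_article()
# can also be called directly:
#
#   add_article("Example_Title", "Example Title", "A short teaser.",
#               "blog/articles/example.html", [])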
##
# Build the file path of an image.
#
# @param image the image filename.
# @return the path to the image file.
def get_image_file(image):
filex = resource_filename("talermerchantdemos", os.path.join("blog/data/", image))
return os.path.abspath(filex)
##
# Build the file path of an article.
#
# @param article the Article record whose main HTML file is looked up.
# @return the file name (basename) of the article's HTML file.
def get_article_file(article):
filex = resource_filename("talermerchantdemos", article.main_file)
return os.path.basename(filex)
##
# Extract information from an HTML file and use it to make the
# article available in the blog.
#
# @param resource_name path to the (HTML) article.
# @param teaser_paragraph position of the teaser paragraph within the
#        article's list of P tags. Defaults to zero, since the teaser is
#        normally found in the very first P tag.
# @param title the article's title; normally this is extracted from the
#        HTML itself, so pass it here only if an explicit title needs to
#        be specified.
def add_from_html(resource_name, teaser_paragraph=0, title=None):
res = resource_stream("talermerchantdemos", resource_name)
soup = BeautifulSoup(res, 'html.parser')
res.close()
if title is None:
title_el = soup.find("h1", attrs={"class": ["chapter", "unnumbered"]})
if title_el is None:
LOGGER.warning("Can't extract title from '%s'", resource_name)
title = resource_name
else:
title = title_el.get_text().strip()
slug = title.replace(" ", "_")
paragraphs = soup.find_all("p")
teaser = soup.find("p", attrs={"id": ["teaser"]})
if teaser is None:
teaser = paragraphs[teaser_paragraph].get_text()
else:
teaser = teaser.get_text()
re_proc = re.compile("^/essay/[^/]+/data/[^/]+$")
imgs = soup.find_all("img")
extra_files = []
for img in imgs:
        # We require that any image whose access is regulated is src'd
        # as "/essay/<slug>/data/<filename>" (see the regex above). We
        # also check that the slug component matches this article's slug.
if re_proc.match(img['src']):
            # img['src'] is a URL path, so split on "/" rather than os.sep;
            # component 2 is the slug, the last component the file name.
            if img['src'].split("/")[2] == slug:
                LOGGER.info(
                    "extra file for %s is %s",
                    slug, os.path.basename(img['src'])
                )
                extra_files.append(os.path.basename(img['src']))
            else:
                LOGGER.warning(
                    "Image src and slug don't match: '%s' != '%s'",
                    img['src'].split("/")[2], slug
                )
add_article(slug, title, teaser, resource_name, extra_files)
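##
# Register every demo article shipped with the package. The optional
# second argument selects which <p> tag supplies the teaser for articles
# that lack an explicit "teaser" paragraph.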
add_from_html("blog/articles/scrap1_U.0.html", 0)
add_from_html("blog/articles/scrap1_U.1.html", 0)
add_from_html("blog/articles/scrap1_1.html", 1)
add_from_html("blog/articles/scrap1_2.html")
add_from_html("blog/articles/scrap1_3.html")
add_from_html("blog/articles/scrap1_4.html")
add_from_html("blog/articles/scrap1_5.html")
add_from_html("blog/articles/scrap1_6.html")
add_from_html("blog/articles/scrap1_7.html")
add_from_html("blog/articles/scrap1_8.html")
add_from_html("blog/articles/scrap1_9.html")
add_from_html("blog/articles/scrap1_10.html")
add_from_html("blog/articles/scrap1_11.html")
add_from_html("blog/articles/scrap1_12.html")
add_from_html("blog/articles/scrap1_13.html", 1)
add_from_html("blog/articles/scrap1_14.html")
add_from_html("blog/articles/scrap1_15.html")
add_from_html("blog/articles/scrap1_16.html")
add_from_html("blog/articles/scrap1_17.html")
add_from_html("blog/articles/scrap1_18.html")
add_from_html("blog/articles/scrap1_19.html")
add_from_html("blog/articles/scrap1_20.html", 1)
add_from_html("blog/articles/scrap1_21.html")
add_from_html("blog/articles/scrap1_22.html")
add_from_html("blog/articles/scrap1_23.html")
add_from_html("blog/articles/scrap1_24.html")
add_from_html("blog/articles/scrap1_25.html", 1)
add_from_html("blog/articles/scrap1_26.html", 1)
add_from_html("blog/articles/scrap1_27.html")
add_from_html("blog/articles/scrap1_28.html", 1)
add_from_html("blog/articles/scrap1_29.html")
add_from_html("blog/articles/scrap1_30.html", 1)
add_from_html("blog/articles/scrap1_31.html", 1)
add_from_html("blog/articles/scrap1_32.html")
add_from_html("blog/articles/scrap1_33.html")
add_from_html("blog/articles/scrap1_34.html")
add_from_html("blog/articles/scrap1_35.html")
add_from_html("blog/articles/scrap1_36.html")
add_from_html("blog/articles/scrap1_37.html")
add_from_html("blog/articles/scrap1_38.html")
add_from_html("blog/articles/scrap1_39.html")
add_from_html("blog/articles/scrap1_40.html")
add_from_html("blog/articles/scrap1_41.html")
add_from_html("blog/articles/scrap1_42.html")
add_from_html("blog/articles/scrap1_43.html", 2)
add_from_html("blog/articles/scrap1_46.html", 1)
add_from_html("blog/articles/scrap1_47.html")