# This file is part of GNU TALER.
# Copyright (C) 2014-2016 INRIA
#
# TALER is free software; you can redistribute it and/or modify it under the
# terms of the GNU Lesser General Public License as published by the Free Software
# Foundation; either version 2.1, or (at your option) any later version.
#
# TALER is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License along with
# GNU TALER; see the file COPYING.  If not, see <http://www.gnu.org/licenses/>
#
# @author Florian Dold
"""
Define content and associated metadata that is served on the blog.
"""
import logging
import os
import re
from collections import OrderedDict, namedtuple

from bs4 import BeautifulSoup
from pkg_resources import resource_filename, resource_stream

logger = logging.getLogger(__name__)

# Metadata record for a single blog article.
Article = namedtuple("Article", "slug title teaser main_file extra_files")

# Global registry of articles, keyed by slug; OrderedDict preserves the
# order in which articles are registered below.
articles = OrderedDict()

# Access-regulated images are src'd as "/essay/<slug>/data/<image>".
# Compiled once at module level instead of on every add_from_html() call.
_ESSAY_IMAGE_RE = re.compile(r"^/essay/[^/]+/data/[^/]+$")


def add_article(slug, title, teaser, main_file, extra_files=None):
    """
    Register an article in the global 'articles' registry.

    :param slug: URL-safe identifier of the article.
    :param title: human-readable article title.
    :param teaser: HTML snippet used as the article preview.
    :param main_file: package-relative path of the article's HTML body.
    :param extra_files: basenames of images served alongside the article.
    """
    # Use None as the default to avoid the shared-mutable-default pitfall.
    if extra_files is None:
        extra_files = []
    articles[slug] = Article(slug, title, teaser, main_file, extra_files)


def get_image_file(image):
    """Return the absolute filesystem path of a blog data image."""
    f = resource_filename("talerblog", os.path.join("blog/data/", image))
    return os.path.abspath(f)


def get_article_file(article):
    """Return the basename of the given article's main HTML file."""
    f = resource_filename("talerblog", article.main_file)
    return os.path.basename(f)


def add_from_html(resource_name, teaser_paragraph=0, title=None):
    """
    Extract title, teaser and regulated image references from an
    article's HTML and register it via add_article().

    :param resource_name: package-relative path of the article HTML.
    :param teaser_paragraph: index of the <p> element used as the teaser
        when no element with id="teaser" is present.
    :param title: explicit title override; when None, the title is taken
        from the first <h1 class="chapter"|"unnumbered"> element,
        falling back to the resource name.
    """
    res = resource_stream("talerblog", resource_name)
    soup = BeautifulSoup(res, 'html.parser')
    if title is None:
        title_el = soup.find("h1", attrs={"class": ["chapter", "unnumbered"]})
        if title_el is None:
            # logger.warn() is deprecated in favor of logger.warning().
            logger.warning("Can't extract title from '%s'", resource_name)
            title = resource_name
        else:
            title = title_el.get_text().strip()
    slug = title.replace(" ", "_")
    paragraphs = soup.find_all("p")
    teaser = soup.find("p", attrs={"id": ["teaser"]})
    if teaser is None:
        teaser = str(paragraphs[teaser_paragraph])
    imgs = soup.find_all("img")
    extra_files = []
    for img in imgs:
        # We require that any image whose access is regulated is src'd
        # as "/essay/<slug>/data/img.png".  We also need to check that
        # the slug component actually matches the article's slug.
        if _ESSAY_IMAGE_RE.match(img['src']):
            # URL paths are always '/'-delimited; splitting on os.sep
            # would silently break on Windows (where os.sep is '\\').
            components = img['src'].split('/')
            if components[2] == slug:
                # Lazy %-style args avoid formatting when INFO is disabled.
                logger.info("extra file for %s is %s",
                            slug, os.path.basename(img['src']))
                extra_files.append(os.path.basename(img['src']))
            else:
                logger.warning("Image src and slug don't match: '%s' != '%s'",
                               components[2], slug)
    add_article(slug, title, teaser, resource_name, extra_files)


add_from_html("blog/articles/scrap1_U.0.html", 0)
add_from_html("blog/articles/scrap1_U.1.html", 0)
add_from_html("blog/articles/scrap1_1.html", 1)
add_from_html("blog/articles/scrap1_2.html")
add_from_html("blog/articles/scrap1_3.html")
add_from_html("blog/articles/scrap1_4.html")
add_from_html("blog/articles/scrap1_5.html")
add_from_html("blog/articles/scrap1_6.html")
add_from_html("blog/articles/scrap1_7.html")
add_from_html("blog/articles/scrap1_8.html")
add_from_html("blog/articles/scrap1_9.html")
add_from_html("blog/articles/scrap1_10.html")
add_from_html("blog/articles/scrap1_11.html")
add_from_html("blog/articles/scrap1_12.html")
add_from_html("blog/articles/scrap1_13.html", 1)
add_from_html("blog/articles/scrap1_14.html")
add_from_html("blog/articles/scrap1_15.html")
add_from_html("blog/articles/scrap1_16.html")
# Register the remaining blog articles.  The optional second argument is
# the index of the paragraph to use as the teaser when the article HTML
# has no element with id="teaser" (it defaults to the first paragraph).
add_from_html("blog/articles/scrap1_17.html")
add_from_html("blog/articles/scrap1_18.html")
add_from_html("blog/articles/scrap1_19.html")
add_from_html("blog/articles/scrap1_20.html", 1)
add_from_html("blog/articles/scrap1_21.html")
add_from_html("blog/articles/scrap1_22.html")
add_from_html("blog/articles/scrap1_23.html")
add_from_html("blog/articles/scrap1_24.html")
add_from_html("blog/articles/scrap1_25.html", 1)
add_from_html("blog/articles/scrap1_26.html", 1)
add_from_html("blog/articles/scrap1_27.html")
add_from_html("blog/articles/scrap1_28.html", 1)
add_from_html("blog/articles/scrap1_29.html")
add_from_html("blog/articles/scrap1_30.html", 1)
add_from_html("blog/articles/scrap1_31.html", 1)
add_from_html("blog/articles/scrap1_32.html")
add_from_html("blog/articles/scrap1_33.html")
add_from_html("blog/articles/scrap1_34.html")
add_from_html("blog/articles/scrap1_35.html")
add_from_html("blog/articles/scrap1_36.html")
add_from_html("blog/articles/scrap1_37.html")
add_from_html("blog/articles/scrap1_38.html")
add_from_html("blog/articles/scrap1_39.html")
add_from_html("blog/articles/scrap1_40.html")
add_from_html("blog/articles/scrap1_41.html")
add_from_html("blog/articles/scrap1_42.html")
add_from_html("blog/articles/scrap1_43.html", 2)
add_from_html("blog/articles/scrap1_46.html", 1)
add_from_html("blog/articles/scrap1_47.html")