commit 8aa4fb4cee2252ceb23f85255b5da1a235bb3827
parent cd8cf7177a78910fe6c611de275fc186fd8fde73
Author: Marcello Stanisci <marcello.stanisci@inria.fr>
Date: Fri, 22 Jul 2016 18:59:05 +0200
adding initial logic to parse articles and
annotate which images should be served with them
Diffstat:
2 files changed, 42 insertions(+), 0 deletions(-)
diff --git a/examples/blog/articles/.gitignore b/examples/blog/articles/.gitignore
@@ -0,0 +1 @@
+articles_images.json
diff --git a/examples/blog/articles/article_images.php b/examples/blog/articles/article_images.php
@@ -0,0 +1,41 @@
+<?php
+ /**
+  * Record which images belong to the article $html_filename.
+  *
+  * Parses the HTML file, collects every 'img' tag whose 'src' starts with
+  * "/essay/<article_slug>/data/<image>", and stores the image basenames in
+  * articles_images.json (current working directory) as
+  * "$html_filename" => ["img1.png", "img2.png"]. An existing entry for the
+  * same article is overwritten; entries of other articles are kept.
+  *
+  * @param string $html_filename path of the article's HTML file
+  */
+ function add_article($html_filename){
+     $doc = new DOMDocument();
+     $doc->loadHTMLFile($html_filename);
+     $xpath = new DOMXPath($doc);
+     $xpath->registerNamespace('php', 'http://php.net/xpath');
+     $xpath->registerPhpFunctions('preg_match');
+     $elements = $xpath->query('//img[php:functionString("preg_match", "@^/essay/[^/]+/data/[^/]+@", @src) > 0]');
+
+     $db_filename = "articles_images.json";
+     $json_db = file_exists($db_filename) ? json_decode(file_get_contents($db_filename)) : null;
+     /* json_decode() yields NULL for a missing, empty or corrupt database;
+        assigning a property on NULL is an Error on PHP 8, so start afresh */
+     if (!is_object($json_db))
+         $json_db = new stdClass();
+
+     $images = array();
+     foreach ($elements as $img)
+         $images[] = basename($img->getAttribute("src"));
+     $json_db->$html_filename = $images;
+     file_put_contents($db_filename, json_encode($json_db));
+ }
+
+ /* keep libxml quiet about HTML5 markup: collect its errors internally */
+ libxml_use_internal_errors(true);
+
+ /* main: index each article; extend the list by hand for new articles */
+ foreach (array("scrap1_10.html", "scrap1_11.html") as $article_file)
+     add_article($article_file);
+?>