summaryrefslogtreecommitdiff
path: root/http/index.php
diff options
context:
space:
mode:
authoremkael <emkael@tlen.pl>2015-02-09 20:14:59 +0100
committeremkael <emkael@tlen.pl>2015-02-09 20:14:59 +0100
commit17167b1c0e088000164b87ef0a02237a3ffc107c (patch)
tree79c61deeea11e57b3c8e99dc39ca76cee491a745 /http/index.php
parentdd3d2509f6048e11f9b2b127f6c7acb80a506d8d (diff)
* mod_python port
Diffstat (limited to 'http/index.php')
-rw-r--r--http/index.php139
1 files changed, 0 insertions, 139 deletions
diff --git a/http/index.php b/http/index.php
deleted file mode 100644
index a7034b6..0000000
--- a/http/index.php
+++ /dev/null
@@ -1,139 +0,0 @@
-<?php
-
-define('BASE_PATH', '/');
-define('CACHE_PATH', '../cache');
-define('CEZAR_URL', 'http://msc.com.pl/cezar/');
-define('QUERYPATH_PATH', '/usr/share/php/QueryPath/QueryPath.php');
-
-// fetch item from content cache
-function get_cache_content($cacheKey, $url, $force = FALSE) {
- $cacheFile = realpath(dirname(__FILE__) . '/' . CACHE_PATH) . '/' . $cacheKey;
- if ($force || !(file_exists($cacheFile) && filemtime($cacheFile) > strtotime('-1 day'))) {
- fetch_with_user_agent_spoof($cacheFile, $url);
- }
- return file_get_contents($cacheFile);
-}
-
-// save remote URL locally, forwarding browser's User Agent String
-function fetch_with_user_agent_spoof($targetFile, $sourceUrl) {
- file_put_contents(
- $targetFile,
- file_get_contents($sourceUrl,
- FALSE,
- stream_context_create(['http' => [
- 'user_agent' => $_SERVER['HTTP_USER_AGENT']
- ]
- ]))
- );
-}
-
-// parse requested path (after rewrite)
-$url = parse_url(preg_replace('#^' . preg_quote(BASE_PATH) . '#', '', $_SERVER['REQUEST_URI']));
-$path = array_values(array_filter(explode('/', $url['path'])));
-
-if ($path) {
- // /[ANYTHING]/refresh disables cache (forces cache refresh)
- $cache = !(count($path) > 1 && $path[1] == 'refresh');
-
- // build Cezar URL for requested path
- $path[0] = urldecode($path[0]);
- $searchUrl = new http\Url(CEZAR_URL,
- ['query' => http_build_query(
- ['pid_search' => $path[0],
- 'p' => '21']
- )]);
-
- $mscUrl = $searchUrl->toString();
-
- $content = get_cache_content($path[0], $mscUrl, !$cache); // requested content
- $contentLines = explode(PHP_EOL, $content);
-
- // if the comment delimiters are present, we're possibly dealing with the content we want, slice it and wrap it
- $delimiters = array_keys(preg_grep('/---- page content /', $contentLines));
- if ($delimiters) {
- $content = '<html><head>'
- . '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />'
- . '<base href="' . BASE_PATH . '" />'
- . '<style>'
- . 'body{width:580px;font-family:Tahoma,Geneva,Arial,Helvetica,"sans-serif";}'
- . 'a{text-decoration:none;color:black;}'
- . '</style>'
- . '</head><body>'
- . implode(PHP_EOL, array_slice($contentLines, $delimiters[0]+1, $delimiters[1]-$delimiters[0]-1))
- . '</body></html>';
- }
- else {
- die('Malformed (even more than usually) content :(');
- }
-
- require_once(QUERYPATH_PATH);
-
- $html = htmlqp($content, NULL, ['convert_to_encoding' => 'utf-8']);
-
- // for search list pages, replace links with internal links to player IDs
- $links = $html->find('a[href^="?p=21&pid="]');
- if ($links->size()) {
- foreach ($links as $link) {
- $href = [];
- parse_str($link->attr('href'), $href);
- $link->attr('href', $href['pid']);
- }
- }
-
- $html->top();
-
- // remove general crap
- $html->find('script, table.msc_noprint, center>p')->remove();
- $html->top();
-
- // leave only first-ish table of the content
- $html->find('table > tr > td')->eq(1)->remove();
- $html->top();
- $html->find('table > tr > td > table')->eq(2)->remove();
- $html->top();
- $html->find('table > tr > td > table')->eq(2)->remove();
- $html->top();
-
- // remove internal Cezar links
- $innerLinks = $html->find('table > tr > td > table a');
- foreach ($innerLinks as $innerLink) {
- $innerLink->removeAttr('href');
- }
- // get rid of Cezar link icons (right green arrows)
- $html->find('img[src*="ico_link_8.gif"]')->remove();
- $html->top();
-
- // proxy all external images, by resolving them relatively to the original server
- // and cache them locally
- // internal images are left untouched in the markup and are proxied through pic/fetch.php handler
- // (if they're not present/overwritten locally)
- $images = $html->find('img')->not('[src^="pic/"]');
- foreach ($images as $image) {
- $src = $image->attr('src');
- $url = new http\Url(CEZAR_URL, $src, http\Url::FROM_ENV | http\Url::SANITIZE_PATH | http\Url::JOIN_PATH | http\Url::REPLACE);
- $imageUrl = $url->toString();
- $cachedImageUrl = 'foto/' . md5($imageUrl) . '.' . array_pop(explode('.', $imageUrl));
- if (!file_exists($cachedImageUrl) || !$cache) {
- fetch_with_user_agent_spoof($cachedImageUrl, $imageUrl);
- }
- $image->attr('src', $cachedImageUrl);
- }
- $html->top();
-
- // link to the original URL on the image from foto/ directory and on the name+surname
- // (actually, on every text with font-size:28px set)
- $linking = $html->find('img[src^="foto/"],span[style*=":28px"]');
- foreach ($linking as $link) {
- $link->wrap('<a></a>');
- $link->parent()->attr('href', $mscUrl);
- }
- $html->top();
-
- // all done
- print $html->html();
-}
-else {
- die('Nothing to see here, move along.');
-}
-
-?>