From 17167b1c0e088000164b87ef0a02237a3ffc107c Mon Sep 17 00:00:00 2001 From: emkael Date: Mon, 9 Feb 2015 20:14:59 +0100 Subject: * mod_python port --- http/index.php | 139 --------------------------------------------------------- 1 file changed, 139 deletions(-) delete mode 100644 http/index.php (limited to 'http/index.php') diff --git a/http/index.php b/http/index.php deleted file mode 100644 index a7034b6..0000000 --- a/http/index.php +++ /dev/null @@ -1,139 +0,0 @@ - strtotime('-1 day'))) { - fetch_with_user_agent_spoof($cacheFile, $url); - } - return file_get_contents($cacheFile); -} - -// save remote URL locally, forwarding browser's User Agent String -function fetch_with_user_agent_spoof($targetFile, $sourceUrl) { - file_put_contents( - $targetFile, - file_get_contents($sourceUrl, - FALSE, - stream_context_create(['http' => [ - 'user_agent' => $_SERVER['HTTP_USER_AGENT'] - ] - ])) - ); -} - -// parse requested path (after rewrite) -$url = parse_url(preg_replace('#^' . preg_quote(BASE_PATH) . '#', '', $_SERVER['REQUEST_URI'])); -$path = array_values(array_filter(explode('/', $url['path']))); - -if ($path) { - // /[ANYTHING]/refresh disables cache (forces cache refresh) - $cache = !(count($path) > 1 && $path[1] == 'refresh'); - - // build Cezar URL for requested path - $path[0] = urldecode($path[0]); - $searchUrl = new http\Url(CEZAR_URL, - ['query' => http_build_query( - ['pid_search' => $path[0], - 'p' => '21'] - )]); - - $mscUrl = $searchUrl->toString(); - - $content = get_cache_content($path[0], $mscUrl, !$cache); // requested content - $contentLines = explode(PHP_EOL, $content); - - // if the comment delimiters are present, we're possibly dealing with the content we want, slice it and wrap it - $delimiters = array_keys(preg_grep('/---- page content /', $contentLines)); - if ($delimiters) { - $content = '' - . '' - . '' - . '' - . '' - . implode(PHP_EOL, array_slice($contentLines, $delimiters[0]+1, $delimiters[1]-$delimiters[0]-1)) - . ''; - } - else { - die('Malformed (even more than usually) content :('); - } - - require_once(QUERYPATH_PATH); - - $html = htmlqp($content, NULL, ['convert_to_encoding' => 'utf-8']); - - // for search list pages, replace links with internal links to player IDs - $links = $html->find('a[href^="?p=21&pid="]'); - if ($links->size()) { - foreach ($links as $link) { - $href = []; - parse_str($link->attr('href'), $href); - $link->attr('href', $href['pid']); - } - } - - $html->top(); - - // remove general crap - $html->find('script, table.msc_noprint, center>p')->remove(); - $html->top(); - - // leave only first-ish table of the content - $html->find('table > tr > td')->eq(1)->remove(); - $html->top(); - $html->find('table > tr > td > table')->eq(2)->remove(); - $html->top(); - $html->find('table > tr > td > table')->eq(2)->remove(); - $html->top(); - - // remove internal Cezar links - $innerLinks = $html->find('table > tr > td > table a'); - foreach ($innerLinks as $innerLink) { - $innerLink->removeAttr('href'); - } - // get rid of Cezar link icons (right green arrows) - $html->find('img[src*="ico_link_8.gif"]')->remove(); - $html->top(); - - // proxy all external images, by resolving them relatively to the original server - // and cache them locally - // internal images are left untouched in the markup and are proxied through pic/fetch.php handler - // (if they're not present/overwritten locally) - $images = $html->find('img')->not('[src^="pic/"]'); - foreach ($images as $image) { - $src = $image->attr('src'); - $url = new http\Url(CEZAR_URL, $src, http\Url::FROM_ENV | http\Url::SANITIZE_PATH | http\Url::JOIN_PATH | http\Url::REPLACE); - $imageUrl = $url->toString(); - $cachedImageUrl = 'foto/' . md5($imageUrl) . '.' . array_pop(explode('.', $imageUrl)); - if (!file_exists($cachedImageUrl) || !$cache) { - fetch_with_user_agent_spoof($cachedImageUrl, $imageUrl); - } - $image->attr('src', $cachedImageUrl); - } - $html->top(); - - // link to the original URL on the image from foto/ directory and on the name+surname - // (actually, on every text with font-size:28px set) - $linking = $html->find('img[src^="foto/"],span[style*=":28px"]'); - foreach ($linking as $link) { - $link->wrap(''); - $link->parent()->attr('href', $mscUrl); - } - $html->top(); - - // all done - print $html->html(); -} -else { - die('Nothing to see here, move along.'); -} - -?> -- cgit v1.2.3