strtotime('-1 day'))) {
fetch_with_user_agent_spoof($cacheFile, $url);
}
return file_get_contents($cacheFile);
}
/**
 * Save a remote URL locally, forwarding the browser's User-Agent string.
 *
 * The download is read fully into memory first and the target file is only
 * written when the download succeeded, so a failed fetch never creates an
 * empty file nor overwrites a previously saved copy.
 *
 * @param string $targetFile local path the fetched content is written to
 * @param string $sourceUrl  URL (or any path readable by file_get_contents()) to fetch
 * @return bool TRUE when the content was fetched and written, FALSE otherwise
 */
function fetch_with_user_agent_spoof($targetFile, $sourceUrl) {
    // Only forward the User-Agent when the client actually sent one;
    // with the option left out PHP falls back to its own configured default.
    $httpOptions = [];
    if (isset($_SERVER['HTTP_USER_AGENT'])) {
        $httpOptions['user_agent'] = $_SERVER['HTTP_USER_AGENT'];
    }

    $payload = file_get_contents(
        $sourceUrl,
        FALSE,
        stream_context_create(['http' => $httpOptions])
    );

    // file_get_contents() signals failure with FALSE; writing that value out
    // would leave a zero-byte file behind in place of the real content.
    if ($payload === FALSE) {
        return FALSE;
    }

    return file_put_contents($targetFile, $payload) !== FALSE;
}
// parse requested path (after rewrite): strip the BASE_PATH prefix from the
// request URI and split what is left into its non-empty segments
// ('#' is passed to preg_quote() because it is the delimiter of the pattern)
$url = parse_url(preg_replace('#^' . preg_quote(BASE_PATH, '#') . '#', '', $_SERVER['REQUEST_URI']));
// parse_url() yields FALSE for a seriously malformed URI and may omit 'path';
// treat both cases as an empty path
$requestPath = (is_array($url) && isset($url['path'])) ? $url['path'] : '';
// drop only empty segments: a bare array_filter() would also discard a "0" segment
$path = array_values(array_filter(explode('/', $requestPath), function ($segment) {
    return $segment !== '';
}));
if ($path) {
    // /[ANYTHING]/refresh disables cache (forces cache refresh)
    $cache = !(count($path) > 1 && $path[1] === 'refresh');

    // build Cezar URL for requested path
    $path[0] = urldecode($path[0]);
    $searchUrl = new http\Url(CEZAR_URL,
        ['query' => http_build_query(
            ['pid_search' => $path[0],
             'p' => '21']
        )]);
    $mscUrl = $searchUrl->toString();
    // NOTE(review): $path[0] is request-controlled and is handed to get_cache_content()
    // as its first argument; if that is used to build a file name, confirm it is sanitised there
    $content = get_cache_content($path[0], $mscUrl, !$cache); // requested content
    $contentLines = explode(PHP_EOL, $content);

    // if the comment delimiters are present, we're possibly dealing with the content we want, slice it and wrap it
    // both an opening and a closing delimiter line are needed to know where the slice ends
    $delimiters = array_keys(preg_grep('/---- page content /', $contentLines));
    if (count($delimiters) >= 2) {
        // NOTE(review): apart from the first one (a single newline) the literals below are
        // empty strings; they look like placeholders for wrapper markup - confirm
        $content = '
'
            . ''
            . ''
            . ''
            . ''
            . implode(PHP_EOL, array_slice($contentLines, $delimiters[0] + 1, $delimiters[1] - $delimiters[0] - 1))
            . '';
    }
    else {
        die('Malformed (even more than usually) content :(');
    }

    require_once('/usr/share/php/QueryPath/QueryPath.php');
    $html = htmlqp($content, NULL, ['convert_to_encoding' => 'utf-8']);

    // for search list pages, replace links with internal links to player IDs
    $links = $html->find('a[href^="?p=21&pid="]');
    if ($links->size()) {
        foreach ($links as $link) {
            $href = [];
            parse_str($link->attr('href'), $href);
            $link->attr('href', $href['pid']);
        }
    }
    $html->top();

    // remove general crap
    $html->find('script, table.msc_noprint, center>p')->remove();
    $html->top();

    // leave only first-ish table of the content
    $html->find('table > tr > td')->eq(1)->remove();
    $html->top();
    // the same selector is applied twice on purpose: after the first removal
    // the following table moves into position 2
    $html->find('table > tr > td > table')->eq(2)->remove();
    $html->top();
    $html->find('table > tr > td > table')->eq(2)->remove();
    $html->top();

    // remove internal Cezar links
    $innerLinks = $html->find('table > tr > td > table a');
    foreach ($innerLinks as $innerLink) {
        $innerLink->removeAttr('href');
    }

    // get rid of Cezar link icons (right green arrows)
    $html->find('img[src*="ico_link_8.gif"]')->remove();
    $html->top();

    // proxy all external images, by resolving them relatively to the original server
    // and cache them locally
    // internal images are left untouched in the markup and are proxied through pic/fetch.php handler
    // (if they're not present/overwritten locally)
    $images = $html->find('img')->not('[src^="pic/"]');
    foreach ($images as $image) {
        $src = $image->attr('src');
        if ($src === NULL || $src === '') {
            // nothing to resolve: without a src the URL below would be CEZAR_URL itself
            continue;
        }
        $resolvedUrl = new http\Url(CEZAR_URL, $src, http\Url::FROM_ENV | http\Url::SANITIZE_PATH | http\Url::JOIN_PATH | http\Url::REPLACE);
        $imageUrl = $resolvedUrl->toString();

        // cache file name: hash of the full URL plus the extension of the URL *path* only,
        // so that a query string or a dot-less path cannot leak '?' or '/' into the file name
        $extension = pathinfo((string) parse_url($imageUrl, PHP_URL_PATH), PATHINFO_EXTENSION);
        $cachedImageUrl = 'foto/' . md5($imageUrl) . ($extension !== '' ? '.' . $extension : '');

        if (!file_exists($cachedImageUrl) || !$cache) {
            fetch_with_user_agent_spoof($cachedImageUrl, $imageUrl);
        }
        $image->attr('src', $cachedImageUrl);
    }
    $html->top();

    // all done
    print $html->html();
}
else {
    die('Nothing to see here, move along.');
}
?>