diff options
author | Ciro Mattia Gonano <ciromattia@gmail.com> | 2013-09-11 15:56:48 +0200 |
---|---|---|
committer | Ciro Mattia Gonano <ciromattia@gmail.com> | 2013-09-11 15:57:07 +0200 |
commit | 3069eaf35e833ffe4a1c1c7829dd7e168ae27420 (patch) | |
tree | d0c2e4d934cc34ba7d4232f759923b5a257dcb21 /buildscripts/wikibuilder/dumpHTML.inc | |
parent | b833247ce597ec26159b46c8dfbea7f1e265950b (diff) |
Merge up to r3319
Diffstat (limited to 'buildscripts/wikibuilder/dumpHTML.inc')
-rw-r--r-- | buildscripts/wikibuilder/dumpHTML.inc | 390 |
1 files changed, 0 insertions, 390 deletions
<?php
/**
 * Static HTML dump generator. Renders wiki pages through the 'htmldump' skin
 * and writes them, together with the images they reference, into a
 * destination directory.
 *
 * Recovered from the deletion diff of buildscripts/wikibuilder/dumpHTML.inc.
 *
 * @package MediaWiki
 * @subpackage Maintenance
 */

/** Progress is printed once every REPORTING_INTERVAL processed items */
define( 'REPORTING_INTERVAL', 10 );

require_once( 'includes/ImagePage.php' );
require_once( 'includes/CategoryPage.php' );

class DumpHTML {
	# Destination directory
	var $dest;

	# Show interlanguage links?
	var $interwiki = true;

	# Depth of HTML directory tree
	var $depth = 3;

	# Directory that commons images are copied into
	var $sharedStaticPath;

	# Relative path to image directory
	var $imageRel = 'upload';

	# Copy commons images instead of symlinking
	# (copyImages() currently always copies and does not consult this flag)
	var $forceCopy = false;

	# Make links assuming the script path is in the same directory as
	# the destination
	var $alternateScriptPath = false;

	/**
	 * Constructor.
	 *
	 * @param array $settings map of property name => value; every entry is
	 *        copied onto the property of the same name
	 */
	function DumpHTML( $settings ) {
		foreach ( $settings as $var => $value ) {
			$this->$var = $value;
		}
	}

	/**
	 * Write a set of articles specified by start and end page_id.
	 * Category pages are skipped here; doCategories() writes them.
	 *
	 * @param int $start first page_id to process
	 * @param int|bool $end last page_id (inclusive); false means the highest
	 *        page_id found in the page table
	 */
	function doArticles( $start, $end = false ) {
		$fname = 'DumpHTML::doArticles';

		$this->setupGlobals();

		if ( $end === false ) {
			$dbr =& wfGetDB( DB_SLAVE );
			$end = $dbr->selectField( 'page', 'max(page_id)', false, $fname );
		}

		for ( $id = $start; $id <= $end; $id++ ) {
			if ( !( $id % REPORTING_INTERVAL ) ) {
				print "Processing ID: $id\r";
			}
			$title = DumpTitle::newFromID( $id );
			if ( $title ) {
				$ns = $title->getNamespace();
				if ( $ns != NS_CATEGORY ) {
					$this->doArticle( $title );
				}
			}
		}
		print "\n";
	}

	/** Write the main page and the Special:Categories page */
	function doSpecials() {
		$this->doMainPage();

		$this->setupGlobals();
		print "Special:Categories...";
		$this->doArticle( DumpTitle::makeTitle( NS_SPECIAL, 'Categories' ) );
		print "\n";
	}

	/**
	 * Write the main page as index.html
	 *
	 * @return bool true when index.html was written, false when the page
	 *         could not be rendered or the file could not be opened
	 */
	function doMainPage() {
		global $wgMakeDumpLinks;

		print "Making index.html ";

		// Set up globals with no ../../.. in the link URLs
		$this->setupGlobals( 0 );

		// But still use that directory style
		$wgMakeDumpLinks = 3;

		$title = DumpTitle::newMainPage();

		$text = $this->getArticleHTML( $title );
		if ( $text === false ) {
			// Do not leave an empty index.html behind when there is nothing to write
			print "\nCan't render the main page\n";
			return false;
		}

		$file = fopen( "{$this->dest}/index.html", "w" );
		if ( !$file ) {
			print "\nCan't open index.html for writing\n";
			return false;
		}
		fwrite( $file, $text );
		fclose( $file );
		print "\n";
		return true;
	}

	/**
	 * Write image description pages: first for local images that have no
	 * article of their own, then for every file found in the shared static
	 * directory.
	 */
	function doImageDescriptions() {
		$fname = 'DumpHTML::doImageDescriptions';

		$this->setupGlobals( 3 );

		/**
		 * Dump image description pages that don't have an associated article, but do
		 * have a local image
		 */
		$dbr =& wfGetDB( DB_SLAVE );
		$res = $dbr->select( 'image', array( 'img_name' ), false, $fname );

		$i = 0;
		print "Writing image description pages for local images\n";
		$num = $dbr->numRows( $res );
		while ( $row = $dbr->fetchObject( $res ) ) {
			if ( !( ++$i % REPORTING_INTERVAL ) ) {
				print "Done $i of $num\r";
			}
			$title = DumpTitle::makeTitle( NS_IMAGE, $row->img_name );
			if ( $title->getArticleID() ) {
				// Already done by dumpHTML
				continue;
			}
			$this->doArticle( $title );
		}
		print "\n";

		/**
		 * Dump images which only have a real description page on commons
		 */
		print "Writing description pages for commons images\n";
		$i = 0;
		for ( $hash = 0; $hash < 256; $hash++ ) {
			# Two-level hashed directory name, e.g. "a/a7"
			$dir = sprintf( "%01x/%02x", intval( $hash / 16 ), $hash );

			# glob() may return false, and both results are numerically indexed
			# lists: the + operator would drop every thumbnail entry whose index
			# already exists in the first list, so merge them instead.
			$plain = glob( "{$this->sharedStaticPath}/$dir/*" );
			$thumbs = glob( "{$this->sharedStaticPath}/thumb/$dir/*" );
			$paths = array_merge(
				$plain ? $plain : array(),
				$thumbs ? $thumbs : array()
			);

			foreach ( $paths as $path ) {
				$file = basename( $path );
				if ( !( ++$i % REPORTING_INTERVAL ) ) {
					print "$i\r";
				}

				$title = DumpTitle::makeTitle( NS_IMAGE, $file );
				$this->doArticle( $title );
			}
		}
		print "\n";
	}

	/** Write one page for every distinct category named in categorylinks */
	function doCategories() {
		$fname = 'DumpHTML::doCategories';
		$this->setupGlobals();

		$dbr =& wfGetDB( DB_SLAVE );
		print "Selecting categories...";
		$sql = 'SELECT DISTINCT cl_to FROM ' . $dbr->tableName( 'categorylinks' );
		$res = $dbr->query( $sql, $fname );

		print "\nWriting " . $dbr->numRows( $res ) . " category pages\n";
		$i = 0;
		while ( $row = $dbr->fetchObject( $res ) ) {
			if ( !( ++$i % REPORTING_INTERVAL ) ) {
				print "$i\r";
			}
			$title = DumpTitle::makeTitle( NS_CATEGORY, $row->cl_to );
			$this->doArticle( $title );
		}
		print "\n";
	}

	/**
	 * Write an article specified by title: render it, copy the images it
	 * references and save the HTML. Does nothing when rendering fails.
	 */
	function doArticle( $title ) {
		$text = $this->getArticleHTML( $title );
		if ( $text === false ) {
			return;
		}

		# Parse the XHTML to find the images
		$images = $this->findImages( $text );
		$this->copyImages( $images );

		# Write to file
		$this->writeArticle( $title, $text );
	}

	/** Write the given text to the file identified by the given title object */
	function writeArticle( &$title, $text ) {
		# ':' and '~' in the hashed file name are replaced by '_'
		$filename = strtr( $title->getHashedFilename(), ':~', '__' );
		$fullName = "{$this->dest}/$filename";
		$fullDir = dirname( $fullName );

		wfMkdirParents( $fullDir, 0755 );

		$file = fopen( $fullName, 'w' );
		if ( !$file ) {
			print( "Can't open file $fullName for writing\n" );
			return;
		}

		fwrite( $file, $text );
		fclose( $file );
	}

	/**
	 * Set up globals required for parsing
	 *
	 * @param int|null $depth number of directory levels used for relative
	 *        links; null means $this->depth
	 */
	function setupGlobals( $depth = NULL ) {
		global $wgUser, $wgTitle, $wgMakeDumpLinks, $wgStylePath, $wgArticlePath;
		global $wgUploadPath, $wgLogo, $wgMaxCredits, $wgSharedUploadPath;
		global $wgHideInterlanguageLinks, $wgUploadDirectory, $wgThumbnailScriptPath;
		global $wgSharedThumbnailScriptPath, $wgEnableParserCache;

		# Logo path as it was before the first rewrite
		static $oldLogo = NULL;

		if ( is_null( $depth ) ) {
			$wgMakeDumpLinks = $this->depth;
		} else {
			$wgMakeDumpLinks = $depth;
		}

		# NOTE: $wgScriptPath is not declared global in this function, so the
		# value built here is local and only feeds the paths derived below.
		if ( $this->alternateScriptPath ) {
			if ( $wgMakeDumpLinks == 0 ) {
				$wgScriptPath = '.';
			} else {
				$wgScriptPath = '..' . str_repeat( '/..', $wgMakeDumpLinks - 1 );
			}
		} else {
			$wgScriptPath = '..' . str_repeat( '/..', $wgMakeDumpLinks );
		}

		$wgArticlePath = str_repeat( '../', $wgMakeDumpLinks ) . '$1';

		# Logo image
		# Allow for repeated setup
		if ( !is_null( $oldLogo ) ) {
			$wgLogo = $oldLogo;
		} else {
			$oldLogo = $wgLogo;
		}

		if ( strpos( $wgLogo, $wgUploadPath ) === 0 ) {
			# If it's in the upload directory, rewrite it to the new upload directory
			$wgLogo = "$wgScriptPath/{$this->imageRel}/" . substr( $wgLogo, strlen( $wgUploadPath ) + 1 );
		} elseif ( substr( $wgLogo, 0, 1 ) == '/' ) {
			# This is basically heuristic
			# Rewrite an absolute logo path to one relative to the script path
			# (substr() instead of $wgLogo{0}: safe on an empty logo and valid on
			# PHP versions that no longer accept curly-brace string offsets)
			$wgLogo = $wgScriptPath . $wgLogo;
		}

		# Drop the first three characters (the leading "../") of the script path
		$wgScriptPath = substr( $wgScriptPath, 3 );

		$wgStylePath = $wgScriptPath ? "$wgScriptPath/" : '';
		$wgUploadPath = "$wgScriptPath/{$this->imageRel}";
		$wgSharedUploadPath = "$wgUploadPath/shared";
		$wgMaxCredits = -1;
		$wgHideInterlanguageLinks = !$this->interwiki;
		$wgThumbnailScriptPath = $wgSharedThumbnailScriptPath = false;
		$wgEnableParserCache = false;

		$wgUser = new User;
		$wgUser->setOption( 'skin', 'htmldump' );
		$wgUser->setOption( 'editsection', 0 );

		$this->sharedStaticPath = "$wgUploadDirectory/shared";
	}

	/**
	 * Reads the content of a title object, executes the skin and captures the result
	 *
	 * @param object $title title to render
	 * @return string|bool the generated page, or false when $title is null
	 */
	function getArticleHTML( &$title ) {
		global $wgOut, $wgTitle, $wgArticle, $wgUser, $wgUseCategoryMagic;

		$wgOut = new OutputPage;
		$wgOut->setParserOptions( new ParserOptions );

		$wgTitle = $title;
		if ( is_null( $wgTitle ) ) {
			return false;
		}

		$ns = $wgTitle->getNamespace();
		if ( $ns == NS_SPECIAL ) {
			SpecialPage::executePath( $wgTitle );
		} else {
			if ( $ns == NS_IMAGE ) {
				$wgArticle = new ImagePage( $wgTitle );
			} elseif ( $wgUseCategoryMagic && $ns == NS_CATEGORY ) {
				$wgArticle = new CategoryPage( $wgTitle );
			} else {
				$wgArticle = new Article( $wgTitle );
			}
			$wgArticle->view();
		}

		# Capture what the skin prints instead of sending it to stdout
		$sk =& $wgUser->getSkin();
		ob_start();
		$sk->outputPage( $wgOut );
		$text = ob_get_contents();
		ob_end_clean();

		# Same ':' / '~' substitution that writeArticle() applies to file names
		$text = str_replace( array( '/:/', '%7E' ), array( '/_/', '_' ), $text );

		return $text;
	}

	/**
	 * Returns image paths used in an XHTML document
	 *
	 * @param string $text XHTML document
	 * @return array map of image src => true, filled in by wfDumpStartTagHandler()
	 */
	function findImages( $text ) {
		global $wgOutputEncoding, $wgDumpImages;
		$parser = xml_parser_create( $wgOutputEncoding );
		xml_set_element_handler( $parser, 'wfDumpStartTagHandler', 'wfDumpEndTagHandler' );

		$wgDumpImages = array();
		xml_parse( $parser, $text );
		xml_parser_free( $parser );

		return $wgDumpImages;
	}

	/**
	 * Copy the images referenced by a page into the static upload directory.
	 * For thumbnails the full-size image is copied as well.
	 * This is necessary even if you intend to distribute all of commons, because
	 * the directory contents is used to work out which image description pages
	 * are needed.
	 *
	 * @param array $images map of image src => anything, as returned by findImages()
	 */
	function copyImages( $images ) {
		foreach ( $images as $image => $dummy ) {
			# Only paths that contain 'upload' are handled. Compare against
			# false so that a match at offset 0 is not skipped.
			$pos = strpos( $image, 'upload' );
			if ( $pos === false ) {
				continue;
			}

			# Reconstruct the path relative to the upload directory
			$rel = substr( $image, $pos + 7 ); // 6 for 'upload', +1 for slash
			if ( $rel === false || $rel === '' ) {
				continue;
			}
			$this->copyToStatic( $rel );

			if ( substr( $rel, 0, 6 ) == 'thumb/' ) {
				# That was a thumbnail
				# We will also copy the real image
				$parts = explode( '/', $rel );
				if ( count( $parts ) >= 4 ) {
					$this->copyToStatic( "{$parts[1]}/{$parts[2]}/{$parts[3]}" );
				}
			}
		}
	}

	/**
	 * Copy one file from the wiki image directory to the static upload
	 * directory unless the destination already exists.
	 *
	 * @param string $rel path relative to the image directory
	 * @private
	 */
	function copyToStatic( $rel ) {
		global $wiki_dir, $output_dir;

		$sourceLoc = $wiki_dir . "images/$rel";
		$staticLoc = "$output_dir/upload/$rel";

		if ( !file_exists( $staticLoc ) ) {
			wfMkdirParents( dirname( $staticLoc ), 0755 );
			copy( $sourceLoc, $staticLoc );
		}
	}
}

/**
 * XML parser callback: records the SRC of every IMG element in $wgDumpImages.
 * Names are compared in upper case because PHP's XML parser case-folds
 * element and attribute names by default.
 */
function wfDumpStartTagHandler( $parser, $name, $attribs ) {
	global $wgDumpImages;

	if ( $name == 'IMG' && isset( $attribs['SRC'] ) ) {
		$wgDumpImages[$attribs['SRC']] = true;
	}
}

/** XML parser callback: closing tags carry no information needed here */
function wfDumpEndTagHandler( $parser, $name ) {}

# vim: syn=php