<?php
/**
 * Static HTML dump support: renders pages through the "htmldump" skin and
 * writes the resulting HTML, plus the images it references, to disk.
 *
 * @package MediaWiki
 * @subpackage Maintenance
 */

/** Progress is printed once every REPORTING_INTERVAL items */
define( 'REPORTING_INTERVAL', 10 );

require_once( 'includes/ImagePage.php' );
require_once( 'includes/CategoryPage.php' );

class DumpHTML {
	# Destination directory
	var $dest;

	# Show interlanguage links?
	var $interwiki = true;

	# Depth of HTML directory tree
	var $depth = 3;

	# Directory that commons images are copied into
	var $sharedStaticPath;

	# Relative path to image directory
	var $imageRel = 'upload';

	# Copy commons images instead of symlinking
	# NOTE(review): nothing in this file reads this flag; copyImages() always copies.
	var $forceCopy = false;

	# Make links assuming the script path is in the same directory as
	# the destination
	var $alternateScriptPath = false;

	/**
	 * Constructor.
	 *
	 * @param array $settings Map of property name => value; every entry is
	 *                        assigned to the property of the same name.
	 */
	function DumpHTML( $settings ) {
		foreach ( $settings as $var => $value ) {
			$this->$var = $value;
		}
	}

	/**
	 * Write a set of articles specified by start and end page_id.
	 * Category pages are skipped here; doCategories() writes them.
	 *
	 * @param int       $start First page_id to process
	 * @param int|false $end   Last page_id (inclusive); false means the
	 *                         highest page_id in the page table
	 */
	function doArticles( $start, $end = false ) {
		$fname = 'DumpHTML::doArticles';

		$this->setupGlobals();

		if ( $end === false ) {
			$dbr =& wfGetDB( DB_SLAVE );
			$end = $dbr->selectField( 'page', 'max(page_id)', false, $fname );
		}

		for ( $id = $start; $id <= $end; $id++ ) {
			if ( !( $id % REPORTING_INTERVAL ) ) {
				print "Processing ID: $id\r";
			}
			$title = DumpTitle::newFromID( $id );
			if ( $title ) {
				$ns = $title->getNamespace();
				if ( $ns != NS_CATEGORY ) {
					$this->doArticle( $title );
				}
			}
		}
		print "\n";
	}

	/** Write the main page and the Special:Categories page */
	function doSpecials() {
		$this->doMainPage();

		# doMainPage() set the globals up for depth 0; restore the normal depth
		$this->setupGlobals();
		print "Special:Categories...";
		$this->doArticle( DumpTitle::makeTitle( NS_SPECIAL, 'Categories' ) );
		print "\n";
	}

	/**
	 * Write the main page as index.html
	 *
	 * @return false|null false when the page could not be rendered or the
	 *                    file could not be opened, nothing otherwise
	 */
	function doMainPage() {
		global $wgMakeDumpLinks;

		print "Making index.html ";

		// Set up globals with no ../../.. in the link URLs
		$this->setupGlobals( 0 );

		// But still use that directory style
		// NOTE(review): 3 equals the default $depth; confirm whether
		// $this->depth is what is really meant here.
		$wgMakeDumpLinks = 3;

		$title = DumpTitle::newMainPage();
		$text = $this->getArticleHTML( $title );
		if ( $text === false ) {
			# No title to render: do not leave an empty index.html behind
			print "\nCan't render the main page\n";
			return false;
		}

		$file = fopen( "{$this->dest}/index.html", "w" );
		if ( !$file ) {
			print "\nCan't open index.html for writing\n";
			return false;
		}
		fwrite( $file, $text );
		fclose( $file );
		print "\n";
	}

	/**
	 * Write image description pages: first for local images that have no
	 * article of their own, then for every file found in the shared static
	 * directory.
	 */
	function doImageDescriptions() {
		$fname = 'DumpHTML::doImageDescriptions';

		// NOTE(review): hard-coded 3 equals the default $depth; confirm
		// whether $this->depth is intended.
		$this->setupGlobals( 3 );

		/**
		 * Dump image description pages that don't have an associated article, but do
		 * have a local image
		 */
		$dbr =& wfGetDB( DB_SLAVE );
		$res = $dbr->select( 'image', array( 'img_name' ), false, $fname );

		$i = 0;
		print "Writing image description pages for local images\n";
		$num = $dbr->numRows( $res );
		while ( $row = $dbr->fetchObject( $res ) ) {
			if ( !( ++$i % REPORTING_INTERVAL ) ) {
				print "Done $i of $num\r";
			}
			$title = DumpTitle::makeTitle( NS_IMAGE, $row->img_name );
			if ( $title->getArticleID() ) {
				// Already done by dumpHTML
				continue;
			}
			$this->doArticle( $title );
		}
		print "\n";

		/**
		 * Dump images which only have a real description page on commons
		 */
		print "Writing description pages for commons images\n";
		$i = 0;
		for ( $hash = 0; $hash < 256; $hash++ ) {
			$dir = sprintf( "%01x/%02x", intval( $hash / 16 ), $hash );

			# Both globs return lists keyed 0..n, so they must be merged; the
			# array union operator would drop entries whose keys collide.
			$paths = array_merge(
				$this->globPaths( "{$this->sharedStaticPath}/$dir/*" ),
				$this->globPaths( "{$this->sharedStaticPath}/thumb/$dir/*" )
			);

			# The same base name can show up in both directories; write each
			# description page only once.
			$files = array_unique( array_map( 'basename', $paths ) );

			foreach ( $files as $file ) {
				if ( !( ++$i % REPORTING_INTERVAL ) ) {
					print "$i\r";
				}

				$title = DumpTitle::makeTitle( NS_IMAGE, $file );
				$this->doArticle( $title );
			}
		}
		print "\n";
	}

	/**
	 * glob() wrapper that always returns an array.
	 *
	 * @param string $pattern Pattern passed to glob()
	 * @return array Matching paths; empty when glob() returns false
	 */
	function globPaths( $pattern ) {
		$matches = glob( $pattern );
		return is_array( $matches ) ? $matches : array();
	}

	/** Write a page for every distinct category named in categorylinks */
	function doCategories() {
		$fname = 'DumpHTML::doCategories';
		$this->setupGlobals();

		$dbr =& wfGetDB( DB_SLAVE );
		print "Selecting categories...";
		$sql = 'SELECT DISTINCT cl_to FROM ' . $dbr->tableName( 'categorylinks' );
		$res = $dbr->query( $sql, $fname );

		print "\nWriting " . $dbr->numRows( $res ). " category pages\n";
		$i = 0;
		while ( $row = $dbr->fetchObject( $res ) ) {
			if ( !( ++$i % REPORTING_INTERVAL ) ) {
				print "$i\r";
			}
			$title = DumpTitle::makeTitle( NS_CATEGORY, $row->cl_to );
			$this->doArticle( $title );
		}
		print "\n";
	}

	/**
	 * Write an article specified by title: render it, copy the images it
	 * references, then write the HTML file. Does nothing when the page
	 * cannot be rendered.
	 */
	function doArticle( $title ) {
		$text = $this->getArticleHTML( $title );
		if ( $text === false ) {
			return;
		}

		# Parse the XHTML to find the images
		$images = $this->findImages( $text );
		$this->copyImages( $images );

		# Write to file
		$this->writeArticle( $title, $text );
	}

	/**
	 * Write the given text to the file identified by the given title object.
	 * ':' and '~' in the hashed filename are replaced by '_', and missing
	 * parent directories are created.
	 */
	function writeArticle( &$title, $text ) {
		$filename = strtr( $title->getHashedFilename(), ':~', '__' );
		$fullName = "{$this->dest}/$filename";
		$fullDir = dirname( $fullName );

		wfMkdirParents( $fullDir, 0755 );

		$file = fopen( $fullName, 'w' );
		if ( !$file ) {
			print("Can't open file $fullName for writing\n");
			return;
		}

		fwrite( $file, $text );
		fclose( $file );
	}

	/**
	 * Set up globals required for parsing.
	 *
	 * @param int|null $depth Value for $wgMakeDumpLinks; NULL means use
	 *                        $this->depth
	 */
	function setupGlobals( $depth = NULL ) {
		global $wgUser, $wgMakeDumpLinks, $wgStylePath, $wgArticlePath;
		global $wgUploadPath, $wgLogo, $wgMaxCredits, $wgSharedUploadPath;
		global $wgHideInterlanguageLinks, $wgUploadDirectory, $wgThumbnailScriptPath;
		global $wgSharedThumbnailScriptPath, $wgEnableParserCache;

		# Values as they were before the first call. $wgLogo and $wgUploadPath
		# are both overwritten below, so repeated setup has to start from these.
		static $oldLogo = NULL;
		static $oldUploadPath = NULL;

		if ( is_null( $depth ) ) {
			$wgMakeDumpLinks = $this->depth;
		} else {
			$wgMakeDumpLinks = $depth;
		}

		# NOTE(review): $wgScriptPath is not in the global lists above, so it
		# is a local here and the real global stays untouched -- confirm that
		# this is intended.
		if ( $this->alternateScriptPath ) {
			if ( $wgMakeDumpLinks == 0 ) {
				$wgScriptPath = '.';
			} else {
				$wgScriptPath = '..' . str_repeat( '/..', $wgMakeDumpLinks - 1 );
			}
		} else {
			$wgScriptPath = '..' . str_repeat( '/..', $wgMakeDumpLinks );
		}

		$wgArticlePath = str_repeat( '../', $wgMakeDumpLinks ) . '$1';

		# Logo image
		# Allow for repeated setup
		if ( !is_null( $oldLogo ) ) {
			$wgLogo = $oldLogo;
		} else {
			$oldLogo = $wgLogo;
		}
		if ( is_null( $oldUploadPath ) ) {
			$oldUploadPath = $wgUploadPath;
		}

		# Compare against the original upload path: $wgUploadPath itself holds
		# the rewritten path from the second call onwards.
		if ( strpos( $wgLogo, $oldUploadPath ) === 0 ) {
			# If it's in the upload directory, rewrite it to the new upload directory
			$wgLogo = "$wgScriptPath/{$this->imageRel}/" . substr( $wgLogo, strlen( $oldUploadPath ) + 1 );
		} elseif ( substr( $wgLogo, 0, 1 ) == '/' ) {
			# This is basically heuristic
			# Rewrite an absolute logo path to one relative to the script path
			$wgLogo = $wgScriptPath . $wgLogo;
		}

		# Drop the first three characters (one leading "../"); for "." and
		# ".." nothing is left
		$wgScriptPath = substr( $wgScriptPath, 3 );
		$wgStylePath = $wgScriptPath ? "$wgScriptPath/" : '';
		$wgUploadPath = "$wgScriptPath/{$this->imageRel}";
		$wgSharedUploadPath = "$wgUploadPath/shared";
		$wgMaxCredits = -1;
		$wgHideInterlanguageLinks = !$this->interwiki;
		$wgThumbnailScriptPath = $wgSharedThumbnailScriptPath = false;
		$wgEnableParserCache = false;

		$wgUser = new User;
		$wgUser->setOption( 'skin', 'htmldump' );
		$wgUser->setOption( 'editsection', 0 );

		$this->sharedStaticPath = "$wgUploadDirectory/shared";
	}

	/**
	 * Reads the content of a title object, executes the skin and captures the result
	 *
	 * @return string|false The page HTML, or false when $title is null
	 */
	function getArticleHTML( &$title ) {
		global $wgOut, $wgTitle, $wgArticle, $wgUser, $wgUseCategoryMagic;

		$wgOut = new OutputPage;
		$wgOut->setParserOptions( new ParserOptions );

		$wgTitle = $title;
		if ( is_null( $wgTitle ) ) {
			return false;
		}

		$ns = $wgTitle->getNamespace();
		if ( $ns == NS_SPECIAL ) {
			SpecialPage::executePath( $wgTitle );
		} else {
			if ( $ns == NS_IMAGE ) {
				$wgArticle = new ImagePage( $wgTitle );
			} elseif ( $wgUseCategoryMagic && $ns == NS_CATEGORY ) {
				$wgArticle = new CategoryPage( $wgTitle );
			} else {
				$wgArticle = new Article( $wgTitle );
			}
			$wgArticle->view();
		}

		# Capture what the skin prints instead of sending it to stdout
		$sk =& $wgUser->getSkin();
		ob_start();
		$sk->outputPage( $wgOut );
		$text = ob_get_contents();
		ob_end_clean();

		# Same ':' and '~' => '_' substitution that writeArticle() applies to
		# file names, applied to the generated markup
		$text = str_replace( array( '/:/', '%7E' ), array( '/_/', '_' ), $text );
		return $text;
	}

	/**
	 * Returns image paths used in an XHTML document
	 *
	 * @param string $text XHTML source
	 * @return array Map of IMG SRC value => true, filled in by
	 *               wfDumpStartTagHandler()
	 */
	function findImages( $text ) {
		global $wgOutputEncoding, $wgDumpImages;
		$parser = xml_parser_create( $wgOutputEncoding );
		xml_set_element_handler( $parser, 'wfDumpStartTagHandler', 'wfDumpEndTagHandler' );

		$wgDumpImages = array();
		xml_parse( $parser, $text );
		xml_parser_free( $parser );

		return $wgDumpImages;
	}

	/**
	 * Copy images from the wiki's image directory to the static directory.
	 * This is necessary even if you intend to distribute all of commons, because
	 * the directory contents is used to work out which image description pages
	 * are needed.
	 *
	 * Files are always copied, never symlinked, and existing targets are
	 * left alone.
	 *
	 * NOTE(review): the 'upload' path component and the $wiki_dir and
	 * $output_dir globals are hard-coded here rather than taken from
	 * $this->imageRel / $this->dest -- confirm that is intended.
	 *
	 * @param array $images Map of image path => anything, as returned by
	 *                      findImages()
	 */
	function copyImages( $images ) {
		global $wiki_dir, $output_dir;

		foreach ( $images as $image => $dummy ) {
			# Only paths that contain 'upload' somewhere after the first
			# character are handled
			$pos = strpos( $image, 'upload' );
			if ( !( $pos > 0 ) ) {
				continue;
			}

			# Path below the upload directory: skip 'upload' (6) plus the slash
			$rel = substr( $image, $pos + 7 );
			if ( $rel === false || $rel === '' ) {
				# Nothing follows the directory name, so there is no file to copy
				continue;
			}

			$this->copyImageFile( $wiki_dir . "images/$rel", "$output_dir/upload/$rel" );

			if ( substr( $rel, 0, 6 ) == 'thumb/' ) {
				# That was a thumbnail
				# We will also copy the real image
				$parts = explode( '/', $rel );
				if ( count( $parts ) >= 4 ) {
					$rel = "{$parts[1]}/{$parts[2]}/{$parts[3]}";
					$this->copyImageFile( $wiki_dir . "images/$rel", "$output_dir/upload/$rel" );
				}
			}
		}
	}

	/**
	 * Copy one file into the static tree unless the target already exists,
	 * creating the target's parent directories first.
	 *
	 * @param string $sourceLoc File to copy
	 * @param string $staticLoc Destination path
	 */
	function copyImageFile( $sourceLoc, $staticLoc ) {
		if ( file_exists( $staticLoc ) ) {
			return;
		}
		wfMkdirParents( dirname( $staticLoc ), 0755 );
		copy( $sourceLoc, $staticLoc );
	}
}

/**
 * XML parser callback: records the SRC of every IMG element as a key of
 * $wgDumpImages.
 */
function wfDumpStartTagHandler( $parser, $name, $attribs ) {
	global $wgDumpImages;

	if ( $name == 'IMG' && isset( $attribs['SRC'] ) ) {
		$wgDumpImages[$attribs['SRC']] = true;
	}
}

/** XML parser callback: end tags need no handling */
function wfDumpEndTagHandler( $parser, $name ) {}

# vim: syn=php