summaryrefslogtreecommitdiff
path: root/buildscripts/wikibuilder/dumpHTML.inc
diff options
context:
space:
mode:
authorCiro Mattia Gonano <ciromattia@gmail.com>2013-09-11 15:56:48 +0200
committerCiro Mattia Gonano <ciromattia@gmail.com>2013-09-11 15:57:07 +0200
commit3069eaf35e833ffe4a1c1c7829dd7e168ae27420 (patch)
treed0c2e4d934cc34ba7d4232f759923b5a257dcb21 /buildscripts/wikibuilder/dumpHTML.inc
parentb833247ce597ec26159b46c8dfbea7f1e265950b (diff)
Merge up to r3319
Diffstat (limited to 'buildscripts/wikibuilder/dumpHTML.inc')
-rw-r--r--buildscripts/wikibuilder/dumpHTML.inc390
1 files changed, 0 insertions, 390 deletions
diff --git a/buildscripts/wikibuilder/dumpHTML.inc b/buildscripts/wikibuilder/dumpHTML.inc
deleted file mode 100644
index 5b8ca15a..00000000
--- a/buildscripts/wikibuilder/dumpHTML.inc
+++ /dev/null
@@ -1,390 +0,0 @@
-<?php
-/**
- * @package MediaWiki
- * @subpackage Maintenance
- */
-
-define( 'REPORTING_INTERVAL', 10 );
-
-require_once( 'includes/ImagePage.php' );
-require_once( 'includes/CategoryPage.php' );
-
-class DumpHTML {
- # Destination directory
- var $dest;
-
- # Show interlanguage links?
- var $interwiki = true;
-
- # Depth of HTML directory tree
- var $depth = 3;
-
- # Directory that commons images are copied into
- var $sharedStaticPath;
-
- # Relative path to image directory
- var $imageRel = 'upload';
-
- # Copy commons images instead of symlinking
- var $forceCopy = false;
-
- # Make links assuming the script path is in the same directory as
- # the destination
- var $alternateScriptPath = false;
-
- function DumpHTML( $settings ) {
- foreach ( $settings as $var => $value ) {
- $this->$var = $value;
- }
- }
-
- /**
- * Write a set of articles specified by start and end page_id
- * Skip categories and images, they will be done separately
- */
- function doArticles( $start, $end = false ) {
- $fname = 'DumpHTML::doArticles';
-
- $this->setupGlobals();
-
- if ( $end === false ) {
- $dbr =& wfGetDB( DB_SLAVE );
- $end = $dbr->selectField( 'page', 'max(page_id)', false, $fname );
- }
-
-
- for ($id = $start; $id <= $end; $id++) {
- if ( !($id % REPORTING_INTERVAL) ) {
- print "Processing ID: $id\r";
- }
- $title = DumpTitle::newFromID( $id );
- if ( $title ) {
- $ns = $title->getNamespace() ;
- if ( $ns != NS_CATEGORY ) {
- $this->doArticle( $title );
- }
- }
- }
- print "\n";
- }
-
- function doSpecials() {
- $this->doMainPage();
-
- $this->setupGlobals();
- print "Special:Categories...";
- $this->doArticle( DumpTitle::makeTitle( NS_SPECIAL, 'Categories' ) );
- print "\n";
- }
-
- /** Write the main page as index.html */
- function doMainPage() {
- global $wgMakeDumpLinks;
-
- print "Making index.html ";
-
- // Set up globals with no ../../.. in the link URLs
- $this->setupGlobals( 0 );
-
- // But still use that directory style
- $wgMakeDumpLinks = 3;
-
- $title = DumpTitle::newMainPage();
-
- $text = $this->getArticleHTML( $title );
- $file = fopen( "{$this->dest}/index.html", "w" );
- if ( !$file ) {
- print "\nCan't open index.html for writing\n";
- return false;
- }
- fwrite( $file, $text );
- fclose( $file );
- print "\n";
- }
-
- function doImageDescriptions() {
- global $wgSharedUploadDirectory;
-
- $fname = 'DumpHTML::doImageDescriptions';
-
- $this->setupGlobals( 3 );
-
- /**
- * Dump image description pages that don't have an associated article, but do
- * have a local image
- */
- $dbr =& wfGetDB( DB_SLAVE );
- extract( $dbr->tableNames( 'image', 'page' ) );
- $res = $dbr->select( 'image', array( 'img_name' ), false, $fname );
-
- $i = 0;
- print "Writing image description pages for local images\n";
- $num = $dbr->numRows( $res );
- while ( $row = $dbr->fetchObject( $res ) ) {
- if ( !( ++$i % REPORTING_INTERVAL ) ) {
- print "Done $i of $num\r";
- }
- $title = DumpTitle::makeTitle( NS_IMAGE, $row->img_name );
- if ( $title->getArticleID() ) {
- // Already done by dumpHTML
- continue;
- }
- $this->doArticle( $title );
- }
- print "\n";
-
- /**
- * Dump images which only have a real description page on commons
- */
- print "Writing description pages for commons images\n";
- $i = 0;
- for ( $hash = 0; $hash < 256; $hash++ ) {
- $dir = sprintf( "%01x/%02x", intval( $hash / 16 ), $hash );
- $paths = glob( "{$this->sharedStaticPath}/$dir/*" );
- $paths += glob( "{$this->sharedStaticPath}/thumb/$dir/*" );
-
- foreach ( $paths as $path ) {
- $file = basename( $path );
- if ( !(++$i % REPORTING_INTERVAL ) ) {
- print "$i\r";
- }
-
- $title = DumpTitle::makeTitle( NS_IMAGE, $file );
- $this->doArticle( $title );
- }
- }
- print "\n";
- }
-
- function doCategories() {
- $fname = 'DumpHTML::doCategories';
- $this->setupGlobals();
-
- $dbr =& wfGetDB( DB_SLAVE );
- print "Selecting categories...";
- $sql = 'SELECT DISTINCT cl_to FROM ' . $dbr->tableName( 'categorylinks' );
- $res = $dbr->query( $sql, $fname );
-
- print "\nWriting " . $dbr->numRows( $res ). " category pages\n";
- $i = 0;
- while ( $row = $dbr->fetchObject( $res ) ) {
- if ( !(++$i % REPORTING_INTERVAL ) ) {
- print "$i\r";
- }
- $title = DumpTitle::makeTitle( NS_CATEGORY, $row->cl_to );
- $this->doArticle( $title );
- }
- print "\n";
- }
-
-
- /** Write an article specified by title */
- function doArticle( $title ) {
- global $wgTitle, $wgSharedUploadPath, $wgSharedUploadDirectory;
- global $wgUploadDirectory;
-
- $text = $this->getArticleHTML( $title );
- if ( $text === false ) {
- return;
- }
-
- # Parse the XHTML to find the images
- $images = $this->findImages( $text );
- $this->copyImages( $images );
-
- # Write to file
- $this->writeArticle( $title, $text );
- }
-
- /** Write the given text to the file identified by the given title object */
- function writeArticle( &$title, $text ) {
- $filename = strtr($title->getHashedFilename(),':~','__');
- $fullName = "{$this->dest}/$filename";
- $fullDir = dirname( $fullName );
-
- wfMkdirParents( $fullDir, 0755 );
-
- $file = fopen( $fullName, 'w' );
- if ( !$file ) {
- print("Can't open file $fullName for writing\n");
- return;
- }
-
- fwrite( $file, $text );
- fclose( $file );
- }
-
- /** Set up globals required for parsing */
- function setupGlobals( $depth = NULL ) {
- global $wgUser, $wgTitle, $wgMakeDumpLinks, $wgStylePath, $wgArticlePath;
- global $wgUploadPath, $wgLogo, $wgMaxCredits, $wgSharedUploadPath;
- global $wgHideInterlanguageLinks, $wgUploadDirectory, $wgThumbnailScriptPath;
- global $wgSharedThumbnailScriptPath, $wgEnableParserCache;
-
- static $oldLogo = NULL;
-
- if ( is_null( $depth ) ) {
- $wgMakeDumpLinks = $this->depth;
- } else {
- $wgMakeDumpLinks = $depth;
- }
-
- if ( $this->alternateScriptPath ) {
- if ( $wgMakeDumpLinks == 0 ) {
- $wgScriptPath = '.';
- } else {
- $wgScriptPath = '..' . str_repeat( '/..', $wgMakeDumpLinks - 1 );
- }
- } else {
- $wgScriptPath = '..' . str_repeat( '/..', $wgMakeDumpLinks );
- }
-
- $wgArticlePath = str_repeat( '../', $wgMakeDumpLinks ) . '$1';
-
- # Logo image
- # Allow for repeated setup
- if ( !is_null( $oldLogo ) ) {
- $wgLogo = $oldLogo;
- } else {
- $oldLogo = $wgLogo;
- }
-
- if ( strpos( $wgLogo, $wgUploadPath ) === 0 ) {
- # If it's in the upload directory, rewrite it to the new upload directory
- $wgLogo = "$wgScriptPath/{$this->imageRel}/" . substr( $wgLogo, strlen( $wgUploadPath ) + 1 );
- } elseif ( $wgLogo{0} == '/' ) {
- # This is basically heuristic
- # Rewrite an absolute logo path to one relative to the the script path
- $wgLogo = $wgScriptPath . $wgLogo;
- }
-
- $wgScriptPath = substr($wgScriptPath,3);
-
- $wgStylePath = $wgScriptPath ? "$wgScriptPath/" : '';
- $wgUploadPath = "$wgScriptPath/{$this->imageRel}";
- $wgSharedUploadPath = "$wgUploadPath/shared";
- $wgMaxCredits = -1;
- $wgHideInterlangageLinks = !$this->interwiki;
- $wgThumbnailScriptPath = $wgSharedThumbnailScriptPath = false;
- $wgEnableParserCache = false;
-
- $wgUser = new User;
- $wgUser->setOption( 'skin', 'htmldump' );
- $wgUser->setOption( 'editsection', 0 );
-
- $this->sharedStaticPath = "$wgUploadDirectory/shared";
-
- }
-
- /** Reads the content of a title object, executes the skin and captures the result */
- function getArticleHTML( &$title ) {
- global $wgOut, $wgTitle, $wgArticle, $wgUser, $wgUseCategoryMagic;
-
- $wgOut = new OutputPage;
- $wgOut->setParserOptions( new ParserOptions );
-
- $wgTitle = $title;
- if ( is_null( $wgTitle ) ) {
- return false;
- }
-
- $ns = $wgTitle->getNamespace();
- if ( $ns == NS_SPECIAL ) {
- SpecialPage::executePath( $wgTitle );
- } else {
- if ( $ns == NS_IMAGE ) {
- $wgArticle = new ImagePage( $wgTitle );
- } elseif ( $wgUseCategoryMagic && $ns == NS_CATEGORY ) {
- $wgArticle = new CategoryPage( $wgTitle );
- } else {
- $wgArticle = new Article( $wgTitle );
- }
- $wgArticle->view();
- }
-
- $sk =& $wgUser->getSkin();
- ob_start();
- $sk->outputPage( $wgOut );
- $text = ob_get_contents();
- ob_end_clean();
-
- $text = str_replace(array('/:/','%7E'), array('/_/','_'), $text);
-
- return $text;
- }
-
- /** Returns image paths used in an XHTML document */
- function findImages( $text ) {
- global $wgOutputEncoding, $wgDumpImages;
- $parser = xml_parser_create( $wgOutputEncoding );
- xml_set_element_handler( $parser, 'wfDumpStartTagHandler', 'wfDumpEndTagHandler' );
-
- $wgDumpImages = array();
- xml_parse( $parser, $text );
- xml_parser_free( $parser );
-
- return $wgDumpImages;
- }
-
- /**
- * Copy images (or create symlinks) from commons to a static directory.
- * This is necessary even if you intend to distribute all of commons, because
- * the directory contents is used to work out which image description pages
- * are needed.
- */
- function copyImages( $images ) {
- global $wiki_dir, $output_dir;
- global $wgSharedUploadPath, $wgSharedUploadDirectory;
- # Find shared uploads and copy them into the static directory
- $sharedPathLength = strlen( $wgSharedUploadPath );
- foreach ( $images as $image => $dummy ) {
- # Is it shared?
- if ( strpos($image, 'upload') > 0) {
- # Reconstruct full filename
- $rel = substr( $image, strpos($image,'upload')+7 ); // +1 for slash
- $sourceLoc = $wiki_dir."images/$rel";
- $staticLoc = "$output_dir/upload/$rel";
-// print "Copying $sourceLoc to $staticLoc\n";
-
- # Copy to static directory
- if ( !file_exists( $staticLoc ) ) {
- wfMkdirParents( dirname( $staticLoc ), 0755 );
- //if ( function_exists( 'symlink' ) && !$this->forceCopy ) {
- // symlink( $sourceLoc, $staticLoc );
- //} else {
- copy( $sourceLoc, $staticLoc );
- //}
- }
-
- if ( substr( $rel, 0, 6 ) == 'thumb/' ) {
- # That was a thumbnail
- # We will also copy the real image
- $parts = explode( '/', $rel );
- $rel = "{$parts[1]}/{$parts[2]}/{$parts[3]}";
- $sourceLoc = $wiki_dir."images/$rel";
- $staticLoc = "$output_dir/upload/$rel";
-# print "Copying $sourceLoc to $staticLoc\n";
- if ( !file_exists( $staticLoc ) ) {
- wfMkdirParents( dirname( $staticLoc ), 0755 );
- copy( $sourceLoc, $staticLoc );
- }
- }
- }
- }
- }
-}
-
-/** XML parser callback */
-function wfDumpStartTagHandler( $parser, $name, $attribs ) {
- global $wgDumpImages;
-
- if ( $name == 'IMG' && isset( $attribs['SRC'] ) ) {
- $wgDumpImages[$attribs['SRC']] = true;
- }
-}
-
-/** XML parser callback */
-function wfDumpEndTagHandler( $parser, $name ) {}
-
-# vim: syn=php
-?>