Manual:Chris G's botclasses/AllPagesBot.php
Appearance
This is a bot that uses Chris G's botclasses to retrieve a list of all page titles on the wiki and store that list in two text files: one for the File: namespace, and another for all the other namespaces. Customize the URLs, login info, and namespace variables to suit your needs.
<?php
/* AllPagesBot
* By Leucosticte, https://www.mediawiki.org/wiki/User:Leucosticte
* GNU Public License 2.0
*
* This bot retrieves a list of all page titles on the wiki and stores that list in two text files:
* one for the File: namespace, and another for all the other namespaces.
*/
/* Setup my classes. */
// Nothing below can work without botclasses.php, so fail immediately if it is missing.
require_once 'botclasses.php';
$wiki = new wikipedia;
// Wikimedia wikis are HTTPS-only, so talk to the https:// endpoint directly.
$wiki->url = "https://en.wikipedia.org/w/api.php";

/* All the login stuff. */
$user = 'REMOVED';
$pass = 'REMOVED';
$wiki->login( $user, $pass );

/* Namespaces to list. */
$namespaces = range( 0, 15 ); // Default namespaces
// Extra namespaces
#$namespaces[] = 500;
#$namespaces[] = 501;
$namespaces = array_filter( $namespaces, "notFile" ); // Filter out the File: namespace

/* Output files: one for everything except File:, one for File: only. */
$pageTitlesFile = 'PageTitles.txt';
$pageTitlesNs6File = 'PageTitlesNs6.txt';

$pageTitles = fopen( $pageTitlesFile, 'w' );
if ( $pageTitles === false ) {
    // Stop here rather than handing an invalid handle to fwrite() later on.
    die( "Unable to open $pageTitlesFile for writing\n" );
}
$pageTitlesNs6 = fopen( $pageTitlesNs6File, 'w' );
if ( $pageTitlesNs6 === false ) {
    die( "Unable to open $pageTitlesNs6File for writing\n" );
}

iterate( $wiki, $namespaces, $pageTitles ); // Everything but File: namespace
iterate( $wiki, array( 6 ), $pageTitlesNs6 ); // Only the File: namespace
/**
 * array_filter() callback that keeps every namespace except File: (number 6).
 *
 * The comparison is deliberately loose, so the string '6' is rejected as well.
 *
 * @param mixed $var Namespace number to test
 * @return bool False for the File: namespace, true for anything else
 */
function notFile( $var ) {
    if ( $var == 6 ) {
        return false;
    }
    return true;
}
/**
 * Retrieve every page title in the given namespaces and write them, one per
 * line, to an already-open file handle.
 *
 * Each namespace is listed with list=allpages in batches of 500. Whatever
 * continuation parameters the API returns are sent back, URL-encoded, with the
 * next request. Both continuation formats are understood:
 *  - the current one, a top-level 'continue' array (apcontinue + continue);
 *  - the legacy one, 'query-continue' => 'allpages' => (apfrom or apcontinue).
 *
 * @param object $wiki Object with a query( $queryString ) method that returns
 *   the decoded API response as an array (presumably the botclasses
 *   "wikipedia" class - confirm against botclasses.php)
 * @param array $namespaces Namespace numbers to list
 * @param resource $pageTitles Writable file handle that receives the titles
 */
function iterate ( $wiki, $namespaces, $pageTitles ) {
    foreach ( $namespaces as $namespace ) {
        // Continuation parameters for the next request; empty on the first batch.
        $continue = array();
        do {
            $query = "?action=query&format=php&list=allpages&aplimit=500&apnamespace=$namespace";
            foreach ( $continue as $param => $value ) {
                // Titles may contain '&', '+', '#', spaces, ... so they must be
                // encoded or they would corrupt the query string.
                $query .= '&' . urlencode( (string)$param ) . '=' . urlencode( (string)$value );
            }

            $ret = $wiki->query( $query );

            // A failed or empty response has no page list; skip writing then.
            if ( isset( $ret['query']['allpages'] ) && is_array( $ret['query']['allpages'] ) ) {
                foreach ( $ret['query']['allpages'] as $thisPage ) {
                    fwrite( $pageTitles, $thisPage['title'] . "\n" );
                }
            }

            // Decide whether there is another batch. Emptiness of the array is
            // tested, not truthiness of a title, so a page named "0" cannot
            // restart or stall the listing.
            $continue = array();
            if ( isset( $ret['continue'] ) && is_array( $ret['continue'] ) ) {
                $continue = $ret['continue'];
            } elseif (
                isset( $ret['query-continue']['allpages'] )
                && is_array( $ret['query-continue']['allpages'] )
            ) {
                $continue = $ret['query-continue']['allpages'];
            }
        } while ( count( $continue ) > 0 );
    }
}
// Close both output files so buffered titles are flushed and no handle is leaked.
fclose( $pageTitles );
fclose( $pageTitlesNs6 );