User:Alterego/ExtensionMatrix/Source code
Appearance
Extension Matrix Source
This is the source code for User:Alterego/ExtensionMatrix.
import os

# Credentials passed to site.login() further down.  The literal 'test'
# values are placeholders and remain the defaults; real credentials can be
# supplied through the environment so they never have to live in (or be
# published with) the source code.
username = os.environ.get('EXTENSIONMATRIX_USERNAME', 'test')
password = os.environ.get('EXTENSIONMATRIX_PASSWORD', 'test')
from re import sub
from sys import path
from dateutil.parser import parse
import datetime
path.append('/usr/local/mwclient')
path.append('/usr/local/mwclient/simplejson')
import client as mwclient
# Connect to mediawiki.org and log in with the configured account.
site = mwclient.Site('www.mediawiki.org', path='/w/')
site.login(username, password)

# Every member of this category is treated as a candidate extension page.
all_extensions = site.categories["All extensions"]

# extension name -> raw template wikitext ('' when nothing could be extracted)
extensions = {}
# extension name -> dict of cleaned-up template parameters
extensions_dicts = {}
# grouping key (type / status / MediaWiki version) -> list of extension names
extensions_by_type = {}
extensions_by_status = {}
extensions_by_mw_version = {}
extensions_by_creation_date = {}

# Lists of (year, month, day, extension name) tuples, filled in and sorted
# further down.
recently_edited = []
recently_discussed = []
recently_updated = []
recently_created = []
# converts an extension dict back into template format
def BuildTemplate(extension_dict):
    """Render an extension dict as an ``{{ExtensionMatrix}}`` template call.

    extension_dict maps parameter names to string values.  Every entry is
    written as one ``|key=value`` line, except entries whose value contains
    an HTML comment marker ('<!-' or '-->'), which are left out.

    Returns the wikitext as a single string: the opening line, one line per
    emitted parameter, and a closing '}}' line, each terminated by a newline.
    """
    lines = ['{{ExtensionMatrix\n']
    for key in extension_dict:
        value = extension_dict[key]
        # Values carrying comment markers are skipped entirely.
        if '<!-' in value or '-->' in value:
            continue
        # Build this line of the template
        lines.append('|' + key + '=' + value + '\n')
    lines.append('}}\n')
    # Join once at the end instead of growing a string with repeated +=.
    return ''.join(lines)
##########################################
# Download the template for each extension
##########################################
for this_extension in all_extensions:
    # Take everything after the first colon of the page title.  Splitting
    # only once keeps names that themselves contain a colon intact.
    title_parts = this_extension.name.split(':', 1)
    if len(title_parts) != 2:
        continue
    extension_name = title_parts[1]
    # Keep empty vals around to create a list of poorly formatted extensions
    extensions[extension_name] = ''
    try:
        wikitext = site.Pages['Extension:' + extension_name].edit()
    except Exception:
        # If someone did something stupid, not worth breaking the bot.
        # (Exception rather than a bare except, so Ctrl-C still stops it.)
        continue
    # Extract the wikitext. Normally wouldn't be this simple but
    # the extensions are well formatted, each ending with
    # \n}}. Could recursively look for sub templates to be more
    # sure we're at the end.
    template_start = wikitext.find('{{Extension')
    if template_start == -1:
        template_start = wikitext.find('{{extension')
    if template_start == -1:
        # No extension template on the page: leave the empty placeholder.
        continue
    template_end = wikitext.find('\n}}', template_start)
    if template_end == -1:
        # Unterminated template: leave the empty placeholder instead of
        # storing a meaningless two-character slice.
        continue
    # +3 so the closing '\n}}' is part of the stored template.
    extensions[extension_name] = wikitext[template_start:template_end + 3]
# With just a little work we can turn the template into a dictionary
# and then do some cleanup processing of its parameters. This bot
# is definitely relying on the fact that the template ends with \n}}
for extension in extensions.keys():
    # Don't allow subpage extensions
    if '/' in extension:
        continue
    extension_dict = {}
    hooks, tags, types = [], [], []
    template = extensions[extension]
    # Some people like to have funky spacing. Double up just in case
    template = template.replace(' |','|').replace(' |','|').replace('| ','|').replace('| ','|')
    # This hacks off {{Extension and }}, and has the convenient side effect
    # of nuking |templatemode= when it shows up on the first line
    template_lines = template.split('\n')[1:-1]
    for line in template_lines:
        # Can't allow newlines - saw way too many crazy template values. In
        # order for this to be sane the template must have a pipe as the
        # first character on each line; anything else is dropped.
        if not line.startswith('|'):
            continue
        # A parameter line without '=' has no value to record.
        if '=' not in line:
            continue
        raw_key, raw_value = line.split('=', 1)
        key = raw_key.replace('|', '').strip()
        value = raw_value.strip()
        if not value:
            continue
        if 'name' in key:
            # Sometimes the name field doesn't contain the actual name of the extension
            value = extension
        # Hook, tag and type parameters are collected into lists and merged
        # into single fields once all lines have been processed.
        if 'hook' in key:
            hooks.append(value)
            continue
        if 'tag' in key:
            tags.append(value)
            continue
        if 'type' in key:
            types.append(value)
            continue
        # Have a look at LocalisationUpdate for nested templateness that is just not ok.
        # A value holding only one half of a {{ ... }} pair is skipped.
        if ('{{' in value) != ('}}' in value):
            continue
        # These always turn out to be copy/paste jobs from the prototype template
        if '<!-' in value or '-->' in value:
            continue
        value = value.replace('<ref>', ' ').replace('</ref>', ' ')
        extension_dict[key] = value
    if hooks:
        hooks.sort()
        extension_dict['hooks'] = '<br/>'.join(hooks)
    if tags:
        tags.sort()
        extension_dict['tags'] = '<br/>'.join(tags)
    if types:
        types.sort()
        extension_dict['types'] = '<br/>'.join(types)
    # Sometimes the name isn't specified at all
    if 'name' not in extension_dict:
        extension_dict['name'] = extension
    # Don't allow empty templates, or templates with just one parameter
    if len(extension_dict) < 2:
        continue
    extensions_dicts[extension] = extension_dict
# Alphabetically ordered extension names; used later to lay out the full matrix.
sorted_matrix = sorted(extensions_dicts.keys())
# convert all parseable dates into a common wikitable-sortable format
# ("D Mon YYYY"); 'update' values that cannot be parsed are removed.
months = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
for extension in extensions_dicts.keys():
    this_extension_dict = extensions_dicts[extension]
    if 'update' not in this_extension_dict:
        continue
    try:
        this_date = parse(this_extension_dict['update'])
    except Exception:
        # Free-form wiki text: whatever the parser chokes on is discarded.
        # Exception (not a bare except) so Ctrl-C / SystemExit still work.
        del this_extension_dict['update']
        continue
    this_extension_dict['update'] = ' '.join([str(this_date.day),
                                              months[this_date.month - 1],
                                              str(this_date.year)])
##########################################
# figure out what versions of mediawiki this extension works on
# this just looks for a string match of the version. i personally
# don't trust the +,>=,etc.. sign people like to use, for example, 1.12+.
# that generally means that they tested it on 1.12, but not the
# versions that came afterwards.
##########################################
import re

# Matches a standalone "major.minor" token with major 1-2 and minor 0-19.
# The lookbehind rejects tokens that are the tail of a longer number
# (the "1.0" inside "1.11.0+"); the lookahead rejects tokens that are the
# head of a longer minor number (the "1.1" inside "1.12").  Unlike a plain
# substring test with a fixed set of trailing characters, this also finds a
# version at the very end of the text or in front of punctuation.
_MW_VERSION_RE = re.compile(r'(?<![\d.])([12])\.(1?\d)(?!\d)')


def _mentioned_mw_versions(version_text):
    """Return the 'major.minor' versions named in version_text.

    The result is a list of strings without duplicates, ordered numerically
    by major and then minor version.
    """
    found = set()
    for match in _MW_VERSION_RE.finditer(version_text):
        found.add((int(match.group(1)), int(match.group(2))))
    return [str(major) + '.' + str(minor) for major, minor in sorted(found)]


# NOTE(review): a version number of some other product that appears in the
# field (e.g. "accesscontrol 1.3") is still counted; that cannot be told
# apart by pattern matching alone.
for extension in extensions_dicts.keys():
    this_extension_dict = extensions_dicts[extension]
    if 'mediawiki' not in this_extension_dict:
        continue
    supported_versions = _mentioned_mw_versions(this_extension_dict['mediawiki'])
    for this_version in supported_versions:
        extensions_by_mw_version.setdefault(this_version, []).append(extension)
    # Replace the free-form text with the normalised list (possibly empty).
    this_extension_dict['mediawiki'] = ', '.join(supported_versions)
##########################################
# Get the last day that each extension and its talk page were edited
# and the creation date of the extension
##########################################
def _wiki_date(timestamp):
    """Format a value with tm_mday / tm_mon / tm_year fields as 'D Mon YYYY'."""
    return ' '.join([str(timestamp.tm_mday),
                     str(months[timestamp.tm_mon - 1]),
                     str(timestamp.tm_year)])


for extension in extensions_dicts.keys():
    details = extensions_dicts[extension]

    extension_page = site.Pages["Extension:" + extension]
    if extension_page.exists: # should never fail!
        details['lastupdated'] = _wiki_date(extension_page.touched)
        # The creation date is taken from the last entry of the revision
        # listing.
        oldest_revision = list(extension_page.revisions())[-1]
        details['created'] = _wiki_date(oldest_revision['timestamp'])

    talk_page = site.Pages["Extension_talk:" + extension]
    if talk_page.exists:
        details['lastupdatedtalk'] = _wiki_date(talk_page.touched)
##########################################
# Create lists the most recently edited, discussed, updated and created extensions
# A bit redundant with above code, but its more clear to break it out
# Key to sorting by date is a tuple with (year,month,day). easy peasy.
##########################################
# (date field in the extension dict, list that collects its entries)
date_fields = (
    ('lastupdated', recently_edited),
    ('lastupdatedtalk', recently_discussed),
    ('update', recently_updated),
    ('created', recently_created),
)
for extension in extensions_dicts.keys():
    details = extensions_dicts[extension]
    for field, bucket in date_fields:
        if field in details:
            when = parse(details[field])
            bucket.append((when.year, when.month, when.day, extension))

# Newest first: ascending sort followed by a reversal.
for field, bucket in date_fields:
    bucket.sort()
    bucket.reverse()
##########################################
# extensions by type
##########################################
for extension in extensions_dicts.keys():
    details = extensions_dicts[extension]
    if 'types' not in details:
        # Extensions without any type parameter are grouped under 'notype'.
        extensions_by_type.setdefault('notype', []).append(extension)
        continue
    for raw_type in details['types'].split('<br/>'):
        type_name = raw_type.lower()
        # Cut off anything from an opening HTML comment onwards.
        if '--' in type_name:
            type_name = type_name.split('<!--')[0]
        extensions_by_type.setdefault(type_name, []).append(extension)

# Types with fewer than five extensions are discarded.
for type_name in list(extensions_by_type.keys()):
    if len(extensions_by_type[type_name]) < 5:
        del extensions_by_type[type_name]
##########################################
# extensions by status
##########################################
extensions_by_status = {}
for extension in extensions_dicts.keys():
    details = extensions_dicts[extension]
    if 'status' not in details:
        continue
    status = details['status'].lower()
    # Make sure this is a single word status - sanity check
    if ' ' in status:
        continue
    extensions_by_status.setdefault(status, []).append(extension)
##########################################
# Create main extension matrix output page
##########################################
prefix = 'Extension Matrix'

# Status line: generation time, then how many of the collected extensions
# ended up in the matrix.
updated = ''.join(['Last updated: ',
                   datetime.datetime.now().strftime("%Y-%m-%d %H:%M"),
                   ' MST. '])
num_listed = ''.join(['Listing ', str(len(extensions_dicts)),
                      ' out of ', str(len(extensions)),
                      ' members of [[:Category:Extensions]]<br/>'])
extension_matrix = ''.join([
    updated, num_listed, '\n',
    '== Entire Extension Matrix ==\n',
    '* [[', prefix, '/AllExtensions|View all extensions]] (very large!)\n',
])

# Create the entire extension matrix: one template call per extension, in
# sorted order, wrapped in the header and footer templates.
matrix_rows = [BuildTemplate(extensions_dicts[name]) for name in sorted_matrix]
entire_matrix = '{{ExtensionMatrixHeader}}' + ''.join(matrix_rows) + '{{ExtensionMatrixFooter}}'
page = site.Pages[prefix + "/AllExtensions"]
page.save(entire_matrix)
# One subpage for each version of mediawiki that has extensions which mention it
extension_matrix += '== By explicitly supported MediaWiki version ==\n* '
# Candidate versions 1.0 .. 1.19 and 2.0 .. 2.19, in numeric order.
version_labels = [str(major) + '.' + str(minor)
                  for major in range(1, 3)
                  for minor in range(0, 20)]
for version in version_labels:
    if version not in extensions_by_mw_version:
        continue
    members = extensions_by_mw_version[version]
    num_extensions = str(len(members))
    extension_matrix += '[[' + prefix + '/' + version + '|' + version + ']] (' + num_extensions + '), '
    # Create an extension matrix for each version
    version_rows = [BuildTemplate(extensions_dicts[member]) for member in members]
    this_version_matrix = '{{ExtensionMatrixHeader}}' + ''.join(version_rows) + '{{ExtensionMatrixFooter}}'
    page = site.Pages[prefix + '/' + version]
    page.save(this_version_matrix)
extension_matrix += '\n'
# One subpage for each type of status
extension_matrix += '== By status of extension ==\n*'
for this_status in sorted(extensions_by_status.keys()):
    members = extensions_by_status[this_status]
    num_extensions = str(len(members))
    extension_matrix += '[[' + prefix + '/' + this_status + '|' + this_status + ']] (' + num_extensions + '), '
    # Matrix page holding only the extensions with this status.
    status_rows = [BuildTemplate(extensions_dicts[member]) for member in members]
    this_status_matrix = '{{ExtensionMatrixHeader}}' + ''.join(status_rows) + '{{ExtensionMatrixFooter}}'
    page = site.Pages[prefix + '/' + this_status]
    page.save(this_status_matrix)
extension_matrix += '\n'
# One subpage for each extension type
extension_matrix += '== By type of extension ==\n* '
type_keys = sorted(extensions_by_type.keys())
# [1:] gets rid of weird 'Alterego/ExtensionMatrix' type
# NOTE(review): this drops whichever type happens to sort first, whether or
# not it is the unwanted one - confirm this is still the intended entry.
for this_type in type_keys[1:]:
    members = extensions_by_type[this_type]
    num_extensions = str(len(members))
    extension_matrix += '[[' + prefix + '/' + this_type + '|' + this_type + ']] (' + num_extensions + '), '
    # Matrix page holding only the extensions of this type.
    type_rows = [BuildTemplate(extensions_dicts[member]) for member in members]
    this_type_matrix = '{{ExtensionMatrixHeader}}' + ''.join(type_rows) + '{{ExtensionMatrixFooter}}'
    page = site.Pages[prefix + '/' + this_type]
    page.save(this_type_matrix)
extension_matrix += '\n'
# The four "most recent" sections share one layout, so they are driven from
# a table: (section heading, ranked list of (year, month, day, name) tuples,
# date field shown next to each link, namespace prefix of the link target).
recent_sections = (
    ('== 500 most recently created extensions ==\n* ',
     recently_created, 'created', 'Extension:'),
    ('== 500 most recently edited extension pages ==\n* ',
     recently_edited, 'lastupdated', 'Extension:'),
    ('== 500 most recently edited extension talk pages ==\n* ',
     recently_discussed, 'lastupdatedtalk', 'Extension_talk:'),
    ('== 500 most recently updated extensions ==\n* ',
     recently_updated, 'update', 'Extension:'),
)
for heading, ranked, date_key, namespace in recent_sections:
    extension_matrix += heading
    # Slice instead of indexing 0..499 so a list with fewer than 500
    # entries is simply listed in full rather than raising IndexError.
    for entry in ranked[:500]:
        extension_name = entry[3]
        extension_date = extensions_dicts[extension_name][date_key]
        extension_matrix += '[[' + namespace + extension_name + '|' + extension_name + ']] (' + extension_date + '), '
    extension_matrix += '\n'
# '\n{{ExtensionMatrixFooter}}\n'
# Publish the overview page.
page = site.Pages[prefix]
page.save(extension_matrix)