User:Caesar Schinas/pwb/cg related.py

This is a Pywikipediabot (PWB) script which I wrote to automatically create a Related Articles subpage for each article which doesn't have one, populating the new page with the contents of that article's Special:WhatLinksHere listing.

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
This bot looks for articles which do not have a "Related Articles" subpage,
and creates this subpage for those articles. The page is populated with the
contents of Special:WhatLinksHere.

The following parameters are supported:

-start           Start checking CZ articles alphabetically from this point,
                 instead of starting from the beginning.

-always          If given, doesn't ask each time before creating a
                 Related Articles subpage, but creates it anyway.

-debug           If given, doesn't do any real changes, but only shows
                 what would have been changed.

"""
__version__ = '$Id: cg_related.py 0 2009-06-29 01:10:00Z caesarsgrunt $'

import wikipedia
import pagegenerators

class RelatedArticlesBot:
    """Create missing "Related Articles" subpages for main-namespace articles.

    The bot walks a page generator, remembers the most recent top-level
    article (``self.prev``) and watches the pages that follow it.  If a
    ``<title>/Related Articles`` page turns up, the article is left alone;
    otherwise a subpage is built from the pages that link to the article.
    """

    # Edit summary message that should be used.
    # NOTE: Put a good description here, and add translations, if possible!
    msg = {
        'en': u'Robot: Creating Related Articles subpage',
    }

    def __init__(self, generator, debug, always):
        """Constructor.

        Parameters:
            * generator - The page generator that determines which pages
                          to work on.
            * debug     - If True, doesn't do any real changes, but only
                          shows what would have been changed.
            * always    - If True, creates subpages without asking for
                          confirmation first.
        """
        self.generator = generator
        self.debug = debug
        self.always = always
        # Top-level article whose subpages are currently being scanned, or
        # None when no article is pending.
        self.prev = None

    def run(self):
        """Set the edit summary and check every page from the generator."""
        wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), self.msg))
        for page in self.generator:
            self.check(page)
        # The final article has no following page to trigger its check, so
        # finish it explicitly once the generator is exhausted.
        self._finish_previous()

    def _finish_previous(self):
        """Create the subpage for the pending article if it has none."""
        previous, self.prev = self.prev, None
        if previous is None:
            return
        # Check that there is really no Related Articles subpage before
        # creating one.
        subpage = wikipedia.Page(wikipedia.getSite(),
                                 previous.title() + '/Related_Articles')
        if not subpage.exists():
            self.create(previous)

    def check(self, page):
        """Process one page from the generator.

        While an article is pending in ``self.prev``, its subpages are
        skipped until either its Related Articles subpage is seen (nothing
        to do) or a page that is not one of its subpages arrives (the
        pending article is finished via ``_finish_previous``).  The current
        page then becomes the pending article if it is a top-level page
        with a ``Template:<title>/Metadata`` page.
        """
        title = page.title()

        if self.prev is not None:
            prev_title = self.prev.title()
            if title == prev_title + '/Related Articles':
                # The current top level page already has a Related Articles
                # page.
                self.prev = None
                return
            # Compare against "<title>/" so that "Foobar" is not mistaken
            # for a subpage of "Foo".
            if title.startswith(prev_title + '/'):
                # Page is a subpage, but not Related Articles.
                return
            # New top level page reached, and no Related Articles subpage
            # found for the previous one.
            self._finish_previous()
            # Fall through: the current page may itself need a subpage.

        if '/' in title:
            # Page is a subpage, but has no parent. Ignore.
            return
        metadata = wikipedia.Page(wikipedia.getSite(),
                                  'Template:' + title + '/Metadata')
        if not metadata.exists():
            # Page without metadata. Ignore.
            return
        self.prev = page

    def create(self, rootpage):
        """Build and save the Related Articles subpage of *rootpage*.

        Collects the main-namespace pages that reference *rootpage*
        (reduced to their top-level titles, skipping the article itself,
        missing pages and redirects), shows the list, and saves it unless
        ``self.debug`` is set or the user declines.
        """
        root_title = rootpage.title()

        entries = set()
        for page in rootpage.getReferences():
            if page.namespace() != 0:
                continue
            title = page.title().split('/')[0]
            if title == root_title:
                continue
            if not page.exists() or page.isRedirectPage():
                continue
            # NOTE(review): the list-entry markup was lost from the published
            # copy of this script (it appended an empty string).  {{r|Title}}
            # is presumed to be the intended template - confirm.
            entries.add(u'{{r|%s}}' % title)

        # If nothing links here, we obviously can't create a Related
        # Articles subpage.
        if not entries:
            return
        related = "\n".join(sorted(entries))

        # Show the user what we're doing...
        wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % root_title)
        wikipedia.output(u"Related Articles :")
        wikipedia.output(related)

        # If -debug is set, we don't need to do anything more.
        if self.debug:
            wikipedia.output(u"\n\n")
            return

        related = ("\n\n==Bot-suggested topics==\n"
                   "Auto-populated based on Special:WhatLinksHere/"
                   + root_title
                   + ". Needs checking by a human.\n\n" + related + "\n\n")

        # Confirm that we should create the page (unless -always is set).
        if not self.always:
            choice = wikipedia.inputChoice(
                u'Do you want to create the Related Articles subpage?',
                ['Yes', 'No'], ['Y', 'N'], 'N')
            if choice != 'y':
                wikipedia.output(u"\n")
                return

        # Create the Related Articles subpage.  Errors are reported against
        # the subpage being written, not against the last referring page.
        target = wikipedia.Page(wikipedia.getSite(),
                                root_title + '/Related_Articles')
        try:
            target.put(related)
        except wikipedia.LockedPage:
            wikipedia.output(u"Page %s is locked; skipping." % target.aslink())
        except wikipedia.EditConflict:
            wikipedia.output(u'Skipping %s because of edit conflict'
                             % target.title())
        except wikipedia.SpamfilterError as error:
            wikipedia.output(
                u'Cannot change %s because of spam blacklist entry %s'
                % (target.title(), error.url))

def main():
    """Parse the command line and run the bot over the main namespace.

    Recognised arguments (anything else returned by
    ``wikipedia.handleArgs()`` is ignored):
        -start:<title>  title to start from, instead of the beginning
        -debug          only show what would be changed
        -always         create subpages without asking for confirmation
    """
    # '!' is the default starting point when -start is not given.
    start = '!'
    debug = False
    always = False

    # Parse command line arguments
    for arg in wikipedia.handleArgs():
        if arg.startswith("-start"):
            # Skip the "-start:" prefix; a bare "-start" keeps the default
            # rather than passing an empty title to the generator.
            start = arg[7:] or start
        elif arg.startswith("-debug"):
            debug = True
        elif arg.startswith("-always"):
            always = True

    gen = pagegenerators.PreloadingGenerator(
        pagegenerators.AllpagesPageGenerator(
            start=start, namespace=0, includeredirects=False))
    bot = RelatedArticlesBot(gen, debug, always)
    bot.run()

# Script entry point: run the bot and always call wikipedia.stopme()
# afterwards, even if main() raises.
if __name__ == "__main__":
    try:
        main()
    finally:
        wikipedia.stopme()