User:Caesar Schinas/pwb/cg_related.py
Jump to navigation
Jump to search
This is a PWB script which I wrote to automatically create Related Articles subpages for those articles which don't have them, based on Special:WhatLinksHere.
These automatically created pages need checking and editing by a human, and articles for which this has not been done are listed in the Category:Bot-created Related Articles subpages.
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright © 2009 Caesar Schinas.
# Released under the CC-by-nc-sa-3.0 licence (http://creativecommons.org/licenses/by-nc-sa/3.0/).
# May be distributed and modified under the following conditions :
# (by) Attribution must be given to Caesar Schinas in all distributions and derivatives.
# (nc) Commercial use is prohibited except with prior written permission from Caesar Schinas.
# (sa) Derivative works must be released under the same licence and conditions.
"""
This bot automatically creates "Related Articles" subpages for those articles which don't have
them, based on Special:WhatLinksHere.
The following parameters are supported:
-start Start checking articles alphabetically from this point,
instead of starting from the beginning.
-always Don't ask each time before creating a Related Articles
subpage; just create it anyway.
-debug Don't actually create any Related Articles subpages; just
show what would have been created.
"""
__version__ = '$Id: cg_related.py 0 2009-06-30 10:23:00 caesarsgrunt $'
import wikipedia
import pagegenerators
class RelatedArticlesBot:
    """Create missing "Related Articles" subpages for top-level articles.

    The bot walks the pages produced by its generator one at a time.  It
    remembers the most recent top-level page in ``self.prev`` and decides
    what to do with it once a later page shows whether a
    "<title>/Related Articles" subpage follows.

    NOTE(review): this relies on the generator yielding an article's
    subpages after the article itself (main() feeds it an all-pages
    generator, which presumably iterates alphabetically) -- confirm.
    """

    # Edit summary
    msg = {
        'en': u'Robot test edit: Starting Related Articles subpage. Please check and brush. For context, see [[:Category:Bot-created Related Articles subpages|here]].',
    }

    def __init__(self, generator, debug, always):
        """
        Constructor. Parameters:
            * generator - The page generator that determines which pages
                          to work on.
            * debug     - If True, doesn't do any real changes, but only shows
                          what would have been changed.
            * always    - If True, creates subpages without asking for
                          confirmation each time.
        """
        self.generator = generator
        self.debug = debug
        self.always = always
        # Most recent top-level page whose Related Articles subpage has not
        # been seen yet; None when there is no such pending page.
        self.prev = None

    def run(self):
        """Set the edit summary, then check every page from the generator."""
        # Set the edit summary message
        wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), self.msg))
        for page in self.generator:
            self.check(page)
        # The last top-level page has no successor to trigger its check, so
        # settle it explicitly once the generator is exhausted.
        if self.prev is not None:
            self._settle(self.prev)
            self.prev = None

    def check(self, page):
        """Process one page from the generator.

        Subpages either confirm that the pending top-level page already has
        a Related Articles subpage or are ignored.  A top-level page first
        settles the pending page (if any) and then becomes the new pending
        page itself.
        """
        title = page.title()
        is_subpage = '/' in title

        if self.prev is not None:
            if is_subpage:
                if title == self.prev.title() + '/Related Articles':
                    # The pending top level page already has a Related Articles page.
                    wikipedia.output(u"\03{lightgreen}%s already exists \03{default}" % title)
                    self.prev = None
                else:
                    # Page is a subpage, but not Related Articles
                    wikipedia.output(u"%s" % title)
                return
            # New top level page reached, and no Related Articles subpage
            # was seen for the previous one.
            self._settle(self.prev)
            self.prev = None
            # Fall through so the new top-level page is tracked as well;
            # returning here would skip every second subpage-less article.

        if is_subpage:
            # Page is a subpage, but has no pending parent. Ignore.
            wikipedia.output(u"\n Skipping %s (standalone subpage)" % page.aslink())
            return

        wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % title)
        self.prev = page

    def _related_subpage(self, rootpage):
        """Return the Page object for rootpage's Related Articles subpage."""
        # NOTE(review): addressed with an underscore here, while check()
        # compares against '/Related Articles' with a space; presumably the
        # framework normalises underscores in titles -- confirm.
        return wikipedia.Page(wikipedia.getSite(), rootpage.title() + '/Related_Articles')

    def _settle(self, rootpage):
        """Create rootpage's Related Articles subpage if it really is missing.

        Disambiguation pages (title ending in "(disambiguation)") are
        reported and left alone.
        """
        # Check that there is really no Related Articles subpage...
        if self._related_subpage(rootpage).exists():
            return
        wikipedia.output(u"\03{lightred}%s has no Related Articles subpage\03{default}" % rootpage.aslink())
        if rootpage.title().endswith("(disambiguation)"):
            # Report the page that was actually tested, not its successor.
            wikipedia.output(u"\03{lightgreen}%s is a disambiguation page\03{default}" % rootpage.title())
            return
        self.create(rootpage)

    @staticmethod
    def _subpage_text(root_title, related):
        """Return the wikitext of a new Related Articles subpage.

        root_title is the title of the parent article; related is the
        newline-separated list of {{r|...}} entries to suggest.
        """
        before = u"<noinclude>{{subpages}}</noinclude>\n\n==Parent topics==\n\n\n==Subtopics==\n\n\n==Other related topics==\n\n\n"
        intro = u"<!-- Remove the section below after copying links to the other sections. -->\n==Bot-suggested topics==\nAuto-populated based on [[Special:WhatLinksHere/%s]]. Needs checking by a human.\n\n" % root_title
        after = u"\n\n{{Bot-created_related_article_subpage}}\n<!-- Remove the section above after copying links to the other sections. -->"
        return before + intro + related + after

    def create(self, rootpage):
        """Build and save the Related Articles subpage of rootpage.

        The suggestions are the existing, non-redirect main-namespace pages
        that reference rootpage, reduced to their top-level titles.  Nothing
        is saved when there are no suggestions, when -debug is set, or when
        the user declines the confirmation prompt.
        """
        wikipedia.output(u"Looking for Related Articles...")
        root_title = rootpage.title()
        entries = set()
        for ref in rootpage.getReferences():
            if ref.namespace() != 0:
                continue
            # Subpages count as a reference from their top-level article.
            title = ref.title().split('/')[0]
            if title == root_title:
                continue
            if not ref.exists() or ref.isRedirectPage():
                continue
            entries.add('{{r|' + title + '}}')

        # If nothing links here, we obviously can't create a Related Articles subpage.
        if not entries:
            return

        related = u"\n".join(sorted(entries))

        # Show the user what we're doing...
        wikipedia.output(u"Related Articles :")
        wikipedia.output(related)

        # If -debug is set, we don't need to do anything more.
        if self.debug:
            wikipedia.output(u"\n\n")
            return

        # Confirm that we should create the page (unless -always is set).
        if not self.always:
            choice = wikipedia.inputChoice(u'Do you want to create the Related Articles subpage?', ['Yes', 'No'], ['Y', 'N'], 'N')
            if choice != 'y':
                wikipedia.output(u"\n")
                return

        # Create the Related Articles subpage.  Errors are reported against
        # the subpage being written, not against whichever referencing page
        # the loop above happened to visit last.
        subpage = self._related_subpage(rootpage)
        try:
            subpage.put(self._subpage_text(root_title, related))
        except wikipedia.LockedPage:
            wikipedia.output(u"Page %s is locked; skipping." % subpage.aslink())
        except wikipedia.EditConflict:
            wikipedia.output(u'Skipping %s because of edit conflict' % (subpage.title()))
        except wikipedia.SpamfilterError as error:
            wikipedia.output(u'Cannot change %s because of spam blacklist entry %s' % (subpage.title(), error.url))

        wikipedia.output(u"\n")
def main():
    """Parse the command line and run the bot over the main namespace.

    Recognised arguments are -start (the text after the seventh character
    becomes the starting title), -debug and -always; anything else returned
    by wikipedia.handleArgs() is ignored.
    """
    settings = {'start': '!', 'debug': False, 'always': False}

    # Parse command line arguments
    for argument in wikipedia.handleArgs():
        if argument.startswith("-start"):
            # Skip "-start" plus the single separator character after it.
            settings['start'] = argument[7:]
        elif argument.startswith("-debug"):
            settings['debug'] = True
        elif argument.startswith("-always"):
            settings['always'] = True

    all_pages = pagegenerators.AllpagesPageGenerator(
        start=settings['start'], namespace=0, includeredirects=False)
    preloaded = pagegenerators.PreloadingGenerator(all_pages)
    RelatedArticlesBot(preloaded, settings['debug'], settings['always']).run()
# Run the bot only when executed as a script, not when imported.
if __name__ == "__main__":
    try:
        main()
    finally:
        # Call wikipedia.stopme() whether main() returns or raises.
        wikipedia.stopme()