User:Caesar Schinas/pwb/cg related.py: Difference between revisions

From Citizendium
Jump to navigation Jump to search
imported>Caesar Schinas
mNo edit summary
imported>Caesar Schinas
mNo edit summary
Line 1: Line 1:
{{DISPLAYTITLE:User:Caesar Schinas/pwb/cg_related.py}}
{{DISPLAYTITLE:User:Caesar Schinas/pwb/cg_related.py}}
This is a PWB script which I wrote to automatically create Related Articles subpages for those articles which don't have them, populating these pages with the contents of [[Special:WhatLinksHere]].
This is a PWB script which I wrote to automatically create Related Articles subpages for those articles which don't have them, based on Special:WhatLinksHere.


<pre>
<pre>
Line 7: Line 7:


# Copyright © 2009 Caesar Schinas.
# Copyright © 2009 Caesar Schinas.
# Released under the CC-by-nc-sa-3.0.
# Released under the CC-by-nc-sa-3.0 licence (http://creativecommons.org/licenses/by-nc-sa/3.0/).
# May be freely distributed and modified, under the following conditions :
# May be distributed and modified under the following conditions :
# (a) Attribution is given to Caesar Schinas in all distributions and derivatives.
# (by) Attribution must be given to Caesar Schinas in all distributions and derivatives.
# (b) May not be used commercially in any way without prior written permission.
# (nc) Commercial use is prohibited except with prior written permission from Caesar Schinas.
# (c) Derivative works must be released under the same licence with this notice.
# (sa) Derivative works must be released under the same licence and conditions.


"""
"""
This bot looks for articles which do not have a "Related Articles" subpage,
This bot automatically creates "Related Articles" subpages for those articles which don't have
and creates this subpage for those articles.
them, based on Special:WhatLinksHere.
The page is populated with the contents of Special:WhatLinksHere.


The following parameters are supported:
The following parameters are supported:


-start            Start checking CZ articles alphabetically from this point,
-start            Start checking articles alphabetically from this point,
                   instead of starting from the beginning.
                   instead of starting from the beginning.


-always          If given, doesn't ask each time before creating a
-always          Don't ask each time before creating a Related Articles
                   Related Articles subpage, but creates it anyway.
                   subpage; just create it anyway.


-debug            If given, doesn't do any real changes, but only shows
-debug            Don't actually create any Related Articles subpages; just
                   what would have been changed.
                   show what would have been created.


"""
"""
__version__ = '$Id: cg_related.py 0 2009-06-29 01:10:00Z caesarsgrunt $'
__version__ = '$Id: cg_related.py 0 2009-06-29 09:58:00 caesarsgrunt $'
import wikipedia
import wikipedia
import pagegenerators
import pagegenerators


class RelatedArticlesBot:
class RelatedArticlesBot:
# Edit summary message that should be used.
# Edit summary
# NOTE: Put a good description here, and add translations, if possible!
msg = {
msg = {
'en': u'Robot: Creating Related Articles subpage',
'en': u'Robot: Creating Related Articles subpage',
Line 68: Line 66:
# Check that there is really no Related Articles subpage...
# Check that there is really no Related Articles subpage...
if wikipedia.Page(wikipedia.getSite(), self.prev.title()+'/Related_Articles').exists() == False :
if wikipedia.Page(wikipedia.getSite(), self.prev.title()+'/Related_Articles').exists() == False :
#wikipedia.output(u"\03{lightred}%s has no Related Articles subpage\03{default}" % self.prev.aslink())
# wikipedia.output(u"\03{lightred}%s has no Related Articles subpage\03{default}" % self.prev.aslink())
self.create(self.prev)
self.create(self.prev)
self.prev = None
self.prev = None
Line 74: Line 72:
elif page.title() == self.prev.title() + '/Related Articles' :
elif page.title() == self.prev.title() + '/Related Articles' :
# The current top level page already has a Related Articles page.
# The current top level page already has a Related Articles page.
#wikipedia.output(u"\03{lightgreen}%s\03{default}" % page.title())
# wikipedia.output(u"\03{lightgreen}%s\03{default}" % page.title())
self.prev = None
self.prev = None
return
return
elif page.title().count('/') != 0 :
elif page.title().count('/') != 0 :
# Page is a subpage, but not Related Articles
# Page is a subpage, but not Related Articles
#wikipedia.output(u"%s" % page.title())
# wikipedia.output(u"%s" % page.title())
return
return
elif page.title().count('/') != 0 :
elif page.title().count('/') != 0 :
# Page is a subpage, but has no parent. Ignore.
# Page is a subpage, but has no parent. Ignore.
#wikipedia.output(u"Skipping %s (standalone subpage)" % page.aslink())
# wikipedia.output(u"Skipping %s (standalone subpage)" % page.aslink())
return
return
elif wikipedia.Page(wikipedia.getSite(), 'Template:'+page.title()+'/Metadata').exists() == False :
elif wikipedia.Page(wikipedia.getSite(), 'Template:'+page.title()+'/Metadata').exists() == False :
#wikipedia.output(u"Skipping %s (page without metadata)" % page.aslink())
# wikipedia.output(u"Skipping %s (page without metadata)" % page.aslink())
return
return
else :
else :
# Prev isn't set either because we've already dealt with it or because this is the first page.
# Prev isn't set either because we've already dealt with it or because this is the first page.
#wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
# wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
self.prev = page
self.prev = page
return
return
Line 149: Line 147:


def main():
def main():
# This factory is responsible for processing command line arguments
# that are also used by other scripts and that determine on which pages
# to work on.
start = '!'
start = '!'
debug = False
debug = False

Revision as of 03:59, 29 June 2009

This is a PWB script which I wrote to automatically create Related Articles subpages for those articles which don't have them, based on Special:WhatLinksHere.

#!/usr/bin/python
# -*- coding: utf-8	 -*-

# Copyright © 2009 Caesar Schinas.
# Released under the CC-by-nc-sa-3.0 licence (http://creativecommons.org/licenses/by-nc-sa/3.0/).
# May be distributed and modified under the following conditions :
# (by) Attribution must be given to Caesar Schinas in all distributions and derivatives.
# (nc) Commercial use is prohibited except with prior written permission from Caesar Schinas.
# (sa) Derivative works must be released under the same licence and conditions.

"""
This bot automatically creates "Related Articles" subpages for those articles which don't have
them, based on Special:WhatLinksHere.

The following parameters are supported:

-start            Start checking articles alphabetically from this point,
                  instead of starting from the beginning.

-always           Don't ask each time before creating a Related Articles
                  subpage; just create it anyway.

-debug            Don't actually create any Related Articles subpages; just
                  show what would have been created.

"""
__version__ = '$Id: cg_related.py 0 2009-06-29 09:58:00 caesarsgrunt $'
import wikipedia
import pagegenerators

class RelatedArticlesBot:
	"""
	Creates missing "Related Articles" subpages.

	Pages are fed to check() one at a time. A top-level page that has a
	'Template:<title>/Metadata' page becomes the pending candidate
	(self.prev); the pages that follow decide whether the candidate already
	has a Related Articles subpage or needs one created.

	NOTE(review): this relies on the generator yielding an article's
	subpages right after the article itself (e.g. alphabetical order) -
	confirm for any generator other than the one built in main().
	"""
	# Edit summary
	msg = {
		'en': u'Robot: Creating Related Articles subpage',
	}

	def __init__(self, generator, debug, always):
		"""
		Constructor. Parameters:
			* generator - The page generator that determines on which pages
						  to work on.
			* debug		- If True, doesn't do any real changes, but only shows
						  what would have been changed.
			* always	- If True, creates each subpage without asking for
						  confirmation first.
		"""
		self.generator = generator
		self.debug = debug
		self.always = always
		# Pending candidate: a top-level page whose subpages are still being scanned.
		self.prev = None

	def run(self):
		"""Set the edit summary, then check every page from the generator."""
		wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), self.msg))
		for page in self.generator:
			self.check(page)
		# The last candidate has no following page to trigger its check,
		# so resolve it explicitly once the generator is exhausted.
		self._finish_previous()

	def _finish_previous(self):
		"""Create the subpage for the pending candidate if it lacks one, then clear it."""
		if self.prev is None:
			return
		# Double-check on the wiki; not having seen the subpage go past is not proof.
		subpage = wikipedia.Page(wikipedia.getSite(), self.prev.title()+'/Related_Articles')
		if not subpage.exists():
			self.create(self.prev)
		self.prev = None

	def check(self, page):
		"""
		Examine one page from the generator.

		While a candidate is pending:
			* its '/Related Articles' subpage clears the candidate;
			* any other subpage of the candidate is skipped;
			* anything else means the candidate's subpages are over, so the
			  candidate is resolved and the current page is then considered
			  as the next candidate.
		A page becomes the candidate only if its title contains no '/' and
		'Template:<title>/Metadata' exists.
		"""
		title = page.title()

		if self.prev is not None:
			prev_title = self.prev.title()
			if title == prev_title + '/Related Articles':
				# The pending page already has a Related Articles subpage.
				self.prev = None
				return
			if title.startswith(prev_title + '/'):
				# Some other subpage of the pending page; keep scanning.
				return
			# We have moved past the pending page's subpages without seeing
			# Related Articles. Resolve it, then fall through so the current
			# page is not lost as a candidate.
			self._finish_previous()

		if '/' in title:
			# Page is a subpage whose parent is not pending. Ignore.
			return
		if not wikipedia.Page(wikipedia.getSite(), 'Template:'+title+'/Metadata').exists():
			# Page without metadata. Ignore.
			return
		self.prev = page

	def create(self, rootpage):
		"""
		Build and save the Related Articles subpage for rootpage.

		The suggestions are the namespace-0 pages returned by
		rootpage.getReferences(), each reduced to the part of its title before
		the first '/', excluding rootpage itself, non-existent pages and
		redirects. Nothing is done if that list is empty. With debug set the
		suggestions are only displayed; unless always is set the user is asked
		before saving.
		"""
		root_title = rootpage.title()
		related = []
		for page in rootpage.getReferences() :
			if page.namespace() != 0 :
				continue
			title = page.title().split('/')[0]
			if title == root_title :
				continue
			if not page.exists() :
				continue
			if page.isRedirectPage() :
				continue
			related.append('{{r|'+title+'}}')

		# If nothing links here, we obviously can't create a Related Articles subpage.
		if not related :
			return

		# Several subpages of one article collapse to the same entry; dedupe and sort.
		related = "\n".join(sorted(set(related)))

		# Show the user what we're doing...
		wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % root_title)
		wikipedia.output(u"Related Articles :")
		wikipedia.output(related)

		# If -debug is set, we don't need to do anything more.
		if self.debug :
			wikipedia.output(u"\n\n")
			return

		# Confirm that we should create the page (unless -always is set).
		if not self.always :
			choice = wikipedia.inputChoice(u'Do you want to create the Related Articles subpage?', ['Yes', 'No'], ['Y', 'N'], 'N')
			if choice != 'y' :
				wikipedia.output(u"\n")
				return

		before = u"{{subpages}}\n\n==Parent topics==\n\n\n==Subtopics==\n\n\n==Other related topics==\n\n\n"
		intro = "<!-- Remove the section below after copying links to the other sections. -->\n==Bot-suggested topics==\nAuto-populated based on [[Special:WhatLinksHere/%s]]. Needs checking by a human.\n\n" % root_title
		after = u"\n\n[[Category:Bot-created Related Articles subpages]]\n<!-- Remove the section above after copying links to the other sections. -->"
		related = before + intro + related + after

		# Create the Related Articles subpage. The error messages name the page
		# being written, not whichever referring page the loop above ended on.
		target = wikipedia.Page(wikipedia.getSite(), root_title+'/Related_Articles')
		try:
			target.put(related)
		except wikipedia.LockedPage:
			wikipedia.output(u"Page %s is locked; skipping." % target.aslink())
		except wikipedia.EditConflict:
			wikipedia.output(u'Skipping %s because of edit conflict' % (target.title()))
		except wikipedia.SpamfilterError as error:
			wikipedia.output(u'Cannot change %s because of spam blacklist entry %s' % (target.title(), error.url))

		wikipedia.output(u"\n\n")
		

def main():
	"""
	Parse the command line and run the bot over all namespace-0 pages.

	Recognised arguments (others returned by wikipedia.handleArgs() are ignored):
		-start:<title>  first page title to check (default '!')
		-debug          only show what would be created
		-always         create subpages without asking
	"""
	start = '!'
	debug = False
	always = False

	# Parse command line arguments
	for arg in wikipedia.handleArgs():
		if arg.startswith("-start"):
			# arg[7:] skips "-start" and its one-character separator. A bare
			# "-start" would give an empty string, so keep the default instead.
			start = arg[7:] or start
		elif arg.startswith("-debug"):
			debug = True
		elif arg.startswith("-always"):
			always = True

	all_pages = pagegenerators.AllpagesPageGenerator(start=start, namespace=0, includeredirects=False)
	gen = pagegenerators.PreloadingGenerator(all_pages)
	bot = RelatedArticlesBot(gen, debug, always)
	bot.run()

# Run the bot only when this file is executed as a script, not when imported.
if __name__ == "__main__":
	try:
		main()
	finally:
		# Always called, even if main() raises.
		# NOTE(review): presumably releases pywikipedia's shared throttle/session state - confirm.
		wikipedia.stopme()