# -*- coding: utf-8 -*-
"""
This module is based on cosmetic_changes.py.
This module can do slight modifications to a wiki page source code such that
the code looks cleaner. The changes are not supposed to change the look of the
rendered wiki page.
The changes it proposes are mostly targeted at the French language.
Feel free to use.
"""
__version__ = '2009-06-05 Neuceu'
import wikipedia, pagegenerators, isbn
import re,codecs
# Warning shown when this module is run as a stand-alone script; also
# substituted into the module help text via docuReplacements below.
# (Fixed garbled grammar: "the changes are that are made" -> "the changes that are made".)
warning = """ATTENTION: You can run this script as a stand-alone for testing purposes.
However, the changes that are made are only minor, and other users
might get angry if you fill the version histories and watchlists with such
irrelevant changes."""
# Substitutions applied to this module's docstring by the pywikipedia
# help system (wikipedia.showHelp).
docuReplacements = {
    # BUG FIX: the key had been corrupted to u'\xb6ms;' -- the '&para'
    # prefix of '&params;' was decoded as the pilcrow character at some
    # point. The help system searches for the literal marker '&params;'.
    '&params;': pagegenerators.parameterHelp,
    '&warning;': warning,
}
# Summary message when using this module as a stand-alone script
# (keyed by language code; selected via wikipedia.translate()).
msg_standalone = {
    'de': u'Bot: Kosmetische Änderungen',
    'en': u'Robot: Cosmetic changes',
    'fr': u'[[User:PimpBot|PimpBot]] : changements cosmétiques et orthographiques',
}
# Summary message that will be appended to the normal message when
# cosmetic changes are made on the fly
# (keyed by language code; selected via wikipedia.translate()).
msg_append = {
    'de': u'; kosmetische Änderungen',
    'en': u'; cosmetic changes',
    'fr': u'; changements cosmétiques',
}
# Templates that removeDeprecatedTemplates() strips from pages,
# keyed first by family name, then by language code.
deprecatedTemplates = {
    'wikipedia': {
        'de': [
            u'Stub',
        ]
    }
}
# functions to manipulate wikitext strings (by default, all text arguments
# should be Unicode)
# All return the modified text as a unicode object
def replaceExcept2(text, old, new, exceptions, caseInsensitive=False,
                   allowoverlap=False, marker = '', site = None):
    """
    Return text with 'old' replaced by 'new', ignoring specified types of text.

    Skips occurrences of 'old' within exceptions; e.g., within nowiki tags or
    HTML comments. If caseInsensitive is true, then use case insensitive
    regex matching. If allowoverlap is true, overlapping occurrences are all
    replaced (watch out when using this, it might lead to infinite loops!).

    Parameters:
        text            - a unicode string
        old             - a compiled regular expression, or a pattern string
                          which will be compiled here
        new             - a unicode string (which can contain regular
                          expression references), or a function which takes
                          a match object as parameter. See parameter repl of
                          re.sub().
        exceptions      - a list of strings which signal what to leave out,
                          e.g. ['math', 'table', 'template']; compiled
                          regexes are also accepted
        caseInsensitive - a boolean
        marker          - a string that will be added to the last replacement;
                          if nothing is changed, it is added at the end
        site            - the wiki site; defaults to wikipedia.getSite()
    """
    # Hyperlink regex is defined in weblinkchecker.py
    import weblinkchecker
    if site is None:
        site = wikipedia.getSite()
    exceptionRegexes = {
        'comment': re.compile(r'(?s)<!--.*?-->'),
        # section headers
        'header': re.compile(r'\r\n=+.+=+ *\r\n'),
        'includeonly': re.compile(r'(?is)<includeonly>.*?</includeonly>'),
        'math': re.compile(r'(?is)<math>.*?</math>'),
        'noinclude': re.compile(r'(?is)<noinclude>.*?</noinclude>'),
        # wiki tags are ignored inside nowiki tags.
        'nowiki': re.compile(r'(?is)<nowiki>.*?</nowiki>'),
        # preformatted text
        'pre': re.compile(r'(?ism)<pre>.*?</pre>'),
        # BUG FIX: the pattern opened on '<syntaxhighlight ' but closed on
        # '</source>', so neither a <source> nor a <syntaxhighlight> block
        # was ever protected. MediaWiki accepts both tag names as aliases;
        # match both, each with its own closing tag.
        'source': re.compile(r'(?is)<(?:source|syntaxhighlight)[ >].*?'
                             r'</(?:source|syntaxhighlight)>'),
        # inline references
        'ref': re.compile(r'(?ism)<ref[ >].*?</ref>'),
        'timeline': re.compile(r'(?is)<timeline>.*?</timeline>'),
        # lines that start with a space are shown in a monospace font and
        # have whitespace preserved.
        'startspace': re.compile(r'(?m)^ (.*?)$'),
        # tables often have whitespace that is used to improve wiki
        # source code readability.
        # TODO: handle nested tables.
        'table': re.compile(r'(?ims)^{\|.*?^\|}|<table>.*?</table>'),
        # templates with parameters often have whitespace that is used to
        # improve wiki source code readability.
        # 'template': re.compile(r'(?s){{.*?}}'),
        # The regex above fails on nested templates. This regex can handle
        # templates cascaded up to level 3, but no deeper. For arbitrary
        # depth, we'd need recursion which can't be done in Python's re.
        # After all, the language of correct parenthesis words is not regular.
        'template': re.compile(r'(?s){{(({{(({{.*?}})|.)*}})|.)*}}'),
        'hyperlink': weblinkchecker.compileLinkR(),
        'gallery': re.compile(r'(?is)<gallery.*?>.*?</gallery>'),
        # this matches internal wikilinks, but also interwiki, categories, and
        # images.
        'link': re.compile(r'\[\[[^\]\|]*(\|[^\]]*)?\]\]'),
        'interwiki': re.compile(r'(?i)\[\[(%s)\s?:[^\]]*\]\][\s]*'
            % '|'.join(site.validLanguageLinks() + site.family.obsolete.keys())),
        'isbn': re.compile(r'ISBN-13[:]?[-\d\s]*|ISBN[-\d\s]*|ASIN[-\d\s:]+'),
        'exotic': re.compile(r'Tel\s?[:]\s?[\d-]+|#([AaBbCcEeFf]|\d){6}|style=\"[^\"]+\"'),
    }
    # if we got a string, compile it as a regular expression
    if isinstance(old, basestring):
        if caseInsensitive:
            old = re.compile(old, re.IGNORECASE | re.UNICODE)
        else:
            old = re.compile(old)
    dontTouchRegexes = []
    for exc in exceptions:
        if isinstance(exc, basestring):
            # assume it's a reference to the exceptionRegexes dictionary
            # defined above.
            if exc not in exceptionRegexes:
                raise ValueError("Unknown tag type: " + exc)
            dontTouchRegexes.append(exceptionRegexes[exc])
        else:
            # assume it's a regular expression
            dontTouchRegexes.append(exc)
    index = 0
    markerpos = len(text)
    # Matches \N and \g<name> backreferences inside a string replacement.
    # Hoisted out of the replacement loop (it is loop-invariant).
    groupR = re.compile(r'\\(?P<number>\d+)|\\g<(?P<name>.+?)>')
    while True:
        match = old.search(text, index)
        if not match:
            # nothing left to replace
            break
        # check which exception will occur next.
        nextExceptionMatch = None
        for dontTouchR in dontTouchRegexes:
            excMatch = dontTouchR.search(text, index)
            if excMatch and (
                    nextExceptionMatch is None or
                    excMatch.start() < nextExceptionMatch.start()):
                nextExceptionMatch = excMatch
        if nextExceptionMatch is not None and nextExceptionMatch.start() <= match.start():
            # an HTML comment or text in nowiki tags stands before the next
            # valid match. Skip.
            index = nextExceptionMatch.end()
        else:
            # We found a valid match. Replace it.
            if callable(new):
                # the parameter new can be a function which takes the match
                # as a parameter.
                replacement = new(match)
            else:
                # it is not a function, but a string.
                # it is a little hack to make \n work. It would be better to
                # fix it previously, but better than nothing.
                new = new.replace('\\n', '\n')
                # We cannot just insert the new string, as it may contain regex
                # group references such as \2 or \g<name>.
                # On the other hand, old.sub() on the matched slice does not
                # work either because it can't handle lookahead or lookbehind
                # (see bug #1731008).
                # So we have to process the group references manually.
                replacement = new
                while True:
                    groupMatch = groupR.search(replacement)
                    if not groupMatch:
                        break
                    groupID = groupMatch.group('name') or int(groupMatch.group('number'))
                    replacement = replacement[:groupMatch.start()] + match.group(groupID) + replacement[groupMatch.end():]
            text = text[:match.start()] + replacement + text[match.end():]
            # continue the search on the remaining text
            if allowoverlap:
                index = match.start() + 1
            else:
                index = match.start() + len(replacement)
            markerpos = match.start() + len(replacement)
    text = text[:markerpos] + marker + text[markerpos:]
    return text
def replaceExceptMathNowikiLinksGalleryAndComments(text, old, new):
    """
    Replace ``old`` by ``new`` in ``text``, leaving protected wikitext alone.

    Protected spans include HTML comments, nowiki/math/gallery/timeline/ref
    tags, tables, templates, internal and interwiki links, external links,
    ISBN numbers and a few other exotic constructs.

    Parameters:
        text - a string
        old  - a compiled regular expression
        new  - a string
    """
    protected = ['comment', 'includeonly', 'math', 'table', 'template',
                 'hyperlink', 'nowiki', 'gallery', 'timeline', 'ref',
                 'interwiki', 'link', 'isbn', 'exotic']
    return replaceExcept2(text, old, new, protected)
# NOTE(review): this variant was advertised as "also no change in the
# external links", but its exception list was an exact copy of the one in
# replaceExceptMathNowikiLinksGalleryAndComments (which already includes
# 'hyperlink'). The duplicate list has been removed by delegating; the
# separate name is kept because callers distinguish the two intents.
def replaceExceptMathNowikiLinksGalleryAndComments2(text, old, new):
    """
    Replace ``old`` by ``new`` in ``text``, leaving protected wikitext
    (including external links) alone.

    Parameters:
        text - a string
        old  - a compiled regular expression
        new  - a string
    """
    return replaceExceptMathNowikiLinksGalleryAndComments(text, old, new)
class CosmeticChangesToolkit:
    """Applies cosmetic (non-rendering) clean-ups to wiki source text.

    Most methods are generic pywikipedia cosmetic changes; the replace*
    methods near the end implement French typographic conventions.
    """

    def __init__(self, site, debug = False):
        # site  - the wikipedia.Site the text belongs to
        # debug - when True, callers may show diffs before saving
        self.site = site
        self.debug = debug

    def change(self, text):
        """
        Given a wiki source code text, returns the cleaned up version.
        """
        text = self.fixSelfInterwiki(text)
        text = self.standardizeInterwiki(text)
        text = self.standardizeCategories(text)
        text = self.cleanUpLinks(text)
        text = self.cleanUpSectionHeaders(text)
        # Disabled because of a bug, and because its usefulness is disputed
        # text = self.putSpacesInLists(text)
        # text = self.translateAndCapitalizeNamespaces(text)
        text = self.removeDeprecatedTemplates(text)
        text = self.resolveHtmlEntities(text)
        text = self.validXhtml(text)
        text = self.removeUselessSpaces(text)
        text = self.removeNonBreakingSpaceBeforePercent(text)
        try:
            text = isbn.hyphenateIsbnNumbers(text)
        except isbn.InvalidIsbnException:
            # Deliberate best-effort: an invalid ISBN is left as it is.
            pass
        #text = self.replaceWithNiceQuotes(text)
        text = self.replaceIer(text)
        text = self.replaceNumbers(text)
        text = self.replaceParenthesis(text)
        text = self.replaceCenturies(text)
        text = self.cleanupPonctuation(text)
        return text

    def fixSelfInterwiki(self, text):
        """
        Interwiki links to the site itself are displayed like local links.
        Remove their language code prefix.
        """
        interwikiR = re.compile(r'\[\[%s\s?:([^\[\]\n]*)\]\]' % self.site.lang)
        text = interwikiR.sub(r'[[\1]]', text)
        return text

    def standardizeInterwiki(self, text):
        """
        Makes sure that interwiki links are put to the correct position and
        into the right order.
        """
        interwikiLinks = wikipedia.getLanguageLinks(text, insite = self.site)
        text = wikipedia.replaceLanguageLinks(text, interwikiLinks, site = self.site)
        return text

    def standardizeCategories(self, text):
        """
        Makes sure that categories are put to the correct position, but
        does not sort them.
        """
        # The PyWikipediaBot is no longer allowed to touch categories on the
        # German Wikipedia. See
        # http://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/bis_2006#Position_der_Personendaten_am_.22Artikelende.22
        if self.site != wikipedia.getSite('de', 'wikipedia'):
            categories = wikipedia.getCategoryLinks(text, site = self.site)
            text = wikipedia.replaceCategoryLinks(text, categories, site = self.site)
        return text

    def translateAndCapitalizeNamespaces(self, text):
        """
        Makes sure that localized namespace names are used.
        """
        family = self.site.family
        # wiki links aren't parsed here.
        exceptions = ['nowiki', 'comment', 'math', 'pre']
        for nsNumber in family.namespaces:
            if not family.isDefinedNSLanguage(nsNumber, self.site.lang):
                # Skip undefined namespaces
                continue
            namespaces = list(family.namespace(self.site.lang, nsNumber, all = True))
            thisNs = namespaces.pop(0)
            # skip main (article) namespace
            if thisNs and namespaces:
                text = wikipedia.replaceExcept(text, r'\[\[\s*(' + '|'.join(namespaces) + ') *:(?P<nameAndLabel>.*?)\]\]', r'[[' + thisNs + ':\g<nameAndLabel>]]', exceptions)
        return text

    def cleanUpLinks(self, text):
        """Normalizes internal links: underscores, spacing, pipes, trails."""
        # helper function which works on one link and either returns it
        # unmodified, or returns a replacement.
        def handleOneLink(match):
            titleWithSection = match.group('titleWithSection')
            label = match.group('label')
            trailingChars = match.group('linktrail')
            if not self.site.isInterwikiLink(titleWithSection):
                # The link looks like this:
                # [[page_title|link_text]]trailing_chars
                # We only work on namespace 0 because pipes and linktrails work
                # differently for images and categories.
                try:
                    page = wikipedia.Page(self.site, titleWithSection)
                except wikipedia.InvalidTitle:
                    return match.group()
                if page.namespace() == 0:
                    # Replace underlines by spaces, also multiple underlines
                    titleWithSection = re.sub('_+', ' ', titleWithSection)
                    # Remove double spaces
                    titleWithSection = re.sub(' +', ' ', titleWithSection)
                    # Remove unnecessary leading spaces from title,
                    # but remember if we did this because we eventually want
                    # to re-add it outside of the link later.
                    titleLength = len(titleWithSection)
                    titleWithSection = titleWithSection.lstrip()
                    hadLeadingSpaces = (len(titleWithSection) != titleLength)
                    hadTrailingSpaces = False
                    # Remove unnecessary trailing spaces from title,
                    # but remember if we did this because it may affect
                    # the linktrail and because we eventually want to
                    # re-add it outside of the link later.
                    if not trailingChars:
                        titleLength = len(titleWithSection)
                        titleWithSection = titleWithSection.rstrip()
                        hadTrailingSpaces = (len(titleWithSection) != titleLength)
                    # Convert URL-encoded characters to unicode
                    titleWithSection = wikipedia.url2unicode(titleWithSection, site = self.site)
                    if titleWithSection == '':
                        # just skip empty links.
                        return match.group()
                    # Remove unnecessary initial and final spaces from label.
                    # Please note that some editors prefer spaces around pipes.
                    # (See [[en:Wikipedia:Semi-bots]]). We remove them anyway.
                    if label is not None:
                        # Remove unnecessary leading spaces from label,
                        # but remember if we did this because we want
                        # to re-add it outside of the link later.
                        labelLength = len(label)
                        label = label.lstrip()
                        hadLeadingSpaces = (len(label) != labelLength)
                        # Remove unnecessary trailing spaces from label,
                        # but remember if we did this because it affects
                        # the linktrail.
                        if not trailingChars:
                            labelLength = len(label)
                            label = label.rstrip()
                            hadTrailingSpaces = (len(label) != labelLength)
                    else:
                        label = titleWithSection
                    if trailingChars:
                        label += trailingChars
                    if titleWithSection == label or titleWithSection[0].lower() + titleWithSection[1:] == label:
                        newLink = "[[%s]]" % label
                    # Check if we can create a link with trailing characters
                    # instead of a pipelink
                    elif label.startswith(titleWithSection) and re.sub(trailR, '', label[len(titleWithSection):]) == '':
                        newLink = "[[%s]]%s" % (label[:len(titleWithSection)], label[len(titleWithSection):])
                    else:
                        # Try to capitalize the first letter of the title.
                        # Maybe this feature is not useful for languages that
                        # don't capitalize nouns...
                        #if not self.site.nocapitalize:
                        if self.site.sitename() == 'wikipedia:de':
                            titleWithSection = titleWithSection[0].upper() + titleWithSection[1:]
                        newLink = "[[%s|%s]]" % (titleWithSection, label)
                    # re-add spaces that were pulled out of the link.
                    # Examples:
                    # text[[ title ]]text -> text [[title]] text
                    # text[[ title | name ]]text -> text [[title|name]] text
                    # text[[ title |name]]text -> text[[title|name]]text
                    # text[[title| name]]text -> text [[title|name]]text
                    if hadLeadingSpaces:
                        newLink = ' ' + newLink
                    if hadTrailingSpaces:
                        newLink = newLink + ' '
                    return newLink
            # don't change anything
            return match.group()

        trailR = re.compile(self.site.linktrail())
        # The regular expression which finds links. Results consist of four
        # groups:
        # group titleWithSection is the target page title, that is, everything
        # before | or ] (it includes any #section part).
        # group label is the alternative link title, that's everything
        # between | and ].
        # group linktrail is the link trail, that's letters after ]] which are
        # part of the word.
        # note that the definition of 'letter' varies from language to language.
        linkR = re.compile(r'\[\[(?P<titleWithSection>[^\]\|]+)(\|(?P<label>[^\]\|]*))?\]\](?P<linktrail>' + self.site.linktrail() + ')')
        text = wikipedia.replaceExcept(text, linkR, handleOneLink, ['comment', 'math', 'nowiki', 'pre', 'startspace'])
        return text

    def resolveHtmlEntities(self, text):
        """Converts HTML entities to unicode, except a few meaningful ones."""
        ignore = [
            38,     # Ampersand (&)
            60,     # Less than (<)
            62,     # Greater than (>)
            91,     # Opening bracket - sometimes used intentionally inside links
            93,     # Closing bracket - sometimes used intentionally inside links
            124,    # Vertical bar - used intentionally in navigation bar templates on de:
            160,    # Non-breaking space - not supported by Firefox textareas
        ]
        text = wikipedia.html2unicode(text, ignore = ignore)
        return text

    def validXhtml(self, text):
        """Replaces <br> with the XHTML-valid self-closing <br />."""
        text = wikipedia.replaceExcept(text, r'<br>', r'<br />', ['comment', 'math', 'nowiki', 'pre'])
        return text

    def removeUselessSpaces(self, text):
        """Collapses runs of spaces and strips trailing spaces at line ends."""
        multipleSpacesR = re.compile(' +')
        spaceAtLineEndR = re.compile(' $')
        exceptions = ['comment', 'math', 'nowiki', 'pre', 'startspace', 'table', 'template']
        text = wikipedia.replaceExcept(text, multipleSpacesR, ' ', exceptions)
        text = wikipedia.replaceExcept(text, spaceAtLineEndR, '', exceptions)
        return text

    def removeNonBreakingSpaceBeforePercent(self, text):
        '''
        Newer MediaWiki versions automatically place a non-breaking space in
        front of a percent sign, so it is no longer required to place it
        manually.

        NOTE(review): the space inside the search pattern is presumably the
        non-breaking space U+00A0 and the one in the replacement a plain
        space (otherwise this method is a no-op) -- verify the file bytes.
        '''
        percentR = re.compile(r'(\d) %')
        text = percentR.sub(r'\1 %', text)
        return text

    def cleanUpSectionHeaders(self, text):
        """
        For better readability of section header source code, puts a space
        between the equal signs and the title.
        Example: ==Section title== becomes == Section title ==
        NOTE: This space is recommended in the syntax help on the English and
        German Wikipedia. It might be that it is not wanted on other wikis.
        If there are any complaints, please file a bug report.
        """
        for level in range(1, 7):
            equals = '=' * level
            text = wikipedia.replaceExcept(text, r'\n' + equals + ' *(?P<title>[^=]+?) *' + equals + ' *\r\n', '\n' + equals + ' \g<title> ' + equals + '\r\n', ['comment', 'math', 'nowiki', 'pre'])
        return text

    def putSpacesInLists(self, text):
        """
        For better readability of bullet list and enumeration wiki source code,
        puts a space between the * or # and the text.
        NOTE: This space is recommended in the syntax help on the English,
        German, and French Wikipedia. It might be that it is not wanted on
        other wikis. If there are any complaints, please file a bug report.
        """
        # FIXME: This breaks redirects.
        text = wikipedia.replaceExcept(text, r'(?m)^(?P<bullet>(\*+|#+):*)(?P<char>[^\s\*#:].+?)', '\g<bullet> \g<char>', ['comment', 'math', 'nowiki', 'pre'])
        return text

    def removeDeprecatedTemplates(self, text):
        """Strips templates listed in deprecatedTemplates for this wiki."""
        # (dict.has_key() replaced with the 'in' operator, which behaves
        # identically and also works on Python 3.)
        if self.site.family.name in deprecatedTemplates and self.site.lang in deprecatedTemplates[self.site.family.name]:
            for template in deprecatedTemplates[self.site.family.name][self.site.lang]:
                if not self.site.nocapitalize:
                    # match both capitalizations of the first letter
                    template = '[' + template[0].upper() + template[0].lower() + ']' + template[1:]
                text = wikipedia.replaceExcept(text, r'\{\{([mM][sS][gG]:)?' + template + '(?P<parameters>\|[^}]+|)}}', '', ['comment', 'math', 'nowiki', 'pre'])
        return text

    # Change ' to ’ (currently disabled in change())
    def replaceWithNiceQuotes(self, text):
        """Replaces straight apostrophes with typographic ones, piping links."""
        text2 = replaceExceptMathNowikiLinksGalleryAndComments( text, r'([^\'])\'([^\'\[])', r'\1' + u'’' +r'\2' )
        text3 = replaceExceptMathNowikiLinksGalleryAndComments( text2, r'([^\'])\'\[', r'\1' + u'’[' )
        text4 = wikipedia.replaceExcept( text3, r'\[\[([^:|\]\']*?)\'([^:|\]\']*?)\]\]', r'[[\1' + u'\'' + r'\2|\1' + u'’' + r'\2]]', [ 'math', 'comment', 'noinclude', 'nowiki' ] )
        return text4

    def replaceIer(self, text):
        """Formats French ordinals: Ier/1er -> I{{er}}/1{{er}}, Ne -> N{{e}}."""
        text2 = replaceExceptMathNowikiLinksGalleryAndComments( text, r'(\s)Ier(\s)', r'\1I{{er}}\2' )
        text3 = wikipedia.replaceExcept( text2, r'\[\[([^:|\]]*?)Ier([^:|\]]*?)\]\]', r'[[\1' + u'Ier' + r'\2|\1' + u'I{{er}}' + r'\2]]', [ 'math', 'comment', 'noinclude', 'nowiki' ] )
        text4 = replaceExceptMathNowikiLinksGalleryAndComments( text3, r'(\s)(1)er(\s)', r'\1\2{{er}}\3' )
        text5 = wikipedia.replaceExcept( text4, r'\[\[([^:|\]]*?)1er([^:|\]]*?)\]\]', r'[[\1' + u' 1er' + r'\2|\1' + u' 1{{er}}' + r'\2]]', [ 'math', 'comment', 'noinclude', 'nowiki' ] )
        text6 = wikipedia.replaceExcept( text5, r'(\s)(\d+)(eme|ème|e)(\s)', r'\1\2{{e}}\4', [ 'math', 'comment', 'noinclude', 'nowiki' ] )
        return text6

    def replaceNumbers(self, text):
        """Wraps large numbers in {{formatnum:}} and joins split thousands.

        NOTE(review): the lone-space alternatives in '( | [;])' presumably
        once contained a non-breaking space -- verify the file bytes.
        """
        text2 = replaceExceptMathNowikiLinksGalleryAndComments2( text, r'(\d{5,10})', r'' + u'{{formatnum:' + r'\1' + u'}}' )
        text3 = replaceExceptMathNowikiLinksGalleryAndComments2( text2, r'([^\w\d])(\d)( | [;])(\d{3})([^\w/])', r'\1\2\4\5' )
        text4 = replaceExceptMathNowikiLinksGalleryAndComments2( text3, r'([^\w])(\d+)( | [;])(\d{3})( | [;])(\d{3})([^\w/])', r'\1' + u'{{formatnum:' + r'\2\4\6' + u'}}' +r'\7' )
        text5 = replaceExceptMathNowikiLinksGalleryAndComments2( text4, r'([^\w])(\d+)( | [;])(\d{3})([^\w/])', r'\1' + u'{{formatnum:' + r'\2\4' + u'}}' + r'\5' )
        return text5

    def replaceParenthesis(self, text):
        """Ensures a space before an opening parenthesis."""
        text2 = replaceExceptMathNowikiLinksGalleryAndComments( text, r'([^\s])\(', r'\1 (' )
        return text2

    def replaceCenturies(self, text):
        """Wraps century numbers (XVIe siècle ...) into the century templates."""
        text2 = replaceExceptMathNowikiLinksGalleryAndComments( text, r'([XVI]+)(e|' +u'è' + r'me|eme)' + u' siècle' + r'([^\w])(av|av\.|avant)\s(JC|J\.C\.|J\.-C|J\.-C\.|J-C)', u'{{' + r'\1' + u'e siècle av. J.-C.}}' )
        text3 = replaceExceptMathNowikiLinksGalleryAndComments( text2, r'([XVI]+)(e|' +u'è' + r'me|eme)' + u' siècle' + r'([^\w])', u'{{' + r'\1' + u'e siècle}}' + r'\3' )
        text4 = wikipedia.replaceExcept( text3, r'\[\[([XVI]+e' +u' siècle)\]\]', r'{{\1}}', [ 'math', 'comment', 'noinclude', 'nowiki' ] )
        return text4

    def cleanupPonctuation( self, text ):
        """Applies French punctuation spacing: ellipsis, colons, full stops."""
        text2 = replaceExceptMathNowikiLinksGalleryAndComments( text, u'\.\.\.', u'…' )
        text3 = replaceExceptMathNowikiLinksGalleryAndComments( text2, r'([A-Za-z])\s?:(\s)([A-Za-z])', r'\1 :\2\3' )
        text4 = replaceExceptMathNowikiLinksGalleryAndComments( text3, r'([A-Za-z])\s?:([A-Za-z])', r'\1 : \2' )
        #text5 = replaceExceptMathNowikiLinksGalleryAndComments( text4, r'([A-Za-z])\s?;(\s)([A-Za-z])', r'\1 ;\2\3' )
        #text6 = replaceExceptMathNowikiLinksGalleryAndComments( text5, r'([A-Za-z])\s?;([A-Za-z])', r'\1 ; \2' )
        #text7 = replaceExceptMathNowikiLinksGalleryAndComments( text6, r'([A-Za-z]);', r'\1 ;' )
        # BUG FIX: this step previously took text3 as its input, silently
        # discarding the colon-spacing result computed into text4.
        text8 = replaceExceptMathNowikiLinksGalleryAndComments2( text4, r'([A-Za-z])\s?\.\s?([A-Za-z])', r'\1' +u'. ' + r'\2' )
        return text8
class DictToolkit:
    """Spelling-fix toolkit driven by a word list loaded from ``dict.txt``.

    Each non-empty line of ``dict.txt`` holds two space-separated fields:
    a regular expression to search for and its replacement text.
    """

    def __init__(self, site, debug = False):
        # site  - used only for its text encoding when reading dict.txt
        # debug - kept for symmetry with CosmeticChangesToolkit
        self.site = site
        self.debug = debug
        # maps search pattern -> replacement text
        self.words = {}
        try:
            f = codecs.open( "dict.txt", 'r', encoding = site.encoding())
            try:
                for line in f:
                    # remove trailing newlines and carriage returns
                    line = line.rstrip('\r\n')
                    # skip empty lines
                    if not line:
                        continue
                    w = line.split(' ')
                    # ROBUSTNESS FIX: a line without a second field used to
                    # crash with IndexError; ignore malformed lines instead.
                    if len(w) < 2:
                        continue
                    self.words[w[0]] = w[1]
            finally:
                # BUG FIX: the file used to stay open if an error occurred
                # while reading; always close it.
                f.close()
        except IOError:
            print("Warning! There is no wordlist for your language!")
        else:
            print("Wordlist successfully loaded.")

    def change(self, text):
        """Returns ``text`` with every word-list pattern replaced."""
        ct = text
        # .items() behaves like Python 2's iteritems() here (self.words is
        # not mutated during iteration) and also works on Python 3.
        for (k, v) in self.words.items():
            ct = replaceExceptMathNowikiLinksGalleryAndComments( ct, k, v )
        return ct

    def bad_text(self, text):
        """Returns True if ``text`` matches any pattern of the word list."""
        return any(re.search(k, text) for k in self.words)
class PimpBot:
def __init__(self, generator, acceptall = False):
self.generator = generator
self.acceptall = acceptall
self.debug = not acceptall
# Load default summary message.
wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg_standalone))
self.ccToolkit = CosmeticChangesToolkit(wikipedia.getSite(), debug = self.debug)
self.dictToolkit = DictToolkit( wikipedia.getSite(), debug = self.debug)
def run(self):
for page in self.generator:
try:
changedText = page.get()
changedText = self.ccToolkit.change(changedText)
changedText = self.dictToolkit.change(changedText)
if changedText != page.get():
if self.debug:
wikipedia.showDiff(page.get(), changedText)
if not self.acceptall:
choice = wikipedia.inputChoice(u'Do you want to accept these changes?', ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
if choice in ['a', 'A']:
self.acceptall = True
if self.acceptall or choice in ['y', 'Y']:
page.put(changedText)
except wikipedia.NoPage:
print "Page %s does not exist?!" % page.aslink()
except wikipedia.IsRedirectPage:
print "Page %s is a redirect; skipping." % page.aslink()
except wikipedia.LockedPage:
print "Page %s is locked?!" % page.aslink()
def main():
#page generator
gen = None
pageTitle = []
# This factory is responsible for processing command line arguments
# that are also used by other scripts and that determine on which pages
# to work on.
genFactory = pagegenerators.GeneratorFactory()
for arg in wikipedia.handleArgs():
if not genFactory.handleArg(arg):
pageTitle.append(arg)
if pageTitle:
page = wikipedia.Page(wikipedia.getSite(), ' '.join(pageTitle))
gen = iter([page])
if not gen:
wikipedia.showHelp()
else:
preloadingGen = pagegenerators.PreloadingGenerator(gen)
bot = PimpBot(preloadingGen)
print "Bot is running"
bot.run()
# Script entry point: run the bot and make sure the pywikipedia framework
# is shut down cleanly (throttle bookkeeping) even if main() raises.
if __name__ == "__main__":
    try:
        main()
    finally:
        wikipedia.stopme()