Une page de Wikipédia, l'encyclopédie libre.
# -*- coding: utf-8 -*-
import wikipedia, pagegenerators
import sys
import re
import catlib
import codecs
def utf8(text):
    """Return *text*, a UTF-8 encoded byte string, decoded to unicode."""
    source_encoding = 'utf-8'
    decoded = unicode(text, source_encoding)
    return decoded
class ExtractCats:
    """Write an HTML table listing the categories of the pages it is given.

    The report is written to ``fichier.html`` in the current working
    directory.  Each existing page in namespace 0 gets one row: the page
    title (as a ``[[wikilink]]``) in the first cell and its categories,
    one per line, in the second.  Rows of pages that have no category at
    all are highlighted in red.
    """

    def __init__(self, generator, acceptall = False):
        """Store the run parameters.

        generator -- iterable yielding the page objects to inspect.
        acceptall -- kept on the instance for callers; not read by this
                     class itself.
        """
        self.generator = generator
        self.acceptall = acceptall

    def run(self):
        """Process every page from the generator and write the report.

        NoPage, IsRedirectPage and LockedPage errors raised while handling
        a page are reported and the page is skipped.  Any other exception
        propagates to the caller, but the table is still terminated and
        the file is still closed, so a partial report stays usable.
        """
        report = codecs.open('fichier.html', 'w', 'utf-8')
        try:
            report.write("<table border=\"1\">\n")
            try:
                for page in self.generator:
                    try:
                        self._write_row(report, page)
                    # Messages go through wikipedia.output() rather than a
                    # bare print: titles and links are unicode and a plain
                    # print can fail on consoles that cannot encode them.
                    except wikipedia.NoPage:
                        wikipedia.output(u"Page %s does not exist?!"
                                         % page.aslink())
                    except wikipedia.IsRedirectPage:
                        wikipedia.output(u"Page %s is a redirect; skipping."
                                         % page.aslink())
                    except wikipedia.LockedPage:
                        wikipedia.output(u"Page %s is locked?!"
                                         % page.aslink())
                    # NOTE(review): progress line assumed to be emitted once
                    # per page, whether or not the page produced a row.
                    wikipedia.output(u"%s OK" % page.title())
            finally:
                report.write("</table>")
        finally:
            report.close()

    def _write_row(self, out, page):
        """Write the table row for *page* to *out*.

        Nothing is written for pages that do not exist or that live
        outside namespace 0.
        """
        if not page.exists() or page.namespace() != 0:
            return
        categories = page.categories()
        if categories:
            cells = ["<tr>"]
        else:
            # Uncategorised page: flag the whole row.
            cells = ["<tr bgcolor=\"red\">"]
        cells.append("<td>[[%s]]</td>" % page.title())
        cells.append("<td>")
        for category in categories:
            cells.append(category.aslink() + "<br />")
        cells.append("</td></tr>\n")
        # Emit the row in one write so a failure above never leaves a
        # half-written row in the report.
        out.write("".join(cells))
def main():
    """Build a page generator from the command line and run ExtractCats.

    Recognised arguments (as returned by wikipedia.handleArgs()):
      -start:TITLE  all pages, starting at TITLE
      -ref:TITLE    pages referring to TITLE
      -links:TITLE  pages linked from TITLE
      -file:NAME    pages listed in the text file NAME
      -cat:NAME     pages in Category:NAME
    Every other argument is treated as one word of a single page title;
    such a title, when present, takes precedence over the options above.
    Without any usable argument the help text is shown instead.
    """
    # Option prefix -> factory building the generator from the text that
    # follows the prefix.  Lambdas keep every library lookup lazy, so
    # nothing is resolved for options that were not passed.
    factories = (
        ('-start:', lambda value:
            pagegenerators.AllpagesPageGenerator(value)),
        ('-ref:', lambda value:
            pagegenerators.ReferringPageGenerator(
                wikipedia.Page(wikipedia.getSite(), value))),
        ('-links:', lambda value:
            pagegenerators.LinkedPageGenerator(
                wikipedia.Page(wikipedia.getSite(), value))),
        ('-file:', lambda value:
            pagegenerators.TextfilePageGenerator(value)),
        ('-cat:', lambda value:
            pagegenerators.CategorizedPageGenerator(
                catlib.Category(wikipedia.getSite(),
                                'Category:%s' % value))),
    )

    source = None
    title_words = []
    for argument in wikipedia.handleArgs():
        for prefix, build in factories:
            if argument.startswith(prefix):
                source = build(argument[len(prefix):])
                break
        else:
            # Not an option: collect it as part of a page title.
            title_words.append(argument)

    if title_words:
        single_page = wikipedia.Page(wikipedia.getSite(),
                                     ' '.join(title_words))
        source = iter([single_page])

    if not source:
        wikipedia.showHelp()
        return

    ExtractCats(pagegenerators.PreloadingGenerator(source)).run()
# Script entry point: only runs when the file is executed directly.
if __name__ == '__main__':
    try:
        main()
    finally:
        # Runs whether main() returned or raised.
        # NOTE(review): presumably tells the framework this bot has
        # finished -- confirm against wikipedia.stopme().
        wikipedia.stopme()