#-*- coding:utf-8 -*-
import wikipedia
import urllib
import sys
import pagegenerators
import unicodedata
import catlib
import re
def lisse(key):
    """Normalize a wiki sort key.

    Strips leading spaces, capitalizes the first character and
    transliterates accented characters to plain ASCII (NFKD
    decomposition, then the combining marks are dropped).

    Parameters:
        key -- unicode sort key, e.g. taken from a {{DEFAULTSORT:...}}
               template or from the pipe part of a category link.

    Returns the cleaned unicode key.  An empty or whitespace-only key
    is returned as u'' (the original code raised IndexError on it).
    """
    # Drop every leading space, not just the first one as before.
    key = key.lstrip(u' ')
    if not key:
        # Guard: nothing left to capitalize/normalize.
        return key
    # Capitalize only the first character; the rest is kept untouched.
    key = key[0].upper() + key[1:]
    # NFKD separates base letters from their diacritics; encoding to
    # ASCII with 'ignore' then discards the diacritics.
    key = unicodedata.normalize('NFKD', key).encode('ascii', 'ignore')
    key = key.decode('utf-8')
    return key
def menage(text, adddefo=1):
    """Clean up the sort key of a wiki page's text.

    If the text already contains a {{DEFAULTSORT:...}} template, its
    key is normalized with lisse().  Otherwise, when *adddefo* is 1, a
    DEFAULTSORT built from the first piped category key is inserted
    just before that category line and the pipe part is removed from
    the category link itself.

    Parameters:
        text    -- full unicode wikitext of the page.
        adddefo -- set to 0 when the page is not about a person (was a
                   hard-coded flag inside the function; exposing it as
                   a keyword parameter is backward-compatible).

    Returns the possibly modified wikitext.  NOTE: the insertion branch
    rebuilds the text line by line and appends a trailing newline.
    """
    if text.find(u'{DEFAULTSORT:') > 0:
        # 13 == len(u'{DEFAULTSORT:'); the leading '{{' of the template
        # guarantees the match index is > 0 when it is present.
        debut = text.find(u'{DEFAULTSORT:') + 13
        fin = text.find(u'}}', debut)
        KEY = text[debut:fin]
        KEY = lisse(KEY)
        text = text[:debut] + KEY + text[fin:]
    elif adddefo == 1:
        lines = text.split(u'\n')
        text = u''
        cat = 0  # only the first piped category link is rewritten
        for l in lines:
            if l.find(u"[Catégorie:") > 0 and l.find(u'|') > 0 and cat == 0:
                cat = 1
                debut = l.find(u'|') + 1
                fin = l.find(u']]', debut)
                key = l[debut:fin]
                key = lisse(key)
                # Prepend the DEFAULTSORT template and drop the '|key'
                # part of the category link (debut-1 is the '|' itself).
                l = u'{{DEFAULTSORT:' + key + u'}}\n\n' + l[:debut - 1] + l[fin:]
            text += l + u'\n'
    return text
def data(lstfr, categorie, c):
    """Add [[Catégorie:<categorie>]] to every French page in *lstfr*.

    Parameters:
        lstfr     -- list of wikipedia.Page objects on fr.wikipedia.
        categorie -- unicode name of the French category to add
                     (without the namespace prefix).
        c         -- remaining page count; only used for progress output.

    Pages are preloaded 400 at a time.  A page is actually saved only
    when the category name contains u'Naissance en' (birth categories),
    the article text matches a "né(e) ... <year>" phrase and no other
    ':Naissance' category is already present; otherwise the diff is
    shown but not uploaded.  NoPage/IsRedirectPage/LockedPage errors
    are silently skipped.
    """
    site1 = wikipedia.getSite('fr', 'wikipedia')
##    pag = wikipedia.Page(site1, u'Utilisateur:Sisyph/Test Zone')
##    if 1==1:
    for pag in pagegenerators.PreloadingGenerator(lstfr, 400):
        c = c-1
        try:
            text1 = pag.get()
            # Normalize the existing category links before comparing.
            categories = wikipedia.getCategoryLinks(text1, site1)
            text1 = wikipedia.replaceCategoryLinks(text1, categories, site1)
            cats = pag.categories()
            # Show the title of the page we're working on.
            # Highlight the title in purple.
            wikipedia.output(
                u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                % pag.title())
            wikipedia.output(u"Current categories:")
            for cat in cats:
                wikipedia.output(u"* %s" % cat.title())
            catpl = wikipedia.Page(site1, site1.category_namespaces()[0] + u':' + categorie)
            if catpl in cats:
                wikipedia.output(u"%s is already in %s."
                                 % (pag.title(), catpl.title()))
            else:
            #elif text1.find(u'DEFAULTSORT')>0: # temporary safeguard until sort keys get a real cleanup
                wikipedia.output(u'Adding %s' % catpl.aslink())
                cats.append(catpl)
                newtext = wikipedia.replaceCategoryLinks(text1, cats)
                # Normalize / insert the DEFAULTSORT sort key.
                newtext = menage(newtext)
                wikipedia.output(text1)
                # Fix the common capitalization typo for "Jeux olympiques".
                newtext = newtext.replace(u'Jeux Olympiques', u'Jeux olympiques')
                newtext = newtext.replace(u'jeux Olympiques', u'Jeux olympiques')
                wikipedia.showDiff(text1, newtext)
                print c
                comment = u'Bot, ajoute [[Catégorie:'+categorie+u"]], importé de l'article anglais"
                # categorie[13:] is the year part after u'Naissance en '.
                print categorie[13:]
                # Save only birth categories whose year is confirmed by a
                # "né(e) ... <year>" phrase in the article text, and only
                # if no ':Naissance' category is already there.
                if categorie.find(u'Naissance en') >= 0 and re.search(u'née? [ \[\]\w\d]*' + categorie[13:], text1) and text1.find(u':Naissance') < 0:
                    pag.put(newtext, comment)
##                choice = 'u'
##                choice = wikipedia.inputChoice(u'upload, Skip', [ 'upload', 'Skip' ], ['u', 'S'], 's')
##                if choice == 'u':
##                    pag.put(newtext,comment)
        except wikipedia.NoPage:
            pass
        except wikipedia.IsRedirectPage:
            pass
        except wikipedia.LockedPage:
            pass
def main():
    """Walk English birth-year categories and propagate them to fr.wiki.

    For every article in each English category, follow its [[fr:...]]
    interwiki link; the resulting French pages are buffered and flushed
    to data() in batches of 400.  The matching French category name is
    read from the [[fr:Catégorie:...]] interwiki on the English
    category page itself.
    """
    site2 = wikipedia.getSite('en', 'wikipedia')
    site1 = wikipedia.getSite('fr', 'wikipedia')
    categories = [u"1984 births"]
    for category in categories:
        cat = catlib.Category(site2, category)
        lst = cat.articlesList(False)
        page = wikipedia.Page(site2, u'Category:'+category)
        text = page.get()
        # Locate the [[fr:Catégorie:...]] interwiki on the category
        # page; 14 == len(u'[fr:Catégorie:').
        deb = text.find(u'[fr:Catégorie:')
        if deb > 0:
            deb = deb+14
            fin = text.find(u']]', deb)
            categorie = text[deb:fin]
            b = len(lst)
            lstfr = []
            for p in pagegenerators.PreloadingGenerator(lst, 400):
                pagename = p.titleWithoutNamespace()
                b = b - 1
                # Pause when the bot account has new talk-page messages.
                if site2.messages():
                    message = raw_input(u'Un message est arrive, n importe quelle touche pour continuer')
                try:
                    text = p.get()
                    lines = text.split(u'\n')
                    leng = u'##VIDE##'
                    for l in lines:
                        # Skip "... of Fame" pages whose interwikis are unreliable.
                        if l.find(u'[[fr:') >= 0 and p.title().find(u'of Fame') < 0:
                            leng = l
                            # Strip the leading '[[fr:' and trailing ']]'.
                            leng = leng[5:len(leng)-2]
                            leng = leng.replace(u']', u'')
                            wikipedia.output(leng + u"\03{lightpurple} ajouté, provient de \03{default}" + pagename)
                            lstfr.append(wikipedia.Page(site1, leng))
                except wikipedia.NoPage:
                    pass
                except wikipedia.IsRedirectPage:
                    pass
                except wikipedia.LockedPage:
                    pass
                # Flush the buffer every 400 pages and at the end of the list.
                if b % 400 == 0 or b < 1:
                    c = len(lstfr)
                    data(lstfr, categorie, c)
                    lstfr = []
        else:
            # BUG FIX: the original called input(), which under Python 2
            # eval()s whatever is typed (crashes on plain text and can
            # execute arbitrary code).  raw_input() matches the other
            # prompt above.
            message = raw_input(u'Pas de lien FR, n importe quelle touche pour continuer')
# Script entry point: run the bot and always release the framework's
# throttle/lock, even if main() raises.
if __name__ == '__main__':
    try:
        main()
    finally:
        wikipedia.stopme()