# Utilisateur:DSisyphBot/Script/importcategory.py
#
# Source: a user bot-script page from Wikipédia, the free encyclopedia.
#-*- coding:utf-8 -*-
 
import wikipedia
import urllib
import sys
import pagegenerators
import unicodedata
import catlib
import re

def lisse(key):
	"""Normalize a sort key: drop one leading space, upper-case the
	first character and transliterate accented characters to ASCII.

	Parameters:
		key: unicode sort key (taken from a DEFAULTSORT template or
			from a category link's pipe key).
	Returns:
		The normalized unicode key; an empty key is returned as-is.
	"""
	if not key:
		# Guard: indexing key[0] on u'' would raise IndexError.
		return key
	if key[0] == u' ':
		key = key[1:]
		if not key:
			return key
	key = key[0].upper() + key[1:]
	# NFKD decomposition splits base letters from combining accents,
	# so encoding to ASCII with 'ignore' strips the accent marks.
	key = unicodedata.normalize('NFKD', key).encode('ascii', 'ignore')
	# The bytes are pure ASCII, hence valid UTF-8.
	key = key.decode('utf-8')
	return key

def menage(text, adddefo=1):
	"""Clean up the sort key of an article's wikitext.

	If a {{DEFAULTSORT:...}} is already present, its key is normalized
	with lisse().  Otherwise, when adddefo is 1, a {{DEFAULTSORT:key}}
	line is inserted above the first piped category link, using that
	link's (normalized) pipe key, and the now-redundant pipe key is
	removed from the category link itself.

	Parameters:
		text: full unicode wikitext of the article.
		adddefo: 1 (default) to add a missing DEFAULTSORT -- meant for
			biography articles; pass 0 to leave the text untouched
			when no DEFAULTSORT exists.
	Returns:
		The updated wikitext.
	"""
	pos = text.find(u'{DEFAULTSORT:')
	# != -1 rather than > 0: find() returns -1 when absent, and a
	# match at index 0 is legitimate (the old > 0 test skipped it).
	if pos != -1:
		debut = pos + 13  # len(u'{DEFAULTSORT:')
		fin = text.find(u'}}', debut)
		text = text[:debut] + lisse(text[debut:fin]) + text[fin:]
	elif adddefo == 1:
		lines = text.split(u'\n')
		text = u''
		done = False
		for l in lines:
			# Only the first piped category supplies the sort key.
			if not done and l.find(u"[Catégorie:") > 0 and l.find(u'|') > 0:
				done = True
				debut = l.find(u'|') + 1
				fin = l.find(u']]', debut)
				key = lisse(l[debut:fin])
				# Insert the DEFAULTSORT above the category link and
				# strip the pipe key (debut-1 is the pipe itself).
				l = u'{{DEFAULTSORT:' + key + u'}}\n\n' + l[:debut - 1] + l[fin:]
			text += l + u'\n'
	return text
			
 
def data(lstfr,categorie,c):
	"""Add [[Catégorie:<categorie>]] to each French page in lstfr.

	For every page: re-serialize its category links, display the
	current categories, append the new category, normalize the sort
	key via menage(), apply two hard-coded 'Jeux olympiques' spelling
	fixes, then upload -- but only for 'Naissance en ...' (birth)
	categories whose text matches a born-in pattern (see the guarded
	pag.put() call below).

	Parameters:
		lstfr: list of fr.wikipedia Page objects to edit.
		categorie: unicode French category name, without namespace.
		c: remaining-pages counter, printed for progress only.

	NoPage / IsRedirectPage / LockedPage errors are silently skipped
	(deliberate best-effort behaviour for an unattended bot run).
	"""
	site1 = wikipedia.getSite('fr','wikipedia')
	
	# Disabled test harness: run on a single sandbox page instead of lstfr.
##	pag = wikipedia.Page(site1, u'Utilisateur:Sisyph/Test Zone')
##	if 1==1:			  
	for pag in pagegenerators.PreloadingGenerator(lstfr,400):
		c = c-1
		try:
			text1 = pag.get()
			# Rewrite the existing category links in canonical form
			# before comparing/appending.
			categories = wikipedia.getCategoryLinks(text1, site1)
			text1 = wikipedia.replaceCategoryLinks(text1, categories, site1)
			cats = pag.categories()
			# Show the title of the page we're working on.
			# Highlight the title in purple.
			wikipedia.output(
			    u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
			    % pag.title())
			wikipedia.output(u"Current categories:")
			
			for cat in cats:
				wikipedia.output(u"* %s" % cat.title())
			catpl = wikipedia.Page(site1,site1.category_namespaces()[0] + u':' + categorie)
			if catpl in cats:
				wikipedia.output(u"%s is already in %s."
					     % (pag.title(), catpl.title()))
			else:
			#elif text1.find(u'DEFAULTSORT')>0: # Temporary safety check, until sort keys get a real cleanup
				wikipedia.output(u'Adding %s' % catpl.aslink())
				cats.append(catpl)
				newtext = wikipedia.replaceCategoryLinks(text1, cats)
				newtext = menage(newtext)
				wikipedia.output(text1)
				# Spelling fix: 'olympiques' takes a lower-case o in French.
				newtext = newtext.replace(u'Jeux Olympiques',u'Jeux olympiques')
				newtext = newtext.replace(u'jeux Olympiques',u'Jeux olympiques')
				wikipedia.showDiff(text1, newtext)
				print c
				comment = u'Bot, ajoute [[Catégorie:'+categorie+u"]], importé de l'article anglais"
				print categorie[13:]
				# Upload only when: the category is a birth category,
				# the article matches 'né'/'née' followed by the part
				# after u'Naissance en ' (categorie[13:] strips that
				# 13-char prefix), and no ':Naissance' category is
				# already present.
				if categorie.find(u'Naissance en')>=0 and re.search(u'née? [ \[\]\w\d]*' + categorie[13:], text1) and text1.find(u':Naissance')<0:
					pag.put(newtext,comment)
				# Disabled interactive confirmation before upload:
##				choice = 'u'
##				choice = wikipedia.inputChoice(u'upload, Skip', [ 'upload', 'Skip' ], ['u', 'S'], 's')
##				if choice == 'u':
##					pag.put(newtext,comment)
		except wikipedia.NoPage:
			pass
		except wikipedia.IsRedirectPage:
			pass
		except wikipedia.LockedPage:
			pass
 
 
def main(categories=None):
	"""Import category members from en.wikipedia to fr.wikipedia.

	For each English category: read its category page, find the French
	interwiki category link, collect the French counterparts of the
	member articles via their [[fr:...]] interwiki links, and hand
	batches of 400 pages to data(), which adds the French category.

	Parameters:
		categories: optional list of English category names (without
			the u'Category:' prefix); defaults to the hard-coded list.
	"""
	if categories is None:
		categories = [u"1984 births"]
	site2 = wikipedia.getSite('en','wikipedia')
	site1 = wikipedia.getSite('fr','wikipedia')
	for category in categories:
		cat = catlib.Category(site2, category)
		lst = cat.articlesList(False)
		page = wikipedia.Page(site2, u'Category:'+category)
		text = page.get()
		# Locate the French interwiki link on the category page.
		deb = text.find(u'[fr:Catégorie:')
		if deb>0:
			deb = deb+14  # skip past u'[fr:Catégorie:' (14 chars)
			fin = text.find(u']]',deb)
			categorie = text[deb:fin]
			b= len(lst)
			lstfr = []

			for p in pagegenerators.PreloadingGenerator(lst,400):
				pagename = p.titleWithoutNamespace()
				b = b - 1
				# Pause when the bot account has new talk-page messages.
				if site2.messages():
					message = raw_input(u'Un message est arrive, n importe quelle touche pour continuer')
				try:
					text = p.get()
					lines = text.split(u'\n')
					leng = u'##VIDE##'
					for l in lines:
						# Keep the [[fr:...]] interwiki target;
						# 'of Fame' pages are skipped (not persons).
						if l.find(u'[[fr:')>=0 and p.title().find(u'of Fame')<0:
							leng = l
							leng = leng[5:len(leng)-2]
							leng = leng.replace(u']',u'')
							wikipedia.output(leng + u"\03{lightpurple} ajouté, provient de \03{default}" + pagename)
							lstfr.append(wikipedia.Page(site1, leng))
				except wikipedia.NoPage:
					pass
				except wikipedia.IsRedirectPage:
					pass
				except wikipedia.LockedPage:
					pass
				# Flush every full batch of 400, plus the final partial one.
				if b%400==0 or b<1:
					c= len(lstfr)
					data(lstfr,categorie,c)
					lstfr = []
		else:
			# Bug fix: was input(), which in Python 2 eval()s the
			# typed text; raw_input matches the prompt above.
			message = raw_input(u'Pas de lien FR, n importe quelle touche pour continuer')
 
 
if __name__ == '__main__':
	try:
		main()
	finally:
		# Always release the pywikipedia throttle/session, even when
		# main() exits via an exception or KeyboardInterrupt.
		wikipedia.stopme()