# -*- coding: utf-8 -*-
import re
import time
import sys
import urllib
import re
import unicodedata
import pywikibot
from pywikibot import pagegenerators
from pywikibot.pagegenerators import PreloadingGenerator
from pywikibot.compat import catlib
def uploadWD(page,value):
    """Add *value* as a P345 claim on the Wikidata item linked to *page*.

    P345 is the property this script fills with the "nm..." identifiers
    that the caller extracts from IMDb templates.  Nothing is written when
    the item already carries a P345 claim, when the page text does not
    match ``Catégorie:Naissance ``, or when the operator does not answer
    ``u`` at the prompt.

    When a claim is added it is sourced with P143 ("imported from") pointing
    at Q8447 (French Wikipedia).

    :param page: pywikibot page whose connected Wikidata item is updated.
    :param value: identifier string stored as the claim target.
    :raises: whatever ``ItemPage.fromPage``, ``item.get`` or ``page.get``
        raise (e.g. when the page or its item cannot be loaded); these are
        deliberately left to the caller.
    """
    pid = u"P345"
    item = pywikibot.ItemPage.fromPage(page)
    item.get()  # loads the item content so that item.claims is populated

    # Only a missing key / empty claim list means "property not set yet".
    # The original used a bare ``except`` here, which also swallowed
    # KeyboardInterrupt and genuine errors.
    try:
        item.claims[pid][0]
    except (KeyError, IndexError):
        pass
    else:
        print("Already the property")
        return

    print('pas de ' + pid + page.title() + value)

    # Restrict uploads to pages whose wikitext carries a
    # "Catégorie:Naissance ..." category.
    if not re.search(u'Catégorie\\:Naissance ', page.get()):
        return

    choice = 'u'
    # Comment the next line out to run unattended (every match is uploaded).
    choice = pywikibot.inputChoice(u'upload, Skip', ['upload', 'Skip'], ['u', 'S'], 's')
    if choice != 'u':
        return

    repo = pywikibot.Site('fr', 'wikipedia').data_repository()

    claim = pywikibot.Claim(site=repo, pid=pid)
    claim.setTarget(value)
    item.addClaim(claim)

    source = pywikibot.Claim(repo, 'P143')  # imported from
    source.setTarget(pywikibot.ItemPage(repo, 'Q8447'))  # French Wikipedia
    claim.addSource(source)
def verifmodel(lstenfr,lang):
    """Compare a previously extracted IMDb id with the one found on a page.

    ``lstenfr`` is a 3-item sequence ``[page, value, page_to_check]``; only
    items 1 (the id already extracted by the caller) and 2 (the page whose
    wikitext is re-scanned) are used.  When the id found in the page's IMDb
    template equals ``value`` and has six digits, ``uploadWD`` is called with
    ``"nm0" + id``.

    :param lstenfr: ``[page, value, page_to_check]`` as described above.
    :param lang: unused; kept so existing callers keep working.
    :return: always the empty unicode string (nothing is ever appended to
        it in this function).
    """
    liste = u''
    pagfr = lstenfr[2]
    valueen = lstenfr[1]

    # Manual switches carried over from the original ("1==2" / "1==1"):
    # the seven-digit upload is turned off, the six-digit one is on.
    upload_seven_digits = False
    upload_six_digits = True

    if not pagfr.exists():
        print("no page")

    # The try block intentionally also covers uploadWD(): it loads the page
    # and its Wikidata item and may raise NoPage / IsRedirectPage too.
    try:
        textfr = pagfr.get()
        found = re.search(
            r'{{ *[Ii][Mm][Dd][Bb] n[oa]me? *\| *i?d? *=? *(\d+) *[\|}]',
            textfr)
        if found is not None:
            valuefr = found.group(1)
            if valueen == valuefr:
                pagenamefr = pagfr.title(withNamespace=False)
                print(pagenamefr + valueen + valuefr)
                if len(valueen) == 7 and upload_seven_digits:
                    uploadWD(pagfr, u"nm" + valuefr)
                elif len(valueen) == 6 and upload_six_digits:
                    # Six-digit ids get one extra leading zero.
                    uploadWD(pagfr, u"nm0" + valuefr)
    except pywikibot.NoPage:
        print(pagfr.title() + u' nopage')
    except pywikibot.IsRedirectPage:
        print(pagfr.title() + u' isredirect')
    return liste
def main2(pagesen,siteen,lang,titre,choix):
    """Scan *pagesen* for IMDb templates and hand each hit to ``verifmodel``.

    The list is sorted in place, preloaded in batches of 50, and every
    main-namespace page without a section in its title is searched for an
    IMDb name template.  For each hit ``verifmodel`` is called; any
    non-empty text it returns is appended to ``Result.txt`` as UTF-8.

    :param pagesen: list of pywikibot pages; sorted in place.
    :param siteen: unused; kept for call compatibility.
    :param lang: passed through to ``verifmodel``.
    :param titre: unused; kept for call compatibility.
    :param choix: unused; kept for call compatibility.
    """
    import io  # local import: only needed for the UTF-8 result file

    imdb_template = r'{{ *[Ii][Mm][Dd][Bb] n[oa]me? *\| *i?d? *=? *(\d+) *[\|}]'

    pagesen.sort()
    remaining = len(pagesen)
    for pen in pagegenerators.PreloadingGenerator(pagesen,50):
        remaining -= 1
        if remaining % 50 == 0:
            # Progress marker every 50 pages.
            print(u'%d %s' % (remaining, pen.title()))

        # The section is read from the original title, before any redirect
        # is followed.
        sectionen = pen.section()
        if pen.isRedirectPage():
            pen = pen.getRedirectTarget()

        if pen.namespace() != 0:
            print(u'n est pas un article')
            continue
        if sectionen:
            continue

        # A missing page or a redirect-to-redirect used to abort the whole
        # run here; skip it instead, reporting it the way verifmodel does.
        try:
            texten = pen.get()
        except pywikibot.NoPage:
            print(pen.title() + u' nopage')
            continue
        except pywikibot.IsRedirectPage:
            print(pen.title() + u' isredirect')
            continue

        found = re.search(imdb_template, texten)
        if found is None:
            continue

        # verifmodel expects [page, value, page_to_check]; the same page is
        # used for both slots.
        value = verifmodel([pen, found.group(1), pen], lang)
        if value:
            with io.open(u'Result.txt', 'a', encoding='utf-8') as out:
                out.write(value)
# Define the main function
def main():
    """Collect the articles that reference the "Imdb nom" template and process them.

    Empties ``Result.txt`` and ``Result temp.txt``, looks up the template
    page on French Wikipedia (trying a lower-case first letter as a
    fallback), gathers every main-namespace page returned by
    ``getReferences()`` and passes the sorted list to ``main2``.

    If the template page cannot be found, a message is printed and the
    function returns without doing anything else.
    """
    lang = 'fr'
    # One site object is enough: the original built the same fr.wikipedia
    # site three times under different names.
    site = pywikibot.Site(lang, 'wikipedia')
    choix = 2

    # Start every run with empty result files.
    for filename in (u'Result.txt', u'Result temp.txt'):
        with open(filename, 'w'):
            pass

    if choix < 2:
        return

    model = u"Imdb nom"
    page = pywikibot.Page(site, u"Template:" + model)
    pywikibot.output(page)
    if not page.exists():
        # Retry with the first letter lower-cased.
        page = pywikibot.Page(site, u"Template:" + model[0].lower() + model[1:])
    if not page.exists():
        # The original paused with the builtin input(), which evaluates the
        # typed text under Python 2, and then went on with a missing page.
        pywikibot.output(u'pas de modele de ce nom')
        return
    if page.isRedirectPage():
        page = page.getRedirectTarget()

    # Keep main-namespace pages only, rebuilt from their titles.
    pagesen = [pywikibot.Page(site, ref.title())
               for ref in page.getReferences()
               if ref.namespace() == 0]
    pagesen.sort()
    main2(pagesen, site, lang, model, choix)
######################################################################################################
if __name__ == "__main__":
    # Script entry point: run the bot, and call pywikibot.stopme() whether
    # main() returns normally or raises.
    try:
        main()
    finally:
        pywikibot.stopme()