# Utilisateur:DSisyphBot/Script/importIMDBdatasurwikidata.py - Une page de Wikipédia, l'encyclopédie libre.
# -*- coding: utf-8 -*-
 
import re
import time
import sys
import urllib
import re
import unicodedata
import pywikibot
from pywikibot import pagegenerators
from pywikibot.pagegenerators import PreloadingGenerator
from pywikibot.compat import catlib

def uploadWD(page,value):
    """Add a P345 claim holding *value* to the Wikidata item of *page*.

    P345 is presumably the IMDb identifier property (callers pass values of
    the form ``u"nm0123456"``).  Nothing is written when:

    * the item already has at least one P345 claim,
    * the page text does not match the category pattern checked below, or
    * the operator does not answer "upload" at the prompt.

    When a claim is added it is sourced with P143 -> Q8447.

    Args:
        page: pywikibot page whose linked Wikidata item is updated.
        value: identifier string stored as the claim target.

    Returns:
        None.
    """
    item = pywikibot.ItemPage.fromPage(page)
    item.get()  # fetch the item's data before reading item.claims
    pid = u"P345"

    try:
        item.claims[pid][0]
    except (KeyError, IndexError):
        # No claim for this property yet; carry on with the upload path.
        # Only these two errors mean "missing" - anything else (including
        # KeyboardInterrupt) must propagate instead of triggering an upload.
        pass
    else:
        print("Already the property")
        return

    print('pas de ' + pid + page.title() + value)

    # NOTE(review): this looks like a filter restricting uploads to pages
    # carrying a "Naissance ..." category - confirm the intent.
    if not re.search(u'Catégorie\:Naissance ', page.get()):
        return

    choice = 'u'
    # Comment out the next line to run unattended (always upload).
    choice = pywikibot.inputChoice(u'upload, Skip', [ 'upload', 'Skip' ], ['u', 'S'], 's')
    if choice != 'u':
        return

    # One repository handle serves both the claim and its reference.
    repo = pywikibot.Site('fr', 'wikipedia').data_repository()

    claim = pywikibot.Claim(site=repo, pid=pid)
    claim.setTarget(value)
    item.addClaim(claim)

    source = pywikibot.Claim(repo, 'P143')  # "imported from"
    source.setTarget(pywikibot.ItemPage(repo, 'Q8447'))  # French Wikipedia
    claim.addSource(source)
	
 
def verifmodel(lstenfr,lang):
    """Compare a template id against the page text and upload it on a match.

    The text of ``lstenfr[2]`` is searched for the IMDb-name template.  When
    the numeric id found there equals ``lstenfr[1]`` and is exactly six
    digits long, ``uploadWD`` is called with ``u"nm0"`` + id.

    Args:
        lstenfr: three-element sequence ``[page, id_string, page]``; only
            elements 1 (the id) and 2 (the page that is read) are used.
        lang: unused; kept so existing callers keep working.

    Returns:
        Always the empty unicode string ``u''``.
    """
    liste = u''
    pagfr = lstenfr[2]
    valueen = lstenfr[1]

    if not pagfr.exists():
        print("no page")

    # The try deliberately spans the uploadWD call as well: NoPage or
    # IsRedirectPage raised anywhere below is reported here, not propagated.
    try:
        textfr = pagfr.get()
        pagenamefr = pagfr.title(withNamespace=False)
        found = re.search(u'{{ *[Ii][Mm][Dd][Bb] n[oa]me? *\| *i?d? *=? *(\d+) *[\|}]',textfr)
        if found:
            valuefr = found.group(1)
            if valueen == valuefr:
                print(pagenamefr + valueen + valuefr)
                # Only six-digit ids are uploaded (padded with one zero).
                # NOTE(review): the seven-digit case was switched off in the
                # original (`1==2`) and stays off here - confirm whether it
                # should be re-enabled.
                if len(valueen) == 6:
                    uploadWD(pagfr, u"nm0" + valuefr)
    except pywikibot.NoPage:
        print(pagfr.title() + u' nopage')
    except pywikibot.IsRedirectPage:
        print(pagfr.title() + u' isredirect')
    return liste

def main2(pagesen,siteen,lang,titre,choix):
    """Scan *pagesen* for the IMDb-name template and hand matches to verifmodel.

    Each page is loaded through ``PreloadingGenerator`` in batches of 50.
    Redirects are followed once.  Pages outside namespace 0 and titles that
    carry a section are skipped.  For every remaining page whose text
    contains the template, ``verifmodel`` is called and its return value is
    appended to ``Result.txt`` (UTF-8 encoded).

    Args:
        pagesen: list of pywikibot pages; sorted in place.
        siteen: unused; kept for interface compatibility.
        lang: forwarded to ``verifmodel``.
        titre: unused; kept for interface compatibility.
        choix: unused; kept for interface compatibility.

    Returns:
        None.
    """
    pattern = u'{{ *[Ii][Mm][Dd][Bb] n[oa]me? *\| *i?d? *=? *(\d+) *[\|}]'
    pagesen.sort()
    remaining = len(pagesen)

    for pen in pagegenerators.PreloadingGenerator(pagesen,50):
        remaining -= 1
        if remaining % 50 == 0:
            # Progress line every 50 pages.
            print(u'%s %s' % (remaining, pen.title()))

        # The section is read from the original title, before any redirect
        # is followed.
        sectionen = pen.section()
        if pen.isRedirectPage():
            pen = pen.getRedirectTarget()

        if pen.namespace() != 0:
            print(u'n est pas un article')
            continue
        if sectionen:
            continue

        # One unreadable page must not abort the whole run; report and skip,
        # in the same way verifmodel reports these errors.
        try:
            texten = pen.get()
        except pywikibot.NoPage:
            print(pen.title() + u' nopage')
            continue
        except pywikibot.IsRedirectPage:
            print(pen.title() + u' isredirect')
            continue

        found = re.search(pattern, texten)
        if not found:
            continue

        value = verifmodel([pen, found.group(1), pen], lang)
        # `with` guarantees the handle is closed even if the write fails.
        with open(u'Result.txt', 'a') as f:
            f.write(value.encode('utf-8'))

	 
# Define the main function
def main():
    """Collect the pages that use the "Imdb nom" template and process them.

    Truncates ``Result.txt`` and ``Result temp.txt``, resolves the template
    page on the French Wikipedia (trying a lower-cased first letter if the
    first title does not exist, and following a redirect), gathers every
    namespace-0 page that references it, and passes the sorted list to
    ``main2``.

    Returns:
        None.
    """
    lang = 'fr'
    sitefr = pywikibot.getSite('fr','wikipedia')
    siteen = pywikibot.getSite(lang,'wikipedia')
    choix = 2

    # Start every run with empty output files.
    for filename in (u'Result.txt', u'Result temp.txt'):
        with open(filename, 'w'):
            pass

    if choix >= 2:
        model = u"Imdb nom"

        page = pywikibot.Page(siteen, u"Template:" + model)
        pywikibot.output(page)
        if not page.exists():
            page = pywikibot.Page(siteen, u"Template:" + model[0].lower() + model[1:])
            if not page.exists():
                # Pause so the operator notices the missing template.
                # pywikibot.input only reads a line; the Python 2 builtin
                # input() used previously evaluated whatever was typed and
                # raised on an empty answer.
                pywikibot.input(u'pas de modele de ce nom')
        if page.isRedirectPage():
            page = page.getRedirectTarget()

        pagesen = [
            pywikibot.Page(siteen, t.title())
            for t in page.getReferences()
            if t.namespace() == 0
        ]
        pagesen.sort()
        main2(pagesen, sitefr, lang, model, choix)
	

	######################################################################################################
 
 
if __name__ == "__main__":
    # Script entry point: pywikibot.stopme() runs whether or not main()
    # raised.
    try:
        main()
    finally:
        pywikibot.stopme()