Utilisateur:Neuceu/Scripts

Une page de Wikipédia, l'encyclopédie libre.

Correction orthographique

python replace.py -xml:..\frwiki-20090707-pages-articles.xml -exceptinside:"\{\{DEFAULTSORT:[^\}]*\}\}" -exceptinside:"<ref\s+name=[^>]*>" -exceptinside:"\[\[Cat[eé]gor(y|ie):[^\]]*\]\]" -exceptinside:"(http|https|ftp)://[^\s]*" -exceptinside:"\[\[[^\]\|]*[\]\|]" -exceptinsidetag:interwiki -regex "RX_WRONG" "RX_FIX" -namespace:0

API Queries

Short pages

http://fr.wikipedia.org/w/api.php?action=query&list=allpages&apfilterredir=nonredirects&aplimit=500&apmaxsize=50

Liste des articles d'un portail

# -*- coding: utf-8 -*-

import pywikibot
from pywikibot import catlib
from pywikibot.pagegenerators import GeneratorFactory, parameterHelp, CategorizedPageGenerator, ReferringPageGenerator

# Maps the '&params;' placeholder to pagegenerators.parameterHelp.
# NOTE(review): presumably read by pywikibot's help output to expand the
# placeholder inside the module docstring -- confirm against pywikibot docs.
docuReplacements = {'&params;': parameterHelp}


def get_evaluated_pages():
    """Collect the titles of all pages in the article-evaluation category.

    Iterates the members of the evaluation category named in the code below
    with ``recurse=True`` and gathers each member's title without its
    namespace prefix.

    Returns:
        set: titles (``withNamespace=False``) of every page yielded by the
        category generator.
    """
    cat = catlib.Category(pywikibot.Link('Category:' + u"Évaluation d'article sur le monde byzantin"))
    gen = CategorizedPageGenerator(cat, recurse=True)
    evaluated_pages = set()
    for page in gen:
        # The loop body must be indented under the ``for``; the title is
        # stored without namespace so it can be compared with other sets.
        evaluated_pages.add(page.title(withNamespace=False))
    return evaluated_pages

def get_pages_with_banner():
    """Return the titles of main-namespace pages that transclude the portal banner.

    The banner is looked up as "Portail monde byzantin" with a default
    namespace of 10, and only template inclusions are followed
    (``onlyTemplateInclusion=True``).

    Returns:
        set: titles (``withNamespace=False``) of the referring pages whose
        namespace is 0.
    """
    site = pywikibot.Site()
    banner_link = pywikibot.Link(u"Portail monde byzantin", defaultNamespace=10, source=site)
    banner = pywikibot.Page(banner_link)
    referrers = ReferringPageGenerator(banner, onlyTemplateInclusion=True)
    # Keep only pages in namespace 0; everything else that embeds the
    # banner is ignored.
    return {
        referrer.title(withNamespace=False)
        for referrer in referrers
        if referrer.namespace() == 0
    }

def get_pages_from_all_categories():
    """Return the titles of every page directly inside the listed categories.

    Each category in the hard-coded list is read without recursion
    (``recurse=False``); the titles of its members, stripped of their
    namespace prefix, are merged into a single set.

    Returns:
        set: union of member titles across all listed categories.
    """
    category_names = [
        u"Empire byzantin",
        u"Histoire byzantine",
        u"Histoire militaire de l'Empire byzantin",
        u"Bataille de l'Empire byzantin",
        u"Bataille navale de l'Empire byzantin",
        u"Siège de l'histoire de Constantinople",
        u"Guerre de l'Empire byzantin",
        u"Guerre entre Rus' et Byzantins",
        u"Armée byzantine",
        u"Marine byzantine",
        u"Personnalité militaire byzantine",
        u"Général byzantin",
        u"Thème byzantin",
        u"Constantinople",
        u"Chute de Constantinople",
        u"Concile de Constantinople",
        u"Décès à Constantinople",
        u"Édifice religieux de Constantinople",
        u"Monument de Constantinople",
        u"Naissance à Constantinople",
        u"Patriarche de Constantinople",
        u"Quartier de Constantinople",
    ]

    titles = set()
    for name in category_names:
        category = catlib.Category(pywikibot.Link('Category:' + name))
        members = CategorizedPageGenerator(category, recurse=False)
        # A page listed in several categories ends up in the set only once.
        titles.update(member.title(withNamespace=False) for member in members)
    return titles

def _format_record(page):
    """Build the wiki-table row text describing *page*.

    The row holds, in order: the page title followed by the fixed
    " (d · h · j · ) |" marker, the titles of the page's non-hidden
    categories (each followed by a space), the length of ``page.get()``,
    the number of pages it links to in namespace 0, and the number of
    namespace-0 pages linking back to it.

    Args:
        page: a pywikibot page object.

    Returns:
        The assembled row as a single unicode string.
    """
    # Unicode literals throughout: on Python 2, adding a byte string that
    # holds the non-ASCII middle dot to a unicode title forces an implicit
    # ASCII decode and raises UnicodeDecodeError.
    parts = [u"|- \n|", page.title(), u" (d · h · j · ) |"]
    for cat in page.categories():
        if not cat.isHiddenCategory():
            parts.append(cat.title(withNamespace=False))
            parts.append(u" ")
    # The three counts are evaluated in the same order as the columns.
    parts.append(u"|%d |" % len(page.get()))
    parts.append(u"%d |" % len(list(page.linkedPages(namespaces=[0]))))
    parts.append(u"%d" % len(list(page.backlinks(namespaces=[0]))))
    return u"".join(parts)


def main(*args):
    """Print one wiki-table row per namespace-0 page transcluding the portal banner.

    Pages are taken from the template inclusions of "Portail monde byzantin"
    (looked up with default namespace 10). Positional arguments are accepted
    but not used.
    """
    transclusionPage = pywikibot.Page(pywikibot.Link(u"Portail monde byzantin", defaultNamespace=10, source=pywikibot.Site()))
    gen = ReferringPageGenerator(transclusionPage, onlyTemplateInclusion=True)
    # To report on a single category instead, build ``gen`` with
    # CategorizedPageGenerator(catlib.Category(...), recurse=False).

    for page in gen:
        if page.namespace() == 0:
            pywikibot.stdout(_format_record(page))


# The string literal below is a bare expression statement and is never
# executed: it keeps a disabled report (written with Python 2 print
# statements) that compares two title sets named evaluated_pages and
# portal_pages. Neither name is defined at module level in the visible code,
# so the snippet cannot simply be un-quoted and run as is.
"""

   print "Number of evaluated articles: %d" % len(evaluated_pages)
   print "Number of portal articles: %d" % len(portal_pages)
   union = evaluated_pages.union(portal_pages)
   print "Total Number of articles: %d" % len(union)
   print "Articles that are evaluated but do not include the template:"
   diff = evaluated_pages.difference(portal_pages)
   for page in diff:
       str = ""+page+" (d · h · j · )"
       pywikibot.stdout(str)


   print "Articles that include the template but are not evaluated:"
   diff = portal_pages.difference(evaluated_pages)
   for page in diff:
       str = ""+page+" (d · h · j · )"
       pywikibot.stdout(str)

"""

# Script entry point: run main() and always call pywikibot.stopme()
# afterwards, even when main() raises (the call sits in a ``finally`` clause).
if __name__ == "__main__":

   try:
       main()
   finally:
       # NOTE(review): presumably releases pywikibot's per-process throttle
       # bookkeeping -- confirm against the pywikibot documentation.
       pywikibot.stopme()