[Pywikipedia-l] SVN: [6259] trunk/pywikipedia
russblau at svn.wikimedia.org
russblau at svn.wikimedia.org
Wed Jan 14 21:01:08 UTC 2009
Revision: 6259
Author: russblau
Date: 2009-01-14 21:01:07 +0000 (Wed, 14 Jan 2009)
Log Message:
-----------
Implement category redirect detection; category pages containing a listed redirect template will be treated as redirect pages (e.g., .isRedirectPage() will return True)
Modified Paths:
--------------
trunk/pywikipedia/families/commons_family.py
trunk/pywikipedia/families/wikipedia_family.py
trunk/pywikipedia/family.py
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/families/commons_family.py
===================================================================
--- trunk/pywikipedia/families/commons_family.py 2009-01-14 18:22:16 UTC (rev 6258)
+++ trunk/pywikipedia/families/commons_family.py 2009-01-14 21:01:07 UTC (rev 6259)
@@ -28,10 +28,16 @@
}
self.interwiki_forward = 'wikipedia'
+
+ self.category_redirect_templates = {
+ 'commons': ('Category redirect',),
+ }
+
self.disambiguationTemplates = {
'commons': [u'Disambig', u'Disambiguation', u'Razločitev',
u'Begriffsklärung']
}
+
self.disambcatname = {
'commons': u'Disambiguation'
}
Modified: trunk/pywikipedia/families/wikipedia_family.py
===================================================================
--- trunk/pywikipedia/families/wikipedia_family.py 2009-01-14 18:22:16 UTC (rev 6258)
+++ trunk/pywikipedia/families/wikipedia_family.py 2009-01-14 21:01:07 UTC (rev 6259)
@@ -540,6 +540,48 @@
'als': u'Nochricht Diskussion',
}
+ self.category_redirect_templates = {
+ '_default': (),
+ 'ar': (u"تحويل تصنيف",),
+ 'arz': (u'تحويل تصنيف',),
+ 'cs': (u'Zastaralá kategorie',),
+ 'da': (u'Kategoriomdirigering',),
+ 'de': (u'Kategorieweiterleitung',),
+ 'en': (u"Category redirect",
+ u"Category redirect3",
+ ),
+ 'es': (u'Categoría redirigida',),
+ 'eu': (u'Kategoria redirect',),
+ 'fa': (u'رده بهتر',
+ u'انتقال رده',
+ u'فیلمهای امریکایی'),
+ 'fr': (u'Redirection de catégorie',),
+ 'hi': (u'श्रेणीअनुप्रेषित',),
+ 'id': (u'Alih kategori',),
+ # 'it' has removed its template
+ # 'ja' is discussing to remove this template
+ 'ja': (u"Category redirect",),
+ 'ko': (u'분류 넘겨주기',),
+ 'mk': (u'Премести категорија',),
+ 'ms': (u'Pengalihan kategori',),
+ 'mt': (u'Redirect kategorija',),
+ # 'nl' has removed its template
+ 'no': (u"Kategoriomdirigering",),
+ 'pl': (u'Przekierowanie kategorii',),
+ 'pt': (u'Redirecionamento de categoria',),
+ 'ro': (u'Redirect categorie',),
+ 'ru': (u'Переименованная категория',),
+ 'simple': (u"Category redirect",),
+ 'sq': (u'Kategori e zhvendosur',),
+ 'tl': (u'Category redirect',),
+ 'tr': (u'Kategori yönlendirme',),
+ 'uk': (u'Categoryredirect',),
+ 'vi': (u'Đổi hướng thể loại',),
+ 'yi': (u'קאטעגאריע אריבערפירן',),
+ 'zh': (u'分类重定向',),
+ 'zh-yue': (u'分類彈去',),
+ }
+
self.disambiguationTemplates = {
# set value to None, instead of a list, to retrieve names from
# the live wiki ([[MediaWiki:Disambiguationspage]]
Modified: trunk/pywikipedia/family.py
===================================================================
--- trunk/pywikipedia/family.py 2009-01-14 18:22:16 UTC (rev 6258)
+++ trunk/pywikipedia/family.py 2009-01-14 21:01:07 UTC (rev 6259)
@@ -2851,6 +2851,12 @@
'zzz wiki': 'zzz wiki',
}
+ # A list of category redirect template names in different languages
+ # Note: It is *not* necessary to list template redirects here
+ self.category_redirect_templates = {
+ '_default': []
+ }
+
# A list of disambiguation template names in different languages
self.disambiguationTemplates = {
'_default': []
@@ -3123,6 +3129,16 @@
# give up
return None
+ def category_redirects(self, code, fallback="_default"):
+ if code in self.category_redirect_templates:
+ return self.category_redirect_templates[code]
+ elif fallback:
+ return self.category_redirect_templates[fallback]
+ else:
+ raise KeyError(
+"ERROR: title for category redirect template in language '%s' unknown"
+ % code)
+
def disambig(self, code, fallback = '_default'):
if self.disambiguationTemplates.has_key(code):
return self.disambiguationTemplates[code]
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2009-01-14 18:22:16 UTC (rev 6258)
+++ trunk/pywikipedia/wikipedia.py 2009-01-14 21:01:07 UTC (rev 6259)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# -*- coding: utf-8 -*-
"""
Library to get and put pages on a MediaWiki.
@@ -834,6 +834,10 @@
self._redirarg = redirtarget
else:
raise IsRedirectPage(redirtarget)
+ elif self.is_category_redirect(pagetext): # sets _redirarg
+ if not get_redirect:
+ self._getexception = IsRedirectPage
+ raise IsRedirectPage(self._redirarg)
if self.section():
# TODO: What the hell is this? Docu please.
m = re.search("\.3D\_*(\.27\.27+)?(\.5B\.5B)?\_*%s\_*(\.5B\.5B)?(\.27\.27+)?\_*\.3D" % re.escape(self.section()), sectionencode(text,self.site().encoding()))
@@ -977,6 +981,40 @@
return False
return False
+ def is_category_redirect(self, text=None):
+ """Return True if this is a category redirect.
+
+ Category redirects are identified by the presence of any of the
+ templates found in self.site().category_redirects(), including
+ redirects to any of those templates, in the page text.
+
+ """
+ if not self.isCategory():
+ return False
+ if not hasattr(self, "_catredirect"):
+ redir_list = [Page(self.site(), name, defaultNamespace=10)
+ for name in self.site().category_redirects()]
+ try:
+ templates_and_params = self.templatesWithParams(
+ thistxt=text,
+ get_redirect=True)
+ except Error: # couldn't retrieve templates
+ self._catredirect = False
+ else:
+ for item in templates_and_params:
+ tempname = item[0]
+ template = Page(self.site(), tempname, defaultNamespace=10)
+ while template.isRedirectPage():
+ template = template.getRedirectTarget()
+ if template in redir_list:
+ self._catredirect = True
+ self._redirarg = Page(self.site(), item[1][0],
+ defaultNamespace=14).title()
+ # treat first template arg as name of target category
+ else:
+ self._catredirect = False
+ return self._catredirect
+
def isEmpty(self):
"""Return True if the page text has less than 4 characters.
@@ -2962,6 +3000,8 @@
page2._revisionId = revisionId
page2._editTime = timestamp
section = page2.section()
+ # Store the content
+ page2._contents = text
m = self.site.redirectRegex().match(text)
if m:
## output(u"%s is a redirect" % page2.aslink())
@@ -2970,26 +3010,36 @@
redirectto = redirectto+"#"+section
page2._getexception = IsRedirectPage
page2._redirarg = redirectto
+ elif page2.is_category_redirect():
+ page2._getexception = IsRedirectPage
+
# This is used for checking deletion conflict.
# Use the data loading time.
- page2._startTime = time.strftime('%Y%m%d%H%M%S', time.gmtime())
+ page2._startTime = time.strftime('%Y%m%d%H%M%S',
+ time.gmtime())
if section:
- m = re.search("\.3D\_*(\.27\.27+)?(\.5B\.5B)?\_*%s\_*(\.5B\.5B)?(\.27\.27+)?\_*\.3D" % re.escape(section), sectionencode(text,page2.site().encoding()))
+ # WHAT IS THIS?
+ m = re.search(
+ "\.3D\_*(\.27\.27+)?(\.5B\.5B)?\_*%s\_*(\.5B\.5B)?(\.27\.27+)?\_*\.3D"
+ % re.escape(section),
+ sectionencode(text, page2.site().encoding()))
if not m:
try:
page2._getexception
- output(u"WARNING: Section not found: %s" % page2.aslink(forceInterwiki = True))
+ output(u"WARNING: Section not found: %s"
+ % page2.aslink(forceInterwiki = True))
except AttributeError:
# There is no exception yet
page2._getexception = SectionError
- # Store the content
- page2._contents = text
successful = True
# Note that there is no break here. The reason is that there
# might be duplicates in the pages list.
if not successful:
- output(u"BUG>> title %s (%s) not found in list" % (title, page.aslink(forceInterwiki=True)))
- output(u'Expected one of: %s' % u','.join([page2.aslink(forceInterwiki=True) for page2 in self.pages]))
+ output(u"BUG>> title %s (%s) not found in list"
+ % (title, page.aslink(forceInterwiki=True)))
+ output(u'Expected one of: %s'
+ % u','.join([page2.aslink(forceInterwiki=True)
+ for page2 in self.pages]))
raise PageNotFound
def headerDone(self, header):
@@ -6084,6 +6134,9 @@
"""Return list of language codes that can be used in interwiki links."""
return self._validlanguages
+ def category_redirects(self):
+ return self.family.category_redirects(self.lang, fallback="_default")
+
def disambcategory(self):
"""Return Category in which disambig pages are listed."""
import catlib
@@ -6807,6 +6860,7 @@
raise
return data
+
class MyURLopener(urllib.FancyURLopener):
version="PythonWikipediaBot/1.0"
@@ -6817,7 +6871,6 @@
return urllib.FancyURLopener.http_error_default(self, url, fp, errcode, errmsg, headers)
-
# Special opener in case we are using a site with authentication
if config.authenticate:
import urllib2, cookielib
More information about the Pywikipedia-l
mailing list