[Pywikipedia-l] SVN: [6259] trunk/pywikipedia

russblau at svn.wikimedia.org russblau at svn.wikimedia.org
Wed Jan 14 21:01:08 UTC 2009


Revision: 6259
Author:   russblau
Date:     2009-01-14 21:01:07 +0000 (Wed, 14 Jan 2009)

Log Message:
-----------
Implement category redirect detection; category pages containing a listed redirect template will be treated as redirect pages (e.g., .isRedirectPage() will return True)

Modified Paths:
--------------
    trunk/pywikipedia/families/commons_family.py
    trunk/pywikipedia/families/wikipedia_family.py
    trunk/pywikipedia/family.py
    trunk/pywikipedia/wikipedia.py

Modified: trunk/pywikipedia/families/commons_family.py
===================================================================
--- trunk/pywikipedia/families/commons_family.py	2009-01-14 18:22:16 UTC (rev 6258)
+++ trunk/pywikipedia/families/commons_family.py	2009-01-14 21:01:07 UTC (rev 6259)
@@ -28,10 +28,16 @@
         }
 
         self.interwiki_forward = 'wikipedia'
+
+        self.category_redirect_templates = {
+            'commons': ('Category redirect',),
+        }
+        
         self.disambiguationTemplates = {
             'commons': [u'Disambig', u'Disambiguation', u'Razločitev',
                         u'Begriffsklärung']
         }
+        
         self.disambcatname = {
             'commons':  u'Disambiguation'
         }

Modified: trunk/pywikipedia/families/wikipedia_family.py
===================================================================
--- trunk/pywikipedia/families/wikipedia_family.py	2009-01-14 18:22:16 UTC (rev 6258)
+++ trunk/pywikipedia/families/wikipedia_family.py	2009-01-14 21:01:07 UTC (rev 6259)
@@ -540,6 +540,48 @@
             'als': u'Nochricht Diskussion',
         }
 
+        self.category_redirect_templates = {
+            '_default': (),
+            'ar': (u"تحويل تصنيف",),
+            'arz': (u'تحويل تصنيف',),
+            'cs': (u'Zastaralá kategorie',),
+            'da': (u'Kategoriomdirigering',),
+            'de': (u'Kategorieweiterleitung',),
+            'en': (u"Category redirect",
+                   u"Category redirect3",
+                  ),
+            'es': (u'Categoría redirigida',),
+            'eu': (u'Kategoria redirect',),
+            'fa': (u'رده بهتر',
+                   u'انتقال رده',
+                   u'فیلم‌های امریکایی'),
+            'fr': (u'Redirection de catégorie',),
+            'hi': (u'श्रेणीअनुप्रेषित',),
+            'id': (u'Alih kategori',),
+            # 'it' has removed its template
+            # 'ja' is discussing to remove this template
+            'ja': (u"Category redirect",),
+            'ko': (u'분류 넘겨주기',),
+            'mk': (u'Премести категорија',),
+            'ms': (u'Pengalihan kategori',),
+            'mt': (u'Redirect kategorija',),
+            # 'nl' has removed its template
+            'no': (u"Kategoriomdirigering",),
+            'pl': (u'Przekierowanie kategorii',),
+            'pt': (u'Redirecionamento de categoria',),
+            'ro': (u'Redirect categorie',),
+            'ru': (u'Переименованная категория',),
+            'simple': (u"Category redirect",),
+            'sq': (u'Kategori e zhvendosur',),
+            'tl': (u'Category redirect',),
+            'tr': (u'Kategori yönlendirme',),
+            'uk': (u'Categoryredirect',),
+            'vi': (u'Đổi hướng thể loại',),
+            'yi': (u'קאטעגאריע אריבערפירן',),
+            'zh': (u'分类重定向',),
+            'zh-yue': (u'分類彈去',),
+        }
+        
         self.disambiguationTemplates = {
             # set value to None, instead of a list, to retrieve names from
             # the live wiki ([[MediaWiki:Disambiguationspage]]

Modified: trunk/pywikipedia/family.py
===================================================================
--- trunk/pywikipedia/family.py	2009-01-14 18:22:16 UTC (rev 6258)
+++ trunk/pywikipedia/family.py	2009-01-14 21:01:07 UTC (rev 6259)
@@ -2851,6 +2851,12 @@
             'zzz wiki':         'zzz wiki',
         }
 
+        # A list of category redirect template names in different languages
+        # Note: It is *not* necessary to list template redirects here
+        self.category_redirect_templates = {
+            '_default': []
+        }
+
         # A list of disambiguation template names in different languages
         self.disambiguationTemplates = {
             '_default': []
@@ -3123,6 +3129,16 @@
             # give up
             return None
 
+    def category_redirects(self, code, fallback="_default"):
+        if code in self.category_redirect_templates:
+            return self.category_redirect_templates[code]
+        elif fallback:
+            return self.category_redirect_templates[fallback]
+        else:
+            raise KeyError(
+"ERROR: title for category redirect template in language '%s' unknown"
+                % code)
+
     def disambig(self, code, fallback = '_default'):
         if self.disambiguationTemplates.has_key(code):
             return self.disambiguationTemplates[code]

Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py	2009-01-14 18:22:16 UTC (rev 6258)
+++ trunk/pywikipedia/wikipedia.py	2009-01-14 21:01:07 UTC (rev 6259)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8  -*-
+# -*- coding: utf-8 -*-
 """
 Library to get and put pages on a MediaWiki.
 
@@ -834,6 +834,10 @@
                 self._redirarg = redirtarget
             else:
                 raise IsRedirectPage(redirtarget)
+        elif self.is_category_redirect(pagetext): # sets _redirarg
+            if not get_redirect:
+                self._getexception = IsRedirectPage
+                raise IsRedirectPage(self._redirarg)
         if self.section():
             # TODO: What the hell is this? Docu please.
             m = re.search("\.3D\_*(\.27\.27+)?(\.5B\.5B)?\_*%s\_*(\.5B\.5B)?(\.27\.27+)?\_*\.3D" % re.escape(self.section()), sectionencode(text,self.site().encoding()))
@@ -977,6 +981,40 @@
             return False
         return False
 
+    def is_category_redirect(self, text=None):
+        """Return True if this is a category redirect.
+
+        Category redirects are identified by the presence of any of the
+        templates found in self.site().category_redirects(), including
+        redirects to any of those templates, in the page text.
+
+        """
+        if not self.isCategory():
+            return False
+        if not hasattr(self, "_catredirect"):
+            redir_list = [Page(self.site(), name, defaultNamespace=10)
+                          for name in self.site().category_redirects()]
+            try:
+                templates_and_params = self.templatesWithParams(
+                                                    thistxt=text,
+                                                    get_redirect=True)
+            except Error:  # couldn't retrieve templates
+                self._catredirect = False
+            else:
+                for item in templates_and_params:
+                    tempname = item[0]
+                    template = Page(self.site(), tempname, defaultNamespace=10)
+                    while template.isRedirectPage():
+                        template = template.getRedirectTarget()
+                    if template in redir_list:
+                        self._catredirect = True
+                        self._redirarg = Page(self.site(), item[1][0],
+                                              defaultNamespace=14).title()
+                        # treat first template arg as name of target category
+                    else:
+                        self._catredirect = False
+        return self._catredirect
+
     def isEmpty(self):
         """Return True if the page text has less than 4 characters.
 
@@ -2962,6 +3000,8 @@
                     page2._revisionId = revisionId
                     page2._editTime = timestamp
                     section = page2.section()
+                    # Store the content
+                    page2._contents = text
                     m = self.site.redirectRegex().match(text)
                     if m:
                         ## output(u"%s is a redirect" % page2.aslink())
@@ -2970,26 +3010,36 @@
                             redirectto = redirectto+"#"+section
                         page2._getexception = IsRedirectPage
                         page2._redirarg = redirectto
+                    elif page2.is_category_redirect():
+                        page2._getexception = IsRedirectPage
+                        
                     # This is used for checking deletion conflict.
                     # Use the data loading time.
-                    page2._startTime = time.strftime('%Y%m%d%H%M%S', time.gmtime())
+                    page2._startTime = time.strftime('%Y%m%d%H%M%S',
+                                                     time.gmtime())
                     if section:
-                        m = re.search("\.3D\_*(\.27\.27+)?(\.5B\.5B)?\_*%s\_*(\.5B\.5B)?(\.27\.27+)?\_*\.3D" % re.escape(section), sectionencode(text,page2.site().encoding()))
+						# WHAT IS THIS?
+                        m = re.search(
+    "\.3D\_*(\.27\.27+)?(\.5B\.5B)?\_*%s\_*(\.5B\.5B)?(\.27\.27+)?\_*\.3D"
+                                        % re.escape(section),
+                                sectionencode(text, page2.site().encoding()))
                         if not m:
                             try:
                                 page2._getexception
-                                output(u"WARNING: Section not found: %s" % page2.aslink(forceInterwiki = True))
+                                output(u"WARNING: Section not found: %s"
+                                       % page2.aslink(forceInterwiki = True))
                             except AttributeError:
                                 # There is no exception yet
                                 page2._getexception = SectionError
-                    # Store the content
-                    page2._contents = text
                 successful = True
                 # Note that there is no break here. The reason is that there
                 # might be duplicates in the pages list.
         if not successful:
-            output(u"BUG>> title %s (%s) not found in list" % (title, page.aslink(forceInterwiki=True)))
-            output(u'Expected one of: %s' % u','.join([page2.aslink(forceInterwiki=True) for page2 in self.pages]))
+            output(u"BUG>> title %s (%s) not found in list"
+                   % (title, page.aslink(forceInterwiki=True)))
+            output(u'Expected one of: %s'
+                   % u','.join([page2.aslink(forceInterwiki=True)
+                                for page2 in self.pages]))
             raise PageNotFound
 
     def headerDone(self, header):
@@ -6084,6 +6134,9 @@
         """Return list of language codes that can be used in interwiki links."""
         return self._validlanguages
 
+    def category_redirects(self):
+        return self.family.category_redirects(self.lang, fallback="_default")
+    
     def disambcategory(self):
         """Return Category in which disambig pages are listed."""
         import catlib
@@ -6807,6 +6860,7 @@
             raise
     return data
 
+
 class MyURLopener(urllib.FancyURLopener):
     version="PythonWikipediaBot/1.0"
 
@@ -6817,7 +6871,6 @@
             return urllib.FancyURLopener.http_error_default(self, url, fp, errcode, errmsg, headers)
 
 
-
 # Special opener in case we are using a site with authentication
 if config.authenticate:
     import urllib2, cookielib





More information about the Pywikipedia-l mailing list