[Pywikipedia-l] SVN: [4787] trunk/pywikipedia

rotem at svn.wikimedia.org rotem at svn.wikimedia.org
Mon Dec 31 20:25:05 UTC 2007


Revision: 4787
Author:   rotem
Date:     2007-12-31 20:25:04 +0000 (Mon, 31 Dec 2007)

Log Message:
-----------
In family.namespace, making the 'all' parameter include more than just the aliases: also the default namespaces, the lowercase forms and the underscore forms. Using it in more places. This should also fix the problem for which patch 1861136 was suggested.

Modified Paths:
--------------
    trunk/pywikipedia/commonsdelinker/delinker.py
    trunk/pywikipedia/cosmetic_changes.py
    trunk/pywikipedia/family.py
    trunk/pywikipedia/image.py
    trunk/pywikipedia/wikipedia.py

Modified: trunk/pywikipedia/commonsdelinker/delinker.py
===================================================================
--- trunk/pywikipedia/commonsdelinker/delinker.py	2007-12-31 19:45:27 UTC (rev 4786)
+++ trunk/pywikipedia/commonsdelinker/delinker.py	2007-12-31 20:25:04 UTC (rev 4787)
@@ -186,7 +186,7 @@
 			def create_regex_i(s):
 				return ur'(?:%s)' % u''.join([u'[%s%s]' % (c.upper(), c.lower()) for c in s])
 			
-			namespaces = ('Image', 'Media') + site.namespace(6, all = True) + site.namespace(-2, all = True)
+			namespaces = site.namespace(6, all = True) + site.namespace(-2, all = True)
 			r_namespace = ur'\s*(?:%s)\s*\:\s*' % u'|'.join(map(create_regex_i, namespaces))
 			# Note that this regex creates a group!
 			r_image = u'(%s)' % create_regex(image).replace(r'\_', '[ _]')

Modified: trunk/pywikipedia/cosmetic_changes.py
===================================================================
--- trunk/pywikipedia/cosmetic_changes.py	2007-12-31 19:45:27 UTC (rev 4786)
+++ trunk/pywikipedia/cosmetic_changes.py	2007-12-31 20:25:04 UTC (rev 4787)
@@ -149,38 +149,10 @@
                 continue
             namespaces = list(family.namespace(self.site.lang, nsNumber, all = True))
             thisNs = namespaces.pop(0)
-            try:
-                default = list(family.namespace('_default', nsNumber, all = True))
-            except KeyError:
-                default = []
 
             # skip main (article) namespace
-            if thisNs:
-                wrongNamespaces = []
-
-                # Add aliases and default namespaces
-                wrongNamespaces.extend([ns for ns in namespaces if ns != thisNs])
-                wrongNamespaces.extend([ns for ns in default if ns != thisNs and ns not in wrongNamespaces])
-
-                # Lowercase versions of namespaces
-                if not self.site.nocapitalize:
-                    # Add lowercase version of the current wrong namespaces
-                    wrongNamespaces.extend([ns[0].lower() + ns[1:] for ns in wrongNamespaces if ns[0].lower() != ns[0].upper()])
-
-                    # Add lowercase version of the correct namespace
-                    uncapitalized = thisNs[0].lower() + thisNs[1:]
-                    if uncapitalized != thisNs:
-                        wrongNamespaces.append(uncapitalized)
-
-                # Underscore versions of namespaces
-                # Add underscore versions of all wrong namespaces
-                wrongNamespaces.extend([ns.replace(' ', '_') for ns in wrongNamespaces if ' ' in ns])
-                # Add underscore version of correct namespace
-                if ' ' in thisNs:
-                    wrongNamespaces.append(thisNs.replace(' ', '_'))
-
-                if wrongNamespaces:
-                    text = wikipedia.replaceExcept(text, r'\[\[\s*(' + '|'.join(wrongNamespaces) + ') *:(?P<nameAndLabel>.*?)\]\]', r'[[' + thisNs + ':\g<nameAndLabel>]]', exceptions)
+            if thisNs and namespaces:
+                text = wikipedia.replaceExcept(text, r'\[\[\s*(' + '|'.join(namespaces) + ') *:(?P<nameAndLabel>.*?)\]\]', r'[[' + thisNs + ':\g<nameAndLabel>]]', exceptions)
         return text
 
     def cleanUpLinks(self, text):

Modified: trunk/pywikipedia/family.py
===================================================================
--- trunk/pywikipedia/family.py	2007-12-31 19:45:27 UTC (rev 4786)
+++ trunk/pywikipedia/family.py	2007-12-31 20:25:04 UTC (rev 4787)
@@ -2437,21 +2437,39 @@
             raise KeyError('ERROR: Unknown namespace %d for %s:%s' % (ns_number, code, self.name))
         elif self.isNsI18N(ns_number, code):
             v = self.namespaces[ns_number][code]
+            if type(v) is not list:
+                v = [v,]
+            if all and self.isNsI18N(ns_number, fallback):
+                v2 = self.namespaces[ns_number][fallback]
+                if type(v2) is list:
+                    v.extend(v2)
+                else:
+                    v.append(v2)
         elif fallback and self.isNsI18N(ns_number, fallback):
             v = self.namespaces[ns_number][fallback]
+            if type(v) is not list:
+                v = [v,]
         else:
             raise KeyError('ERROR: title for namespace %d in language %s unknown' % (ns_number, code))
 
         if all:
-            if type(v) is list:
-                return tuple(v)
-            else:
-                return (v, )
+            namespaces = []
+
+            # Unique list
+            for ns in v:
+                if ns not in namespaces:
+                    namespaces.append(ns)
+
+            # Lowercase versions of namespaces
+            if code not in self.nocapitalize:
+                namespaces.extend([ns[0].lower() + ns[1:] for ns in namespaces if ns and ns[0].lower() != ns[0].upper()])
+
+            # Underscore versions of namespaces
+            namespaces.extend([ns.replace(' ', '_') for ns in namespaces if ns and ' ' in ns])
+
+            return tuple(namespaces)
         else:
-            if type(v) is list:
-                return v[0]
-            else:
-                return v
+            return v[0]
 
     def isDefinedNS(self, ns_number):
         """Return True if the namespace has been defined in this family.
@@ -2561,17 +2579,7 @@
         return self.namespace(code, 14, fallback)
 
     def category_namespaces(self, code):
-        namespaces = []
-        namespace_title = self.namespace(code, 14)
-        namespaces.append(namespace_title)
-        if namespace_title != namespace_title.lower():
-            namespaces.append(namespace_title.lower())
-        default_namespace_title = self.namespace('_default', 14)
-        if namespace_title != default_namespace_title:
-            namespaces.append(default_namespace_title)
-            if default_namespace_title != default_namespace_title.lower():
-                namespaces.append(default_namespace_title.lower())
-        return namespaces
+        return self.namespace(code, 14, all = True)
 
     # Redirect code can be translated.
     # Note that redirect codes are case-insensitive, so it is enough

Modified: trunk/pywikipedia/image.py
===================================================================
--- trunk/pywikipedia/image.py	2007-12-31 19:45:27 UTC (rev 4786)
+++ trunk/pywikipedia/image.py	2007-12-31 20:25:04 UTC (rev 4787)
@@ -109,15 +109,14 @@
             old = self.oldImage
 
         old = re.sub('[_ ]', '[_ ]', old)
-        #TODO: Add internationalization of Image namespace name.
         if not self.loose or not self.newImage:
-            ImageRegex = re.compile(r'\[\[ *[Ii]mage:' + old + ' *(?P<parameters>\|[^\n]+|) *\]\]')
+            ImageRegex = re.compile(r'\[\[ *(?:' + '|'.join(wikipedia.getSite().namespace(6, all = True)) + ')\s*:\s*' + old + ' *(?P<parameters>\|[^\n]+|) *\]\]')
         else:
             ImageRegex = re.compile(r'' + old)
 
         if self.newImage:
             if not self.loose:
-                replacements.append((ImageRegex, '[[Image:' + self.newImage + '\g<parameters>]]'))
+                replacements.append((ImageRegex, '[[(?:' + '|'.join(wikipedia.getSite().namespace(6, all = True)) + ')\s*:\s*' + self.newImage + '\g<parameters>]]'))
             else:
                 replacements.append((ImageRegex, self.newImage))
         else:

Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py	2007-12-31 19:45:27 UTC (rev 4786)
+++ trunk/pywikipedia/wikipedia.py	2007-12-31 20:25:04 UTC (rev 4787)
@@ -2182,14 +2182,6 @@
         text = self.get()
         new_text = text
 
-        def caseInsensitivePattern(s):
-            """
-            Creates a pattern that matches the string case-insensitively.
-            """
-            return ur'(?:%s)' % u''.join([u'[%s%s]'
-                                            % (re.escape(c.upper()), re.escape(c.lower()))
-                                          for c in s])
-
         def capitalizationPattern(s):
             """
             Given a string, creates a pattern that matches the string, with
@@ -2201,9 +2193,9 @@
             else:
                 return ur'(?:[%s%s]%s)' % (re.escape(s[0].upper()), re.escape(s[0].lower()), re.escape(s[1:]))
 
-        namespaces = set(('Image', 'Media') + site.namespace(6, all = True) + site.namespace(-2, all = True))
+        namespaces = set(site.namespace(6, all = True) + site.namespace(-2, all = True))
         # note that the colon is already included here
-        namespacePattern = ur'\s*(?:%s)\s*\:\s*' % u'|'.join(map(caseInsensitivePattern, namespaces))
+        namespacePattern = ur'\s*(?:%s)\s*\:\s*' % u'|'.join(namespaces)
 
         imagePattern = u'(%s)' % capitalizationPattern(image).replace(r'\_', '[ _]')
 





More information about the Pywikipedia-l mailing list