[Pywikipedia-svn] SVN: [7281] branches/rewrite/pywikibot

russblau at svn.wikimedia.org russblau at svn.wikimedia.org
Sat Sep 19 19:41:50 UTC 2009


Revision: 7281
Author:   russblau
Date:     2009-09-19 19:41:50 +0000 (Sat, 19 Sep 2009)

Log Message:
-----------
Delegate parsing of page titles to the Link object. This eliminates a lot of duplicate code and also defers namespace parsing until it is actually needed.

Modified Paths:
--------------
    branches/rewrite/pywikibot/page.py
    branches/rewrite/pywikibot/pagegenerators.py

Modified: branches/rewrite/pywikibot/page.py
===================================================================
--- branches/rewrite/pywikibot/page.py	2009-09-19 19:29:56 UTC (rev 7280)
+++ branches/rewrite/pywikibot/page.py	2009-09-19 19:41:50 UTC (rev 7281)
@@ -27,6 +27,9 @@
 reNamespace = re.compile("^(.+?) *: *(.*)$")
 
 
+# Note: Link objects (defined later on) represent a wiki-page's title, while
+# Page objects (defined here) represent the page itself, including its contents.
+
 class Page(object):
     """Page: A MediaWiki page
 
@@ -70,77 +73,80 @@
 
         """
         if isinstance(source, pywikibot.site.BaseSite):
-            self._site = source
-            if ns not in source.namespaces():
-                raise pywikibot.Error(
-                      "Invalid namespace '%i' for site %s."
-                      % (ns, source.sitename()))
-            self._ns = ns
-            if ns and not title.startswith(source.namespace(ns)+u":"):
-                title = source.namespace(ns) + u":" + title
-            elif not ns and u":" in title:
-                pos = title.index(u':')
-                nsindex = source.ns_index(title[ :pos])
-                if nsindex:
-                    self._ns = nsindex
-                    # normalize namespace, in case an alias was used
-                    title = source.namespace(nsindex) + title[pos: ]
-            if u"#" in title:
-                title, self._section = title.split(u"#", 1)
-            else:
-                self._section = None
-            if not title:
-                raise pywikibot.Error(
-                      "Page object cannot be created from Site without title.")
-            self._title = title
+            self._link = Link(title, source=source, defaultNamespace=ns)
+##            self._site = source
+##            if ns not in source.namespaces():
+##                raise pywikibot.Error(
+##                      "Invalid namespace '%i' for site %s."
+##                      % (ns, source.sitename()))
+##            self._ns = ns
+##            if ns and not title.startswith(source.namespace(ns)+u":"):
+##                title = source.namespace(ns) + u":" + title
+##            elif not ns and u":" in title:
+##                pos = title.index(u':')
+##                nsindex = source.ns_index(title[ :pos])
+##                if nsindex:
+##                    self._ns = nsindex
+##                    # normalize namespace, in case an alias was used
+##                    title = source.namespace(nsindex) + title[pos: ]
+##            if u"#" in title:
+##                title, self._section = title.split(u"#", 1)
+##            else:
+##                self._section = None
+##            if not title:
+##                raise pywikibot.Error(
+##                      "Page object cannot be created from Site without title.")
+##            self._title = title
         elif isinstance(source, Page):
             # copy all of source's attributes to this object
             self.__dict__ = source.__dict__
             if title:
                 # overwrite title
-                if ":" in title:
-                    prefix = title[ :title.index(":")]
-                    self._ns = self._site.ns_index(prefix)
-                    if self._ns is None:
-                        self._ns = 0
-                    else:
-                        title = title[title.index(":")+1 : ].strip(" _")
-                        self._title = "%s:%s" % (
-                                         self.site().namespace(self._ns),
-                                         self._title)
-                else:
-                    self._ns = 0
-                if "#" in title:
-                    self._section = title[title.index("#") + 1 : ].strip(" _")
-                    title = title[ : title.index("#")].strip(" _")
-                self._title = title
+                self._link = Link(title, source=source, defaultNamespace=ns)
+##                if ":" in title:
+##                    prefix = title[ :title.index(":")]
+##                    self._ns = self._site.ns_index(prefix)
+##                    if self._ns is None:
+##                        self._ns = 0
+##                    else:
+##                        title = title[title.index(":")+1 : ].strip(" _")
+##                        self._title = "%s:%s" % (
+##                                         self.site().namespace(self._ns),
+##                                         self._title)
+##                else:
+##                    self._ns = 0
+##                if "#" in title:
+##                    self._section = title[title.index("#") + 1 : ].strip(" _")
+##                    title = title[ : title.index("#")].strip(" _")
+##                self._title = title
         elif isinstance(source, Link):
-            self._site = source.site
-            self._section = source.section
-            self._ns = source.namespace
-            self._title = source.title
-            # reassemble the canonical title from components
-            if self._ns:
-                self._title = "%s:%s" % (self.site().namespace(self._ns),
-                                         self._title)
+            self._link = source
+##            self._site = source.site
+##            self._section = source.section
+##            self._ns = source.namespace
+##            self._title = source.title
+##            # reassemble the canonical title from components
+##            if self._ns:
+##                self._title = "%s:%s" % (self.site().namespace(self._ns),
+##                                         self._title)
         else:
             raise pywikibot.Error(
                   "Invalid argument type '%s' in Page constructor: %s"
                   % (type(source), source))
-        if self._section is not None:
-            self._title = self._title + "#" + self._section
+##        if self._section is not None:
+##            self._title = self._title + "#" + self._section
         self._revisions = {}
 
-        # Always capitalize the first letter
-        self._title = self._title[:1].upper() + self._title[1:]
+##        # Always capitalize the first letter
+##        self._title = self._title[:1].upper() + self._title[1:]
 
     def site(self):
         """Return the Site object for the wiki on which this Page resides."""
-        return self._site
+        return self._link.site
 
     def namespace(self):
         """Return the number of the namespace of the page."""
-        return self._ns
+        return self._link.namespace
 
     @deprecate_arg("decode", None)
     @deprecate_arg("savetitle", "asUrl")
@@ -165,16 +171,9 @@
             in filenames
 
         """
-        title = self._title
-        if not withNamespace and self._ns != 0:
-            title = title.split(u':', 1)[1]
-        if not withSection and self._section:
-            title = title.split(u'#', 1)[0]
-        if underscore or asUrl:
-            title = title.replace(u' ', u'_')
-        if asUrl:
-            encodedTitle = title.encode(self.site().encoding())
-            title = urllib.quote(encodedTitle)
+        title = self._link.canonical_title()
+        if withSection and self._link.section:
+            title = title + "#" + self._link.section
         if asLink:
             if forceInterwiki or (allowInterwiki and
                     (self.site().family.name != config.family
@@ -183,16 +182,25 @@
                         and self.site().family.name != self.site().code:
                     return u'[[%s:%s:%s]]' % (self.site().family.name,
                                               self.site().code,
-                                              self._title)
+                                              title)
                 else:
                     # use this form for sites like commons, where the
                     # code is the same as the family name
                     return u'[[%s:%s]]' % (self.site().code,
-                                           self._title)
+                                           title)
             elif textlink and (self.isImage() or self.isCategory()):
                 return u'[[:%s]]' % title
             else:
                 return u'[[%s]]' % title
+        if not withNamespace and self.namespace() != 0:
+            title = self._link.title
+            if withSection and self._link.section:
+                title = title + "#" + self._link.section
+        if underscore or asUrl:
+            title = title.replace(u' ', u'_')
+        if asUrl:
+            encodedTitle = title.encode(self.site().encoding())
+            title = urllib.quote(encodedTitle)
         if as_filename:
             # Replace characters that are not possible in file names on some
             # systems.
@@ -210,10 +218,7 @@
         any. If no section is present, return None.
 
         """
-        if self._section:
-            return self._section
-        else:
-            return None
+        return self._link.section
 
     def __str__(self):
         """Return a console representation of the pagelink."""
@@ -241,11 +246,11 @@
         if not isinstance(other, Page):
             # especially, return -1 if other is None
             return -1
-        if self._site != other._site:
-            return cmp(self._site, other._site)
-        if self._ns != other._ns:
-            return cmp(self._ns, other._ns)
-        return cmp(self._title, other._title)
+        if self.site() != other.site():
+            return cmp(self.site(), other.site())
+        if self.namespace() != other.namespace():
+            return cmp(self.namespace(), other.namespace())
+        return cmp(self._link.title, other._link.title)
 
     def __hash__(self):
         # Pseudo method that makes it possible to store Page objects as keys

Modified: branches/rewrite/pywikibot/pagegenerators.py
===================================================================
--- branches/rewrite/pywikibot/pagegenerators.py	2009-09-19 19:29:56 UTC (rev 7280)
+++ branches/rewrite/pywikibot/pagegenerators.py	2009-09-19 19:41:50 UTC (rev 7281)
@@ -709,9 +709,9 @@
     seenPages = {}
     for page in generator:
         if page not in seenPages:
-            _page = u"%s:%s:%s" % (page._site.family.name,
-                                   page._site.code,
-                                   page._title)
+            _page = u"%s:%s:%s" % (page.site().family.name,
+                                   page.site().code,
+                                   page.title())
             seenPages[_page] = True
             yield page
 





More information about the Pywikipedia-svn mailing list