[Pywikipedia-svn] SVN: [7281] branches/rewrite/pywikibot
russblau at svn.wikimedia.org
russblau at svn.wikimedia.org
Sat Sep 19 19:41:50 UTC 2009
Revision: 7281
Author: russblau
Date: 2009-09-19 19:41:50 +0000 (Sat, 19 Sep 2009)
Log Message:
-----------
Delegate parsing of page titles to the Link object; this eliminates a lot of duplicate code and also defers parsing of namespaces until it is actually needed
Modified Paths:
--------------
branches/rewrite/pywikibot/page.py
branches/rewrite/pywikibot/pagegenerators.py
Modified: branches/rewrite/pywikibot/page.py
===================================================================
--- branches/rewrite/pywikibot/page.py 2009-09-19 19:29:56 UTC (rev 7280)
+++ branches/rewrite/pywikibot/page.py 2009-09-19 19:41:50 UTC (rev 7281)
@@ -27,6 +27,9 @@
reNamespace = re.compile("^(.+?) *: *(.*)$")
+# Note: Link objects (defined later on) represent a wiki-page's title, while
+# Page objects (defined here) represent the page itself, including its contents.
+
class Page(object):
"""Page: A MediaWiki page
@@ -70,77 +73,80 @@
"""
if isinstance(source, pywikibot.site.BaseSite):
- self._site = source
- if ns not in source.namespaces():
- raise pywikibot.Error(
- "Invalid namespace '%i' for site %s."
- % (ns, source.sitename()))
- self._ns = ns
- if ns and not title.startswith(source.namespace(ns)+u":"):
- title = source.namespace(ns) + u":" + title
- elif not ns and u":" in title:
- pos = title.index(u':')
- nsindex = source.ns_index(title[ :pos])
- if nsindex:
- self._ns = nsindex
- # normalize namespace, in case an alias was used
- title = source.namespace(nsindex) + title[pos: ]
- if u"#" in title:
- title, self._section = title.split(u"#", 1)
- else:
- self._section = None
- if not title:
- raise pywikibot.Error(
- "Page object cannot be created from Site without title.")
- self._title = title
+ self._link = Link(title, source=source, defaultNamespace=ns)
+## self._site = source
+## if ns not in source.namespaces():
+## raise pywikibot.Error(
+## "Invalid namespace '%i' for site %s."
+## % (ns, source.sitename()))
+## self._ns = ns
+## if ns and not title.startswith(source.namespace(ns)+u":"):
+## title = source.namespace(ns) + u":" + title
+## elif not ns and u":" in title:
+## pos = title.index(u':')
+## nsindex = source.ns_index(title[ :pos])
+## if nsindex:
+## self._ns = nsindex
+## # normalize namespace, in case an alias was used
+## title = source.namespace(nsindex) + title[pos: ]
+## if u"#" in title:
+## title, self._section = title.split(u"#", 1)
+## else:
+## self._section = None
+## if not title:
+## raise pywikibot.Error(
+## "Page object cannot be created from Site without title.")
+## self._title = title
elif isinstance(source, Page):
# copy all of source's attributes to this object
self.__dict__ = source.__dict__
if title:
# overwrite title
- if ":" in title:
- prefix = title[ :title.index(":")]
- self._ns = self._site.ns_index(prefix)
- if self._ns is None:
- self._ns = 0
- else:
- title = title[title.index(":")+1 : ].strip(" _")
- self._title = "%s:%s" % (
- self.site().namespace(self._ns),
- self._title)
- else:
- self._ns = 0
- if "#" in title:
- self._section = title[title.index("#") + 1 : ].strip(" _")
- title = title[ : title.index("#")].strip(" _")
- self._title = title
+ self._link = Link(title, source=source, defaultNamespace=ns)
+## if ":" in title:
+## prefix = title[ :title.index(":")]
+## self._ns = self._site.ns_index(prefix)
+## if self._ns is None:
+## self._ns = 0
+## else:
+## title = title[title.index(":")+1 : ].strip(" _")
+## self._title = "%s:%s" % (
+## self.site().namespace(self._ns),
+## self._title)
+## else:
+## self._ns = 0
+## if "#" in title:
+## self._section = title[title.index("#") + 1 : ].strip(" _")
+## title = title[ : title.index("#")].strip(" _")
+## self._title = title
elif isinstance(source, Link):
- self._site = source.site
- self._section = source.section
- self._ns = source.namespace
- self._title = source.title
- # reassemble the canonical title from components
- if self._ns:
- self._title = "%s:%s" % (self.site().namespace(self._ns),
- self._title)
+ self._link = source
+## self._site = source.site
+## self._section = source.section
+## self._ns = source.namespace
+## self._title = source.title
+## # reassemble the canonical title from components
+## if self._ns:
+## self._title = "%s:%s" % (self.site().namespace(self._ns),
+## self._title)
else:
raise pywikibot.Error(
"Invalid argument type '%s' in Page constructor: %s"
% (type(source), source))
- if self._section is not None:
- self._title = self._title + "#" + self._section
+## if self._section is not None:
+## self._title = self._title + "#" + self._section
self._revisions = {}
- # Always capitalize the first letter
- self._title = self._title[:1].upper() + self._title[1:]
+## # Always capitalize the first letter
+## self._title = self._title[:1].upper() + self._title[1:]
def site(self):
"""Return the Site object for the wiki on which this Page resides."""
- return self._site
+ return self._link.site
def namespace(self):
"""Return the number of the namespace of the page."""
- return self._ns
+ return self._link.namespace
@deprecate_arg("decode", None)
@deprecate_arg("savetitle", "asUrl")
@@ -165,16 +171,9 @@
in filenames
"""
- title = self._title
- if not withNamespace and self._ns != 0:
- title = title.split(u':', 1)[1]
- if not withSection and self._section:
- title = title.split(u'#', 1)[0]
- if underscore or asUrl:
- title = title.replace(u' ', u'_')
- if asUrl:
- encodedTitle = title.encode(self.site().encoding())
- title = urllib.quote(encodedTitle)
+ title = self._link.canonical_title()
+ if withSection and self._link.section:
+ title = title + "#" + self._link.section
if asLink:
if forceInterwiki or (allowInterwiki and
(self.site().family.name != config.family
@@ -183,16 +182,25 @@
and self.site().family.name != self.site().code:
return u'[[%s:%s:%s]]' % (self.site().family.name,
self.site().code,
- self._title)
+ title)
else:
# use this form for sites like commons, where the
# code is the same as the family name
return u'[[%s:%s]]' % (self.site().code,
- self._title)
+ title)
elif textlink and (self.isImage() or self.isCategory()):
return u'[[:%s]]' % title
else:
return u'[[%s]]' % title
+ if not withNamespace and self.namespace() != 0:
+ title = self._link.title
+ if withSection and self._link.section:
+ title = title + "#" + self._link.section
+ if underscore or asUrl:
+ title = title.replace(u' ', u'_')
+ if asUrl:
+ encodedTitle = title.encode(self.site().encoding())
+ title = urllib.quote(encodedTitle)
if as_filename:
# Replace characters that are not possible in file names on some
# systems.
@@ -210,10 +218,7 @@
any. If no section is present, return None.
"""
- if self._section:
- return self._section
- else:
- return None
+ return self._link.section
def __str__(self):
"""Return a console representation of the pagelink."""
@@ -241,11 +246,11 @@
if not isinstance(other, Page):
# especially, return -1 if other is None
return -1
- if self._site != other._site:
- return cmp(self._site, other._site)
- if self._ns != other._ns:
- return cmp(self._ns, other._ns)
- return cmp(self._title, other._title)
+ if self.site() != other.site():
+ return cmp(self.site(), other.site())
+ if self.namespace() != other.namespace():
+ return cmp(self.namespace(), other.namespace())
+ return cmp(self._link.title, other._link.title)
def __hash__(self):
# Pseudo method that makes it possible to store Page objects as keys
Modified: branches/rewrite/pywikibot/pagegenerators.py
===================================================================
--- branches/rewrite/pywikibot/pagegenerators.py 2009-09-19 19:29:56 UTC (rev 7280)
+++ branches/rewrite/pywikibot/pagegenerators.py 2009-09-19 19:41:50 UTC (rev 7281)
@@ -709,9 +709,9 @@
seenPages = {}
for page in generator:
if page not in seenPages:
- _page = u"%s:%s:%s" % (page._site.family.name,
- page._site.code,
- page._title)
+ _page = u"%s:%s:%s" % (page.site().family.name,
+ page.site().code,
+ page.title())
seenPages[_page] = True
yield page
More information about the Pywikipedia-svn
mailing list