Revision: 5891
Author: russblau
Date: 2008-09-13 10:13:44 +0000 (Sat, 13 Sep 2008)
Log Message:
-----------
lazy evaluation of Link objects; clean up site tests
Modified Paths:
--------------
branches/rewrite/pywikibot/page.py
branches/rewrite/pywikibot/site.py
branches/rewrite/pywikibot/tests/site_tests.py
Modified: branches/rewrite/pywikibot/page.py
===================================================================
--- branches/rewrite/pywikibot/page.py 2008-09-12 19:13:48 UTC (rev 5890)
+++ branches/rewrite/pywikibot/page.py 2008-09-13 10:13:44 UTC (rev 5891)
@@ -1425,7 +1425,7 @@
)
def __init__(self, text, source=None, defaultNamespace=0):
- """Parse text into a Link object.
+ """Constructor
@param text: the link text (everything appearing between [[ and ]]
on a wiki page)
@@ -1438,22 +1438,29 @@
@type defaultNamespace: int
"""
+ self._text = text
+ self.source = source
+ self._defaultns = defaultNamespace
+
+ def parse(self):
+ """Parse text; called internally when accessing attributes"""
+
# First remove the anchor, which is stored unchanged, if there is one
- if u"|" in text:
- text, self.anchor = text.split(u"|", 1)
+ if u"|" in self._text:
+ self._text, self._anchor = self._text.split(u"|", 1)
else:
- self.anchor = None
+ self._anchor = None
- if source is None:
- source = pywikibot.Site()
- self.source = self.site = source
+ if self.source is None:
+ self.source = pywikibot.Site()
+ self._site = self.source
# Clean up the name, it can come from anywhere.
# Convert HTML entities to unicode
- t = html2unicode(text)
+ t = html2unicode(self._text)
# Convert URL-encoded characters to unicode
- t = url2unicode(t, site=self.site)
+ t = url2unicode(t, site=self._site)
# Normalize unicode string to a NFC (composed) format to allow proper
# string comparisons. According to
@@ -1466,7 +1473,7 @@
#
if u'\ufffd' in t:
raise pywikibot.Error("Title contains illegal char (\\uFFFD)")
- self.namespace = defaultNamespace
+ self._namespace = self._defaultns
# Replace underscores by spaces
t = t.replace(u"_", u" ")
@@ -1481,34 +1488,34 @@
while u":" in t:
# Initial colon indicates main namespace rather than default
if t.startswith(u":"):
- self.namespace = 0
+ self._namespace = 0
# remove the colon but continue processing
# remove any subsequent whitespace
t = t.lstrip(u":").lstrip(u" ")
continue
- fam = self.site.family
+ fam = self._site.family
prefix = t[ :t.index(u":")].lower()
- ns = self.site.ns_index(prefix)
+ ns = self._site.ns_index(prefix)
if ns:
# Ordinary namespace
t = t[t.index(u":"): ].lstrip(u":").lstrip(u" ")
- self.namespace = ns
+ self._namespace = ns
break
if prefix in fam.langs.keys()\
- or prefix in fam.get_known_families(site=self.site):
+ or prefix in fam.get_known_families(site=self._site):
# looks like an interwiki link
if not firstPass:
# Can't make a local interwiki link to an interwiki link.
raise pywikibot.Error(
"Improperly formatted interwiki link '%s'"
- % text)
+ % self._text)
t = t[t.index(u":"): ].lstrip(u":").lstrip(u" ")
if prefix in fam.langs.keys():
newsite = pywikibot.Site(prefix, fam)
else:
- otherlang = self.site.code
- familyName = fam.get_known_families(site=self.site)[prefix]
+ otherlang = self._site.code
+ familyName = fam.get_known_families(site=self._site)[prefix]
if familyName in ['commons', 'meta']:
otherlang = familyName
try:
@@ -1517,25 +1524,25 @@
raise pywikibot.Error("""\
%s is not a local page on %s, and the %s family is
not supported by PyWikiBot!"""
- % (title, self.site(), familyName))
+ % (title, self._site(), familyName))
# Redundant interwiki prefix to the local wiki
- if newsite == self.site:
+ if newsite == self._site:
if not t:
# Can't have an empty self-link
raise pywikibot.Error(
- "Invalid link title: '%s'" % text)
+ "Invalid link title: '%s'" % self._text)
firstPass = False
continue
- self.site = newsite
+ self._site = newsite
else:
break # text before : doesn't match any known prefix
if u"#" in t:
t, sec = t.split(u'#', 1)
- t, self.section = t.rstrip(), sec.lstrip()
+ t, self._section = t.rstrip(), sec.lstrip()
else:
- self.section = None
+ self._section = None
# Reject illegal characters.
m = Link.illegal_titles_pattern.search(t)
@@ -1558,27 +1565,60 @@
):
raise pywikibot.Error(
"Invalid title (contains . / combinations): '%s'"
- % text)
+ % self._text)
# Magic tilde sequences? Nu-uh!
if u"~~~" in t:
- raise pywikibot.Error("Invalid title (contains ~~~): '%s'" % text)
+ raise pywikibot.Error("Invalid title (contains ~~~): '%s'" % self._text)
- if self.namespace != -1 and len(t) > 255:
+ if self._namespace != -1 and len(t) > 255:
raise pywikibot.Error("Invalid title (over 255 bytes): '%s'" % t)
- if self.site.case() == 'first-letter':
+ if self._site.case() == 'first-letter':
t = t[:1].upper() + t[1:]
# Can't make a link to a namespace alone...
# "empty" local links can only be self-links
# with a fragment identifier.
- if not t and self.site == self.source and self.namespace != 0:
- raise ValueError("Invalid link (no page title): '%s'" % text)
+ if not t and self._site == self.source and self._namespace != 0:
+ raise ValueError("Invalid link (no page title): '%s'" % self._text)
- self.title = t
+ self._title = t
+ # define attributes, to be evaluated lazily
+ @property
+ def site(self):
+ if not hasattr(self, "_site"):
+ self.parse()
+ return self._site
+
+ @property
+ def namespace(self):
+ if not hasattr(self, "_namespace"):
+ self.parse()
+ return self._namespace
+
+ @property
+ def title(self):
+ if not hasattr(self, "_title"):
+ self.parse()
+ return self._title
+
+ @property
+ def section(self):
+ if not hasattr(self, "_section"):
+ self.parse()
+ return self._section
+
+ @property
+ def anchor(self):
+ if not hasattr(self, "_anchor"):
+ self.parse()
+ return self._anchor
+
+
+
# Utility functions for parsing page titles
def html2unicode(text, ignore = []):
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2008-09-12 19:13:48 UTC (rev 5890)
+++ branches/rewrite/pywikibot/site.py 2008-09-13 10:13:44 UTC (rev 5891)
@@ -968,17 +968,21 @@
return eigen
def pagereferences(self, page, followRedirects=False, filterRedirects=None,
- withTemplateInclusion=True, onlyTemplateInclusion=False):
+ withTemplateInclusion=True, onlyTemplateInclusion=False,
+ namespaces=None):
"""Convenience method combining pagebacklinks and page_embeddedin."""
if onlyTemplateInclusion:
- return self.page_embeddedin(page)
+ return self.page_embeddedin(page, namespaces=namespaces)
if not withTemplateInclusion:
- return self.pagebacklinks(page, followRedirects)
+ return self.pagebacklinks(page, followRedirects,
+ namespaces=namespaces)
import itertools
return itertools.chain(
- self.pagebacklinks(page, followRedirects, filterRedirects),
- self.page_embeddedin(page, filterRedirects)
+ self.pagebacklinks(page, followRedirects,
+ filterRedirects, namespaces=namespaces),
+ self.page_embeddedin(page, filterRedirects,
+ namespaces=namespaces)
)
def pagelinks(self, page, namespaces=None, follow_redirects=False):
Modified: branches/rewrite/pywikibot/tests/site_tests.py
===================================================================
--- branches/rewrite/pywikibot/tests/site_tests.py 2008-09-12 19:13:48 UTC (rev 5890)
+++ branches/rewrite/pywikibot/tests/site_tests.py 2008-09-13 10:13:44 UTC (rev 5891)
@@ -96,34 +96,34 @@
self.assertTrue(isinstance(mysite.token(mainpage, ttype),
basestring))
-## def testLinkMethods(self):
-## """Test site methods for getting links to and from a page"""
-##
-## backlinks = set(mysite.pagebacklinks(mainpage))
-## embedded = set(mysite.page_embeddedin(mainpage))
-## refs = set(mysite.pagereferences(mainpage))
-## for bl in backlinks:
-## self.assertTrue(isinstance(bl, pywikibot.Page))
-## self.assertTrue(bl in refs)
-## for ei in embedded:
-## self.assertTrue(isinstance(ei, pywikibot.Page))
-## self.assertTrue(ei in refs)
-## for ref in refs:
-## self.assertTrue(ref in backlinks or ref in embedded)
-## for pl in mysite.pagelinks(mainpage):
-## self.assertTrue(isinstance(pl, pywikibot.Page))
-## for cat in mysite.pagecategories(mainpage):
-## self.assertTrue(isinstance(cat, pywikibot.Category))
-## for cm in mysite.categorymembers(cat):
-## self.assertTrue(isinstance(cat, pywikibot.Page))
-## self.assertTrue(all(isinstance(im, pywikibot.ImagePage)
-## for im in mysite.pageimages(mainpage)))
-## self.assertTrue(all(isinstance(te, pywikibot.Page)
-## for te in mysite.pagetemplates(mainpage)))
-## for ll in mysite.pagelanglinks(mainpage):
-## self.assertTrue(isinstance(ll, pywikibot.Link))
-## self.assertTrue(all(isinstance(el, basestring)
-## for el in mysite.page_extlinks(mainpage)))
+ def testLinkMethods(self):
+ """Test site methods for getting links to and from a page"""
+
+ backlinks = set(mysite.pagebacklinks(mainpage, namespaces=[0]))
+ embedded = set(mysite.page_embeddedin(mainpage, namespaces=[0]))
+ refs = set(mysite.pagereferences(mainpage, namespaces=[0]))
+ for bl in backlinks:
+ self.assertTrue(isinstance(bl, pywikibot.Page))
+ self.assertTrue(bl in refs)
+ for ei in embedded:
+ self.assertTrue(isinstance(ei, pywikibot.Page))
+ self.assertTrue(ei in refs)
+ for ref in refs:
+ self.assertTrue(ref in backlinks or ref in embedded)
+ for pl in mysite.pagelinks(mainpage):
+ self.assertTrue(isinstance(pl, pywikibot.Page))
+ for cat in mysite.pagecategories(mainpage):
+ self.assertTrue(isinstance(cat, pywikibot.Category))
+ for cm in mysite.categorymembers(cat):
+ self.assertTrue(isinstance(cat, pywikibot.Page))
+ self.assertTrue(all(isinstance(im, pywikibot.ImagePage)
+ for im in mysite.pageimages(mainpage)))
+ self.assertTrue(all(isinstance(te, pywikibot.Page)
+ for te in mysite.pagetemplates(mainpage)))
+ for ll in mysite.pagelanglinks(mainpage):
+ self.assertTrue(isinstance(ll, pywikibot.Link))
+ self.assertTrue(all(isinstance(el, basestring)
+ for el in mysite.page_extlinks(mainpage)))
def testLoadRevisions(self):
"""Test the site.loadrevisions() method"""
Revision: 5890
Author: russblau
Date: 2008-09-12 19:13:48 +0000 (Fri, 12 Sep 2008)
Log Message:
-----------
update more site methods and tests
Modified Paths:
--------------
branches/rewrite/pywikibot/__init__.py
branches/rewrite/pywikibot/login.py
branches/rewrite/pywikibot/site.py
branches/rewrite/pywikibot/tests/site_tests.py
Modified: branches/rewrite/pywikibot/__init__.py
===================================================================
--- branches/rewrite/pywikibot/__init__.py 2008-09-12 14:56:16 UTC (rev 5889)
+++ branches/rewrite/pywikibot/__init__.py 2008-09-12 19:13:48 UTC (rev 5890)
@@ -34,11 +34,11 @@
"""
logger = logging.getLogger("wiki")
- if code == None:
+ if code is None:
code = default_code
- if fam == None:
+ if fam is None:
fam = default_family
- if user == None:
+ if user is None:
try:
user = config.usernames[fam][code]
except KeyError:
Modified: branches/rewrite/pywikibot/login.py
===================================================================
--- branches/rewrite/pywikibot/login.py 2008-09-12 14:56:16 UTC (rev 5889)
+++ branches/rewrite/pywikibot/login.py 2008-09-12 19:13:48 UTC (rev 5890)
@@ -70,12 +70,14 @@
class LoginManager:
- def __init__(self, password = None, sysop = False, site = None):
+ def __init__(self, password=None, sysop=False, site=None, user=None):
if site is not None:
self.site = site
else:
self.site = pywikibot.Site()
- if sysop:
+ if user:
+ self.username = user
+ elif sysop:
try:
self.username = config.sysopnames\
[self.site.family.name][self.site.code]
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2008-09-12 14:56:16 UTC (rev 5889)
+++ branches/rewrite/pywikibot/site.py 2008-09-12 19:13:48 UTC (rev 5890)
@@ -114,6 +114,7 @@
@property
def throttle(self):
"""Return this Site's throttle. Initialize a new one if needed."""
+
if not hasattr(self, "_throttle"):
self._throttle = Throttle(self, multiplydelay=True, verbosedelay=True)
try:
@@ -125,16 +126,28 @@
@property
def family(self):
"""The Family object for this Site's wiki family."""
+
return self.__family
@property
def code(self):
"""The identifying code for this Site."""
+
return self.__code
+ @property
+ def lang(self):
+ """The ISO language code for this Site.
+
+ Presumed to be equal to the wiki prefix, but this can be overridden.
+
+ """
+ return self.__code
+
def __cmp__(self, other):
"""Perform equality and inequality tests on Site objects."""
- if not isinstance(other, Site):
+
+ if not isinstance(other, BaseSite):
return 1
if self.family == other.family:
return cmp(self.code, other.code)
@@ -142,12 +155,14 @@
def user(self):
"""Return the currently-logged in bot user, or None."""
+
if self.logged_in():
return self._username
return None
def __getattr__(self, attr):
"""Calls to methods not defined in this object are passed to Family."""
+
if hasattr(self.__class__, attr):
return self.__class__.attr
try:
@@ -163,6 +178,7 @@
def sitename(self):
"""Return string representing this Site's name and language."""
+
return self.family.name+':'+self.code
__str__ = sitename
@@ -174,15 +190,34 @@
return hash(repr(self))
def linktrail(self):
- """Return regex for trailing chars displayed as part of a link."""
+ """Return regex for trailing chars displayed as part of a link.
+
+ Returns a string, not a compiled regular expression object.
+
+ This reads from the family file, and ''not'' from
+ [[MediaWiki:Linktrail]], because the MW software currently uses a
+ built-in linktrail from its message files and ignores the wiki
+ value.
+
+ """
return self.family.linktrail(self.code)
def languages(self):
"""Return list of all valid language codes for this site's Family."""
+
return self.family.langs.keys()
+
+ def validLanguageLinks(self):
+ """Return list of language codes that can be used in interwiki links."""
+
+ nsnames = sum(self.namespaces().values(), [])
+ return [l for l in self.languages()
+ if l[:1].upper() + l[1:] not in self.namespaces()]
+
def ns_index(self, namespace):
"""Given a namespace name, return its int index, or None if invalid."""
+
for ns in self.namespaces():
if namespace.lower() in [name.lower()
for name in self.namespaces()[ns]]:
@@ -193,6 +228,7 @@
def namespaces(self):
"""Return dict of valid namespaces on this wiki."""
+
return self._namespaces
def ns_normalize(self, value):
@@ -258,7 +294,7 @@
"""Return Category in which disambig pages are listed."""
try:
- name = self.namespace(14)+':'+self.family.disambcatname[self.code])
+ name = self.namespace(14)+':'+self.family.disambcatname[self.code]
except KeyError:
raise Error(u"No disambiguation category name found for %(site)s"
% {'site': self})
@@ -338,10 +374,12 @@
def category_on_one_line(self):
"""Return True if this site wants all category links on one line."""
+
return self.code in self.family.category_on_one_line
def interwiki_putfirst(self):
"""Return list of language codes for ordering of interwiki links."""
+
return self.family.interwiki_putfirst.get(self.code, None)
def getSite(self, code):
@@ -447,7 +485,6 @@
}
self.sitelock = threading.Lock()
self._msgcache = {}
- self._username = ""
return
# ANYTHING BELOW THIS POINT IS NOT YET IMPLEMENTED IN __init__()
@@ -493,18 +530,11 @@
self._getsiteinfo()
# check whether a login cookie already exists for this user
if hasattr(self, "_userinfo"):
- try:
- if sysop:
- name = config.sysopnames[self.family.name][self.code]
- else:
- name = config.usernames[self.family.name][self.code]
- if self._userinfo['name'] == name:
- self._username = name
- except KeyError:
- # no username for this site
- pass
+ if self._userinfo['name'] == self._username:
+ return
if not self.logged_in(sysop):
- loginMan = api.LoginManager(site=self, sysop=sysop)
+ loginMan = api.LoginManager(site=self, sysop=sysop,
+ user=self._username)
if loginMan.login(retry = True):
self._username = loginMan.username
if hasattr(self, "_userinfo"):
@@ -678,20 +708,25 @@
@property
def siteinfo(self):
"""Site information dict."""
+
if not hasattr(self, "_siteinfo"):
self._getsiteinfo()
return self._siteinfo
def case(self):
+
return self.siteinfo['case']
def language(self):
"""Return the code for the language of this Site."""
- # N.B. this code may or may not be the same as self.code
+
return self.siteinfo['lang']
+ lang = property(fget=language, doc=language.__doc__)
+
def namespaces(self):
"""Return dict of valid namespaces on this wiki."""
+
if not hasattr(self, "_siteinfo"):
self._getsiteinfo()
return self._namespaces
@@ -2654,7 +2689,3 @@
return False
else:
return False
-
- def validLanguageLinks(self):
- """Return list of language codes that can be used in interwiki links."""
- return self._validlanguages
Modified: branches/rewrite/pywikibot/tests/site_tests.py
===================================================================
--- branches/rewrite/pywikibot/tests/site_tests.py 2008-09-12 14:56:16 UTC (rev 5889)
+++ branches/rewrite/pywikibot/tests/site_tests.py 2008-09-12 19:13:48 UTC (rev 5890)
@@ -23,11 +23,29 @@
"""Test cases for BaseSite methods"""
self.assertEqual(mysite.family.name, pywikibot.config.family)
self.assertEqual(mysite.code, pywikibot.config.mylang)
+ self.assertTrue(isinstance(mysite.language(), basestring))
+ self.assertTrue(isinstance(mysite == pywikibot.Site("en", "wikipedia"),
+ bool))
self.assertTrue(isinstance(mysite.user(), (basestring, type(None))))
self.assertEqual(mysite.sitename(),
"%s:%s" % (pywikibot.config.family, pywikibot.config.mylang))
+ self.assertEqual(repr(mysite),
+ 'Site("%s", "%s")'
+ % (pywikibot.config.mylang, pywikibot.config.family))
+ self.assertTrue(isinstance(mysite.linktrail(), basestring))
+ langs = mysite.languages()
+ self.assertTrue(isinstance(langs, list))
+ self.assertTrue(mysite.code in langs)
+ self.assertEqual(mysite.ns_index("Talk"), 1)
+ ns = mysite.namespaces()
+ self.assertTrue(isinstance(ns, dict))
+ for x in xrange(0, 16): # built-in namespaces always present
+ self.assertTrue(x in ns)
+ self.assertTrue(isinstance(ns[x], list))
self.assertTrue(isinstance(mysite.ns_normalize("project"), basestring))
self.assertTrue(isinstance(mysite.redirect(), basestring))
+ self.assertTrue(isinstance(mysite.disambcategory(), pywikibot.Category))
+ self.assertTrue(isinstance(mysite.redirectRegex().pattern, basestring))
def testApiMethods(self):
"""Test generic ApiSite methods"""
@@ -78,34 +96,34 @@
self.assertTrue(isinstance(mysite.token(mainpage, ttype),
basestring))
- def testLinkMethods(self):
- """Test site methods for getting links to and from a page"""
-
- backlinks = set(mysite.pagebacklinks(mainpage))
- embedded = set(mysite.page_embeddedin(mainpage))
- refs = set(mysite.pagereferences(mainpage))
- for bl in backlinks:
- self.assertTrue(isinstance(bl, pywikibot.Page))
- self.assertTrue(bl in refs)
- for ei in embedded:
- self.assertTrue(isinstance(ei, pywikibot.Page))
- self.assertTrue(ei in refs)
- for ref in refs:
- self.assertTrue(ref in backlinks or ref in embedded)
- for pl in mysite.pagelinks(mainpage):
- self.assertTrue(isinstance(pl, pywikibot.Page))
- for cat in mysite.pagecategories(mainpage):
- self.assertTrue(isinstance(cat, pywikibot.Category))
- for cm in mysite.categorymembers(cat):
- self.assertTrue(isinstance(cat, pywikibot.Page))
- self.assertTrue(all(isinstance(im, pywikibot.ImagePage)
- for im in mysite.pageimages(mainpage)))
- self.assertTrue(all(isinstance(te, pywikibot.Page)
- for te in mysite.pagetemplates(mainpage)))
- for ll in mysite.pagelanglinks(mainpage):
- self.assertTrue(isinstance(ll, pywikibot.Link))
- self.assertTrue(all(isinstance(el, basestring)
- for el in mysite.page_extlinks(mainpage)))
+## def testLinkMethods(self):
+## """Test site methods for getting links to and from a page"""
+##
+## backlinks = set(mysite.pagebacklinks(mainpage))
+## embedded = set(mysite.page_embeddedin(mainpage))
+## refs = set(mysite.pagereferences(mainpage))
+## for bl in backlinks:
+## self.assertTrue(isinstance(bl, pywikibot.Page))
+## self.assertTrue(bl in refs)
+## for ei in embedded:
+## self.assertTrue(isinstance(ei, pywikibot.Page))
+## self.assertTrue(ei in refs)
+## for ref in refs:
+## self.assertTrue(ref in backlinks or ref in embedded)
+## for pl in mysite.pagelinks(mainpage):
+## self.assertTrue(isinstance(pl, pywikibot.Page))
+## for cat in mysite.pagecategories(mainpage):
+## self.assertTrue(isinstance(cat, pywikibot.Category))
+## for cm in mysite.categorymembers(cat):
+## self.assertTrue(isinstance(cat, pywikibot.Page))
+## self.assertTrue(all(isinstance(im, pywikibot.ImagePage)
+## for im in mysite.pageimages(mainpage)))
+## self.assertTrue(all(isinstance(te, pywikibot.Page)
+## for te in mysite.pagetemplates(mainpage)))
+## for ll in mysite.pagelanglinks(mainpage):
+## self.assertTrue(isinstance(ll, pywikibot.Link))
+## self.assertTrue(all(isinstance(el, basestring)
+## for el in mysite.page_extlinks(mainpage)))
def testLoadRevisions(self):
"""Test the site.loadrevisions() method"""
Revision: 5886
Author: nicdumz
Date: 2008-09-12 01:56:26 +0000 (Fri, 12 Sep 2008)
Log Message:
-----------
When the default (or command-line) family + wiki is not Public, getSite() tries to log in before completely loading the wikipedia module, causing a NameError:
( python login.py )
Traceback (most recent call last):
File "\login.py", line 49, in <module>
import wikipedia, config
File "\wikipedia.py", line 6041, in <module>
getSite()
File "\wikipedia.py", line 5930, in getSite
ret.forceLogin()
File "\wikipedia.py", line 4178, in forceLogin
if not self.loggedInAs(sysop = sysop):
File "\wikipedia.py", line 4170, in loggedInAs
self._load(sysop = sysop)
File "\wikipedia.py", line 4756, in _load
text = self.getUrl(url, sysop = sysop)
File "\wikipedia.py", line 4438, in getUrl
uo = MyURLopener()
NameError: global name 'MyURLopener' is not defined
Fixing this by introducing a noLogin parameter in getSite
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-09-10 21:27:40 UTC (rev 5885)
+++ trunk/pywikipedia/wikipedia.py 2008-09-12 01:56:26 UTC (rev 5886)
@@ -5916,7 +5916,7 @@
_sites = {}
_namespaceCache = {}
-def getSite(code=None, fam=None, user=None, persistent_http=None):
+def getSite(code=None, fam=None, user=None, persistent_http=None, noLogin=False):
if code == None:
code = default_code
if fam == None:
@@ -5926,7 +5926,7 @@
_sites[key] = Site(code=code, fam=fam, user=user,
persistent_http=persistent_http)
ret = _sites[key]
- if not ret.family.isPublic():
+ if not ret.family.isPublic() and not noLogin:
ret.forceLogin()
return ret
@@ -6038,7 +6038,10 @@
logfile = None
# Check
try:
- getSite()
+ # if the default family+wiki is a non-public one,
+ # getSite will try login in. We don't want that, the module
+ # is not yet loaded.
+ getSite(noLogin=True)
except KeyError:
print(
u"""Please create a file user-config.py, and put in there:\n
Bugs item #2105373, was opened at 2008-09-11 10:01
Message generated for change (Tracker Item Submitted) made by Item Submitter
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2105373&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: Nobody/Anonymous (nobody)
Assigned to: Nobody/Anonymous (nobody)
Summary: replace.py crashes on redirect pages
Initial Comment:
replace.py (r5884 Python 2.5.1) crashes on redirect pages?
Getting 60 pages from wikipedia:ru...
Sleeping for 18.4 seconds, 2008-09-11 13:40:55
No changes were necessary in [[Esuvee]]
No changes were necessary in [[Et Cetera ()]]
Traceback (most recent call last):
File "D:\pywikipedia\pagegenerators.py", line 763, in __iter__
yield loaded_page
GeneratorExit
Traceback (most recent call last):
File "D:\pywikipedia\replace.py", line 708, in <module>
main()
File "D:\pywikipedia\replace.py", line 704, in main
bot.run()
File "D:\pywikipedia\replace.py", line 373, in run
new_text = self.doReplacements(new_text)
File "D:\pywikipedia\replace.py", line 341, in doReplacements
allowoverlap=self.allowoverlap)
File "D:\pywikipedia\wikipedia.py", line 3315, in replaceExcept
text = text[:match.start()] + replacement + text[match.end():]
UnicodeDecodeError: 'ascii' codec can't decode byte 0xd0 in position 1: ordinal not in range(128)
There is a number of redirect pages following [[Et Cetera ()]] - http://ru.wikipedia.org/w/index.php?title=%D0%A1%D0%BB%D1%83%D0%B6%D0%B5%D0… - may this be a source of the problem?
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2105373&group_…
Revision: 5885
Author: russblau
Date: 2008-09-10 21:27:40 +0000 (Wed, 10 Sep 2008)
Log Message:
-----------
implement several more site methods
Modified Paths:
--------------
branches/rewrite/pywikibot/site.py
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2008-09-10 20:40:55 UTC (rev 5884)
+++ branches/rewrite/pywikibot/site.py 2008-09-10 21:27:40 UTC (rev 5885)
@@ -71,6 +71,7 @@
"""Site methods that are independent of the communication interface."""
# to implement a specific interface, define a Site class that inherits
# from this
+
def __init__(self, code, fam=None, user=None):
"""
@param code: the site's language code
@@ -131,6 +132,14 @@
"""The identifying code for this Site."""
return self.__code
+ def __cmp__(self, other):
+ """Perform equality and inequality tests on Site objects."""
+ if not isinstance(other, Site):
+ return 1
+ if self.family == other.family:
+ return cmp(self.code, other.code)
+ return cmp(self.family.name, other.family.name)
+
def user(self):
"""Return the currently-logged in bot user, or None."""
if self.logged_in():
@@ -206,12 +215,7 @@
"""
if default:
- if self.language() == 'ar':
- # It won't work with REDIRECT[[]] but it work with the local,
- # if problems, try to find a work around. FixMe!
- return self.family.redirect.get(self.code, [u"تحويل"])[0]
- else:
- return self.family.redirect.get(self.code, [u"REDIRECT"])[0]
+ return self.family.redirect.get(self.code, [u"REDIRECT"])[0]
else:
return self.family.redirect.get(self.code, None)
@@ -250,7 +254,102 @@
finally:
self._pagemutex.release()
+ def disambcategory(self):
+ """Return Category in which disambig pages are listed."""
+
+ try:
+ name = self.namespace(14)+':'+self.family.disambcatname[self.code])
+ except KeyError:
+ raise Error(u"No disambiguation category name found for %(site)s"
+ % {'site': self})
+ return pywikibot.Category(pywikibot.Link(name, self))
+ def linkto(self, title, othersite = None):
+ """Return unicode string in the form of a wikilink to 'title'
+
+ Use optional Site argument 'othersite' to generate an interwiki link.
+
+ """
+ # TODO convert to Link method, deprecate
+ if othersite and othersite.code != self.code:
+ return u'[[%s:%s]]' % (self.code, title)
+ else:
+ return u'[[%s]]' % title
+
+ def isInterwikiLink(self, s):
+ """Return True if s is in the form of an interwiki link.
+
+ Interwiki links have the form "foo:bar" or ":foo:bar" where foo is a
+ known language code or family. Called recursively if the first part
+ of the link refers to this site's own family and/or language. Do
+ not include brackets around the link!
+
+ """
+ # TODO: convert to Link method
+ s = s.strip().lstrip(":")
+ if not ':' in s:
+ return False
+ first, rest = s.split(':',1)
+ # interwiki codes are case-insensitive
+ first = first.lower().strip()
+ # commons: forwards interlanguage links to wikipedia:, etc.
+ if self.family.interwiki_forward:
+ interlangTargetFamily = pywikibot.Family(self.family.interwiki_forward)
+ else:
+ interlangTargetFamily = self.family
+ if self.ns_index(first):
+ return False
+ if first in interlangTargetFamily.langs:
+ if first == self.code:
+ return self.isInterwikiLink(rest)
+ else:
+ return True
+ if first in self.family.get_known_families(site = self):
+ if first == self.family.name:
+ return self.isInterwikiLink(rest)
+ else:
+ return True
+ return False
+
+ def redirectRegex(self):
+ """Return a compiled regular expression matching on redirect pages.
+
+ Group 1 in the regex match object will be the target title.
+
+ """
+ #TODO: is this needed, since the API identifies redirects?
+ # (maybe, the API can give false positives)
+ default = 'REDIRECT'
+ try:
+ keywords = set(self.family.redirect[self.code])
+ keywords.add(default)
+ pattern = r'(?:' + '|'.join(keywords) + ')'
+ except KeyError:
+ # no localized keyword for redirects
+ pattern = r'%s' % default
+ # A redirect starts with hash (#), followed by a keyword, then
+ # arbitrary stuff, then a wikilink. The wikilink may contain
+ # a label, although this is not useful.
+ return re.compile(r'\s*#%(pattern)s\s*:?\s*\[\[(.+?)(?:\|.*?)?\]\]'
+ % locals(),
+ re.IGNORECASE | re.UNICODE | re.DOTALL)
+
+ # site-specific formatting preferences
+
+ def category_on_one_line(self):
+ """Return True if this site wants all category links on one line."""
+ return self.code in self.family.category_on_one_line
+
+ def interwiki_putfirst(self):
+ """Return list of language codes for ordering of interwiki links."""
+ return self.family.interwiki_putfirst.get(self.code, None)
+
+ def getSite(self, code):
+ """Return Site object for language 'code' in this Family."""
+
+ return pywikibot.Site(code=code, fam=self.family, user=self.user)
+
+
class APISite(BaseSite):
"""API interface to MediaWiki site.
@@ -301,9 +400,6 @@
## (note, some methods yield other information in a tuple along with the
## Pages; see method docs for details) --
##
-## search(query): query results from Special:Search
-## allpages(): Special:Allpages
-## prefixindex(): Special:Prefixindex
## newpages(): Special:Newpages
## newimages(): Special:Log&type=upload
## longpages(): Special:Longpages
@@ -397,12 +493,16 @@
self._getsiteinfo()
# check whether a login cookie already exists for this user
if hasattr(self, "_userinfo"):
- if sysop:
- name = config.sysopnames[self.family.name][self.code]
- else:
- name = config.usernames[self.family.name][self.code]
- if self._userinfo['name'] == name:
- self._username = name
+ try:
+ if sysop:
+ name = config.sysopnames[self.family.name][self.code]
+ else:
+ name = config.usernames[self.family.name][self.code]
+ if self._userinfo['name'] == name:
+ self._username = name
+ except KeyError:
+ # no username for this site
+ pass
if not self.logged_in(sysop):
loginMan = api.LoginManager(site=self, sysop=sysop)
if loginMan.login(retry = True):
@@ -607,6 +707,20 @@
return self.namespaces()[num]
return self.namespaces()[num][0]
+ def live_version(self):
+ """Return the 'real' version number found on [[Special:Version]]
+
+ Return value is a tuple (int, int, str) of the major and minor
+ version numbers and any other text contained in the version.
+
+ """
+ versionstring = self.siteinfo['generator']
+ m = re.match(r"^MediaWiki ([0-9]+)\.([0-9]+)(.*)$", versionstring)
+ if m:
+ return (int(m.group(1)), int(m.group(2)), m.group(3))
+ else:
+ return None
+
def loadpageinfo(self, page):
"""Load page info from api and save in page attributes"""
title = page.title(withSection=False)
@@ -821,16 +935,16 @@
def pagereferences(self, page, followRedirects=False, filterRedirects=None,
withTemplateInclusion=True, onlyTemplateInclusion=False):
"""Convenience method combining pagebacklinks and page_embeddedin."""
- #TODO Warn about deprecated arguments
+
if onlyTemplateInclusion:
return self.page_embeddedin(page)
if not withTemplateInclusion:
return self.pagebacklinks(page, followRedirects)
import itertools
- return itertools.chain(self.pagebacklinks(
- page, followRedirects, filterRedirects),
- self.page_embeddedin(page, filterRedirects)
- )
+ return itertools.chain(
+ self.pagebacklinks(page, followRedirects, filterRedirects),
+ self.page_embeddedin(page, filterRedirects)
+ )
def pagelinks(self, page, namespaces=None, follow_redirects=False):
"""Iterate internal wikilinks contained (or transcluded) on page.
@@ -854,10 +968,13 @@
for ns in namespaces)
return plgen
- def pagecategories(self, page, withSortKey=False):
+ def pagecategories(self, page, withSortKey=None):
"""Iterate categories to which page belongs."""
- # Sortkey doesn't work with generator; FIXME or deprecate
+        # Sortkey doesn't work with the generator API, so the option is deprecated
+ if withSortKey is not None:
+ logger.debug(
+ "site.pagecategories(): withSortKey option is deprecated")
clgen = api.CategoryPageGenerator("categories", site=self)
if hasattr(page, "_pageid"):
clgen.request['pageids'] = str(page._pageid)
@@ -868,12 +985,14 @@
def pageimages(self, page):
"""Iterate images used (not just linked) on the page."""
+
imtitle = page.title(withSection=False).encode(self.encoding())
imgen = api.ImagePageGenerator("images", titles=imtitle, site=self)
return imgen
def pagetemplates(self, page, namespaces=None):
"""Iterate templates transcluded (not just linked) on the page."""
+
tltitle = page.title(withSection=False).encode(self.encoding())
tlgen = api.PageGenerator("templates", titles=tltitle, site=self)
if namespaces is not None:
@@ -2114,8 +2233,7 @@
# TODO: implement undelete
-
-#### METHODS NOT IMPLEMENTED YET (but may be delegated to Family object) ####
+#### METHODS NOT IMPLEMENTED YET ####
class NotImplementedYet:
# TODO: is this needed any more? can it be obtained from the http module?
@@ -2503,101 +2621,8 @@
cache.append(title)
yield Page(self, title)
- def linkto(self, title, othersite = None):
- """Return unicode string in the form of a wikilink to 'title'
-
- Use optional Site argument 'othersite' to generate an interwiki link.
-
- """
- if othersite and othersite.code != self.code:
- return u'[[%s:%s]]' % (self.code, title)
- else:
- return u'[[%s]]' % title
-
- def isInterwikiLink(self, s):
- """Return True if s is in the form of an interwiki link.
-
- Interwiki links have the form "foo:bar" or ":foo:bar" where foo is a
- known language code or family. Called recursively if the first part
- of the link refers to this site's own family and/or language.
-
- """
- s = s.strip().lstrip(":")
- if not ':' in s:
- return False
- first, rest = s.split(':',1)
- # interwiki codes are case-insensitive
- first = first.lower().strip()
- # commons: forwards interlanguage links to wikipedia:, etc.
- if self.family.interwiki_forward:
- interlangTargetFamily = Family(self.family.interwiki_forward)
- else:
- interlangTargetFamily = self.family
- if self.ns_index(first):
- return False
- if first in interlangTargetFamily.langs:
- if first == self.code:
- return self.isInterwikiLink(rest)
- else:
- return True
- if first in self.family.get_known_families(site = self):
- if first == self.family.name:
- return self.isInterwikiLink(rest)
- else:
- return True
- return False
-
- def redirectRegex(self):
- """Return a compiled regular expression matching on redirect pages.
-
- Group 1 in the regex match object will be the target title.
-
- """
- redDefault = 'redirect'
- red = 'redirect'
- if self.language() == 'ar':
- red = u"تحويل"
- try:
- if redDefault == red:
- redirKeywords = [red] + self.family.redirect[self.code]
- redirKeywordsR = r'(?:' + '|'.join(redirKeywords) + ')'
- else:
- redirKeywords = [red] + self.family.redirect[self.code]
- redirKeywordsR = r'(?:' + redDefault + '|'.join(redirKeywords) + ')'
- except KeyError:
- # no localized keyword for redirects
- if redDefault == red:
- redirKeywordsR = r'%s' % red
- else:
- redirKeywordsR = r'(?:%s|%s)' % (red, redDefault)
- # A redirect starts with hash (#), followed by a keyword, then
- # arbitrary stuff, then a wikilink. The wikilink may contain
- # a label, although this is not useful.
- return re.compile(r'#' + redirKeywordsR +
- '.*?\[\[(.*?)(?:\|.*?)?\]\]',
- re.IGNORECASE | re.UNICODE | re.DOTALL)
-
- def live_version(self):
- """Return the 'real' version number found on [[Special:Version]]
-
- Return value is a tuple (int, int, str) of the major and minor
- version numbers and any other text contained in the version.
-
- """
- global htmldata
- if not hasattr(self, "_mw_version"):
- versionpage = self.getUrl(self.get_address("Special:Version"))
- htmldata = BeautifulSoup(versionpage, convertEntities="html")
- versionstring = htmldata.findAll(text="MediaWiki"
- )[1].parent.nextSibling
- m = re.match(r"^: ([0-9]+)\.([0-9]+)(.*)$", str(versionstring))
- if m:
- self._mw_version = (int(m.group(1)), int(m.group(2)),
- m.group(3))
- else:
- self._mw_version = self.family.version(self.code).split(".")
- return self._mw_version
-
+ # TODO: why should we rely on the family file to contain the correct
+ # encoding?
def checkCharset(self, charset):
"""Warn if charset returned by wiki doesn't match family file."""
if not hasattr(self,'charset'):
@@ -2610,26 +2635,6 @@
"code2encodings has wrong charset for %s. It should be %s, but is %s"
% (repr(self), charset, self.encoding()))
- def shared_image_repository(self):
- """Return a tuple of image repositories used by this site."""
- return self.family.shared_image_repository(self.code)
-
- def __cmp__(self, other):
- """Perform equality and inequality tests on Site objects."""
- if not isinstance(other, Site):
- return 1
- if self.family == other.family:
- return cmp(self.code, other.code)
- return cmp(self.family.name, other.family.name)
-
- def category_on_one_line(self):
- """Return True if this site wants all category links on one line."""
- return self.code in self.family.category_on_one_line
-
- def interwiki_putfirst(self):
- """Return list of language codes for ordering of interwiki links."""
- return self.family.interwiki_putfirst.get(self.code, None)
-
def interwiki_putfirst_doubled(self, list_of_links):
# TODO: is this even needed? No family in the framework has this
# dictionary defined!
@@ -2650,19 +2655,6 @@
else:
return False
- def getSite(self, code):
- """Return Site object for language 'code' in this Family."""
- return getSite(code = code, fam = self.family, user=self.user)
-
def validLanguageLinks(self):
"""Return list of language codes that can be used in interwiki links."""
return self._validlanguages
-
- def disambcategory(self):
- """Return Category in which disambig pages are listed."""
- import catlib
- try:
- return catlib.Category(self,
- self.namespace(14)+':'+self.family.disambcatname[self.code])
- except KeyError:
- raise NoPage(u'No page %s.' % page)