Revision: 4112
Author: siebrand
Date: 2007-08-26 13:07:35 +0000 (Sun, 26 Aug 2007)
Log Message:
-----------
* Changes to getFileVersionHistory() (it now parses the "filehistory" HTML table of the image page) to make a script work (submitted by Filnik)
* EOL whitespace removed
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2007-08-26 10:46:37 UTC (rev 4111)
+++ trunk/pywikipedia/wikipedia.py 2007-08-26 13:07:35 UTC (rev 4112)
@@ -1,4 +1,4 @@
-## -*- coding: utf-8 -*-
+## -*- coding: utf-8 -*-
"""
Library to get and put pages on a MediaWiki.
@@ -201,7 +201,7 @@
class ServerError(Error):
"""Got unexpected server response"""
-
+
class BadTitle(Error):
"""Server responded with BadTitle."""
@@ -237,12 +237,12 @@
# to have an edit restriction, but we do not know yet whether the
# restriction affects us or not
self._editrestriction = False
-
+
if site == None:
site = getSite()
elif type(site) in [type(''), type(u'')]:
site = getSite(site)
-
+
self._site = site
if not insite:
@@ -250,16 +250,16 @@
# Convert HTML entities to unicode
t = html2unicode(title)
-
+
# Convert URL-encoded characters to unicode
# Sometimes users copy the link to a site from one to another. Try both the source site and the destination site to decode.
t = url2unicode(t, site = insite, site2 = site)
-
+
#Normalize unicode string to a NFC (composed) format to allow proper string comparisons
# According to http://svn.wikimedia.org/viewvc/mediawiki/branches/REL1_6/phase3/includes/n…
# the mediawiki code normalizes everything to NFC, not NFKC (which might result in information loss).
t = unicodedata.normalize('NFC', t)
-
+
# Clean up the name, it can come from anywhere.
# Replace underscores by spaces, also multiple spaces and underscores with a single space
# Strip spaces at both ends
@@ -328,7 +328,7 @@
if sectionStart >= 0:
self._section = t[sectionStart+1:].strip()
self._section = sectionencode(self._section, self.site().encoding())
- if self._section == u'': self._section = None
+ if self._section == u'': self._section = None
t = t[:sectionStart].strip()
else:
self._section = None
@@ -341,10 +341,10 @@
if self._namespace != 0:
t = self.site().namespace(self._namespace) + u':' + t
-
+
if self._section:
t += u'#' + self._section
-
+
self._title = t
self.editRestriction = None
self._permalink = None
@@ -690,7 +690,7 @@
Get the permalink page for this page
"""
return "%s://%s%s&oldid=%i"%(self.site().protocol, self.site().hostname(), self.site().get_address(self.title()), self.latestRevision())
-
+
def latestRevision(self):
"""
Get the latest revision for this page
@@ -764,7 +764,7 @@
templates = self.templatesWithParams();
except (NoPage, IsRedirectPage, SectionError):
return True
-
+
try:
if self.editRestriction:
self.site().forceLogin(sysop=True)
@@ -795,10 +795,10 @@
return True
# no restricting template found
return True
-
+
def userName(self):
return self._userName
-
+
def isIpEdit(self):
return self._ipedit
@@ -1442,7 +1442,7 @@
name = Page(self.site(), name).title()
result.append((name, params))
return result
-
+
def templatePages(self):
"""
Gives a list of Page objects containing the templates used on the page. Template parameters are ignored.
@@ -1680,7 +1680,7 @@
now = time.time()
count = 0
- output = []
+ output = []
while count < max and max != -1:
if self.site().hostname() in config.authenticate.keys():
@@ -1691,16 +1691,16 @@
data = response.read().decode(self.site().encoding())
else:
response, data = self.site().postForm(address, predata)
-
+
get_throttle.setDelay(time.time() - now)
data = simplejson.loads(data)
- page = data['query']['pages'].values()[0]
+ page = data['query']['pages'].values()[0]
if 'missing' in page:
raise NoPage, 'Page %s not found' % self
revisions = page.get('revisions', ())
for revision in revisions:
if not comment:
- output.append((revision['timestamp'],
+ output.append((revision['timestamp'],
revision['user'], revision.get('*', u'')))
else:
output.append((revision['timestamp'], revision['user'],
@@ -1714,7 +1714,7 @@
break
return output
fullRevisionHistory = fullVersionHistory
-
+
def contributingUsers(self):
"""
Returns a set of all user names (including anonymous IPs) of those who
@@ -1929,7 +1929,7 @@
self._deletedRevs = None
#TODO: Check for errors below (have we succeeded? etc):
return self.site().postForm(address,formdata,sysop=True)
-
+
def protect(self, edit = 'sysop', move = 'sysop', unprotect = False, reason = None, prompt = True, throttle = False):
"""(Un)protects a wiki page. Requires administrator status. If reason is None,
asks for a reason. If prompt is True, asks the user if he wants to protect the page.
@@ -1951,7 +1951,7 @@
answer = inputChoice(u'Do you want to (un)protect %s?' % self.aslink(forceInterwiki = True), ['Yes', 'No'], ['y', 'N'], 'N')
if answer in ['y', 'Y']:
host = self.site().hostname()
-
+
self.site().forceLogin(sysop = True)
token = self.site().getToken(self, sysop = True)
@@ -1984,31 +1984,31 @@
output(u'Protection failed:')
output(data)
return False
-
+
def removeImage(self, image, put = False, summary = None, safe = True):
return self.replaceImage(image, None, put, summary, safe)
-
+
def replaceImage(self, image, replacement = None, put = False, summary = None, safe = True):
"""Replace all occurences of an image by another image.
- Giving None as argument for replacement will delink
- instead of replace.
-
+ Giving None as argument for replacement will delink
+ instead of replace.
+
The argument image must be without namespace and all
spaces replaced by underscores.
-
+
If put is false, the new text will be returned.
-
+
If put is true, the edits will be saved to the wiki
- and True will be returned on succes, and otherwise
+ and True will be returned on succes, and otherwise
False. Edit errors propagate."""
-
+
# Copyright (c) Orgullomoore, Bryan
-
+
site = self.site()
-
+
text = self.get()
new_text = text
-
+
def create_regex(s):
s = re.escape(s)
return ur'(?:[%s%s]%s)' % (s[0].upper(), s[0].lower(), s[1:])
@@ -2019,7 +2019,7 @@
# note that the colon is already included here
r_namespace = ur'\s*(?:%s)\s*\:\s*' % u'|'.join(map(create_regex_i, namespaces))
r_image = u'(%s)' % create_regex(image).replace(r'\_', '[ _]')
-
+
def simple_replacer(match, groupNumber = 1):
if replacement == None:
return u''
@@ -2041,24 +2041,24 @@
new_text = new_text[:m.start()] + simple_replacer(m, 2) + new_text[m.end():]
# Remove the image from galleries
- r_galleries = ur'(?s)(\<%s\>)(?s)(.*?)(\<\/%s\>)' % (create_regex_i('gallery'),
+ r_galleries = ur'(?s)(\<%s\>)(?s)(.*?)(\<\/%s\>)' % (create_regex_i('gallery'),
create_regex_i('gallery'))
r_gallery = ur'(?m)^((?:%s)?)(%s)(\s*(?:\|.*?)?\s*)$' % (r_namespace, r_image)
def gallery_replacer(match):
- return ur'%s%s%s' % (match.group(1), re.sub(r_gallery,
+ return ur'%s%s%s' % (match.group(1), re.sub(r_gallery,
simple_replacer, match.group(2)), match.group(3))
new_text = re.sub(r_galleries, gallery_replacer, new_text)
-
+
if (text == new_text) or (not safe):
# All previous steps did not work, so the image is
# likely embedded in a complicated template.
r_templates = ur'(?s)(\{\{.*?\}\})'
r_complicated = u'(?s)((?:%s)?)%s' % (r_namespace, r_image)
-
+
def template_replacer(match):
return re.sub(r_complicated, simple_replacer, match.group(1))
new_text = re.sub(r_templates, template_replacer, new_text)
-
+
if put:
if text != new_text:
# Save to the wiki
@@ -2067,7 +2067,7 @@
return False
else:
return new_text
-
+
class ImagePage(Page):
# a Page in the Image namespace
def __init__(self, site, title = None, insite = None):
@@ -2114,18 +2114,20 @@
def getFileVersionHistory(self):
result = []
- history = re.search('(?s)<ul class="special">.+?</ul>', self.getImagePageHtml())
-
- if history:
- lineR = re.compile('<li> \(.+?\) \(.+?\) <a href=".+?" title=".+?">(?P<datetime>.+?)</a> . . <a href=".+?" title=".+?">(?P<username>.+?)</a> \(.+?\) . . (?P<resolution>\d+.+?\d+) \((?P<size>[\d,\.]+) .+?\)( <span class="comment">(?P<comment>.*?)</span>)?</li>')
-
- for match in lineR.finditer(history.group()):
- datetime = match.group('datetime')
- username = match.group('username')
- resolution = match.group('resolution')
- size = match.group('size')
- comment = match.group('comment') or ''
- result.append((datetime, username, resolution, size, comment))
+ history = self.getImagePageHtml()
+ pat = re.compile(r'</p><table class=\"filehistory\">((.*?\n)*?)</table>', re.M)
+ lineR = re.findall(pat, history)[0][0]
+ for match in lineR.split('\n'):
+ if not '(<a href=' in match:
+ continue
+ res = re.findall(r'\">(\d\d:\d\d, \d\d .*? \d\d\d\d)</a></td><td><a href=\".*?\" (?:class=\"new\" |)title=\".*?\">(.*?)</a> ' + \
+ '\(.*?\)</td><td>(.*?)</td><td class=\"mw-imagepage-filesize\">(.*?)</td><td>(.*?)</td></tr>', match)[0]
+ datetime = res[0]
+ username = res[1]
+ size = res[2]
+ resolution = res[3]
+ comment = res[4]
+ result.append((datetime, username, resolution, size, comment))
return result
def getFileVersionHistoryTable(self):
@@ -2249,7 +2251,7 @@
# seems to be the safest possible time.
page2._startTime = str(int(timestamp)+1)
if section:
- m = re.search("\.3D\_*(\.27\.27+)?(\.5B\.5B)?\_*%s\_*(\.5B\.5B)?(\.27\.27+)?\_*\.3D" % re.escape(section), sectionencode(text,page2.site().encoding()))
+ m = re.search("\.3D\_*(\.27\.27+)?(\.5B\.5B)?\_*%s\_*(\.5B\.5B)?(\.27\.27+)?\_*\.3D" % re.escape(section), sectionencode(text,page2.site().encoding()))
if not m:
try:
page2._getexception
@@ -2259,7 +2261,7 @@
page2._getexception = SectionError
# Store the content
page2._contents = text
-
+
def headerDone(self, header):
# Verify our family data
lang = self.site.lang
@@ -2377,7 +2379,7 @@
def logfn(self):
import wikipediatools as _wt
return _wt.absoluteFilename('throttle.log')
-
+
def checkMultiplicity(self):
self.lock.acquire()
try:
@@ -2545,7 +2547,7 @@
old = re.compile(old, re.IGNORECASE | re.UNICODE)
else:
old = re.compile(old)
-
+
#noTouch = '|'.join([exceptions[name] for name in exceptList])
#noTouchR = re.compile(noTouch)
# How much of the text we have looked at so far
@@ -3319,7 +3321,7 @@
compressedstream = StringIO.StringIO(text)
gzipper = gzip.GzipFile(fileobj=compressedstream)
text = gzipper.read()
-
+
# Find charset in the content-type meta tag
contentType = f.info()['Content-Type']
R = re.compile('charset=([^\'\";]+)')
@@ -3438,7 +3440,7 @@
path = self.newpages_address(n=number)
get_throttle()
html = self.getUrl(path)
-
+
entryR = re.compile('<li[^>]*>(?P<date>.+?) \S*?<a href=".+?" title="(?P<title>.+?)">.+?</a>.+?[\(\[](?P<length>\d+)[^\)\]]*[\)\]] .?<a href=".+?" title=".+?:(?P<username>.+?)">')
for m in entryR.finditer(html):
date = m.group('date')
@@ -3838,7 +3840,7 @@
except KeyError:
# no localized keyword for redirects
redirKeywordsR = r'redirect'
- # A redirect starts with hash (#), followed by a keyword, then
+ # A redirect starts with hash (#), followed by a keyword, then
# arbitrary stuff, then a wikilink. The link target ends before
# either a | or a ].
return re.compile(r'#' + redirKeywordsR + '.*?\[\[(.*?)(?:\]|\|)', re.IGNORECASE | re.UNICODE | re.DOTALL)
@@ -4106,7 +4108,7 @@
def languages(self):
return self.family.langs.keys()
-
+
def validLanguageLinks(self):
return self._validlanguages
@@ -4557,7 +4559,7 @@
while(output_cache):
(args, kwargs) = output_cache.pop(0)
ui.output(*args, **kwargs)
-
+
def input(question, password = False):
"""
Asks the user a question, then returns the user's answer.
@@ -4573,12 +4575,12 @@
input_lock.acquire()
try:
data = ui.input(question, password)
- finally:
+ finally:
flush_output_cache()
input_lock.release()
-
+
return data
-
+
def inputChoice(question, answers, hotkeys, default = None):
"""
Asks the user a question and offers several options, then returns the
@@ -4680,7 +4682,7 @@
_putthread.setName('Put-Thread')
_putthread.setDaemon(True)
_putthread.start()
-
+
def stopme():
"""This should be run when a bot does not interact with the Wiki, or
when it has stopped doing so. After a bot has run stopme() it will
@@ -4696,9 +4698,9 @@
import datetime
remaining = datetime.timedelta(seconds=(page_put_queue.qsize()+1) * config.put_throttle)
output('Waiting for %i pages to be put. Estimated time remaining: %s' % (page_put_queue.qsize()+1, remaining))
-
+
page_put_queue.put((None, None, None, None, None, None))
-
+
while(_putthread.isAlive()):
try:
_putthread.join(1)
@@ -4756,5 +4758,4 @@
print 'Pywikipediabot %s' % version.getversion()
print 'Python %s' % sys.version
doctest.testmod()
-
-
+
Revision: 4111
Author: a_engels
Date: 2007-08-26 10:46:37 +0000 (Sun, 26 Aug 2007)
Log Message:
-----------
Decide the comment text at the time of putting the page into the queue, and not at the time of actually saving. In the old version, when using solve_disambiguation.py -start, the bot would indicate the wrong page as the disambiguation page that was worked on.
Modified Paths:
--------------
trunk/pywikipedia/solve_disambiguation.py
Modified: trunk/pywikipedia/solve_disambiguation.py
===================================================================
--- trunk/pywikipedia/solve_disambiguation.py 2007-08-25 22:09:12 UTC (rev 4110)
+++ trunk/pywikipedia/solve_disambiguation.py 2007-08-26 10:46:37 UTC (rev 4111)
@@ -74,9 +74,6 @@
# Application specific imports
import wikipedia, pagegenerators, editarticle
-# This is a purely interactive robot. We set the delays lower.
-#wikipedia.put_throttle.setDelay(4)
-
# Summary message when working on disambiguation pages
msg = {
'cs': u'Odstranění linku na rozcestník [[%s]] s použitím robota',
@@ -452,6 +449,7 @@
self.mysite = wikipedia.getSite()
self.mylang = self.mysite.language()
+ self.comment = None
self.setupRegexes()
@@ -526,7 +524,7 @@
if choice in ['y', 'Y']:
redir_text = '#%s [[%s]]' % (self.mysite.redirect(default=True), target)
try:
- refPage.put_async(redir_text)
+ refPage.put_async(redir_text,comment=self.comment)
except wikipedia.PageNotSaved, error:
wikipedia.output(u'Page not saved: %s' % error.args)
else:
@@ -715,7 +713,7 @@
wikipedia.output(u'')
# save the page
try:
- refPage.put_async(text)
+ refPage.put_async(text,comment=self.comment)
except wikipedia.LockedPage:
wikipedia.output(u'Page not saved: page is locked')
except wikipedia.PageNotSaved, error:
@@ -770,17 +768,15 @@
def setSummaryMessage(self, disambPage):
# first check whether user has customized the edit comment
if wikipedia.config.disambiguation_comment.has_key(self.mysite.family.name) and wikipedia.config.disambiguation_comment[self.mysite.family.name].has_key(self.mylang):
- comment = wikipedia.translate(self.mysite,
+ self.comment = wikipedia.translate(self.mysite,
wikipedia.config.disambiguation_comment[
self.mysite.family.name]
) % disambPage.title()
elif disambPage.isRedirectPage():
# when working on redirects, there's another summary message
- comment = wikipedia.translate(self.mysite, msg_redir) % disambPage.title()
+ self.comment = wikipedia.translate(self.mysite, msg_redir) % disambPage.title()
else:
- comment = wikipedia.translate(self.mysite, msg) % disambPage.title()
-
- wikipedia.setAction(comment)
+ self.comment = wikipedia.translate(self.mysite, msg) % disambPage.title()
def run(self):
if self.main_only:
Hi Ben,
I think you're having the same problems that Russell had. Hope this helps.
Daniel
---------- Forwarded message ----------
Subject: Re: [Pywikipedia-l] SVN commits
Date: Wednesday, 25 July 2007 20:21
From: Brion Vibber <brion(a)wikimedia.org>
To: Russell Blau <russblau(a)imapmail.org>
Cc: pywikipedia-l(a)lists.wikimedia.org
Russell Blau wrote:
> Can someone explain how to do a commit to the new repository? I'm using
> TortoiseSVN on a Windows XP machine. Checking out the files worked fine;
> but trying to commit gets me the following error:
>
> C:\...\pywikipedia>svn commit wikipedia.py --username russblau --message "Catch NoPage exception in Page.templates()"
> svn: Commit failed (details follow):
> svn: Can't create directory '/svnroot/pywikipedia/db/transactions/3896-1.txn': Permission denied
>
> I've got developer access (right?) and I've got my SSH key loaded in
> Pageant. What else do I need to do to be able to commit?
Make sure you checked out your working directory from the svn+ssh://
repository, not the http:// repository (which is read-only).
-- brion vibber (brion @ wikimedia.org)
_______________________________________________
Pywikipedia-l mailing list
Pywikipedia-l(a)lists.wikimedia.org
http://lists.wikimedia.org/mailman/listinfo/pywikipedia-l
-------------------------------------------------------