Revision: 5753
Author: nicdumz
Date: 2008-07-24 17:38:01 +0000 (Thu, 24 Jul 2008)
Log Message:
-----------
Wait for the end of the thread that is putting the pages before deleting Site objects. This should fix [ pywikipediabot-Bugs-2026525 ] diskcache fails with an IOError[22] in welcome.py
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-07-24 16:51:51 UTC (rev 5752)
+++ trunk/pywikipedia/wikipedia.py 2008-07-24 17:38:01 UTC (rev 5753)
@@ -6397,6 +6397,8 @@
for site in _sites.itervalues():
if site._mediawiki_messages:
try:
+ while(_putthread.isAlive()):
+ time.sleep(1)
site._mediawiki_messages.delete()
except OSError:
# stopme has been called several times...
Revision: 5750
Author: nicdumz
Date: 2008-07-24 12:50:38 +0000 (Thu, 24 Jul 2008)
Log Message:
-----------
* Several regex fixes (handle unquoted reference names)
* Better var names to make script understanding easier
* Adding support for quoted/unquoted ref names. Will not add quotes when there were no quotes wrapping the ref name
* Detect when a reference name is used with several different references, and remember it to eventually keep one reference per name
Modified Paths:
--------------
trunk/pywikipedia/reflinks.py
Modified: trunk/pywikipedia/reflinks.py
===================================================================
--- trunk/pywikipedia/reflinks.py 2008-07-23 16:59:16 UTC (rev 5749)
+++ trunk/pywikipedia/reflinks.py 2008-07-24 12:50:38 UTC (rev 5750)
@@ -229,18 +229,18 @@
"""
def __init__(self):
# Match references
- self.REFS = re.compile(u'(?i)<ref(?P<name>[^>/]*)>(?P<content>.*?)</ref>')
- self.NAMES = re.compile(u'(?i).*name\s*=\s*(?P<quote>"?)\s*(?P<name>.*?)\s*(?P=quote).*')
- self.GROUPS = re.compile(u'(?i).*group\s*=\s*(?P<quote>"?)\s*(?P<group>.*?)\s*(?P=quote).*')
+ self.REFS = re.compile(u'(?i)<ref(?P<params>[^>/]*)>(?P<content>.*?)</ref>')
+ self.NAMES = re.compile(u'(?i).*name\s*=\s*(?P<quote>"?)\s*(?P<name>.+)\s*(?P=quote).*')
+ self.GROUPS = re.compile(u'(?i).*group\s*=\s*(?P<quote>"?)\s*(?P<group>.+)\s*(?P=quote).*')
def process(self, text):
# keys are ref groups
# values are a dict where :
# keys are ref content
- # values are [name, [list of full ref matches]]
+ # values are [name, [list of full ref matches], quoted, need_to_change]
foundRefs = {}
- foundRefNames = []
- # Replace key by value
+ foundRefNames = {}
+ # Replace key by [value, quoted]
namedRepl = {}
for match in self.REFS.finditer(text):
@@ -248,8 +248,8 @@
if not content.strip():
continue
- name = match.group('name')
- group = self.GROUPS.match(name)
+ params = match.group('params')
+ group = self.GROUPS.match(params)
if not foundRefs.has_key(group):
foundRefs[group] = {}
@@ -258,19 +258,34 @@
v = groupdict[content]
v[1].append(match.group())
else:
- v = [None, [match.group()]]
- n = self.NAMES.match(name)
- if n:
- n = n.group('name')
+ v = [None, [match.group()], False, False]
+ name = self.NAMES.match(params)
+ if name:
+ quoted = name.group('quote') == '"'
+ name = name.group('name')
if v[0]:
- namedRepl[n] = v[0]
+ if v[0] != name:
+ namedRepl[name] = [v[0], v[2]]
else:
- v[0] = n
- foundRefNames.append(n)
+ #First name associated with this content
+
+ if name == 'population':
+ wikipedia.output(content)
+ if not foundRefNames.has_key(name):
+ # first time ever we meet this name
+ if name == 'population':
+ print "in"
+ v[2] = quoted
+ v[0] = name
+ else:
+ # if has_key, means that this name is used
+ # with another content. We'll need to change it
+ v[3] = True
+ foundRefNames[name] = 1
groupdict[content] = v
id = 1
- while 'autogenerated%s' % id in foundRefNames:
+ while foundRefNames.has_key('autogenerated%s' % id):
id += 1
for (g, d) in foundRefs.iteritems():
if g:
@@ -279,13 +294,15 @@
group = ""
for (k, v) in d.iteritems():
- if len(v[1]) == 1:
+ if len(v[1]) == 1 and not v[3]:
continue
name = v[0]
if not name:
name = 'autogenerated%s' % id
id += 1
- named = u'<ref %sname="%s">%s</ref>' % (group, name, k)
+ elif v[2]:
+ name = u'"%s"' % name
+ named = u'<ref %sname=%s>%s</ref>' % (group, name, k)
text = text.replace(v[1][0], named, 1)
# make sure that the first (named ref) is not
@@ -294,14 +311,17 @@
header = text[:pos]
end = text[pos:]
- unnamed = u'<ref %sname="%s" />' % (group, name)
+ unnamed = u'<ref %sname=%s />' % (group, name)
for ref in v[1][1:]:
end = end.replace(ref, unnamed)
text = header + end
for (k,v) in namedRepl.iteritems():
# TODO : Support ref groups
- text = re.sub(u'<ref name\s*=\s*(?P<quote>"?)\s*%s\s*(?P=quote)\s*/>' % k, u'<ref name="%s" />' % v, text)
+ name = v[0]
+ if v[1]:
+ name = u'"%s"' % name
+ text = re.sub(u'<ref name\s*=\s*(?P<quote>"?)\s*%s\s*(?P=quote)\s*/>' % k, u'<ref name=%s />' % name, text)
return text
class ReferencesRobot:
Bugs item #2026525, was opened at 2008-07-24 10:26
Message generated for change (Tracker Item Submitted) made by Item Submitter
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2026525&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: NicDumZ Nicolas Dumazet (nicdumz)
Assigned to: Nobody/Anonymous (nobody)
Summary: diskcache fails with an IOError[22] in welcome.py
Initial Comment:
Traceback (most recent call last):
File "pywikipedia/welcome.py", line 967, in <module>
for number_user in main(settingsBot):
File "pywikipedia/welcome.py", line 704, in main
contrib = string.capitalize(wsite.mediawiki_message('contribslink'))
File "/home/filnik/pywikipedia/wikipedia.py", line 4678, in mediawiki_message
value = self._mediawiki_messages[key]
File "/home/filnik/pywikipedia/diskcache.py", line 94, in __getitem__
self.cache_file.seek(self.lookup[i])
IOError: [Errno 22] Invalid argument
Does that mean that some lookup values are still -1 even after the initialization?
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2026525&group_…
Revision: 5747
Author: nicdumz
Date: 2008-07-23 15:37:34 +0000 (Wed, 23 Jul 2008)
Log Message:
-----------
* Replace all the named refs when a name is changed
* Regex fix
Modified Paths:
--------------
trunk/pywikipedia/reflinks.py
Modified: trunk/pywikipedia/reflinks.py
===================================================================
--- trunk/pywikipedia/reflinks.py 2008-07-21 20:35:21 UTC (rev 5746)
+++ trunk/pywikipedia/reflinks.py 2008-07-23 15:37:34 UTC (rev 5747)
@@ -229,7 +229,7 @@
"""
def __init__(self):
# Match references
- self.REFS = re.compile(u'(?i)<ref(?P<name>[^>]*)>(?P<content>.*?)</ref>')
+ self.REFS = re.compile(u'(?i)<ref(?P<name>[^>/]*)>(?P<content>.*?)</ref>')
self.NAMES = re.compile(u'(?i).*name\s*=\s*(?P<quote>"?)\s*(?P<name>.*?)\s*(?P=quote).*')
self.GROUPS = re.compile(u'(?i).*group\s*=\s*(?P<quote>"?)\s*(?P<group>.*?)\s*(?P=quote).*')
@@ -240,6 +240,8 @@
# values are [name, [list of full ref matches]]
foundRefs = {}
foundRefNames = []
+ # Replace key by value
+ namedRepl = {}
for match in self.REFS.finditer(text):
content = match.group('content')
@@ -257,12 +259,14 @@
v[1].append(match.group())
else:
v = [None, [match.group()]]
- if not v[0]:
- n = self.NAMES.match(name)
- if n:
- n = n.group('name')
+ n = self.NAMES.match(name)
+ if n:
+ n = n.group('name')
+ if v[0]:
+ namedRepl[n] = v[0]
+ else:
v[0] = n
- foundRefNames.append(n)
+ foundRefNames.append(n)
groupdict[content] = v
id = 1
@@ -294,6 +298,10 @@
for ref in v[1][1:]:
end = end.replace(ref, unnamed)
text = header + end
+
+ for (k,v) in namedRepl.iteritems():
+ # TODO : Support ref groups
+ text = re.sub(u'<ref name\s*=\s*(?P<quote>"?)\s*%s\s*(?P=quote)\s*/>' % k, u'<ref name="%s" />' % v, text)
return text
class ReferencesRobot:
Revision: 5745
Author: nicdumz
Date: 2008-07-21 11:39:00 +0000 (Mon, 21 Jul 2008)
Log Message:
-----------
* Implementing a TODO
* Minor code cleanups
Modified Paths:
--------------
trunk/pywikipedia/interwiki.py
Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py 2008-07-21 10:45:27 UTC (rev 5744)
+++ trunk/pywikipedia/interwiki.py 2008-07-21 11:39:00 UTC (rev 5745)
@@ -546,8 +546,13 @@
def openSites(self):
"""Return a list of sites for all things we still need to do"""
- return [page.site() for page in self.todo] # TODO: remove duplicates
+ distinctSites = {}
+ for page in self.todo:
+ site = page.site()
+ distinctSites[site] = site
+ return distinctSites.values()
+
def willWorkOn(self, site):
"""
By calling this method, you 'promise' this instance that you will
@@ -557,7 +562,7 @@
# Bug-check: Isn't there any work still in progress? We can't work on
# different sites at a time!
if self.pending != []:
- raise 'BUG: Can\'t start to work on %s; still working on %s' % (repr(site), self.pending)
+ raise 'BUG: Can\'t start to work on %s; still working on %s' % (site, self.pending)
# Prepare a list of suitable pages
for page in self.todo:
if page.site() == site:
@@ -741,8 +746,6 @@
# Register this fact at the todo-counter.
counter.minus(page.site())
- # Assume it's not a redirect
- isRedirect = False
# Now check whether any interwiki links should be added to the
# todo list.
if page.section() and not page.isRedirectPage():
@@ -759,7 +762,6 @@
self.originPage = redirectTargetPage
self.pending.append(redirectTargetPage)
counter.plus(redirectTargetPage.site)
- pass
else:
# This is a redirect page to the origin. We don't need to
# follow the redirection.
@@ -767,7 +769,6 @@
for page2 in self.todo:
counter.minus(page2.site())
self.todo = []
- pass
elif not globalvar.followredirect:
wikipedia.output(u"NOTE: not following redirects.")
else: