Revision: 5909
Author: russblau
Date: 2008-09-19 15:00:13 +0000 (Fri, 19 Sep 2008)
Log Message:
-----------
Improve recordkeeping
Modified Paths:
--------------
trunk/pywikipedia/category_redirect.py
Modified: trunk/pywikipedia/category_redirect.py
===================================================================
--- trunk/pywikipedia/category_redirect.py 2008-09-19 11:27:33 UTC (rev 5908)
+++ trunk/pywikipedia/category_redirect.py 2008-09-19 15:00:13 UTC (rev 5909)
@@ -488,18 +488,22 @@
for cat in pagegenerators.PreloadingGenerator(catpages, 120):
cat_title = cat.titleWithoutNamespace()
if "category redirect" in cat_title:
- self.log_text.append("* Ignoring %s%s" % (self.catprefix, cat_title))
+ self.log_text.append(u"* Ignoring %s%s"
+ % (self.catprefix, cat_title))
continue
try:
text = cat.get(get_redirect=True)
except wikipedia.Error:
self.log_text.append(u"* Could not load %s%s; ignoring"
- % (self.catprefix, cat_title))
+ % (self.catprefix, cat_title))
continue
match = template_regex.search(text)
if match is None:
self.log_text.append(u"* False positive: %s" % cat_title)
continue
+ if cat_title not in record:
+ # make sure every redirect has a record entry
+ record[cat_title] = {today: None}
catlist.append(cat)
destination = match.group(2)
target = catlib.Category(self.site, self.catprefix+destination)
@@ -514,6 +518,12 @@
u"* Removed category prefix from parameter in %s"
% cat.aslink(textlink=True))
+ # delete record entries for non-existent categories
+ for cat_name in list(record.keys()):
+ if catlib.Category(self.site,
+ self.catprefix+cat_name) not in catmap:
+ del record[cat_name]
+
wikipedia.output(u"")
wikipedia.output(u"Checking %s destination categories" % len(destmap))
for dest in pagegenerators.PreloadingGenerator(destmap.keys(), 120):
@@ -568,8 +578,8 @@
wikipedia.output(u"")
wikipedia.output(u"Moving pages out of %s redirected categories."
% len(cats_to_empty))
- thread_limit = int(math.log(len(cats_to_empty), 8) + 1)
- threadpool = ThreadList(limit=1) # temporarily disabling multi-threads
+# thread_limit = int(math.log(len(cats_to_empty), 8) + 1)
+ threadpool = ThreadList(limit=1) # disabling multi-threads
for cat in cats_to_empty:
cat_title = cat.titleWithoutNamespace()
@@ -595,16 +605,8 @@
u"* [[:%s%s]]: %d found, %d moved"
% (self.catprefix, title, found, moved))
counts[title] = found
+ record[title][today] = found
- for cat in record.keys():
- if cat not in counts.keys():
- del record[cat]
- for cat in counts.keys():
- if counts[cat] is not None:
- if counts[cat]:
- record.setdefault(cat, {})[today] = counts[cat]
- else:
- record.setdefault(cat, {})
cPickle.dump(record, open(datafile, "wb"))
wikipedia.setAction(wikipedia.translate(self.site.lang,
Show replies by date