Revision: 7974
Author: russblau
Date: 2010-03-01 21:42:57 +0000 (Mon, 01 Mar 2010)
Log Message:
-----------
Changes to drastically reduce the time and memory demands of this bot on larger wikis. To
achieve this, checking for double-redirects and broken redirects (to deleted categories)
is now done only for redirects that contain pages; the old version did this check for all
redirected categories.
Modified Paths:
--------------
branches/rewrite/scripts/category_redirect.py
Modified: branches/rewrite/scripts/category_redirect.py
===================================================================
--- branches/rewrite/scripts/category_redirect.py 2010-02-28 16:06:08 UTC (rev 7973)
+++ branches/rewrite/scripts/category_redirect.py 2010-03-01 21:42:57 UTC (rev 7974)
@@ -419,129 +419,57 @@
)
# get a list of all members of the category-redirect category
- catpages = list(redircat.subcategories())
+ catpages = dict((c, None) for c in
+ self.site.categorymembers(redircat, namespaces=[14]))
- # preload the category pages for redirected categories
+ # check the category pages for redirected categories
pywikibot.output(u"")
- pywikibot.output(u"Preloading %s category redirect pages"
+ pywikibot.output(u"Checking %s category redirect pages"
% len(catpages))
- for cat in pagegenerators.PreloadingGenerator(catpages, step=250):
- catdata = cat.categoryinfo
- if "size" in catdata and int(catdata['size']):
- # save those categories that have contents
- nonemptypages.append(cat)
+ for cat in catpages:
cat_title = cat.title(withNamespace=False)
if "category redirect" in cat_title:
self.log_text.append(u"* Ignoring %s"
% cat.title(asLink=True, textlink=True))
continue
- try:
- if not cat.isCategoryRedirect():
- self.log_text.append(u"* False positive: %s"
- % cat.title(asLink=True,
- textlink=True))
- continue
- except pywikibot.Error:
- self.log_text.append(u"* Could not load %s; ignoring"
- % cat.title(asLink=True, textlink=True))
- continue
- catlist.append(cat)
- target = cat.getCategoryRedirectTarget()
- destination = target.title(withNamespace=False)
- destmap.setdefault(target, []).append(cat)
- catmap[cat] = destination
+ if hasattr(cat, "_catinfo"):
+ # skip empty categories that don't return a "categoryinfo" key
+ catdata = cat.categoryinfo
+ if "size" in catdata and int(catdata['size']):
+ # save those categories that have contents
+ nonemptypages.append(cat)
if cat_title not in record:
# make sure every redirect has a record entry
record[cat_title] = {today: None}
- newredirs.append("*# %s -> %s"
- % (cat.title(asLink=True, textlink=True),
- target.title(asLink=True, textlink=True)))
-## if match.group(1):
-## # category redirect target starts with "Category:" - fix it
-## text = text[ :match.start(1)] + text[match.end(1): ]
-## try:
-## cat.put(text,
-## u"Robot: fixing category redirect parameter
format")
-## self.log_text.append(
-## u"* Removed category prefix from parameter in %s"
-## % cat.title(asLink=True, textlink=True))
-## except pywikibot.Error:
-## self.log_text.append(
-## u"* Unable to save changes to %s"
-## % cat.title(asLink=True, textlink=True))
+ try:
+ newredirs.append("*# %s -> %s"
+ % (cat.title(asLink=True, textlink=True),
+ cat.getCategoryRedirectTarget().title(
+ asLink=True, textlink=True)))
+ except pywikibot.Error:
+ pass
# delete record entries for non-existent categories
for cat_name in record.keys():
- if pywikibot.Category(
- pywikibot.Link(self.catprefix+cat_name, self.site)
- ) not in catmap:
+ if pywikibot.Category(self.site, self.catprefix + cat_name
+ ) not in catpages:
del record[cat_name]
pywikibot.output(u"")
- pywikibot.output(u"Checking %s destination categories" % len(destmap))
- for dest in pagegenerators.PreloadingGenerator(destmap, step=250):
- if not dest.exists():
- for d in destmap[dest]:
- problems.append("# %s redirects to %s"
- % (d.title(asLink=True, textlink=True),
- dest.title(asLink=True, textlink=True)))
- catlist.remove(d)
- # do a null edit on d to make it appear in the
- # "needs repair" category (if this wiki has one)
- try:
- d.put(d.get(get_redirect=True))
- except:
- pass
- if dest in catlist:
- for d in destmap[dest]:
- # is catmap[dest] also a redirect?
- newcat = pywikibot.Category(
- pywikibot.Link(self.catprefix+catmap[dest],
- self.site)
- )
- while newcat in catlist:
- if newcat == d or newcat == dest:
- self.log_text.append(u"* Redirect loop from %s"
- % newcat.title(asLink=True,
- textlink=True))
- break
- newcat = pywikibot.Category(
- pywikibot.Link(
- self.catprefix+catmap[newcat],
- self.site)
- )
- else:
- self.log_text.append(
- u"* Fixed double-redirect: %s -> %s -> %s"
- % (d.title(asLink=True, textlink=True),
- dest.title(asLink=True, textlink=True),
- newcat.title(asLink=True, textlink=True)))
- oldtext = d.get(get_redirect=True)
- # remove the old redirect from the old text,
- # leaving behind any non-redirect text
- oldtext = template_regex.sub("", oldtext)
- newtext = (u"{{%(redirtemp)s|%(ncat)s}}"
- % {'redirtemp': template_list[0],
- 'ncat':
newcat.title(withNamespace=False)})
- newtext = newtext + oldtext.strip()
- try:
- d.put(newtext,
- pywikibot.translate(self.site.lang,
- self.dbl_redir_comment),
- minorEdit=True)
- except pywikibot.Error, e:
- self.log_text.append("** Failed: %s" % e)
-
- # only scan those pages that have contents (nonemptypages)
- # and that haven't been removed from catlist as broken redirects
- cats_to_empty = set(catlist) & set(nonemptypages)
- pywikibot.output(u"")
pywikibot.output(u"Moving pages out of %s redirected categories."
- % len(cats_to_empty))
-# thread_limit = int(math.log(len(cats_to_empty), 8) + 1)
-# threadpool = ThreadList(limit=1) # disabling multi-threads
+ % len(nonemptypages))
- for cat in cats_to_empty:
+ for cat in pagegenerators.PreloadingGenerator(nonemptypages):
+ try:
+ if not cat.isCategoryRedirect():
+ self.log_text.append(u"* False positive: %s"
+ % cat.title(asLink=True,
+ textlink=True))
+ continue
+ except pywikibot.Error:
+ self.log_text.append(u"* Could not load %s; ignoring"
+ % cat.title(asLink=True, textlink=True))
+ continue
cat_title = cat.title(withNamespace=False)
if not self.readyToEdit(cat):
counts[cat_title] = None
@@ -549,7 +477,49 @@
u"* Skipping %s; in cooldown period."
% cat.title(asLink=True, textlink=True))
continue
- found, moved = self.move_contents(cat_title, catmap[cat],
+ dest = cat.getCategoryRedirectTarget()
+ if not dest.exists():
+ problems.append("# %s redirects to %s"
+ % (cat.title(asLink=True, textlink=True),
+ dest.title(asLink=True, textlink=True)))
+ # do a null edit on cat to make it appear in the
+ # "needs repair" category (if this wiki has one)
+ try:
+ cat.put(cat.get(get_redirect=True))
+ except:
+ pass
+ continue
+ if dest.isCategoryRedirect():
+ double = dest.getCategoryRedirectTarget()
+ if double == dest or double == cat:
+ self.log_text.append(u"* Redirect loop from %s"
+ % dest.title(asLink=True,
+ textlink=True))
+ else:
+ self.log_text.append(
+ u"* Fixed double-redirect: %s -> %s -> %s"
+ % (cat.title(asLink=True, textlink=True),
+ dest.title(asLink=True, textlink=True),
+ double.title(asLink=True, textlink=True)))
+ oldtext = cat.get(get_redirect=True)
+ # remove the old redirect from the old text,
+ # leaving behind any non-redirect text
+ oldtext = template_regex.sub("", oldtext)
+ newtext = (u"{{%(redirtemp)s|%(ncat)s}}"
+ % {'redirtemp': template_list[0],
+ 'ncat': double.title(withNamespace=False)})
+ newtext = newtext + oldtext.strip()
+ try:
+ cat.put(newtext,
+ pywikibot.translate(self.site.lang,
+ self.dbl_redir_comment),
+ minorEdit=True)
+ except pywikibot.Error, e:
+ self.log_text.append("** Failed: %s" % e)
+ continue
+
+ found, moved = self.move_contents(cat_title,
+ dest.title(withNamespace=False),
editSummary=comment)
if found is None:
self.log_text.append(