Revision: 7712 Author: xqt Date: 2009-11-30 10:12:57 +0000 (Mon, 30 Nov 2009)
Log Message: ----------- selectQuerySite(): always return maxOpenSite() if restoreAll is enabled; remove old lmo-stuff
Modified Paths: -------------- trunk/pywikipedia/interwiki.py
Modified: trunk/pywikipedia/interwiki.py =================================================================== --- trunk/pywikipedia/interwiki.py 2009-11-30 09:20:11 UTC (rev 7711) +++ trunk/pywikipedia/interwiki.py 2009-11-30 10:12:57 UTC (rev 7712) @@ -332,32 +332,6 @@ '&pagegenerators_help;': pagegenerators.parameterHelp }
-class XmlDumpLmoLinkPageGenerator: - """ - Generator which will yield Pages that might contain selflinks. - These pages will be retrieved from a local XML dump file - (cur table). - """ - def __init__(self, xmlFilename): - """ - Arguments: - * xmlFilename - The dump's path, either absolute or relative - """ - - self.xmlFilename = xmlFilename - - def __iter__(self): - import xmlreader - mysite = pywikibot.getSite() - dump = xmlreader.XmlDump(self.xmlFilename) - r = re.compile(r'\d') - for entry in dump.parse(): - if not r.search(entry.title): - selflinkR = re.compile(r'\[\[lmo:') - if selflinkR.search(entry.text): - yield pywikibot.Page(mysite, entry.title) - - class SaveError(pywikibot.Error): """ An attempt to save a page with changed interwiki has failed. @@ -548,6 +522,7 @@ lacklanguage = None minlinks = 0 quiet = False + restoreAll = False
def readOptions(self, arg): if arg == '-noauto': @@ -1673,16 +1648,8 @@ for rmsite in removing: if rmsite != page.site(): # Sometimes sites have an erroneous link to itself as an interwiki rmPage = old[rmsite] - ########## - # temporary hard-coded special case to get rid of thousands of broken links to the Lombard Wikipedia, - # where useless bot-created articles were mass-deleted. See for example: - # http://meta.wikimedia.org/wiki/Proposals_for_closing_projects/Closure_of_Lombard_Wikipedia - if rmsite == pywikibot.getSite('lmo', 'wikipedia'): - pywikibot.output(u'Found bad link to %s. As many lmo pages were deleted, it is assumed that it can be safely removed.' % rmPage.aslink()) - else: - ########## - new[rmsite] = old[rmsite] - pywikibot.output(u"WARNING: %s is either deleted or has a mismatching disambiguation state." % rmPage.aslink(True)) + new[rmsite] = old[rmsite] #put it to new means don't delete it + pywikibot.output(u"WARNING: %s is either deleted or has a mismatching disambiguation state." % rmPage.aslink(True)) # Re-Check what needs to get done mods, mcomment, adding, removing, modifying = compareLanguages(old, new, insite = page.site())
@@ -1967,11 +1934,12 @@ else: break # If we have a few, getting the home language is a good thing. - try: - if self.counts[pywikibot.getSite()] > 4: - return pywikibot.getSite() - except KeyError: - pass + if not globalvar.restoreAll: + try: + if self.counts[pywikibot.getSite()] > 4: + return pywikibot.getSite() + except KeyError: + pass # If getting the home language doesn't make sense, see how many # foreign page queries we can find. return self.maxOpenSite() @@ -2123,12 +2091,6 @@ for arg in pywikibot.handleArgs(): if globalvar.readOptions(arg): continue - elif arg.startswith('-xml'): - if len(arg) == 4: - xmlFilename = pywikibot.input(u'Please enter the XML dump\'s filename:') - else: - xmlFilename = arg[5:] - hintlessPageGen = XmlDumpLmoLinkPageGenerator(xmlFilename) elif arg.startswith('-warnfile:'): warnfile = arg[10:] elif arg.startswith('-years'): @@ -2156,8 +2118,8 @@ else: newPages = 100 elif arg.startswith('-restore'): - restoreAll = arg[9:].lower() == 'all' - optRestore = not restoreAll + globalvar.restoreAll = arg[9:].lower() == 'all' + optRestore = not globalvar.restoreAll elif arg == '-continue': optContinue = True elif arg.startswith('-namespace:'): @@ -2202,9 +2164,9 @@ hintlessPageGen = pagegenerators.NewpagesPageGenerator(newPages, namespace=ns)
- elif optRestore or optContinue or restoreAll: + elif optRestore or optContinue or globalvar.restoreAll: site = pywikibot.getSite() - if restoreAll: + if globalvar.restoreAll: import glob for FileName in glob.iglob('interwiki-dumps/interwikidump-*.txt'): s = FileName.split('\\')[1].split('.')[0].split('-') @@ -2243,8 +2205,6 @@ pywikibot.output(u"Dump file is empty?! Starting at the beginning.") else: nextPage = page.titleWithoutNamespace() + '!' - # old generator is used up, create a new one - #hintlessPageGen = pagegenerators.CombinedPageGenerator([pagegenerators.TextfilePageGenerator(dumpFileName), pagegenerators.AllpagesPageGenerator(nextPage, namespace, includeredirects = False)]) hintlessPageGen = pagegenerators.CombinedPageGenerator([hintlessPageGen, pagegenerators.AllpagesPageGenerator(nextPage, namespace, includeredirects = False)]) if not hintlessPageGen: pywikibot.output(u'No Dumpfiles found.') @@ -2271,7 +2231,7 @@
try: try: - append = not (optRestore or optContinue or restoreAll) + append = not (optRestore or optContinue or globalvar.restoreAll) bot.run() except KeyboardInterrupt: dumpFileName = bot.dump(append)
pywikipedia-svn@lists.wikimedia.org