Revision: 7638 Author: xqt Date: 2009-11-12 15:01:35 +0000 (Thu, 12 Nov 2009)
Log Message:
-----------
* Don't append in restore/continue mode
* remove duplicate pages from generator
Modified Paths:
--------------
    trunk/pywikipedia/interwiki.py
Modified: trunk/pywikipedia/interwiki.py =================================================================== --- trunk/pywikipedia/interwiki.py 2009-11-12 14:29:17 UTC (rev 7637) +++ trunk/pywikipedia/interwiki.py 2009-11-12 15:01:35 UTC (rev 7638) @@ -1738,12 +1738,14 @@ self.generateNumber = number self.generateUntil = until
- def dump(self): + def dump(self, append = True): site = pywikibot.getSite() dumpfn = pywikibot.config.datafilepath( 'interwiki-dumps', 'interwikidump-%s-%s.txt' % (site.family.name, site.lang)) - f = codecs.open(dumpfn, 'a', 'utf-8') + if append: mode = 'a' + else: mode = 'w' + f = codecs.open(dumpfn, mode, 'utf-8') for subj in self.subjects: f.write(subj.originPage.aslink(None)+'\n') f.close() @@ -2187,18 +2189,21 @@ u'interwikidump-%s-%s.txt' % (site.family.name, site.lang)) hintlessPageGen = pagegenerators.TextfilePageGenerator(dumpFileName) + hintlessPageGen = pagegenerators.DuplicateFilterPageGenerator(hintlessPageGen) if optContinue: # We waste this generator to find out the last page's title # This is an ugly workaround. + nextPage = "!" + namespace = 0 for page in hintlessPageGen: - pass - try: + lastPage = page.titleWithoutNamespace() + if lastPage > nextPage: + nextPage = lastPage + namespace = page.namespace() + if nextPage == "!": + pywikibot.output(u"Dump file is empty?! Starting at the beginning.") + else: nextPage = page.titleWithoutNamespace() + '!' - namespace = page.namespace() - except NameError: - pywikibot.output(u"Dump file is empty?! Starting at the beginning.") - nextPage = "!" - namespace = 0 # old generator is used up, create a new one hintlessPageGen = pagegenerators.CombinedPageGenerator([pagegenerators.TextfilePageGenerator(dumpFileName), pagegenerators.AllpagesPageGenerator(nextPage, namespace, includeredirects = False)])
@@ -2225,10 +2230,10 @@ try: bot.run() except KeyboardInterrupt: - bot.dump() + bot.dump(not (optRestore or optContinue)) dumped = True except: - bot.dump() + bot.dump(not (optRestore or optContinue)) dumped = True raise finally:
pywikipedia-svn@lists.wikimedia.org