http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9918
Revision: 9918 Author: binbot Date: 2012-02-21 11:39:41 +0000 (Tue, 21 Feb 2012) Log Message: ----------- My old dream completed: with -saveexc or -saveexcnew you may save excepted titles for later use, so you don't have to bother with them again on later runs. Currently you may paste the titles into the exceptions of a fix; reading them from a file will be implemented later. + some PEP 8
Modified Paths: -------------- trunk/pywikipedia/replace.py
Modified: trunk/pywikipedia/replace.py =================================================================== --- trunk/pywikipedia/replace.py 2012-02-21 00:07:55 UTC (rev 9917) +++ trunk/pywikipedia/replace.py 2012-02-21 11:39:41 UTC (rev 9918) @@ -43,6 +43,22 @@ -savenew Just like -save, except that overwrites the existing file. Argument may also be given as "-savenew:filename".
+-saveexc With this parameter a new option will appear in choices: + "no+eXcept". If you press x, the text will not be replaced, + and the title of page will be saved to the given exception + file to exclude this page from future replacements. At the + moment you may paste the contents directly into 'title' list + of the exceptions dictionary of your fix (use tab to indent). + Reading back the list from file will be implemented later. + Argument may also be given as "-saveexc:filename". + Opens the file for append, if exists. + +-saveexcnew Just like -saveexc, except that overwrites the existing file. + Argument may also be given as "-saveexcnew:filename". + +-readexc Reserved for reading saved exceptions from a file. + Not implemented yet. + -addcat:cat_name Adds "cat_name" category to every altered page.
-excepttitle:XYZ Skip pages with titles that contain XYZ. If the -regex @@ -252,7 +268,8 @@ """ def __init__(self, generator, replacements, exceptions={}, acceptall=False, allowoverlap=False, recursive=False, - addedCat=None, sleep=None, editSummary='', articles=None): + addedCat=None, sleep=None, editSummary='', articles=None, + exctitles=None): """ Arguments: * generator - A generator that yields Page objects. @@ -269,6 +286,10 @@ touched. * articles - An open file to save the page titles. If None, we work on our wikisite immediately (default). + Corresponds to titlefile variable of main(). + * exctitles - An open file to save the excepted titles. If None, + we don't ask the user about saving them (default). + Corresponds to excoutfile variable of main().
Structure of the exceptions dictionary: This dictionary can have these keys: @@ -308,10 +329,13 @@ # Some function to set default editSummary should probably be added self.editSummary = editSummary self.articles = articles + self.exctitles = exctitles
- #An edit counter to split the file by 100 titles if -save or -savenew - #is on, and to display the number of edited articles otherwise. + # An edit counter to split the file by 100 titles if -save or -savenew + # is on, and to display the number of edited articles otherwise. self.editcounter = 0 + # A counter for saved exceptions + self.exceptcounter = 0
def isTitleExcepted(self, title): """ @@ -369,6 +393,16 @@ (lambda x: bool(x-1) and 's were' or ' was') (self.editcounter)))
+ def writeExceptCounter(self): + """ This writes the counter of saved exceptions if applicable. """ + if self.exctitles: + pywikibot.output(u'%d exception%s saved.' + % (self.exceptcounter, + (lambda x: bool(x-1) and 's were' or ' was') + (self.exceptcounter))) + else: + print 888 + def splitLine(self): """Returns a splitline after every 100th title. Splitline is in HTML comment format in case we want to insert the list into a wikipage. @@ -433,8 +467,15 @@ pywikibot.showDiff(original_text, new_text) if self.acceptall: break - choice = pywikibot.inputChoice( + if self.exctitles: + choice = pywikibot.inputChoice( u'Do you want to accept these changes?', + ['Yes', 'No', 'no+eXcept', 'Edit', + 'open in Browser', 'All', 'Quit'], + ['y', 'N', 'x', 'e', 'b', 'a', 'q'], 'N') + else: + choice = pywikibot.inputChoice( + u'Do you want to accept these changes?', ['Yes', 'No', 'Edit', 'open in Browser', 'All', 'Quit'], ['y', 'N', 'e', 'b', 'a', 'q'], 'N') @@ -461,17 +502,22 @@ continue if choice == 'q': self.writeEditCounter() + self.writeExceptCounter() return if choice == 'a': self.acceptall = True + if choice == 'x': #May happen only if self.exctitles isn't None + self.exctitles.write(u"u'%s',\n" % page.title()) + self.exctitles.flush() + self.exceptcounter += 1 if choice == 'y': if not self.articles: - #Primary behaviour: working on wiki + # Primary behaviour: working on wiki page.put_async(new_text, self.editSummary) self.editcounter += 1 - #Bug: this increments even if put_async fails - #This is separately in two clauses of if for - #future purposes to get feedback form put_async + # Bug: this increments even if put_async fails + # This is separately in two clauses of if for + # future purposes to get feedback form put_async else: #Save the title for later processing instead of editing self.editcounter += 1 @@ -510,6 +556,7 @@
#Finally: self.writeEditCounter() + self.writeExceptCounter()
def prepareRegexForMySQL(pattern): pattern = pattern.replace('\s', '[:space:]') @@ -579,11 +626,18 @@ # too much CPU sleep = None # Do not save the page titles, rather work on wiki - titlefile = None - filename = None + filename = None # The name of the file to save titles + titlefile = None # The file object itself # If we save, primary behaviour is append rather then new file append = True + # Default: don't write titles to exception file and don't read them. + excoutfilename = None # The name of the file to save exceptions + excoutfile = None # The file object itself + # excinfilename: reserved for later use (reading back exceptions) + # If we save exceptions, primary behaviour is append + excappend = True
+ # Read commandline parameters. for arg in pywikibot.handleArgs(*args): if arg == '-regex': @@ -608,11 +662,26 @@ u'Which page do you want to change?')) else: PageTitles.append(arg[6:]) + elif arg.startswith('-saveexcnew'): + excappend = False + if len(arg) == 11: + excoutfilename = pywikibot.input( + u'Please enter the filename to save the excepted titles' +\ + u'\n(will be deleted if exists):') + else: + excoutfilename = arg[12:] + elif arg.startswith('-saveexc'): + if len(arg) == 8: + excoutfilename = pywikibot.input( + u'Please enter the filename to save the excepted titles:') + else: + excoutfilename = arg[9:] elif arg.startswith('-savenew'): append = False if len(arg) == 8: filename = pywikibot.input( -u'Please enter the filename to save the titles \n(will be deleted if exists):') + u'Please enter the filename to save the titles' + \ + u'\n(will be deleted if exists):') else: filename = arg[9:] elif arg.startswith('-save'): @@ -718,7 +787,8 @@ pywikibot.output(u'The summary message will default to: %s' % default_summary_message) summary_message = pywikibot.input( -u'Press Enter to use this default message, or enter a description of the\nchanges your bot will make:') + u'Press Enter to use this default message, or enter a' + \ + u'description of the\nchanges your bot will make:') if summary_message == '': summary_message = default_summary_message editSummary = summary_message @@ -752,7 +822,7 @@ pywikibot.output(u"No replacements given in fix, don't joke with me!") return
- #Set the regular expression flags + # Set the regular expression flags flags = re.UNICODE if caseInsensitive: flags = flags | re.IGNORECASE @@ -816,27 +886,38 @@ preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=maxquerysize)
- #Finally we open the file for page titles or set article to None + # Finally we open the file for page titles or set parameter article to None if filename: try: - #This opens in strict error mode, that means bot will stop - #on encoding errors with ValueError. - #See http://docs.python.org/library/codecs.html#codecs.open + # This opens in strict error mode, that means bot will stop + # on encoding errors with ValueError. + # See http://docs.python.org/library/codecs.html#codecs.open titlefile = codecs.open(filename, encoding='utf-8', mode=(lambda x: x and 'a' or 'w')(append)) except IOError: pywikibot.output("%s cannot be opened for writing." % filename) return + # The same process with exceptions file: + if excoutfilename: + try: + excoutfile = codecs.open( + excoutfilename, encoding='utf-8', + mode=(lambda x: x and 'a' or 'w')(excappend)) + except IOError: + pywikibot.output("%s cannot be opened for writing." % + excoutfilename) + return bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall, allowoverlap, recursive, add_cat, sleep, editSummary, - titlefile) + titlefile, excoutfile) try: bot.run() finally: if titlefile: - #Just for the spirit of programming (it was flushed) + # Just for the spirit of programming (they were flushed) titlefile.close() + excoutfile.close()
if __name__ == "__main__":