http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10081
Revision: 10081
Author:   binbot
Date:     2012-04-04 12:44:52 +0000 (Wed, 04 Apr 2012)
Log Message:
-----------
ndashredir.py saves the titles that may need manual treatment + adding ndashredir.py to CONTENTS.
Modified Paths:
--------------
    trunk/pywikipedia/CONTENTS
    trunk/pywikipedia/ndashredir.py
Modified: trunk/pywikipedia/CONTENTS
===================================================================
--- trunk/pywikipedia/CONTENTS	2012-04-04 03:32:55 UTC (rev 10080)
+++ trunk/pywikipedia/CONTENTS	2012-04-04 12:44:52 UTC (rev 10081)
@@ -125,6 +125,8 @@
                           category.
 movepages.py            : Bot page moves to another title.
 nowcommons.py           : This bot can delete images with NowCommons template.
+ndashredir.py           : Creates hyphenated redirects to articles with n dash
+                          or m dash in their title.
 pagefromfile.py         : This bot takes its input from a file that contains a
                           number of pages to be put on the wiki.
 piper.py                : Pipes article text through external program(s) on
Modified: trunk/pywikipedia/ndashredir.py =================================================================== --- trunk/pywikipedia/ndashredir.py 2012-04-04 03:32:55 UTC (rev 10080) +++ trunk/pywikipedia/ndashredir.py 2012-04-04 12:44:52 UTC (rev 10081) @@ -16,12 +16,16 @@ -namespace Works in the given namespace (only one at a time). Parameter -ns may be given as "-ns:<number>" or "-namespace:<number>". Defaults to 0 (main namespace). +-save Saves the title of existing hyphenated articles whose content + is _other_ than a redirect to the corresponding article with + n dash or m dash in the title and thus may need manual + treatment. If omitted, these titles will be written only to + the screen (or the log if logging is on). The file is in the + form you may upload it to a wikipage. + May be given as "-save:<filename>". If it exists, titles + will be appended. +"""
-""" -""" -TODO: -- listing existing hyphenated titles to a file/wikipage instead of just skipping -""" # # (C) Bináris, 2012 # @@ -29,6 +33,7 @@ # __version__='$Id$'
+import codecs import wikipedia as pywikibot from pagegenerators import RegexFilterPageGenerator as RPG from pywikibot import i18n @@ -37,10 +42,11 @@ regex = ur'.*[–—]' # Alt 0150 (n dash), alt 0151 (m dash), respectively. ns = 0 start = '!' + filename = None # The name of the file to save titles + titlefile = None # The file object itself
# Handling parameters: for arg in pywikibot.handleArgs(*args): - pass if arg == '-start': start = pywikibot.input( u'From which title do you want to continue?') @@ -50,6 +56,19 @@ ns = pywikibot.input(u'Which namespace should we process?') elif arg.startswith('-ns:') or arg.startswith('-namespace:'): ns = arg[arg.find(':')+1:] + elif arg == '-save': + filename = pywikibot.input('Please enter the filename:') + elif arg.startswith('-save:'): + filename = arg[6:] + if filename: + try: + # This opens in strict error mode, that means bot will stop + # on encoding errors with ValueError. + # See http://docs.python.org/library/codecs.html#codecs.open + titlefile = codecs.open(filename, encoding='utf-8', mode='a') + except IOError: + pywikibot.output("%s cannot be opened for writing." % filename) + return site = pywikibot.getSite() redirword = site.redirect() gen = RPG(site.allpages( @@ -71,14 +90,25 @@ % (newtitle, title)) else: pywikibot.output( - (u'Skipping [[%s]] beacuse it exists already with a ' + - u'different content.') % newtitle) - # TODO: list it for further examination to a file or wikipage + (u'\03{lightyellow}Skipping [[%s]] beacuse it exists ' + + u'already with a different content.\03{default}') + % newtitle) + if titlefile: + s = u'\n#%s does not redirect to %s.' %\ + (redirpage.title(asLink=True, textlink=True), + page.title(asLink=True, textlink=True)) + # For the unlikely case if someone wants to run it in + # file namespace. + titlefile.write(s) + titlefile.flush() else: text = u'#%s[[%s]]' % (redirword, title) redirpage.put(text, editSummary) # Todo: output the title upon Ctrl C? (KeyboardInterrupt always hits - # RegexFilterPageGenerator and cannot be catched in this loop.) + # RegexFilterPageGenerator or throttle.py or anything else and cannot + # be catched in this loop.) + if titlefile: + titlefile.close() # For the spirit of programming (it was flushed)
if __name__ == "__main__": try: