[Pywikipedia-l] SVN: [4780] trunk/pywikipedia/add_text.py
filnik at svn.wikimedia.org
Sat Dec 29 21:35:32 UTC 2007
Revision: 4780
Author: filnik
Date: 2007-12-29 21:35:32 +0000 (Sat, 29 Dec 2007)
Log Message:
-----------
Rewrite to make add_text a function that is also usable by other scripts (better than before) + encoding bugfix
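
For other scripts that want to reuse the refactored function, here is a minimal sketch of the new call pattern. The page title and template text below are illustrative placeholders only, and the sketch assumes the pywikipedia framework is importable as wikipedia alongside this add_text module; the keyword names (page, addText, up, putText, always) come from the new signature in the diff.

import wikipedia
import add_text

site = wikipedia.getSite()
page = wikipedia.Page(site, u'Sandbox')        # example page only
text_to_add = u'{{some template}}'             # example text only

# With putText=True (the default) the function loads the page, adds the text
# at the bottom and handles the save itself; it returns True on success and
# False when the page is skipped. always=True saves without prompting.
add_text.add_text(page=page, addText=text_to_add, up=False, putText=True,
                  always=True)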
Modified Paths:
--------------
trunk/pywikipedia/add_text.py
Modified: trunk/pywikipedia/add_text.py
===================================================================
--- trunk/pywikipedia/add_text.py 2007-12-29 17:48:36 UTC (rev 4779)
+++ trunk/pywikipedia/add_text.py 2007-12-29 21:35:32 UTC (rev 4780)
@@ -62,6 +62,7 @@
msg = {
'en': u'Bot: Adding %s',
+ 'it': u'Bot: Aggiungo %s',
'pt': u'Bot: Adicionando %s',
}
@@ -112,8 +113,13 @@
for result in results:
yield wikipedia.Page(self.site, result)
-def add_text(generator = None, addText = None, summary = None, regexSkip = None, regexSkipUrl = None,
- always = False, up = False):
+def add_text(page = None, addText = None, summary = None, regexSkip = None, regexSkipUrl = None,
+ always = False, up = False, putText = True, oldTextGiven = None):
+ if not addText:
+ raise NoEnoughData('You have to specify what text you want to add!')
+ if not summary:
+ summary = wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg) % addText)
+
# When a page is tagged as "really well written" it has a star in the interwiki links.
# This is a list of all the templates used (in regex format) to make the stars appear.
starsList = ['link[ _]fa', 'link[ _]adq', 'enllaç[ _]ad',
@@ -124,107 +130,107 @@
site = wikipedia.getSite()
# /wiki/ is not always the right path in non-wiki projects
pathWiki = site.family.nicepath(site.lang)
- # Check if there are the minimal settings
- if not generator:
- raise NoEnoughData('You have to specify the generator you want to use for the script!')
- if not addText:
- raise NoEnoughData('You have to specify what text you want to add!')
- if not summary:
- summary = wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg) % addText)
- # Main Loop
- for page in generator:
+ if putText:
wikipedia.output(u'Loading %s...' % page.title())
+ if oldTextGiven == None:
try:
text = page.get()
except wikipedia.NoPage:
wikipedia.output(u"%s doesn't exist, skip!" % page.title())
- continue
+ return False # continue
except wikipedia.IsRedirectPage:
wikipedia.output(u"%s is a redirect, skip!" % page.title())
- continue
- # Understand if the bot has to skip the page or not
- # In this way you can use both -except and -excepturl
- if regexSkipUrl != None:
- url = '%s%s' % (pathWiki, page.urlname())
- result = re.findall(regexSkipUrl, site.getUrl(url))
- if result != []:
- wikipedia.output(u'Exception! regex (or word) used with -exceptUrl is in the page. Skip!')
- continue
- if regexSkip != None:
- result = re.findall(regexSkip, text)
- if result != []:
- wikipedia.output(u'Exception! regex (or word) used with -except is in the page. Skip!')
- continue
- # If not up, text put below
- if not up:
- newtext = text
- categoryNamespace = site.namespace(14)
- # Getting the categories
- regexpCat = re.compile(r'\[\[((?:category|%s):.*?)\]\]' % categoryNamespace.lower(), re.I)
- categorieInside = regexpCat.findall(text)
- # Deleting the categories
- newtext = wikipedia.removeCategoryLinks(newtext, site)
- # Getting the interwiki
- interwikiInside = page.interwiki()
- interwikiList = list()
- for paginetta in interwikiInside:
- nome = str(paginetta).split('[[')[1].split(']]')[0]
- interwikiList.append(nome)
- lang = nome.split(':')[0]
- # Removing the interwiki
- newtext = wikipedia.removeLanguageLinks(newtext, site)
- # Sorting the interwiki
- interwikiList.sort()
- newtext += "\n%s" % addText
- # Reputting the categories
- for paginetta in categorieInside:
+ return False # continue
+ else:
+ text = oldTextGiven
+ # Understand if the bot has to skip the page or not
+ # In this way you can use both -except and -excepturl
+ if regexSkipUrl != None:
+ url = '%s%s' % (pathWiki, page.urlname())
+ result = re.findall(regexSkipUrl, site.getUrl(url))
+ if result != []:
+ wikipedia.output(u'Exception! regex (or word) used with -exceptUrl is in the page. Skip!')
+ return False # continue
+ if regexSkip != None:
+ result = re.findall(regexSkip, text)
+ if result != []:
+ wikipedia.output(u'Exception! regex (or word) used with -except is in the page. Skip!')
+ return False # continue
+ # If not up, text put below
+ if not up:
+ newtext = text
+ categoryNamespace = site.namespace(14)
+ # Getting the categories
+ regexpCat = re.compile(r'\[\[((?:category|%s):.*?)\]\]' % categoryNamespace.lower(), re.I)
+ categorieInside = regexpCat.findall(text)
+ # Deleting the categories
+ newtext = wikipedia.removeCategoryLinks(newtext, site)
+ # Getting the interwiki
+ interwikiInside = page.interwiki()
+ interwikiList = list()
+ for paginetta in interwikiInside:
+ nome = str(paginetta).split('[[')[1].split(']]')[0]
+ interwikiList.append(nome)
+ lang = nome.split(':')[0]
+ # Removing the interwiki
+ newtext = wikipedia.removeLanguageLinks(newtext, site)
+ # Sorting the interwiki
+ interwikiList.sort()
+ newtext += "\n%s" % addText
+ # Reputting the categories
+ for paginetta in categorieInside:
+ try:
+ newtext += '\n[[%s]]' % paginetta.decode('utf-8')
+ except UnicodeDecodeError:
try:
- newtext += '\n[[%s]]' % paginetta.decode('utf-8')
- except UnicodeEncodeError:
- try:
- newtext += '\n[[%s]]' % paginetta.decode('Latin-1')
- except UnicodeEncodeError:
- newtext += '\n[[%s]]' % paginetta
- newtext += '\n'
- # Dealing the stars' issue
- starsListInPage = list()
- for star in starsList:
- regex = re.compile('(\{\{(?:template:|)%s\|.*?\}\}\n)' % star, re.I)
- risultato = regex.findall(newtext)
- if risultato != []:
- newtext = regex.sub('', newtext)
- for element in risultato:
- newtext += '\n%s' % element
- # Adding the interwiki
- for paginetta in interwikiList:
+ newtext += '\n[[%s]]' % paginetta.decode('Latin-1')
+ except UnicodeDecodeError:
+ newtext += '\n[[%s]]' % paginetta
+ newtext += '\n'
+ # Dealing the stars' issue
+ starsListInPage = list()
+ for star in starsList:
+ regex = re.compile('(\{\{(?:template:|)%s\|.*?\}\}\n)' % star, re.I)
+ risultato = regex.findall(newtext)
+ if risultato != []:
+ newtext = regex.sub('', newtext)
+ for element in risultato:
+ newtext += '\n%s' % element
+ # Adding the interwiki
+ for paginetta in interwikiList:
+ try:
+ newtext += '\n[[%s]]' % paginetta.decode('utf-8')
+ except UnicodeEncodeError:
try:
- newtext += '\n[[%s]]' % paginetta.decode('utf-8')
+ newtext += '\n[[%s]]' % paginetta.decode('Latin-1')
except UnicodeEncodeError:
- try:
- newtext += '\n[[%s]]' % paginetta.decode('Latin-1')
- except UnicodeEncodeError:
- newtext += '\n[[%s]]' % paginetta
- # If instead the text must be added above...
- else:
- newtext = addText + '\n' + text
+ newtext += '\n[[%s]]' % paginetta
+ # If instead the text must be added above...
+ else:
+ newtext = addText + '\n' + text
+ if putText:
wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
wikipedia.showDiff(text, newtext)
- choice = ''
- # Let's put the changes.
- while 1:
+ choice = ''
+ # Let's put the changes.
+ while 1:
+ # If someone load it as module, maybe it's not so useful to put the text in the page
+ if putText:
if not always:
choice = wikipedia.inputChoice(u'Do you want to accept these changes?', ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
if choice.lower() in ['a', 'all']:
always = True
if choice.lower() in ['n', 'no']:
- break
+ return False
+
if choice.lower() in ['y', 'yes'] or always:
try:
page.put(newtext, summary)
except wikipedia.EditConflict:
wikipedia.output(u'Edit conflict! skip!')
- break
+ return False
+
except wikipedia.ServerError:
errorCount += 1
if errorCount < 5:
@@ -235,17 +241,24 @@
raise wikipedia.ServerError(u'Fifth Server Error!')
except wikipedia.SpamfilterError, e:
wikipedia.output(u'Cannot change %s because of blacklist entry %s' % (page.title(), e.url))
- break
+ return False
+
except wikipedia.PageNotSaved, error:
- wikipedia.output(u'Error putting page: %s' % (error.args,))
- break
+ wikipedia.output(u'Error putting page: %s' % error.args)
+ return False
+
except wikipedia.LockedPage:
- wikipedia.output(u'Skipping %s (locked page)' % (page.title(),))
- break
+ wikipedia.output(u'Skipping %s (locked page)' % page.title())
+ return False
+
else:
# Break only if the errors are one after the other...
errorCount = 0
- break
+ return True
+
+ else:
+ return (text, newtext)
+
def main():
# If none, the var is setted only for check purpose.
summary = None; addText = None; regexSkip = None; regexSkipUrl = None;
@@ -293,7 +306,12 @@
always = True
else:
generator = genFactory.handleArg(arg)
- add_text(generator, addText, summary, regexSkip, regexSkipUrl, always, up)
+ # Check if there are the minimal settings
+ if not generator:
+ raise NoEnoughData('You have to specify the generator you want to use for the script!')
+ # Main Loop
+ for page in generator:
+ add_text(page, addText, summary, regexSkip, regexSkipUrl, always, up, True)
if __name__ == "__main__":
try:
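
When add_text is loaded as a module and the caller only wants the computed wikitext (for instance to combine several changes into a single save), the new putText/oldTextGiven parameters skip the save step entirely. A hedged sketch under the same assumptions as above; page title and category text are placeholders:

import wikipedia
import add_text

site = wikipedia.getSite()
page = wikipedia.Page(site, u'Sandbox')            # example page only

# With putText=False no page is saved; the function returns the
# (old text, new text) pair so the caller can decide what to do with it.
old_text, new_text = add_text.add_text(
    page=page,
    addText=u'[[Category:Example]]',               # example text only
    putText=False,
    oldTextGiven=page.get(),                       # reuse text already fetched
)
wikipedia.showDiff(old_text, new_text)
# The caller may then save new_text itself, e.g. page.put(new_text, edit_summary)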