[Pywikipedia-l] SVN: [4816] trunk/pywikipedia/add_text.py
rotem at svn.wikimedia.org
rotem at svn.wikimedia.org
Sat Jan 5 17:21:36 UTC 2008
Revision: 4816
Author: rotem
Date: 2008-01-05 17:21:36 +0000 (Sat, 05 Jan 2008)
Log Message:
-----------
Using the predefined functions for categories and interwikis, should fix bug 1864547; fixing whitespace.
Modified Paths:
--------------
trunk/pywikipedia/add_text.py
Modified: trunk/pywikipedia/add_text.py
===================================================================
--- trunk/pywikipedia/add_text.py 2008-01-05 16:57:01 UTC (rev 4815)
+++ trunk/pywikipedia/add_text.py 2008-01-05 17:21:36 UTC (rev 4816)
@@ -102,7 +102,7 @@
if lang == 'commons':
link = 'http://tools.wikimedia.de/~daniel/WikiSense/UntaggedImages.php?wikifam=commons.wikimedia.org&since=-100d&until=&img_user_text=&order=img_timestamp&max=100&order=img_timestamp&format=html'
else:
- link = 'http://tools.wikimedia.de/~daniel/WikiSense/UntaggedImages.php?wikilang=' + lang + '&wikifam=' + project + '&order=img_timestamp&max=' + str(limit) + '&ofs=0&max=' + str(limit)
+ link = 'http://tools.wikimedia.de/~daniel/WikiSense/UntaggedImages.php?wikilang=' + lang + '&wikifam=' + project + '&order=img_timestamp&max=' + str(limit) + '&ofs=0&max=' + str(limit)
text = pageText(link)
#print text
regexp = r"""<td valign='top' title='Name'><a href='http://.*?\.org/w/index\.php\?title=(.*?)'>.*?</a></td>"""
@@ -147,12 +147,12 @@
text = oldTextGiven
# Understand if the bot has to skip the page or not
# In this way you can use both -except and -excepturl
- if regexSkipUrl != None:
+ if regexSkipUrl != None:
url = '%s%s' % (pathWiki, page.urlname())
result = re.findall(regexSkipUrl, site.getUrl(url))
if result != []:
wikipedia.output(u'Exception! regex (or word) used with -exceptUrl is in the page. Skip!')
- return (False, always) # continue
+ return (False, always) # continue
if regexSkip != None:
result = re.findall(regexSkip, text)
if result != []:
@@ -161,39 +161,18 @@
# If not up, text put below
if not up:
newtext = text
- categoryNamespace = site.namespace(14)
# Getting the categories
- regexpCat = re.compile(r'\[\[((?:category|%s):.*?)\]\]' % categoryNamespace.lower(), re.I)
- categorieInside = regexpCat.findall(text)
+ categoriesInside = wikipedia.getCategoryLinks(newtext, site)
# Deleting the categories
newtext = wikipedia.removeCategoryLinks(newtext, site)
# Getting the interwiki
- interwikiInside = page.interwiki()
- interwikiList = list()
- for paginetta in interwikiInside:
- nome = str(paginetta).split('[[')[1].split(']]')[0]
- interwikiList.append(nome)
- lang = nome.split(':')[0]
+ interwikiInside = wikipedia.getLanguageLinks(newtext, site)
# Removing the interwiki
newtext = wikipedia.removeLanguageLinks(newtext, site)
- # Sorting the interwiki
- interwikiList.sort()
- newtext += "\n%s" % addText
+ # Adding the text
+ newtext += u"\n%s" % addText
# Reputting the categories
- for paginetta in categorieInside:
- try:
- newtext += '\n[[%s]]' % paginetta.decode('utf-8')
- except UnicodeDecodeError:
- try:
- newtext += '\n[[%s]]' % paginetta.decode('Latin-1')
- except UnicodeDecodeError:
- newtext += '\n[[%s]]' % paginetta.encode(site.encoding())
- except UnicodeEncodeError:
- try:
- newtext += '\n[[%s]]' % paginetta.encode('utf-8')
- except UnicodeEncodeError:
- newtext += '\n[[%s]]' % paginetta.encode(site.encoding())
- newtext += '\n'
+ newtext = wikipedia.replaceCategoryLinks(newtext, categoriesInside, site)
# Dealing the stars' issue
starsListInPage = list()
for star in starsList:
@@ -204,19 +183,7 @@
for element in risultato:
newtext += '\n%s' % element
# Adding the interwiki
- for paginetta in interwikiList:
- try:
- newtext += '\n[[%s]]' % paginetta.decode('utf-8')
- except UnicodeDecodeError:
- try:
- newtext += '\n[[%s]]' % paginetta.decode('Latin-1')
- except UnicodeDecodeError:
- newtext += '\n[[%s]]' % paginetta.encode(site.encoding())
- except UnicodeEncodeError:
- try:
- newtext += '\n[[%s]]' % paginetta.encode('utf-8')
- except UnicodeEncodeError:
- newtext += '\n[[%s]]' % paginetta.encode(site.encoding())
+ newtext = wikipedia.replaceLanguageLinks(newtext, interwikiInside, site)
# If instead the text must be added above...
else:
newtext = addText + '\n' + text
@@ -233,13 +200,13 @@
if choice.lower() in ['a', 'all']:
always = True
if choice.lower() in ['n', 'no']:
- return (False, always)
+ return (False, always)
if choice.lower() in ['y', 'yes'] or always:
try:
page.put(newtext, summary)
except wikipedia.EditConflict:
wikipedia.output(u'Edit conflict! skip!')
- return (False, always)
+ return (False, always)
except wikipedia.ServerError:
errorCount += 1
if errorCount < 5:
@@ -250,20 +217,20 @@
raise wikipedia.ServerError(u'Fifth Server Error!')
except wikipedia.SpamfilterError, e:
wikipedia.output(u'Cannot change %s because of blacklist entry %s' % (page.title(), e.url))
- return (False, always)
+ return (False, always)
except wikipedia.PageNotSaved, error:
wikipedia.output(u'Error putting page: %s' % error.args)
- return (False, always)
+ return (False, always)
except wikipedia.LockedPage:
wikipedia.output(u'Skipping %s (locked page)' % page.title())
- return (False, always)
+ return (False, always)
else:
# Break only if the errors are one after the other...
errorCount = 0
- return (True, always)
+ return (True, always)
else:
return (text, newtext, always)
-
+
def main():
# If none, the var is setted only for check purpose.
summary = None; addText = None; regexSkip = None; regexSkipUrl = None;
@@ -315,7 +282,7 @@
if not generator:
raise NoEnoughData('You have to specify the generator you want to use for the script!')
# Main Loop
- for page in generator:
+ for page in generator:
(status, always) = add_text(page, addText, summary, regexSkip, regexSkipUrl, always, up, True)
if __name__ == "__main__":
More information about the Pywikipedia-l
mailing list