[Pywikipedia-l] SVN: [4816] trunk/pywikipedia/add_text.py

rotem at svn.wikimedia.org rotem at svn.wikimedia.org
Sat Jan 5 17:21:36 UTC 2008


Revision: 4816
Author:   rotem
Date:     2008-01-05 17:21:36 +0000 (Sat, 05 Jan 2008)

Log Message:
-----------
Using the predefined functions for categories and interwikis, should fix bug 1864547; fixing whitespace.

Modified Paths:
--------------
    trunk/pywikipedia/add_text.py

Modified: trunk/pywikipedia/add_text.py
===================================================================
--- trunk/pywikipedia/add_text.py	2008-01-05 16:57:01 UTC (rev 4815)
+++ trunk/pywikipedia/add_text.py	2008-01-05 17:21:36 UTC (rev 4816)
@@ -102,7 +102,7 @@
     if lang == 'commons':
         link = 'http://tools.wikimedia.de/~daniel/WikiSense/UntaggedImages.php?wikifam=commons.wikimedia.org&since=-100d&until=&img_user_text=&order=img_timestamp&max=100&order=img_timestamp&format=html'
     else:
-        link = 'http://tools.wikimedia.de/~daniel/WikiSense/UntaggedImages.php?wikilang=' + lang + '&wikifam=' + project + '&order=img_timestamp&max=' + str(limit) + '&ofs=0&max=' + str(limit)         
+        link = 'http://tools.wikimedia.de/~daniel/WikiSense/UntaggedImages.php?wikilang=' + lang + '&wikifam=' + project + '&order=img_timestamp&max=' + str(limit) + '&ofs=0&max=' + str(limit)
     text = pageText(link)
     #print text
     regexp = r"""<td valign='top' title='Name'><a href='http://.*?\.org/w/index\.php\?title=(.*?)'>.*?</a></td>"""
@@ -147,12 +147,12 @@
         text = oldTextGiven
     # Understand if the bot has to skip the page or not
     # In this way you can use both -except and -excepturl
-    if regexSkipUrl != None:          
+    if regexSkipUrl != None:
         url = '%s%s' % (pathWiki, page.urlname())
         result = re.findall(regexSkipUrl, site.getUrl(url))
         if result != []:
             wikipedia.output(u'Exception! regex (or word) used with -exceptUrl is in the page. Skip!')
-            return (False, always) # continue         
+            return (False, always) # continue
     if regexSkip != None:
         result = re.findall(regexSkip, text)
         if result != []:
@@ -161,39 +161,18 @@
     # If not up, text put below
     if not up:
         newtext = text
-        categoryNamespace = site.namespace(14)
         # Getting the categories
-        regexpCat = re.compile(r'\[\[((?:category|%s):.*?)\]\]' % categoryNamespace.lower(), re.I)
-        categorieInside = regexpCat.findall(text)
+        categoriesInside = wikipedia.getCategoryLinks(newtext, site)
         # Deleting the categories
         newtext = wikipedia.removeCategoryLinks(newtext, site)
         # Getting the interwiki
-        interwikiInside = page.interwiki()
-        interwikiList = list()
-        for paginetta in interwikiInside:
-            nome = str(paginetta).split('[[')[1].split(']]')[0]
-            interwikiList.append(nome)
-            lang = nome.split(':')[0]
+        interwikiInside = wikipedia.getLanguageLinks(newtext, site)
         # Removing the interwiki
         newtext = wikipedia.removeLanguageLinks(newtext, site)
-        # Sorting the interwiki
-        interwikiList.sort()
-        newtext += "\n%s" % addText
+        # Adding the text
+        newtext += u"\n%s" % addText
         # Reputting the categories
-        for paginetta in categorieInside:
-            try:
-                newtext += '\n[[%s]]' % paginetta.decode('utf-8')
-            except UnicodeDecodeError:
-                try:
-                    newtext += '\n[[%s]]' % paginetta.decode('Latin-1')
-                except UnicodeDecodeError:
-                    newtext += '\n[[%s]]' % paginetta.encode(site.encoding())
-            except UnicodeEncodeError:
-                try:
-                    newtext += '\n[[%s]]' % paginetta.encode('utf-8')
-                except UnicodeEncodeError:
-                    newtext += '\n[[%s]]' % paginetta.encode(site.encoding())   
-        newtext += '\n'
+        newtext = wikipedia.replaceCategoryLinks(newtext, categoriesInside, site)
         # Dealing the stars' issue
         starsListInPage = list()
         for star in starsList:
@@ -204,19 +183,7 @@
                 for element in risultato:
                     newtext += '\n%s' % element
         # Adding the interwiki
-        for paginetta in interwikiList:
-            try:
-                newtext += '\n[[%s]]' % paginetta.decode('utf-8')
-            except UnicodeDecodeError:
-                try:
-                    newtext += '\n[[%s]]' % paginetta.decode('Latin-1')
-                except UnicodeDecodeError:
-                    newtext += '\n[[%s]]' % paginetta.encode(site.encoding())
-            except UnicodeEncodeError:
-                try:
-                    newtext += '\n[[%s]]' % paginetta.encode('utf-8')
-                except UnicodeEncodeError:
-                    newtext += '\n[[%s]]' % paginetta.encode(site.encoding())               
+        newtext = wikipedia.replaceLanguageLinks(newtext, interwikiInside, site)
     # If instead the text must be added above...
     else:
         newtext = addText + '\n' + text
@@ -233,13 +200,13 @@
             if choice.lower() in ['a', 'all']:
                 always = True
             if choice.lower() in ['n', 'no']:
-                return (False, always)              
+                return (False, always)
             if choice.lower() in ['y', 'yes'] or always:
                 try:
                     page.put(newtext, summary)
                 except wikipedia.EditConflict:
                     wikipedia.output(u'Edit conflict! skip!')
-                    return (False, always)                  
+                    return (False, always)
                 except wikipedia.ServerError:
                     errorCount += 1
                     if errorCount < 5:
@@ -250,20 +217,20 @@
                         raise wikipedia.ServerError(u'Fifth Server Error!')
                 except wikipedia.SpamfilterError, e:
                     wikipedia.output(u'Cannot change %s because of blacklist entry %s' % (page.title(), e.url))
-                    return (False, always)                   
+                    return (False, always)
                 except wikipedia.PageNotSaved, error:
                     wikipedia.output(u'Error putting page: %s' % error.args)
-                    return (False, always)                   
+                    return (False, always)
                 except wikipedia.LockedPage:
                     wikipedia.output(u'Skipping %s (locked page)' % page.title())
-                    return (False, always)                   
+                    return (False, always)
                 else:
                     # Break only if the errors are one after the other...
                     errorCount = 0
-                    return (True, always)                   
+                    return (True, always)
         else:
             return (text, newtext, always)
-            
+
 def main():
     # If none, the var is setted only for check purpose.
     summary = None; addText = None; regexSkip = None; regexSkipUrl = None;
@@ -315,7 +282,7 @@
     if not generator:
         raise NoEnoughData('You have to specify the generator you want to use for the script!')
     # Main Loop
-    for page in generator:            
+    for page in generator:
         (status, always) = add_text(page, addText, summary, regexSkip, regexSkipUrl, always, up, True)
     
 if __name__ == "__main__":





More information about the Pywikipedia-l mailing list