[Pywikipedia-l] SVN: [6254] branches/rewrite/pywikibot/scripts/replace.py

russblau at svn.wikimedia.org russblau at svn.wikimedia.org
Wed Jan 14 13:28:39 UTC 2009


Revision: 6254
Author:   russblau
Date:     2009-01-14 13:28:39 +0000 (Wed, 14 Jan 2009)

Log Message:
-----------
merge (selected) changes from trunk; -category flag omitted as duplicative of pagegenerators module

Modified Paths:
--------------
    branches/rewrite/pywikibot/scripts/replace.py

Property Changed:
----------------
    branches/rewrite/pywikibot/scripts/replace.py

Modified: branches/rewrite/pywikibot/scripts/replace.py
===================================================================
--- branches/rewrite/pywikibot/scripts/replace.py	2009-01-14 02:45:23 UTC (rev 6253)
+++ branches/rewrite/pywikibot/scripts/replace.py	2009-01-14 13:28:39 UTC (rev 6254)
@@ -23,6 +23,11 @@
 
 -nocase           Use case insensitive regular expressions.
 
+-dotall           Make the dot match any character at all, including a newline.
+                  Without this flag, '.' will match anything except a newline.
+
+-multiline        '^' and '$' will match at the beginning and end of each line.
+
 -xmlstart         (Only works with -xml) Skip all articles in the XML dump
                   before the one specified (may also be given as
                   -xmlstart:Article).
@@ -136,37 +141,38 @@
 # NOTE: Predefined replacement tasks might use their own dictionary, see 'fixes'
 # below.`v
 msg = {
-       'ar':u'%s روبوت : استبدال تلقائي للنص',
-       'cs':u'Robot automaticky nahradil text: %s',
-       'de':u'Bot: Automatisierte Textersetzung %s',
-       'el':u'Ρομπότ: Αυτόματη αντικατάσταση κειμένου %s',
-       'en':u'Robot: Automated text replacement %s',
-       'es':u'Robot: Reemplazo automático de texto %s',
-       'fa':u'ربات: تغییر خودکار متن %s',
-       'fr':u'Bot : Remplacement de texte automatisé %s',
-       'he':u'בוט: החלפת טקסט אוטומטית %s',
-       'hu':u'Robot: Automatikus szövegcsere %s',
-       'ia':u'Robot: Reimplaciamento automatic de texto %s',
-       'id':u'Bot: Penggantian teks otomatis %s',
-       'is':u'Vélmenni: breyti texta %s',
-       'it':u'Bot: Sostituzione automatica %s',
-       'ja':u'ロボットによる: 文字置き換え %s',
-       'ka':u'რობოტი: ტექსტის ავტომატური შეცვლა %s',
-       'kk':u'Бот: Мәтінді өздікті алмастырды: %s',
-       'ksh':u'Bot: hät outomatesch Täx jetuusch: %s',
-       'lt':u'robotas: Automatinis teksto keitimas %s',
-       'nds':u'Bot: Text automaatsch utwesselt: %s',
-       'nds-nl':u'Bot: autematisch tekse vervungen %s',
-       'nl':u'Bot: automatisch tekst vervangen %s',
-       'nn':u'robot: automatisk teksterstatning: %s',
-       'no':u'robot: automatisk teksterstatning: %s',
-       'pl':u'Robot automatycznie zamienia tekst %s',
-       'pt':u'Bot: Mudança automática %s',
-       'ru':u'Робот: Автоматизированная замена текста',
-       'sr':u'Бот: Аутоматска замена текста %s',
-       'sv':u'Bot: Automatisk textersättning: %s',
-       'zh': u'機器人:執行文字代換作業 %s',
-       }
+    'ar': u'%s روبوت : استبدال تلقائي للنص',
+    'ca': u'Robot: Reemplaçament automàtic de text %s',
+    'cs': u'Robot automaticky nahradil text: %s',
+    'de': u'Bot: Automatisierte Textersetzung %s',
+    'el': u'Ρομπότ: Αυτόματη αντικατάσταση κειμένου %s',
+    'en': u'Robot: Automated text replacement %s',
+    'es': u'Robot: Reemplazo automático de texto %s',
+    'fa': u'ربات: تغییر خودکار متن %s',
+    'fr': u'Bot : Remplacement de texte automatisé %s',
+    'he': u'בוט: החלפת טקסט אוטומטית %s',
+    'hu': u'Robot: Automatikus szövegcsere %s',
+    'ia': u'Robot: Reimplaciamento automatic de texto %s',
+    'id': u'Bot: Penggantian teks otomatis %s',
+    'is': u'Vélmenni: breyti texta %s',
+    'it': u'Bot: Sostituzione automatica %s',
+    'ja': u'ロボットによる: 文字置き換え %s',
+    'ka': u'რობოტი: ტექსტის ავტომატური შეცვლა %s',
+    'kk': u'Бот: Мәтінді өздікті алмастырды: %s',
+    'ksh': u'Bot: hät outomatesch Täx jetuusch: %s',
+    'lt': u'robotas: Automatinis teksto keitimas %s',
+    'nds': u'Bot: Text automaatsch utwesselt: %s',
+    'nds-nl': u'Bot: autematisch tekse vervungen %s',
+    'nl': u'Bot: automatisch tekst vervangen %s',
+    'nn': u'robot: automatisk teksterstatning: %s',
+    'no': u'robot: automatisk teksterstatning: %s',
+    'pl': u'Robot automatycznie zamienia tekst %s',
+    'pt': u'Bot: Mudança automática %s',
+    'ru': u'Робот: Автоматизированная замена текста %s',
+    'sr': u'Бот: Аутоматска замена текста %s',
+    'sv': u'Bot: Automatisk textersättning: %s',
+    'zh': u'機器人:執行文字代換作業 %s',
+}
 
 
 class XmlDumpReplacePageGenerator:
@@ -487,6 +493,10 @@
     acceptall = False
     # Will become True if the user inputs the commandline parameter -nocase
     caseInsensitive = False
+    # Will become True if the user inputs the commandline parameter -dotall
+    dotall = False
+    # Will become True if the user inputs the commandline parameter -multiline
+    multiline = False
     # Which namespaces should be processed?
     # default to [] which means all namespaces will be processed
     namespaces = []
@@ -546,8 +556,12 @@
             recursive = True
         elif arg == '-nocase':
             caseInsensitive = True
+        elif arg == '-dotall':
+            dotall = True
+        elif arg == '-multiline':
+            multiline = True
         elif arg.startswith('-addcat:'):
-            add_cat = arg[8:]
+            add_cat = arg[len('-addcat:'):]
         elif arg.startswith('-namespace:'):
             try:
                 namespaces.append(int(arg[11:]))
@@ -633,15 +647,21 @@
             exceptions = fix['exceptions']
         replacements = fix['replacements']
 
-    # already compile all regular expressions here to save time later
+    # Set the regular expression flags shared by every pattern compiled below
+    flags = re.UNICODE
+    if caseInsensitive:
+        flags = flags | re.IGNORECASE
+    if dotall:
+        flags = flags | re.DOTALL
+    if multiline:
+        flags = flags | re.MULTILINE
+
+    # Pre-compile all regular expressions here to save time later
     for i in range(len(replacements)):
         old, new = replacements[i]
         if not regex:
             old = re.escape(old)
-        if caseInsensitive:
-            oldR = re.compile(old, re.UNICODE | re.IGNORECASE)
-        else:
-            oldR = re.compile(old, re.UNICODE)
+        oldR = re.compile(old, flags)
         replacements[i] = oldR, new
 
     for exceptionCategory in ['title', 'require-title', 'text-contains', 'inside']:
@@ -649,12 +669,7 @@
             patterns = exceptions[exceptionCategory]
             if not regex:
                 patterns = [re.escape(pattern) for pattern in patterns]
-            if caseInsensitive:
-                patterns = [re.compile(pattern, re.UNICODE | re.IGNORECASE)
-                            for pattern in patterns]
-            else:
-                patterns = [re.compile(pattern, re.UNICODE)
-                            for pattern in patterns]
+            patterns = [re.compile(pattern, flags) for pattern in patterns]
             exceptions[exceptionCategory] = patterns
 
     if xmlFilename:
@@ -682,7 +697,6 @@
 %s
 LIMIT 200""" % (whereClause, exceptClause)
         gen = pagegenerators.MySQLPageGenerator(query)
-
     elif PageTitles:
         pages = [pywikibot.Page(pywikibot.getSite(), PageTitle)
                  for PageTitle in PageTitles]


Property changes on: branches/rewrite/pywikibot/scripts/replace.py
___________________________________________________________________
Added: svn:mergeinfo
   + /trunk/pywikipedia/replace.py:6187-6253





More information about the Pywikipedia-l mailing list