SVN: [8553] branches/rewrite/pywikibot/textlib.py - Pywikipedia-svn

14 Sep 2010

Revision: 8553
Author:   xqt
Date:     2010-09-14 15:37:42 +0000 (Tue, 14 Sep 2010)
Log Message:
-----------
Remove the obsolete get_redirect parameter from extract_templates_and_params; enable getLanguageLinks on template subpages via a new template_subpage option, also accepted by replaceLanguageLinks (update from trunk)
Modified Paths:
--------------
    branches/rewrite/pywikibot/textlib.py
Modified: branches/rewrite/pywikibot/textlib.py
===================================================================

--- branches/rewrite/pywikibot/textlib.py	2010-09-14 05:47:44 UTC (rev 8552)
+++ branches/rewrite/pywikibot/textlib.py	2010-09-14 15:37:42 UTC (rev 8553)
@@ -109,11 +109,14 @@
             if exc in exceptionRegexes:
                 dontTouchRegexes.append(exceptionRegexes[exc])
             else:
-                # nowiki, noinclude, includeonly, timeline, math ond other extensions
-                dontTouchRegexes.append(re.compile(r'(?is)<%s>.*?</%s>' % (exc, exc)))
+                # nowiki, noinclude, includeonly, timeline, math and other
+                # extensions
+                dontTouchRegexes.append(re.compile(r'(?is)<%s>.*?</%s>'
+                                                   % (exc, exc)))
             # handle alias
             if exc == 'source':
-                dontTouchRegexes.append(re.compile(r'(?is)<syntaxhighlight .*?</syntaxhighlight>'))
+                dontTouchRegexes.append(re.compile(
+                    r'(?is)<syntaxhighlight .*?</syntaxhighlight>'))
         else:
             # assume it's a regular expression
             dontTouchRegexes.append(exc)
@@ -167,11 +170,11 @@
                     groupMatch = groupR.search(replacement)
                     if not groupMatch:
                         break
-                    groupID = (groupMatch.group('name')
-                               or int(groupMatch.group('number')))
-                    replacement = (replacement[:groupMatch.start()]
-                                   + match.group(groupID)
-                                   + replacement[groupMatch.end():])
+                    groupID = groupMatch.group('name') or \
+                              int(groupMatch.group('number'))
+                    replacement = replacement[:groupMatch.start()] + \
+                                  match.group(groupID) + \
+                                  replacement[groupMatch.end():]
             text = text[:match.start()] + replacement + text[match.end():]
# continue the search on the remaining text
@@ -196,6 +199,7 @@
The exact set of parts which should be removed can be passed as the
     'parts' parameter, which defaults to all.
+
     """
     regexes = {
             'comments' :       r'<!--.*?-->',
@@ -250,9 +254,9 @@
         striploopcontinue = True
         while firstinseparator > 0 and striploopcontinue:
             striploopcontinue = False
-            if ( (firstinseparator >= lenseparator) and
-                 (separator ==
-                    text[firstinseparator-lenseparator:firstinseparator])):
+            if (firstinseparator >= lenseparator) and \
+               (separator == text[firstinseparator - \
+                                  lenseparator : firstinseparator]):
                 firstinseparator -= lenseparator
                 striploopcontinue = True
             elif text[firstinseparator-1] < ' ':
@@ -271,7 +275,7 @@
 #        or change links to a different project, or any that are formatted
 #        as in-line interwiki links (e.g., "[[:es:Articulo]]".  (CONFIRM)
-def getLanguageLinks(text, insite = None, pageLink = "[[]]"):
+def getLanguageLinks(text, insite=None, pageLink="[[]]", template_subpage=False):
     """
     Return a dict of interlanguage links found in text.
@@ -285,7 +289,10 @@
     result = {}
     # Ignore interwiki links within nowiki tags, includeonly tags, pre tags,
     # and HTML comments
-    text = removeDisabledParts(text)
+    tags = ['comments', 'nowiki', 'pre', 'source']
+    if not template_subpage:
+        tags += ['includeonly']
+    text = removeDisabledParts(text, tags)
# This regular expression will find every link that is possibly an
     # interwiki link.
@@ -332,7 +339,7 @@
     return text.strip()
-def removeLanguageLinksAndSeparator(text, site = None, marker = '', separator = ''):
+def removeLanguageLinksAndSeparator(text, site=None, marker='', separator=''):
     """
     Return text with all interlanguage links, plus any preceeding whitespace
     and separateor occurrences removed.
@@ -352,8 +359,8 @@
         return removeLanguageLinks(text, site, marker)
-def replaceLanguageLinks(oldtext, new, site = None, addOnly = False,
-    template = False):
+def replaceLanguageLinks(oldtext, new, site=None, addOnly=False,
+    template=False, template_subpage=False):
     """Replace interlanguage links in the text with a new set of links.
'new' should be a dict with the Site objects as keys, and Page or Link
@@ -400,17 +407,25 @@
                 newtext = replaceCategoryLinks(s2, cats, site=site,
                                                addOnly=True)
             else:
-                if template:
+                if template or template_subpage:
+                    if template_subpage:
+                        includeOn  = '<includeonly>'
+                        includeOff = '</includeonly>'
+                    else:
+                        includeOn  = '<noinclude>'
+                        includeOff = '</noinclude>'
+                        separator = ''
                     # Do we have a noinclude at the end of the template?
-                    parts = s2.split('</noinclude>')
+                    parts = s2.split(includeOff)
                     lastpart = parts[-1]
                     if re.match('\s*%s' % marker, lastpart):
                         # Put the langlinks back into the noinclude's
-                        regexp = re.compile('</noinclude>\s*%s' % marker)
-                        newtext = regexp.sub(s + '</noinclude>', s2)
+                        regexp = re.compile('%s\s*%s' % (includeOff, marker))
+                        newtext = regexp.sub(s + includeOff, s2)
                     else:
                         # Put the langlinks at the end, inside noinclude's
-                        newtext = s2.replace(marker,'').strip() + separator + u'<noinclude>\n%s</noinclude>\n' % s
+                        newtext = s2.replace(marker,'').strip() + separator + \
+                                  u'%s\n%s%s\n' % (includeOn, s, includeOff)
                 else:
                     newtext = s2.replace(marker,'').strip() + separator + s
     else:
@@ -506,7 +521,7 @@
     return result
-def removeCategoryLinks(text, site, marker = ''):
+def removeCategoryLinks(text, site, marker=''):
     """Return text with all category links removed.
Put the string marker after the last replacement (at the end of the text
@@ -599,7 +614,8 @@
 The PyWikipediaBot is no longer allowed to touch categories on the German
 Wikipedia on pages that contain the Personendaten template because of the
 non-standard placement of that template.
-See http://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/bis_2006#...""")
+See http://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/bis_2006#...
+""")
     separator = site.family.category_text_separator
     iseparator = site.family.interwiki_text_separator
     separatorstripped = separator.strip()
@@ -626,15 +642,16 @@
             if "</noinclude>" in s2[firstafter:]:
                 if separatorstripped:
                     s = separator + s
-                newtext = (s2[:firstafter].replace(marker,'') + s
-                           + s2[firstafter:])
+                newtext = s2[:firstafter].replace(marker, '') + s + \
+                          s2[firstafter:]
             elif site.language() in site.family.categories_last:
                 newtext = s2.replace(marker,'').strip() + separator + s
             else:
                 interwiki = getLanguageLinks(s2)
-                s2 = removeLanguageLinksAndSeparator(
-                         s2.replace(marker,''), site, '', iseparatorstripped
-                     ) + separator + s
+                s2 = removeLanguageLinksAndSeparator(s2.replace(marker, ''),
+                                                     site, '',
+                                                     iseparatorstripped
+                                                     ) + separator + s
                 newtext = replaceLanguageLinks(s2, interwiki, site=site,
                                                addOnly=True)
     else:
@@ -700,7 +717,7 @@
 # Functions dealing with templates
 #----------------------------------
-def extract_templates_and_params(text, get_redirect=False):
+def extract_templates_and_params(text):
     """Return list of template calls found in text.
Return value is a list of tuples. There is one tuple for each use of a
@@ -937,10 +954,11 @@
     if code == 'ckb':
         return ['ku', 'ar']
     #Chinese
-    if code in ['minnan', 'zh', 'zh-classical', 'zh-min-nan', 'zh-tw', 'zh-hans', 'zh-hant']:
+    if code in ['minnan', 'zh', 'zh-classical', 'zh-min-nan', 'zh-tw',
+                'zh-hans', 'zh-hant']:
         return ['zh', 'zh-tw', 'zh-cn', 'zh-classical']
-    if code in ['cdo', 'gan', 'hak', 'ii', 'wuu', 'za', 'zh-cdo', 'zh-classical',
-                'zh-cn', 'zh-yue']:
+    if code in ['cdo', 'gan', 'hak', 'ii', 'wuu', 'za', 'zh-cdo',
+                'zh-classical', 'zh-cn', 'zh-yue']:
         return ['zh', 'zh-cn', 'zh-tw', 'zh-classical']
     #Scandinavian languages
     if code in ['da', 'sv']: