http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10352
Revision: 10352
Author: xqt
Date: 2012-06-12 16:46:30 +0000 (Tue, 12 Jun 2012)
Log Message:
-----------
update standardizePageFooter() from trunk;
fix cleanUpSectionHeaders using config.line_separator to prevent empty changes by different LSs
Modified Paths:
--------------
branches/rewrite/scripts/cosmetic_changes.py
Modified: branches/rewrite/scripts/cosmetic_changes.py
===================================================================
--- branches/rewrite/scripts/cosmetic_changes.py 2012-06-12 07:47:03 UTC (rev 10351)
+++ branches/rewrite/scripts/cosmetic_changes.py 2012-06-12 16:46:30 UTC (rev 10352)
@@ -180,7 +180,7 @@
text = self.cleanUpSectionHeaders(text)
text = self.putSpacesInLists(text)
## text = self.translateAndCapitalizeNamespaces(text) ##excluded since family.namespaces does not exist anymore
-## text = self.translateMagicWords(text)
+## text = self.translateMagicWords(text) # siteinfo is a dict, not a function
text = self.replaceDeprecatedTemplates(text)
text = self.resolveHtmlEntities(text)
text = self.validXhtml(text)
@@ -216,13 +216,137 @@
def standardizePageFooter(self, text):
"""
- Makes sure that categories are put to the correct position, but
- does not sort them.
+ Makes sure that interwiki links, categories and star templates are
+ put to the correct position and into the right order. This combines the
+ old instances standardizeInterwiki and standardizeCategories
+ The page footer has the following section in that sequence:
+ 1. categories
+ 2. ## TODO: template beyond categories ##
+ 3. additional information depending on local site policy
+ 4. stars templates for featured and good articles
+ 5. interwiki links
"""
- # The PyWikipediaBot is no longer allowed to touch categories on the German Wikipedia. See http://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/bis_2006…
- if self.site != pywikibot.getSite('de', 'wikipedia') and not self.template:
+ starsList = [
+ u'bueno',
+ u'bom interwiki',
+ u'cyswllt[ _]erthygl[ _]ddethol', u'dolen[ _]ed',
+ u'destacado', u'destaca[tu]',
+ u'enllaç[ _]ad',
+ u'enllaz[ _]ad',
+ u'leam[ _]vdc',
+ u'legătură[ _]a[bcf]',
+ u'liamm[ _]pub',
+ u'lien[ _]adq',
+ u'lien[ _]ba',
+ u'liên[ _]kết[ _]bài[ _]chất[ _]lượng[ _]tốt',
+ u'liên[ _]kết[ _]chọn[ _]lọc',
+ u'ligam[ _]adq',
+ u'ligoelstara',
+ u'ligoleginda',
+ u'link[ _][afgu]a', u'link[ _]adq', u'link[ _]f[lm]', u'link[ _]km',
+ u'link[ _]sm', u'linkfa',
+ u'na[ _]lotura',
+ u'nasc[ _]ar',
+ u'tengill[ _][úg]g',
+ u'ua',
+ u'yüm yg',
+ u'רא',
+ u'وصلة مقالة جيدة',
+ u'وصلة مقالة مختارة',
+ ]
+
+ categories = None
+ interwikiLinks = None
+ allstars = []
+ hasCommentLine = False
+
+ # The PyWikipediaBot is no longer allowed to touch categories on the
+ # German Wikipedia. See
+ # http://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/1#Positi…
+ # ignoring nn-wiki because of the comment line above iw section
+ if not self.template and not '{{Personendaten' in text and \
+ not '{{SORTIERUNG' in text and not '{{DEFAULTSORT' in text and \
+ not self.site.lang in ('et', 'it', 'bg', 'ru'):
categories = pywikibot.getCategoryLinks(text, site = self.site)
- text = pywikibot.replaceCategoryLinks(text, categories, site = self.site)
+
+ if not self.talkpage:# and pywikibot.calledModuleName() <> 'interwiki':
+ subpage = False
+ if self.template:
+ loc = None
+ try:
+ tmpl, loc = moved_links[self.site.lang]
+ del tmpl
+ except KeyError:
+ pass
+ if loc != None and loc in self.title:
+ subpage = True
+ interwikiLinks = pywikibot.getLanguageLinks(
+ text, insite=self.site, template_subpage=subpage)
+
+ # Removing the interwiki
+ text = pywikibot.removeLanguageLinks(text, site = self.site)
+ # Removing the stars' issue
+ starstext = pywikibot.removeDisabledParts(text)
+ for star in starsList:
+ regex = re.compile('(\{\{(?:template:|)%s\|.*?\}\}[\s]*)'
+ % star, re.I)
+ found = regex.findall(starstext)
+ if found != []:
+ if config.verbose_output:
+ print found
+ text = regex.sub('', text)
+ allstars += found
+
+ # nn got a message between the categories and the iw's
+ # and they want to keep it there, first remove it
+ if self.site.lang in msg_interwiki:
+ iw_msg = msg_interwiki[self.site.lang]
+ if isinstance(iw_msg, tuple):
+ iw_reg = iw_msg[1]
+ iw_msg = iw_msg[0]
+ else:
+ iw_reg = u'(%s)' % iw_msg
+ regex = re.compile(iw_reg)
+ found = regex.findall(text)
+ if found:
+ if config.verbose_output:
+ print found
+ hasCommentLine = True
+ text = regex.sub('', text)
+
+ # Adding categories
+ if categories:
+ ##Sorting categories in alphabetic order. beta test only on Persian Wikipedia, TODO fix bug for sorting
+ #if self.site.language() == 'fa':
+ # categories.sort()
+ ##Taking main cats to top
+ # for name in categories:
+ # if re.search(u"(.+?)\|(.{,1}?)",name.title()) or name.title()==name.title().split(":")[0]+title:
+ # categories.remove(name)
+ # categories.insert(0, name)
+ text = pywikibot.replaceCategoryLinks(text, categories,
+ site=self.site)
+ # Put the iw message back
+ if not self.talkpage and \
+ ((interwikiLinks or hasCommentLine) and
+ self.site.language() == 'nn' or
+ (interwikiLinks and hasCommentLine) and
+ self.site.language() == 'fr'):
+ text += config.line_separator * 2 + iw_msg
+ # Adding stars templates
+ if allstars:
+ text = text.strip()+self.site.family.interwiki_text_separator
+ allstars.sort()
+ for element in allstars:
+ text += '%s%s' % (element.strip(),config.line_separator)
+ if config.verbose_output:
+ pywikibot.output(u'%s' %element.strip())
+ # Adding the interwiki
+ if interwikiLinks:
+ text = pywikibot.replaceLanguageLinks(text, interwikiLinks,
+ site=self.site,
+ template=self.template,
+ template_subpage=subpage)
return text
def translateAndCapitalizeNamespaces(self, text):
@@ -463,9 +587,13 @@
German Wikipedia. It might be that it is not wanted on other wikis.
If there are any complaints, please file a bug report.
"""
- for level in range(1, 7):
- equals = '=' * level
- text = pywikibot.replaceExcept(text, r'\n' + equals + ' *(?P<title>[^=]+?) *' + equals + ' *\r\n', '\n' + equals + ' \g<title> ' + equals + '\r\n', ['comment', 'math', 'nowiki', 'pre'])
+ for level in xrange(1, 7):
+ param = {'equals': '=' * level, 'LS' : config.LS}
+ text = pywikibot.replaceExcept(
+ text,
+ r'\n%(equals)s *(?P<title>[^=]+?) *%(equals)s *\r?\n' % param,
+ '%(LS)s%(equals)s \g<title> %(equals)s%(LS)s' % param,
+ ['comment', 'math', 'nowiki', 'pre'])
return text
def putSpacesInLists(self, text):
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10350
Revision: 10350
Author: xqt
Date: 2012-06-12 07:43:08 +0000 (Tue, 12 Jun 2012)
Log Message:
-----------
doc for cosmetic_changes_deny_script here, introduced with r10337
Modified Paths:
--------------
branches/rewrite/scripts/cosmetic_changes.py
Modified: branches/rewrite/scripts/cosmetic_changes.py
===================================================================
--- branches/rewrite/scripts/cosmetic_changes.py 2012-06-12 07:29:31 UTC (rev 10349)
+++ branches/rewrite/scripts/cosmetic_changes.py 2012-06-12 07:43:08 UTC (rev 10350)
@@ -50,6 +50,18 @@
your user-config.py:
cosmetic_changes_disable['wikipedia'] = ('de', 'en', 'fr')
+
+You may disable cosmetic changes for a given script by appending all
+unwanted scripts to the list cosmetic_changes_deny_script in your
+user-config.py. By default it contains cosmetic_changes.py itself and touch.py.
+This overrides all other enabling settings for cosmetic changes. Please modify
+the given list by adding such lines to your user-config.py:
+
+ cosmetic_changes_deny_script.append('your_script_name_1')
+
+or by adding a list to the given one:
+
+ cosmetic_changes_deny_script += ['your_script_name_1', 'your_script_name_2']
"""
#
# (C) xqt, 2009-2012
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10349
Revision: 10349
Author: xqt
Date: 2012-06-12 07:29:31 +0000 (Tue, 12 Jun 2012)
Log Message:
-----------
cosmetic_changes_enable/disable dict enabled in r10336; here is the doc from trunk
Modified Paths:
--------------
branches/rewrite/scripts/cosmetic_changes.py
Modified: branches/rewrite/scripts/cosmetic_changes.py
===================================================================
--- branches/rewrite/scripts/cosmetic_changes.py 2012-06-11 23:03:00 UTC (rev 10348)
+++ branches/rewrite/scripts/cosmetic_changes.py 2012-06-12 07:29:31 UTC (rev 10349)
@@ -26,12 +26,30 @@
cosmetic_changes = True
+You may enable cosmetic changes for additional languages by adding the
+dictionary cosmetic_changes_enable to your user-config.py. It should contain
+a tuple of languages for each site where you wish to enable in addition to
+your own language if cosmetic_changes_mylang_only is True (see below).
+Please set your dictionary by adding such lines to your user-config.py:
+
+ cosmetic_changes_enable['wikipedia'] = ('de', 'en', 'fr')
+
There is another config variable: You can set
cosmetic_changes_mylang_only = False
if you're running a bot on multiple sites and want to do cosmetic changes on
all of them, but be careful if you do.
+
+You may disable cosmetic changes by adding all unwanted languages to the
+dictionary cosmetic_changes_disable in your user-config.py. It should contain
+a tuple of languages for each site where you wish to disable cosmetic changes.
+You may use it when cosmetic_changes_mylang_only is False, but you can also
+disable your own language. This also overrides the settings in the dictionary
+cosmetic_changes_enable. Please set this dictionary by adding such lines to
+your user-config.py:
+
+ cosmetic_changes_disable['wikipedia'] = ('de', 'en', 'fr')
"""
#
# (C) xqt, 2009-2012
@@ -716,8 +734,7 @@
if self.done: break
self.treat(page)
except KeyboardInterrupt:
- raise
- #pywikibot.output('\nQuitting program...')
+ pywikibot.output('\nQuitting program...')
def main():
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10345
Revision: 10345
Author: xqt
Date: 2012-06-11 05:34:45 +0000 (Mon, 11 Jun 2012)
Log Message:
-----------
exclude the "Kép" -> "Fájl" replacements on hu-wiki per request https://hu.wikipedia.org/wiki/WP:KF-JA#F.C3.A1jl_vs._K.C3.A9p
Modified Paths:
--------------
branches/rewrite/scripts/cosmetic_changes.py
Modified: branches/rewrite/scripts/cosmetic_changes.py
===================================================================
--- branches/rewrite/scripts/cosmetic_changes.py 2012-06-10 23:47:18 UTC (rev 10344)
+++ branches/rewrite/scripts/cosmetic_changes.py 2012-06-11 05:34:45 UTC (rev 10345)
@@ -221,6 +221,11 @@
for image in [u'Image', u'image']:
if image in namespaces:
namespaces.remove(image)
+ if self.site.lang == 'hu':
+ # do not change "Kép" on hu-wiki
+ for image in [u'Kép', u'kép']:
+ if image in namespaces:
+ namespaces.remove(image)
elif self.site.lang == 'pt':
# bug #3346901 should be implemented
continue