jenkins-bot has submitted this change and it was merged.
Change subject: bugfixes, code improvements, i18n support
......................................................................
bugfixes, code improvements, i18n support
- fix TextEditor import
- -automatic option has been changed to the more common -always
- now we have i18n support from translatewiki.net (twn)
- use generator instead of a list for all pages to be processed
- preload pages
- show page counter after processing, because we use a generator
- remove obsolete pywikibot.stopme() call
Change-Id: I8c983620e8d9a335b4fafa61e84936dc4c41f006
---
M scripts/spamremove.py
1 file changed, 73 insertions(+), 85 deletions(-)
Approvals:
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/spamremove.py b/scripts/spamremove.py
index 8ee5397..2c6c50b 100755
--- a/scripts/spamremove.py
+++ b/scripts/spamremove.py
@@ -15,16 +15,16 @@
* not change the page in question
Command line options:
--automatic: Do not ask, but remove the lines automatically. Be very careful
- in using this option!
+-always Do not ask, but remove the lines automatically. Be very
+ careful in using this option!
--namespace: Filters the search to a given namespace. If this is specified
- multiple times it will search all given namespaces
+-namespace: Filters the search to a given namespace. If this is specified
+ multiple times it will search all given namespaces
"""
#
-# (C) Pywikipedia bot team, 2007-2010
+# (C) Pywikipedia bot team, 2007-2014
#
# Distributed under the terms of the MIT license.
#
@@ -33,35 +33,17 @@
#
import pywikibot
-from pywikibot import config
-from pywikibot import pagegenerators
-import editarticle
-import sys
+from pywikibot import pagegenerators, i18n
+from pywikibot.editor import TextEditor
def main():
- automatic = False
+ always = False
namespaces = []
- msg = {
- 'ar': u'إزالة الوصلات إلى موقع سبام %s',
- 'de': u'Entferne in Spam-Blacklist eingetragenen Weblink auf
%s',
- 'en': u'Removing links to spamming site %s',
- 'es': u'Removiendo enlaces a sitio publicitario %s',
- 'fa': u'حذف پیوند به وبگاه هرزنگاری %s',
- 'he': u'מסיר קישורים לאתר ספאם %s',
- 'fr': u'Suppression du lien blacklisté %s',
- 'it': u'Rimuovo link contenuto nella Spam-Blacklist %s',
- 'ja': u'ロボットによる: 迷惑リンク削除 %s',
- 'nl': u'Links naar gespamde site: %s verwijderd',
- 'pt': u'Removendo links de spam do site %s',
- 'ta': u'எரிதமாக இணைக்கப்பட்ட %s இணையத்தளம் நீக்கப்பட்டது',
- 'vi': u'xóa các liên kết đến website spam %s',
- 'zh': u'機器人: 移除廣告黑名單連結 %s',
- }
spamSite = ''
for arg in pywikibot.handleArgs():
- if arg.startswith("-automatic"):
- automatic = True
+ if arg == "-always":
+ always = True
elif arg.startswith('-namespace:'):
try:
namespaces.append(int(arg[len('-namespace:'):]))
@@ -69,62 +51,68 @@
namespaces.append(arg[len('-namespace:'):])
else:
spamSite = arg
- if not automatic:
- config.put_throttle = 1
- if not spamSite:
- pywikibot.showHelp('spamremove')
- pywikibot.output(u"No spam site specified.")
- sys.exit()
- mysite = pywikibot.getSite()
- pages = list(set(mysite.exturlusage(spamSite)))
- if namespaces:
- pages = list(set(pagegenerators.NamespaceFilterPageGenerator(pages,
- namespaces)))
- if len(pages) == 0:
- pywikibot.output('No page found.')
- else:
- pywikibot.output('%d pages found.' % len(pages))
- for p in pages:
- text = p.get()
- if not spamSite in text:
- continue
- # Show the title of the page we're working on.
- # Highlight the title in purple.
- pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default}
<<<"
- % p.title())
- lines = text.split('\n')
- newpage = []
- lastok = ""
- for line in lines:
- if spamSite in line:
- if lastok:
- pywikibot.output(lastok)
- pywikibot.output('\03{lightred}%s\03{default}' % line)
- lastok = None
- else:
- newpage.append(line)
- if line.strip():
- if lastok is None:
- pywikibot.output(line)
- lastok = line
- if automatic:
- answer = "y"
- else:
- answer = pywikibot.inputChoice(u'\nDelete the red lines?',
- ['yes', 'no',
'edit'],
- ['y', 'N', 'e'],
'n')
- if answer == "n":
- continue
- elif answer == "e":
- editor = editarticle.TextEditor()
- newtext = editor.edit(text, highlight=spamSite,
- jumpIndex=text.find(spamSite))
- else:
- newtext = "\n".join(newpage)
- if newtext != text:
- p.put(newtext, pywikibot.translate(mysite, msg) % spamSite)
-try:
+ if not spamSite:
+ pywikibot.showHelp()
+ pywikibot.output(u"No spam site specified.")
+ return
+
+ mysite = pywikibot.getSite()
+ pages = mysite.exturlusage(spamSite)
+ if namespaces:
+ pages = pagegenerators.NamespaceFilterPageGenerator(pages, namespaces)
+ pages = pagegenerators.PreloadingGenerator(pages)
+
+ summary = i18n.twtranslate(mysite, 'spamremove-remove',
+ {'url': spamSite})
+ for i, p in enumerate(pages, 1):
+ text = p.text
+ if not spamSite in text:
+ continue
+ # Show the title of the page we're working on.
+ # Highlight the title in purple.
+ pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default}
<<<"
+ % p.title())
+ lines = text.split('\n')
+ newpage = []
+ lastok = ""
+ for line in lines:
+ if spamSite in line:
+ if lastok:
+ pywikibot.output(lastok)
+ pywikibot.output('\03{lightred}%s\03{default}' % line)
+ lastok = None
+ else:
+ newpage.append(line)
+ if line.strip():
+ if lastok is None:
+ pywikibot.output(line)
+ lastok = line
+ if always:
+ answer = "y"
+ else:
+ answer = pywikibot.inputChoice(u'\nDelete the red lines?',
+ ['yes', 'no',
'edit'],
+ ['y', 'N', 'e'],
'n')
+ if answer == "n":
+ continue
+ elif answer == "e":
+ editor = TextEditor()
+ newtext = editor.edit(text, highlight=spamSite,
+ jumpIndex=text.find(spamSite))
+ else:
+ newtext = "\n".join(newpage)
+ if newtext != text:
+ p.text = newtext
+ p.save(summary)
+ else:
+ if "i" not in locals():
+ pywikibot.output('No page found.')
+ elif i == 1:
+ pywikibot.output('1 pages done.')
+ else:
+ pywikibot.output('%d pages done.' % i)
+
+
+if __name__ == '__main__':
main()
-finally:
- pywikibot.stopme()
--
To view, visit
https://gerrit.wikimedia.org/r/120203
To unsubscribe, visit
https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I8c983620e8d9a335b4fafa61e84936dc4c41f006
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: jenkins-bot <>