Revision: 4389
Author: btongminh
Date: 2007-09-29 19:09:58 +0000 (Sat, 29 Sep 2007)
Log Message:
-----------
First version of the plugin system.
Modified Paths:
--------------
trunk/pywikipedia/commonsdelinker/delinker.py
Added Paths:
-----------
trunk/pywikipedia/commonsdelinker/plugins/debug.py
Modified: trunk/pywikipedia/commonsdelinker/delinker.py
===================================================================
--- trunk/pywikipedia/commonsdelinker/delinker.py 2007-09-29 18:40:08 UTC (rev 4388)
+++ trunk/pywikipedia/commonsdelinker/delinker.py 2007-09-29 19:09:58 UTC (rev 4389)
@@ -33,6 +33,9 @@
# * There is a problem with images in the es.wikisource project namespace.
# The exact problem is described somewhere in Bryan's IRC logs, but it is
# unknown where exactly.
+# HOOKS:
+# before_delink, simple_replace, gallery_replace, complex_replace, before_save,
+# after_delink
import sys, os, threading, time
import traceback
@@ -71,6 +74,20 @@
return mysql_autoconnection.connect(**kwargs)
# TODO: Add support for sqlite3
raise RuntimeError('Unsupported database engine %s' % engine)
+
+class ImmutableByReference(object):
+ def __init__(self, data):
+ self.data = data
+ def set(self, value):
+ self.data = value
+ def get(self):
+ return self.data
+ def __str__(self):
+ return str(self.data)
+ def __unicode__(self):
+ return unicode(self.data)
+ def __int__(self):
+ return int(self.data)
class Delinker(threadpool.Thread):
# TODO: Method names could use some clean up
@@ -82,6 +99,9 @@
def delink_image(self, image, usage, timestamp, admin, reason, replacement = None):
""" Performs the delink for image on usage. """
output(u'%s Usage of %s: %s' % (self, image, usage))
+ if self.CommonsDelinker.exec_hook('before_delink',
+ (image, usage, timestamp, admin, reason, replacement)) is False:
+ return
skipped_images = {}
for (lang, family), pages in usage.iteritems():
@@ -121,7 +141,9 @@
page_namespace, page_title, result, replacement))
finally:
self.CommonsDelinker.unlock_site(site)
-
+
+ self.CommonsDelinker.exec_hook('after_delink', (image, usage, timestamp, admin,
reason, replacement))
+
if skipped_images:
time.sleep(self.CommonsDelinker.config['timeout'])
return self.delink_image(image, skipped_images, timestamp, admin, reason,
replacement)
@@ -134,6 +156,7 @@
will delink instead of replace."""
page = wikipedia.Page(site, page_title)
+ hook = None
# TODO: Per site config.
if page.namespace() in self.CommonsDelinker.config['delink_namespaces']:
@@ -155,11 +178,15 @@
r_image = u'(%s)' % create_regex(image).replace(r'\_', '[
_]')
def simple_replacer(match):
- if replacement == None:
+ m_replacement = ImmutableByReference(replacement)
+ groups = list(match.groups())
+ if hook:
+ self.CommonsDelinker.exec_hook('%s_replace' % hook,
+ (page, summary, image, m_replacement, match, groups))
+ if m_replacement.get() is None:
return u''
else:
- groups = list(match.groups())
- groups[1] = replacement
+ groups[1] = m_replacement.get()
return u''.join(groups)
# Previously links in image descriptions will cause
@@ -179,6 +206,7 @@
link_ends = [match.end() for match in re.finditer(r_e, text)]
r_simple = u'(\[\[%s)%s(.*)' % (r_namespace, r_image)
+ hook = 'simple'
replacements = []
for image_start in image_starts:
current_link_starts = [link_start for link_start in link_starts
@@ -206,6 +234,7 @@
if old: new_text = new_text.replace(old, new)
# Remove the image from galleries
+ hook = 'gallery'
r_galleries = ur'(?s)(\<%s\>)(.*?)(\<\/%s\>)' %
(create_regex_i('gallery'),
create_regex_i('gallery'))
r_gallery = ur'(?m)^((?:%s)?)%s(\s*(?:\|.*?)?\s*)$' % (r_namespace, r_image)
@@ -217,6 +246,7 @@
if text == new_text:
# All previous steps did not work, so the image is
# likely embedded in a complicated template.
+ hook = 'complex'
r_templates = ur'(?s)(\{\{.*?\}\})'
r_complicated = u'(?s)((?:%s)?)%s' % (r_namespace, r_image)
@@ -230,22 +260,16 @@
# to summary() code, to avoid checking the user page
# for each removal.
try:
- if config.CommonsDelinker.get('save_diff', False):
- # Save a diff
- import difflib
- diff = difflib.context_diff(
- text.encode('utf-8').splitlines(True),
- new_text.encode('utf-8').splitlines(True))
- f = open((u'diff/%s-%s-%s.txt' % (page_title.replace('/',
'-'),
- site.dbName(), page.editTime())).encode('utf-8', 'ignore'),
'w')
- f.writelines(diff)
- f.close()
+ new_text = ImmutableByReference(new_text)
+ m_summary = ImmutableByReference(summary)
+ if self.exec_hook('before_save', (page, text, new_text)) is False:
+ return 'skipped'
if self.CommonsDelinker.config.get('edit', True) and not \
((self.CommonsDelinker.site.lang == 'commons') ^ \
(config.usernames.get('commons', {}).get(
'commons') == 'CommonsDelinker')):
- page.put(new_text, summary)
+ page.put(new_text.get(), m_summary.get())
return 'ok'
except wikipedia.EditConflict:
# Try again
@@ -580,6 +604,32 @@
# self.log_limit = '500'
self.log_limit = '500'
+ def init_plugins(self):
+ self.hooks = {}
+ for item in self.config.get('plugins', ()):
+ mname, name = item.split('.', 1)
+ module = __import__('delinker_plugins.' + mname)
+ plugin = getattr(module, mname)
+ if type(plugin) is type:
+ plugin = plugin(self)
+ if plugin.hook not in self.hooks:
+ self.hooks[plugin.hook] = []
+ self.hooks[plugin.hook].append(plugin)
+
+ def exec_hook(self, name, args):
+ if name in self.hooks:
+ for plugin in self.hooks[name][:]:
+ try:
+ if plugin(*args) is False:
+ return False
+ except Exception, e:
+ if type(e) in (SystemExit, KeyboardInterrupt):
+ raise
+ output('Warning! Error executing hook %s' % plugin, False)
+ output('%s: %s' % (e.__class__.__name__, str(e)), False)
+ traceback.print_exc(file = sys.stderr)
+ self.hooks[name].remove(plugin)
+
def connect_mysql(self):
self.database = connect_database()
self.cursor = self.database.cursor()
@@ -766,10 +816,9 @@
sys.stdout.flush()
else:
sys.stderr.flush()
-
+
def main():
global CD
-
output(u'Running ' + __version__)
CD = CommonsDelinker()
output(u'This bot runs from: ' + str(CD.site))
@@ -810,5 +859,6 @@
# Flush the standard streams
sys.stdout.flush()
sys.stderr.flush()
-
+
if __name__ == '__main__': main()
+
Added: trunk/pywikipedia/commonsdelinker/plugins/debug.py
===================================================================
--- trunk/pywikipedia/commonsdelinker/plugins/debug.py (rev 0)
+++ trunk/pywikipedia/commonsdelinker/plugins/debug.py 2007-09-29 19:09:58 UTC (rev 4389)
@@ -0,0 +1,17 @@
+import difflib
+__version__ = '$Id: $'
+
+class Diff(object):
+ hook = 'before_save'
+ def __init__(self, CommonsDelinker):
+ self.CommonsDelinker = CommonsDelinker
+ def __call__(self, page, text, new_text, summary):
+ diff = difflib.context_diff(
+ text.encode('utf-8').splitlines(True),
+ new_text.get().encode('utf-8').splitlines(True))
+
+ f = open((u'diff/%s-%s-%s.txt' % (page.urlname().replace('/',
'-'),
+ page.site().dbName(), page.editTime())).encode('utf-8', 'ignore'),
'w')
+
+ f.writelines(diff)
+ f.close()
\ No newline at end of file