Revision: 5453
Author: russblau
Date: 2008-05-28 13:23:58 +0000 (Wed, 28 May 2008)
Log Message:
-----------
add site.code property and separate wiki ID code from language (which may be different)
Modified Paths:
--------------
branches/rewrite/pywikibot/date.py
branches/rewrite/pywikibot/login.py
branches/rewrite/pywikibot/page.py
branches/rewrite/pywikibot/site.py
Modified: branches/rewrite/pywikibot/date.py
===================================================================
--- branches/rewrite/pywikibot/date.py 2008-05-28 12:28:34 UTC (rev 5452)
+++ branches/rewrite/pywikibot/date.py 2008-05-28 13:23:58 UTC (rev 5453)
@@ -1500,7 +1500,7 @@
self.site = site
def __call__(self, m, d):
- return formats['Day_' + enMonthNames[m-1]][self.site.lang](d)
+ return formats['Day_' + enMonthNames[m-1]][self.site.code](d)
def formatYear(lang, year):
Modified: branches/rewrite/pywikibot/login.py
===================================================================
--- branches/rewrite/pywikibot/login.py 2008-05-28 12:28:34 UTC (rev 5452)
+++ branches/rewrite/pywikibot/login.py 2008-05-28 13:23:58 UTC (rev 5453)
@@ -72,24 +72,24 @@
if sysop:
try:
self.username = config.sysopnames\
- [self.site.family.name][self.site.language()]
+ [self.site.family.name][self.site.code]
except:
raise NoUsername(
u'ERROR: Sysop username for %s:%s is undefined.\nIf you have a sysop account for that site, please add such a line to user-config.py:\n\nsysopnames[\'%s\'][\'%s\'] = \'myUsername\''
- % (self.site.family.name, self.site.language(),
- self.site.family.name, self.site.language()))
+ % (self.site.family.name, self.site.code,
+ self.site.family.name, self.site.code))
else:
try:
self.username = config.usernames\
- [self.site.family.name][self.site.language()]
+ [self.site.family.name][self.site.code]
except:
raise NoUsername(
u"""ERROR: Username for %s:%s is undefined.
If you have an account for that site, please add a line to user-config.py:
usernames['%s']['%s'] = 'myUsername'"""
- % (self.site.family.name, self.site.language(),
- self.site.family.name, self.site.language()))
+ % (self.site.family.name, self.site.code,
+ self.site.family.name, self.site.code))
self.password = password
if getattr(config, 'password_file', ''):
self.readPassword()
@@ -100,8 +100,8 @@
the policy on the respective wiki.
"""
return True # DEBUG
- if botList.has_key(self.site.family.name) and botList[self.site.family.name].has_key(self.site.language()):
- botListPageTitle = botList[self.site.family.name][self.site.language()]
+ if botList.has_key(self.site.family.name) and botList[self.site.family.name].has_key(self.site.code):
+ botListPageTitle = botList[self.site.family.name][self.site.code]
botListPage = pywikibot.Page(self.site, botListPageTitle)
for linkedPage in botListPage.linkedPages():
if linkedPage.titleWithoutNamespace() == self.username:
@@ -189,7 +189,7 @@
"""
filename = config.datafilepath('%s-%s-%s-login.data'
% (self.site.family.name,
- self.site.language(),
+ self.site.code,
self.username))
f = open(filename, 'w')
f.write(data)
@@ -217,7 +217,7 @@
if len(entry) == 2:
if entry[0] == self.username: self.password = entry[1]
elif len(entry) == 4:
- if entry[0] == self.site.lang and \
+ if entry[0] == self.site.code and \
entry[1] == self.site.family.name and \
entry[2] == self.username:
self.password = entry[3]
@@ -238,7 +238,7 @@
logging.info(u"Should be logged in now")
# Show a warning according to the local bot policy
if not self.botAllowed():
- logging.error(u'*** Your username is not listed on [[%s]].\n*** Please make sure you are allowed to use the robot before actually using it!' % botList[self.site.family.name][self.site.lang])
+ logging.error(u'*** Your username is not listed on [[%s]].\n*** Please make sure you are allowed to use the robot before actually using it!' % botList[self.site.family.name][self.site.code])
return True
else:
logging.error(u"Login failed. Wrong password or CAPTCHA answer?")
Modified: branches/rewrite/pywikibot/page.py
===================================================================
--- branches/rewrite/pywikibot/page.py 2008-05-28 12:28:34 UTC (rev 5452)
+++ branches/rewrite/pywikibot/page.py 2008-05-28 13:23:58 UTC (rev 5453)
@@ -155,12 +155,12 @@
if forceInterwiki or (
allowInterwiki and self.site() != pywikibot.Site()):
if self.site().family != pywikibot.Site().family \
- and self.site().family.name != self.site().language():
+ and self.site().family.name != self.site().code:
return u'[[%s:%s:%s]]' % (self.site().family.name,
- self.site().language(),
+ self.site().code,
self._title)
else:
- return u'[[%s:%s]]' % (self.site().language(),
+ return u'[[%s:%s]]' % (self.site().code,
self._title)
elif textlink and (self.isImage() or self.isCategory()):
return u'[[:%s]]' % title
@@ -231,7 +231,7 @@
if not hasattr(self, '_autoFormat'):
from pywikibot import date
self._autoFormat = date.getAutoFormat(
- self.site().language(),
+ self.site().code,
self.title(withNamespace=False)
)
return self._autoFormat
@@ -416,7 +416,7 @@
"""
if not hasattr(self, '_isDisambig'):
- locdis = self.site().family.disambig( self.site().lang )
+ locdis = self.site().family.disambig(self.site().code)
for template in self.templates():
tn = template.title(withNamespace=False)
if tn in locdis:
@@ -1393,7 +1393,7 @@
if prefix in fam.langs.keys():
newsite = pywikibot.Site(prefix, fam)
else:
- otherlang = self.site.language()
+ otherlang = self.site.code
familyName = fam.get_known_families(site=self.site)[prefix]
if familyName in ['commons', 'meta']:
otherlang = familyName
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2008-05-28 12:28:34 UTC (rev 5452)
+++ branches/rewrite/pywikibot/site.py 2008-05-28 13:23:58 UTC (rev 5453)
@@ -74,28 +74,28 @@
@type user: str
"""
- self._lang = code.lower()
+ self.__code = code.lower()
if isinstance(fam, basestring) or fam is None:
self.__family = Family(fam, fatal=False)
else:
self.__family = fam
# if we got an outdated language code, use the new one instead.
- if self.__family.obsolete.has_key(self._lang):
- if self.__family.obsolete[self._lang] is not None:
- self._lang = self.__family.obsolete[self._lang]
+ if self.__family.obsolete.has_key(self.__code):
+ if self.__family.obsolete[self.__code] is not None:
+ self.__code = self.__family.obsolete[self.__code]
else:
# no such language anymore
raise NoSuchSite("Language %s in family %s is obsolete"
- % (self._lang, self.__family.name))
- if self._lang not in self.languages():
- if self._lang == 'zh-classic' and 'zh-classical' in self.languages():
- self._lang = 'zh-classical'
+ % (self.__code, self.__family.name))
+ if self.__code not in self.languages():
+ if self.__code == 'zh-classic' and 'zh-classical' in self.languages():
+ self.__code = 'zh-classical'
# database hack (database is varchar[10] -> zh-classical
# is cut to zh-classic.
else:
raise NoSuchSite("Language %s does not exist in family %s"
- % (self._lang, self.__family.name))
+ % (self.__code, self.__family.name))
self._username = user
@@ -117,15 +117,13 @@
@property
def family(self):
- """Return the associated Family object."""
+ """The Family object for this Site's wiki family."""
return self.__family
- def language(self):
- """Return the site's language code."""
- # N.B. this code does not always identify a language as such, but
- # may identify a wiki that is part of any family grouping
- # FIXME: need to separate language (for L18N purposes) from code
- return self._lang
+ @property
+ def code(self):
+ """The identifying code for this Site."""
+ return self.__code
def user(self):
"""Return the currently-logged in bot user, or None."""
@@ -140,7 +138,7 @@
try:
method = getattr(self.family, attr)
f = lambda *args, **kwargs: \
- method(self.language(), *args, **kwargs)
+ method(self.code, *args, **kwargs)
if hasattr(method, "__doc__"):
f.__doc__ = method.__doc__
return f
@@ -150,19 +148,19 @@
def sitename(self):
"""Return string representing this Site's name and language."""
- return self.family.name+':'+self.language()
+ return self.family.name+':'+self.code
__str__ = sitename
def __repr__(self):
- return 'Site("%s", "%s")' % (self.language(), self.family.name)
+ return 'Site("%s", "%s")' % (self.code, self.family.name)
def __hash__(self):
return hash(repr(self))
def linktrail(self):
"""Return regex for trailing chars displayed as part of a link."""
- return self.family.linktrail(self.language())
+ return self.family.linktrail(self.code)
def languages(self):
"""Return list of all valid language codes for this site's Family."""
@@ -205,11 +203,11 @@
if self.language() == 'ar':
# It won't work with REDIRECT[[]] but it work with the local,
# if problems, try to find a work around. FixMe!
- return self.family.redirect.get(self.language(), [u"تحويل"])[0]
+ return self.family.redirect.get(self.code, [u"تحويل"])[0]
else:
- return self.family.redirect.get(self.language(), [u"REDIRECT"])[0]
+ return self.family.redirect.get(self.code, [u"REDIRECT"])[0]
else:
- return self.family.redirect.get(self.language(), None)
+ return self.family.redirect.get(self.code, None)
def lock_page(self, page, block=True):
"""Lock page for writing. Must be called before writing any page.
@@ -350,7 +348,7 @@
# ANYTHING BELOW THIS POINT IS NOT YET IMPLEMENTED IN __init__()
self._mediawiki_messages = {}
- self.nocapitalize = self._lang in self.family.nocapitalize
+ self.nocapitalize = self.__code in self.family.nocapitalize
self._userData = [False, False]
self._userName = [None, None]
self._isLoggedIn = [None, None]
@@ -488,6 +486,11 @@
def case(self):
return self.getsiteinfo()['case']
+ def language(self):
+ """Return the code for the language of this Site."""
+ # N.B. this code may or may not be the same as self.code
+ return self.getsiteinfo()['lang']
+
def namespaces(self):
"""Return dict of valid namespaces on this wiki."""
self.getsiteinfo()
@@ -1651,21 +1654,21 @@
try:
if sysop:
try:
- username = config.sysopnames[self.family.name][self.language()]
+ username = config.sysopnames[self.family.name][self.code]
except KeyError:
raise NoUsername("""\
You tried to perform an action that requires admin privileges, but you haven't
entered your sysop name in your user-config.py. Please add
sysopnames['%s']['%s']='name' to your user-config.py"""
- % (self.family.name, self.language()))
+ % (self.family.name, self.code))
else:
- username = config.usernames[self.family.name][self.language()]
+ username = config.usernames[self.family.name][self.code]
except KeyError:
self._cookies[index] = None
self._isLoggedIn[index] = False
else:
tmp = '%s-%s-%s-login.data' % (
- self.family.name, self.language(), username)
+ self.family.name, self.code, username)
fn = config.datafilepath('login-data', tmp)
if not os.path.exists(fn):
self._cookies[index] = None
@@ -2491,8 +2494,8 @@
Use optional Site argument 'othersite' to generate an interwiki link.
"""
- if othersite and othersite.lang != self.language():
- return u'[[%s:%s]]' % (self.language(), title)
+ if othersite and othersite.code != self.code:
+ return u'[[%s:%s]]' % (self.code, title)
else:
return u'[[%s]]' % title
@@ -2518,7 +2521,7 @@
if self.ns_index(first):
return False
if first in interlangTargetFamily.langs:
- if first == self.language():
+ if first == self.code:
return self.isInterwikiLink(rest)
else:
return True
@@ -2541,10 +2544,10 @@
red = u"تحويل"
try:
if redDefault == red:
- redirKeywords = [red] + self.family.redirect[self.language()]
+ redirKeywords = [red] + self.family.redirect[self.code]
redirKeywordsR = r'(?:' + '|'.join(redirKeywords) + ')'
else:
- redirKeywords = [red] + self.family.redirect[self.language()]
+ redirKeywords = [red] + self.family.redirect[self.code]
redirKeywordsR = r'(?:' + redDefault + '|'.join(redirKeywords) + ')'
except KeyError:
# no localized keyword for redirects
@@ -2561,7 +2564,7 @@
def version(self):
"""Return MediaWiki version number as a string."""
- return self.family.version(self.language())
+ return self.family.version(self.code)
def versionnumber(self):
"""Return an int identifying MediaWiki version.
@@ -2570,7 +2573,7 @@
number; i.e., 'X' in version '1.X.Y'
"""
- return self.family.versionnumber(self.language())
+ return self.family.versionnumber(self.code)
def live_version(self):
"""Return the 'real' version number found on [[Special:Version]]
@@ -2590,7 +2593,7 @@
self._mw_version = (int(m.group(1)), int(m.group(2)),
m.group(3))
else:
- self._mw_version = self.family.version(self.language()).split(".")
+ self._mw_version = self.family.version(self.code).split(".")
return self._mw_version
def checkCharset(self, charset):
@@ -2607,34 +2610,34 @@
def shared_image_repository(self):
"""Return a tuple of image repositories used by this site."""
- return self.family.shared_image_repository(self.language())
+ return self.family.shared_image_repository(self.code)
def __cmp__(self, other):
"""Perform equality and inequality tests on Site objects."""
if not isinstance(other, Site):
return 1
if self.family == other.family:
- return cmp(self.language(), other.lang)
+ return cmp(self.code, other.code)
return cmp(self.family.name, other.family.name)
def category_on_one_line(self):
"""Return True if this site wants all category links on one line."""
- return self.language() in self.family.category_on_one_line
+ return self.code in self.family.category_on_one_line
def interwiki_putfirst(self):
"""Return list of language codes for ordering of interwiki links."""
- return self.family.interwiki_putfirst.get(self.language(), None)
+ return self.family.interwiki_putfirst.get(self.code, None)
def interwiki_putfirst_doubled(self, list_of_links):
# TODO: is this even needed? No family in the framework has this
# dictionary defined!
- if self.family.interwiki_putfirst_doubled.has_key(self.language()):
- if len(list_of_links) >= self.family.interwiki_putfirst_doubled[self.language()][0]:
+ if self.family.interwiki_putfirst_doubled.has_key(self.code):
+ if len(list_of_links) >= self.family.interwiki_putfirst_doubled[self.code][0]:
list_of_links2 = []
for lang in list_of_links:
- list_of_links2.append(lang.language())
+ list_of_links2.append(lang.code)
list = []
- for lang in self.family.interwiki_putfirst_doubled[self.language()][1]:
+ for lang in self.family.interwiki_putfirst_doubled[self.code][1]:
try:
list.append(list_of_links[list_of_links2.index(lang)])
except ValueError:
@@ -2658,7 +2661,7 @@
import catlib
try:
return catlib.Category(self,
- self.namespace(14)+':'+self.family.disambcatname[self.language()])
+ self.namespace(14)+':'+self.family.disambcatname[self.code])
except KeyError:
raise NoPage(u'No page %s.' % page)
Revision: 5450
Author: nicdumz
Date: 2008-05-28 10:54:30 +0000 (Wed, 28 May 2008)
Log Message:
-----------
syntax / regex fixes for previous commit
Modified Paths:
--------------
trunk/pywikipedia/welcome.py
Modified: trunk/pywikipedia/welcome.py
===================================================================
--- trunk/pywikipedia/welcome.py 2008-05-28 10:37:23 UTC (rev 5449)
+++ trunk/pywikipedia/welcome.py 2008-05-28 10:54:30 UTC (rev 5450)
@@ -387,29 +387,29 @@
def parselog(wsite, raw, talk, number):
""" The function to load the users (only users who have a certain number of edits) """
- #FIXME : Why is there a need for this 'done' list ?
+ #FIXME : Why is there a need for this 'done' list ? We're not even checking for duplicates...
done = list()
- autocreated = wikipedia.mediawiki_message('newuserlog-autocreate-entry')
+ autocreated = wsite.mediawiki_message('newuserlog-autocreate-entry')
# I search with a regex how many user have not the talk page
# and i put them in a list (i find it more easy and secure).
# XXX: That's the regex, if there are problems, take a look here.
- reg = u'\(<a href=\"' + re.escape(wsite.path())
- + u'\?title=%s(?P<user>.*?)&(?:amp;|)action=(?:edit|editredlink|edit&redlink=1)\"' % talk
- + u'.*?\) (?P<reason>.*?) </li>'
+ reg = u'\(<a href=\"' + re.escape(wsite.path())
+ reg += u'\?title=%s(?P<user>.*?)&(?:amp;|)action=(?:edit|editredlink|edit&redlink=1)\"' % talk
+ reg += u'.*?\) (?P<reason>.*?) \u200E </li>'
p = re.compile(reg, re.UNICODE)
for x in p.finditer(raw):
+ username = x.group('user')
+ if username not in done:
+ done.append(username)
#skip autocreated users (SUL)
if autocreated in x.group('reason'):
- wikipedia.output(u'%s has been created automatically, skipping...')
+ wikipedia.output(u'%s has been created automatically, skipping...' % username)
continue
- username = x.group('user')
- if username not in done:
- done.append(username)
userpage = wikipedia.Page(wsite, username)
# Defing the contrib's page of the user.
pathWiki = wsite.family.nicepath(wsite.lang)
@@ -847,7 +847,7 @@
# is in username
lower_uname.replace(xy, '')
for word in elenco:
- baduser = word.lower() in lower_uname:
+ baduser = word.lower() in lower_uname
break
# He has a badusername, trying to report him...
if baduser:
Revision: 5449
Author: nicdumz
Date: 2008-05-28 10:37:23 +0000 (Wed, 28 May 2008)
Log Message:
-----------
* Now skipping autocreated users (SUL)
* CODE CLEANUP !!!!! :
** don't compile a static regex several times in a while/for, compile it before. Similarly, don't string.lower() several times in several 'for', just define lower_str = string.lower()
**
if test:
var = True
break
else:
var = False
break
#.... making it
var = test
break
** when you *know* that a boolean var has been affected, avoid 'if var == True:' : 'if var:' is enough
** using re.finditer instead of 'pos = 0;while 1: x = re.search(...); pos = x.end(); ...'
** We were yielding None entries in a generator, and then when calling the generator : 'for x in gen(): if x==None: continue; else: ...' .... Just don't yield None entries, it works the same and is simpler.
** When you know that some limits are induced by your code to your script, commenting for the devs "#won't work for v>50" is fine, but warning the end-user, in the documentation, or with some output, is even better.
** Adding several FIXMEs where I found weird lines without being able to find a proper fix. Please take a look.
Modified Paths:
--------------
trunk/pywikipedia/welcome.py
Modified: trunk/pywikipedia/welcome.py
===================================================================
--- trunk/pywikipedia/welcome.py 2008-05-28 08:24:37 UTC (rev 5448)
+++ trunk/pywikipedia/welcome.py 2008-05-28 10:37:23 UTC (rev 5449)
@@ -45,7 +45,7 @@
This script understands the following command-line arguments:
-edit[:#] Define how many edits a new user needs to be welcomed
- (default: 1)
+ (default: 1, max: 50)
-time[:#] Define how many seconds the bot sleeps before restart
(default: 3600)
@@ -374,48 +374,39 @@
def load_word_function(wsite, raw):
""" This is a function used to load the badword and the whitelist."""
- list_loaded = list()
- pos = 0
- # I search with a regex how many user have not the talk page
- # and i put them in a list (i find it more easy and secure).
- while 1:
- regl = r"(\"|\')(.*?)(\"|\')(, |\))"
- page = re.compile(regl, re.UNICODE)
- xl = page.search(raw, pos)
- if xl == None:
- if len(list_loaded) >= 1:
- wikipedia.output(u'\nReal-time list loaded.')
- return list_loaded
- break
- elif len(done) == 0:
- wikipedia.output(u'There was no input on the real-time page.')
- load_2 = False
- continue
- pos = xl.end()
- badword = xl.group(2)
- if badword not in list_loaded:
- list_loaded.append(badword)
+ regl = r"(?:\"|\')(.*?)(?:\"|\')(?:, |\))"
+ page = re.compile(regl, re.UNICODE)
+ list_loaded = page.findall(raw)
+
+ if len(list_loaded) == 0:
+ wikipedia.output(u'There was no input on the real-time page.')
+ else:
+ wikipedia.output(u'\nReal-time list loaded.')
+ return list_loaded
+
def parselog(wsite, raw, talk, number):
""" The function to load the users (only users who have a certain number of edits) """
+ #FIXME : Why is there a need for this 'done' list ?
done = list()
- pos = 0
+
+ autocreated = wikipedia.mediawiki_message('newuserlog-autocreate-entry')
+
# I search with a regex how many user have not the talk page
# and i put them in a list (i find it more easy and secure).
- while 1:
- # FIXME: That's the regex, if there are problems, take a look here.
-
- reg = r'\(<a href=\"' + re.escape(wsite.path()) + r'\?title=%s(?P<user>.*?)&(?:amp;|)action=(?:edit|editredlink|edit&redlink=1)\"' % talk
- p = re.compile(reg, re.UNICODE)
- x = p.search(raw, pos)
- if x == None:
- if len(done) >= 1:
- wikipedia.output(u'\nLoaded all users...')
- break
- elif len(done) == 0:
- wikipedia.output(u'There is nobody to be welcomed...')
- break
- pos = x.end()
+
+ # XXX: That's the regex, if there are problems, take a look here.
+
+ reg = u'\(<a href=\"' + re.escape(wsite.path())
+ + u'\?title=%s(?P<user>.*?)&(?:amp;|)action=(?:edit|editredlink|edit&redlink=1)\"' % talk
+ + u'.*?\) (?P<reason>.*?) </li>'
+ p = re.compile(reg, re.UNICODE)
+
+ for x in p.finditer(raw):
+ #skip autocreated users (SUL)
+ if autocreated in x.group('reason'):
+ wikipedia.output(u'%s has been created automatically, skipping...')
+ continue
username = x.group('user')
if username not in done:
done.append(username)
@@ -425,7 +416,11 @@
con = '%sSpecial:Contributions/%s' % (pathWiki, userpage.urlname())
# Getting the contribs...
contribs = wsite.getUrl(con)
- contribnum = contribs.count('<li>') # It counts the first 50 edits but it shouldn't be a problem.
+
+ #FIXME: It counts the first 50 edits
+ # if number > 50, it won't work
+ contribnum = contribs.count('<li>')
+
if contribnum >= number:
wikipedia.output(u'%s has enough edits to be welcomed' % userpage.titleWithoutNamespace() )
# The user must be welcomed, return his data.
@@ -433,15 +428,19 @@
elif contribnum < number:
if contribnum == 0:
wikipedia.output(u'%s has no contributions.' % userpage.titleWithoutNamespace() )
- # That user mustn't be welcomed, return None.
- yield None
else:
wikipedia.output(u'%s has only %s contributions.' % (userpage.titleWithoutNamespace(), str(contribnum)) )
- # That user mustn't be welcomed, return None.
- yield None
+ # That user mustn't be welcomed.
+ continue
+ if len(done) == 0:
+ wikipedia.output(u'There is nobody to be welcomed...')
+ else:
+ wikipedia.output(u'\nLoaded all users...')
+
def report(wsite, rep_page, username, com, rep):
""" The function to report the username to a wiki-page. """
+
another_page = wikipedia.Page(wsite, rep_page)
if another_page.exists():
text_get = another_page.get()
@@ -482,7 +481,7 @@
""" Function to load the random signatures. """
reg = r"^\* ?(.*?)$"
creg = re.compile(reg, re.M)
- if fileOption == False:
+ if not fileOption:
signPage = wikipedia.Page(wsite, signPageTitle)
signText = signPage.get()
else:
@@ -696,7 +695,7 @@
welcomer = u'{{subst:Benvenuto}} %s'
welcomed_users = list()
- if savedata == True and os.path.exists(
+ if savedata and os.path.exists(
wikipedia.config.datafilepath(filename)):
f = file(filename)
number_user = cPickle.load(f)
@@ -707,7 +706,7 @@
# Here there is the main loop.
while True:
- if filter_wp == True:
+ if filter_wp:
# A standard list of bad username components (you can change/delate it in your project...).
# [ I divided the list into three to make it smaller...]
elencoaf = [' ano', ' anus', 'anal ', 'babies', 'baldracca', 'balle', 'bastardo',
@@ -756,7 +755,7 @@
elencovarie = list()
# Joining the three lists..
elenco = elencoaf + elencogz + elencovarie
- if filter_wp == True:
+ if filter_wp:
# That is the default whitelist (it contains few name because it has been improved in the latest days..).
whitelist_default = ['emiliano']
if wtlpg != None:
@@ -791,7 +790,7 @@
log = wsite.getUrl(URL)
wikipedia.output(u'Loading latest %s new users from %s...\n' % (limit, wsite.hostname()))
# Determine which signature to use
- if random == True:
+ if random:
try:
wikipedia.output(u'Loading random signatures...')
signList = defineSign(wsite, signPageTitle, fileSignName, fileOption)
@@ -799,10 +798,8 @@
wikipedia.output(u'The list with signatures is not available... Using default signature...')
random = False
for found_result in parselog(wsite, log, talk, number):
- if found_result == None:
- continue
# Compiling the signature to be used.
- if random == True:
+ if random:
if number_user + 1 > len(signList):
number_user = 0
yield number_user
@@ -828,10 +825,12 @@
wikipedia.output(u'%s has been blocked! Skipping...' % usertalkpage.titleWithoutNamespace())
continue
# Understand if the user has a bad-username.
+ username = str(username).encode(config.console_encoding)
+ lower_uname = username.lower()
for word in elenco:
- username = str(username).encode(config.console_encoding)
- if word.lower() in username.lower():
+ if word.lower() in lower_uname:
baduser = True
+ # What's this ? Docu please.
if wsite.lang == 'it':
final_rep = "%s%s}}" % (rep_text, word)
break
@@ -839,40 +838,46 @@
final_rep = rep_text
break
# Checking in the whitelist...
+
+ # FIXME I believe this is broken
for xy in whitelist:
- if xy.lower() in username.lower():
- username.replace(xy, '')
+ if xy.lower() in lower_uname:
+ # the next line does *not* change username
+ # besides replacing xy is useless if only xy.lower()
+ # is in username
+ lower_uname.replace(xy, '')
for word in elenco:
- if word.lower() in username.lower():
- baduser = True
- break
- else:
- baduser = False
- break
+ baduser = word.lower() in lower_uname:
+ break
# He has a badusername, trying to report him...
if baduser:
+ # FIXME : while 1 ? What the... ? Do we need a while here ? Documentation please.
while 1:
- if ask == True:
+ if ask:
wikipedia.output(u'%s may have an unwanted username, what shall I do?' % usertalkpage.titleWithoutNamespace())
+
+ # FIXME : consider using inputChoice here
+
answer = wikipedia.input(u'[B]lock or [W]elcome?')
for w in block:
if w in answer:
if not usertalkpage.exists():
# Check if the user has been already blocked (second check).
- ki = blocked(wsite, username)
- if ki == True:
+ if blocked(wsite, username):
wikipedia.output(u'%s has been blocked! Skipping him...' % usertalkpage.titleWithoutNamespace())
+ # FIXME: that continue will continue on "for w in block:". Do we really want to do this ?
continue
report(wsite, rep_page, username, com, final_rep)
break
else:
wikipedia.output(u'The discussion page of the bad-user already exists...')
running = False
+ #FIXME : Why don't we break here ?
for w in say_hi:
if w in answer:
baduser = False
break
- elif ask == False:
+ else:
wikipedia.output(u'%s is possibly an unwanted username. He will be reported.' % usertalkpage.titleWithoutNamespace())
if not usertalkpage.exists():
report(wsite, rep_page, username, com, final_rep)
@@ -898,7 +903,7 @@
wikipedia.output(u'%s has been already welcomed when i was loading all the users... skipping' % usertalkpage.titleWithoutNamespace())
continue
# That's the log
- if log_variable == True and logg:
+ if log_variable and logg:
if len(welcomed_users) == 1:
wikipedia.output(u'One user has been welcomed.')
elif len(welcomed_users) == 0:
@@ -916,7 +921,7 @@
# If we haven't to report, do nothing.
elif log_variable == False:
pass
- if log_variable == True and logg and len(welcomed_users) != 0:
+ if log_variable and logg and len(welcomed_users) != 0:
if len(welcomed_users) == 1:
wikipedia.output(u'Putting the log of the latest user...')
else:
@@ -926,7 +931,7 @@
if logresult2 == False:
continue
# If recursive, don't exit, repeat after one hour.
- if recursive == True:
+ if recursive :
waitstr = unicode(time_variable)
if locale.getlocale()[1]:
strfstr = unicode(time.strftime(u"%d %b %Y %H:%M:%S (UTC)", time.gmtime()), locale.getlocale()[1])
@@ -936,6 +941,7 @@
time.sleep(time_variable)
# If not recursive, break.
elif recursive == False:
+ #FIXME : others "yields" yield a single integer. Why are we doing this here ? 'STOP' is not even being retrieved
yield [number_user, 'STOP']
if __name__ == "__main__":
@@ -949,6 +955,9 @@
random = settingsBot[11]
savedata = settingsBot[13]
# I need to know what is the number_user, in this way I get it.
+ #FIXME: Do we need to do this ?
+ # in other words, why can't main() return a SINGLE value,
+ # an integer, number_user ?
for x in main(settingsBot):
try:
number_user = x[0]
@@ -957,11 +966,12 @@
else:
break
except wikipedia.BadTitle:
+ #FIXME : This kind of error should be catched earlier.
wikipedia.output(u"Wikidown or server's problem. Quit.")
wikipedia.stopme()
finally:
# If there is the savedata, the script must save the number_user.
- if random == True and savedata == True and number_user != None:
+ if random and savedata and number_user != None:
f = file(filename, 'w')
cPickle.dump(number_user, f)
f.close()
Bugs item #1975571, was opened at 2008-05-28 00:08
Message generated for change (Comment added) made by nicdumz
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=1975571&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
>Status: Closed
>Resolution: Fixed
Priority: 5
Private: No
Submitted By: Carsrac (carsrac)
>Assigned to: NicDumZ — Nicolas Dumazet (nicdumz)
Summary: ext and disambiguation
Initial Comment:
{{Desambiguáncia}} that the template for the ext wiki, if someone cares.
----------------------------------------------------------------------
>Comment By: NicDumZ — Nicolas Dumazet (nicdumz)
Date: 2008-05-28 10:27
Message:
Logged In: YES
user_id=1963242
Originator: NO
added in r5448 :)
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=1975571&group_…
Bugs item #1973804, was opened at 2008-05-27 02:53
Message generated for change (Comment added) made by melancholie
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=1973804&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: interwiki
Group: None
Status: Open
Resolution: None
>Priority: 8
Private: No
Submitted By: Melancholie (melancholie)
Assigned to: Nobody/Anonymous (nobody)
Summary: Huge memory consumption during changing process
Initial Comment:
As soon as the changing process (putting/saving of pages) is started, interwiki.py (r5440) consumes more than 100 MB of memory (RAM+Swap) if bot is working on many wikis. Memory usage grows during changing process. When changing process is finished, the memory suddenly gets flushed. Memory usage is normal again then, but only until the next 'putting-pages process' proceeds ;-)
----------------------------------------------------------------------
>Comment By: Melancholie (melancholie)
Date: 2008-05-28 07:21
Message:
Logged In: YES
user_id=2089773
Originator: YES
On low memory systems that does even lead to:
Inconsistency detected by ld.so: dl-minimal.c: 84: __libc_memalign:
Assertion `page != ((void *) -1)' failed!
Does that have to do with BeautifulSoup.py?
The revision that used (c)ElementTree did not cause that kind of bug!
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=1973804&group_…
Revision: 5447
Author: siebrand
Date: 2008-05-27 22:28:04 +0000 (Tue, 27 May 2008)
Log Message:
-----------
eol-style:native
Modified Paths:
--------------
trunk/pywikipedia/imagerecat.py
trunk/pywikipedia/imageuncat.py
Property Changed:
----------------
trunk/pywikipedia/djvutext.py
trunk/pywikipedia/imagerecat.py
trunk/pywikipedia/imageuncat.py
Property changes on: trunk/pywikipedia/djvutext.py
___________________________________________________________________
Name: svn:eol-style
+ native
Modified: trunk/pywikipedia/imagerecat.py
===================================================================
--- trunk/pywikipedia/imagerecat.py 2008-05-27 21:09:51 UTC (rev 5446)
+++ trunk/pywikipedia/imagerecat.py 2008-05-27 22:28:04 UTC (rev 5447)
@@ -1,343 +1,343 @@
-# -*- coding: utf-8 -*-
-"""
-Program to (re)categorize images at commons.
-
-The program uses commonshelper for category suggestions. The program consists of three parts.
-
-1. prefetchThread - Fetches all the information
-2. userThread - Gets input from the user
-3. putThread - modifies the images
-
-You need to install the Python Imaging Library http://www.pythonware.com/products/pil/ to get this program working
-
-The program is far from finished. The framework is there, but still a lot has to be implemented:
-1. The prefetch thread
- * Mostly finished.
- * Should add some error handling to cope with a slow toolserver
- * Should check if images with special chars work alright
- * Parameter to dont use commonshelper?
-2. The user thread
- * Tkinter layout is awful atm
- * Tkinter have to implement most of the interaction
- * Tkinter category webbrowser link
- * Tkinter something with category auto completion (like the javascript in the search box)
-3. The put thread
- * Nothing much to put atm
- * Should remove the Uncategorized template (+ redirects)
- * Should check if something is actually changed (set operations?)
-"""
-#
-# (C) Multichill 2008
-# (tkinter part loosely based on imagecopy.py)
-# Distributed under the terms of the MIT license.
-#
-#
-
-from Tkinter import *
-from PIL import Image, ImageTk
-import os, sys, re, codecs
-import urllib, httplib, urllib2
-import catlib, thread, webbrowser
-import time, threading
-import wikipedia, config
-import pagegenerators, add_text, Queue, StringIO
-
-exitProgram = 0
-
-class prefetchThread (threading.Thread):
- '''
- Class to fetch al the info for the user. This thread gets the imagepage, the commonshelper suggestions and the image.
- The thread puts this item in a queue. When there are no more pages left the thread puts a None object in the queue and exits.
- '''
- def __init__ (self, generator, prefetchToUserQueue):
- '''
- Get the thread ready
- '''
- self.generator = generator
- self.prefetchToUserQueue = prefetchToUserQueue
- self.currentCats = []
- self.commonshelperCats = []
- self.image = None
- self.imagepage = None
- self.pregenerator = pagegenerators.PreloadingGenerator(self.generator)
- threading.Thread.__init__ ( self )
-
- def run(self):
-
- global exitProgram
- for page in self.pregenerator:
- if exitProgram != 0:
- break;
- if page.exists() and (page.namespace() == 6) and (not page.isRedirectPage()) :
- self.imagepage = wikipedia.ImagePage(page.site(), page.title())
- self.imagepage.get()
- self.currentCats = self.getCurrentCats(self.imagepage)
- self.commonshelperCats = self.filterCommonsHelperCats(self.currentCats, self.getCommonshelperCats(self.imagepage))
- self.image = self.getImage(self.imagepage)
- self.prefetchToUserQueue.put((self.imagepage, self.currentCats, self.commonshelperCats, self.image))
- self.prefetchToUserQueue.put(None)
- return
-
- def getCurrentCats(self, imagepage):
- '''
- Get the categories currently on the image
- '''
- result = []
- for cat in imagepage.categories():
- result.append(cat.titleWithoutNamespace())
- return result
-
- def getCommonshelperCats(self, imagepage):
- '''
- Get category suggestions from commonshelper. Parse them and return a list of suggestions.
- '''
- parameters = urllib.urlencode({'i' : imagepage.titleWithoutNamespace(), 'r' : 'on', 'go-clean' : 'Find+Categories'})
- commonsHelperPage = urllib.urlopen("http://tools.wikimedia.de/~daniel/WikiSense/CommonSense.php?%s" % parameters)
-
- commonsenseRe = re.compile('^#COMMONSENSE(.*)#USAGE(\s)+\((?P<usage>(\d)+)\)(.*)#KEYWORDS(\s)+\((?P<keywords>(\d)+)\)(.*)#CATEGORIES(\s)+\((?P<catnum>(\d)+)\)\s(?P<cats>(.*))\s#GALLERIES(\s)+\((?P<galnum>(\d)+)\)(.*)#EOF$', re.MULTILINE + re.DOTALL)
- matches = commonsenseRe.search(commonsHelperPage.read())
-
- if matches:
- if(matches.group('catnum') > 0):
- return matches.group('cats').splitlines()
- else:
- return []
-
- def filterCommonsHelperCats(self, currentCats, commonshelperCats):
- '''
- Remove the current categories from the suggestions.
- '''
- result = []
- currentCatsSet = set(currentCats)
- for cat in commonshelperCats:
- cat = cat.replace('_',' ')
- if cat not in currentCatsSet:
- result.append(cat)
- return result
-
- def getImage(self, imagepage):
- '''
- Get the image from the wiki
- '''
- url = imagepage.fileUrl()
- uo = wikipedia.MyURLopener()
-
- file = uo.open(url)
-
- if 'text/html' in file.info().getheader('Content-Type'):
- wikipedia.output(u'Couldn\'t download the image: the requested URL was not found on this server.')
- return
-
- image = file.read()
- file.close()
-
- return image
-
-class userThread (threading.Thread):
- def __init__ (self, prefetchToUserQueue, userToPutQueue):
- self.prefetchToUserQueue = prefetchToUserQueue
- self.userToPutQueue = userToPutQueue
- self.item = None
- self.imagepage = None
- self.image = None
- self.currentCats = []
- self.commonshelperCats = []
- self.newCats = []
- self.skip = 0
-
- threading.Thread.__init__ ( self )
-
- def run(self):
-
- global exitProgram
- while exitProgram == 0:
- self.item = self.prefetchToUserQueue.get()
- if self.item is None:
- break
- else:
- (self.imagepage, self.currentCats, self.commonshelperCats, self.image) = self.item
- (self.skip, exitProgram, self.newCats) = Tkdialog(self.imagepage.titleWithoutNamespace(), self.image, self.imagepage.get(), self.currentCats, self.commonshelperCats, self.imagepage.permalink()).run()
-
- if not self.skip:
- self.userToPutQueue.put((self.imagepage, self.newCats))
- self.userToPutQueue.put(None)
- return
-
-class putThread (threading.Thread):
- '''
- class to do the actual changing of images
- '''
- def __init__ (self, userToPutQueue):
- self.userToPutQueue = userToPutQueue
- threading.Thread.__init__ ( self )
-
- def run(self):
- item = None
- imagepage = None
- newtext = u''
- while True:
- item = self.userToPutQueue.get()
- if item is None:
- break
- else:
- (imagepage, newtext)=item
- #wikipedia.showDiff(imagepage.get(), newtext)
- #imagepage.put(newtext, u'Recat by bot')
- return
-
-class Tkdialog:
- '''
- The Tk dialog presented to the user. The user can add and remove categories. View the images in a webbrowser, skip the image, apply the changes or exit.
- '''
- def __init__(self, image_title = u'', image = None, pagetext=u'', currentCats = [], commonsHelperCats = [], url= ''):
- self.newCats = currentCats
- self.url = url
- self.skip = 0
- self.exit = 0
- self.root=Tk()
- self.root.title(image_title)
- w = 1600 #image1.width()
- h = 900 #image1.height()
- x = 50
- y = 50
- self.root.geometry("%dx%d+%d+%d" % (w, h, x, y))
- self.root.rowconfigure( 0, weight = 1 )
- self.root.columnconfigure( 0, weight = 1 )
-
- image1 = self.getImage(image, 800, 600)
-
- panel1 = Label(self.root, image=image1)
- panel1.grid(row=0, column=2, rowspan=11, columnspan=11)
- panel1.image = image1
-
- self.cb = []
- self.cbstate = []
- self.entry = []
- for i in range(0, 10):
- self.cbstate.append(IntVar())
- self.cb.append(Checkbutton (self.root, variable=self.cbstate[i]))
- self.entry.append(Entry (self.root, width=50))
- self.cb[i].grid(row=i, column=0)
- self.entry[i].grid(row=i, column=1)
-
- catindex = 0
-
- for cat in currentCats:
- self.entry[catindex].delete(0, END)
- self.entry[catindex].insert(0, cat)
- self.entry[catindex].config(background="green")
- self.cb[catindex].select()
- catindex = catindex + 1
-
- for cat in commonsHelperCats:
- self.entry[catindex].delete(0, END)
- self.entry[catindex].insert(0, cat)
- self.entry[catindex].config(background="yellow")
- self.cb[catindex].deselect()
- catindex = catindex + 1
-
- textarea=Text(self.root)
- scrollbar=Scrollbar(self.root, orient=VERTICAL)
- textarea.insert(END, pagetext.encode('utf-8'))
- textarea.config(state=DISABLED, height=12, width=80, padx=0, pady=0, wrap=WORD, yscrollcommand=scrollbar.set)
-
- scrollbar.config(command=textarea.yview)
-
- browserButton=Button(self.root, text='View in browser', command=self.openInBrowser)
- skipButton=Button(self.root, text="Skip", command=self.skipFile)
- okButton=Button(self.root, text="OK", command=self.okFile)
- exitButton=Button(self.root, text="EXIT", command=self.exitProgram)
-
- textarea.grid(row=12, column=4, columnspan=10)
- scrollbar.grid(row=12, column=3)
-
- okButton.grid(row=20, column=0, rowspan=2)
- skipButton.grid(row=20, column=1, rowspan=2)
- browserButton.grid(row=20, column=2, rowspan=2)
- exitButton.grid(row=20, column=3, rowspan=2)
-
- def getImage(self, image, width, height):
- output = StringIO.StringIO(image)
- image2 = Image.open(output)
- image2.thumbnail((width, height))
- imageTk = ImageTk.PhotoImage(image2)
- return imageTk
-
- def okFile(self):
- '''
- The user pressed the OK button.
- '''
- #Read what the user has entered
- self.root.destroy()
-
- def skipFile(self):
- '''
- The user pressed the Skip button.
- '''
- self.skip=1
- self.root.destroy()
-
- def openInBrowser(self):
- '''
- The user pressed the View in browser button.
- '''
- webbrowser.open(self.url)
-
- def exitProgram(self):
- '''
- Exit the program
- '''
- self.skip=1
- self.exit=1
- self.root.destroy()
-
- def run (self):
- self.root.mainloop()
- return (self.skip, self.exit, self.newCats)
-
-def main(args):
- '''
- Main loop. Get a generator. Set up the 3 threads and the 2 queue's and fire everything up.
- '''
- generator = None;
- genFactory = pagegenerators.GeneratorFactory()
-
- site = wikipedia.getSite(u'commons', u'commons')
- wikipedia.setSite(site)
- for arg in wikipedia.handleArgs():
- if arg.startswith('-page'):
- if len(arg) == 5:
- generator = [wikipedia.Page(site, wikipedia.input(u'What page do you want to use?'))]
- else:
- generator = [wikipedia.Page(site, arg[6:])]
- elif arg == '-always':
- always = True
- else:
- generator = genFactory.handleArg(arg)
- if not generator:
- generator = pagegenerators.CategorizedPageGenerator(catlib.Category(site, u'Category:Media needing categories'))
- #raise add_text.NoEnoughData('You have to specify the generator you want to use for the script!')
-
- prefetchToUserQueue=Queue.Queue()
- userToPutQueue=Queue.Queue()
-
- # Start the prefetch thread
- prefetchThread(generator, prefetchToUserQueue).start()
-
- # Start the user thread
- userThread(prefetchToUserQueue, userToPutQueue).start()
-
- # Start the put thread
- putThread(userToPutQueue).start()
-
- # Wait for all threads to finish
- for openthread in threading.enumerate():
- if openthread != threading.currentThread():
- openthread.join()
- wikipedia.output(u'All threads are done')
-
-if __name__ == "__main__":
- try:
- main(sys.argv[1:])
- finally:
- wikipedia.stopme()
+# -*- coding: utf-8 -*-
+"""
+Program to (re)categorize images at commons.
+
+The program uses commonshelper for category suggestions. The program consists of three parts.
+
+1. prefetchThread - Fetches all the information
+2. userThread - Gets input from the user
+3. putThread - modifies the images
+
+You need to install the Python Imaging Library http://www.pythonware.com/products/pil/ to get this program working
+
+The program is far from finished. The framework is there, but still a lot has to be implemented:
+1. The prefetch thread
+ * Mostly finished.
+ * Should add some error handling to cope with a slow toolserver
+ * Should check if images with special chars work alright
+ * Parameter to dont use commonshelper?
+2. The user thread
+ * Tkinter layout is awful atm
+ * Tkinter have to implement most of the interaction
+ * Tkinter category webbrowser link
+ * Tkinter something with category auto completion (like the javascript in the search box)
+3. The put thread
+ * Nothing much to put atm
+ * Should remove the Uncategorized template (+ redirects)
+ * Should check if something is actually changed (set operations?)
+"""
+#
+# (C) Multichill 2008
+# (tkinter part loosely based on imagecopy.py)
+# Distributed under the terms of the MIT license.
+#
+#
+
+from Tkinter import *
+from PIL import Image, ImageTk
+import os, sys, re, codecs
+import urllib, httplib, urllib2
+import catlib, thread, webbrowser
+import time, threading
+import wikipedia, config
+import pagegenerators, add_text, Queue, StringIO
+
+exitProgram = 0
+
+class prefetchThread (threading.Thread):
+ '''
+ Class to fetch al the info for the user. This thread gets the imagepage, the commonshelper suggestions and the image.
+ The thread puts this item in a queue. When there are no more pages left the thread puts a None object in the queue and exits.
+ '''
+ def __init__ (self, generator, prefetchToUserQueue):
+ '''
+ Get the thread ready
+ '''
+ self.generator = generator
+ self.prefetchToUserQueue = prefetchToUserQueue
+ self.currentCats = []
+ self.commonshelperCats = []
+ self.image = None
+ self.imagepage = None
+ self.pregenerator = pagegenerators.PreloadingGenerator(self.generator)
+ threading.Thread.__init__ ( self )
+
+ def run(self):
+
+ global exitProgram
+ for page in self.pregenerator:
+ if exitProgram != 0:
+ break;
+ if page.exists() and (page.namespace() == 6) and (not page.isRedirectPage()) :
+ self.imagepage = wikipedia.ImagePage(page.site(), page.title())
+ self.imagepage.get()
+ self.currentCats = self.getCurrentCats(self.imagepage)
+ self.commonshelperCats = self.filterCommonsHelperCats(self.currentCats, self.getCommonshelperCats(self.imagepage))
+ self.image = self.getImage(self.imagepage)
+ self.prefetchToUserQueue.put((self.imagepage, self.currentCats, self.commonshelperCats, self.image))
+ self.prefetchToUserQueue.put(None)
+ return
+
+ def getCurrentCats(self, imagepage):
+ '''
+ Get the categories currently on the image
+ '''
+ result = []
+ for cat in imagepage.categories():
+ result.append(cat.titleWithoutNamespace())
+ return result
+
+ def getCommonshelperCats(self, imagepage):
+ '''
+ Get category suggestions from commonshelper. Parse them and return a list of suggestions.
+ '''
+ parameters = urllib.urlencode({'i' : imagepage.titleWithoutNamespace(), 'r' : 'on', 'go-clean' : 'Find+Categories'})
+ commonsHelperPage = urllib.urlopen("http://tools.wikimedia.de/~daniel/WikiSense/CommonSense.php?%s" % parameters)
+
+ commonsenseRe = re.compile('^#COMMONSENSE(.*)#USAGE(\s)+\((?P<usage>(\d)+)\)(.*)#KEYWORDS(\s)+\((?P<keywords>(\d)+)\)(.*)#CATEGORIES(\s)+\((?P<catnum>(\d)+)\)\s(?P<cats>(.*))\s#GALLERIES(\s)+\((?P<galnum>(\d)+)\)(.*)#EOF$', re.MULTILINE + re.DOTALL)
+ matches = commonsenseRe.search(commonsHelperPage.read())
+
+ if matches:
+ if(matches.group('catnum') > 0):
+ return matches.group('cats').splitlines()
+ else:
+ return []
+
+ def filterCommonsHelperCats(self, currentCats, commonshelperCats):
+ '''
+ Remove the current categories from the suggestions.
+ '''
+ result = []
+ currentCatsSet = set(currentCats)
+ for cat in commonshelperCats:
+ cat = cat.replace('_',' ')
+ if cat not in currentCatsSet:
+ result.append(cat)
+ return result
+
+ def getImage(self, imagepage):
+ '''
+ Get the image from the wiki
+ '''
+ url = imagepage.fileUrl()
+ uo = wikipedia.MyURLopener()
+
+ file = uo.open(url)
+
+ if 'text/html' in file.info().getheader('Content-Type'):
+ wikipedia.output(u'Couldn\'t download the image: the requested URL was not found on this server.')
+ return
+
+ image = file.read()
+ file.close()
+
+ return image
+
+class userThread (threading.Thread):
+ def __init__ (self, prefetchToUserQueue, userToPutQueue):
+ self.prefetchToUserQueue = prefetchToUserQueue
+ self.userToPutQueue = userToPutQueue
+ self.item = None
+ self.imagepage = None
+ self.image = None
+ self.currentCats = []
+ self.commonshelperCats = []
+ self.newCats = []
+ self.skip = 0
+
+ threading.Thread.__init__ ( self )
+
+ def run(self):
+
+ global exitProgram
+ while exitProgram == 0:
+ self.item = self.prefetchToUserQueue.get()
+ if self.item is None:
+ break
+ else:
+ (self.imagepage, self.currentCats, self.commonshelperCats, self.image) = self.item
+ (self.skip, exitProgram, self.newCats) = Tkdialog(self.imagepage.titleWithoutNamespace(), self.image, self.imagepage.get(), self.currentCats, self.commonshelperCats, self.imagepage.permalink()).run()
+
+ if not self.skip:
+ self.userToPutQueue.put((self.imagepage, self.newCats))
+ self.userToPutQueue.put(None)
+ return
+
+class putThread (threading.Thread):
+ '''
+ class to do the actual changing of images
+ '''
+ def __init__ (self, userToPutQueue):
+ self.userToPutQueue = userToPutQueue
+ threading.Thread.__init__ ( self )
+
+ def run(self):
+ item = None
+ imagepage = None
+ newtext = u''
+ while True:
+ item = self.userToPutQueue.get()
+ if item is None:
+ break
+ else:
+ (imagepage, newtext)=item
+ #wikipedia.showDiff(imagepage.get(), newtext)
+ #imagepage.put(newtext, u'Recat by bot')
+ return
+
+class Tkdialog:
+ '''
+ The Tk dialog presented to the user. The user can add and remove categories. View the images in a webbrowser, skip the image, apply the changes or exit.
+ '''
+ def __init__(self, image_title = u'', image = None, pagetext=u'', currentCats = [], commonsHelperCats = [], url= ''):
+ self.newCats = currentCats
+ self.url = url
+ self.skip = 0
+ self.exit = 0
+ self.root=Tk()
+ self.root.title(image_title)
+ w = 1600 #image1.width()
+ h = 900 #image1.height()
+ x = 50
+ y = 50
+ self.root.geometry("%dx%d+%d+%d" % (w, h, x, y))
+ self.root.rowconfigure( 0, weight = 1 )
+ self.root.columnconfigure( 0, weight = 1 )
+
+ image1 = self.getImage(image, 800, 600)
+
+ panel1 = Label(self.root, image=image1)
+ panel1.grid(row=0, column=2, rowspan=11, columnspan=11)
+ panel1.image = image1
+
+ self.cb = []
+ self.cbstate = []
+ self.entry = []
+ for i in range(0, 10):
+ self.cbstate.append(IntVar())
+ self.cb.append(Checkbutton (self.root, variable=self.cbstate[i]))
+ self.entry.append(Entry (self.root, width=50))
+ self.cb[i].grid(row=i, column=0)
+ self.entry[i].grid(row=i, column=1)
+
+ catindex = 0
+
+ for cat in currentCats:
+ self.entry[catindex].delete(0, END)
+ self.entry[catindex].insert(0, cat)
+ self.entry[catindex].config(background="green")
+ self.cb[catindex].select()
+ catindex = catindex + 1
+
+ for cat in commonsHelperCats:
+ self.entry[catindex].delete(0, END)
+ self.entry[catindex].insert(0, cat)
+ self.entry[catindex].config(background="yellow")
+ self.cb[catindex].deselect()
+ catindex = catindex + 1
+
+ textarea=Text(self.root)
+ scrollbar=Scrollbar(self.root, orient=VERTICAL)
+ textarea.insert(END, pagetext.encode('utf-8'))
+ textarea.config(state=DISABLED, height=12, width=80, padx=0, pady=0, wrap=WORD, yscrollcommand=scrollbar.set)
+
+ scrollbar.config(command=textarea.yview)
+
+ browserButton=Button(self.root, text='View in browser', command=self.openInBrowser)
+ skipButton=Button(self.root, text="Skip", command=self.skipFile)
+ okButton=Button(self.root, text="OK", command=self.okFile)
+ exitButton=Button(self.root, text="EXIT", command=self.exitProgram)
+
+ textarea.grid(row=12, column=4, columnspan=10)
+ scrollbar.grid(row=12, column=3)
+
+ okButton.grid(row=20, column=0, rowspan=2)
+ skipButton.grid(row=20, column=1, rowspan=2)
+ browserButton.grid(row=20, column=2, rowspan=2)
+ exitButton.grid(row=20, column=3, rowspan=2)
+
+ def getImage(self, image, width, height):
+ output = StringIO.StringIO(image)
+ image2 = Image.open(output)
+ image2.thumbnail((width, height))
+ imageTk = ImageTk.PhotoImage(image2)
+ return imageTk
+
+ def okFile(self):
+ '''
+ The user pressed the OK button.
+ '''
+ #Read what the user has entered
+ self.root.destroy()
+
+ def skipFile(self):
+ '''
+ The user pressed the Skip button.
+ '''
+ self.skip=1
+ self.root.destroy()
+
+ def openInBrowser(self):
+ '''
+ The user pressed the View in browser button.
+ '''
+ webbrowser.open(self.url)
+
+ def exitProgram(self):
+ '''
+ Exit the program
+ '''
+ self.skip=1
+ self.exit=1
+ self.root.destroy()
+
+ def run (self):
+ self.root.mainloop()
+ return (self.skip, self.exit, self.newCats)
+
+def main(args):
+ '''
+ Main loop. Get a generator. Set up the 3 threads and the 2 queue's and fire everything up.
+ '''
+ generator = None;
+ genFactory = pagegenerators.GeneratorFactory()
+
+ site = wikipedia.getSite(u'commons', u'commons')
+ wikipedia.setSite(site)
+ for arg in wikipedia.handleArgs():
+ if arg.startswith('-page'):
+ if len(arg) == 5:
+ generator = [wikipedia.Page(site, wikipedia.input(u'What page do you want to use?'))]
+ else:
+ generator = [wikipedia.Page(site, arg[6:])]
+ elif arg == '-always':
+ always = True
+ else:
+ generator = genFactory.handleArg(arg)
+ if not generator:
+ generator = pagegenerators.CategorizedPageGenerator(catlib.Category(site, u'Category:Media needing categories'))
+ #raise add_text.NoEnoughData('You have to specify the generator you want to use for the script!')
+
+ prefetchToUserQueue=Queue.Queue()
+ userToPutQueue=Queue.Queue()
+
+ # Start the prefetch thread
+ prefetchThread(generator, prefetchToUserQueue).start()
+
+ # Start the user thread
+ userThread(prefetchToUserQueue, userToPutQueue).start()
+
+ # Start the put thread
+ putThread(userToPutQueue).start()
+
+ # Wait for all threads to finish
+ for openthread in threading.enumerate():
+ if openthread != threading.currentThread():
+ openthread.join()
+ wikipedia.output(u'All threads are done')
+
+if __name__ == "__main__":
+ try:
+ main(sys.argv[1:])
+ finally:
+ wikipedia.stopme()
Property changes on: trunk/pywikipedia/imagerecat.py
___________________________________________________________________
Name: svn:eol-style
+ native
Modified: trunk/pywikipedia/imageuncat.py
===================================================================
--- trunk/pywikipedia/imageuncat.py 2008-05-27 21:09:51 UTC (rev 5446)
+++ trunk/pywikipedia/imageuncat.py 2008-05-27 22:28:04 UTC (rev 5447)
@@ -1,102 +1,102 @@
-# -*- coding: utf-8 -*-
-"""
-Program to add uncat template to images without categories at commons.
-See imagerecat.py (still working on that one) to add these images to categories.
-
-"""
-#
-# (C) Multichill 2008
-#
-# Distributed under the terms of the MIT license.
-#
-#
-
-import os, sys, re, codecs
-import wikipedia, config, pagegenerators
-
-#Probably unneeded because these are hidden categories. Have to figure it out.
-ignoreCategories = [u'[[Category:CC-BY-SA-3.0]]',
- u'[[Category:GFDL]]',
- u'[[Category:Media for cleanup]]',
- u'[[Category:Media lacking a description]]',
- u'[[Category:Media lacking author information]]',
- u'[[Category:Media lacking a description]]',
- u'[[Category:Self-published work]]']
-
-#Dont bother to put the template on a image with one of these templates
-ignoreTemplates = [u'Delete',
- u'Nocat',
- u'No license',
- u'No permission since',
- u'No source',
- u'No source since',
- u'Uncategorized',
- u'Uncat']
-
-puttext = u'\n{{Uncategorized|year={{subst:CURRENTYEAR}}|month={{subst:CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}}}'
-putcomment = u'Please add categories to this image'
-
-def isUncat(page):
- '''
- Do we want to skip this page?
-
- If we found a category which is not in the ignore list it means that the page is categorized so skip the page.
- If we found a template which is in the ignore list, skip the page.
- '''
- for category in page.categories():
- if category not in ignoreCategories:
- #if category.title().count("Unknown") > 0:
- #print "Iets unknown"
- #else:
- #print "false"
- return False
- #print "true"
- for template in page.templates():
- if template in ignoreTemplates:
- return False
- return True
-
-def addUncat(page):
- '''
- Add the uncat template to the page
- '''
- newtext = page.get() + puttext
- wikipedia.showDiff(page.get(), newtext)
- try:
- page.put(newtext, putcomment)
- except wikipedia.EditConflict:
- # Skip this page
- pass
- return
-
-def main(args):
- '''
- Grab a bunch of images and tag them if they are not categorized.
- '''
- generator = None;
- genFactory = pagegenerators.GeneratorFactory()
-
- site = wikipedia.getSite(u'commons', u'commons')
- wikipedia.setSite(site)
- for arg in wikipedia.handleArgs():
- if arg.startswith('-page'):
- if len(arg) == 5:
- generator = [wikipedia.Page(site, wikipedia.input(u'What page do you want to use?'))]
- else:
- generator = [wikipedia.Page(site, arg[6:])]
- else:
- generator = genFactory.handleArg(arg)
- if not generator:
- wikipedia.output('You have to specify the generator you want to use for the program!')
- else:
- pregenerator = pagegenerators.PreloadingGenerator(generator)
- for page in pregenerator:
- if page.exists() and (page.namespace() == 6) and (not page.isRedirectPage()) :
- if isUncat(page):
- addUncat(page)
-
-if __name__ == "__main__":
- try:
- main(sys.argv[1:])
- finally:
- wikipedia.stopme()
+# -*- coding: utf-8 -*-
+"""
+Program to add uncat template to images without categories at commons.
+See imagerecat.py (still working on that one) to add these images to categories.
+
+"""
+#
+# (C) Multichill 2008
+#
+# Distributed under the terms of the MIT license.
+#
+#
+
+import os, sys, re, codecs
+import wikipedia, config, pagegenerators
+
+#Probably unneeded because these are hidden categories. Have to figure it out.
+ignoreCategories = [u'[[Category:CC-BY-SA-3.0]]',
+ u'[[Category:GFDL]]',
+ u'[[Category:Media for cleanup]]',
+ u'[[Category:Media lacking a description]]',
+ u'[[Category:Media lacking author information]]',
+ u'[[Category:Media lacking a description]]',
+ u'[[Category:Self-published work]]']
+
+#Dont bother to put the template on a image with one of these templates
+ignoreTemplates = [u'Delete',
+ u'Nocat',
+ u'No license',
+ u'No permission since',
+ u'No source',
+ u'No source since',
+ u'Uncategorized',
+ u'Uncat']
+
+puttext = u'\n{{Uncategorized|year={{subst:CURRENTYEAR}}|month={{subst:CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}}}'
+putcomment = u'Please add categories to this image'
+
+def isUncat(page):
+ '''
+ Do we want to skip this page?
+
+ If we found a category which is not in the ignore list it means that the page is categorized so skip the page.
+ If we found a template which is in the ignore list, skip the page.
+ '''
+ for category in page.categories():
+ if category not in ignoreCategories:
+ #if category.title().count("Unknown") > 0:
+ #print "Iets unknown"
+ #else:
+ #print "false"
+ return False
+ #print "true"
+ for template in page.templates():
+ if template in ignoreTemplates:
+ return False
+ return True
+
+def addUncat(page):
+ '''
+ Add the uncat template to the page
+ '''
+ newtext = page.get() + puttext
+ wikipedia.showDiff(page.get(), newtext)
+ try:
+ page.put(newtext, putcomment)
+ except wikipedia.EditConflict:
+ # Skip this page
+ pass
+ return
+
+def main(args):
+ '''
+ Grab a bunch of images and tag them if they are not categorized.
+ '''
+ generator = None;
+ genFactory = pagegenerators.GeneratorFactory()
+
+ site = wikipedia.getSite(u'commons', u'commons')
+ wikipedia.setSite(site)
+ for arg in wikipedia.handleArgs():
+ if arg.startswith('-page'):
+ if len(arg) == 5:
+ generator = [wikipedia.Page(site, wikipedia.input(u'What page do you want to use?'))]
+ else:
+ generator = [wikipedia.Page(site, arg[6:])]
+ else:
+ generator = genFactory.handleArg(arg)
+ if not generator:
+ wikipedia.output('You have to specify the generator you want to use for the program!')
+ else:
+ pregenerator = pagegenerators.PreloadingGenerator(generator)
+ for page in pregenerator:
+ if page.exists() and (page.namespace() == 6) and (not page.isRedirectPage()) :
+ if isUncat(page):
+ addUncat(page)
+
+if __name__ == "__main__":
+ try:
+ main(sys.argv[1:])
+ finally:
+ wikipedia.stopme()
Property changes on: trunk/pywikipedia/imageuncat.py
___________________________________________________________________
Name: svn:eol-style
+ native