[Pywikipedia-svn] SVN: [7328] trunk/pywikipedia/welcome.py
alexsh at svn.wikimedia.org
alexsh at svn.wikimedia.org
Tue Sep 29 07:27:57 UTC 2009
Revision: 7328
Author: alexsh
Date: 2009-09-29 07:27:56 +0000 (Tue, 29 Sep 2009)
Log Message:
-----------
Reorganize the entire welcome.py code structure into an object-oriented design.
Modified Paths:
--------------
trunk/pywikipedia/welcome.py
Modified: trunk/pywikipedia/welcome.py
===================================================================
--- trunk/pywikipedia/welcome.py 2009-09-29 04:00:40 UTC (rev 7327)
+++ trunk/pywikipedia/welcome.py 2009-09-29 07:27:56 UTC (rev 7328)
@@ -174,7 +174,7 @@
#
import wikipedia, config, query, userlib
-import time, re, cPickle, os, urllib, string, locale
+import time, re, cPickle, os, urllib, string, locale, random
import codecs
from datetime import timedelta
@@ -412,8 +412,6 @@
# Text after the {{welcome}} template, if you want to add something
# Default (en): nothing.
final_new_text_additions = {
- 'ar':u'',
- 'en':u'',
'it':u'\n<!-- fine template di benvenuto -->',
'zh':'<small>(via ~~~)</small>',
}
@@ -435,589 +433,528 @@
class FilenameNotSet(wikipedia.Error):
"""An exception indicating that a signature filename was not specifed."""
+class Global(object):
+ """Container class for global settings.
+ Use of globals outside of this is to be avoided."""
+
+ attachEditCount = 1 # number of edits that an user required to be welcomed
+ dumpToLog = 15 # number of users that are required to add the log :)
+ offset = 0 # skip users newer than that timestamp
+ timeoffset = 0 # skip users newer than # minutes
+ recursive = True # define if the Bot is recursive or not
+ timeRecur = 3600 # how much time (sec.) the bot sleeps before restart
+ makeWelcomeLog = True # create the welcome log or not
+ confirm = False # should bot ask to add username to bad-username list
+ welcomeAuto = False # should bot welcome auto-created users
+ filtBadName = False # check if the username is ok or not
+ randomSign = False # should signature be random or not
+ saveSignIndex = False # should save the signature index or not
+ signFileName = None # File name, default: None
+ defaultSign = '--~~~~' # default signature
+ queryLimit = 50 # number of users that the bot load to check
+ #fileOption = False # check if the user wants to use a file or the wikipage
class WelcomeBot(object):
-##developing to re-organize entire program.
-##
def __init__(self):
-
+ #Initial
self.site = wikipedia.getSite()
- pass
+ self._totallyCount = 0
+ self.welcomed_users = list()
+ if globalvar.randomSign:
+ self.defineSign(True)
- def reportBadAccount(self):
- pass
+ self.final_additions = wikipedia.translate(self.site, final_new_text_additions)
+
+ #self.contrib = string.capitalize(self.site.mediawiki_message('contribslink'))
-
-
- def makelogpage(self):
- pass
- def parseLog(self):
- pass
- def _parseLogOld(self):
- pass
- def defineSign(self):
- pass
-
- def run(self):
- pass
-
-
-def load_word_function(wsite, raw):
- """ This is a function used to load the badword and the whitelist."""
- regl = r"(?:\"|\')(.*?)(?:\"|\')(?:, |\))"
- page = re.compile(regl, re.UNICODE)
-
- list_loaded = page.findall(raw)
-
- if len(list_loaded) == 0:
- wikipedia.output(u'There was no input on the real-time page.')
- else:
- wikipedia.output(u'\nReal-time list loaded.')
- return list_loaded
-
-def parselog(wsite, raw, number, sul):
- """ The function to load the users (only users who have a certain number of edits) """
- someone_found = False
-
- # I search with a regex how many user have not the talk page
- # and i put them in a list (i find it more easy and secure).
-
- # XXX: That's the regex, if there are problems, take a look here.
- reg = u'\(<a href=\"' + re.escape(wsite.path())
- reg += u'\?title=%s(?P<user>.*?)&(?:amp;|)action=(?:edit|editredlink|edit&redlink=1)\"' % \
- re.escape('%s:' % urllib.quote(wsite.namespace(3).replace(" ", "_").encode(wsite.encoding())))
- reg += u'.*?</span> (?P<reason>.*?) *?</li>'
-
- p = re.compile(reg, re.UNICODE)
-
- for x in p.finditer(raw):
- someone_found = True
- userN = unicode(urllib.unquote(str(x.group('user'))), 'utf-8')
- username = userlib.User(wsite, userN)
- #skip autocreated users (SUL)
- if wsite.mediawiki_message('newuserlog-autocreate-entry') in x.group('reason') and not sul:
- wikipedia.output(u'%s has been created automatically, skipping...' % username.name())
- continue
-
- #FIXME: It counts the first 50 edits
- # if number > 50, it won't work
- # (not *so* useful, it should be enough).
- contribnum = username.editCount()
-
- if contribnum >= number:
- wikipedia.output(u'%s has enough edits to be welcomed' % username.name() )
- # The user must be welcomed, return his data.
- yield ([username.name(), contribnum, username.isBlocked()])
- elif contribnum < number:
- if contribnum == 0:
- wikipedia.output(u'%s has no contributions.' % username.name() )
- else:
- wikipedia.output(u'%s has only %s contributions.' % (username.name(), str(contribnum)) )
- # That user mustn't be welcomed.
- continue
-
- if someone_found:
- wikipedia.output(u'There is nobody to be welcomed...')
- else:
- wikipedia.output(u'\nLoaded all users...')
-
-def report(wsite, rep_page, username, com, rep):
- """ The function to report the username to a wiki-page. """
-
- another_page = wikipedia.Page(wsite, rep_page)
- if another_page.exists():
- text_get = another_page.get()
- else:
- nameBot = config.usernames[wsite.family.name][wsite.lang]
- text_get = u'This is a report page for the Bad-username, please translate me. --[[User:%s|%s]]' % (nameBot, nameBot)
- pos = 0
- # The talk page includes "_" between the two names, in this way i replace them to " ".
- username = wikipedia.url2link(username, wsite, wsite)
- regex = re.escape(username)
- n = re.compile(regex, re.UNICODE)
- y = n.search(text_get, pos)
- if y == None:
- # Adding the log.
- rep_text = rep % username
- another_page.put(text_get + rep_text, comment = com, minorEdit = True)
- wikipedia.output(u'...Reported...')
- else:
- pos = y.end()
- wikipedia.output(u'%s is already in the report page.' % username)
-
-def defineSign(wsite, signPageTitle, fileSignName = None, fileOption = False):
- """ Function to load the random signatures. """
- reg = r"^\* ?(.*?)$"
- creg = re.compile(reg, re.M)
- if not fileOption:
- signPage = wikipedia.Page(wsite, signPageTitle)
- signText = signPage.get()
- else:
- if fileSignName == None:
- wikipedia.output(u'Error! - No fileName!')
- raise FilenameNotSet("No signature filename specified.")
- try:
- f = codecs.open(wikipedia.config.datafilepath(fileSignName), 'r',
- encoding=config.console_encoding)
- except:
- f = codecs.open(wikipedia.config.datafilepath(fileSignName), 'r',
- encoding='utf-8')
- signText = f.read()
- f.close()
-
- listSign = creg.findall(signText)
- return listSign
-
-def logmaker(wsite, welcomed_users, logg, summ2, usernam, contrib):
- """ Deduct the correct sub page name form the current date """
- safety = list()
- target = logg + '/' + time.strftime('%Y/%m/%d', time.localtime(time.time()))
-
- if wsite.lang == 'it':
- target = logg + '/' + time.strftime('%d/%m/%Y', time.localtime(time.time()))
-
- page = wikipedia.Page(wsite, target)
- try:
- safety.append(page.get())
- except wikipedia.NoPage:
- #Add the table heading each new period. See http://commons.wikimedia.org/wiki/Commons:Welcome_log
- safety.append(wikipedia.translate(wsite, logpage_header) )
- # The string below show how the "Usernames" will be notified.
- safety.append('\n!%s' % usernam)
- # The string below show how the "Contribs" will be notified.
- safety.append(u'\n!%s' % contrib)
-
- for found_result in welcomed_users:
- # Adding the log... (don't take care of the variable's name...).
- luserpage = found_result[0]
- luser = wikipedia.url2link(luserpage, wsite, wsite)
- edit_count = str(found_result[1])
- logtext = u'\n{{WLE|user=%s|contribs=%s}}' % (luser, edit_count)
- safety.append(logtext)
- try:
- page.put(''.join(safety), summ2)
- return True
- except wikipedia.EditConflict:
- wikipedia.output(u'An edit conflict has occured. Pausing for 10 seconds before continuing.')
- time.sleep(10)
- page = wikipedia.Page(wsite, target)
- try:
- page.put(u''.join(safety), summ2)
- return True
- except wikipedia.EditConflict:
- wikipedia.output(u'Another edit conflict... Skipping...')
+ def badNameFilter(self, name, force = False):
+ if not globalvar.filtBadName:
return False
-
-def mainSettings():
- global filename
- global random
- global savedata
- """ Function to get the settings via arg and return them """
- number = 1 # number of edits that an user required to be welcomed
- numberlog = 15 # number of users that are required to add the log :)
- limit = 50 # number of users that the bot load to check
- offset_variable = 0 # skip users newer than that timestamp
- timeoffset_variable = 0 # skip users newer than # minutes
- recursive = True # define if the Bot is recursive or not
- time_variable = 3600 # how much time (sec.) the bot sleeps before restart
- log_variable = True # create the welcome log or not
- ask = False # should bot ask to add username to bad-username list
- sul = False # should bot welcome auto-created users
- filter_wp = False # check if the username is ok or not
- sign = ' --~~~~' # default signature
- random = False # should signature be random or not
- savedata = False # should save the signature index or not
- fileOption = False # check if the user wants to use a file or the wikipage
- fileSignName = None # File name, default: None
-
- # The block below is used for the parameters.
- for arg in wikipedia.handleArgs():
- if arg.startswith('-edit'):
- if len(arg) == 5:
- number = int(wikipedia.input(u'After how many edits would you like to welcome new users? (0 is allowed)'))
- else:
- number = int(arg[6:])
- elif arg.startswith('-timeoffset'):
- if len(arg) == 11:
- timeoffset_variable = int(wikipedia.input(u'Which time offset (in minutest) for new users would you like to use?'))
- else:
- timeoffset_variable = int(arg[12:])
- elif arg.startswith('-time'):
- if len(arg) == 5:
- time_variable = int(wikipedia.input(u'For how many seconds would you like to bot to sleep before checking again?'))
- else:
- time_variable = int(arg[6:])
- elif arg.startswith('-offset'):
- if len(arg) == 7:
- offset_variable = int(wikipedia.input(u'Which time offset for new users would you like to use? (yyyymmddhhmmss)'))
- else:
- offset_variable = int(arg[8:])
- if len(str(offset_variable)) != 14:
- # upon request, we might want to check for software version here
- raise ValueError("Mediawiki has changed, -offset:# is not supported anymore, but -offset:TIMESTAMP is, assuming TIMESTAMP is yyyymmddhhmmss. -timeoffset is now also supported. Please read this script source header for documentation.")
- elif arg.startswith('-file:'):
- random = True
- fileOption = True
- if len(arg) == 6:
- fileSignName = wikipedia.input(u'Where have you saved your signatures?')
- else:
- fileSignName = arg[6:]
- elif arg == '-break':
- recursive = False
- elif arg == '-nlog':
- log_variable = False
- elif arg == '-ask':
- ask = True
- elif arg == '-filter':
- filter_wp = True
- elif arg == '-savedata':
- savedata = True
- elif arg == '-random':
- random = True
- elif arg == '-sul':
- sul = True
- elif arg.startswith('-limit'):
- if len(arg) == 6:
- limit = int(wikipedia.input(u'How many of the latest new users would you like to load?'))
- else:
- limit = int(arg[7:])
- elif arg.startswith('-numberlog'):
- if len(arg) == 10:
- numberlog = int(wikipedia.input(u'After how many welcomed users would you like to update the welcome log?'))
- else:
- numberlog = int(arg[11:])
- # TODO: Maybe it's better change the tuple with a dictionary..
- wsite = wikipedia.getSite()
- # Filename and pywikipedia path
- filename = wikipedia.config.datafilepath('welcome-%s-%s.data' % (wsite.family.name, wsite.lang)) # file where is stored the random signature index
- if offset_variable and timeoffset_variable:
- wikipedia.output('WARING: both -offset and -timeoffset were provided, ignoring -offset')
- offset_variable = 0
- return (None, ask, filename, fileOption, fileSignName, filter_wp, limit, log_variable, number, numberlog, offset_variable, random, recursive,
- savedata, sign, time_variable, timeoffset_variable, sul)
-
-def main(settingsBot):
- # Taking the messages inside the function namespace.
- global netext, summary, logbook, summary2, report_page, project_inserted
- global comment, bad_pag, report_text, random_sign, whitelist_pg, final_new_text_additions
-
- """
- 0 1 2 3 4 5 6 7 8 9 10 11
- Returned tuple: (None, ask, filename, fileOption, fileSignName, filter_wp, limit, log_variable, number, numberlog, offset_variable, random,
- (mainSettings()) 12 13 14 15
- recursive, savedata, sign, time_variable)
- """
- # Loading the option of the mainSettings()
- ask = settingsBot[1]
- filename = settingsBot[2]
- fileOption = settingsBot[3]
- fileSignName = settingsBot[4]
- filter_wp = settingsBot[5]
- limit = settingsBot[6]
- log_variable = settingsBot[7]
- number = settingsBot[8]
- numberlog = settingsBot[9]
- offset_variable = settingsBot[10]
- random = settingsBot[11]
- recursive = settingsBot[12]
- savedata = settingsBot[13]
- sign = settingsBot[14]
- time_variable = settingsBot[15]
- timeoffset_variable = settingsBot[16]
- sul = settingsBot[17]
-
- # The site
- wsite = wikipedia.getSite()
-
- # The follow lines translate the language's parameters.
- welcomer = wikipedia.translate(wsite, netext)
- summ = wikipedia.translate(wsite, summary)
- logg = wikipedia.translate(wsite, logbook)
- summ2 = wikipedia.translate(wsite, summary2)
- rep_page = wikipedia.translate(wsite, report_page)
- com = wikipedia.translate(wsite, comment)
- bad_page = wikipedia.translate(wsite, bad_pag)
- rep_text = wikipedia.translate(wsite, report_text)
- signPageTitle = wikipedia.translate(wsite, random_sign)
- wtlpg = wikipedia.translate(wsite, whitelist_pg)
- final_additions = wikipedia.translate(wsite, final_new_text_additions)
-
- usernam = wsite.namespace(2)
- contrib = string.capitalize(wsite.mediawiki_message('contribslink'))
- # The talk_page's variable gives "Talk page".
- # Some project of the same language, have different settings. (this is the place to add them).
-
- welcomed_users = list()
- if savedata and os.path.exists(filename):
- f = file(filename)
- number_user = cPickle.load(f)
- yield number_user
- else:
- number_user = 0
- yield number_user
-
- # Here there is the main loop.
- while True:
- if filter_wp:
- # A standard list of bad username components (you can change/delate it in your project...).
- # [ I divided the list into three to make it smaller...]
- elencoaf = [' ano', ' anus', 'anal ', 'babies', 'baldracca', 'balle', 'bastardo',
- 'bestiali', 'bestiale', 'bastarda', 'b.i.t.c.h.', 'bitch', 'boobie',
- 'bordello', 'breast', 'cacata', 'cacca', 'cachapera', 'cagata',
- 'cane', 'cazz', 'cazzo', 'cazzata', 'chiavare', 'chiavata', 'chick',
- 'christ ', 'cristo', 'clitoride', 'coione', 'cojdioonear', 'cojones',
- 'cojo', 'coglione', 'coglioni', 'cornuto', 'cula', 'culatone',
- 'culattone', 'culo', 'deficiente', 'deficente', 'dio', 'die ',
- 'died ', 'ditalino', 'ejackulate', 'enculer', 'eroticunt', 'fanculo',
- 'fellatio', 'fica ', 'ficken', 'figa', 'sfiga', 'fottere', 'fotter',
- 'fottuto', 'fuck', 'f.u.c.k.', "funkyass"]
- elencogz = ['gay', 'hentai.com', 'horne', 'horney', 'virgin', 'hotties', 'idiot',
- '@alice.it', 'incest', 'jesus', 'gesu', 'gesù', 'kazzo', 'kill',
- 'leccaculo', 'lesbian', 'lesbica', 'lesbo', 'masturbazione',
- 'masturbare', 'masturbo', 'merda', 'merdata', 'merdoso', 'mignotta',
- 'minchia', 'minkia', 'minchione', 'mona', 'nudo', 'nuda', 'nudi',
- 'oral', 'sex', 'orgasmso', 'porc', 'pompa', 'pompino', 'porno',
- 'puttana', 'puzza', 'puzzone', "racchia", 'sborone', 'sborrone',
- 'sborata', 'sborolata', 'sboro', 'scopata', 'scopare', 'scroto',
- 'scrotum', 'sega', 'sesso', 'shit', 'shiz', 's.h.i.t.', 'sadomaso',
- 'sodomist', 'stronzata', 'stronzo', 'succhiamelo', 'succhiacazzi',
- 'testicol', 'troia', 'universetoday.net', 'vaffanculo', 'vagina',
- 'vibrator', "vacca", 'yiddiot', "zoccola"]
+
+ if not hasattr(self, '_blacklist') or force:
+ elenco = [
+ ' ano', ' anus', 'anal ', 'babies', 'baldracca', 'balle', 'bastardo',
+ 'bestiali', 'bestiale', 'bastarda', 'b.i.t.c.h.', 'bitch', 'boobie',
+ 'bordello', 'breast', 'cacata', 'cacca', 'cachapera', 'cagata',
+ 'cane', 'cazz', 'cazzo', 'cazzata', 'chiavare', 'chiavata', 'chick',
+ 'christ ', 'cristo', 'clitoride', 'coione', 'cojdioonear', 'cojones',
+ 'cojo', 'coglione', 'coglioni', 'cornuto', 'cula', 'culatone',
+ 'culattone', 'culo', 'deficiente', 'deficente', 'dio', 'die ',
+ 'died ', 'ditalino', 'ejackulate', 'enculer', 'eroticunt', 'fanculo',
+ 'fellatio', 'fica ', 'ficken', 'figa', 'sfiga', 'fottere', 'fotter',
+ 'fottuto', 'fuck', 'f.u.c.k.', "funkyass",
+ 'gay', 'hentai.com', 'horne', 'horney', 'virgin', 'hotties', 'idiot',
+ '@alice.it', 'incest', 'jesus', 'gesu', 'gesù', 'kazzo', 'kill',
+ 'leccaculo', 'lesbian', 'lesbica', 'lesbo', 'masturbazione',
+ 'masturbare', 'masturbo', 'merda', 'merdata', 'merdoso', 'mignotta',
+ 'minchia', 'minkia', 'minchione', 'mona', 'nudo', 'nuda', 'nudi',
+ 'oral', 'sex', 'orgasmso', 'porc', 'pompa', 'pompino', 'porno',
+ 'puttana', 'puzza', 'puzzone', "racchia", 'sborone', 'sborrone',
+ 'sborata', 'sborolata', 'sboro', 'scopata', 'scopare', 'scroto',
+ 'scrotum', 'sega', 'sesso', 'shit', 'shiz', 's.h.i.t.', 'sadomaso',
+ 'sodomist', 'stronzata', 'stronzo', 'succhiamelo', 'succhiacazzi',
+ 'testicol', 'troia', 'universetoday.net', 'vaffanculo', 'vagina',
+ 'vibrator', "vacca", 'yiddiot', "zoccola",
+ ]
elenco_others = ['@', ".com", ".sex", ".org", ".uk", ".en", ".it", "admin",
- "administrator", "amministratore", '@yahoo.com', '@alice.com',
- "amministratrice", "burocrate", "checkuser", "developer",
- "http://", "jimbo", "mediawiki", "on wheals", "on wheal",
- "on wheel", "planante", "razinger", "sysop", "troll", "vandal",
- " v.f. ", "v. fighter",
- "vandal f.", "vandal fighter", 'wales jimmy', "wheels", "wales",
- "www."]
- badword_page = wikipedia.Page(wsite, bad_page)
+ "administrator", "amministratore", '@yahoo.com', '@alice.com', "amministratrice",
+ "burocrate", "checkuser", "developer", "http://", "jimbo", "mediawiki",
+ "on wheals", "on wheal", "on wheel", "planante", "razinger", "sysop", "troll",
+ "vandal", " v.f. ", "v. fighter", "vandal f.", "vandal fighter", 'wales jimmy',
+ "wheels", "wales", "www.",
+ ]
+ badword_page = wikipedia.Page(self.site, wikipedia.translate(self.site, bad_pag) )
+ list_loaded = list()
if badword_page.exists():
- wikipedia.output(u'\nLoading the bad words list from %s...' % wsite.hostname() )
- text_bad = badword_page.get()
- list_loaded = load_word_function(wsite,text_bad)
+ wikipedia.output(u'\nLoading the bad words list from %s...' % self.site )
+ list_loaded = load_word_function(badword_page.get())
else:
- wikipedia.output(u'\t\t>>>WARNING: The bad word page doesn\'t exist!<<<')
- list_loaded = list()
- # Joining the "other things" with the loaded...
- elencovarie = elenco_others + list_loaded
- else:
- elencoaf = list()
- elencogz = list()
- elencovarie = list()
- # Joining the three lists..
- elenco = elencoaf + elencogz + elencovarie
- if filter_wp:
- # That is the default whitelist (it contains few name because it has been improved in the latest days..).
+ showStatus(4)
+ wikipedia.output(u'The bad word page doesn\'t exist!')
+ self._blacklist = elenco + elenco_others + list_loaded
+ del elenco, elenco_others, list_loaded
+
+ if not hasattr(self, '_whitelist') or force:
whitelist_default = ['emiliano']
+ wtlpg = wikipedia.translate(self.site, whitelist_pg)
+ list_white = list()
if wtlpg != None:
- whitelist_page = wikipedia.Page(wsite, wtlpg)
+ whitelist_page = wikipedia.Page(self.site, wtlpg)
if whitelist_page.exists():
- wikipedia.output(u'\nLoading the whitelist from %s...' % wsite.hostname() )
- list_white = load_word_function(wsite, whitelist_page.get())
+ wikipedia.output(u'\nLoading the whitelist from %s...' % self.site )
+ list_white = load_word_function(whitelist_page.get())
else:
- wikipedia.output(u"\t\t>>>WARNING: The whitelist's page doesn't exist!<<<")
- list_white = list()
+ showStatus(4)
+ wikipedia.output(u"The whitelist's page doesn't exist!")
else:
- wikipedia.output(u"\t\t>>>WARNING: The whitelist hasn't been setted!<<<")
- list_white = list()
- else:
- list_white = list()
- whitelist_default = list()
- # Join the whitelist words.
- whitelist = list_white + whitelist_default
+ showStatus(4)
+ wikipedia.output(u"WARNING: The whitelist hasn't been setted!")
+ # Join the whitelist words.
+ self._whitelist = list_white + whitelist_default
+ del list_white, whitelist_default
+
+ try:
+ for bname in self._blacklist:
+ if bname.lower() in str(name.lower()): #bad name positive
+ return True
+ except UnicodeEncodeError:
+ pass
+
+ try:
+ for wname in self._whitelist:
+ if wname.lower() in str(name.lower()):
+ name = name.replace(wname.lower(), '')
+ for bname in self._blacklist:
+ return bname.lower() in name.lower()
+ except UnicodeEncodeError:
+ pass
+
+ return False
- # think about non-wikimedia wikis. Use Site functions.
- URL = wsite.log_address(limit, 'newusers')
- if timeoffset_variable != 0:
- now = wsite.server_time() - timedelta(minutes=timeoffset_variable)
- offset_variable = int(now.strftime("%Y%m%d%H%M%S"))
- if offset_variable != 0:
- URL += "&offset=%d" % offset_variable
- log = wsite.getUrl(URL)
- wikipedia.output(u'Loading latest %s new users from %s:%s...\n' % (limit, wikipedia.default_family,wikipedia.default_code))
- # Determine which signature to use
- if random:
- try:
- wikipedia.output(u'Loading random signatures...')
- signList = defineSign(wsite, signPageTitle, fileSignName, fileOption)
- except wikipedia.NoPage:
- wikipedia.output(u'The list with signatures is not available... Using default signature...')
- random = False
- for found_result in parselog(wsite, log, number, sul):
- # Compiling the signature to be used.
- if random:
- if number_user + 1 > len(signList):
- number_user = 0
- yield number_user
- welcom = welcomer % signList[number_user] + timeselected
- # If there's something extra to add at the end of the template, add it!
- if final_additions != '':
- welcom += final_additions
+ def reportBadAccount(self, name = None, final = False):
+ #Queue process
+ if name:
+ if globalvar.confirm:
+ answer = wikipedia.inputChoice(u'%s may have an unwanted username, do you want to report this user?'
+ % name, ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
+ if answer in ['a', 'all']:
+ answer = 'y'
+ globalvar.confirm = False
else:
- welcom = welcomer % sign
- username = found_result[0]
- usertalkpage = wikipedia.Page(wsite, username, defaultNamespace=3)
- baduser = False
- # Check if the username is composed by only numbers.
+ answer = 'y'
+
+ if answer.lower() in ['yes', 'y'] or not globalvar.confirm:
+ showStatus()
+ wikipedia.output(u'%s is possibly an unwanted username. It will be reported.' % name)
+ if hasattr(self, '_BAQueue'):
+ self._BAQueue.append(name)
+ else:
+ self._BAQueue = [name]
+
+ if len(self._BAQueue) >= globalvar.dumpToLog or final:
+ rep_text = ''
+ #name in queue is max, put detail to report page
+ wikipedia.output("Updating badname accounts to report page...")
+ rep_page = wikipedia.Page(self.site, wikipedia.translate(self.site, report_page) )
+ if rep_page.exists():
+ text_get = rep_page.get()
+ else:
+ text_get = u'This is a report page for the Bad-username, please translate me. --~~~'
+ pos = 0
+ # The talk page includes "_" between the two names, in this way i replace them to " ".
+ for usrna in self._BAQueue:
+ username = wikipedia.url2link(usrna, self.site, self.site)
+ n = re.compile(re.escape(username), re.UNICODE)
+ y = n.search(text_get, pos)
+ if y:
+ wikipedia.output(u'%s is already in the report page.' % username)
+ else:
+ # Adding the log.
+ rep_text += wikipedia.translate(self.site, report_text) % username
+ if self.site.lang == 'it':
+ rep_text = "%s%s}}" % (rep_text, word)
+
+ com = wikipedia.translate(self.site, comment)
+ if rep_text != '':
+ rep_page.put(text_get + rep_text, comment = com, minorEdit = True)
+ showStatus(5)
+ wikipedia.output(u'Reported')
+ self.BAQueue = list()
+ else:
+ return True
+
+ def makelogpage(self, queue = []):
+ if len(queue) == 0:
+ return None
+
+ text = u''
+ logg = wikipedia.translate(self.site, logbook)
+ target = logg + '/' + time.strftime('%Y/%m/%d', time.localtime(time.time()))
+ if self.site.lang == 'it':
+ target = logg + '/' + time.strftime('%d/%m/%Y', time.localtime(time.time()))
+
+ logPage = wikipedia.Page(self.site, target)
+ if logPage.exists():
+ text = logPage.get()
+ else:
+ #make new log page
+ showStatus()
+ wikipedia.output('Log page is not exist, getting information for page creation')
+ text = wikipedia.translate(self.site, logpage_header)
+ text += u'\n!%s' % self.site.namespace(2)
+ text += u'\n!%s' % string.capitalize(self.site.mediawiki_message('contribslink'))
+
+ for result in queue:
+ # Adding the log... (don't take care of the variable's name...).
+ luser = wikipedia.url2link(result.name(), self.site, self.site)
+ text += u'\n{{WLE|user=%s|contribs=%d}}' % (luser, result.editCount())
+ #update log page.
+ while True:
try:
- int(username)
- baduser = True
- except ValueError:
- # OK, no problem
- pass
- # Check if the user has been already blocked.
+ logPage.put(text, wikipedia.translate(self.site, summary2) )
+ return True
+ except wikipedia.EditConflict:
+ wikipedia.output(u'An edit conflict has occured. Pausing for 10 seconds before continuing.')
+ time.sleep(10)
+
+ def parseNewUserLog(self):
+ try:
+ if config.use_api and self.site.versionnumber() >= 13:
+ x = self.site.api_address()
+ del x
+ else:
+ raise NotImplementedError
+ except NotImplementedError:
+ for x in self._parseNewUserLogOld():
+ yield x
+ return
+
+ params = {
+ 'action':'query',
+ 'list':'logevents',
+ 'letype':'newusers',
+ 'ledir':'older',
+ 'leprop':'ids|type|user',
+ 'lelimit':int(globalvar.queryLimit),
+ }
+ if globalvar.timeoffset != 0:
+ now = self.site.server_time() - timedelta(minutes=globalvar.timeoffset)
+ params['lestart'] = int(now.strftime("%Y%m%d%H%M%S"))
+ elif globalvar.offset != 0:
+ params['lestart'] = globalvar.offset
+
+ count = 0
+ wikipedia.output("Querying new user log from API....")
+ while True:
+ lev = query.GetData(params, self.site)
+ for x in lev['query']['logevents']:
+ someone_found = True
+ if not globalvar.welcomeAuto and x['action'] == 'autocreate':
+ showStatus(3)
+ wikipedia.output(u'%s has been created automatically.' % x['user'])
+ continue
+ if x.has_key("userhidden"):
+ continue
+ count += 1
+ yield userlib.User(self.site, x['user'])
- if found_result[2] == True:
- wikipedia.output(u'%s has been blocked! Skipping...' % usertalkpage.titleWithoutNamespace())
+ if count < globalvar.queryLimit and lev.has_key('query-continue'):
+ params['lestart'] = lev['query-continue']['logevents']['lestart']
+ else:
+ break
+ if someone_found:
+ wikipedia.output(u'There is nobody to be welcomed...')
+ else:
+ wikipedia.output(u'\nLoaded all users...')
+
+ def _parseNewUserLogOld(self):
+ someone_found = False
+ URL = self.site.log_address(globalvar.queryLimit, 'newusers')
+ if globalvar.timeoffset != 0:
+ now = self.site.server_time() - timedelta(minutes=globalvar.timeoffset)
+ globalvar.offset = int(now.strftime("%Y%m%d%H%M%S"))
+ if globalvar.offset != 0:
+ URL += "&offset=%d" % globalvar.offset
+ wikipedia.output("Getting new user log from Special:Log/newusers....")
+ raw = self.site.getUrl(URL)
+
+ # I search with a regex how many user have not the talk page
+ # and i put them in a list (i find it more easy and secure).
+ # XXX: That's the regex, if there are problems, take a look here.
+ reg = u'\(<a href=\"' + re.escape(self.site.path())
+ reg += u'\?title=%s(?P<user>.*?)&(?:amp;|)action=(?:edit|editredlink|edit&redlink=1)\"' % re.escape('%s:' % urllib.quote(self.site.namespace(3).replace(" ", "_").encode(self.site.encoding())))
+ reg += u'.*?</span> (?P<reason>.*?) *?</li>'
+
+ p = re.compile(reg, re.UNICODE)
+
+ for x in p.finditer(raw):
+ someone_found = True
+ userN = unicode(urllib.unquote(str(x.group('user'))), 'utf-8')
+ #skip autocreated users (SUL)
+ if not globalvar.welcomeAuto and self.site.mediawiki_message('newuserlog-autocreate-entry') in x.group('reason'):
+ showStatus(3)
+ wikipedia.output(u'%s has been created automatically, skipping...' % userN)
continue
- # Understand if the user has a bad-username.
- #username = username.encode(config.console_encoding)
- lower_uname = username.lower()
- for word in elenco: # elenco = list of bad words
- lower_uname = str(lower_uname)
- if word.lower() in lower_uname:
- baduser = True
- # The format of the italian report template is:
- # {{Reported|NICKNAME|BADWORD}}
- # The nickname is already added before and
- # here we add the "badword" part, but
- # this function is used only in the italian wiki.
- if wsite.lang == 'it':
- final_rep = "%s%s}}" % (rep_text, word)
- break
+
+ #FIXME: It counts the first 50 edits
+ # if number > 50, it won't work
+ # (not *so* useful, it should be enough).
+ yield userlib.User(self.site, userN)
+
+ if someone_found:
+ wikipedia.output(u'There is nobody to be welcomed...')
+ else:
+ wikipedia.output(u'\nLoaded all users...')
+
+ def defineSign(self, force = False):
+ if hasattr(self,'_randomSignature') and not force:
+ return self._randomSignature
+
+ signText = u''
+ creg = re.compile(r"^\* ?(.*?)$", re.M)
+ if not globalvar.signFileName:
+ signPage = wikipedia.Page(self.site, wikipedia.translate(self.site, random_sign) )
+ if signPage.exists():
+ wikipedia.output('Loading signature list...')
+ signText = signPage.get()
+ else:
+ wikipedia.output('The Signature list page is not exist, random signature will disable.')
+ globalvar.randomSign = False
+ else:
+ try:
+ f = codecs.open(wikipedia.config.datafilepath(globalvar.signFileName), 'r',
+ encoding=config.console_encoding)
+ except LookupError:
+ f = codecs.open(wikipedia.config.datafilepath(globalvar.signFileName), 'r',
+ encoding='utf-8')
+ except IOError:
+ wikipedia.output(u'Error! - No fileName!')
+ raise FilenameNotSet("No signature filename specified.")
+
+ signText = f.read()
+ f.close()
+ self._randomSignature = creg.findall(signText)
+ return self._randomSignature
+
+ def run(self):  # Main loop: welcome eligible new users, update the welcome log, then sleep and repeat or exit.
+ while True:
+ welcomed_count = 0  # number of users welcomed during the current pass
+ #usoj = [x for x in ]
+ #userlib.batchDumpInfo(usoj)
+ for users in self.parseNewUserLog():  # despite the plural name, each item is used as one user object
+ if users.isBlocked():  # blocked accounts are skipped
+ showStatus(3)
+ wikipedia.output(u'%s has been blocked!' % users.name() )
+ continue
+ if self.badNameFilter(users.name()):  # names flagged by the filter are reported instead of welcomed
+ self.reportBadAccount(users.name())
+ continue
+ if users.editCount() >= globalvar.attachEditCount:  # only users at or above the edit threshold are welcomed
+ showStatus(2)
+ wikipedia.output(u'%s has enough edits to be welcomed.' % users.name() )
+ ustp = users.getUserTalkPage()
+ if ustp.exists():  # an existing talk page is treated as "already welcomed"
+ showStatus(3)
+ wikipedia.output(u'%s has been already welcomed.' % users.name())
+ continue
else:
- final_rep = rep_text
- break
- # Checking in the whitelist...
- for xy in whitelist:
- if xy.lower() in lower_uname:
- # Deleting the white word found and check
- # the word that remains for badwords inside.
- lower_uname = lower_uname.replace(xy, '')
- for word in elenco:
- baduser = word.lower() in lower_uname
- break
- # He has a badusername, trying to report him...
- if baduser:
- if ask:
- answer = wikipedia.inputChoice(u'%s may have an unwanted username, do you want to report this user?'
- % usertalkpage.titleWithoutNamespace(), ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
- if answer.lower() in ['yes', 'y']:
- if not usertalkpage.exists():
- # Check if the user has been already blocked (second check).
- if blocked(username):
- wikipedia.output(u'%s has been blocked! Skipping him...' % usertalkpage.titleWithoutNamespace())
- report(wsite, rep_page, username, com, final_rep)
- continue
+ welcome_text = wikipedia.translate(self.site, netext)  # the result is %-formatted with a signature below
+ if globalvar.randomSign:
+ welcome_text = welcome_text % random.choice(self.defineSign())  # pick one of the loaded signatures at random
+ welcome_text += timeselected  # NOTE(review): timeselected is defined outside this chunk
else:
- wikipedia.output(u'The discussion page of the bad-user already exists...')
- continue
- if answer.lower() in ['no', 'n', 'nope']:
- baduser = False
+ welcome_text = welcome_text % globalvar.defaultSign
+ if self.site.lang in final_new_text_additions:  # optional per-language text appended after the welcome
+ welcome_text += wikipedia.translate(self.site, final_new_text_additions)
+ welcome_comment = wikipedia.translate(self.site, summary)
+ try:
+ #append welcomed, welcome_count++
+ ustp.put(welcome_text, welcome_comment)  # NOTE(review): the removed code passed minorEdit = False here "to trigger new talk page message" - confirm dropping it is intended
+ welcomed_count += 1
+ self._totallyCount += 1  # not reset anywhere in this method, unlike welcomed_count
+ self.welcomed_users.append(users)
+ except wikipedia.EditConflit:  # NOTE(review): spelled "EditConflit"; the removed code caught wikipedia.EditConflict - likely a typo, confirm
+ showStatus(4)
+ wikipedia.output(u'An edit conflict has occured, skipping this user.')
+
+ if globalvar.makeWelcomeLog and wikipedia.translate(self.site, logbook):  # only when logging is on and translate() yields a non-empty logbook value
+ showStatus(5)
+ if welcomed_count == 1:
+ wikipedia.output(u'One user has been welcomed.')
+ elif welcomed_count == 0:
+ wikipedia.output(u'No users have been welcomed.')
+ else:
+ wikipedia.output(u'%s users have been welcomed.' % welcomed_count )
+ if welcomed_count >= globalvar.dumpToLog:  # NOTE(review): welcomed_count is not reset after a flush, so once reached this stays true for the rest of the pass - confirm intended
+ if self.makelogpage(self.welcomed_users):  # the buffer is cleared only when makelogpage returns a true value
+ self.welcomed_users = list()
+ else:
+ continue
+ # If we haven't to report, do nothing.
else:
- wikipedia.output(u'%s is possibly an unwanted username. He will be reported.' % usertalkpage.titleWithoutNamespace())
- if not usertalkpage.exists():
- report(wsite, rep_page, username, com, final_rep)
- continue
+ showStatus(1)
+ if users.editCount() == 0:
+ wikipedia.output(u'%s has no contributions.' % users.name() )
else:
- wikipedia.output(u'The discussion page of the bad-user already exists...')
- continue
- # He has a good username, welcome!
- else:
- if not usertalkpage.exists():
- # Tring to put the welcome...
- try:
- # make non-minor edit to trigger new talk page message.
- usertalkpage.put(welcom, summ, minorEdit = False)
- welcomed_users.append(found_result)
- if random == True:
- number_user += 1
- yield number_user
- except wikipedia.EditConflict:
- wikipedia.output(u'An edit conflict has occured, skipping this user.')
- continue
- else:
- wikipedia.output(u'%s has been already welcomed when i was loading all the users... skipping' % usertalkpage.titleWithoutNamespace())
+ wikipedia.output(u'%s has only %d contributions.' % (users.name(), users.editCount() ) )
+ # That user mustn't be welcomed.
continue
- # That's the log
- if log_variable and logg:
- if len(welcomed_users) == 1:
- wikipedia.output(u'One user has been welcomed.')
- elif len(welcomed_users) == 0:
- wikipedia.output(u'No users have been welcomed.')
+ if globalvar.makeWelcomeLog and wikipedia.translate(self.site, logbook) and welcomed_count > 0:  # presumably runs after the per-user loop: log whatever was welcomed in this pass
+ showStatus()
+ if welcomed_count == 1:
+ wikipedia.output(u'Putting the log of the latest user...')
else:
- wikipedia.output(u'%s users have been welcomed.' % str(len(welcomed_users)) )
- if len(welcomed_users) < numberlog:
+ wikipedia.output(u'Putting the log of the latest %d users...' % welcomed_count)
+ if self.makelogpage(self.welcomed_users):
+ self.welcomed_users = list()
+ else:
continue
- # Update the welcome log each fifth welcome message.
- elif len(welcomed_users) >= numberlog:
- logresult = logmaker(wsite, welcomed_users, logg, summ2, usernam, contrib)
- welcomed_users = list()
- if logresult == False:
- continue
- # If we haven't to report, do nothing.
- elif log_variable == False:
- pass
- if log_variable and logg and len(welcomed_users) != 0:
- if len(welcomed_users) == 1:
- wikipedia.output(u'Putting the log of the latest user...')
- else:
- wikipedia.output(u'Putting the log of the latest %d users...' % len(welcomed_users))
- logresult2 = logmaker(wsite, welcomed_users, logg, summ2, usernam, contrib)
- welcomed_users = list()
- if logresult2 == False:
- continue
- # If recursive, don't exit, repeat after one hour.
- if recursive :
- waitstr = unicode(time_variable)
- if locale.getlocale()[1]:
- strfstr = unicode(time.strftime(u"%d %b %Y %H:%M:%S (UTC)", time.gmtime()), locale.getlocale()[1])
- else:
- strfstr = unicode(time.strftime(u"%d %b %Y %H:%M:%S (UTC)", time.gmtime()))
- wikipedia.output(u'Sleeping %s seconds before rerun. %s' % (waitstr, strfstr))
+ self.welcomed_users = list()
+ if hasattr(self, '_BAQueue'):  # only when a _BAQueue attribute has been set on this bot
+ showStatus()
+ wikipedia.output("Putting bad name to report page....")
+ self.reportBadAccount(None, final = True)  # called with no username and final = True; presumably flushes the queued reports
try:
- time.sleep(time_variable)
+ if globalvar.recursive:
+ showStatus()
+ if locale.getlocale()[1]:
+ strfstr = unicode(time.strftime(u"%d %b %Y %H:%M:%S (UTC)", time.gmtime()), locale.getlocale()[1])
+ else:
+ strfstr = unicode(time.strftime(u"%d %b %Y %H:%M:%S (UTC)", time.gmtime()))
+ wikipedia.output(u'Sleeping %d seconds before rerun. %s' % (globalvar.timeRecur, strfstr))
+ time.sleep(globalvar.timeRecur)  # wait timeRecur seconds before the next pass of the while loop
+ else:
+ raise KeyboardInterrupt  # non-recursive run: reuse the handler below to leave the loop
except KeyboardInterrupt:
- recursive = False
+ #if globalvar.makeWelcomeLog and len(self.welcomed_users) > 0:
+ # wikipedia.output("Update log before qutting script.")
+ # self.makelogpage(self.welcomed_users)
+ #if hasattr(self, '_BAQueue') and len(self._BAQueue) > 0 and globalvar.filtBadName:
+ # self.reportBadAccount(None, final = True)
break
- # If not recursive, break.
- elif recursive == False:
- yield number_user
- break
-if __name__ == "__main__":
- # Use try and finally, to put the wikipedia.stopme() always at the end of the code.
+def showStatus(n = 0):  # Print a "[label] " status prefix for status code n; n must be a key of the two tables below (0-5).
+ staColor = {  # status code -> colour name inserted into the \03{...} markup
+ 0:'white',
+ 1:'lightaqua',
+ 2:'lightgreen',
+ 3:'lightyellow',
+ 4:'lightred',
+ 5:'lightblue'
+ }
+ staMsg = {  # status code -> label printed between the square brackets
+ 0:'MSG',
+ 1:'NoAct',
+ 2:'Match',
+ 3:'Skip',
+ 4:'Warning',
+ 5:'Done',
+ }
+ wikipedia.output("\03{%s}[%s]\03{default} " % (staColor[n], staMsg[n]) , newline = False)  # newline = False: no line break is requested after the tag
+
+def load_word_function(raw):  # Return the list of quoted strings in raw that are followed by ", " or ")".
+ """ This is a function used to load the badword and the whitelist."""
+ page = re.compile(r"(?:\"|\')(.*?)(?:\"|\')(?:, |\))", re.UNICODE)  # captures (non-greedily) the text between a pair of ' or " characters
+ list_loaded = page.findall(raw)
+ if len(list_loaded) == 0:  # nothing matched: a notice is printed and the empty list is still returned
+ wikipedia.output(u'There was no input on the real-time page.')
+ return list_loaded
+
+globalvar = Global()  # settings object: read by run() and filled in by the argument parsing below
+
+if __name__ == "__main__":
try:
- #try:
number_user = 0
- settingsBot = mainSettings()
- # Take two settings for the "finally" block.
- filename = settingsBot[2]
- random = settingsBot[11]
- savedata = settingsBot[13]
- # I need to know what is the number_user, in this way I get it.
- # If recursive, just wait some error or something to get the number
- # and save it, otherwise just save the first one.
- for number_user in main(settingsBot):
- #print number_user
- pass # number_user get with the for cicle the value
- #except wikipedia.BadTitle:
- # If the server is down, pywikipediabot raise that error.
- # Better to catch it in order not to get useless errors
- # (in particular if you're running that script on toolserver).
- # FixME: put this except in a better "place" (next to the part
- # that raises the error)
- # wikipedia.output(u"Wikidown or server's problem. Quit.")
- # wikipedia.stopme()
+ for arg in wikipedia.handleArgs():  # translate each script option into a globalvar setting
+ if arg.startswith('-edit'):  # bare "-edit" prompts for the value; otherwise it is read from arg[6:]
+ if len(arg) == 5:
+ globalvar.attachEditCount = int(wikipedia.input(u'After how many edits would you like to welcome new users? (0 is allowed)'))
+ else:
+ globalvar.attachEditCount = int(arg[6:])
+ elif arg.startswith('-timeoffset'):  # tested before '-time', which is a prefix of this option
+ if len(arg) == 11:
+ globalvar.timeoffset = int(wikipedia.input(u'Which time offset (in minutest) for new users would you like to use?'))
+ else:
+ globalvar.timeoffset = int(arg[12:])
+ elif arg.startswith('-time'):
+ if len(arg) == 5:
+ globalvar.timeRecur = int(wikipedia.input(u'For how many seconds would you like to bot to sleep before checking again?'))
+ else:
+ globalvar.timeRecur = int(arg[6:])
+ elif arg.startswith('-offset'):
+ if len(arg) == 7:
+ globalvar.offset = int(wikipedia.input(u'Which time offset for new users would you like to use? (yyyymmddhhmmss)'))
+ else:
+ globalvar.offset = int(arg[8:])
+ if len(str(globalvar.offset)) != 14:  # the offset must be a 14-digit yyyymmddhhmmss timestamp
+ # upon request, we might want to check for software version here
+ raise ValueError("Mediawiki has changed, -offset:# is not supported anymore, but -offset:TIMESTAMP is, assuming TIMESTAMP is yyyymmddhhmmss. -timeoffset is now also supported. Please read this script source header for documentation.")
+ elif arg.startswith('-file:'):  # giving a signature file also switches random signatures on
+ globalvar.randomSign = True
+ if len(arg) == 6:
+ globalvar.signFileName = wikipedia.input(u'Where have you saved your signatures?')
+ else:
+ globalvar.signFileName = arg[6:]
+ elif arg == '-break':
+ globalvar.recursive = False
+ elif arg == '-nlog':
+ globalvar.makeWelcomLog = False  # NOTE(review): Global declares makeWelcomeLog and run() reads that name; this spelling sets a different attribute - likely a typo, confirm
+ elif arg == '-ask':
+ globalvar.confirm = True
+ elif arg == '-filter':
+ globalvar.filtBadName = True
+ #elif arg == '-savedata':
+ # globalvar.saveSignIndex = True
+ elif arg == '-random':
+ globalvar.randomSign = True
+ elif arg == '-sul':
+ globalvar.welcomeAuto = True
+ elif arg.startswith('-limit'):
+ if len(arg) == 6:
+ globalvar.queryLimit = int(wikipedia.input(u'How many of the latest new users would you like to load?'))
+ else:
+ globalvar.queryLimit = int(arg[7:])
+ elif arg.startswith('-numberlog'):
+ if len(arg) == 10:
+ globalvar.dumpToLog = int(wikipedia.input(u'After how many welcomed users would you like to update the welcome log?'))
+ else:
+ globalvar.dumpToLog = int(arg[11:])
+ # Filename and pywikipedia path
+ # file where is stored the random signature index
+ filename = wikipedia.config.datafilepath('welcome-%s-%s.data' % (wikipedia.default_family, wikipedia.default_code))
+ if globalvar.offset and globalvar.timeoffset:  # both given: -timeoffset wins and -offset is cleared
+ wikipedia.output('WARING: both -offset and -timeoffset were provided, ignoring -offset')
+ globalvar.offset = 0
+ bot = WelcomeBot()
+ bot.run()
finally:
# If there is the savedata, the script must save the number_user.
- if random and savedata and number_user != None:
+ if globalvar.randomSign and globalvar.saveSignIndex and bot.welcomed_users:  # NOTE(review): saveSignIndex is only assigned in the commented-out -savedata branch above (unless Global declares it outside this chunk), and bot is unbound if an error occurs before WelcomeBot() is created - confirm
f = file(filename, 'w')
- cPickle.dump(number_user, f)
+ cPickle.dump(bot.welcomed_users, f)  # the pickled object is now the welcomed-users list, not the number_user counter of the removed line
f.close()
- wikipedia.stopme()
+ wikipedia.stopme()
\ No newline at end of file
More information about the Pywikipedia-svn
mailing list