http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10781
Revision: 10781
Author: xqt
Date: 2012-12-11 10:53:08 +0000 (Tue, 11 Dec 2012)
Log Message:
-----------
corresponding image_repository and data_repository of a Page
Modified Paths:
--------------
branches/rewrite/pywikibot/page.py
Modified: branches/rewrite/pywikibot/page.py
===================================================================
--- branches/rewrite/pywikibot/page.py 2012-12-11 10:45:44 UTC (rev 10780)
+++ branches/rewrite/pywikibot/page.py 2012-12-11 10:53:08 UTC (rev 10781)
@@ -94,6 +94,16 @@
"""Return the Site object for the wiki on which this Page resides."""
return self._link.site
+ @property
+ def image_repository(self):
+ """Return the Site object for the image repository."""
+ return self.site.image_repository()
+
+ @property
+ def data_repository(self):
+ """Return the Site object for the data repository."""
+ return self.site.data_repository()
+
def namespace(self):
"""Return the number of the namespace of the page.
@@ -460,8 +470,8 @@
"""
txt = self.get()
- txt = pywikibot.removeLanguageLinks(txt, site = self.site)
- txt = pywikibot.removeCategoryLinks(txt, site = self.site)
+ txt = pywikibot.removeLanguageLinks(txt, site=self.site)
+ txt = pywikibot.removeCategoryLinks(txt, site=self.site)
if len(txt) < 4:
return True
else:
@@ -837,8 +847,8 @@
"""
self.text = newtext
return self.save(comment=comment, watch=watchArticle,
- minor=minorEdit, botflag=botflag, force=force,
- async=async, callback=callback)
+ minor=minorEdit, botflag=botflag, force=force,
+ async=async, callback=callback)
def put_async(self, newtext, comment=u'', watchArticle=None,
minorEdit=True, botflag=None, force=False, callback=None):
@@ -2560,7 +2570,6 @@
Assumes that the lang & title come clean, no checks are made.
"""
link = Link.__new__(Link)
-
link._site = pywikibot.Site(lang, source.family.name)
link._section = None
link._source = source
@@ -2584,7 +2593,7 @@
# This regular expression will match any decimal and hexadecimal entity and
# also entities that might be named entities.
entityR = re.compile(
- r'&(#(?P<decimal>\d+)|#x(?P<hex>[0-9a-fA-F]+)|(?P<name>[A-Za-z]+));')
+ r'&(?:amp;)?(#(?P<decimal>\d+)|#x(?P<hex>[0-9a-fA-F]+)|(?P<name>[A-Za-z]+));')
# These characters are Html-illegal, but sadly you *can* find some of
# these and converting them to unichr(decimal) is unsuitable
convertIllegalHtmlEntities = {
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10779
Revision: 10779
Author: amir
Date: 2012-12-10 21:34:01 +0000 (Mon, 10 Dec 2012)
Log Message:
-----------
Finally, the wikidata problem is solved; this also fixes my previous commits
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2012-12-10 14:37:29 UTC (rev 10778)
+++ trunk/pywikipedia/wikipedia.py 2012-12-10 21:34:01 UTC (rev 10779)
@@ -757,14 +757,6 @@
'inprop': ['protection', 'subjectid'],
#'intoken': 'edit',
}
- params1=params.copy()
- if self.site().lang==u"wikidata" and self.namespace()==0:
- params['action']='wbgetentities'
- params['sites']='enwiki'
- del params['prop']
- del params['rvprop']
- del params['rvlimit']
- del params['inprop']
if oldid:
params['rvstartid'] = oldid
if expandtemplates:
@@ -775,14 +767,6 @@
textareaFound = False
# retrying loop is done by query.GetData
data = query.GetData(params, self.site(), sysop=sysop)
- if self.site().lang==u"wikidata" and self.namespace()==0:
- data['query']={'pages':data['entities']}
- for pageid in data['entities'].keys():
- if pageid=="-1":
- continue #Means the page does not exist
- params1['titles']="Q"+pageid
- ndata=query.GetData(params1, self.site(), sysop=sysop)['query']['pages']
- data['query']['pages'].update(ndata)
if 'error' in data:
raise RuntimeError("API query error: %s" % data)
if not 'pages' in data['query']:
@@ -4050,6 +4034,8 @@
setitem : Setting item(s) on a page
+ getentity : Getting item(s) of a page
+
"""
def __init__(self, site, title, insite=False):
Page.__init__(self, getSite('wikidata', fam='wikidata'), title, insite,
@@ -4160,8 +4146,113 @@
if data['success'] == u"1":
return 302, response.msg, data['success']
return response.code, response.msg, data
+ def getentity(self,force=False, get_redirect=False, throttle=True,
+ sysop=False, change_edit_time=True):
+ """Returns items of a entity in a dictionary
+ """
+ params = {
+ 'action': 'query',
+ 'titles': self.title(),
+ 'prop': ['revisions', 'info'],
+ 'rvprop': ['content', 'ids', 'flags', 'timestamp', 'user', 'comment', 'size'],
+ 'rvlimit': 1,
+ 'inprop': ['protection', 'subjectid'],
+ }
+ params1=params.copy()
+ params['action']='wbgetentities'
+ params['sites']='enwiki'
+ del params['prop']
+ del params['rvprop']
+ del params['rvlimit']
+ del params['inprop']
+ textareaFound = False
+ # retrying loop is done by query.GetData
+ data = query.GetData(params, self.site(), sysop=sysop)
+ data['query']={'pages':data['entities']}
+ for pageid in data['entities'].keys():
+ if pageid=="-1":
+ continue #Means the page does not exist
+ params1['titles']=pageid
+ ndata=query.GetData(params1, self.site(), sysop=sysop)
+ data['entities'].update(ndata['query']['pages'])
+ data['query']['pages'].update(data['entities'])
+ if 'error' in data:
+ raise RuntimeError("API query error: %s" % data)
+ if not 'pages' in data['query']:
+ raise RuntimeError("API query error, no pages found: %s" % data)
+ pageInfo = ndata['query']['pages'].values()[0]
+ if data['query']['pages'].keys()[0] == "-1":
+ if 'missing' in pageInfo:
+ raise NoPage(self.site(), unicode(self),
+"Page does not exist. In rare cases, if you are certain the page does exist, look into overriding family.RversionTab")
+ elif 'invalid' in pageInfo:
+ raise BadTitle('BadTitle: %s' % self)
+ elif 'revisions' in pageInfo: #valid Title
+ lastRev = pageInfo['revisions'][0]
+ if isinstance(lastRev['*'], basestring):
+ textareaFound = True
+ # I got page date with 'revisions' in pageInfo but
+ # lastRev['*'] = False instead of the content. The Page itself was
+ # deleted but there was not 'missing' in pageInfo as expected
+ # I raise a ServerError() yet, but maybe it should be NoPage().
+ if not textareaFound:
+ if verbose:
+ print pageInfo
+ raise ServerError('ServerError: No textarea found in %s' % self)
+ self.editRestriction = ''
+ self.moveRestriction = ''
+ # Note: user may be hidden and mw returns 'userhidden' flag
+ if 'userhidden' in lastRev:
+ self._userName = None
+ else:
+ self._userName = lastRev['user']
+ self._ipedit = 'anon' in lastRev
+ try:
+ self._comment = lastRev['comment']
+ except KeyError:
+ self._comment = None
+ for restr in pageInfo['protection']:
+ if restr['type'] == 'edit':
+ self.editRestriction = restr['level']
+ elif restr['type'] == 'move':
+ self.moveRestriction = restr['level']
+
+ self._revisionId = lastRev['revid']
+
+ if change_edit_time:
+ self._editTime = parsetime2stamp(lastRev['timestamp'])
+ if "starttimestamp" in pageInfo:
+ self._startTime = parsetime2stamp(pageInfo["starttimestamp"])
+
+ self._isWatched = False #cannot handle in API in my research for now.
+
+ pagetext = lastRev['*']
+ pagetext = pagetext.rstrip()
+ # pagetext must not decodeEsperantoX() if loaded via API
+ m = self.site().redirectRegex().match(pagetext)
+ if m:
+ # page text matches the redirect pattern
+ if self.section() and not "#" in m.group(1):
+ redirtarget = "%s#%s" % (m.group(1), self.section())
+ else:
+ redirtarget = m.group(1)
+ if get_redirect:
+ self._redirarg = redirtarget
+ else:
+ raise IsRedirectPage(redirtarget)
+
+ if self.section() and \
+ not does_text_contain_section(pagetext, self.section()):
+ try:
+ self._getexception
+ except AttributeError:
+ raise SectionError # Page has no section by this name
+ return pagetext
+
+
+
class ImagePage(Page):
"""A subclass of Page representing an image descriptor wiki page.
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10778
Revision: 10778
Author: xqt
Date: 2012-12-10 14:37:29 +0000 (Mon, 10 Dec 2012)
Log Message:
-----------
New doRollover() method for RotatingFileHandler which keeps the file extension of the log files and inserts the counter number before the extension instead of after it.
New config variables are introduced to control the FileHandler:
logfilesize specifies the size of a single logfile, given in kilobytes
logfilecount specifies the number of rotating logfiles. If 0 is given, no logfile will be archived; the current logfile will be reused and the previous data is lost. (New!) If logfilecount is -1, there are no rotating logfiles; instead the file is renamed when the logfile is full. The newest file gets the highest number unless some logfiles were deleted.
Modified Paths:
--------------
branches/rewrite/pywikibot/bot.py
branches/rewrite/pywikibot/config2.py
Modified: branches/rewrite/pywikibot/bot.py
===================================================================
--- branches/rewrite/pywikibot/bot.py 2012-12-10 14:06:38 UTC (rev 10777)
+++ branches/rewrite/pywikibot/bot.py 2012-12-10 14:37:29 UTC (rev 10778)
@@ -3,7 +3,7 @@
User-interface related functions for building bots
"""
#
-# (C) Pywikipedia bot team, 2008-2011
+# (C) Pywikipedia bot team, 2008-2012
#
# Distributed under the terms of the MIT license.
#
@@ -35,16 +35,65 @@
# User interface initialization
# search for user interface module in the 'userinterfaces' subdirectory
uiModule = __import__("pywikibot.userinterfaces.%s_interface"
- % config.userinterface,
- fromlist=['UI'] )
+ % config.userinterface,
+ fromlist=['UI'] )
ui = uiModule.UI()
# Logging module configuration
class RotatingFileHandler(logging.handlers.RotatingFileHandler):
- """Strip trailing newlines before outputting text to file"""
+
+ def doRollover(self):
+ """
+ Overwrites the default Rollover renaming by inserting the count number
+ between file name root and extension. If backupCount is >= 1, the system
+ will successively create new files with the same pathname as the base
+ file, but with inserting ".1", ".2" etc. in front of the filename
+ suffix. For example, with a backupCount of 5 and a base file name of
+ "app.log", you would get "app.log", "app.1.log", "app.2.log", ...
+ through to "app.5.log". The file being written to is always "app.log" -
+ when it gets filled up, it is closed and renamed to "app.1.log", and if
+ files "app.2.log", "app.2.log" etc. exist, then they are renamed to
+ "app.2.log", "app.3.log" etc. respectively.
+ If backupCount is >= 1 do not rotate but create new numbered filenames.
+ The newest file has the highest number except some older numbered files
+ where deleted and the bot was restarted. In this case the ordering
+ starts from the lowest availlable (unused) number.
+
+ """
+ if self.stream:
+ self.stream.close()
+ self.stream = None
+ root, ext = os.path.splitext(self.baseFilename)
+ if self.backupCount > 0:
+ for i in range(self.backupCount - 1, 0, -1):
+ sfn = "%s.%d%s" % (root, i, ext)
+ dfn = "%s.%d%s" % (root, i + 1, ext)
+ if os.path.exists(sfn):
+ #print "%s -> %s" % (sfn, dfn)
+ if os.path.exists(dfn):
+ os.remove(dfn)
+ os.rename(sfn, dfn)
+ dfn = "%s.1%s" % (root, ext)
+ if os.path.exists(dfn):
+ os.remove(dfn)
+ os.rename(self.baseFilename, dfn)
+ #print "%s -> %s" % (self.baseFilename, dfn)
+ elif self.backupCount == -1:
+ if not hasattr(self, lastNo):
+ self._lastNo = 1
+ while True:
+ fn = "%s.%d%s" % (root, self._lastNo, ext)
+ self._lastNo += 1
+ if not os.path.exists(fn):
+ break
+ os.rename(self.baseFilename, fn)
+ self.mode = 'w'
+ self.stream = self._open()
+
def format(self, record):
+ """Strip trailing newlines before outputting text to file"""
text = logging.handlers.RotatingFileHandler.format(self, record)
return text.rstrip("\r\n")
@@ -152,8 +201,9 @@
logfile = config.datafilepath("logs", config.logfilename)
else:
logfile = config.datafilepath("logs", "%s-bot.log" % moduleName)
- file_handler = RotatingFileHandler(
- filename=logfile, maxBytes=2 << 20, backupCount=5)
+ file_handler = RotatingFileHandler(filename=logfile,
+ maxBytes=1024 * config.logfilesize,
+ backupCount=config.logfilecount)
file_handler.setLevel(DEBUG)
form = LoggingFormatter(
@@ -167,7 +217,7 @@
# or for all components if nothing was specified
for component in config.debug_log:
if component:
- debuglogger = logging.getLogger("pywiki."+component)
+ debuglogger = logging.getLogger("pywiki." + component)
else:
debuglogger = logging.getLogger("pywiki")
debuglogger.setLevel(DEBUG)
@@ -215,7 +265,7 @@
# done filching
def logoutput(text, decoder=None, newline=True, _level=INFO, _logger="",
- **kwargs):
+ **kwargs):
"""Format output and send to the logging module.
Backend function used by all the user-output convenience functions.
Modified: branches/rewrite/pywikibot/config2.py
===================================================================
--- branches/rewrite/pywikibot/config2.py 2012-12-10 14:06:38 UTC (rev 10777)
+++ branches/rewrite/pywikibot/config2.py 2012-12-10 14:37:29 UTC (rev 10778)
@@ -256,6 +256,17 @@
log = ['interwiki']
# filename defaults to modulename-bot.log
logfilename = None
+# maximal size of a logfile in kilobytes. If the size reached that limit the
+# logfile will be renamed (if logfilecount is not 0) and the old file is filled
+# again. logfilesize must be an integer value
+logfilesize = 1024
+# Number of rotating logfiles to keep. The older files get the higher
+# numbers. If logfilecount is 0, no logfile will be archived but the current
+# logfile will be overwritten when the file size reaches the logfilesize above.
+# If logfilecount is -1 there are no rotating logfiles; instead the file is
+# renamed when the logfile is full. The newest file gets the highest number
+# unless some logfiles were deleted.
+logfilecount = 5
# set to 1 (or higher) to generate "informative" messages to terminal
verbose_output = 0
# if True, include a lot of debugging info in logfile
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10775
Revision: 10775
Author: xqt
Date: 2012-12-10 13:56:56 +0000 (Mon, 10 Dec 2012)
Log Message:
-----------
New functionality for bot restriction templates "bots" and "nobots".
update from trunk r10774
Modified Paths:
--------------
branches/rewrite/pywikibot/page.py
Modified: branches/rewrite/pywikibot/page.py
===================================================================
--- branches/rewrite/pywikibot/page.py 2012-12-10 13:05:50 UTC (rev 10774)
+++ branches/rewrite/pywikibot/page.py 2012-12-10 13:56:56 UTC (rev 10775)
@@ -671,22 +671,32 @@
The framework enforces this restriction by default. It is possible
to override this by setting ignore_bot_templates=True in
- user_config.py, or using page.put(force=True).
+ user-config.py, or using page.put(force=True).
- """ # TODO: move this to Site object?
+ """
+ # TODO: move this to Site object?
if config.ignore_bot_templates: #Check the "master ignore switch"
return True
username = self.site.user()
try:
- templates = self.templatesWithParams();
+ templates = self.templatesWithParams()
except (pywikibot.NoPage,
pywikibot.IsRedirectPage,
pywikibot.SectionError):
return True
+
+ # go through all templates and look for any restriction
+ # multiple bots/nobots templates are allowed
for template in templates:
title = template[0].title(withNamespace=False)
if title == 'Nobots':
- return False
+ if len(template[1]) == 0:
+ return False
+ else:
+ bots = template[1][0].split(',')
+ if 'all' in bots or calledModuleName() in bots \
+ or username in bots:
+ return False
elif title == 'Bots':
if len(template[1]) == 0:
return True
@@ -694,15 +704,13 @@
(ttype, bots) = template[1][0].split('=', 1)
bots = bots.split(',')
if ttype == 'allow':
- if 'all' in bots or username in bots:
- return True
- else:
- return False
+ return 'all' in bots or username in bots
if ttype == 'deny':
- if 'all' in bots or username in bots:
- return False
- else:
- return True
+ return not ('all' in bots or username in bots)
+ if ttype == 'allowscript':
+ return 'all' in bots or calledModuleName() in bots
+ if ttype == 'denyscript':
+ return not ('all' in bots or calledModuleName() in bots)
# no restricting template found
return True
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10774
Revision: 10774
Author: xqt
Date: 2012-12-10 13:05:50 +0000 (Mon, 10 Dec 2012)
Log Message:
-----------
New functionality for bot restriction templates "bots" and "nobots".
"nobots" may take a parameter: either a script name or a bot's username.
{{nobots|interwiki}} denies all bots using interwiki.py
{{nobots|botname}} is equal to {{bots|deny=botname}}
{{nobots|all}} is equal to {{nobots}} and prohibits all bot edits.
"bots" has two new parameters, "allowscript" and "denyscript". Guess what they do.
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2012-12-09 14:28:00 UTC (rev 10773)
+++ trunk/pywikipedia/wikipedia.py 2012-12-10 13:05:50 UTC (rev 10774)
@@ -1599,9 +1599,17 @@
except (NoPage, IsRedirectPage, SectionError):
return True
+ # go through all templates and look for any restriction
+ # multiple bots/nobots templates are allowed
for template in templates:
if template[0].lower() == 'nobots':
- return False
+ if len(template[1]) == 0:
+ return False
+ else:
+ bots = template[1][0].split(',')
+ if 'all' in bots or calledModuleName() in bots \
+ or username in bots:
+ return False
elif template[0].lower() == 'bots':
if len(template[1]) == 0:
return True
@@ -1609,15 +1617,13 @@
(ttype, bots) = template[1][0].split('=', 1)
bots = bots.split(',')
if ttype == 'allow':
- if 'all' in bots or username in bots:
- return True
- else:
- return False
+ return 'all' in bots or username in bots
if ttype == 'deny':
- if 'all' in bots or username in bots:
- return False
- else:
- return True
+ return not ('all' in bots or username in bots)
+ if ttype == 'allowscript':
+ return 'all' in bots or calledModuleName() in bots
+ if ttype == 'denyscript':
+ return not ('all' in bots or calledModuleName() in bots)
# no restricting template found
return True