Revision: 5336
Author: filnik
Date: 2008-05-08 19:03:30 +0000 (Thu, 08 May 2008)
Log Message:
-----------
Dupe function even better. Now I have to handle more than two dupes in order to catch all the possibilities and then the work is done :-) Let's commit this btw, until the better version is done
Modified Paths:
--------------
trunk/pywikipedia/checkimages.py
Modified: trunk/pywikipedia/checkimages.py
===================================================================
--- trunk/pywikipedia/checkimages.py 2008-05-08 16:42:39 UTC (rev 5335)
+++ trunk/pywikipedia/checkimages.py 2008-05-08 19:03:30 UTC (rev 5336)
@@ -408,6 +408,17 @@
""" Function that let you send email trough the Wikipedia system """
pass # Empty, need work
+def returnOlderTime(listGiven, timeListGiven):
+ for element in listGiven:
+ time = element[0]
+ imageName = element[1]
+ not_the_oldest = False
+ for time_selected in timeListGiven:
+ if time > time_selected:
+ not_the_oldest = True
+ break
+ if not_the_oldest == False:
+ return imageName
# Here there is the main class.
class main:
@@ -431,6 +442,8 @@
self.botolist = botolist
self.sendemailActive = sendemailActive
self.duplicatesReport = duplicatesReport
+ image_n = self.site.image_namespace()
+ self.image_namespace = "%s:" % image_n # Example: "User_talk:"
def report(self, newtext, image, notification = None, head = None, notification2 = None, unver = True, commx = None):
""" Function to make the reports easier (or I hope so). """
# Defining some useful variable for next...
@@ -501,10 +514,8 @@
# has upload the image (FixME: Rewrite a bit this part)
if put:
p.put(testoa + self.newtext, comment = self.commento, minorEdit = True)
- image_n = self.site.image_namespace()
- image_namespace = "%s:" % image_n # Example: "User_talk:"
# paginetta it's the image page object.
- paginetta = wikipedia.ImagePage(self.site, image_namespace + self.image)
+ paginetta = wikipedia.ImagePage(self.site, self.image_namespace + self.image)
# I take the data of the latest uploader and I take only the name
imagedata = paginetta.getFileVersionHistory()
#print imagedata # Let it so for de-buggin porpuse (wikipedia.output gives error)
@@ -517,7 +528,7 @@
# We have a problem! Report and exit!
return False
try:
- nick = paginetta.getFileVersionHistory()[-1][1]
+ nick = paginetta.getFileVersionHistory()[0][1] # Get the latest uploader
except IndexError:
wikipedia.output(u"Seems that %s hasn't the image at all, but there is something in the description..." % self.image)
repme = "\n*[[:Image:%s]] seems to have problems ('''no data found in the image''')"
@@ -705,19 +716,33 @@
if not result:
return True # If Errors, exit (but continue the check)
if not dupText == None and not dupRegex == None:
+ time_image_list = list()
+ time_list = list()
for duplicate in duplicates:
- if wikipedia.Page(self.site, u'Image:%s' % duplicate) == wikipedia.Page(self.site, u'Image:%s' % self.image):
- continue # the image itself, not report also this as duplicate
- DupePage = wikipedia.Page(self.site, u'Image:%s' % duplicate)
+ DupePage = wikipedia.ImagePage(self.site, u'Image:%s' % duplicate)
+ imagedata = DupePage.getFileVersionHistory()[-1][0]
+ # Example: 21:15, 5 ott 2005
+ data = time.strptime(imagedata, "%H:%M, %d %b %Y")
+ data_seconds = time.mktime(data)
+ time_image_list.append([data_seconds, self.image])
+ time_list.append(data_seconds)
+ older_image = returnOlderTime(time_image_list, time_list)
+ # And if the images are more than two?
+ for duplicate in duplicates:
+ if wikipedia.ImagePage(self.site, u'%s:%s' % (self.image_namespace, duplicate)) == \
+ wikipedia.ImagePage(self.site, u'%s:%s' % (self.image_namespace, older_image)):
+ continue # the older image, not report also this as duplicate
try:
DupPageText = DupePage.get()
except wikipedia.NoPage:
continue # The page doesn't exists
if re.findall(dupRegex, DupPageText) == []:
wikipedia.output(u'Adding the duplicate template in the image...')
- dupTalkText = dupTalkText % (duplicate, self.image)
- self.report(re.sub(r'__image__', r'%s' % self.image, dupText),
- duplicate, dupTalkText, dupTalkHead, commx = dupComment, unver = False)
+ self.report(re.sub(r'__image__', r'%s' % older_image, dupText), duplicate,
+ dupTalkText % (duplicate, older_image), dupTalkHead, commx = dupComment, unver = True)
+ else:
+ wikipedia.output(u"Already put the dupe-template in the image's page or in the dupe's page. Skip.")
+ break
return True # Ok - No problem. Let's continue the checking phase
def report_image(self, image, rep_page = None, com = None, rep_text = None, addings = True, regex = None):
Revision: 5334
Author: russblau
Date: 2008-05-08 16:06:22 +0000 (Thu, 08 May 2008)
Log Message:
-----------
Site: allimages, allusers, and blocks methods
Modified Paths:
--------------
branches/rewrite/pywikibot/data/api.py
branches/rewrite/pywikibot/site.py
Modified: branches/rewrite/pywikibot/data/api.py
===================================================================
--- branches/rewrite/pywikibot/data/api.py 2008-05-08 16:04:53 UTC (rev 5333)
+++ branches/rewrite/pywikibot/data/api.py 2008-05-08 16:06:22 UTC (rev 5334)
@@ -278,6 +278,7 @@
'allpages': 'aplimit',
'alllinks': 'allimit',
'allcategories': 'aclimit',
+ 'allimages': 'ailimit',
'backlinks': 'bllimit',
'categorymembers': 'cmlimit',
'embeddedin': 'eilimit',
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2008-05-08 16:04:53 UTC (rev 5333)
+++ branches/rewrite/pywikibot/site.py 2008-05-08 16:06:22 UTC (rev 5334)
@@ -508,7 +508,7 @@
def loadpageinfo(self, page):
"""Load page info from api and save in page attributes"""
title = page.title(withSection=False)
- query = api.PropertyGenerator("info",
+ query = api.PropertyGenerator("info", site=self,
titles=title.encode(self.encoding()))
for pageitem in query:
if pageitem['title'] != title:
@@ -603,7 +603,7 @@
if hasattr(p, "_pageid")
and p._pageid > 0]
cache = dict((p.title(withSection=False), p) for p in sublist)
- rvgen = api.PropertyGenerator("revisions|info")
+ rvgen = api.PropertyGenerator("revisions|info", site=self)
if len(pageids) == len(sublist):
# only use pageids if all pages have them
rvgen.request["pageids"] = "|".join(pageids)
@@ -660,7 +660,7 @@
"""
bltitle = page.title(withSection=False).encode(self.encoding())
- blgen = api.PageGenerator("backlinks", gbltitle=bltitle)
+ blgen = api.PageGenerator("backlinks", gbltitle=bltitle, site=self)
if namespaces is not None:
blgen.request["gblnamespace"] = u"|".join(unicode(ns)
for ns in namespaces)
@@ -683,7 +683,7 @@
"""
eititle = page.title(withSection=False).encode(self.encoding())
- eigen = api.PageGenerator("embeddedin", geititle=eititle)
+ eigen = api.PageGenerator("embeddedin", geititle=eititle, site=self)
if namespaces is not None:
eigen.request["geinamespace"] = u"|".join(unicode(ns)
for ns in namespaces)
@@ -707,7 +707,7 @@
def pagelinks(self, page, namespaces=None):
"""Iterate internal wikilinks contained (or transcluded) on page."""
- plgen = api.PageGenerator("links")
+ plgen = api.PageGenerator("links", site=self)
if hasattr(page, "_pageid"):
plgen.request['pageids'] = str(page._pageid)
else:
@@ -721,7 +721,7 @@
def pagecategories(self, page, withSortKey=False):
"""Iterate categories to which page belongs."""
# Sortkey doesn't work with generator; FIXME or deprecate
- clgen = api.CategoryPageGenerator("categories")
+ clgen = api.CategoryPageGenerator("categories", site=self)
if hasattr(page, "_pageid"):
clgen.request['pageids'] = str(page._pageid)
else:
@@ -732,13 +732,13 @@
def pageimages(self, page):
"""Iterate images used (not just linked) on the page."""
imtitle = page.title(withSection=False).encode(self.encoding())
- imgen = api.ImagePageGenerator("images", titles=imtitle)
+ imgen = api.ImagePageGenerator("images", titles=imtitle, site=self)
return imgen
def pagetemplates(self, page, namespaces=None):
"""Iterate templates transcluded (not just linked) on the page."""
tltitle = page.title(withSection=False).encode(self.encoding())
- tlgen = api.PageGenerator("templates", titles=tltitle)
+ tlgen = api.PageGenerator("templates", titles=tltitle, site=self)
if namespaces is not None:
tlgen.request["gtlnamespace"] = u"|".join(unicode(ns)
for ns in namespaces)
@@ -762,7 +762,7 @@
% category.title())
cmtitle = category.title(withSection=False).encode(self.encoding())
cmgen = api.PageGenerator(u"categorymembers", gcmtitle=cmtitle,
- gcmprop="ids|title|sortkey")
+ gcmprop="ids|title|sortkey", site=self)
if namespaces is not None:
cmgen.request[u"gcmnamespace"] = u"|".join(unicode(ns)
for ns in namespaces)
@@ -845,10 +845,12 @@
# assemble API request
if revids is None:
rvtitle = page.title(withSection=False).encode(self.encoding())
- rvgen = api.PropertyGenerator(u"revisions", titles=rvtitle)
+ rvgen = api.PropertyGenerator(u"revisions", titles=rvtitle,
+ site=self)
else:
ids = u"|".join(unicode(r) for r in revids)
- rvgen = api.PropertyGenerator(u"revisions", revids=ids)
+ rvgen = api.PropertyGenerator(u"revisions", revids=ids,
+ site=self)
if getText:
rvgen.request[u"rvprop"] = \
u"ids|flags|timestamp|user|comment|content"
@@ -906,8 +908,8 @@
"""Iterate all interlanguage links on page, yielding Link objects."""
lltitle = page.title(withSection=False)
llquery = api.PropertyGenerator("langlinks",
- titles=lltitle.encode(self.encoding())
- )
+ titles=lltitle.encode(self.encoding()),
+ site=self)
for pageitem in llquery:
if pageitem['title'] != lltitle:
raise Error(
@@ -923,8 +925,8 @@
"""Iterate all external links on page, yielding URL strings."""
eltitle = page.title(withSection=False)
elquery = api.PropertyGenerator("extlinks",
- titles=eltitle.encode(self.encoding())
- )
+ titles=eltitle.encode(self.encoding()),
+ site=self)
for pageitem in elquery:
if pageitem['title'] != eltitle:
raise RuntimeError(
@@ -986,7 +988,7 @@
filterredirs = False
apgen = api.PageGenerator("allpages", gapnamespace=str(namespace),
- gapfrom=start)
+ gapfrom=start, site=self)
if prefix:
apgen.request["gapprefix"] = prefix
if filterredir is not None:
@@ -1037,7 +1039,7 @@
if not isinstance(namespace, int):
raise Error("alllinks: only one namespace permitted.")
algen = api.ListGenerator("alllinks", alnamespace=str(namespace),
- alfrom=start)
+ alfrom=start, site=self)
if prefix:
algen.request["alprefix"] = prefix
if isinstance(limit, int):
@@ -1052,7 +1054,6 @@
p.fromid = link['fromid']
yield p
-
def allcategories(self, start="!", prefix="", limit=None,
reverse=False):
"""Iterate categories used (which need not have a Category page).
@@ -1067,7 +1068,7 @@
order (default: iterate in forward order)
"""
- acgen = api.CategoryGenerator("allcategories", gapfrom=start)
+ acgen = api.CategoryGenerator("allcategories", gapfrom=start, site=self)
if prefix:
acgen.request["gacprefix"] = prefix
if isinstance(limit, int):
@@ -1076,7 +1077,114 @@
acgen.request["gacdir"] = "descending"
return acgen
+ def allusers(self, start="!", prefix="", limit=None, group=None):
+ """Iterate registered users, ordered by username.
+ Iterated values are dicts containing 'name', 'editcount',
+ 'registration', and (sometimes) 'groups' keys. 'groups' will be
+ present only if the user is a member of at least 1 group, and will
+ be a list of unicodes; all the other values are unicodes and should
+ always be present.
+
+ @param start: start at this username (name need not exist)
+ @param prefix: only iterate usernames starting with this substring
+ @param limit: maximum number of users to iterate (default: all)
+ @param group: only iterate users that are members of this group
+ @type group: str
+
+ """
+ augen = api.ListGenerator("allusers", aufrom=start,
+ auprop="editcount|groups|registration",
+ site=self)
+ if prefix:
+ augen.request["auprefix"] = prefix
+ if group:
+ augen.request["augroup"] = group
+ if isinstance(limit, int):
+ augen.request["aulimit"] = str(limit)
+ return augen
+
+ def allimages(self, start="!", prefix="", minsize=None, maxsize=None,
+ limit=None, reverse=False, sha1=None, sha1base36=None):
+ """Iterate all images, ordered by image title.
+
+ Yields ImagePages, but these pages need not exist on the wiki.
+
+ @param start: start at this title (name need not exist)
+ @param prefix: only iterate titles starting with this substring
+ @param limit: maximum number of titles to iterate (default: all)
+ @param minsize: only iterate images of at least this many bytes
+ @param maxsize: only iterate images of no more than this many bytes
+ @param reverse: if True, iterate in reverse lexigraphic order
+ @param sha1: only iterate image (it is theoretically possible there
+ could be more than one) with this sha1 hash
+ @param sha1base36: same as sha1 but in base 36
+
+ """
+ aigen = api.ImagePageGenerator("allimages", gaifrom=start,
+ site=self)
+ if prefix:
+ aigen.request["gaiprefix"] = prefix
+ if isinstance(limit, int):
+ aigen.request["gailimit"] = str(limit)
+ if isinstance(minsize, int):
+ aigen.request["gaiminsize"] = str(minsize)
+ if isinstance(maxsize, int):
+ aigen.request["gaimaxsize"] = str(maxsize)
+ if reverse:
+ aigen.request["gaidir"] = "descending"
+ if sha1:
+ aigen.request["gaisha1"] = sha1
+ if sha1base36:
+ aigen.request["gaisha1base36"] = sha1base36
+ return aigen
+
+ def blocks(self, starttime=None, endtime=None, reverse=False,
+ blockids=None, users=None, limit=None):
+ """Iterate all current blocks, in order of creation.
+
+ Note that logevents only logs user blocks, while this method
+ iterates all blocks including IP ranges. The iterator yields dicts
+ containing keys corresponding to the block properties (see
+ http://www.mediawiki.org/wiki/API:Query_-_Lists for documentation).
+
+ @param starttime: start iterating at this timestamp
+ @param endtime: stop iterating at this timestamp
+ @param reverse: if True, iterate oldest blocks first (default: newest)
+ @param blockids: only iterate blocks with these id numbers
+ @param users: only iterate blocks affecting these usernames or IPs
+ @param limit: maximum number of blocks to iterate (default: all)
+
+ """
+ if starttime and endtime:
+ if reverse:
+ if starttime > endtime:
+ logging.error(
+ "blocks: starttime must be before endtime with reverse=True")
+ return
+ else:
+ if endtime < starttime:
+ logging.error(
+ "blocks: endtime must be before starttime with reverse=False")
+ return
+ bkgen = api.ListGenerator("blocks", site=self)
+ bkgen.request["bkprop"] = \
+ "id|user|by|timestamp|expiry|reason|range|flags"
+ if starttime:
+ bkgen.request["bkstart"] = starttime
+ if endtime:
+ bkgen.request["bkend"] = endtime
+ if reverse:
+ bkgen.request["bkdir"] = newer
+ if blockids:
+ bkgen.request["bkids"] = blockids
+ if users:
+ bkgen.request["bkusers"] = users
+ if isinstance(limit, int):
+ bkgen.request["bklimit"] = str(limit)
+ return bkgen
+
+
#### METHODS NOT IMPLEMENTED YET (but may be delegated to Family object) ####
class NotImplementedYet:
Bugs item #1959384, was opened at 2008-05-07 09:56
Message generated for change (Comment added) made by filnik
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=1959384&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
>Status: Closed
Resolution: None
Priority: 5
Private: No
Submitted By: DarkoNeko (darkoneko)
Assigned to: Nobody/Anonymous (nobody)
Summary: welcome.py seems to ignore offset
Initial Comment:
Version used:
5318 (latest available, since I did an update just before re-testing the bug)
Command used :
/usr/bin/python ~/pywikipedia/welcome.py -nlog -limit:300 -offset:40 -random -time:300
Problem :
the script ignores offset and welcomes users since the first one (verified using http://fr.wikipedia.org/wiki/Special:log/newusers )
I thought the problem was "it ignores the 40 first lines, regardless of what action it is" but it's not the case : the welcomed user (Yusukay) was 6th in user creation list, and 27th in the overall log.
----------------------------------------------------------------------
>Comment By: Filnik (filnik)
Date: 2008-05-08 13:08
Message:
Logged In: YES
user_id=1834469
Originator: NO
Yep, I've found this (strange, to say the truth) problem in another script
but I remembered just now that there's that problem also on welcome.py. By
the way, nicdumz has fixed it perfectly, I wouldn't have done it better :-)
Well done ;-)
Filnik
----------------------------------------------------------------------
Comment By: NicDumZ — Nicolas Dumazet (nicdumz)
Date: 2008-05-07 14:48
Message:
Logged In: YES
user_id=1963242
Originator: NO
Actually Mediawiki Special:Log has changed lately.
It's not using anymore a number offset, but a timestamp offset instead.
I fixed this on r5320, now you have to use a yyyymmddhhmmss timestamp as
an -offset argument : it will only welcome users OLDER than that time.
(Actually this time is server-dependent; it's GMT time for Wikimedia
projects).
After thinking more about this issue, I also added a timeoffset:#
parameter : it will only welcome users older than # minutes.
(These argument names are confusing, offset, timeoffset, and time, but I
could not really find better names. If a developer has better names, just
go ahead, and *please add a deprecated warning for the old names* )
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=1959384&group_…
Hallo all,
as you might know, the Flagged Revisions extension has recently been enabled
on the German Wikipedia. A minor change to the PyWikipediaBot was required so
that bots work properly with that extension. I have done this change and
committed it to SVN as revision 5330. [1]
If you run a bot that edits pages on de: (e. g. an interwiki bot), this update
is mandatory. Please update immediately, otherwise your bot might be in
danger of getting blocked because it increases the workload for users who are
reviewing articles.
If you are unable to update via SVN, please stop your bot and wait until
revision 5330 or higher is available on
http://tools.wikimedia.de/~valhallasw/pywiki/ .
Thank you
Daniel
[1] diff:
http://svn.wikimedia.org/viewvc/pywikipedia/trunk/pywikipedia/wikipedia.py?…