Revision: 5275
Author: btongminh
Date: 2008-04-26 12:05:51 +0000 (Sat, 26 Apr 2008)
Log Message:
-----------
Add starvation to logger threads. Remove obsolete code.
Modified Paths:
--------------
trunk/pywikipedia/commonsdelinker/delinker.py
Modified: trunk/pywikipedia/commonsdelinker/delinker.py
===================================================================
--- trunk/pywikipedia/commonsdelinker/delinker.py 2008-04-25 19:13:20 UTC (rev 5274)
+++ trunk/pywikipedia/commonsdelinker/delinker.py 2008-04-26 12:05:51 UTC (rev 5275)
@@ -535,6 +535,8 @@
self.pool.jobLock.release()
class Logger(threadpool.Thread):
+ timeout = 360
+
def __init__(self, pool, CommonsDelinker):
threadpool.Thread.__init__(self, pool)
self.CommonsDelinker = CommonsDelinker
@@ -607,7 +609,21 @@
traceback.print_exc(file = sys.stderr)
self.exit()
self.CommonsDelinker.thread_died()
+
+ def starve(self):
+ self.pool.jobLock.acquire()
+ try:
+ if self.pool[id(self)].isSet(): return False
+ output(u'%s Starving' % self)
+ self.database.close()
+ del self.pool[id(self)]
+ self.pool.threads.remove(self)
+ return True
+ finally:
+ self.pool.jobLock.release()
+
+
class CommonsDelinker(object):
def __init__(self):
self.config = config.CommonsDelinker
@@ -832,33 +848,8 @@
time.sleep(self.config['timeout'])
def thread_died(self):
- # A thread died, it may be possible that we cannot
- # function any more. Currently only for CheckUsages
- # and Loggers. Delinkers should not be able to die.
- cu = 0
- self.CheckUsages.jobLock.acquire()
- for thread in self.CheckUsages.threads:
- if thread.isAlive() and not thread.quit:
- cu += 1
- self.CheckUsages.jobLock.release()
- lg = 0
- self.Loggers.jobLock.acquire()
- for thread in self.Loggers.threads:
- if thread.isAlive() and not thread.quit:
- lg += 1
- unlogged = self.Loggers.jobQueue[:]
- self.Loggers.jobLock.release()
-
- # We can no longer function if we have only one
- # CheckUsage or zero Loggers available.
- # TODO: config settings?
- if cu <= 1:
- output(u'ERROR!!! Too few CheckUsages left to function', False)
- threadpool.terminate()
- if lg <= 0:
- output(u'ERROR!!! Too few Loggers left to function', False)
- print >>sys.stderr, 'Currently unlogged:', unlogged
- threadpool.terminate()
+ # Obsolete
+ return
@staticmethod
def output(*args):
Bugs item #1924322, was opened at 2008-03-24 06:19
Message generated for change (Comment added) made by nobody
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=1924322&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: interwiki
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: Nobody/Anonymous (nobody)
Assigned to: Nobody/Anonymous (nobody)
Summary: interwiki links on subpages in templates
Initial Comment:
In English and some other major wikipedias interwiki links are placed on /doc subpage (or whatever it's called) in templates. Interwiki bot should check if such a page exists and not place interwiki links on main template page but place/update links on that subpage. Otherwise, everytime a bot places interwiki on a template with this structure, the main template page needs to be cleaned and interwiki links moved to a subpage manually
----------------------------------------------------------------------
Comment By: Nobody/Anonymous (nobody)
Date: 2008-04-25 14:57
Message:
Logged In: NO
A few pages in the project namespace also use subpages for headers and
interwiki links. Subpages usually have <includeonly> around the iw and
category links, interwiki bots should leave that in place.
----------------------------------------------------------------------
Comment By: Nobody/Anonymous (nobody)
Date: 2008-04-17 13:13
Message:
Logged In: NO
Every like it about his problem.
If there is a page with subpages in templates. The interwiki links on the
subpages needs to places in inside the nowiki section. So if the last word
of the page is </nowiki> the interwiki link needs to be placed before the
</nowiki> tag.
I'm the operator of the CarsracBot with its home on the nl.wikipedia.org
And I have seen this behaviour on that articles where
en:list_of_asteroides/1101-1200 is a part of.
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=1924322&group_…
Revision: 5274
Author: russblau
Date: 2008-04-25 19:13:20 +0000 (Fri, 25 Apr 2008)
Log Message:
-----------
more site methods; fix bugs; lazier site login (although I'm still not happy with this; getting langlinks requires connecting to each wiki server)
Modified Paths:
--------------
branches/rewrite/pywikibot/__init__.py
branches/rewrite/pywikibot/data/api.py
branches/rewrite/pywikibot/exceptions.py
branches/rewrite/pywikibot/page.py
branches/rewrite/pywikibot/site.py
Modified: branches/rewrite/pywikibot/__init__.py
===================================================================
--- branches/rewrite/pywikibot/__init__.py 2008-04-25 16:19:26 UTC (rev 5273)
+++ branches/rewrite/pywikibot/__init__.py 2008-04-25 19:13:20 UTC (rev 5274)
@@ -51,11 +51,6 @@
key = '%s:%s:%s' % (fam, code, user)
if not _sites.has_key(key):
_sites[key] = __Site(code=code, fam=fam, user=user)
- _sites[key].getsiteinfo()
- try:
- _sites[key].login(False)
- except NoUsername:
- pass
return _sites[key]
getSite = Site # alias for backwards-compatibility
Modified: branches/rewrite/pywikibot/data/api.py
===================================================================
--- branches/rewrite/pywikibot/data/api.py 2008-04-25 16:19:26 UTC (rev 5273)
+++ branches/rewrite/pywikibot/data/api.py 2008-04-25 19:13:20 UTC (rev 5274)
@@ -223,19 +223,7 @@
# double the next wait, but do not exceed 120 seconds
self.retry_wait = min(120, self.retry_wait * 2)
- def lag_wait(self, lag):
- """Wait due to server lag."""
- # unlike regular wait, this shuts down all access to site
- self.site.sitelock.acquire()
- try:
- # wait at least 5 seconds, no more than 120
- wait = max(5, min(120, lag//2))
- logging.warn("Pausing %s seconds due to server lag." % wait)
- time.sleep(wait)
- finally:
- self.site.sitelock.release()
-
class PageGenerator(object):
"""Iterator for response to a request of type action=query&generator=foo."""
def __init__(self, generator, **kwargs):
@@ -293,7 +281,6 @@
# FIXME: this won't handle generators with <redirlinks> subelements
# correctly yet
while True:
- self.site.throttle()
self.data = self.request.submit()
if not self.data or not isinstance(self.data, dict):
raise StopIteration
@@ -392,7 +379,6 @@
"""Iterate objects for elements found in response."""
# this looks for the resultkey ''inside'' a <page> entry
while True:
- self.site.throttle()
self.data = self.request.submit()
if not self.data or not isinstance(self.data, dict):
raise StopIteration
Modified: branches/rewrite/pywikibot/exceptions.py
===================================================================
--- branches/rewrite/pywikibot/exceptions.py 2008-04-25 16:19:26 UTC (rev 5273)
+++ branches/rewrite/pywikibot/exceptions.py 2008-04-25 19:13:20 UTC (rev 5274)
@@ -31,6 +31,15 @@
class IsNotRedirectPage(Error):
"""Page is not a redirect page"""
+class CircularRedirect(Error):
+ """Page is a circular redirect
+
+ Exception argument is the redirect target; this may be the same title
+ as this page or a different title (in which case the target page directly
+ or indirectly redirects back to this one)
+
+ """
+
class LockedPage(Error):
"""Page is locked"""
Modified: branches/rewrite/pywikibot/page.py
===================================================================
--- branches/rewrite/pywikibot/page.py 2008-04-25 16:19:26 UTC (rev 5273)
+++ branches/rewrite/pywikibot/page.py 2008-04-25 19:13:20 UTC (rev 5274)
@@ -660,7 +660,11 @@
exception. This method also can raise a NoPage exception.
"""
- return self.site().follow_redirect(self)
+ if not self.isRedirectPage():
+ raise pywikibot.IsNotRedirectPage
+ if not isinstance(self._redir, Page):
+ self.site().getredirtarget(self)
+ return self._redir
def getVersionHistory(self, forceReload=False, reverseOrder=False,
getAll=False, revCount=500):
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2008-04-25 16:19:26 UTC (rev 5273)
+++ branches/rewrite/pywikibot/site.py 2008-04-25 19:13:20 UTC (rev 5274)
@@ -103,7 +103,17 @@
self._pagemutex = threading.Lock()
self._locked_pages = []
- self.throttle = Throttle(self, multiplydelay=True, verbosedelay=True)
+ @property
+ def throttle(self):
+ """Return this Site's throttle. Initialize a new one if needed."""
+ if not hasattr(self, "_throttle"):
+ self._throttle = Throttle(self, multiplydelay=True, verbosedelay=True)
+ self.getsiteinfo()
+ try:
+ self.login(False)
+ except pywikibot.NoUsername:
+ pass
+ return self._throttle
def family(self):
"""Return the associated Family object."""
@@ -124,6 +134,8 @@
def __getattr__(self, attr):
"""Calls to methods not defined in this object are passed to Family."""
+ if hasattr(self.__class__, attr):
+ return getattr(self.__class__, attr)
try:
method = getattr(self.family(), attr)
f = lambda *args, **kwargs: \
@@ -509,7 +521,42 @@
if not hasattr(page, "_redir"):
self.getpageinfo(page)
return bool(page._redir)
-
+
+ def getredirtarget(self, page):
+ """Return Page object for the redirect target of page."""
+ if not hasattr(page, "_redir"):
+ self.getpageinfo(page)
+ if not page._redir:
+ raise pywikibot.IsNotRedirectPage
+ title = page.title(withSection=False)
+ query = api.Request(site=self, action="query", property="info",
+ inprop="protection|talkid|subjectid",
+ titles=title.encode(self.encoding()),
+ redirects="")
+ result = query.submit()
+ if "query" not in result or "redirects" not in result["query"]:
+ raise RuntimeError(
+ "getredirtarget: No 'redirects' found for page %s."
+ % title)
+ redirmap = dict((item['from'], item['to'])
+ for item in result['query']['redirects'])
+ if title not in redirmap:
+ raise RuntimeError(
+ "getredirtarget: 'redirects' contains no key for page %s."
+ % title)
+ if "pages" not in result['query']:
+ # no "pages" element indicates a circular redirect
+ raise pywikibot.CircularRedirect(redirmap[title])
+ for pagedata in result['query']['pages'].values():
+ # there should be only one value in 'pages', and it is the target
+ if pagedata['title'] not in redirmap.values():
+ raise RuntimeError(
+ "getredirtarget: target page '%s' not found in 'redirects'"
+ % pagedata['title'])
+ target = pywikibot.Page(self, pagedata['title'], pagedata['ns'])
+ api.update_page(target, pagedata)
+ page._redir = target
+
# following group of methods map more-or-less directly to API queries
def getbacklinks(self, page, followRedirects=False, filterRedirects=None,
@@ -783,7 +830,22 @@
yield pywikibot.Link(linkdata['*'],
source=pywikibot.Site(linkdata['lang']))
+ def getextlinks(self, page):
+ """Iterate all external links on page, yielding URL strings."""
+ eltitle = page.title(withSection=False)
+ elquery = api.PropertyGenerator("extlinks",
+ titles=eltitle.encode(self.encoding())
+ )
+ for pageitem in elquery:
+ if pageitem['title'] != eltitle:
+ raise RuntimeError(
+ "getextlinks: Query on %s returned data on '%s'"
+ % (page, pageitem['title']))
+ for linkdata in pageitem['extlinks']:
+ yield linkdata['*']
+
+
#### METHODS NOT IMPLEMENTED YET (but may be delegated to Family object) ####
class NotImplementedYet:
Revision: 5273
Author: filnik
Date: 2008-04-25 16:19:26 +0000 (Fri, 25 Apr 2008)
Log Message:
-----------
minor fixes at the docu
Modified Paths:
--------------
trunk/pywikipedia/add_text.py
Modified: trunk/pywikipedia/add_text.py
===================================================================
--- trunk/pywikipedia/add_text.py 2008-04-25 15:36:24 UTC (rev 5272)
+++ trunk/pywikipedia/add_text.py 2008-04-25 16:19:26 UTC (rev 5273)
@@ -1,7 +1,8 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
-This is a Bot written by Filnik to add a text in a given category.
+This is a Bot written by Filnik to add a text at the end of the page but above categories,
+interwiki and template for the stars of the interwiki.
These command line parameters can be used to specify which pages to work on:
@@ -15,8 +16,8 @@
-except Use a regex to understand if the template is already in the page
-excepturl Use the html page as text where you want to see if there's the text, not the wiki-page.
-newimages Add text in the new images
--untagged Add text in the images that doesn't have any license template
--always If used, the bot won't asked if it should add the text specified
+-untagged Add text in the images that don't have any license template
+-always If used, the bot won't ask if it should add the text specified
-up If used, put the text above and not below
--- Example ---
@@ -36,11 +37,11 @@
"""
#
-# (C) Filnik, 2007
+# (C) Filnik, 2007-2008
#
# Distributed under the terms of the MIT license.
#
-__version__ = '$Id: AddText.py,v 1.0 2007/11/27 17:08:30 filnik Exp$'
+__version__ = '$Id: add_text.py,v 1.5 2008/04/25 17:08:30 filnik Exp$'
#
import re, pagegenerators, urllib2, urllib
Revision: 5272
Author: russblau
Date: 2008-04-25 15:36:24 +0000 (Fri, 25 Apr 2008)
Log Message:
-----------
implemented getlanglinks(); current implementation causes the bot to connect to every site referred to in a language link, which probably isn't desirable.
Modified Paths:
--------------
branches/rewrite/pywikibot/data/api.py
branches/rewrite/pywikibot/page.py
branches/rewrite/pywikibot/site.py
branches/rewrite/pywikibot/tests/page_tests.py
Modified: branches/rewrite/pywikibot/data/api.py
===================================================================
--- branches/rewrite/pywikibot/data/api.py 2008-04-25 08:32:54 UTC (rev 5271)
+++ branches/rewrite/pywikibot/data/api.py 2008-04-25 15:36:24 UTC (rev 5272)
@@ -465,6 +465,7 @@
raise AssertionError(
"Page %s has neither 'pageid' nor 'missing' attribute"
% pagedict['title'])
+ page._redir = 'redirect' in pagedict
if 'lastrevid' in pagedict:
page._revid = pagedict['lastrevid']
if 'touched' in pagedict:
Modified: branches/rewrite/pywikibot/page.py
===================================================================
--- branches/rewrite/pywikibot/page.py 2008-04-25 08:32:54 UTC (rev 5271)
+++ branches/rewrite/pywikibot/page.py 2008-04-25 15:36:24 UTC (rev 5272)
@@ -520,7 +520,6 @@
# no restricting template found
return True
-
def put(self, newtext, comment=None, watchArticle=None, minorEdit=True,
force=False):
"""Save the page with the contents of the first argument as the text.
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2008-04-25 08:32:54 UTC (rev 5271)
+++ branches/rewrite/pywikibot/site.py 2008-04-25 15:36:24 UTC (rev 5272)
@@ -484,21 +484,32 @@
return self._namespaces[num]
return self._namespaces[num][0]
+ def getpageinfo(self, page):
+ """Load page info from api and save in page attributes"""
+ title = page.title(withSection=False)
+ query = api.PropertyGenerator(
+ "info",
+ inprop="protection|talkid|subjectid",
+ titles=title.encode(self.encoding()))
+ for pageitem in query:
+ if pageitem['title'] != title:
+ raise RuntimeError(
+ "getpageinfo: Query on %s returned data on '%s'"
+ % (page, pageitem['title']))
+ api.update_page(page, pageitem)
+
def page_exists(self, page):
"""Return True if and only if page is an existing page on site."""
if not hasattr(page, "_pageid"):
- query = api.PropertyGenerator(
- "info", inprop="protection|talkid|subjectid",
- titles=page.title(withSection=False
- ).encode(self.encoding()))
- for pageitem in query:
- if pageitem['title'] != page.title(withSection=False):
- raise RuntimeError(
- "page_exists: Query on %s returned data on '%s'"
- % (page, pageitem['title']))
- page._pageid = pageitem['pageid']
+ self.getpageinfo(page)
return page._pageid > 0
+ def page_isredirect(self, page):
+ """Return True if and only if page is a redirect."""
+ if not hasattr(page, "_redir"):
+ self.getpageinfo(page)
+ return bool(page._redir)
+
# following group of methods map more-or-less directly to API queries
def getbacklinks(self, page, followRedirects=False, filterRedirects=None,
@@ -752,8 +763,27 @@
page._revisions[revision.revid] = revision
if latest:
page._revid = revision.revid
-
+ def getinterwiki(self, page):
+ # TODO
+ raise NotImplementedError
+
+ def getlanglinks(self, page):
+ """Iterate all interlanguage links on page, yielding Link objects."""
+ lltitle = page.title(withSection=False)
+ llquery = api.PropertyGenerator("langlinks",
+ titles=lltitle.encode(self.encoding())
+ )
+ for pageitem in llquery:
+ if pageitem['title'] != lltitle:
+ raise RuntimeError(
+ "getlanglinks: Query on %s returned data on '%s'"
+ % (page, pageitem['title']))
+ for linkdata in pageitem['langlinks']:
+ yield pywikibot.Link(linkdata['*'],
+ source=pywikibot.Site(linkdata['lang']))
+
+
#### METHODS NOT IMPLEMENTED YET (but may be delegated to Family object) ####
class NotImplementedYet:
Modified: branches/rewrite/pywikibot/tests/page_tests.py
===================================================================
--- branches/rewrite/pywikibot/tests/page_tests.py 2008-04-25 08:32:54 UTC (rev 5271)
+++ branches/rewrite/pywikibot/tests/page_tests.py 2008-04-25 15:36:24 UTC (rev 5272)
@@ -203,6 +203,9 @@
if __name__ == '__main__':
try:
- unittest.main()
- except SystemExit:
- pass
+ try:
+ unittest.main()
+ except SystemExit:
+ pass
+ finally:
+ pywikibot.stopme()
Revision: 5271
Author: nicdumz
Date: 2008-04-25 08:32:54 +0000 (Fri, 25 Apr 2008)
Log Message:
-----------
* moving the Special:Protectedpages from blockpageschecker to wikipedia.py
* Using a new protectedpages_address family function instead of a hard coded url
* better sysop-protection check for blockpageschecker.py (In this case, we _already_ getEditRestrictions() via the api, so manually checking for editability is almost costless)
Modified Paths:
--------------
trunk/pywikipedia/blockpageschecker.py
trunk/pywikipedia/family.py
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/blockpageschecker.py
===================================================================
--- trunk/pywikipedia/blockpageschecker.py 2008-04-24 17:33:36 UTC (rev 5270)
+++ trunk/pywikipedia/blockpageschecker.py 2008-04-25 08:32:54 UTC (rev 5271)
@@ -24,6 +24,8 @@
or when you have problems with them. (add the namespace after ":" where
you want to check - default checks all protected pages)
+-moveprotected: Same as -protectedpages, for moveprotected pages
+
Furthermore, the following command line parameters are supported:
-always Doesn't ask every time if the bot should make the change or not, do it always.
@@ -166,35 +168,6 @@
return ('autoconfirmed-move', catchRegex)
return ('editable', r'\A\n') # If editable means that we have no regex, won't change anything with this regex
-def ProtectedPagesData(namespace = None):
- """ Yield all the pages blocked, using Special:ProtectedPages """
- # Avoid problems of encoding and stuff like that, let it divided please
- url = '/w/index.php?title=Special:ProtectedPages&type=edit&level=0'
- if namespace != None: # /!\ if namespace seems simpler, but returns false when ns=0
-
- url += '&namespace=%s' % namespace
- site = wikipedia.getSite()
- parser_text = site.getUrl(url)
- while 1:
- #<li><a href="/wiki/Pagina_principale" title="Pagina principale">Pagina principale</a> <small>(6.522 byte)</small> (protetta)</li>
- m = re.findall(r'<li><a href=".*?" title=".*?">(.*?)</a>.*?<small>\((.*?)\)</small>.*?\((.*?)\)</li>', parser_text)
- for data in m:
- title = data[0]
- size = data[1]
- status = data[2]
- yield (title, size, status)
- nextpage = re.findall(r'<.ul>\(.*?\).*?\(.*?\).*?\(<a href="(.*?)".*?</a>\) +?\(<a href=', parser_text)
- if nextpage != []:
- parser_text = site.getUrl(nextpage[0].replace('&', '&'))
- continue
- else:
- break
-
-def ProtectedPages(namespace = 0):
- """ Return only the wiki page object and not the tuple with all the data as above """
- for data in ProtectedPagesData(namespace):
- yield wikipedia.Page(wikipedia.getSite(), data[0])
-
def debugQuest(site, page):
quest = wikipedia.input(u'Do you want to open the page on your [b]rowser, [g]ui or [n]othing?')
pathWiki = site.family.nicepath(site.lang)
@@ -226,6 +199,8 @@
moveBlockCheck = False; genFactory = pagegenerators.GeneratorFactory()
# To prevent Infinite loops
errorCount = 0
+ # Load the right site
+ site = wikipedia.getSite()
# Loading the default options.
for arg in wikipedia.handleArgs():
if arg == '-always':
@@ -236,9 +211,15 @@
debug = True
elif arg.startswith('-protectedpages'):
if len(arg) == 15:
- generator = ProtectedPages()
+ generator = site.protectedpages(namespace = 0)
else:
- generator = ProtectedPages(int(arg[16:]))
+ generator = site.protectedpages(namespace = int(arg[16:]))
+ elif arg.startswith('-moveprotected'):
+ if len(arg) == 14:
+ generator = site.protectedpages(namespace = 0, type = 'move')
+ else:
+ generator = site.protectedpages(namespace = int(arg[16:]),
+ type = 'move')
elif arg.startswith('-page'):
if len(arg) == 5:
generator = [wikipedia.Page(wikipedia.getSite(), wikipedia.input(u'What page do you want to use?'))]
@@ -246,8 +227,6 @@
generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
else:
generator = genFactory.handleArg(arg)
- # Load the right site
- site = wikipedia.getSite()
# Take the right templates to use, the category and the comment
TSP = wikipedia.translate(site, templateSemiProtection)
TTP = wikipedia.translate(site, templateTotalProtection)
@@ -284,15 +263,27 @@
if debug:
debugQuest(site, page)
continue
+ """
+ # This check does not work :
+ # PreloadingGenerator cannot set correctly page.editRestriction
+ # (see bug #1949476 )
if not page.canBeEdited():
- wikipedia.output("%s is protected : this account can't edit it! Skipping..." % pagename)
- continue
+ wikipedia.output("%s is sysop-protected : this account can't edit it! Skipping..." % pagename)
+ continue
+ """
+ editRestr = restrictions['edit']
+ if editRestr and editRestr[0] == 'sysop':
+ try:
+ config.sysopnames[site.family.name][site.lang]
+ except:
+ wikipedia.output("%s is sysop-protected : this account can't edit it! Skipping..." % pagename)
+ continue
+
# Understand, according to the template in the page, what should be the protection
# and compare it with what there really is.
TemplateInThePage = understandBlock(text, TTP, TSP, TSMP, TTMP)
# Only to see if the text is the same or not...
oldtext = text
- editRestr = restrictions['edit']
# keep track of the changes for each step (edit then move)
changes = -1
Modified: trunk/pywikipedia/family.py
===================================================================
--- trunk/pywikipedia/family.py 2008-04-24 17:33:36 UTC (rev 5270)
+++ trunk/pywikipedia/family.py 2008-04-25 08:32:54 UTC (rev 5271)
@@ -2986,6 +2986,9 @@
def lonelypages_address(self, code, limit=500):
return "%s?title=%s:Lonelypages&limit=%d" % (self.path(code), self.special_namespace_url(code), limit)
+ def protectedpages_address(self, code, limit=500):
+ return "%s?title=%s:ProtectedPages&limit=%d" % (self.path(code), self.special_namespace_url(code), limit)
+
def unwatchedpages_address(self, code, limit=500):
return "%s?title=%s:Unwatchedpages&limit=%d" % (self.path(code), self.special_namespace_url(code), limit)
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-04-24 17:33:36 UTC (rev 5270)
+++ trunk/pywikipedia/wikipedia.py 2008-04-25 08:32:54 UTC (rev 5271)
@@ -3773,6 +3773,7 @@
search(query): query results from Special:Search
allpages(): Special:Allpages
prefixindex(): Special:Prefixindex
+ protectedpages(): Special:ProtectedPages
newpages(): Special:Newpages
newimages(): Special:Log&type=upload
longpages(): Special:Longpages
@@ -3843,6 +3844,7 @@
deadendpages_address: Special:Deadendpages.
ancientpages_address: Special:Ancientpages.
lonelypages_address: Special:Lonelypages.
+ protectedpages_address: Special:ProtectedPages
unwatchedpages_address: Special:Unwatchedpages.
uncategorizedcategories_address: Special:Uncategorizedcategories.
uncategorizedimages_address: Special:Uncategorizedimages.
@@ -4930,6 +4932,34 @@
else:
break
+ def protectedpages(self, namespace = None, type = 'edit', lvl = 0):
+ """ Yield all the protected pages, using Special:ProtectedPages
+ * namespace is a namespace number
+ * type can be 'edit' or 'move'
+ * lvl : protection level, can be 0, 'autoconfirmed', or 'sysop'
+ """
+ # Avoid problems of encoding and stuff like that, let it divided please
+ url = self.protectedpages_address()
+ url += '&type=%s&level=%s' % (type, lvl)
+ if namespace != None: # /!\ if namespace seems simpler, but returns false when ns=0
+
+ url += '&namespace=%s' % namespace
+ parser_text = self.getUrl(url)
+ while 1:
+ #<li><a href="/wiki/Pagina_principale" title="Pagina principale">Pagina principale</a> <small>(6.522 byte)</small> (protetta)</li>
+ m = re.findall(r'<li><a href=".*?" title=".*?">(.*?)</a>.*?<small>\((.*?)\)</small>.*?\((.*?)\)</li>', parser_text)
+ for data in m:
+ title = data[0]
+ size = data[1]
+ status = data[2]
+ yield Page(self, title)
+ nextpage = re.findall(r'<.ul>\(.*?\).*?\(.*?\).*?\(<a href="(.*?)".*?</a>\) +?\(<a href=', parser_text)
+ if nextpage != []:
+ parser_text = self.getUrl(nextpage[0].replace('&amp;', '&'))
+ continue
+ else:
+ break
+
def linksearch(self, siteurl, limit=500):
"""Yield Pages from results of Special:Linksearch for 'siteurl'."""
if siteurl.startswith('*.'):
@@ -5223,6 +5253,10 @@
"""Return path to Special:Lonelypages."""
return self.family.lonelypages_address(self.lang, n)
+ def protectedpages_address(self, n=500):
+ """Return path to Special:ProtectedPages"""
+ return self.family.protectedpages_address(self.lang, n)
+
def unwatchedpages_address(self, n=500):
"""Return path to Special:Unwatchedpages."""
return self.family.unwatchedpages_address(self.lang, n)
Hello for all,
If I copy en:Template:Bots to huwiki, will it work here automatically?
As far as I see, this template contains nothing except noinclude text. So,
if I don't copy it to huwiki (just a theoretical question), and someone
inserts a non-existing "nobots" template into his user page /talk page (both
needed separately?), will it work as well?
Bináris
Revision: 5268
Author: russblau
Date: 2008-04-24 13:50:10 +0000 (Thu, 24 Apr 2008)
Log Message:
-----------
further implementation of getrevisions; documentation; refactoring.
Modified Paths:
--------------
branches/rewrite/pywikibot/README-conversion.txt
branches/rewrite/pywikibot/__init__.py
branches/rewrite/pywikibot/data/api.py
branches/rewrite/pywikibot/page.py
branches/rewrite/pywikibot/site.py
Modified: branches/rewrite/pywikibot/README-conversion.txt
===================================================================
--- branches/rewrite/pywikibot/README-conversion.txt 2008-04-23 22:00:06 UTC (rev 5267)
+++ branches/rewrite/pywikibot/README-conversion.txt 2008-04-24 13:50:10 UTC (rev 5268)
@@ -12,15 +12,27 @@
bot.
With pywikipedia scripts were importing "wikipedia" or "pagegenerators"
-librairies; pywikibot is now written as a standard module.
+libraries; pywikibot is now written as a standard package, and other modules
+are contained within it (e.g., pywikibot.site contains Site classes). However,
+most commonly-used names are imported into the pywikibot namespace, so that
+module names don't need to be used unless specified in the documentation.
+
(To use it, just import "pywikibot", assuming that pywikibot/ is in sys.path)
== Python libraries ==
-You will need, to run pywikibot, httplib2 and setuptools
-* httplib2 : http://code.google.com/p/httplib2/
+[Note: the goal will be to package pywikibot with setuptools easy_install,
+so that these dependencies will be loaded automatically when the package is
+installed, and users won't need to worry about this...]
+
+To run pywikibot, you will need the httplib2, simplejson, and setuptools packages--
+* httplib2 : http://code.google.com/p/httplib2/
* setuptools : http://pypi.python.org/pypi/setuptools/
+* simplejson : https://svn.red-bean.com/bob/simplejson/tags/simplejson-1.7.1/docs/i…
+or, if you already have setuptools installed, just execute 'easy_install httplib2'
+and 'easy_install simplejson'
+
If you run into errors involving httplib2.urlnorm, update httplib2 to
0.4.0 (Ubuntu package python-httlib2 for example, is outdated)
Modified: branches/rewrite/pywikibot/__init__.py
===================================================================
--- branches/rewrite/pywikibot/__init__.py 2008-04-23 22:00:06 UTC (rev 5267)
+++ branches/rewrite/pywikibot/__init__.py 2008-04-24 13:50:10 UTC (rev 5268)
@@ -60,7 +60,7 @@
getSite = Site # alias for backwards-compatibility
-from page import Page, ImagePage, Category
+from page import Page, ImagePage, Category, Link
# DEBUG
Modified: branches/rewrite/pywikibot/data/api.py
===================================================================
--- branches/rewrite/pywikibot/data/api.py 2008-04-23 22:00:06 UTC (rev 5267)
+++ branches/rewrite/pywikibot/data/api.py 2008-04-24 13:50:10 UTC (rev 5268)
@@ -329,22 +329,7 @@
"""
p = pywikibot.Page(self.site, pagedata['title'], pagedata['ns'])
- if "pageid" in pagedata:
- self._pageid = int(pagedata['pageid'])
- elif "missing" in pagedata:
- self._pageid = 0 # Non-existent page
- else:
- raise AssertionError(
- "Page %s has neither 'pageid' nor 'missing' attribute"
- % pagedata['title'])
- if 'lastrevid' in pagedata:
- p._revid = pagedata['lastrevid']
- if 'touched' in pagedata:
- p._timestamp = pagedata['touched']
- if 'protection' in pagedata:
- p._protection = {}
- for item in pagedata['protection']:
- p._protection[item['type']] = item['level']
+ update_page(p, pagedata)
return p
@@ -366,8 +351,12 @@
class PropertyGenerator(object):
- """Generator for queries of type action=query&property=..."""
+ """Generator for queries of type action=query&property=...
+ Note that this generator yields one or more dict object(s) corresponding
+ to each "page" item(s) from the API response; the calling module has to
+ decide what to do with the contents of the dict."""
+
def __init__(self, prop, **kwargs):
"""
Required and optional parameters are as for C{Request}, except that
@@ -384,7 +373,7 @@
if self.limits[prop] and kwargs.pop("getAll", False):
self.request['g'+self.limits[generator]] = "max"
self.site = self.request.site
- self.resultkey = prop # element to look for in result
+ self.resultkey = prop
# dict mapping property types to their limit parameter names
limits = {'revisions': 'rvlimit',
@@ -410,21 +399,8 @@
if not ("query" in self.data and "pages" in self.data["query"]):
raise StopIteration
pagedata = self.data["query"]["pages"].values()
- assert len(pagedata)==1
- pagedata = pagedata[0]
- if not self.resultkey in pagedata:
- raise StopIteration
- if isinstance(pagedata[self.resultkey], dict):
- for v in pagedata[self.resultkey].itervalues():
- yield v
- elif isinstance(pagedata[self.resultkey], list):
- for v in pagedata[self.resultkey]:
- yield v
- else:
- raise APIError("Unknown",
- "Unknown format in ['%s'] value."
- % self.resultkey,
- data=pagedata[self.resultkey])
+ for item in pagedata:
+ yield item
if not "query-continue" in self.data:
return
if not self.resultkey in self.data["query-continue"]:
@@ -472,6 +448,32 @@
pywikibot.cookie_jar.save()
+def update_page(page, pagedict):
+ """Update attributes of Page object page, based on query data in pagequery
+
+ @param page: object to be updated
+ @type page: Page
+ @param pagedict: the contents of a "page" element of a query response
+ @type pagedict: dict
+
+ """
+ if "pageid" in pagedict:
+ page._pageid = int(pagedict['pageid'])
+ elif "missing" in pagedict:
+ page._pageid = 0 # Non-existent page
+ else:
+ raise AssertionError(
+ "Page %s has neither 'pageid' nor 'missing' attribute"
+ % pagedict['title'])
+ if 'lastrevid' in pagedict:
+ page._revid = pagedict['lastrevid']
+ if 'touched' in pagedict:
+ page._timestamp = pagedict['touched']
+ if 'protection' in pagedict:
+ page._protection = {}
+ for item in pagedict['protection']:
+ page._protection[item['type']] = item['level'], item['expiry']
+
if __name__ == "__main__":
from pywikibot import Site
logging.getLogger().setLevel(logging.DEBUG)
Modified: branches/rewrite/pywikibot/page.py
===================================================================
--- branches/rewrite/pywikibot/page.py 2008-04-23 22:00:06 UTC (rev 5267)
+++ branches/rewrite/pywikibot/page.py 2008-04-24 13:50:10 UTC (rev 5268)
@@ -678,8 +678,8 @@
limit = None
else:
limit = revCount
- return self.site().getrevisions(self, withText=False,
- older=not reverseOrder, limit=limit)
+ return self.site().getrevisions(self, getText=False,
+ rvdir=not reverseOrder, limit=limit)
def getVersionHistoryTable(self, forceReload=False, reverseOrder=False,
getAll=False, revCount=500):
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2008-04-23 22:00:06 UTC (rev 5267)
+++ branches/rewrite/pywikibot/site.py 2008-04-24 13:50:10 UTC (rev 5268)
@@ -491,8 +491,12 @@
"info", inprop="protection|talkid|subjectid",
titles=page.title(withSection=False
).encode(self.encoding()))
- for item in query():
- pass #FIXME
+ for pageitem in query:
+ if pageitem['title'] != page.title(withSection=False):
+ raise RuntimeError(
+ "page_exists: Query on %s returned data on '%s'"
+ % (page, pageitem['title']))
+ page._pageid = pageitem['pageid']
return page._pageid > 0
# following group of methods map more-or-less directly to API queries
@@ -621,30 +625,80 @@
return cmgen
def getrevisions(self, page=None, getText=False, revids=None,
- older=True, limit=None, sysop=False, user=None,
- excludeuser=None):
+ limit=None, startid=None, endid=None, starttime=None,
+ endtime=None, rvdir=None, user=None, excludeuser=None,
+ section=None, sysop=False):
"""Retrieve and store revision information.
- @param page: retrieve the history of this Page (required unless ids
+ By default, retrieves the last (current) revision of the page,
+ I{unless} any of the optional parameters revids, startid, endid,
+ starttime, endtime, rvdir, user, excludeuser, or limit are
+ specified. Unless noted below, all parameters not specified
+        default to None.
+
+ If rvdir is False or not specified, startid must be greater than
+ endid if both are specified; likewise, starttime must be greater
+ than endtime. If rvdir is True, these relationships are reversed.
+
+ @param page: retrieve revisions of this Page (required unless ids
is specified)
- @param getText: if True, retrieve the wiki-text of each revision as
- well
+ @param getText: if True, retrieve the wiki-text of each revision;
+ otherwise, only retrieve the revision metadata (default)
+ @param section: if specified, retrieve only this section of the text
+ (getText must be True); section must be given by number (top of
+ the article is section 0), not name
+ @type section: int
@param revids: retrieve only the specified revision ids (required
unless page is specified)
- @param older: if True, retrieve newest revisions first; otherwise,
- retrieve oldest revisions first
- @param limit: if specified, retrieve no more than this number of
- revisions (defaults to latest revision only)
+ @type revids: list of ints
+ @param limit: Retrieve no more than this number of revisions
@type limit: int
+ @param startid: retrieve revisions starting with this revid
+ @param endid: stop upon retrieving this revid
+ @param starttime: retrieve revisions starting at this timestamp
+ @param endtime: stop upon reaching this timestamp
+ @param rvdir: if false, retrieve newest revisions first (default);
+ if true, retrieve earliest first
@param user: retrieve only revisions authored by this user
@param excludeuser: retrieve all revisions not authored by this user
@param sysop: if True, switch to sysop account (if available) to
retrieve this page
"""
+ latest = (revids is None and
+ startid is None and
+ endid is None and
+ starttime is None and
+ endtime is None and
+ rvdir is None and
+ user is None and
+ excludeuser is None and
+ limit is None) # if True, we are retrieving current revision
+
+ # check for invalid argument combinations
if page is None and revids is None:
raise ValueError(
- "getrevisions needs either page or revids argument.")
+ "getrevisions: either page or revids argument required")
+ if (startid is not None or endid is not None) and \
+ (starttime is not None or endtime is not None):
+ raise ValueError(
+ "getrevisions: startid/endid combined with starttime/endtime")
+ if starttime is not None and endtime is not None:
+ if rvdir and starttime >= endtime:
+ raise ValueError(
+ "getrevisions: starttime > endtime with rvdir=True")
+ if (not rvdir) and endtime >= starttime:
+ raise ValueError(
+ "getrevisions: endtime > starttime with rvdir=False")
+ if startid is not None and endid is not None:
+ if rvdir and startid >= endid:
+ raise ValueError(
+ "getrevisions: startid > endid with rvdir=True")
+ if (not rvdir) and endid >= startid:
+ raise ValueError(
+ "getrevisions: endid > startid with rvdir=False")
+
+ # assemble API request
if revids is None:
rvtitle = page.title(withSection=False).encode(self.encoding())
rvgen = api.PropertyGenerator(u"revisions", titles=rvtitle)
@@ -654,28 +708,50 @@
if getText:
rvgen.request[u"rvprop"] = \
u"ids|flags|timestamp|user|comment|content"
- if page.section():
- rvgen.request[u"rvsection"] = unicode(page.section())
+ if section is not None:
+ rvgen.request[u"rvsection"] = unicode(section)
if limit:
rvgen.request[u"rvlimit"] = unicode(limit)
- if not older:
+ if rvdir:
rvgen.request[u"rvdir"] = u"newer"
+ elif rvdir is not None:
+ rvgen.request[u"rvdir"] = u"older"
+ if startid:
+ rvgen.request[u"rvstartid"] = startid
+ if endid:
+ rvgen.request[u"rvendid"] = endid
+ if starttime:
+ rvgen.request[u"rvstart"] = starttime
+ if endtime:
+ rvgen.request[u"rvend"] = endtime
if user:
rvgen.request[u"rvuser"] = user
elif excludeuser:
rvgen.request[u"rvexcludeuser"] = excludeuser
- # TODO if sysop:
- for rev in rvgen:
- revision = pywikibot.page.Revision(revid=rev['revid'],
- timestamp=rev['timestamp'],
- user=rev['user'],
- anon=rev.has_key('anon'),
- comment=rev.get('comment', u''),
- minor=rev.has_key('minor'),
- text=rev.get('*', None))
- page._revisions[revision.revid] = revision
- if revids is None and limit is None and user is None and excludeuser is None:
- page._revid = revision.revid
+ # TODO if sysop: something
+ for pagedata in rvgen:
+ if page is not None:
+ if pagedata['title'] != page.title(withSection=False):
+ raise RuntimeError(
+ "getrevisions: Query on %s returned data on '%s'"
+ % (page, pagedata['title']))
+ else:
+ page = Page(self, pagedata['title'])
+ api.update_page(page, pagedata)
+
+ for rev in pagedata['revisions']:
+ revision = pywikibot.page.Revision(
+ revid=rev['revid'],
+ timestamp=rev['timestamp'],
+ user=rev['user'],
+ anon=rev.has_key('anon'),
+ comment=rev.get('comment', u''),
+ minor=rev.has_key('minor'),
+ text=rev.get('*', None)
+ )
+ page._revisions[revision.revid] = revision
+ if latest:
+ page._revid = revision.revid
#### METHODS NOT IMPLEMENTED YET (but may be delegated to Family object) ####