Revision: 7942
Author: xqt
Date: 2010-02-17 05:53:13 +0000 (Wed, 17 Feb 2010)
Log Message:
-----------
Bugfix for #2952927
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2010-02-16 17:07:50 UTC (rev 7941)
+++ trunk/pywikipedia/wikipedia.py 2010-02-17 05:53:13 UTC (rev 7942)
@@ -4395,9 +4395,6 @@
if nothing is changed, it is added at the end
"""
- # Hyperlink regex is defined in weblinkchecker.py
- import weblinkchecker
-
if site is None:
site = getSite()
@@ -4431,7 +4428,7 @@
# depth, we'd need recursion which can't be done in Python's re.
# After all, the language of correct parenthesis words is not regular.
'template': re.compile(r'(?s){{(({{(({{.*?}})|.)*}})|.)*}}'),
- 'hyperlink': weblinkchecker.compileLinkR(),
+ 'hyperlink': compileLinkR(),
'gallery': re.compile(r'(?is)<gallery.*?>.*?</gallery>'),
# this matches internal wikilinks, but also interwiki, categories, and
# images.
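Context for the fix: r7940 (below) moved compileLinkR() into wikipedia.py
itself, so the function-local import of weblinkchecker, which in turn
imports wikipedia, became an unnecessary circular dependency; the exception
table now uses the library's own copy. A minimal sketch of how such an
exception table is typically consulted, using hypothetical helper names
rather than the actual pywikipedia implementation:

import re

exceptionRegexes = {
    'nowiki': re.compile(r'(?is)<nowiki>.*?</nowiki>'),
    'comment': re.compile(r'(?s)<!--.*?-->'),
}

def replace_except(text, old, new, keys):
    # Collect the spans protected by the named exception regexes,
    # then apply the substitution only outside those spans.
    protected = []
    for key in keys:
        protected += [m.span() for m in exceptionRegexes[key].finditer(text)]
    def repl(match):
        if any(s <= match.start() < e for s, e in protected):
            return match.group(0)   # inside a protected span: keep as-is
        return new
    return re.sub(old, repl, text)

print(replace_except("foo <nowiki>foo</nowiki>", "foo", "bar", ['nowiki']))
# -> bar <nowiki>foo</nowiki>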
Revision: 7941
Author: xqt
Date: 2010-02-16 17:07:50 +0000 (Tue, 16 Feb 2010)
Log Message:
-----------
Bugfix for #2946258: test the HTTP status code. I would prefer going back into the loop introduced in r5014, but this is old code.
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2010-02-16 17:01:39 UTC (rev 7940)
+++ trunk/pywikipedia/wikipedia.py 2010-02-16 17:07:50 UTC (rev 7941)
@@ -2151,7 +2151,7 @@
return self._putPageOld(text, comment, watchArticle, minorEdit, newPage, token, newToken, sysop, captcha=solve)
# We are expecting a 302 to the action=view page. I'm not sure why this was removed in r5019
- if data.strip() != u"":
+ if response.status != 302 and data.strip() != u"":
# Something went wrong, and we don't know what. Show the
# HTML code that hopefully includes some error message.
output(u"ERROR: Unexpected response from wiki server.")
Revision: 7940
Author: xqt
Date: 2010-02-16 17:01:39 +0000 (Tue, 16 Feb 2010)
Log Message:
-----------
Put weblinkchecker.compileLinkR() into the wikipedia library (textlib update from the rewrite branch)
Modified Paths:
--------------
trunk/pywikipedia/weblinkchecker.py
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/weblinkchecker.py
===================================================================
--- trunk/pywikipedia/weblinkchecker.py 2010-02-16 16:02:34 UTC (rev 7939)
+++ trunk/pywikipedia/weblinkchecker.py 2010-02-16 17:01:39 UTC (rev 7940)
@@ -202,32 +202,6 @@
re.compile('.*[\./(a)]bodo\.kommune\.no(/.*)?'), # bot can't handle their redirects
]
-def compileLinkR(withoutBracketed = False, onlyBracketed = False):
- # RFC 2396 says that URLs may only contain certain characters.
- # For this regex we also accept non-allowed characters, so that the bot
- # will later show these links as broken ('Non-ASCII Characters in URL').
- # Note: While allowing parenthesis inside URLs, MediaWiki will regard
- # right parenthesis at the end of the URL as not part of that URL.
- # The same applies to dot, comma, colon and some other characters.
- notAtEnd = '\]\s\)\.:;,<>"'
- # So characters inside the URL can be anything except whitespace,
- # closing squared brackets, quotation marks, greater than and less
- # than, and the last character also can't be parenthesis or another
- # character disallowed by MediaWiki.
- notInside = '\]\s<>"'
- # The first half of this regular expression is required because '' is
- # not allowed inside links. For example, in this wiki text:
- # ''Please see http://www.example.org.''
- # .'' shouldn't be considered as part of the link.
- regex = r'(?P<url>http[s]?://[^' + notInside + ']*?[^' + notAtEnd + '](?=[' + notAtEnd+ ']*\'\')|http[s]?://[^' + notInside + ']*[^' + notAtEnd + '])'
-
- if withoutBracketed:
- regex = r'(?<!\[)' + regex
- elif onlyBracketed:
- regex = r'\[' + regex
- linkR = re.compile(regex)
- return linkR
-
def weblinksIn(text, withoutBracketed = False, onlyBracketed = False):
text = wikipedia.removeDisabledParts(text)
@@ -245,7 +219,7 @@
while templateWithParamsR.search(text):
text = templateWithParamsR.sub(r'{{ \1 | \2 }}', text)
- linkR = compileLinkR(withoutBracketed, onlyBracketed)
+ linkR = wikipedia.compileLinkR(withoutBracketed, onlyBracketed)
# Remove HTML comments in URLs as well as URLs in HTML comments.
# Also remove text inside nowiki links etc.
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2010-02-16 16:02:34 UTC (rev 7939)
+++ trunk/pywikipedia/wikipedia.py 2010-02-16 17:01:39 UTC (rev 7940)
@@ -4963,6 +4963,35 @@
#catLinks.sort()
return sep.join(catLinks) + '\r\n'
+def compileLinkR(withoutBracketed=False, onlyBracketed=False):
+ """Return a regex that matches external links."""
+ # RFC 2396 says that URLs may only contain certain characters.
+ # For this regex we also accept non-allowed characters, so that the bot
+ # will later show these links as broken ('Non-ASCII Characters in URL').
+ # Note: While allowing parenthesis inside URLs, MediaWiki will regard
+ # right parenthesis at the end of the URL as not part of that URL.
+ # The same applies to dot, comma, colon and some other characters.
+ notAtEnd = '\]\s\)\.:;,<>"'
+ # So characters inside the URL can be anything except whitespace,
+ # closing squared brackets, quotation marks, greater than and less
+ # than, and the last character also can't be parenthesis or another
+ # character disallowed by MediaWiki.
+ notInside = '\]\s<>"'
+ # The first half of this regular expression is required because '' is
+ # not allowed inside links. For example, in this wiki text:
+ # ''Please see http://www.example.org.''
+ # .'' shouldn't be considered as part of the link.
+ regex = r'(?P<url>http[s]?://[^' + notInside + ']*?[^' + notAtEnd \
+ + '](?=[' + notAtEnd+ ']*\'\')|http[s]?://[^' + notInside \
+ + ']*[^' + notAtEnd + '])'
+
+ if withoutBracketed:
+ regex = r'(?<!\[)' + regex
+ elif onlyBracketed:
+ regex = r'\[' + regex
+ linkR = re.compile(regex)
+ return linkR
+
# end of category specific code
def url2link(percentname, insite, site):
"""Convert urlname of a wiki page into interwiki link format.
Revision: 7938
Author: xqt
Date: 2010-02-16 09:54:23 +0000 (Tue, 16 Feb 2010)
Log Message:
-----------
Allow the number of pages returned by UserContributionsGenerator() to be given by an argument (fix for #2930108)
Modified Paths:
--------------
trunk/pywikipedia/pagegenerators.py
Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py 2010-02-16 07:20:42 UTC (rev 7937)
+++ trunk/pywikipedia/pagegenerators.py 2010-02-16 09:54:23 UTC (rev 7938)
@@ -108,8 +108,13 @@
Argument can be given as "-unwatched:n" where
n is the maximum number of articles to work on.
--usercontribs Work on all articles that were edited by a certain user :
- Example : -usercontribs:DumZiBoT
+-usercontribs Work on articles that were edited by a certain user.
+ Example: -usercontribs:DumZiBoT
+ Normally up to 250 distinct pages are returned. To get
+ another number of pages, append the number to the
+ username, delimited with ";"
+ Example: -usercontribs:DumZiBoT;500
+ returns 500 distinct pages to work on.
-weblink Work on all articles that contain an external link to
a given URL; may be given as "-weblink:url"
@@ -148,6 +153,9 @@
-yahoo Work on all pages that are found in a Yahoo search.
Depends on python module pYsearch. See yahoo_appid in
config.py for instructions.
+
+-page Work on a single page. Argument can also be given as
+ "-page:pagetitle".
"""
docuReplacements = {'¶ms;': parameterHelp}
@@ -485,12 +493,8 @@
Yields number unique pages edited by user:username
namespaces : list of namespace numbers to fetch contribs from
"""
-
if site is None:
site = pywikibot.getSite()
- if number > 500:
- # the api does not allow more than 500 results for anonymous users
- number = 500
user = userlib.User(site, username)
for page in user.contributions(number, namespaces):
yield page[0]
@@ -994,7 +998,13 @@
else:
gen = UnwatchedPagesPageGenerator(number = int(arg[11:]))
elif arg.startswith('-usercontribs'):
- gen = UserContributionsGenerator(arg[14:])
+ args = arg[14:].split(';')
+ number = None
+ try:
+ number = int(args[1])
+ except:
+ number = 250
+ gen = UserContributionsGenerator(args[0], number)
elif arg.startswith('-withoutinterwiki'):
if len(arg) == 17:
gen = WithoutInterwikiPageGenerator()
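One nit in the committed parse: the bare except also hides unrelated
errors, and the initial number = None is immediately overwritten. An
equivalent sketch with explicit exception types (a hypothetical cleanup,
not what was committed):

args = arg[14:].split(';')           # "-usercontribs:Name;500" -> ["Name", "500"]
try:
    number = int(args[1])
except (IndexError, ValueError):     # no ";" given, or the suffix is not a number
    number = 250
gen = UserContributionsGenerator(args[0], number)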
Revision: 7937
Author: xqt
Date: 2010-02-16 07:20:42 +0000 (Tue, 16 Feb 2010)
Log Message:
-----------
Document the new behavior.
Modified Paths:
--------------
trunk/pywikipedia/interwiki.py
Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py 2010-02-15 15:07:12 UTC (rev 7936)
+++ trunk/pywikipedia/interwiki.py 2010-02-16 07:20:42 UTC (rev 7937)
@@ -186,10 +186,12 @@
These arguments specify in which way the bot should follow interwiki links:
- -noredirect do not follow redirects. (note: without ending colon)
+ -noredirect do not follow redirects nor category redirects.
+ (note: without ending colon)
- -initialredirect work on its target if a redirect is entered on the
- command line. (note: without ending colon)
+ -initialredirect work on its target if a redirect or category redirect is
+ entered on the command line or by a generator.
+ (note: without ending colon)
-neverlink: used as -neverlink:xx where xx is a language code:
Disregard any links found to language xx. You can also
Revision: 7936
Author: xqt
Date: 2010-02-15 15:07:12 +0000 (Mon, 15 Feb 2010)
Log Message:
-----------
interwiki: recognize category redirect templates (bugfix for #2949822)
Modified Paths:
--------------
trunk/pywikipedia/interwiki.py
Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py 2010-02-15 14:49:55 UTC (rev 7935)
+++ trunk/pywikipedia/interwiki.py 2010-02-15 15:07:12 UTC (rev 7936)
@@ -287,7 +287,7 @@
# (C) Rob W.W. Hooft, 2003
# (C) Daniel Herding, 2004
# (C) Yuri Astrakhan, 2005-2006
-# (C) Pywikipedia bot team, 2007-2009
+# (C) Pywikipedia bot team, 2007-2010
#
# Distributed under the terms of the MIT license.
#
@@ -937,7 +937,8 @@
"""
for tree in [self.done, self.pending]:
for page in tree.filter(site):
- if page.exists() and not page.isDisambig() and not page.isRedirectPage():
+ if page.exists() and not page.isDisambig() \
+ and not page.isRedirectPage() and not page.isCategoryRedirect():
return page
return None
@@ -951,7 +952,7 @@
for tree in [self.done, self.pending, self.todo]:
for page in tree.filter(site):
if page.namespace() == self.originPage.namespace():
- if page.exists() and not page.isRedirectPage():
+ if page.exists() and not page.isRedirectPage() and not page.isCategoryRedirect():
return page
return None
@@ -1182,7 +1183,8 @@
if not self.workonme:
# Do not ask hints for pages that we don't work on anyway
return
- if (self.untranslated or globalvar.askhints) and not self.hintsAsked and not self.originPage.isRedirectPage():
+ if (self.untranslated or globalvar.askhints) and not self.hintsAsked \
+ and not self.originPage.isRedirectPage() and not self.originPage.isCategoryRedirect():
# Only once!
self.hintsAsked = True
if globalvar.untranslated:
@@ -1253,23 +1255,33 @@
self.done = PageTree()
continue
- elif page.isRedirectPage():
+ elif page.isRedirectPage() or page.isCategoryRedirect():
+ if page.isRedirectPage():
+ redir = u''
+ else:
+ redir = u'category '
try:
- redirectTargetPage = page.getRedirectTarget()
+ if page.isRedirectPage():
+ redirectTargetPage = page.getRedirectTarget()
+ else:
+ redirectTargetPage = page.getCategoryRedirectTarget()
except pywikibot.InvalidTitle:
# MW considers #redirect [[en:#foo]] as a redirect page,
# but we can't do anything useful with such pages
if not globalvar.quiet or pywikibot.verbose:
- pywikibot.output(u"NOTE: %s redirects to an invalid title" % page.aslink(True))
+ pywikibot.output(u"NOTE: %s redirects to an invalid title"
+ % page.aslink(True))
continue
if not globalvar.quiet or pywikibot.verbose:
- pywikibot.output(u"NOTE: %s is redirect to %s" % (page.aslink(True), redirectTargetPage.aslink(True)))
+ pywikibot.output(u"NOTE: %s is %sredirect to %s"
+ % (page.aslink(True), redir, redirectTargetPage.aslink(True)))
if page == self.originPage:
if globalvar.initialredirect:
if globalvar.contentsondisk:
redirectTargetPage = StoredPage(redirectTargetPage)
#don't follow double redirects; it might be a self loop
- if not redirectTargetPage.isRedirectPage():
+ if not redirectTargetPage.isRedirectPage() \
+ and not redirectTargetPage.isCategoryRedirect():
self.originPage = redirectTargetPage
self.todo.add(redirectTargetPage)
counter.plus(redirectTargetPage.site())
@@ -1282,12 +1294,13 @@
self.todo = PageTree()
elif not globalvar.followredirect:
if not globalvar.quiet or pywikibot.verbose:
- pywikibot.output(u"NOTE: not following redirects.")
+ pywikibot.output(u"NOTE: not following %sredirects." % redir)
elif page.site().family == redirectTargetPage.site().family \
and not self.skipPage(page, redirectTargetPage, counter):
if self.addIfNew(redirectTargetPage, counter, page):
if config.interwiki_shownew or pywikibot.verbose:
- pywikibot.output(u"%s: %s gives new redirect %s" % (self.originPage.aslink(), page.aslink(True), redirectTargetPage.aslink(True)))
+ pywikibot.output(u"%s: %s gives new %sredirect %s"
+ % (self.originPage.aslink(), page.aslink(True), redir, redirectTargetPage.aslink(True)))
continue
# must be behind the page.isRedirectPage() part
@@ -1326,7 +1339,7 @@
duplicate = None
for p in self.done.filter(page.site()):
- if p != page and p.exists() and not p.isRedirectPage():
+ if p != page and p.exists() and not p.isRedirectPage() and not p.isCategoryRedirect():
duplicate = p
break
@@ -1428,7 +1441,7 @@
# Each value will be a list of pages.
new = {}
for page in self.done:
- if page.exists() and not page.isRedirectPage():
+ if page.exists() and not page.isRedirectPage() and not page.isCategoryRedirect():
site = page.site()
if site == self.originPage.site():
if page != self.originPage:
@@ -1535,6 +1548,8 @@
return
if self.originPage.isRedirectPage():
return
+ if self.originPage.isCategoryRedirect():
+ return
if not self.untranslated and globalvar.untranslatedonly:
return
# The following check is not always correct and thus disabled.
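The same three-part test (the page exists, is not a redirect, and is not a
category redirect) recurs throughout this commit; a small helper predicate
could have captured it (a sketch only, no such helper exists in this
revision):

def isPlainPage(page):
    """Return True for pages that exist and are neither ordinary
    redirects nor category redirects."""
    return page.exists() and not page.isRedirectPage() \
           and not page.isCategoryRedirect()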