http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11021
Revision: 11021
Author: drtrigon
Date: 2013-01-31 18:21:29 +0000 (Thu, 31 Jan 2013)
Log Message:
-----------
docu and svn:keywords added
Modified Paths:
--------------
trunk/pywikipedia/overcat_simple_filter.py
Property Changed:
----------------
trunk/pywikipedia/overcat_simple_filter.py
Modified: trunk/pywikipedia/overcat_simple_filter.py
===================================================================
--- trunk/pywikipedia/overcat_simple_filter.py 2013-01-31 17:36:03 UTC (rev 11020)
+++ trunk/pywikipedia/overcat_simple_filter.py 2013-01-31 18:21:29 UTC (rev 11021)
@@ -6,6 +6,14 @@
That might be a very good strategy when the parent category is very full, but later on it will become very inefficient.
'''
+#
+# (C) Pywikipedia bot team, 2013
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id$'
+#
+
import sys, pywikibot, catlib, pagegenerators
def filterCategory(page):
Property changes on: trunk/pywikipedia/overcat_simple_filter.py
___________________________________________________________________
Added: svn:keywords
+ Id
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11020
Revision: 11020
Author: drtrigon
Date: 2013-01-31 17:36:03 +0000 (Thu, 31 Jan 2013)
Log Message:
-----------
catch AssertionError (if charset mismatches) but print warning
suppress output (if back_response=True) but still print error in verbose mode
warning if/before attempt to download big content (>10MB)
PEP 8 and docu
Modified Paths:
--------------
trunk/pywikipedia/pywikibot/comms/http.py
Modified: trunk/pywikipedia/pywikibot/comms/http.py
===================================================================
--- trunk/pywikipedia/pywikibot/comms/http.py 2013-01-30 23:56:18 UTC (rev 11019)
+++ trunk/pywikipedia/pywikibot/comms/http.py 2013-01-31 17:36:03 UTC (rev 11020)
@@ -58,20 +58,25 @@
return self._buffer[name]
-def request(site, uri, retry = None, sysop = False, data = None, compress = True,
- no_hostname = False, cookie_only=False, refer=None, back_response=False):
+def request(site, uri, retry=None, sysop=False, data=None, compress=True,
+ no_hostname=False, cookie_only=False, refer=None,
+ back_response=False):
"""
Low-level routine to get a URL from any source (may be the wiki).
Parameters:
- @param site - The Site to connect to.
- @param uri - The absolute uri, without the hostname.
- @param retry - If True, retries loading the page when a network error
- occurs.
- @param sysop - If True, the sysop account's cookie will be used.
- @param data - An optional dict providing extra post request
- parameters.
- @param cookie_only - Only return the cookie the server sent us back
+ @param site - The Site to connect to.
+ @param uri - The absolute uri, without the hostname.
+ @param retry - If True, retries loading the page when a network
+ error occurs.
+ @param sysop - If True, the sysop account's cookie will be used.
+ @param data - An optional dict providing extra post request
+ parameters.
+ @param compress - Accept compressed page content transfer also.
+ @param no_hostname - Do query to foreign host (any kind of web-server).
+ @param cookie_only - Only return the cookie the server sent us back
+ @param refer - ...
+ @param back_response - Return the addinfourl object from request too.
@return: Returns the HTML text of the page converted to unicode.
"""
@@ -114,20 +119,24 @@
f = buffered_addinfourl(MyURLopener.open(req))
# read & info can raise socket.error
+ headers = f.info()
+ if (int(headers.get('content-length', '-1')) > 1E7):
+ pywikibot.output(u'WARNING: Target is of huge size (>10MB) is '
+ u'that correct? Downloading will take some '
+ u'time, please be patient.')
text = f.read()
- headers = f.info()
break
except KeyboardInterrupt:
raise
except urllib2.HTTPError, e:
if e.code in [401, 404]:
raise PageNotFound(
-u'Page %s could not be retrieved. Check your family file.'
- % url)
+ u'Page %s could not be retrieved. Check your family file.'
+ % url)
elif e.code in [403]:
raise PageNotFound(
-u'Page %s could not be retrieved. Check your virus wall.'
- % url)
+ u'Page %s could not be retrieved. Check your virus wall.'
+ % url)
elif e.code == 504:
pywikibot.output(u'HTTPError: %s %s' % (e.code, e.msg))
if retry:
@@ -135,8 +144,9 @@
if retry_attempt > config.maxretries:
raise MaxTriesExceededError()
pywikibot.output(
-u"WARNING: Could not open '%s'.Maybe the server or\n your connection is down. Retrying in %i minutes..."
- % (url, retry_idle_time))
+ u"WARNING: Could not open '%s'.Maybe the server or\n "
+ u"your connection is down. Retrying in %i minutes..."
+ % (url, retry_idle_time))
time.sleep(retry_idle_time * 60)
# Next time wait longer,
# but not longer than half an hour
@@ -155,8 +165,9 @@
if retry_attempt > config.maxretries:
raise MaxTriesExceededError()
pywikibot.output(
-u"WARNING: Could not open '%s'. Maybe the server or\n your connection is down. Retrying in %i minutes..."
- % (url, retry_idle_time))
+ u"WARNING: Could not open '%s'. Maybe the server or\n your "
+ u"connection is down. Retrying in %i minutes..."
+ % (url, retry_idle_time))
time.sleep(retry_idle_time * 60)
retry_idle_time *= 2
if retry_idle_time > 30:
@@ -206,17 +217,29 @@
# UTF-8 as default
charset = 'utf-8'
# Check if this is the charset we expected
- site.checkCharset(charset)
+ try:
+ site.checkCharset(charset)
+ except AssertionError, e:
+ if (not back_response) or verbose:
+ pywikibot.output(u'%s' %e)
+ if no_hostname:
+ pywikibot.output(u'ERROR: Invalid charset found on %s.' % uri)
+ else:
+ pywikibot.output(u'ERROR: Invalid charset found on %s://%s%s.'
+ % (site.protocol(), site.hostname(), uri))
# Convert HTML to Unicode
try:
text = unicode(text, charset, errors = 'strict')
except UnicodeDecodeError, e:
- if verbose:
+ if (not back_response) or verbose:
pywikibot.output(u'%s' %e)
- if no_hostname:
- pywikibot.output(u'ERROR: Invalid characters found on %s, replaced by \\ufffd.' % uri)
- else:
- pywikibot.output(u'ERROR: Invalid characters found on %s://%s%s, replaced by \\ufffd.' % (site.protocol(), site.hostname(), uri))
+ if no_hostname:
+ pywikibot.output(u'ERROR: Invalid characters found on %s, '
+ u'replaced by \\ufffd.' % uri)
+ else:
+ pywikibot.output(u'ERROR: Invalid characters found on %s://%s%s, '
+ u'replaced by \\ufffd.'
+ % (site.protocol(), site.hostname(), uri))
# We use error='replace' in case of bad encoding.
text = unicode(text, charset, errors = 'replace')
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11019
Revision: 11019
Author: xqt
Date: 2013-01-30 23:56:18 +0000 (Wed, 30 Jan 2013)
Log Message:
-----------
cross update trunk/rewrite
Modified Paths:
--------------
branches/rewrite/scripts/interwiki.py
trunk/pywikipedia/families/wikipedia_family.py
trunk/pywikipedia/interwiki.py
Modified: branches/rewrite/scripts/interwiki.py
===================================================================
--- branches/rewrite/scripts/interwiki.py 2013-01-30 23:43:37 UTC (rev 11018)
+++ branches/rewrite/scripts/interwiki.py 2013-01-30 23:56:18 UTC (rev 11019)
@@ -1651,14 +1651,21 @@
break
else:
for (site, page) in new.iteritems():
+ # edit restriction for some templates on zh-wiki where interlanguage keys are included
+ # by /doc subpage
+ smallWikiAllowed = not (page.site.sitename() == 'wikipedia:zh' and
+ page.namespace() == 10 and
+ u'Country data' in page.title(withNamespace=False))
# edit restriction on is-wiki
# http://is.wikipedia.org/wiki/Wikipediaspjall:V%C3%A9lmenni
+ # and zh-wiki for template namespace which prevents increasing the queue
# allow edits for the same conditions as -whenneeded
# or the last edit wasn't a bot
# or the last edit was 1 month ago
- smallWikiAllowed = True
- if globalvar.autonomous and (page.site.sitename() == 'wikipedia:is' or
- page.site.sitename() == 'wikipedia:zh'):
+ if smallWikiAllowed and globalvar.autonomous and \
+ (page.site.sitename() == 'wikipedia:is' or
+ page.site.sitename() == 'wikipedia:zh' and
+ page.namespace() == 10):
old={}
try:
for mypage in new[page.site].interwiki():
@@ -1693,9 +1700,10 @@
% page.site.sitename())
# if we have an account for this site
- if site.family.name in config.usernames \
- and site.lang in config.usernames[site.family.name] \
- and smallWikiAllowed:
+ if site.family.name in config.usernames and \
+ site.lang in config.usernames[site.family.name] and \
+ smallWikiAllowed and \
+ not site.has_transcluded_data:
# Try to do the changes
try:
if self.replaceLinks(page, new):
Modified: trunk/pywikipedia/families/wikipedia_family.py
===================================================================
--- trunk/pywikipedia/families/wikipedia_family.py 2013-01-30 23:43:37 UTC (rev 11018)
+++ trunk/pywikipedia/families/wikipedia_family.py 2013-01-30 23:56:18 UTC (rev 11019)
@@ -1533,7 +1533,7 @@
return self.code2encoding(code),
def shared_data_repository(self, code, transcluded=False):
- if not transcluded or code in ['he','hu','it']:
+ if not transcluded or code in ['he', 'hu', 'it']:
return ('wikidata', 'wikidata')
else:
return (None, None)
Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py 2013-01-30 23:43:37 UTC (rev 11018)
+++ trunk/pywikipedia/interwiki.py 2013-01-30 23:56:18 UTC (rev 11019)
@@ -1322,7 +1322,7 @@
iw = page.interwiki()
except pywikibot.NoSuchSite:
if not globalvar.quiet or pywikibot.verbose:
- pywikibot.output(u"NOTE: site %s does not exist" % page.site())
+ pywikibot.output(u"NOTE: site %s does not exist" % page.site)
continue
(skip, alternativePage) = self.disambigMismatch(page, counter)
@@ -1542,7 +1542,7 @@
break
return result
- def finish(self, bot = None):
+ def finish(self, bot=None):
"""Round up the subject, making any necessary changes. This method
should be called exactly once after the todo list has gone empty.
@@ -2181,9 +2181,9 @@
return None
oc = dict(self.firstSubject().openSites())
if not oc:
- # The first subject is done. This might be a recursive call made because we
- # have to wait before submitting another modification to go live. Select
- # any language from counts.
+ # The first subject is done. This might be a recursive call made
+ # because we have to wait before submitting another modification to
+ # go live. Select any language from counts.
oc = self.counts
if pywikibot.getSite() in oc:
return pywikibot.getSite()
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11016
Revision: 11016
Author: amir
Date: 2013-01-30 19:12:35 +0000 (Wed, 30 Jan 2013)
Log Message:
-----------
he and it become clients of wikidata
Modified Paths:
--------------
trunk/pywikipedia/families/wikipedia_family.py
Modified: trunk/pywikipedia/families/wikipedia_family.py
===================================================================
--- trunk/pywikipedia/families/wikipedia_family.py 2013-01-30 18:45:16 UTC (rev 11015)
+++ trunk/pywikipedia/families/wikipedia_family.py 2013-01-30 19:12:35 UTC (rev 11016)
@@ -1533,7 +1533,7 @@
return self.code2encoding(code),
def shared_data_repository(self, code, transcluded=False):
- if not transcluded or code in ['hu']:
+ if not transcluded or code in ['he','hu','it']:
return ('wikidata', 'wikidata')
else:
return (None, None)
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11014
Revision: 11014
Author: xqt
Date: 2013-01-29 08:38:24 +0000 (Tue, 29 Jan 2013)
Log Message:
-----------
initialize 'namespacesWithSubpage' for the generic family file,
bugfix for bug #3602373
Modified Paths:
--------------
branches/rewrite/pywikibot/family.py
Modified: branches/rewrite/pywikibot/family.py
===================================================================
--- branches/rewrite/pywikibot/family.py 2013-01-28 20:55:48 UTC (rev 11013)
+++ branches/rewrite/pywikibot/family.py 2013-01-29 08:38:24 UTC (rev 11014)
@@ -106,6 +106,8 @@
self.langs = {}
+ self.namespacesWithSubpage = [2] + range(1, 16, 2)
+
# letters that can follow a wikilink and are regarded as part of
# this link
# This depends on the linktrail setting in LanguageXx.php and on