http://www.mediawiki.org/wiki/Special:Code/pywikipedia/10457
Revision: 10457
Author: xqt
Date: 2012-07-15 08:27:45 +0000 (Sun, 15 Jul 2012)
Log Message:
-----------
exclusion of pages rejecting bots,
patch submitted by masti, bug #3544207. thanks
Modified Paths:
--------------
trunk/pywikipedia/weblinkchecker.py
Modified: trunk/pywikipedia/weblinkchecker.py
===================================================================
--- trunk/pywikipedia/weblinkchecker.py 2012-07-14 12:20:03 UTC (rev 10456)
+++ trunk/pywikipedia/weblinkchecker.py 2012-07-15 08:27:45 UTC (rev 10457)
@@ -215,6 +215,8 @@
re.compile('.*[\./@]itis\.gov(/.*)?'), # bot rejected on the site
re.compile('.*[\./@]cev\.lu(/.*)?'), # bot rejected on the site
re.compile('.*[\./@]science\.ksc\.nasa\.gov(/.*)?'), # very slow response resulting in bot error
+ re.compile('.*[\./@]britannica\.com(/.*)?'), #HTTP redirect loop
+ re.compile('.*[\./@]quickfacts\.census\.gov(/.*)?'), # bot rejected on the site
]
def weblinksIn(text, withoutBracketed = False, onlyBracketed = False):