http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9561
Revision: 9561
Author:   valhallasw
Date:     2011-09-27 20:19:40 +0000 (Tue, 27 Sep 2011)

Log Message:
-----------
Updated generate_family_file.py to allow mw1.18+ protocol-relative urls

Created simple test system to check if creating family files for a few wikis does not throw exceptions. Doesn't test if the files actually work...
Modified Paths:
--------------
    trunk/pywikipedia/generate_family_file.py

Added Paths:
-----------
    trunk/pywikipedia/tests/test_generate_family_file.py
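For context on the log message above: MediaWiki 1.18 and later can emit protocol-relative URLs (URLs starting with //, which inherit the scheme of the page they appear on), for example in the EditURI link. The few lines below are an illustration only, not part of this revision, and the URLs are example values; they show how urlparse.urljoin, which the patch uses, resolves such a URL against the URL the generator was started with:

# Illustration only -- example URLs, not taken from this revision's diff
from urlparse import urljoin

fromurl = 'http://nl.wikipedia.org/wiki/Hoofdpagina'   # URL the generator was pointed at
apipath = '//nl.wikipedia.org/w/api.php'               # protocol-relative, as mw1.18+ may emit

# urljoin takes the scheme from fromurl, yielding a usable absolute URL
print urljoin(fromurl, apipath)
# http://nl.wikipedia.org/w/api.php
print urljoin('https://nl.wikipedia.org/wiki/Hoofdpagina', apipath)
# https://nl.wikipedia.org/w/api.php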
Modified: trunk/pywikipedia/generate_family_file.py
===================================================================
--- trunk/pywikipedia/generate_family_file.py	2011-09-27 19:34:33 UTC (rev 9560)
+++ trunk/pywikipedia/generate_family_file.py	2011-09-27 20:19:40 UTC (rev 9561)
@@ -13,20 +13,24 @@
 # Distributed under the terms of the MIT license
 #
 
-from urllib2 import HTTPError
-import urllib2
-
-from BeautifulSoup import BeautifulSoup
+# system imports
+import sys, re
+import codecs
 from distutils.version import LooseVersion as V
 
+# creating urls
+from urlparse import urlparse, urljoin, ParseResult
+
+# retrieving urls
+import urllib2
+from urllib2 import HTTPError
 def urlopen(url):
     req = urllib2.Request(url, headers = {'User-agent': 'Pywikipedia family generator 0.1 - pywikipediabot.sf.net'})
     return urllib2.urlopen(req)
 
-from urlparse import urlparse, ParseResult
-import codecs
-import sys
-import re
+# parsing response data
+from BeautifulSoup import BeautifulSoup
+
 try:
     import json
 except ImportError:
@@ -253,7 +257,7 @@
         if V(self.version) < V("1.17.0"):
             self._parse_pre_117(data)
         else:
-            self._parse_post_117(bs)
+            self._parse_post_117(bs, fromurl)
 
     def _parse_pre_117(self, data):
         if not self.REwgEnableApi.search(data):
@@ -276,11 +280,13 @@
             except Exception:
                 pass
 
-    def _parse_post_117(self, bs):
+    def _parse_post_117(self, bs, fromurl):
         apipath = bs.find("link", rel='EditURI')['href'].split("?")[0]
-        info = json.load(urlopen(apipath + "?action=query&meta=siteinfo&format=json"))['query']['general']
-
-        for item in ['server', 'scriptpath', 'articlepath', 'lang']:
+        fullurl = urljoin(fromurl, apipath)
+        info = json.load(urlopen(fullurl + "?action=query&meta=siteinfo&format=json"))['query']['general']
+
+        self.server = urljoin(fromurl, info['server'])
+        for item in ['scriptpath', 'articlepath', 'lang']:
             setattr(self, item, info[item])
 
     def __cmp__(self, other):
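To make the change above easier to follow, here is a minimal standalone sketch of what the patched _parse_post_117 now does; the apipath and info values are invented examples, not repository code. Both the EditURI path and the 'server' value reported by action=query&meta=siteinfo can be protocol-relative on MediaWiki 1.18+, so both are resolved against fromurl before use:

# Minimal sketch of the patched logic; apipath and info are made-up example values
from urlparse import urljoin

fromurl = 'http://nl.wikipedia.org/wiki/Hoofdpagina'
apipath = '//nl.wikipedia.org/w/api.php'          # from <link rel="EditURI" ...>, split at '?'
info = {'server': '//nl.wikipedia.org',           # siteinfo 'general' values (example)
        'scriptpath': '/w',
        'articlepath': '/wiki/$1',
        'lang': 'nl'}

fullurl = urljoin(fromurl, apipath)               # -> http://nl.wikipedia.org/w/api.php
server = urljoin(fromurl, info['server'])         # -> http://nl.wikipedia.org
print fullurl, server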
Added: trunk/pywikipedia/tests/test_generate_family_file.py
===================================================================
--- trunk/pywikipedia/tests/test_generate_family_file.py	(rev 0)
+++ trunk/pywikipedia/tests/test_generate_family_file.py	2011-09-27 20:19:40 UTC (rev 9561)
@@ -0,0 +1,17 @@
+import sys, os
+sys.path.append(os.getcwd())
+
+from generate_family_file import FamilyFileGenerator
+try:
+    os.remove('families/test_family.py')
+except Exception:
+    pass
+FamilyFileGenerator('http://nl.wikipedia.org/wiki/Hoofdpagina', 'test').run()
+os.remove('families/test_family.py')
+#FamilyFileGenerator('https://nl.wikipedia.org/wiki/Hoofdpagina', 'test').run()
+#os.remove('families/test_family.py')
+FamilyFileGenerator('http://techessentials.org/wiki/Main_Page', 'test').run()
+os.remove('families/test_family.py')
+FamilyFileGenerator('http://botwiki.sno.cc/wiki/Main_Page', 'test').run()
+os.remove('families/test_family.py')
+
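A note on running the new test: test_generate_family_file.py appends os.getcwd() to sys.path, so it presumably has to be started from the trunk/pywikipedia directory (for example with python tests/test_generate_family_file.py). It only verifies that generating a family file for each of the listed wikis finishes without raising an exception, removing families/test_family.py after each run.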