http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9561
Revision: 9561
Author: valhallasw
Date: 2011-09-27 20:19:40 +0000 (Tue, 27 Sep 2011)
Log Message:
-----------
Updated generate_family_file.py to allow mw1.18+ protocol-relative urls
Created simple test system to check if creating family files for a few wikis does not
throw exceptions. Doesn't test if the files actually work...
Modified Paths:
--------------
trunk/pywikipedia/generate_family_file.py
Added Paths:
-----------
trunk/pywikipedia/tests/test_generate_family_file.py
Modified: trunk/pywikipedia/generate_family_file.py
===================================================================
--- trunk/pywikipedia/generate_family_file.py 2011-09-27 19:34:33 UTC (rev 9560)
+++ trunk/pywikipedia/generate_family_file.py 2011-09-27 20:19:40 UTC (rev 9561)
@@ -13,20 +13,24 @@
# Distributed under the terms of the MIT license
#
-from urllib2 import HTTPError
-import urllib2
-
-from BeautifulSoup import BeautifulSoup
+# system imports
+import sys, re
+import codecs
from distutils.version import LooseVersion as V
+# creating urls
+from urlparse import urlparse, urljoin, ParseResult
+
+# retrieving urls
+import urllib2
+from urllib2 import HTTPError
def urlopen(url):
    req = urllib2.Request(url, headers = {'User-agent': 'Pywikipedia family generator 0.1 - pywikipediabot.sf.net'})
return urllib2.urlopen(req)
-from urlparse import urlparse, ParseResult
-import codecs
-import sys
-import re
+# parsing response data
+from BeautifulSoup import BeautifulSoup
+
try:
import json
except ImportError:
@@ -253,7 +257,7 @@
if V(self.version) < V("1.17.0"):
self._parse_pre_117(data)
else:
- self._parse_post_117(bs)
+ self._parse_post_117(bs, fromurl)
def _parse_pre_117(self, data):
if not self.REwgEnableApi.search(data):
@@ -276,11 +280,13 @@
except Exception:
pass
- def _parse_post_117(self, bs):
+ def _parse_post_117(self, bs, fromurl):
apipath = bs.find("link",
rel='EditURI')['href'].split("?")[0]
-        info = json.load(urlopen(apipath + "?action=query&meta=siteinfo&format=json"))['query']['general']
-
-        for item in ['server', 'scriptpath', 'articlepath', 'lang']:
+ fullurl = urljoin(fromurl, apipath)
+        info = json.load(urlopen(fullurl + "?action=query&meta=siteinfo&format=json"))['query']['general']
+
+ self.server = urljoin(fromurl, info['server'])
+ for item in ['scriptpath', 'articlepath', 'lang']:
setattr(self, item, info[item])
def __cmp__(self, other):
Added: trunk/pywikipedia/tests/test_generate_family_file.py
===================================================================
--- trunk/pywikipedia/tests/test_generate_family_file.py (rev 0)
+++ trunk/pywikipedia/tests/test_generate_family_file.py 2011-09-27 20:19:40 UTC (rev 9561)
@@ -0,0 +1,17 @@
+import sys, os
+sys.path.append(os.getcwd())
+
+from generate_family_file import FamilyFileGenerator
+try:
+ os.remove('families/test_family.py')
+except Exception:
+ pass
+FamilyFileGenerator('http://nl.wikipedia.org/wiki/Hoofdpagina', 'test').run()
+os.remove('families/test_family.py')
+#FamilyFileGenerator('https://nl.wikipedia.org/wiki/Hoofdpagina', 'test').run()
+#os.remove('families/test_family.py')
+FamilyFileGenerator('http://techessentials.org/wiki/Main_Page', 'test').run()
+os.remove('families/test_family.py')
+FamilyFileGenerator('http://botwiki.sno.cc/wiki/Main_Page', 'test').run()
+os.remove('families/test_family.py')
+