http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9532
Revision: 9532
Author: xqt
Date: 2011-09-18 17:10:08 +0000 (Sun, 18 Sep 2011)
Log Message:
-----------
do not localize #REDIRECT tag if the target page is the same
Modified Paths:
--------------
trunk/pywikipedia/redirect.py
Modified: trunk/pywikipedia/redirect.py
===================================================================
--- trunk/pywikipedia/redirect.py 2011-09-18 16:53:51 UTC (rev 9531)
+++ trunk/pywikipedia/redirect.py 2011-09-18 17:10:08 UTC (rev 9532)
@@ -659,7 +659,7 @@
'#%s %s' % (self.site.redirect(),
targetPage.title(asLink=True, textlink=True)),
oldText)
- if text == oldText:
+ if redir.title() == targetPage.title() or text == oldText:
pywikibot.output(u"Note: Nothing left to do on %s"
% redir.title(asLink=True))
break
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9529
Revision: 9529
Author: valhallasw
Date: 2011-09-18 15:26:53 +0000 (Sun, 18 Sep 2011)
Log Message:
-----------
Two bugfixes for r9528:
- added support for version numbers differing from 'x.y.z', e.g. '1.17wmf1'
- fixed error message when there is no sign of the API
Modified Paths:
--------------
trunk/pywikipedia/generate_family_file.py
Modified: trunk/pywikipedia/generate_family_file.py
===================================================================
--- trunk/pywikipedia/generate_family_file.py 2011-09-18 15:19:02 UTC (rev 9528)
+++ trunk/pywikipedia/generate_family_file.py 2011-09-18 15:26:53 UTC (rev 9529)
@@ -17,7 +17,7 @@
import urllib2
from BeautifulSoup import BeautifulSoup
-from distutils.version import StrictVersion as V
+from distutils.version import LooseVersion as V
def urlopen(url):
req = urllib2.Request(url, headers = {'User-agent': 'Pywikipedia family generator 0.1 - pywikipediabot.sf.net'})
@@ -219,6 +219,7 @@
REwgVersion = re.compile(ur'wgVersion ?= ?"([^"]*)"')
def __init__(self, fromurl):
+ self.fromurl = fromurl
if fromurl.endswith("$1"):
fromurl = fromurl[:-2]
try:
@@ -242,7 +243,7 @@
def _parse_pre_117(self, data):
if not self.REwgEnableApi.search(data):
- print "*** WARNING: Api does not seem to be enabled on %s" % fromurl
+ print "*** WARNING: Api does not seem to be enabled on %s" % self.fromurl
try:
self.version = self.REwgVersion.search(data).groups()[0]
except AttributeError:
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9525
Revision: 9525
Author: saper
Date: 2011-09-14 22:54:59 +0000 (Wed, 14 Sep 2011)
Log Message:
-----------
Use BeautifulSoup for getting HTML links and images.
Removed simplistic regular expression based guessing
of contents of src="" and href="" attributes.
Still, treating all URLs ending with '.jpeg' or a similar
extension as images is unsuitable for fetching images from
MediaWiki installations, since /wiki/File:Picture.jpg links
point to the description pages, not to the pictures
themselves.
Modified Paths:
--------------
trunk/pywikipedia/imageharvest.py
Modified: trunk/pywikipedia/imageharvest.py
===================================================================
--- trunk/pywikipedia/imageharvest.py 2011-09-13 15:58:36 UTC (rev 9524)
+++ trunk/pywikipedia/imageharvest.py 2011-09-14 22:54:59 UTC (rev 9525)
@@ -20,36 +20,31 @@
import re, sys, os
import wikipedia as pywikibot
+import urllib
+import BeautifulSoup
import upload
def get_imagelinks(url):
- # Given a URL, get all images linked to by the page at that URL.
- # First, we get the location for relative links from the URL.
- relativepath = url.split("/")
- if len(relativepath) == 1:
- relativepath=relativepath[0]
- else:
- relativepath=relativepath[:len(relativepath)-1]
- relativepath="/".join(relativepath)
+ """Given a URL, get all images linked to by the page at that URL."""
+
links = []
uo = pywikibot.MyURLopener
file = uo.open(url)
- text = file.read()
+ soup = BeautifulSoup.BeautifulSoup(file.read())
file.close()
- text = text.lower()
if not shown:
- R=re.compile("href\s*=\s*[\"'](.*?)[\"']")
+ tagname = "a"
elif shown == "just":
- R=re.compile("src\s*=s*[\"'](.*?)[\"']")
+ tagname = "img"
else:
- R=re.compile("[\"'](.*?)[\"']")
- for link in R.findall(text):
- ext = os.path.splitext(link)[1].lower().strip('.')
- if ext in fileformats:
- if re.compile("://").match(text):
- links += [link]
- else:
- links += [relativepath+"/"+link]
+ tagname = ["a", "img"]
+
+ for tag in soup.findAll(tagname):
+ link = tag.get("src", tag.get("href", None))
+ if link:
+ ext = os.path.splitext(link)[1].lower().strip('.')
+ if ext in fileformats:
+ links.append(urllib.basejoin(url, link))
return links
def main(give_url, image_url, desc):