http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9532
Revision: 9532
Author: xqt
Date: 2011-09-18 17:10:08 +0000 (Sun, 18 Sep 2011)
Log Message:
-----------
Do not localize the #REDIRECT tag if the target page has the same title as the redirect page.
Modified Paths:
--------------
trunk/pywikipedia/redirect.py
Modified: trunk/pywikipedia/redirect.py
===================================================================
--- trunk/pywikipedia/redirect.py 2011-09-18 16:53:51 UTC (rev 9531)
+++ trunk/pywikipedia/redirect.py 2011-09-18 17:10:08 UTC (rev 9532)
@@ -659,7 +659,7 @@
'#%s %s' % (self.site.redirect(),
targetPage.title(asLink=True, textlink=True)),
oldText)
- if text == oldText:
+ if redir.title() == targetPage.title() or text == oldText:
pywikibot.output(u"Note: Nothing left to do on %s"
% redir.title(asLink=True))
break
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9529
Revision: 9529
Author: valhallasw
Date: 2011-09-18 15:26:53 +0000 (Sun, 18 Sep 2011)
Log Message:
-----------
Two bugfixes for r9528:
- added support for version numbers that do not follow the 'x.y.z' pattern, e.g. '1.17wmf1'
- fixed error message when there is no sign of the API
Modified Paths:
--------------
trunk/pywikipedia/generate_family_file.py
Modified: trunk/pywikipedia/generate_family_file.py
===================================================================
--- trunk/pywikipedia/generate_family_file.py 2011-09-18 15:19:02 UTC (rev 9528)
+++ trunk/pywikipedia/generate_family_file.py 2011-09-18 15:26:53 UTC (rev 9529)
@@ -17,7 +17,7 @@
import urllib2
from BeautifulSoup import BeautifulSoup
-from distutils.version import StrictVersion as V
+from distutils.version import LooseVersion as V
def urlopen(url):
req = urllib2.Request(url, headers = {'User-agent': 'Pywikipedia family generator 0.1 - pywikipediabot.sf.net'})
@@ -219,6 +219,7 @@
REwgVersion = re.compile(ur'wgVersion ?= ?"([^"]*)"')
def __init__(self, fromurl):
+ self.fromurl = fromurl
if fromurl.endswith("$1"):
fromurl = fromurl[:-2]
try:
@@ -242,7 +243,7 @@
def _parse_pre_117(self, data):
if not self.REwgEnableApi.search(data):
- print "*** WARNING: Api does not seem to be enabled on %s" % fromurl
+ print "*** WARNING: Api does not seem to be enabled on %s" % self.fromurl
try:
self.version = self.REwgVersion.search(data).groups()[0]
except AttributeError:
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9525
Revision: 9525
Author: saper
Date: 2011-09-14 22:54:59 +0000 (Wed, 14 Sep 2011)
Log Message:
-----------
Use BeautifulSoup for getting HTML links and images.
Removed simplistic regular expression based guessing
of contents of src="" and href="" attributes.
Still, treating every URL that ends with '.jpeg' or similar
as an image is unsuitable for fetching images from MediaWiki
installations, since /wiki/File:Picture.jpg links
point to the description pages, not to the pictures
themselves.
Modified Paths:
--------------
trunk/pywikipedia/imageharvest.py
Modified: trunk/pywikipedia/imageharvest.py
===================================================================
--- trunk/pywikipedia/imageharvest.py 2011-09-13 15:58:36 UTC (rev 9524)
+++ trunk/pywikipedia/imageharvest.py 2011-09-14 22:54:59 UTC (rev 9525)
@@ -20,36 +20,31 @@
import re, sys, os
import wikipedia as pywikibot
+import urllib
+import BeautifulSoup
import upload
def get_imagelinks(url):
- # Given a URL, get all images linked to by the page at that URL.
- # First, we get the location for relative links from the URL.
- relativepath = url.split("/")
- if len(relativepath) == 1:
- relativepath=relativepath[0]
- else:
- relativepath=relativepath[:len(relativepath)-1]
- relativepath="/".join(relativepath)
+ """Given a URL, get all images linked to by the page at that URL."""
+
links = []
uo = pywikibot.MyURLopener
file = uo.open(url)
- text = file.read()
+ soup = BeautifulSoup.BeautifulSoup(file.read())
file.close()
- text = text.lower()
if not shown:
- R=re.compile("href\s*=\s*[\"'](.*?)[\"']")
+ tagname = "a"
elif shown == "just":
- R=re.compile("src\s*=s*[\"'](.*?)[\"']")
+ tagname = "img"
else:
- R=re.compile("[\"'](.*?)[\"']")
- for link in R.findall(text):
- ext = os.path.splitext(link)[1].lower().strip('.')
- if ext in fileformats:
- if re.compile("://").match(text):
- links += [link]
- else:
- links += [relativepath+"/"+link]
+ tagname = ["a", "img"]
+
+ for tag in soup.findAll(tagname):
+ link = tag.get("src", tag.get("href", None))
+ if link:
+ ext = os.path.splitext(link)[1].lower().strip('.')
+ if ext in fileformats:
+ links.append(urllib.basejoin(url, link))
return links
def main(give_url, image_url, desc):
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9518
Revision: 9518
Author: xqt
Date: 2011-09-08 20:51:52 +0000 (Thu, 08 Sep 2011)
Log Message:
-----------
copied the wrong error class in r9514
Modified Paths:
--------------
trunk/pywikipedia/tag_nowcommons.py
Modified: trunk/pywikipedia/tag_nowcommons.py
===================================================================
--- trunk/pywikipedia/tag_nowcommons.py 2011-09-08 20:48:36 UTC (rev 9517)
+++ trunk/pywikipedia/tag_nowcommons.py 2011-09-08 20:51:52 UTC (rev 9518)
@@ -21,8 +21,8 @@
from pywikibot import i18n
-class NothingFound(pywikibot.Error):
- """ An exception indicating that a regex has return [] instead of results."""
+class NoEnoughData(pywikibot.Error):
+ """ Error class for when the user doesn't specified all the data needed """
def main(args):
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9516
Revision: 9516
Author: xqt
Date: 2011-09-08 20:43:48 +0000 (Thu, 08 Sep 2011)
Log Message:
-----------
Patch for clean_sandbox.py (bug #3406436), submitted by meno25.
Modified Paths:
--------------
trunk/pywikipedia/clean_sandbox.py
Modified: trunk/pywikipedia/clean_sandbox.py
===================================================================
--- trunk/pywikipedia/clean_sandbox.py 2011-09-08 20:41:51 UTC (rev 9515)
+++ trunk/pywikipedia/clean_sandbox.py 2011-09-08 20:43:48 UTC (rev 9516)
@@ -50,12 +50,12 @@
content = {
'commons': u'{{Sandbox}}\n<!-- Please edit only below this line. -->',
'als':u'{{subst:/Vorlage}}',
- 'ar': u'{{من فضلك اترك هذا السطر ولا تعدله (عنوان ساحة التجربة)}}\n<!-- مرحبا! خذ راحتك في تجربة مهارتك في التنسيق والتحرير أسفل هذا السطر. هذه الصفحة لتجارب التعديل ، سيتم تفريغ هذه الصفحة كل 6 ساعات. -->',
+ 'ar': u'{{عنوان الملعب}}\n<!-- مرحبا! خذ راحتك في تجربة مهارتك في التنسيق والتحرير أسفل هذا السطر. هذه الصفحة لتجارب التعديل ، سيتم تفريغ هذه الصفحة كل 12 ساعة. -->',
'bar':u'{{Bitte erst NACH dieser Zeile schreiben! (Begrüßungskasten)}}\r\n',
'cs': u'{{subst:/uhrabat}}',
'da': u'{{subst:Sandkasse tekst}}',
'de': u'{{Bitte erst NACH dieser Zeile schreiben! (Begrüßungskasten)}}\r\n',
- 'en': u'{{Please leave this line alone (sandbox heading)}}\n<!-- Hello! Feel free to try your formatting and editing skills below this line. As this page is for editing experiments, this page will automatically be cleaned every 12 hours. -->',
+ 'en': u'{{Sandbox heading}}\n<!-- Hello! Feel free to try your formatting and editing skills below this line. As this page is for editing experiments, this page will automatically be cleaned every 12 hours. -->',
'fa': u'{{subst:User:Amirobot/sandbox}}',
'fi': u'{{subst:Hiekka}}',
'he': u'{{ארגז חול}}\n<!-- נא לערוך מתחת לשורה זו בלבד, תודה. -->',
@@ -82,7 +82,7 @@
sandboxTitle = {
'commons': u'Project:Sandbox',
'als':u'Project:Sandchaschte',
- 'ar': u'Project:ساحة التجربة',
+ 'ar': u'Project:ملعب',
'bar':u'Project:Spielwiese',
'cs': u'Project:Pískoviště',
'da': u'Project:Sandkassen',
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9514
Revision: 9514
Author: xqt
Date: 2011-09-08 20:05:00 +0000 (Thu, 08 Sep 2011)
Log Message:
-----------
missing error class added
Modified Paths:
--------------
trunk/pywikipedia/tag_nowcommons.py
Modified: trunk/pywikipedia/tag_nowcommons.py
===================================================================
--- trunk/pywikipedia/tag_nowcommons.py 2011-09-06 22:51:33 UTC (rev 9513)
+++ trunk/pywikipedia/tag_nowcommons.py 2011-09-08 20:05:00 UTC (rev 9514)
@@ -20,6 +20,11 @@
from nowcommons import nowCommons
from pywikibot import i18n
+
+class NothingFound(pywikibot.Error):
+ """ An exception indicating that a regex has return [] instead of results."""
+
+
def main(args):
generator = None;
always = False
@@ -33,7 +38,7 @@
generator = genFactory.getCombinedGenerator()
if not generator:
- raise add_text.NoEnoughData('You have to specify the generator you want to use for the script!')
+ raise NoEnoughData('You have to specify the generator you want to use for the script!')
pregenerator = pagegenerators.PreloadingGenerator(generator)