Revision: 5165
Author: russblau
Date: 2008-03-24 22:03:25 +0000 (Mon, 24 Mar 2008)
Log Message:
-----------
It appears that many authors use empty brackets [[]] as a placeholder; giving a warning for this is more distracting than helpful.
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-03-24 22:01:45 UTC (rev 5164)
+++ trunk/pywikipedia/wikipedia.py 2008-03-24 22:03:25 UTC (rev 5165)
@@ -1514,7 +1514,8 @@
try:
page = Page(self.site(), title)
except Error:
- output(u"Page %s contains invalid link to [[%s]]."
+ if title.strip(" "):
+ output(u"Page %s contains invalid link to [[%s]]."
% (self.title(), title))
continue
if not withImageLinks and page.isImage():
Revision: 5163
Author: filnik
Date: 2008-03-24 18:32:29 +0000 (Mon, 24 Mar 2008)
Log Message:
-----------
Little fix of NicDumZ
Modified Paths:
--------------
trunk/pywikipedia/redirect.py
Modified: trunk/pywikipedia/redirect.py
===================================================================
--- trunk/pywikipedia/redirect.py 2008-03-24 12:48:33 UTC (rev 5162)
+++ trunk/pywikipedia/redirect.py 2008-03-24 18:32:29 UTC (rev 5163)
@@ -143,7 +143,7 @@
for code in site.family.langs.keys():
if target.startswith('%s:' % code) \
or target.startswith(':%s:' % code):
- if code == site.language:
+ if code == site.language():
# link to our wiki, but with the lang prefix
target = target[(len(code)+1):]
if target.startswith(':'):
@@ -348,7 +348,7 @@
% targetPage.aslink())
content=targetPage.get(get_redirect=True)
- if wikipedia.Page(wikipedia.getSite(), u"Template:Db-r1").exists():
+ if wikipedia.Page(wikipedia.getSite(), u"Template:Db-r1").exists():
wikipedia.output(u"Tagging redirect for deletion")
# Delete the two redirects
targetPage.put("{{db-r1}}\n"+content, "Tagging for speedy deletion")
Bugs item #1924322, was opened at 2008-03-24 06:19
Message generated for change (Tracker Item Submitted) made by Item Submitter
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=1924322&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: interwiki
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: Nobody/Anonymous (nobody)
Assigned to: Nobody/Anonymous (nobody)
Summary: interwiki links on subpages in templates
Initial Comment:
In English and some other major Wikipedias, interwiki links are placed on a /doc subpage (or whatever it's called) of templates. The interwiki bot should check whether such a page exists and, if so, place/update links on that subpage instead of on the main template page. Otherwise, every time a bot places interwiki links on a template with this structure, the main template page needs to be cleaned up and the interwiki links moved to the subpage manually.
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=1924322&group_…
Revision: 5160
Author: cosoleto
Date: 2008-03-24 10:49:15 +0000 (Mon, 24 Mar 2008)
Log Message:
-----------
code cleanup
Modified Paths:
--------------
trunk/pywikipedia/add_text.py
Modified: trunk/pywikipedia/add_text.py
===================================================================
--- trunk/pywikipedia/add_text.py 2008-03-24 10:46:09 UTC (rev 5159)
+++ trunk/pywikipedia/add_text.py 2008-03-24 10:49:15 UTC (rev 5160)
@@ -21,14 +21,14 @@
--- Example ---
-python add_text.py -start:! -summary:"Bot: Adding a template" -text:"{{Something}}" -except:"\{\{(?:[Tt]emplate:|)[Ss]omething" -up
+python add_text.py -cat:catname -summary:"Bot: Adding a template" -text:"{{Something}}" -except:"\{\{([Tt]emplate:|)[Ss]omething" -up
# Command used on it.wikipedia to put the template in the page without any category.
python add_text.py -excepturl:"<p class='catlinks'>" -uncat -text:"{{Categorizzare}}"
--except:"\{\{(?:[Tt]emplate:|)[Cc]ategorizzare" -summary:"Bot: Aggiungo template Categorizzare"
+-except:"\{\{([Tt]emplate:|)[Cc]ategorizzare" -summary:"Bot: Aggiungo template Categorizzare"
--- Credits and Help ---
-This script has been written by Botwiki's stuff, if you want to help us
+This script has been written by Botwiki's staff, if you want to help us
or you need some help regarding this script, you can find us here:
* http://botwiki.sno.cc
@@ -73,21 +73,16 @@
""" Function to load HTML text of a URL """
try:
request = urllib2.Request(url)
- user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.12) Gecko/20050915 Firefox/1.0.7'
- request.add_header("User-Agent", user_agent)
+ request.add_header("User-Agent", wikipedia.useragent)
response = urllib2.urlopen(request)
text = response.read()
response.close()
# When you load to many users, urllib2 can give this error.
except urllib2.HTTPError:
wikipedia.output(u"Server error. Pausing for 10 seconds... " + time.strftime("%d %b %Y %H:%M:%S (UTC)", time.gmtime()) )
- time.sleep(10)
- request = urllib2.Request(url)
- user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.12) Gecko/20050915 Firefox/1.0.7'
- request.add_header("User-Agent", user_agent)
- response = urllib2.urlopen(request)
- text = response.read()
response.close()
+ time.sleep(10)
+ return pageText(url)
return text
def untaggedGenerator(untaggedProject, limit = 500):
Revision: 5157
Author: cosoleto
Date: 2008-03-24 10:38:55 +0000 (Mon, 24 Mar 2008)
Log Message:
-----------
* Fix in XML parser for sites that use authentication, 'data' isn't a Unicode string
* Undo r4965 and other related revisions about an ar.wiki fix, because the new code generates a useless regex (http://lists.wikimedia.org/pipermail/pywikipedia-l/2008-February/002141.html) or an RE compilation error ('sre_constants.error: multiple repeat', latest revision)
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-03-23 21:35:08 UTC (rev 5156)
+++ trunk/pywikipedia/wikipedia.py 2008-03-24 10:38:55 UTC (rev 5157)
@@ -2674,9 +2674,9 @@
data = response.read()
else:
response, data = self.site.postForm(address, predata)
- # The XML parser doesn't expect a Unicode string, but an encoded one,
- # so we'll encode it back.
- data = data.encode(self.site.encoding())
+ # The XML parser doesn't expect a Unicode string, but an encoded one,
+ # so we'll encode it back.
+ data = data.encode(self.site.encoding())
get_throttle.setDelay(time.time() - now)
return data
@@ -4941,26 +4941,18 @@
Group 1 in the regex match object will be the target title.
"""
- redDefault = 'redirect'
- red = 'redirect'
- if self.lang == 'ar':
- red = u"تحويل"
+
try:
- if redDefault == red:
- redirKeywords = [red] + self.family.redirect[self.lang]
- redirKeywordsR = r'(?:redirect|' + '|'.join(redirKeywords) + ')' # always redirect as default
- else:
- redirKeywords = [red] + self.family.redirect[self.lang]
- redirKeywordsR = r'(?:redirect|' + redDefault + '|'.join(redirKeywords) + ')'
+ redirKeywords = [u'redirect'] + self.family.redirect[self.lang]
+ redirKeywordsR = r'(?:' + '|'.join(redirKeywords) + ')'
except KeyError:
# no localized keyword for redirects
- if redDefault == red:
- redirKeywordsR = r'%s' % red
- else:
- redirKeywordsR = r'(?:%s|%s)' % (red, redDefault)
+ redirKeywordsR = r'redirect'
+
# A redirect starts with hash (#), followed by a keyword, then
# arbitrary stuff, then a wikilink. The wikilink may contain
# a label, although this is not useful.
+
return re.compile(r'#' + redirKeywordsR +
'.*?\[\[(.*?)(?:\|.*?)?\]\]',
re.IGNORECASE | re.UNICODE | re.DOTALL)