Revision: 5814
Author: nicdumz
Date: 2008-08-19 11:46:52 +0000 (Tue, 19 Aug 2008)
Log Message:
-----------
Fixing Filnik's commit: r5813 had reverted all changes from r5803 to r5813 ... !
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-08-19 11:16:52 UTC (rev 5813)
+++ trunk/pywikipedia/wikipedia.py 2008-08-19 11:46:52 UTC (rev 5814)
@@ -505,6 +505,19 @@
else:
return self.sectionFreeTitle(underscore=underscore).split(':', 1)[1]
+ def titleForFilename(self):
+ """
+ Return the title of the page in a form suitable for a filename on
+ the user's file system.
+ """
+ result = self.title()
+ # Replace characters that are not possible in file names on some
+ # systems.
+ # Spaces are possible on most systems, but are bad for URLs.
+ for forbiddenChar in ':*?/\\ ':
+ result = result.replace(forbiddenChar, '_')
+ return result
+
def section(self, underscore = False, decode=False):
"""Return the name of the section this Page refers to.
@@ -1283,6 +1296,18 @@
return self._putPage(newtext, comment, watchArticle, minorEdit,
newPage, self.site().getToken(sysop = sysop), sysop = sysop)
+ def _encodeArg(self, arg, msgForError):
+ """Encode an ascii string/Unicode string to the site's encoding"""
+ try:
+ return arg.encode(self.site().encoding())
+ except UnicodeDecodeError:
+ # happens when arg is a non-ascii bytestring :
+ # when reencoding bytestrings, python decodes first to ascii
+ raise PageNotSaved("An ascii string or unicode %s is expected" % msgForError)
+ except UnicodeEncodeError:
+ # happens when arg is unicode
+ raise PageNotSaved("The %s could not be converted to the site's encoding (%s)" % (msgForError, self.site().encoding()))
+
def _putPage(self, text, comment=None, watchArticle=False, minorEdit=True,
newPage=False, token=None, newToken=False, sysop=False,
captchaId=None, captchaAnswer=None ):
@@ -1294,14 +1319,10 @@
host = self.site().hostname()
# Get the address of the page on that host.
address = self.site().put_address(self.urlname())
- # Use the proper encoding for the comment
- encodedComment = comment.encode(self.site().encoding())
- # Encode the text into the right encoding for the wiki
- encodedText = text.encode(self.site().encoding())
predata = {
'wpSave': '1',
- 'wpSummary': encodedComment,
- 'wpTextbox1': encodedText,
+ 'wpSummary': self._encodeArg(comment, 'edit summary'),
+ 'wpTextbox1': self._encodeArg(text, 'wikitext'),
}
if captchaId:
predata["wpCaptchaId"] = captchaId
@@ -1392,6 +1413,9 @@
# A second text area means that an edit conflict has occured.
if 'id=\'wpTextbox2\' name="wpTextbox2"' in data:
raise EditConflict(u'An edit conflict has occured.')
+
+ # remove the wpAntispam keyword before checking for Spamfilter
+ data = re.sub(u'(?s)<label for="wpAntispam">.*?</label>', '', data)
if self.site().has_mediawiki_message("spamprotectiontitle")\
and self.site().mediawiki_message('spamprotectiontitle') in data:
try:
Revision: 5813
Author: filnik
Date: 2008-08-19 11:16:52 +0000 (Tue, 19 Aug 2008)
Log Message:
-----------
wikipedia.output -> output, here is wikipedia.py :)
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-08-18 14:37:03 UTC (rev 5812)
+++ trunk/pywikipedia/wikipedia.py 2008-08-19 11:16:52 UTC (rev 5813)
@@ -505,19 +505,6 @@
else:
return self.sectionFreeTitle(underscore=underscore).split(':', 1)[1]
- def titleForFilename(self):
- """
- Return the title of the page in a form suitable for a filename on
- the user's file system.
- """
- result = self.title()
- # Replace characters that are not possible in file names on some
- # systems.
- # Spaces are possible on most systems, but are bad for URLs.
- for forbiddenChar in ':*?/\\ ':
- result = result.replace(forbiddenChar, '_')
- return result
-
def section(self, underscore = False, decode=False):
"""Return the name of the section this Page refers to.
@@ -1296,18 +1283,6 @@
return self._putPage(newtext, comment, watchArticle, minorEdit,
newPage, self.site().getToken(sysop = sysop), sysop = sysop)
- def _encodeArg(self, arg, msgForError):
- """Encode an ascii string/Unicode string to the site's encoding"""
- try:
- return arg.encode(self.site().encoding())
- except UnicodeDecodeError:
- # happens when arg is a non-ascii bytestring :
- # when reencoding bytestrings, python decodes first to ascii
- raise PageNotSaved("An ascii string or unicode %s is expected" % msgForError)
- except UnicodeEncodeError:
- # happens when arg is unicode
- raise PageNotSaved("The %s could not be converted to the site's encoding (%s)" % (msgForError, self.site().encoding()))
-
def _putPage(self, text, comment=None, watchArticle=False, minorEdit=True,
newPage=False, token=None, newToken=False, sysop=False,
captchaId=None, captchaAnswer=None ):
@@ -1319,10 +1294,14 @@
host = self.site().hostname()
# Get the address of the page on that host.
address = self.site().put_address(self.urlname())
+ # Use the proper encoding for the comment
+ encodedComment = comment.encode(self.site().encoding())
+ # Encode the text into the right encoding for the wiki
+ encodedText = text.encode(self.site().encoding())
predata = {
'wpSave': '1',
- 'wpSummary': self._encodeArg(comment, 'edit summary'),
- 'wpTextbox1': self._encodeArg(text, 'wikitext'),
+ 'wpSummary': encodedComment,
+ 'wpTextbox1': encodedText,
}
if captchaId:
predata["wpCaptchaId"] = captchaId
@@ -1413,9 +1392,6 @@
# A second text area means that an edit conflict has occured.
if 'id=\'wpTextbox2\' name="wpTextbox2"' in data:
raise EditConflict(u'An edit conflict has occured.')
-
- # remove the wpAntispam keyword before checking for Spamfilter
- data = re.sub(u'(?s)<label for="wpAntispam">.*?</label>', '', data)
if self.site().has_mediawiki_message("spamprotectiontitle")\
and self.site().mediawiki_message('spamprotectiontitle') in data:
try:
@@ -2738,12 +2714,12 @@
if self.exists():
raise NoHash('No Hash found in the APIs! Maybe the regex to catch it is wrong or someone has changed the APIs structure.')
else:
- wikipedia.output(u'Image deleted before getting the Hash. Skipping...')
+ output(u'Image deleted before getting the Hash. Skipping...')
return None
else:
return hash_found
else:
- wikipedia.output(u'Image deleted before getting the Hash. Skipping...')
+ output(u'Image deleted before getting the Hash. Skipping...')
return None
def getFileVersionHistoryTable(self):
Hi, I would like to request a feature for pywikipediabot. The purpose of
this feature is to add or remove a category on pages of one wiki (the
target) based on how the corresponding pages are categorized on a second
wiki (the source).
Sorry for my bad English and poor explanation.
Input:
* Target wikipedia code: ex: "id" (for id.wikipedia)
* Target category name (targetcat): ex: "Kategori:Ilmuwan Amerika
Serikat" (equivalent for en:Category:American scientists)
* Source wikipedia code: ex: "en" (for en.wikipedia)
=Algorithm=
==Add==
* Find the category (sourcecat) on the source wikipedia that corresponds
to the target category, using the interwiki links of "targetcat".
* For every page (P1) on the source wikipedia that is in category
"sourcecat": if P1 has an interwiki link to a page (P2) on the target
wikipedia and P2 is not yet in "targetcat", then add category "targetcat"
to P2.
==Remove==
* Find the category (sourcecat) on the source wikipedia that corresponds
to the target category, using the interwiki links of "targetcat".
* For every page (P1) on the source wikipedia that is in category
"sourcecat": if P1 has an interwiki link to a page (P2) on the target
wikipedia and P2 is in "targetcat", then remove category "targetcat"
from P2.
This feature would be a great help.
Regards
Stanley
Bugs item #2058951, was opened at 2008-08-19 11:55
Message generated for change (Tracker Item Submitted) made by Item Submitter
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2058951&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: Alex S.H. Lin (lin4h)
Assigned to: Nobody/Anonymous (nobody)
Summary: incorrect section process in double redirect (enwiki)
Initial Comment:
The problem can be seen in this diff:
http://en.wikipedia.org/w/index.php?title=\langle&diff=prev&oldid=231637228
The section part of the redirect target is not processed correctly, so the
fixed redirect does not jump to the right section.
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2058951&group_…
Bugs item #2011362, was opened at 2008-07-05 20:07
Message generated for change (Comment added) made by melancholie
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2011362&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: Melancholie (melancholie)
Assigned to: Nobody/Anonymous (nobody)
>Summary: Update featured.py
Initial Comment:
Both hiwiki and yiwiki use Template:Link_FA; the pages hi:Template:Lien AdQ and yi:Template:רא are only redirects.
By re-adding 'Link FA' to the langs arrays, Template:Link_FA will be used by the bot (otherwise it uses those redirects).
----------------------------------------------------------------------
>Comment By: Melancholie (melancholie)
Date: 2008-08-18 23:45
Message:
Logged In: YES
user_id=2089773
Originator: YES
File Added: featured.diff
----------------------------------------------------------------------
Comment By: Melancholie (melancholie)
Date: 2008-08-18 23:44
Message:
Logged In: YES
user_id=2089773
Originator: YES
Added szl category
File Added: featured.diff
----------------------------------------------------------------------
Comment By: Melancholie (melancholie)
Date: 2008-08-05 13:50
Message:
Logged In: YES
user_id=2089773
Originator: YES
added patch (working current)
File Added: featured.diff
----------------------------------------------------------------------
Comment By: Melancholie (melancholie)
Date: 2008-07-11 07:51
Message:
Logged In: YES
user_id=2089773
Originator: YES
Maybe I was a little bit too imprecise ;-)
Here is what I mean:
templatelist = template['_default']
try:
templatelist += template[tosite.lang]
+ (u" {{%s|%s}}" % (templatelist[0],
fromsite.lang))
templatelist[0] is 'Link FA', but the localized templates used are
templatelist[1]. If there is a localized template (with Link_FA being only
a redirect) it should be used for edit. So it might be good to change the
order (make 'try:' first, add _default with +=)
For hi, yi the easiest way might be to just comment out the localized
template names, as they are only redirects to Link_FA.
----------------------------------------------------------------------
Comment By: Melancholie (melancholie)
Date: 2008-07-10 11:59
Message:
Logged In: YES
user_id=2089773
Originator: YES
So {{Link FA}} is always the first choice?
Or does _default actually follow redirects?
----------------------------------------------------------------------
Comment By: NicDumZ — Nicolas Dumazet (nicdumz)
Date: 2008-07-10 10:41
Message:
Logged In: YES
user_id=1963242
Originator: NO
I'm not sure melancholie :)
I added {{Leam VdC}} in r5707, however, since r5669 (
https://fisheye.toolserver.org/browse/pywikipedia/trunk/pywikipedia/feature…
) it uses the '_default' entry (Link FA) AND the locale entry. The list for
hi: is for example ['Link Fa', 'Lien AdQ']
----------------------------------------------------------------------
Comment By: Melancholie (melancholie)
Date: 2008-07-10 10:28
Message:
Logged In: YES
user_id=2089773
Originator: YES
Furthermore, add http://fur.wikipedia.org/wiki/Model:Leam_VdC
[I wish I were able to do these things myself, but I *still* have no account yet]
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2011362&group_…
Bugs item #2011362, was opened at 2008-07-05 20:07
Message generated for change (Comment added) made by melancholie
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2011362&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: Melancholie (melancholie)
Assigned to: Nobody/Anonymous (nobody)
>Summary: update featured.py
Initial Comment:
Both hiwiki and yiwiki use Template:Link_FA; the pages hi:Template:Lien AdQ and yi:Template:רא are only redirects.
By re-adding 'Link FA' to the langs arrays, Template:Link_FA will be used by the bot (otherwise it uses those redirects).
----------------------------------------------------------------------
>Comment By: Melancholie (melancholie)
Date: 2008-08-18 23:44
Message:
Logged In: YES
user_id=2089773
Originator: YES
Added szl category
File Added: featured.diff
----------------------------------------------------------------------
Comment By: Melancholie (melancholie)
Date: 2008-08-05 13:50
Message:
Logged In: YES
user_id=2089773
Originator: YES
added patch (working current)
File Added: featured.diff
----------------------------------------------------------------------
Comment By: Melancholie (melancholie)
Date: 2008-07-11 07:51
Message:
Logged In: YES
user_id=2089773
Originator: YES
Maybe I was a little bit too imprecise ;-)
Here is what I mean:
templatelist = template['_default']
try:
templatelist += template[tosite.lang]
+ (u" {{%s|%s}}" % (templatelist[0],
fromsite.lang))
templatelist[0] is 'Link FA', but the localized templates used are
templatelist[1]. If there is a localized template (with Link_FA being only
a redirect) it should be used for edit. So it might be good to change the
order (make 'try:' first, add _default with +=)
For hi, yi the easiest way might be to just comment out the localized
template names, as they are only redirects to Link_FA.
----------------------------------------------------------------------
Comment By: Melancholie (melancholie)
Date: 2008-07-10 11:59
Message:
Logged In: YES
user_id=2089773
Originator: YES
So {{Link FA}} is always the first choice?
Or does _default actually follow redirects?
----------------------------------------------------------------------
Comment By: NicDumZ — Nicolas Dumazet (nicdumz)
Date: 2008-07-10 10:41
Message:
Logged In: YES
user_id=1963242
Originator: NO
I'm not sure melancholie :)
I added {{Leam VdC}} in r5707, however, since r5669 (
https://fisheye.toolserver.org/browse/pywikipedia/trunk/pywikipedia/feature…
) it uses the '_default' entry (Link FA) AND the locale entry. The list for
hi: is for example ['Link Fa', 'Lien AdQ']
----------------------------------------------------------------------
Comment By: Melancholie (melancholie)
Date: 2008-07-10 10:28
Message:
Logged In: YES
user_id=2089773
Originator: YES
Furthermore, add http://fur.wikipedia.org/wiki/Model:Leam_VdC
[I wish I were able to do these things myself, but I *still* have no account yet]
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2011362&group_…
Revision: 5802
Author: nicdumz
Date: 2008-08-17 07:40:05 +0000 (Sun, 17 Aug 2008)
Log Message:
-----------
_putPage was crashing when either the text or the comment argument was a non-ascii string :
>>> p.put('öäö$üö$äöäç%2234qwdadasd', 'Test')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/nico/projets/devpywiki/wikipedia.py", line 1284, in put
newPage, self.site().getToken(sysop = sysop), sysop = sysop)
File "/home/nico/projets/devpywiki/wikipedia.py", line 1300, in _putPage
encodedText = text.encode(self.site().encoding())
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 0: ordinal not in range(128)
Now raising ValueError when the text or comment is not Unicode
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-08-16 11:18:09 UTC (rev 5801)
+++ trunk/pywikipedia/wikipedia.py 2008-08-17 07:40:05 UTC (rev 5802)
@@ -1294,8 +1294,12 @@
host = self.site().hostname()
# Get the address of the page on that host.
address = self.site().put_address(self.urlname())
+ if not isinstance(comment, unicode):
+ raise ValueError("An unicode edit comment is expected as an argument")
# Use the proper encoding for the comment
encodedComment = comment.encode(self.site().encoding())
+ if not isinstance(text, unicode):
+ raise ValueError("An unicode wikitext is expected as an argument")
# Encode the text into the right encoding for the wiki
encodedText = text.encode(self.site().encoding())
predata = {
Revision: 5811
Author: wikipedian
Date: 2008-08-18 12:50:37 +0000 (Mon, 18 Aug 2008)
Log Message:
-----------
improved behaviour on pages like http://de.wikipedia.org/wiki/Buthan or
http://de.wikipedia.org/wiki/Equador
Modified Paths:
--------------
trunk/pywikipedia/misspelling.py
Modified: trunk/pywikipedia/misspelling.py
===================================================================
--- trunk/pywikipedia/misspelling.py 2008-08-18 12:41:56 UTC (rev 5810)
+++ trunk/pywikipedia/misspelling.py 2008-08-18 12:50:37 UTC (rev 5811)
@@ -88,8 +88,17 @@
for templateName, params in disambPage.templatesWithParams():
if templateName in self.misspellingTemplate[wikipedia.getSite().lang]:
# The correct spelling is in the last paramter.
- # This works for de:, not tested with others.
- self.alternatives.append(params[-1])
+ correctSpelling = params[-1]
+ # On de.wikipedia, there are some cases where the
+ # misspelling is ambigous, see for example:
+ # http://de.wikipedia.org/wiki/Buthan
+ for match in self.linkR.finditer(correctSpelling):
+ self.alternatives.append(match.group('title'))
+
+ if not self.alternatives:
+ # There were no links in the parameter, so there is
+ # only one correct spelling.
+ self.alternatives.append(correctSpelling)
return True
# Overrides the DisambiguationRobot method.