Revision: 5809
Author: wikipedian
Date: 2008-08-18 12:31:26 +0000 (Mon, 18 Aug 2008)
Log Message:
-----------
misspelling.py now works on en.wikipedia
Modified Paths:
--------------
trunk/pywikipedia/misspelling.py
Modified: trunk/pywikipedia/misspelling.py
===================================================================
--- trunk/pywikipedia/misspelling.py 2008-08-18 12:24:49 UTC (rev 5808)
+++ trunk/pywikipedia/misspelling.py 2008-08-18 12:31:26 UTC (rev 5809)
@@ -33,14 +33,16 @@
misspellingTemplate = {
'de': u'Falschschreibung',
#'en': u'Template:Misspelling', # rarely used on en:
- 'pt': u'Pseudo-redirect',
+ 'en': None, # en: uses simple redirects
+ #'pt': u'Pseudo-redirect', # replaced by another system on pt:
}
# Optional: if there is a category, one can use the -start
# parameter.
misspellingCategory = {
'de': u'Kategorie:Wikipedia:Falschschreibung',
- 'pt': u'Categoria:!Pseudo-redirects',
+ 'en': u'Redirects from misspellings',
+ #'pt': u'Categoria:!Pseudo-redirects',
}
msg = {
@@ -66,18 +68,22 @@
misspellingTemplate = wikipedia.Page(wikipedia.getSite(), misspellingTemplateName)
generator = pagegenerators.ReferringPageGenerator(misspellingTemplate, onlyTemplateInclusion = True)
if firstPageTitle:
- wikipedia.output('-start parameter unsupported on this wiki because there is no category for misspellings.')
+ wikipedia.output(u'-start parameter unsupported on this wiki because there is no category for misspellings.')
preloadingGen = pagegenerators.PreloadingGenerator(generator)
return preloadingGen
# Overrides the DisambiguationRobot method.
def findAlternatives(self, disambPage):
- for templateName, params in disambPage.templatesWithParams():
- if templateName in self.misspellingTemplate[wikipedia.getSite().lang]:
- # The correct spelling is in the last paramter.
- # This works for de:, not tested with others.
- self.alternatives.append(params[-1])
- return True
+ if disambPage.isRedirectPage():
+ self.alternatives.append(disambPage.getRedirectTarget().title())
+ return True
+ elif self.misspellingTemplate[disambPage.site().lang] is not None:
+ for templateName, params in disambPage.templatesWithParams():
+ if templateName in self.misspellingTemplate[wikipedia.getSite().lang]:
+ # The correct spelling is in the last paramter.
+ # This works for de:, not tested with others.
+ self.alternatives.append(params[-1])
+ return True
# Overrides the DisambiguationRobot method.
def setSummaryMessage(self, disambPage, new_targets, unlink):
Revision: 5808
Author: wikipedian
Date: 2008-08-18 12:24:49 +0000 (Mon, 18 Aug 2008)
Log Message:
-----------
Moved filename generation code from interwiki_graph to new method
wikipedia.Page.titleForFilename().
Used that method for solve_disambiguation.py -primary. This improves the
behaviour when running the script on a page that contains slashes etc.
Modified Paths:
--------------
trunk/pywikipedia/interwiki_graph.py
trunk/pywikipedia/solve_disambiguation.py
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/interwiki_graph.py
===================================================================
--- trunk/pywikipedia/interwiki_graph.py 2008-08-18 11:53:24 UTC (rev 5807)
+++ trunk/pywikipedia/interwiki_graph.py 2008-08-18 12:24:49 UTC (rev 5808)
@@ -1,4 +1,4 @@
-""" Module with the graphviz drawing calls """
+""" Module with the graphviz drawing calls """
__version__ = '$Id$'
import threading
pydotfound = True
@@ -208,13 +208,11 @@
def getFilename(page, extension = None):
- filename = '%s-%s-%s' % (page.site().family.name, page.site().language(), page.title())
+ filename = '%s-%s-%s' % (page.site().family.name,
+ page.site().language(),
+ page.titleForFilename())
if extension:
filename += '.%s' % extension
- # Replace characters that are not possible in file names on some systems.
- # Spaces are possible on most systems, but are bad for URLs.
- for forbiddenChar in ':*?/\\ ':
- filename = filename.replace(forbiddenChar, '_')
return filename
if __name__ == "__main__":
Modified: trunk/pywikipedia/solve_disambiguation.py
===================================================================
--- trunk/pywikipedia/solve_disambiguation.py 2008-08-18 11:53:24 UTC (rev 5807)
+++ trunk/pywikipedia/solve_disambiguation.py 2008-08-18 12:24:49 UTC (rev 5808)
@@ -474,7 +474,7 @@
self.ignorelist = []
filename = wikipedia.config.datafilepath('disambiguations',
- self.disambPage.urlname() + '.txt')
+ self.disambPage.titleForFilename() + '.txt')
try:
# The file is stored in the disambiguation/ subdir. Create if necessary.
f = codecs.open(filename, 'r', 'utf-8')
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-08-18 11:53:24 UTC (rev 5807)
+++ trunk/pywikipedia/wikipedia.py 2008-08-18 12:24:49 UTC (rev 5808)
@@ -505,6 +505,19 @@
else:
return self.sectionFreeTitle(underscore=underscore).split(':', 1)[1]
+ def titleForFilename(self):
+ """
+ Return the title of the page in a form suitable for a filename on
+ the user's file system.
+ """
+ result = self.title()
+ # Replace characters that are not possible in file names on some
+ # systems.
+ # Spaces are possible on most systems, but are bad for URLs.
+ for forbiddenChar in ':*?/\\ ':
+ result = result.replace(forbiddenChar, '_')
+ return result
+
def section(self, underscore = False, decode=False):
"""Return the name of the section this Page refers to.
Bugs item #1985308, was opened at 2008-06-05 10:39
Message generated for change (Comment added) made by nobody
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=1985308&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: General
Group: None
Status: Open
Resolution: None
Priority: 6
Private: No
Submitted By: Melancholie (melancholie)
Assigned to: Nobody/Anonymous (nobody)
Summary: Bot does not properly stay logged in (cookies)
Initial Comment:
My bot (r5514) does not stay properly logged in!
My first thought was this is because of the API_login, but it happens also for the normal login method. Is this maybe caused by SUL?
Or maybe by the following change?
http://svn.wikimedia.org/viewvc/pywikipedia?view=rev&revision=5514
See fa.wikipedia.org/wiki/_:MelancholieBot#IP_contribution
----------------------------------------------------------------------
Comment By: Nobody/Anonymous (nobody)
Date: 2008-08-18 08:11
Message:
Logged In: NO
It's odd, but the bug is intermittent and not all the wikis are affected.
Most affected wikis are: ast, da, de, ja, he, ka, ko, mk, rmy, ro, sr, tr.
Is there any way to definitely fix this bug?
----------------------------------------------------------------------
Comment By: Melancholie (melancholie)
Date: 2008-07-17 12:58
Message:
Logged In: YES
user_id=2089773
Originator: YES
For this, please see the comment of sswierkosz (at the very bottom)!
It's best to delete all login.data first, then run your bot without
logging out from your bot account by using a web browser in parallel.
----------------------------------------------------------------------
Comment By: Nobody/Anonymous (nobody)
Date: 2008-07-17 12:05
Message:
Logged In: NO
Thanks for the hint. The bot doesn't ask for a password now, but it seems
like it still accidentally makes some edits under the IP address. And it
overwrites login.data files with every login/edit, so the bug is still
there and needs to be fixed.
----------------------------------------------------------------------
Comment By: Melancholie (melancholie)
Date: 2008-07-16 00:24
Message:
Logged In: YES
user_id=2089773
Originator: YES
The easiest and best workaround is to use a password file!
Add the line
password_file = "yourFileName"
to your user-config.py
Provide yourFileName with the following line(s):
("yourBot", "yourPassword")
Login and re-login is done automatically then.
----------------------------------------------------------------------
Comment By: Nobody/Anonymous (nobody)
Date: 2008-07-15 20:21
Message:
Logged In: NO
the bot asks for a password every time it tries to save a page.
"centralauth_Token" and "centralauth_Session" values in xxx-login.data
files change every time, and the bot doesn't stay logged in. any
workaround?
----------------------------------------------------------------------
Comment By: Carsrac (carsrac)
Date: 2008-06-06 08:38
Message:
Logged In: YES
user_id=2066902
Originator: NO
I also have problems. I constantly need to enter my password for several
wikis, and when I do, the bot makes the edit under my IP address instead of
the username of the bot. The wikis that are affected are fr, ro, be, nds-nl,
af, hif, sah, srn. My IP address is 213.132.164.186 and my bot user name is
CarsracBot. As you all know, IW bots may not make any edits under their IP
address. So I think this bug will very likely affect a lot of bots running
with the pywikipedia scripts.
----------------------------------------------------------------------
Comment By: Szymon wierkosz (sswierkosz)
Date: 2008-06-05 11:25
Message:
Logged In: YES
user_id=2022153
Originator: NO
I don't know if this is correct behavior, but it is certainly annoying.
----------------------------------------------------------------------
Comment By: Melancholie (melancholie)
Date: 2008-06-05 11:20
Message:
Logged In: YES
user_id=2089773
Originator: YES
@sswierkosz: Yes, maybe that could be the reason. MediaWiki bug?
----------------------------------------------------------------------
Comment By: Szymon wierkosz (sswierkosz)
Date: 2008-06-05 11:07
Message:
Logged In: YES
user_id=2022153
Originator: NO
I noticed that if you log out, all sessions are destroyed, not only the
current one. Maybe you logged in and then logged out using your bot account,
so its cookies are not valid anymore.
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=1985308&group_…
Revision: 5805
Author: nicdumz
Date: 2008-08-18 01:48:25 +0000 (Mon, 18 Aug 2008)
Log Message:
-----------
Even better _putPage encoding handling for edit summaries/wikitext:
* if the parameter is an ascii string, let it be
* if the parameter is unicode :
** encode it to the site's encoding
** if it fails, raise a user-friendly error
* if the parameter is a non-ascii string, we don't want it, raise a user-friendly error
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-08-17 15:14:02 UTC (rev 5804)
+++ trunk/pywikipedia/wikipedia.py 2008-08-18 01:48:25 UTC (rev 5805)
@@ -1283,6 +1283,18 @@
return self._putPage(newtext, comment, watchArticle, minorEdit,
newPage, self.site().getToken(sysop = sysop), sysop = sysop)
+ def _encodeArg(self, arg, msgForError):
+ """Encode an ascii string/Unicode string to the site's encoding"""
+ try:
+ if isinstance(arg, str):
+ arg.decode() # fails with UnicodeDecodeError if non-ascii
+ except UnicodeDecodeError:
+ raise PageNotSaved("An ascii string or unicode %s is expected" % msgForError)
+ try:
+ return arg.encode(self.site().encoding())
+ except UnicodeDecodeError:
+ raise PageNotSaved("The %s could not be converted to the site's encoding (%s)" % (msgForError, self.site().encoding()))
+
def _putPage(self, text, comment=None, watchArticle=False, minorEdit=True,
newPage=False, token=None, newToken=False, sysop=False,
captchaId=None, captchaAnswer=None ):
@@ -1294,20 +1306,10 @@
host = self.site().hostname()
# Get the address of the page on that host.
address = self.site().put_address(self.urlname())
- # Use the proper encoding for the comment
- try:
- encodedComment = comment.encode(self.site().encoding())
- except UnicodeDecodeError:
- raise ValueError("An ascii string or unicode edit comment is expected as an argument")
- # Encode the text into the right encoding for the wiki
- try:
- encodedText = text.encode(self.site().encoding())
- except UnicodeDecodeError:
- raise ValueError("An ascii string or unicode wikitext is expected as an argument")
predata = {
'wpSave': '1',
- 'wpSummary': encodedComment,
- 'wpTextbox1': encodedText,
+ 'wpSummary': _encodeArg(comment, 'edit summary'),
+ 'wpTextbox1': _encodeArg(text, 'wikitext'),
}
if captchaId:
predata["wpCaptchaId"] = captchaId
Bugs item #2026525, was opened at 2008-07-24 10:26
Message generated for change (Settings changed) made by nicdumz
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2026525&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
>Status: Closed
>Resolution: Fixed
Priority: 5
Private: No
Submitted By: NicDumZ Nicolas Dumazet (nicdumz)
Assigned to: Nobody/Anonymous (nobody)
Summary: diskcache fails with an IOError[22] in welcome.py
Initial Comment:
Traceback (most recent call last):
File "pywikipedia/welcome.py", line 967, in <module>
for number_user in main(settingsBot):
File "pywikipedia/welcome.py", line 704, in main
contrib = string.capitalize(wsite.mediawiki_message('contribslink'))
File "/home/filnik/pywikipedia/wikipedia.py", line 4678, in mediawiki_message
value = self._mediawiki_messages[key]
File "/home/filnik/pywikipedia/diskcache.py", line 94, in __getitem__
self.cache_file.seek(self.lookup[i])
IOError: [Errno 22] Invalid argument
Does that mean that some lookup values are still -1 even after the initialization ?
----------------------------------------------------------------------
Comment By: NicDumZ Nicolas Dumazet (nicdumz)
Date: 2008-07-24 19:40
Message:
Logged In: YES
user_id=1963242
Originator: YES
mmm... Running a custom script in another system, I got a much more
user-friendly message than "Invalid argument" : it complained about the
cache file not being found. I believe that this is caused by our deferred
put thread, which might still be running when stopme() is called.
I attempted, in r5753, to address this issue. Waiting a bit more before
closing this issue.
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2026525&group_…
Bugs item #2030278, was opened at 2008-07-28 14:48
Message generated for change (Comment added) made by nicdumz
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2030278&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
>Status: Closed
>Resolution: Invalid
Priority: 5
Private: No
Submitted By: Purodha B Blissenbach (purodha)
Assigned to: Nobody/Anonymous (nobody)
Summary: "pre_put_convert" function missing from most families.
Initial Comment:
R5764 introduced a bug, for which I submitted a quick fix with R5766. I sincerely believe that this was only a partial fix. Although it works for me using interwiki.py -family:wikipedia, I expect it to not cure the error for other wiki families, and possibly this was not the best way to do it anyway.
Here is an output *before* my quick-and-dirty fix:
python /home/purodha/pywikipedia/interwiki.py -v -initialredirect Saachjrupp:Wikipedia:Medmaacher kann winnnish Hinndi
Checked for running processes. 1 processes currently running, including the current process.
Pywikipediabot (r5764 (wikipedia.py), Jul 27 2008, 11:04:03)
Python 2.5.2 (r252:60911, May 28 2008, 19:19:25)
[GCC 4.2.4 (Debian 4.2.4-1)]
Retrieving mediawiki messages from Special:Allmessages
WARNING: No character set found.
Getting 1 pages from wikipedia:ksh...
[[Saachjrupp:Wikipedia:Medmaacher kann winnnish Hinndi]]: [[ksh:Saachjrupp:Wikipedia:Medmaacher kann winnnish Hinndi]] gives new interwiki [[lt:Kategorija:User hi-1]]
[[Saachjrupp:Wikipedia:Medmaacher kann winnnish Hinndi]]: [[ksh:Saachjrupp:Wikipedia:Medmaacher kann winnnish Hinndi]] gives new interwiki [[zh:Category:Hi-1 ???]]
[[Saachjrupp:Wikipedia:Medmaacher kann winnnish Hinndi]]: [[ksh:Saachjrupp:Wikipedia:Medmaacher kann winnnish Hinndi]] gives new interwiki [[th:?????????:User hi-1]]
--- note, some 500 lines skipped ---
======Post-processing [[ksh:Saachjrupp:Wikipedia:Medmaacher kann winnnish Hinndi]]======
Updating links on page [[lt:Kategorija:User hi-1]].
Changes to be made: Pridedama: it, ksh, nl, no, pl, zh
- [[as:Category:User hi-1]]
+ [[as:??????:User hi-1]]
+ [[it:Categoria:Utenti hi-1]]
+ [[ksh:Saachjrupp:Wikipedia:Medmaacher kann winnnish Hinndi]]
- [[lad:Category:User hi-1]]
+ [[lad:Categora:User hi-1]]
+ [[nl:Categorie:Gebruiker hi-1]]
+ [[no:Kategori:Bruker hi-1]]
+ [[pl:Kategoria:User hi-1]]
+ [[zh:Category:Hi-1 ???]]
NOTE: Updating live wiki...
Getting information for site wikipedia:lt
Copy of watchlist is one month old, reloading
Retrieving watchlist for wikipedia:lt
Parsing watchlist
Dump ksh (wikipedia) saved
Traceback (most recent call last):
File "/home/purodha/pywikipedia/interwiki.py", line 1755, in <module>
bot.run()
File "/home/purodha/pywikipedia/interwiki.py", line 1492, in run
self.queryStep()
File "/home/purodha/pywikipedia/interwiki.py", line 1471, in queryStep
subj.finish(self)
File "/home/purodha/pywikipedia/interwiki.py", line 1052, in finish
if self.replaceLinks(page, new, bot):
File "/home/purodha/pywikipedia/interwiki.py", line 1210, in replaceLinks
status, reason, data = page.put(newtext, comment = wikipedia.translate(page.site().lang, msg)[0] + mods)
File "/home/purodha/pywikipedia/wikipedia.py", line 1281, in put
newtext = self.site().pre_put_convert(newtext)
File "/home/purodha/pywikipedia/wikipedia.py", line 5754, in pre_put_convert
return self.family.pre_put_convert(self, putText)
AttributeError: Family instance has no attribute 'pre_put_convert'
#
----------------------------------------------------------------------
>Comment By: NicDumZ Nicolas Dumazet (nicdumz)
Date: 2008-08-17 17:25
Message:
Logged In: YES
user_id=1963242
Originator: NO
pre_put_convert is in the family superclass. I bet you had not updated the
whole directory ;)
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2030278&group_…
Bugs item #2034826, was opened at 2008-08-01 12:42
Message generated for change (Comment added) made by nicdumz
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2034826&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
>Status: Closed
>Resolution: Invalid
Priority: 5
Private: No
Submitted By: NicDumZ Nicolas Dumazet (nicdumz)
Assigned to: Nobody/Anonymous (nobody)
Summary: interwiki.py emptying pages ?
Initial Comment:
See http://es.wikipedia.org/w/index.php?title=Condado_de_R%C3%ADo_Arriba&diff=1… or http://es.wikipedia.org/w/index.php?title=Byt%C3%B3w&diff=19033997&oldid=18… ...
This is of course the last revision, interwiki.py in autonomous mode, operating for this time from it:
I really don't have any time to investigate this now. Could someone look into it, please ? :)
----------------------------------------------------------------------
>Comment By: NicDumZ Nicolas Dumazet (nicdumz)
Date: 2008-08-17 17:23
Message:
Logged In: YES
user_id=1963242
Originator: YES
okay, thanks for the explanation :)
----------------------------------------------------------------------
Comment By: tuvic (tuvic)
Date: 2008-08-01 13:26
Message:
Logged In: YES
user_id=1557188
Originator: NO
The bots didn't empty pages, the diff is just wrong due to a server error.
This is already fixed, I think, or at least being worked on.
For more info: see
http://en.wikipedia.org/wiki/Wikipedia:Village_pump_(technical)#Bug:_revisi…
and
https://bugzilla.wikimedia.org/show_bug.cgi?id=14933
Greetings, Tuvic
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2034826&group_…
Bugs item #2042864, was opened at 2008-08-08 12:52
Message generated for change (Comment added) made by nicdumz
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2042864&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: General
Group: None
>Status: Closed
>Resolution: Fixed
Priority: 5
Private: No
Submitted By: Multichill (multichill)
Assigned to: Nobody/Anonymous (nobody)
Summary: Cannot edit at pl wikipedia because of spamfilter
Initial Comment:
replace.py -lang:pl -regex "\{\{[Cc]ommons\|[Cc]ategory:" "{{Commonscat|" -excepttext:"ommonscat" -summary:"Commons -> commonscat" -transcludes:Commons -namespace:14
>>> Kategoria:Biologia <<<
- {{commons|Category:Biology}}
+ {{Commonscat|Biology}}
Sleeping for 3.5 seconds, 2008-08-08 12:51:02
Changing page [[pl:Kategoria:Biologia]]
Cannot change Kategoria:Biologia because of blacklist entry http://www.example.com
>>> Kategoria:Pierwiastki chemiczne <<<
- {{commons|Category:Elements}}
+ {{Commonscat|Elements}}
Changing page [[pl:Kategoria:Pierwiastki chemiczne]]
Cannot change Kategoria:Pierwiastki chemiczne because of blacklist entry http:/www.example.com
version.py :
Pywikipedia [svn+ssh] multichill@trunk/pywikipedia (r5790, Aug 07 2008, 10:10:46
)
Python 2.5.1 (r251:54863, Apr 18 2007, 08:51:08) [MSC v.1310 32 bit (Intel)]
----------------------------------------------------------------------
>Comment By: NicDumZ Nicolas Dumazet (nicdumz)
Date: 2008-08-17 17:21
Message:
Logged In: YES
user_id=1963242
Originator: NO
fixed in r5803, see
https://sourceforge.net/tracker/index.php?func=detail&aid=2055939&group_id=…
for the technical causes of that bug
----------------------------------------------------------------------
Comment By: Nobody/Anonymous (nobody)
Date: 2008-08-10 11:50
Message:
Logged In: NO
The same behavior occurs when trying to edit any page on pl.wikinews.
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2042864&group_…