Revision: 5914
Author: wikipedian
Date: 2008-09-22 16:15:37 +0000 (Mon, 22 Sep 2008)
Log Message:
-----------
Fixed the Esperanto X-convention bug [ 2006208 ] by rolling back many changes that
concerned Esperanto X-conv.
I fixed this on 2008-08-21 already, but somehow my commit seems to have failed
(sorry), so I am now committing it again.
Modified Paths:
--------------
trunk/pywikipedia/families/wikipedia_family.py
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/families/wikipedia_family.py
===================================================================
--- trunk/pywikipedia/families/wikipedia_family.py 2008-09-22 09:58:41 UTC (rev 5913)
+++ trunk/pywikipedia/families/wikipedia_family.py 2008-09-22 16:15:37 UTC (rev 5914)
@@ -966,16 +966,4 @@
return self.code2encoding(code),
def shared_image_repository(self, code):
- return ('commons', 'commons')
-
- def post_get_convert(self, site, getText):
- if site.lang == 'eo':
- return wikipedia.decodeEsperantoX(getText)
- else:
- return getText
-
- def pre_put_convert(self, site, getText):
- if site.lang == 'eo':
- return wikipedia.encodeEsperantoX(getText)
- else:
- return getText
+ return ('commons', 'commons')
\ No newline at end of file
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-09-22 09:58:41 UTC (rev 5913)
+++ trunk/pywikipedia/wikipedia.py 2008-09-22 16:15:37 UTC (rev 5914)
@@ -799,12 +799,12 @@
else:
self._isWatched = False
# Now process the contents of the textarea
- # Unescape HTML characters, strip whitespace and postconvert
- pagetext = text[i1:i2]
- pagetext = unescape(pagetext)
- pagetext = pagetext.rstrip()
- pagetext = self.site().post_get_convert(pagetext)
-
+ # Unescape HTML characters, strip whitespace
+ pagetext = text[i1:i2]
+ pagetext = unescape(pagetext)
+ pagetext = pagetext.rstrip()
+ if self.site().lang == 'eo':
+ pagetext = decodeEsperantoX(pagetext)
m = self.site().redirectRegex().match(pagetext)
if m:
# page text matches the redirect pattern
@@ -1295,7 +1295,12 @@
import watchlist
watchArticle = watchlist.isWatched(self.title(), site = self.site())
newPage = not self.exists()
- newtext = self.site().pre_put_convert(newtext)
+ # if posting to an Esperanto wiki, we must e.g. write Bordeauxx instead
+ # of Bordeaux
+ if self.site().lang == 'eo':
+ newtext = encodeEsperantoX(newtext)
+ comment = encodeEsperantoX(comment)
+
return self._putPage(newtext, comment, watchArticle, minorEdit,
newPage, self.site().getToken(sysop = sysop), sysop = sysop)
@@ -2237,7 +2242,7 @@
reason = input(u'Please enter a reason for the deletion:')
answer = 'y'
if prompt and not hasattr(self.site(), '_noDeletePrompt'):
- answer = inputChoice(u'Do you want to delete %s?' % self.aslink(forceInterwiki = True), ['Yes', 'No', 'All'], ['Y', 'N', 'A'], 'N')
+ answer = inputChoice(u'Do you want to delete %s?' % self.aslink(forceInterwiki = True), ['yes', 'no', 'all'], ['y', 'N', 'a'], 'N')
if answer == 'a':
answer = 'y'
self.site()._noDeletePrompt = True
@@ -2939,6 +2944,9 @@
def getData(self):
address = self.site.export_address()
pagenames = [page.sectionFreeTitle() for page in self.pages]
+ # We need to use X convention for requested page titles.
+ if self.site.lang == 'eo':
+ pagenames = [encodeEsperantoX(pagetitle) for pagetitle in pagenames]
pagenames = u'\r\n'.join(pagenames)
if type(pagenames) is not unicode:
output(u'Warning: xmlreader.WikipediaXMLHandler.getData() got non-unicode page names. Please report this.')
@@ -3995,11 +4003,6 @@
linktrail: Return regex for trailing chars displayed as part of a link.
disambcategory: Category in which disambiguation pages are listed.
- post_get_convert: Converts text data from the site immediatly after get
- i.e. EsperantoX -> unicode
- pre_put_convert: Converts text data from the site immediatly before put
- i.e. unicode -> EsperantoX
-
Methods that yield Page objects derived from a wiki's Special: pages
(note, some methods yield other information in a tuple along with the
Pages; see method docs for details) --
@@ -5840,12 +5843,6 @@
"""Return regex for trailing chars displayed as part of a link."""
return self.family.linktrail(self.lang)
- def post_get_convert(self, getText):
- return self.family.post_get_convert(self, getText)
-
- def pre_put_convert(self, putText):
- return self.family.pre_put_convert(self, putText)
-
def language(self):
"""Return Site's language code."""
return self.lang
Hi,
I work on two Wikis, and on one of them, I'd like to use PyWikipedia to
automate some tasks (I may end up using it on the other, too, but that's not
my question).
So, during configuration, I ran generate_user_files.py, but since I had not
yet created a Family file for either, they weren't options. Thus, I simply
created a default one using my Wikipedia account. Tested it, worked fine
(connected, asked for my password, at which point I used Ctrl+C to kill the
script since I don't have authorization to run a bot on Wikipedia).
At this point I created a Family file for the Wiki I do want to use the bot
on. I manually edited the user-config.py file to point to the new family,
and ran login.py, and I got some error messages - some misaligned tabs in
my family file. I fixed these, and then I got a very odd message:
"Please create a file user-config.py, and put in there:
One line saying "mylang='language'"
One line saying "usernames['wikipedia']['language']='yy'"
...filling in your username and the language code of the wiki you want to
work
on.
For other possible configuration variables check config.py."
I tried deleting user-config.py, and using generate_user_files.py again,
this time selecting my Wiki from the list. generate_user_files.py worked
fine, but login.py had the same error again.
Then I deleted user-config.py and created it manually with the lines stated
on http://meta.wikimedia.org/wiki/Pywikipedia_bot_on_non-wikimedia_projects:
family = 'sitename' # matches the name of the "sitename_family.py" file
mylang = 'en' # the only language on the Wiki
usernames['sitename']['en'] = DragoonWraith # my own account, at least for
testing
Still got the same error.
At this point, I created a family file for the *other* Wiki, and that worked
fine. Deleted user-config.py, re-created it with generate_user_files.py, and
successfully logged in with login.py. Didn't do anything with it because I
haven't really gotten that far; still need to learn how to use it - but
that's beside the point.
OK, so I tried re-creating my family file for the first Wiki, since that was
the only thing that was different. Still no go.
So, does anyone have any suggestions? Is there any relevant information I
should be sending about the Wiki? Should I include the family file or the
user-config file? Or anything else? Unfortunately, I'm such a newbie that I
don't even know what questions to ask, or how to ask them properly. Sorry
about that.
- DragoonWraith
Bugs item #2209355, was opened at 2008-10-30 10:15
Message generated for change (Comment added) made by a_engels
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2209355&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: interwiki
Group: None
>Status: Closed
>Resolution: Invalid
Priority: 5
Private: No
Submitted By: Maxim Semenik (maxsem)
Assigned to: Nobody/Anonymous (nobody)
Summary: interwiki.py shouldn't mix links to different namespaces
Initial Comment:
This was completely overkill:
http://en.wikipedia.org/w/index.php?title=Russian_Wikipedia&curid=2036528&d…
It violates [[WP:SELF]] by needlessly directing simple readers to places intended for editors, and propagates one mistake to every project.
----------------------------------------------------------------------
>Comment By: Andre Engels (a_engels)
Date: 2008-10-30 10:41
Message:
The bot already includes such links only if the operator says yes to them.
That some operators prefer to include the best links rather than adhere to
your dogmatism, is our (because I'm one of them) fault, not that of the
bot.
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2209355&group_…
Bugs item #2209355, was opened at 2008-10-30 12:15
Message generated for change (Tracker Item Submitted) made by Item Submitter
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2209355&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: interwiki
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: Maxim Semenik (maxsem)
Assigned to: Nobody/Anonymous (nobody)
Summary: interwiki.py shouldn't mix links to different namespaces
Initial Comment:
This was completely overkill:
http://en.wikipedia.org/w/index.php?title=Russian_Wikipedia&curid=2036528&d…
It violates [[WP:SELF]] by needlessly directing simple readers to places intended for editors, and propagates one mistake to every project.
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2209355&group_…
Bugs item #2208999, was opened at 2008-10-30 05:00
Message generated for change (Tracker Item Submitted) made by Item Submitter
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2208999&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: General
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: Nobody/Anonymous (nobody)
Assigned to: Nobody/Anonymous (nobody)
Summary: yo.wiki incorrectly throws NoPage errors
Initial Comment:
version.py results:
Pywikipedia [http] trunk/pywikipedia (r6044, Oct 29 2008, 20:12:04)
Python 2.5.1 (r251:54863, Apr 18 2007, 08:51:08) [MSC v.1310 32 bit (Intel)]
Trying to read a Yoruba Wikipedia article always fails with a NoPage error. It appears that yo.wiki is set up with a linefeed in the "History" tab text. The fix is to use the re.DOTALL flag when searching for the history tab.
My patch:
def _getEditPage()
...
# Find out if page actually exists. Only existing pages have a
# version history tab.
if self.site().family.RversionTab(self.site().language()):
# In case a family does not have version history tabs, or in
# another form
RversionTab = re.compile(self.site().family.RversionTab(self.site().language()), re.DOTALL)
else:
RversionTab = re.compile(r'<li id="ca-history"><a href=".*?title=.*?&action=history".*?>.*?</a></li>', re.DOTALL)
matchVersionTab = RversionTab.search(text)
if not matchVersionTab:
raise NoPage(self.site(), self.aslink(forceInterwiki = True))
...
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=2208999&group_…
Revision: 6043
Author: filnik
Date: 2008-10-29 19:21:05 +0000 (Wed, 29 Oct 2008)
Log Message:
-----------
Spare some time in checking phase, better now
Modified Paths:
--------------
trunk/pywikipedia/checkimages.py
Modified: trunk/pywikipedia/checkimages.py
===================================================================
--- trunk/pywikipedia/checkimages.py 2008-10-29 19:06:07 UTC (rev 6042)
+++ trunk/pywikipedia/checkimages.py 2008-10-29 19:21:05 UTC (rev 6043)
@@ -1100,19 +1100,62 @@
list_licenses.append(pageLicense) # the list has wiki-pages
return list_licenses
+ def miniTemplateCheck(self, template):
+ """
+ Is the template given in the licenses allowed or in the licenses to skip?
+ This function check this.
+ """
+ if template in self.list_licenses: # the list_licenses are loaded in the __init__ (not to load them multimple times)
+ self.seems_ok = True
+ self.license_found = self.license_selected # let the last "fake" license normally detected
+ return True
+ if template in self.hiddentemplates:
+ # if the whitetemplate is not in the images description, we don't care
+ try:
+ self.allLicenses.remove(template)
+ except ValueError:
+ return False
+ else:
+ self.whiteTemplatesFound = True
+ return False
+
+ def templateInList(self):
+ """
+ The problem is the calls to the Mediawiki system because they can be pretty slow.
+ While searching in a list of objects is really fast, so first of all let's see if
+ we can find something in the info that we already have, then make a deeper check.
+ """
+ for template in self.licenses_found:
+ self.license_selected = template.title().replace('Template:', '')
+ result = self.miniTemplateCheck(template)
+ if result:
+ break
+ if self.license_found == None:
+ for template in self.licenses_found:
+ try:
+ template.pageAPInfo()
+ except wikipedia.IsRedirectPage:
+ template = template.getRedirectTarget()
+ except wikipedia.NoPage:
+ continue
+ self.license_selected = template.title().replace('Template:', '')
+ result = self.miniTemplateCheck(template)
+ if result:
+ break
+
def smartDetection(self):
""" The bot instead of checking if there's a simple template in the
image's description, checks also if that template is a license or
something else. In this sense this type of check is smart.
"""
- seems_ok = False
- license_found = None
+ self.seems_ok = False
+ self.license_found = None
self.hiddentemplates = self.loadHiddenTemplates()
self.licenses_found = self.image.getTemplates()
- whiteTemplatesFound = False
+ self.whiteTemplatesFound = False
regex_find_licenses = re.compile(r'(?<!\{)\{\{(?:[Tt]emplate:|)([^{]*?)[|\n<}]', re.DOTALL)
templatesInTheImageRaw = regex_find_licenses.findall(self.imageCheckText)
- allLicenses = list()
+ self.allLicenses = list()
if self.list_licenses == []:
raise wikipedia.Error(u'No licenses allowed provided, add that option to the code to make the script working correctly')
# Found the templates ONLY in the image's description
@@ -1120,41 +1163,23 @@
for templateReal in self.licenses_found:
if self.convert_to_url(template_selected).lower().replace('template:', '') == \
self.convert_to_url(templateReal.title().lower().replace('template:', '')):
- if templateReal not in allLicenses: # don't put the same template, twice.
- allLicenses.append(templateReal)
+ if templateReal not in self.allLicenses: # don't put the same template, twice.
+ self.allLicenses.append(templateReal)
if self.licenses_found != []:
- for template in self.licenses_found:
- try:
- template.pageAPInfo()
- except wikipedia.IsRedirectPage:
- template = template.getRedirectTarget()
- except wikipedia.NoPage:
- continue
- license_selected = template.title().replace('Template:', '')
- if template in self.list_licenses: # the list_licenses are loaded in the __init__ (not to load them multimple times)
- seems_ok = True
- license_found = license_selected # let the last "fake" license normally detected
- break
- if template in self.hiddentemplates:
- # if the whitetemplate is not in the images description, we don't care
- try:
- allLicenses.remove(template)
- except ValueError:
- continue
- else:
- whiteTemplatesFound = True
- continue
- if license_found == None and allLicenses != list():
- license_found = license_selected
- if not seems_ok and license_found != None:
- rep_text_license_fake = u"\n*[[:Image:%s]] seems to have a ''fake license'', license detected: <nowiki>%s</nowiki>" % (self.imageName, license_found)
- regexFakeLicense = r"\* ?\[\[:Image:%s\]\] seems to have a ''fake license'', license detected: <nowiki>%s</nowiki>$" % (re.escape(self.imageName), license_found)
- printWithTimeZone(u"%s seems to have a fake license: %s, reporting..." % (self.imageName, license_found))
+ self.templateInList()
+ if self.license_found == None and self.allLicenses != list():
+ self.license_found = self.license_selected
+ if not self.seems_ok and self.license_found != None:
+ rep_text_license_fake = u"\n*[[:Image:%s]] seems to have " + \
+ "a ''fake license'', license detected: <nowiki>%s</nowiki>" % (self.imageName, self.license_found)
+ regexFakeLicense = r"\* ?\[\[:Image:%s\]\] seems to have " + \
+ "a ''fake license'', license detected: <nowiki>%s</nowiki>$" % (re.escape(self.imageName), self.license_found)
+ printWithTimeZone(u"%s seems to have a fake license: %s, reporting..." % (self.imageName, self.license_found))
self.report_image(self.imageName, rep_text = rep_text_license_fake,
addings = False, regex = regexFakeLicense)
- elif license_found != None:
- printWithTimeZone(u"%s seems ok, license found: %s..." % (self.imageName, license_found))
- return (license_found, whiteTemplatesFound)
+ elif self.license_found != None:
+ printWithTimeZone(u"%s seems ok, license found: %s..." % (self.imageName, self.license_found))
+ return (self.license_found, self.whiteTemplatesFound)
def load(self, raw):
""" Load a list of object from a string using regex. """
@@ -1405,7 +1430,6 @@
self.some_problem = False
return True
elif brackets == True and license_found != None:
- seems_ok = False
# It works also without this... but i want only to be sure ^^
brackets = False
return True