Revision: 5344
Author: siebrand
Date: 2008-05-09 17:08:13 +0000 (Fri, 09 May 2008)
Log Message:
-----------
Add switch -replaceonly when working on a wiki without sysop rights
Modified Paths:
--------------
trunk/pywikipedia/nowcommons.py
Modified: trunk/pywikipedia/nowcommons.py
===================================================================
--- trunk/pywikipedia/nowcommons.py 2008-05-09 16:32:01 UTC (rev 5343)
+++ trunk/pywikipedia/nowcommons.py 2008-05-09 17:08:13 UTC (rev 5344)
@@ -9,8 +9,8 @@
the source wiki. If multiple versions of the file exist, the script will not
delete. If the MD5 comparison is not equal, the script will not delete.
-A sysop account on the local wiki is required if you want this script to work
-properly.
+A sysop account on the local wiki is required if you want all features of
+this script to work properly.
This script understands various command-line arguments:
-autonomous: run automatically, do not ask any questions. All files
@@ -30,6 +30,9 @@
or in galleries. However, it can also make more
mistakes.
+ -replaceonly: Use this if you do not have a local sysop account, but do
+ wish to replace links from the NowCommons template.
+
Known issues. Please fix these if you are capable and motivated:
- if a file marked nowcommons is not present on Wikimedia Commons, the bot
will exit.
@@ -53,6 +56,7 @@
replace = False
replacealways = False
replaceloose = False
+replaceonly = False
for arg in wikipedia.handleArgs():
if arg == '-autonomous':
@@ -64,6 +68,8 @@
replacealways = True
if arg == '-replaceloose':
replaceloose = True
+ if arg == '-replaceonly':
+ replaceonly = True
nowCommons = {
'_default': [
@@ -193,20 +199,21 @@
else:
wikipedia.output(u'No page is using \"\03{lightgreen}%s\03{default}\" anymore.' % localImagePage.titleWithoutNamespace())
commonsText = commonsImagePage.get()
- if md5 == commonsImagePage.getFileMd5Sum():
- wikipedia.output(u'The image is identical to the one on Commons.')
- if autonomous == False:
- wikipedia.output(u'\n\n>>>> Description on \03{lightpurple}%s\03{default} <<<<\n' % page.title())
- wikipedia.output(localImagePage.get())
- wikipedia.output(u'\n\n>>>> Description on \03{lightpurple}%s\03{default} <<<<\n' % commonsImagePage.title())
- wikipedia.output(commonsText)
- choice = wikipedia.inputChoice(u'Does the description on Commons contain all required source and license information?', ['yes', 'no'], ['y', 'N'], 'N')
- if choice == 'y':
+ if replaceonly == False:
+ if md5 == commonsImagePage.getFileMd5Sum():
+ wikipedia.output(u'The image is identical to the one on Commons.')
+ if autonomous == False:
+ wikipedia.output(u'\n\n>>>> Description on \03{lightpurple}%s\03{default} <<<<\n' % page.title())
+ wikipedia.output(localImagePage.get())
+ wikipedia.output(u'\n\n>>>> Description on \03{lightpurple}%s\03{default} <<<<\n' % commonsImagePage.title())
+ wikipedia.output(commonsText)
+ choice = wikipedia.inputChoice(u'Does the description on Commons contain all required source and license information?', ['yes', 'no'], ['y', 'N'], 'N')
+ if choice == 'y':
+ localImagePage.delete(comment + ' [[:commons:Image:%s]]' % filenameOnCommons, prompt = False)
+ else:
localImagePage.delete(comment + ' [[:commons:Image:%s]]' % filenameOnCommons, prompt = False)
else:
- localImagePage.delete(comment + ' [[:commons:Image:%s]]' % filenameOnCommons, prompt = False)
- else:
- wikipedia.output(u'The image is not identical to the one on Commons.')
+ wikipedia.output(u'The image is not identical to the one on Commons.')
except (wikipedia.NoPage, wikipedia.IsRedirectPage), e:
wikipedia.output(u'%s' % e)
continue
Revision: 5343
Author: siebrand
Date: 2008-05-09 16:32:01 +0000 (Fri, 09 May 2008)
Log Message:
-----------
Add feature to replace local image with shared repo image. New switches:
-replace: replace with confirmation
-replacealways: replace without confirmation (switch from image.py)
-replaceloose: use loose pattern matching when replacing (switch from image.py)
Modified Paths:
--------------
trunk/pywikipedia/nowcommons.py
Modified: trunk/pywikipedia/nowcommons.py
===================================================================
--- trunk/pywikipedia/nowcommons.py 2008-05-09 11:35:15 UTC (rev 5342)
+++ trunk/pywikipedia/nowcommons.py 2008-05-09 16:32:01 UTC (rev 5343)
@@ -9,13 +9,27 @@
the source wiki. If multiple versions of the file exist, the script will not
delete. If the MD5 comparison is not equal, the script will not delete.
-A sysop account is required for this script to work.
+A sysop account on the local wiki is required if you want this script to work
+properly.
This script understands various command-line arguments:
- -autonomous: run automatically, do not ask any questions. All files
- that qualify for deletion are deleted. Reduced screen
- output.
+ -autonomous: run automatically, do not ask any questions. All files
+ that qualify for deletion are deleted. Reduced screen
+ output.
+ -replace: replace links if the files are equal and the file names
+ differ
+
+ -replacealways: replace links if the files are equal and the file names
+ differ without asking for confirmation
+
+ -replaceloose: Do loose replacements. This will replace all occurences
+ of the name of the image (and not just explicit image
+ syntax). This should work to catch all instances of the
+ file, including where it is used as a template parameter
+ or in galleries. However, it can also make more
+ mistakes.
+
Known issues. Please fix these if you are capable and motivated:
- if a file marked nowcommons is not present on Wikimedia Commons, the bot
will exit.
@@ -31,14 +45,25 @@
import sys, re
import wikipedia, pagegenerators
+import image
# only for nowCommonsMessage
from imagetransfer import nowCommonsMessage
autonomous = False
+replace = False
+replacealways = False
+replaceloose = False
for arg in wikipedia.handleArgs():
if arg == '-autonomous':
autonomous = True
+ if arg == '-replace':
+ replace = True
+ if arg == '-replacealways':
+ replace = True
+ replacealways = True
+ if arg == '-replaceloose':
+ replaceloose = True
nowCommons = {
'_default': [
@@ -65,7 +90,7 @@
u'גם בוויקישיתוף'
],
'ja':[
- u'NowCommons',
+ u'NowCommons',
],
'ia': [
u'OraInCommons'
@@ -82,7 +107,7 @@
u'NowCommons'
],
'zh':[
- u'NowCommons',
+ u'NowCommons',
u'Nowcommons',
u'NCT',
],
@@ -104,7 +129,7 @@
def __init__(self):
self.site = wikipedia.getSite()
if repr(self.site) == 'commons:commons':
- sys.exit('Don\'t run this bot on Commons!')
+ sys.exit('Do not run this bot on Commons!')
ncList = self.ncTemplates()
self.nowCommonsTemplate = wikipedia.Page(self.site, 'Template:' + ncList[0])
@@ -150,23 +175,23 @@
if not filenameOnCommons:
wikipedia.output(u'NowCommons template not found.')
continue
- commonsImagePage = wikipedia.ImagePage(commons,
- 'Image:%s' % filenameOnCommons)
+ commonsImagePage = wikipedia.ImagePage(commons, 'Image:%s' % filenameOnCommons)
if len(localImagePage.getFileVersionHistory()) > 1:
- wikipedia.output(u"""\
-This image has a version history. Please delete it manually after making sure
-that the old versions aren't worth keeping.""")
+ wikipedia.output(u"This image has a version history. Please delete it manually after making sure that the old versions are not worth keeping.""")
continue
if localImagePage.titleWithoutNamespace() != commonsImagePage.titleWithoutNamespace():
usingPages = list(localImagePage.usingPages())
if usingPages and usingPages != [localImagePage]:
- wikipedia.output(
- '%s is still used in %i pages. Please change them manually.'
- % (localImagePage.title(), len(usingPages)))
+ wikipedia.output(u'\"\03{lightred}%s\03{default}\" is still used in %i pages.' % (localImagePage.titleWithoutNamespace(), len(usingPages)))
+ if replace == True:
+ wikipedia.output(u'Replacing \"\03{lightred}%s\03{default}\" by \"\03{lightgreen}%s\03{default}\".' % (localImagePage.titleWithoutNamespace(), commonsImagePage.titleWithoutNamespace()))
+ oImageRobot = image.ImageRobot(pagegenerators.FileLinksGenerator(localImagePage), localImagePage.titleWithoutNamespace(), commonsImagePage.titleWithoutNamespace(), '', replacealways, replaceloose)
+ oImageRobot.run()
+ else:
+ wikipedia.output(u'Please change them manually.')
continue
else:
- wikipedia.output('No page is using %s anymore.'
- % localImagePage.title())
+ wikipedia.output(u'No page is using \"\03{lightgreen}%s\03{default}\" anymore.' % localImagePage.titleWithoutNamespace())
commonsText = commonsImagePage.get()
if md5 == commonsImagePage.getFileMd5Sum():
wikipedia.output(u'The image is identical to the one on Commons.')
@@ -180,6 +205,8 @@
localImagePage.delete(comment + ' [[:commons:Image:%s]]' % filenameOnCommons, prompt = False)
else:
localImagePage.delete(comment + ' [[:commons:Image:%s]]' % filenameOnCommons, prompt = False)
+ else:
+ wikipedia.output(u'The image is not identical to the one on Commons.')
except (wikipedia.NoPage, wikipedia.IsRedirectPage), e:
wikipedia.output(u'%s' % e)
continue
Hi all,
I'd like to suggest granting developer rights to Purodha B Blissenbach
([[de:Benutzer:Purodha]]). He has contributed some useful patches, but
sometimes they just starve because nobody seems to have time to review them.
I have met Purodha in person, and I think he's very careful in testing his
changes before uploading.
One example is his extensive bug report [ 1903113 ] interwiki.py looses access
to Wikipedias, see:
https://sourceforge.net/tracker/index.php?func=detail&aid=1903113&group_id=…
He has submitted two patches, 1907586 and 1918278. I think the approach is
worth a try, but the patches seem to be already outdated.
So, Purodha, would you like to have developer access to the SVN repository?
And what do others think about my suggestion?
Cheers
Daniel
Hi,
I recently started using pywikipedia-scripts again after some time-out.
Now when I try to output page content to the screen using
gen = pagegenerators.CategorizedPageGenerator(cat)
for page in gen:
text = page.get()
print page.title
The current output is (for example for a page with the name 1095)
<bound method Page.site of Page{[[1095]]}>
Can anyone tell me how to eliminate the
<bound method Page.site of Page{[[]]}>
I only need the page name
Thanks in advance
Ruud
--
Met vriendelijke groet,
Ruud Habets
----------------------------------
Ruud Habets
mail ruud(a)kgv.nl
www http://www.kgv.nl
tel. 045-5418899 (p)
tel. 0650-844386 (m)
tel. 045-4006037 (w)
----------------------------------
By request, here is an update for interested persons on the status of the
rewrite branch.
The major purpose of the rewrite branch is to implement a bot framework that
uses the new MediaWiki API (see http://www.mediawiki.org/wiki/API for
details) instead of the old approach of scraping HTML wiki pages. Note that
many other potential areas for a rewrite have been suggested on this list,
and at http://www.botwiki.sno.cc/wiki/Rewrite, but most of those are not
currently being pursued due to a lack of resources.
The software in the rewrite branch currently is runnable, but incomplete,
and with limited documentation. For the most part, the bot programming
interface is intended to be very similar to the interface used in the
current pywikipedia trunk, so that bot programs can be ported easily, but
there are significant changes that we have started to document in the file
README-conversion.txt. To date, most methods of the Page object that read
from the wiki have been implemented; you can, for example, instantiate a
Page object, get its text, and retrieve its templates, links, categories,
backlinks, and so forth (interwiki links other than language links are not
yet implemented). Methods that provide Site-wide lists of pages (allpages,
allcategories, randompages, etc.) have not yet been implemented, but this is
next on my to-do list. Most methods in the existing framework that
manipulate wiki text have not yet been ported (things like
replaceCategoryLinks), but these should require very few changes.
The ability to save changes to the wiki is *not* yet implemented. Note that
the MediaWiki API does now have the ability to edit pages, but this has not
yet been activated on any WMF wikis, so once editing is implemented in the
bot framework, it still will be of limited use.
At the moment, I am doing most of the development work on this branch.
Valhallasw contributed the http interface, nicdumz has contributed some
user-related methods, and although he has not contributed directly, I have
stolen^H^H^H^H^H^Hbeen inspired by some of Bryan Tong Minh's ideas from his
mwclient project. Let me make it clear that I do not consider this "my"
project by any means; anyone who is willing and able to contribute will be
most welcome. It may be helpful for any new contributors to announce which
aspects of the code they are planning to work on, to avoid duplication of
effort.
Russ
Please help, I can't do it. :-(
I had a working command:
replace.py -links:User:BinBot/try -fix:datumjav -recursive -allowoverlap
Now I don't want to edit the articles containing templates {{szinnyei}} or
{{pallas}} because they are not worth dealing with. I tried to insert
-excepttext:'szinnyei'
-excepttext:szinnyei
-excepttext:{{szinnyei}}
-excepttext:'\{\{szinnyei\}\}'
and so on, and none of them worked, it always edits those terrible articles.
I also tried except instead of excepttext, because the help inside the
replace.py states that. I am a bit confused because
http://meta.wikimedia.org/wiki/Replace.py states that only older versions
used except, and my replace.py contains the LockedPage exception, so it must
be the newest according to
http://pywikipediabot.cvs.sourceforge.net/pywikipediabot/pywikipedia/replac…,
but the inside help still says except.
Anyhow, I cannot avoid editing articles with these templates. What would be
the correct syntax?
Bináris
Revision: 5342
Author: nicdumz
Date: 2008-05-09 11:35:15 +0000 (Fri, 09 May 2008)
Log Message:
-----------
* Adding a batch parameter to categorymembers to set the numbers of pages retrieved at once
* Trying to address NoPage issues
Modified Paths:
--------------
branches/rewrite/pywikibot/site.py
Modified: branches/rewrite/pywikibot/site.py
===================================================================
--- branches/rewrite/pywikibot/site.py 2008-05-09 10:56:35 UTC (rev 5341)
+++ branches/rewrite/pywikibot/site.py 2008-05-09 11:35:15 UTC (rev 5342)
@@ -744,7 +744,7 @@
for ns in namespaces)
return tlgen
- def categorymembers(self, category, namespaces=None):
+ def categorymembers(self, category, namespaces=None, batch=None):
"""Iterate members of specified category.
@param category: The Category to iterate.
@@ -754,6 +754,8 @@
however, that the iterated values are always Page objects, even
if in the Category or Image namespace.
@type namespaces: list of ints
+ @param batch: the number of pages to fetch each time.
+ @type batch: int
"""
if category.namespace() != 14:
@@ -766,6 +768,11 @@
if namespaces is not None:
cmgen.request[u"gcmnamespace"] = u"|".join(unicode(ns)
for ns in namespaces)
+ if batch is not None:
+ if batch > 5000:
+ logging.debug("No more than 5000 rows can be fetched at once.")
+ batch=5000
+ cmgen.request[u'cmlimit'] = str(batch)
return cmgen
def loadrevisions(self, page=None, getText=False, revids=None,
@@ -881,6 +888,8 @@
raise Error(
u"loadrevisions: Query on %s returned data on '%s'"
% (page, pagedata['title']))
+ if pagedata.has_key('missing'):
+ raise NoPage(u'Page %s does not exist' % page.title(asLink=True))
else:
page = Page(self, pagedata['title'])
api.update_page(page, pagedata)
Revision: 5340
Author: nicdumz
Date: 2008-05-08 19:49:52 +0000 (Thu, 08 May 2008)
Log Message:
-----------
Follow up to Daniel's last commit. I myself had locally disabled the "stop on new messages" feature in reflinks.py . Committing it :)
Modified Paths:
--------------
trunk/pywikipedia/reflinks.py
Modified: trunk/pywikipedia/reflinks.py
===================================================================
--- trunk/pywikipedia/reflinks.py 2008-05-08 19:37:31 UTC (rev 5339)
+++ trunk/pywikipedia/reflinks.py 2008-05-08 19:49:52 UTC (rev 5340)
@@ -6,8 +6,8 @@
the link to use it as the title of the wiki link in the reference, i.e.
<ref>[http://www.google.fr/search?q=test test - Google Search]</ref>
-The bot checks every 20 edits its talk page and a special stop page : if
-one of these page has been edited, it stops.
+The bot checks every 20 edits a special stop page : if
+the page has been edited, it stops.
DumZiBoT is running that script on en: & fr: at every new dump, running it on de: is not allowed anymore.
@@ -493,9 +493,6 @@
if actualRev != self.stopPageRevId:
wikipedia.output(u'[[%s]] has been edited : Someone wants us to stop.' % self.stopPage)
return
- if self.site.messages():
- wikipedia.output(u'Bot has new messages. Better stop to check.')
- return
def main():
genFactory = pagegenerators.GeneratorFactory()
Revision: 5339
Author: wikipedian
Date: 2008-05-08 19:37:31 +0000 (Thu, 08 May 2008)
Log Message:
-----------
rolled back more of the changes from the update of Betacommand / siebrand, revision 4985:
* Putting pages asynchronously in always-yes mode is not useful. The asynchronous putting
feature is meant to reduce waiting times in interactive mode.
* Removed the stop-on-new-messages feature. This is really annoying, most bot operators don't
read their messages with their bot accounts. Also, it doesn't belong here. If you want such a
feature, add code to wikipedia.py which stops the bot on new messages. Use a config variable for
this, and set its default so that the bot continues when there are new messages.
Modified Paths:
--------------
trunk/pywikipedia/noreferences.py
Modified: trunk/pywikipedia/noreferences.py
===================================================================
--- trunk/pywikipedia/noreferences.py 2008-05-08 19:17:04 UTC (rev 5338)
+++ trunk/pywikipedia/noreferences.py 2008-05-08 19:37:31 UTC (rev 5339)
@@ -21,7 +21,6 @@
-start:Category:M.
-always Don't prompt you for each replacement.
- -ignoremsg Don't stop when the bot has new messages
All other parameters will be regarded as part of the title of a single page,
and the bot will only work on that single page.
@@ -211,10 +210,9 @@
class NoReferencesBot:
- def __init__(self, generator, always = False, ignoreMsg = False):
+ def __init__(self, generator, always = False):
self.generator = generator
self.always = always
- self.ignoreMsg = ignoreMsg
self.site = wikipedia.getSite()
self.refR = re.compile('</ref>', re.IGNORECASE)
self.referencesR = re.compile('<references */>', re.IGNORECASE)
@@ -335,10 +333,7 @@
if self.always:
try:
- page.put_async(newText)
- if self.site.messages() and not self.ignoreMsg:
- wikipedia.output(u'NOTE: You have unread messages, stopping...')
- wikipedia.stopme()
+ page.put(newText)
except wikipedia.EditConflict:
wikipedia.output(u'Skipping %s because of edit conflict' % (page.title(),))
except wikipedia.SpamfilterError, e:
@@ -348,9 +343,6 @@
else:
# Save the page in the background. No need to catch exceptions.
page.put_async(newText)
- if self.site.messages() and not self.ignoreMsg:
- wikipedia.output(u'NOTE: You have unread messages, stopping...')
- wikipedia.stopme()
return
def run(self):
@@ -375,9 +367,6 @@
if self.lacksReferences(text):
newText = self.addReferences(text)
self.save(page, newText)
- if self.site.messages() and not self.ignoreMsg:
- wikipedia.output(u'NOTE: You have unread messages, stopping...')
- wikipedia.stopme()
def main():
#page generator
@@ -390,8 +379,6 @@
namespaces = []
# Never ask before changing a page
always = False
- # Stop when the bot has new messages
- ignoreMsg = False
# This factory is responsible for processing command line arguments
# that are also used by other scripts and that determine on which pages
# to work on.
@@ -411,8 +398,6 @@
namespaces.append(arg[11:])
elif arg == '-always':
always = True
- elif arg == '-ignoremsg':
- ignoreMsg = True
else:
generator = genFactory.handleArg(arg)
if generator:
@@ -429,7 +414,7 @@
if namespaces != []:
gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
preloadingGen = pagegenerators.PreloadingGenerator(gen)
- bot = NoReferencesBot(preloadingGen, always, ignoreMsg)
+ bot = NoReferencesBot(preloadingGen, always)
bot.run()
if __name__ == "__main__":