Bugs item #1944723, was opened at 2008-04-17 16:24
Message generated for change (Tracker Item Submitted) made by Item Submitter
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=1944723&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: shizhao (wikishizhao)
Assigned to: Nobody/Anonymous (nobody)
Summary: fix imagelinks() in wikipedia.py
Initial Comment:
fix imagelinks() bug in wikipedia.py. Add more file types.
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=1944723&group_…
Revision: 5225
Author: multichill
Date: 2008-04-16 13:38:04 +0000 (Wed, 16 Apr 2008)
Log Message:
-----------
Rewrite of the program.
Modified Paths:
--------------
trunk/pywikipedia/imagecopy.py
Modified: trunk/pywikipedia/imagecopy.py
===================================================================
--- trunk/pywikipedia/imagecopy.py 2008-04-16 10:40:54 UTC (rev 5224)
+++ trunk/pywikipedia/imagecopy.py 2008-04-16 13:38:04 UTC (rev 5225)
@@ -16,25 +16,39 @@
so he can test at: [[de:Benutzer Diskussion:Magnus Manske]]. You can
write him in German and English.
-Arguments:
+Examples
- -project Project to copy from (default: wikipedia)
- -lang Language to copy from (default: nl)
- -cat Category to copy to Wikimedia Commons (required)
- -start Start at index within category (optional)
+Work on a single image
+ python imagecopy.py -page:Image:<imagename>
+Work on the 100 newest images:
+ python imagecopy.py -newimages:100
+Work on all images in a category:<cat>
+ python imagecopy.py -cat:<cat>
+Work on all images which transclude a template
+ python imagecopy.py -transcludes:<template>
+See pagegenerators.py for more ways to get a list of images.
+By default the bot works on your home wiki (set in user-config)
+
Known issues/FIXMEs (no critical issues known):
* make it use pagegenerators.py
+** Implemented in rewrite
* Some variable names are in Spanish, which makes the code harder to read.
+** Almost all variables are now in English
* Depending on sorting within a file category, the "next batch" is sometimes
not working, leading to an endless loop
+** Using pagegenerators now
* Different wikis can have different exclusion lists. A parameter for the
exclusion list Uploadbot.localskips.txt would probably be nice.
* Bot should probably use API instead of query.php
+** Api? Query? Wikipedia.py!
* Should request alternative name if file name already exists on Commons
+** Implemented in rewrite
* Exits after last file in category was processed, aborting all pending
threads.
+** Implemented proper threading in rewrite
* Should take user-config.py as input for project and lang variables
+** Implemented in rewrite
* Should require a Commons user to be present in user-config.py before
working
* Should probably have an input field for additional categories
@@ -58,6 +72,9 @@
# New bot by:
# (C) Kyle/Orgullomoore, Siebrand Mazeland 2007
#
+# Another rewrite by:
+# (C) Multichill 2008
+#
# Distributed under the terms of the MIT license.
#
__version__='$Id$'
@@ -67,282 +84,12 @@
import os, sys, re, codecs
import urllib, httplib, urllib2
import catlib, thread, webbrowser
+import time, threading
import wikipedia, config
+import pagegenerators, add_text
+from upload import *
NL=''
-
-def pageText(url):
- request=urllib2.Request(url)
- user_agent='Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.12) Gecko/20050915 Firefox/1.0.7'
- print url
- request.add_header("User-Agent", user_agent)
- response=urllib2.urlopen(request)
- text=response.read()
- response.close()
- return text
-
-def post_multipart(host, selector, fields, files, cookies):
- """
- Post fields and files to an http host as multipart/form-data.
- fields is a sequence of (name, value) elements for regular form fields.
- files is a sequence of (name, filename, value) elements for data to be uploaded as files
- Return the server's response page.
- """
- content_type, body = encode_multipart_formdata(fields, files)
- conn = httplib.HTTPConnection(host)
- conn.putrequest('POST', selector)
- conn.putheader('content-type', content_type)
- conn.putheader('content-length', str(len(body)))
- conn.putheader("User-agent", 'RobHooftWikiRobot/1.0')
- if cookies:
- conn.putheader('Cookie',cookies)
- conn.endheaders()
- conn.send(body)
- response = conn.getresponse()
- returned_html = response.read()
- conn.close()
- return response, returned_html
-
-def encode_multipart_formdata(fields, files):
- """
- fields is a sequence of (name, value) elements for regular form fields.
- files is a sequence of (name, filename, value) elements for data to be uploaded as files
- Return (content_type, body) ready for httplib.HTTP instance
- """
- BOUNDARY = '----------ThIs_Is_tHe_bouNdaRY_$'
- CRLF = '\r\n'
- L = []
- for (key, value) in fields:
- L.append('--' + BOUNDARY)
- L.append('Content-Disposition: form-data; name="%s"' % key)
- L.append('')
- L.append(value)
- for (key, filename, value) in files:
- L.append('--' + BOUNDARY)
- L.append('Content-Disposition: form-data; name="%s"; filename="%s"' % (key, filename))
- L.append('Content-Type: %s' % get_content_type(filename))
- L.append('')
- L.append(value)
- L.append('--' + BOUNDARY + '--')
- L.append('')
- body = CRLF.join(L)
- content_type = 'multipart/form-data; boundary=%s' % BOUNDARY
- return content_type, body
-
-def get_content_type(filename):
- import mimetypes
- return mimetypes.guess_type(filename)[0] or 'application/octet-stream'
-
-
-class UploadRobot:
- def __init__(self, url, description = u'', keepFilename = False, verifyDescription = False, ignoreWarning = True, targetSite = None, urlEncoding = None, newname=None):
- """
- ignoreWarning - Set this to True if you want to upload even if another
- file would be overwritten or another mistake would be
- risked.
- Attention: This parameter doesn't work yet for unknown reason.
- """
- self.url = url
- self.urlEncoding = urlEncoding
- self.description = description
- self.keepFilename = keepFilename
- self.verifyDescription = verifyDescription
- self.ignoreWarning = ignoreWarning
- self.newname=newname
- if config.upload_to_commons:
- self.targetSite = targetSite or wikipedia.getSite('commons', 'commons')
- else:
- self.targetSite = targetSite or wikipedia.getSite()
- self.targetSite=wikipedia.Site('commons', 'commons')
- self.targetSite.forceLogin()
-
- def urlOK(self):
- '''
- Returns true iff the URL references an online site or an
- existing local file.
- '''
- return self.url != '' and ('://' in self.url or os.path.exists(self.url))
-
- def upload_image(self, debug=False):
- """Gets the image at URL self.url, and uploads it to the target wiki.
- Returns the filename which was used to upload the image.
- If the upload fails, the user is asked whether to try again or not.
- If the user chooses not to retry, returns null.
- """
- # Get file contents
- if '://' in self.url:
- uo = wikipedia.MyURLopener()
- file = uo.open(self.url,"rb")
- else:
- # Opening local files with MyURLopener would be possible, but we
- # don't do it because it only accepts ASCII characters in the
- # filename.
- file = open(self.url,"rb")
- wikipedia.output(u'Reading file %s' % self.url)
- contents = file.read()
- if contents.find("The requested URL was not found on this server.") != -1:
- print "Couldn't download the file."
- return
- file.close()
- # Isolate the pure name
- filename = self.newname
- if '/' in filename:
- filename = filename.split('/')[-1]
- if '\\' in filename:
- filename = filename.split('\\')[-1]
- if self.urlEncoding:
- filename = urllib.unquote(filename)
- filename = filename.decode(self.urlEncoding)
- if not self.keepFilename:
- wikipedia.output(u"The filename on the target wiki will default to: %s" % filename)
- # ask newfn until it's valid
- ok = False
- # FIXME: these 2 belong somewhere else, presumably in family
- forbidden = '/' # to be extended
- allowed_formats = (u'gif', u'jpg', u'jpeg', u'mid', u'midi', u'ogg', u'png', u'svg', u'xcf')
- while not ok:
- ok = True
- newfn = wikipedia.input(u'Enter a better name, or press enter to accept:')
- if newfn == "":
- newfn = filename
- ext = os.path.splitext(newfn)[1].lower().strip('.')
- for c in forbidden:
- if c in newfn:
- print "Invalid character: %s. Please try again" % c
- ok = False
- if ext not in allowed_formats and ok:
- choice = wikipedia.inputChoice(u"File format is not one of [%s], but %s. Continue?" % (u' '.join(allowed_formats), ext), ['yes', 'no'], ['y', 'N'], 'N')
- if choice == 'n':
- ok = False
- if newfn != '':
- filename = newfn
- # MediaWiki doesn't allow spaces in the file name.
- # Replace them here to avoid an extra confirmation form
- filename = filename.replace(' ', '_')
- # Convert the filename (currently Unicode) to the encoding used on the
- # target wiki
- encodedFilename = filename.encode(self.targetSite.encoding())
- # A proper description for the submission.
- wikipedia.output(u"The suggested description is:")
- wikipedia.output(self.description)
- if self.verifyDescription:
- newDescription = u''
- choice = wikipedia.inputChoice(u'Do you want to change this description?', ['Yes', 'No'], ['y', 'N'], 'n')
- if choice == 'y':
- import editarticle
- editor = editarticle.TextEditor()
- newDescription = editor.edit(self.description)
- # if user saved / didn't press Cancel
- if newDescription:
- self.description = newDescription
-
- formdata = {}
- formdata["wpUploadDescription"] = self.description
- # if self.targetSite.version() >= '1.5':
- # formdata["wpUploadCopyStatus"] = wikipedia.input(u"Copyright status: ")
- # formdata["wpUploadSource"] = wikipedia.input(u"Source of file: ")
- formdata["wpUploadAffirm"] = "1"
- formdata["wpUpload"] = "upload bestand"
- # This somehow doesn't work.
- if self.ignoreWarning:
- formdata["wpIgnoreWarning"] = "1"
- else:
- formdata["wpIgnoreWarning"] = "0"
-
- # try to encode the strings to the encoding used by the target site.
- # if that's not possible (e.g. because there are non-Latin-1 characters and
- # the home Wikipedia uses Latin-1), convert all non-ASCII characters to
- # HTML entities.
- for key in formdata:
- assert isinstance(key, basestring), "ERROR: %s is not a string but %s" % (key, type(key))
- try:
- formdata[key] = formdata[key].encode(self.targetSite.encoding())
- except (UnicodeEncodeError, UnicodeDecodeError):
- formdata[key] = wikipedia.UnicodeToAsciiHtml(formdata[key]).encode(self.targetSite.encoding())
-
- # don't upload if we're in debug mode
- if not debug:
- wikipedia.output(u'Uploading file to %s...' % self.targetSite)
- response, returned_html = post_multipart(self.targetSite.hostname(),
- self.targetSite.upload_address(),
- formdata.items(),
- (('wpUploadFile', encodedFilename, contents),),
- cookies = self.targetSite.cookies()
- )
- returned_html = returned_html.decode(self.targetSite.encoding())
- # There are 2 ways MediaWiki can react on success: either it gives
- # a 200 with a success message, or it gives a 302 (redirection).
- # Do we know how the "success!" HTML page should look like?
- # ATTENTION: if you changed your Wikimedia Commons account not to show
- # an English interface, this detection will fail!
- success_msg = self.targetSite.mediawiki_message('successfulupload')
- if success_msg in returned_html or response.status == 302:
- wikipedia.output(u"Upload successful.")
- # The following is not a good idea, because the server also gives a 200 when
- # something went wrong.
- #if response.status in [200, 302]:
- # wikipedia.output(u"Upload successful.")
-
- else:
- try:
- # Try to find the error message within the HTML page.
- # If we can't find it, we just dump the entire HTML page.
- returned_html = returned_html[returned_html.index('<!-- start content -->') + 22: returned_html.index('<!-- end content -->')]
- except:
- pass
- wikipedia.output(u'%s\n\n' % returned_html)
- wikipedia.output(u'%i %s' % (response.status, response.reason))
- answer = wikipedia.inputChoice(u'Upload of %s probably failed. Above you see the HTML page which was returned by MediaWiki. Try again?' % filename, ['Yes', 'No'], ['y', 'N'], 'N')
- if answer in ["y", "Y"]:
- return upload_image(debug)
- else:
- return
- return filename
-
- def run(self):
- while not self.urlOK():
- if not self.url:
- wikipedia.output(u'No input filename given')
- else:
- wikipedia.output(u'Invalid input filename given. Try again.')
- self.url = wikipedia.input(u'File or URL where file is now:')
- return self.upload_image()
-
-def getcatimgs(catP, cpfrom=''):
- toreturn=[]
- #http://commons.wikimedia.org/w/query.php?what=category&cptitle=GFDL&cplimit=500
- done=0
- while done==0:
- if catP !='':
- path='http://'+catP.site().hostname()+'/w/query.php?what=content|imageinfo|category&cptitle='+catP.urlname()+'&cpfrom='+six[0]+':'+cpfrom+'&cplimit=50&cpnamespace=6&iiurl&format=xml'
- else:
- path='http://'+six[1].hostname()+'/w/query.php?what=content|imageinfo|allpages&apfrom='+cpfrom+'&aplimit=50&apnamespace=6&iiurl&format=xml'
- crudo=pageText(path)
- print 'got'
- if '<category next="' in crudo:
- cpfrom=crudo.split('<category next="')[1].split('"')[0]
- elif '<allpages next="' in crudo:
- cpfrom=crudo.split('<allpages next="')[1].split('"')[0]
- else:
- done=1
- cpfrom=urllib.quote(cpfrom)
- paginas=crudo.split('<page>')
- for pagina in paginas[1:]:
- ns=pagina.split('<ns>')[1].split('</ns>')[0]
- if ns =='6':
- try:
- imageblock=pagina.split('<image ')[1].split('>')[0]
- url=imageblock.split('url="')[1].split('"')[0]
- uploader=imageblock.split('user="')[1].split('"')[0]
- imtit=pagina.split('<title>')[1].split('</title>')[0]
- contentblock=pagina.split('<content ')[1].split('>')[0]
- if contentblock[-1]=='/':
- content=''
- else:
- content=pagina.split('<content '+contentblock+'>')[1].split('</content>')[0]
- toappend=(url, imtit.decode('utf-8'), content, uploader)
- yield toappend
- except:
- continue
+
def pageTextPost(url,postinfo):
print url
m=re.search(ur'http://(.*?)(/.*)',url)
@@ -364,36 +111,51 @@
data = h.getfile().read() # Obtener el HTML en bruto/wiki?title=Special:Userlogin&action=submitlogin&type=signup HTTP/1.1
return data
-def getCH(url, imageP, nn, tenemosuncambio):
- tosend={'language':str(imageP.site()).split(':')[1],
- 'image':imageP.title(),
- 'newname':'',
- 'project':str(imageP.site()).split(':')[0],
- 'commonsense':'1',
- 'doit':'Get+text'}
- for k in tosend.keys():
- tosend[k]=tosend[k].encode('utf-8')
- tosend=urllib.urlencode(tosend)
- print tosend
- CH=pageTextPost('http://tools.wikimedia.de/~magnus/commonshelper.php', tosend)
- print 'Got CH desc.'
- tablock=CH.split('<textarea ')[1].split('>')[0]
- CH=CH.split('<textarea '+tablock+'>')[1].split('</textarea>')[0]
- CH=CH.replace('×', '×')
- CH=CH.decode('utf-8')
- if not '[[category:' in CH.lower():
- CH=u'\n\n{{BotMoveToCommons|'+six[1].hostname().split('.org')[0]+'}}'+CH
- ##add {{NowCommons}}
- bot = UploadRobot(url, CH, keepFilename=True, verifyDescription=False, newname=nn, urlEncoding='utf-8')
- bot.run()
- imtxt=imageP.get()
- if tenemosuncambio==1:
- imageP.put(imtxt+u'\n\n{{NowCommons|'+nn.decode('utf-8')+'}}', u'{{NowCommons}}')
- else:
- imageP.put(imtxt+u'\n\n{{NowCommons}}', u'{{NowCommons}}')
-#-etiqueta ok skip view
-#texto
+class imageTransfer (threading.Thread):
+
+ def __init__ ( self, imagePage, newname):
+ self.imagePage = imagePage
+ self.newname = newname
+ threading.Thread.__init__ ( self )
+
+ def run(self):
+ tosend={'language':str(self.imagePage.site().language()),
+ 'image':self.imagePage.titleWithoutNamespace().encode('utf-8'),
+ 'newname':urllib.quote(self.newname.encode('utf-8')),
+ 'project':str(self.imagePage.site().family.name),
+ 'commonsense':'1',
+ 'doit':'Get+text'}
+ #for k in tosend.keys():
+ # tosend[k]=tosend[k].encode('utf-8')
+ tosend=urllib.urlencode(tosend)
+ print tosend
+ CH=pageTextPost('http://tools.wikimedia.de/~magnus/commonshelper.php', tosend)
+ print 'Got CH desc.'
+ wikipedia.output(CH);
+ tablock=CH.split('<textarea ')[1].split('>')[0]
+ CH=CH.split('<textarea '+tablock+'>')[1].split('</textarea>')[0]
+ CH=CH.replace('×', '×')
+ CH=CH.decode('utf-8')
+ ## if not '[[category:' in CH.lower():
+	# I want every picture to be tagged with the bot template so I can check my contributions later.
+ CH=u'\n\n{{BotMoveToCommons|'+ self.imagePage.site().language() + '.' + self.imagePage.site().family.name +'}}'+CH
+ #urlEncoding='utf-8'
+ bot = UploadRobot(url=self.imagePage.fileUrl(), description=CH, useFilename=self.newname, keepFilename=True, verifyDescription=False, ignoreWarning = True, targetSite = wikipedia.getSite('commons', 'commons'))
+ bot.run()
+
+        #add {{NowCommons}}, first force to get the page so we don't run into edit conflicts
+ imtxt=self.imagePage.get(force=True)
+ if self.newname!=self.imagePage.titleWithoutNamespace():
+ self.imagePage.put(imtxt+u'\n\n{{NowCommons|'+self.newname.decode('utf-8')+'}}', u'{{NowCommons}}')
+ print 'Nowcommons with different name.\n'
+ else:
+ self.imagePage.put(imtxt+u'\n\n{{NowCommons}}', u'{{NowCommons}}')
+ print 'Nowcommons.\n'
+ return
+
+#-label ok skip view
+#textarea
archivo=wikipedia.config.datafilepath("Uploadbot.localskips.txt")
try:
open(archivo, 'r')
@@ -403,90 +165,103 @@
tocreate.close()
def getautoskip():
+ '''
+ Get a list of templates to skip.
+ '''
f=codecs.open(archivo, 'r', 'utf-8')
txt=f.read()
f.close()
toreturn=txt.split('{{')[1:]
return toreturn
-class Tkstuff:
- def __init__(self, nP, contenido, uploader, commonsconflict=0):
+class Tkdialog:
+ def __init__(self, image_title, content, uploader, url, templates, commonsconflict=0):
self.root=Tk()
#"%dx%d%+d%+d" % (width, height, xoffset, yoffset)
#Always appear the same size and in the bottom-left corner
self.root.geometry("600x200+100-100")
- self.nP=wikipedia.Page(six[1], 'Image:'+nP)
- self.root.title(self.nP.titleWithoutNamespace())
+ #self.nP=wikipediaPage
+ self.root.title(image_title)
self.changename=''
self.skip=0
- uploader=uploader.decode('utf-8')
+ self.url=url
+ self.uploader="Unkown"
+ #uploader.decode('utf-8')
scrollbar=Scrollbar(self.root, orient=VERTICAL)
- etiqueta=Label(self.root,text=u"Enter new name or leave blank.")
+ label=Label(self.root,text=u"Enter new name or leave blank.")
imageinfo=Label(self.root, text='Uploaded by '+uploader+'.')
- texto=Text(self.root)
- texto.insert(END, contenido.decode('utf-8'))
- texto.config(state=DISABLED, height=8, width=40, padx=0, pady=0, wrap=WORD, yscrollcommand=scrollbar.set)
- scrollbar.config(command=texto.yview)
- self.entrada=Entry(self.root)
+ textarea=Text(self.root)
+ textarea.insert(END, content.encode('utf-8'))
+ textarea.config(state=DISABLED, height=8, width=40, padx=0, pady=0, wrap=WORD, yscrollcommand=scrollbar.set)
+ scrollbar.config(command=textarea.yview)
+ self.entry=Entry(self.root)
- self.listado=Listbox(self.root, bg="white", height=5)
+ self.templatelist=Listbox(self.root, bg="white", height=5)
+
+ for template in templates:
+ self.templatelist.insert(END, template)
+ autoskipButton=Button(self.root, text="Add to AutoSkip", command=self.add2autoskip)
+ browserButton=Button(self.root, text='View in browser', command=self.openInBrowser)
+ skipButton=Button(self.root, text="Skip", command=self.skipFile)
+ okButton=Button(self.root, text="OK", command=self.okFile)
- self.plantillas=[]
+ ##Start grid
+ label.grid(row=0)
+ okButton.grid(row=0, column=1, rowspan=2)
+ skipButton.grid(row=0, column=2, rowspan=2)
+ browserButton.grid(row=0, column=3, rowspan=2)
- for chuleta in contenido.split('{{')[1:]:
- trytosplit=re.split(ur'(?:\}\}|\|)', chuleta)
- if trytosplit !=[]:
- plantilla=trytosplit[0]
- for char in ['}', ']', '{', '[']:
- if char in plantilla:
- plantilla=''
- if plantilla.lower()=='information':
- plantilla=''
- if plantilla !='':
- self.plantillas.append(plantilla)
- for plantilla in self.plantillas:
- self.listado.insert(END, plantilla)
- addB=Button(self.root, text="Add to AutoSkip", command=self.add2autoskip)
- browser=Button(self.root, text='View in browser', command=self.oib)
- saltar=Button(self.root, text="Skip", command=self.skipF)
- ok=Button(self.root, text="OK", command=self.okF)
-
-##Start grid
- etiqueta.grid(row=0)
- ok.grid(row=0, column=1, rowspan=2)
- saltar.grid(row=0, column=2, rowspan=2)
- browser.grid(row=0, column=3, rowspan=2)
-
- self.entrada.grid(row=1)
+ self.entry.grid(row=1)
- texto.grid(row=2, column=1, columnspan=3)
+ textarea.grid(row=2, column=1, columnspan=3)
scrollbar.grid(row=2, column=5)
- self.listado.grid(row=2, column=0)
+ self.templatelist.grid(row=2, column=0)
- addB.grid(row=3, column=0)
+ autoskipButton.grid(row=3, column=0)
imageinfo.grid(row=3, column=1, columnspan=4)
- def okF(self):
- self.changename=self.entrada.get()
+
+
+ def okFile(self):
+ '''
+ The user pressed the OK button.
+ '''
+ self.changename=self.entry.get()
self.root.destroy()
- def skipF(self):
+ def skipFile(self):
+ '''
+ The user pressed the Skip button.
+ '''
self.skip=1
self.root.destroy()
- def oib(self):
- webbrowser.open('http://'+six[1].hostname()+'/wiki/'+self.nP.urlname())
+ def openInBrowser(self):
+ '''
+ The user pressed the View in browser button.
+ '''
+ webbrowser.open(self.url)
def add2autoskip(self):
- identificador=int(self.listado.curselection()[0])
- template=self.plantillas[identificador]
+ '''
+ The user pressed the Add to AutoSkip button.
+ '''
+ templateid=int(self.templatelist.curselection()[0])
+ template=self.templatelist.get(templateid)
toadd=codecs.open(archivo, 'a', 'utf-8')
toadd.write('{{'+template)
toadd.close()
- self.skipF()
+ self.skipFile()
def getnewname(self):
+ '''
+ Activate the dialog and return the new name and if the image is skipped.
+ '''
self.root.mainloop()
return (self.changename, self.skip)
def doiskip(pagetext):
+ '''
+ Skip this image or not.
+ Returns True if the image is on the skip list, otherwise False
+ '''
saltos=getautoskip()
#print saltos
for salto in saltos:
@@ -496,79 +271,71 @@
return True
return False
-six=['These should', 'both be changed']
-
def main(args):
+ generator = None;
+ #newname = "";
+ imagepage = None;
+ # Load a lot of default generators
+ genFactory = pagegenerators.GeneratorFactory()
- lang=u''
- site=u''
- cat = u''
- startingpoint=u''
- verifyDescription=False
- keepFilename = False
-
- for arg in args:
- if arg.startswith('-start:'):
- startingpoint=arg.split('-start:')[1]
- elif arg.startswith('-cat:'):
- cat=arg.split('-cat:')[1]
- elif arg.startswith('-lang:'):
- lang=arg.split('-lang:')[1]
- elif arg.startswith('-site:'):
- site=arg.split('-site:')[1]
+ for arg in wikipedia.handleArgs():
+ if arg.startswith('-page'):
+ if len(arg) == 5:
+ generator = [wikipedia.Page(wikipedia.getSite(), wikipedia.input(u'What page do you want to use?'))]
+ else:
+ generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
+ elif arg == '-always':
+ always = True
else:
- print 'Argument: '+str([arg])+' is not valid'
- print 'ourcat: '+cat
- if (len(site)>1, len(lang)>1)==(True, True):
- try:
- sitio=wikipedia.Site(lang, site)
- six[1]=sitio
- except:
- print str((site, lang))+' didnt work out. Defaulting to nl.wikipedia.'
- six[1]=wikipedia.Site('nl', 'wikipedia')
- else:
- six[1]=wikipedia.Site('nl', 'wikipedia')
- print "Working from "+str(six[1])
- seis=pageText('http://'+six[1].hostname()+'/w/query.php?what=namespaces&format=xml').split('<ns id="6">')[1].split('</ns>')[0]
- seis=urllib.quote(seis)
- six[0]=seis
- print six
- if cat != u'':
- categ=wikipedia.Page(six[1], 'Category:'+cat.decode('utf-8'))
- #Wikipedia:Verplaats naar Wikimedia Commons
- categorizadas=getcatimgs(categ, startingpoint)
+ generator = genFactory.handleArg(arg)
+ if not generator:
+ raise add_text.NoEnoughData('You have to specify the generator you want to use for the script!')
- elif startingpoint != u'':
- categorizadas=getcatimgs('', startingpoint)
+ pregenerator = pagegenerators.PreloadingGenerator(generator)
+ for page in pregenerator:
+ if page.exists() and (page.namespace() == 6) and (not page.isRedirectPage()) :
+ imagepage = wikipedia.ImagePage(page.site(), page.title())
+
+ #First do autoskip.
+ if doiskip(imagepage.get()):
+ wikipedia.output("Skipping " + page.title())
+ skip = True
+ else:
+ # The first upload is last in the list.
+ (datetime, username, resolution, size, comment) = imagepage.getFileVersionHistory().pop()
+ while True:
- for categorizada in categorizadas:
- #print categorizada
- url=categorizada[0]
- tenemosuncambio=0
- nn=url.split('/')[-1]
- if doiskip(categorizada[2]):
- print "Autoskipping " + nn
- continue
- #changename=wikipedia.input(u'The name on Commons will be '+nn+', ok? Enter a better name or press ENTER to proceed: ')
- changename=Tkstuff(nn, categorizada[2], categorizada[3]).getnewname()
- print ('changename', changename)
- if len(changename[0])!=0:
- nn=changename[0].encode('utf-8')
- tenemosuncambio=1
- elif changename[1]==1:
- print 'skipping this file'
- continue
- imageP=wikipedia.Page(six[1], categorizada[1])
- CP=wikipedia.Page(wikipedia.Site('commons', 'commons'), 'Image:'+nn.decode('utf-8'))
- if CP.exists():
- nn=Tkstuff(nn[0], categorizada[2], categorizada[3], commonsconflict=1).getnewname()
- if nn[1]==1:
- print 'skipping this file'
- continue
-
- thread.start_new_thread(getCH, (url, imageP, nn, tenemosuncambio))
+            # Do the Tkdialog to accept/reject and change the name
+ (newname, skip)=Tkdialog(imagepage.titleWithoutNamespace(), imagepage.get(), username, imagepage.permalink(), imagepage.templates()).getnewname()
+
+ if skip:
+ wikipedia.output('Skipping this image')
+ break
+
+ # Did we enter a new name?
+ if len(newname)==0:
+ #Take the old name
+ newname=imagepage.titleWithoutNamespace()
+
+ # Check if the image already exists
+ CommonsPage=wikipedia.Page(wikipedia.Site('commons', 'commons'), 'Image:'+newname)
+
+ if not CommonsPage.exists():
+ break
+ else:
+ wikipedia.output('Image already exists, pick another name or skip this image')
+                # We don't overwrite images, pick another name, go to the start of the loop
+ if not skip:
+ imageTransfer(imagepage, newname).start()
+
+ wikipedia.output(u'Still ' + str(threading.activeCount()) + u' active threads, lets wait')
+ for openthread in threading.enumerate():
+ if openthread != threading.currentThread():
+ openthread.join()
+ wikipedia.output(u'All threads are done')
+
if __name__ == "__main__":
try:
main(sys.argv[1:])
Revision: 5224
Author: wikipedian
Date: 2008-04-16 10:40:54 +0000 (Wed, 16 Apr 2008)
Log Message:
-----------
Wikimedia server message seems to have been changed
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-04-16 10:36:16 UTC (rev 5223)
+++ trunk/pywikipedia/wikipedia.py 2008-04-16 10:40:54 UTC (rev 5224)
@@ -1350,7 +1350,10 @@
# We might have been using an outdated token
output(u"Changing page has failed. Retrying.")
return self._putPage(text, comment, watchArticle, minorEdit, newPage, token=self.site().getToken(sysop = sysop, getagain = True), newToken = True, sysop = sysop)
- if data.find("<title>Wikimedia Error</title>") > -1:
+ # I think the error message title was changed from "Wikimedia Error"
+ # to "Wikipedia has a problem", but I'm not sure. Maybe we could
+ # just check for HTTP Status 500 (Internal Server Error)?
+ if "<title>Wikimedia Error</title>" in data or "has a problem</title>" in data:
output(
u"Wikimedia has technical problems; will retry in %i minute%s."
% (retry_delay, retry_delay != 1 and "s" or ""))
@@ -1359,7 +1362,7 @@
if retry_delay > 30:
retry_delay = 30
continue
- if data.find(self.site().mediawiki_message('readonly')) or data.find(self.site().mediawiki_message('readonly_lag')):
+ if self.site().mediawiki_message('readonly') in data or self.site().mediawiki_message('readonly_lag') in data:
output(u"The database is currently locked for write access; will retry in %i minute%s."
% (retry_delay, retry_delay != 1 and "s" or ""))
time.sleep(60 * retry_delay)
Revision: 5222
Author: wikipedian
Date: 2008-04-16 10:23:09 +0000 (Wed, 16 Apr 2008)
Log Message:
-----------
slightly lifted the category restriction on de.wikipedia
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-04-15 19:20:01 UTC (rev 5221)
+++ trunk/pywikipedia/wikipedia.py 2008-04-16 10:23:09 UTC (rev 5222)
@@ -3342,8 +3342,8 @@
if site is None:
site = getSite()
- if site.sitename() == 'wikipedia:de':
- raise Error('The PyWikipediaBot is no longer allowed to touch categories on the German Wikipedia. See http://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/bis_2006…')
+ if site.sitename() == 'wikipedia:de' and "{{Personendaten" in oldtext:
+ raise Error('The PyWikipediaBot is no longer allowed to touch categories on the German Wikipedia on pages that contain the person data template because of the non-standard placement of that template. See http://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/bis_2006…')
s = categoryFormat(new, insite = site)
if addOnly:
Revision: 5221
Author: multichill
Date: 2008-04-15 19:20:01 +0000 (Tue, 15 Apr 2008)
Log Message:
-----------
First version of commonscat.py, a tool to add the commonscat template to wikipedia categories.
Added Paths:
-----------
trunk/pywikipedia/commonscat.py
Added: trunk/pywikipedia/commonscat.py
===================================================================
--- trunk/pywikipedia/commonscat.py (rev 0)
+++ trunk/pywikipedia/commonscat.py 2008-04-15 19:20:01 UTC (rev 5221)
@@ -0,0 +1,228 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+With this tool you can add the template {{commonscat}} to categories.
+The tool works by following the interwiki links. If the template is present on
+another language page, the bot will use it.
+
+You could probably use it at articles as well, but this isn't tested.
+
+This bot uses pagegenerators to get a list of pages. For example to go through all categories:
+commonscat.py -start:Category:!
+
+Commonscat bot:
+
+Take a page. Follow the interwiki's and look for the commonscat template
+*Found zero templates. Done.
+*Found one template. Add this template
+*Found more templates. Ask the user <- still have to implement this
+
+TODO:
+*Update interwiki's at commons
+*Collect all possibilities also if local wiki already has link.
+*Better support for other templates (translations) / redundant templates.
+*Check mode, only check pages which already have the template
+*More efficient like interwiki.py
+*Possibility to update other languages in the same run
+
+"""
+
+#
+# (C) Multichill, 2008
+#
+# Distributed under the terms of the MIT license.
+#
+
+import wikipedia, config, pagegenerators, add_text
+
+commonscatTemplates = {
+ 'af' : u'CommonsKategorie',
+ 'als' : u'Commonscat',
+ 'az' : u'CommonsKat',
+ 'bg' : u'Commonscat',
+ 'ca' : u'Commonscat',
+ 'cs' : u'Commonscat',
+ 'da' : u'Commonscat',
+ 'de' : u'Commonscat',
+ 'en' : u'Commonscat',
+ 'eo' : u'Commonscat',
+ 'es' : u'Commonscat',
+ 'eu' : u'Commonskat',
+ 'fi' : u'Commonscat',
+ 'fr' : u'Commonscat',
+ 'hr' : u'Commonscat',
+ 'hu' : u'Közvagyonkat',
+ 'id' : u'Commonscat',
+ 'io' : u'Commonscat',
+ 'is' : u'CommonsCat',
+ 'it' : u'Commonscat',
+ 'ja' : u'Commonscat',
+ 'ko' : u'Commonscat',
+ 'lt' : u'Commonscat',
+ 'lv' : u'Commonscat',
+ 'mk' : u'Ризница-врска',
+ 'ms' : u'Commonscat',
+ 'nl' : u'Commonscat',
+ 'nn' : u'Commonscat',
+ 'no' : u'Commonscat',
+ 'oc' : u'Commonscat',
+ 'os' : u'Commonscat',
+ 'pl' : u'Commonscat',
+ 'pt' : u'Commonscat',
+ 'ro' : u'Commonscat',
+ 'ru' : u'Commonscat',
+ 'scn' : u'Commonscat',
+ 'sh' : u'Commonscat',
+ 'simple' : u'Commonscat',
+ 'sk' : u'Commonscat',
+ 'sl' : u'Kategorija v Zbirki',
+ 'sr' : u'Commonscat',
+ 'su' : u'Commonscat',
+ 'sv' : u'Commonscat',
+ 'th' : u'Commonscat',
+ 'tr' : u'CommonsKat',
+ 'uk' : u'Commonscat',
+ 'vi' : u'Commonscat',
+ 'zh' : u'Commonscat',
+ 'zh-yue' : u'同享類'
+}
+
+def getTemplate (lang = None):
+ '''
+ Get the template name in a language. Expects the language code, returns the translation.
+ '''
+ if commonscatTemplates.has_key(lang):
+ return commonscatTemplates[lang]
+ else:
+ return u'Commonscat'
+
+def updateInterwiki (wikipediaPage = None, commonsPage = None):
+ '''
+ Update the interwiki's at commons from a wikipedia page. The bot just replaces the interwiki links at the commons page with the interwiki's from the wikipedia page.
+ This should probably be more intelligent. We could use add all the interwiki's and remove duplicates. Or only remove language links if multiple language links to the same language exist.
+
+ This function is disabled for the moment until I figure out what the best way is to update the interwiki's.
+ '''
+ interwikis = {}
+ comment= u''
+ interwikilist = wikipediaPage.interwiki()
+ interwikilist.append(wikipediaPage)
+
+ for interwikiPage in interwikilist:
+ interwikis[interwikiPage.site()]=interwikiPage
+ oldtext = commonsPage.get()
+ # The commonssite object doesn't work with interwiki's
+ newtext = wikipedia.replaceLanguageLinks(oldtext, interwikis, wikipedia.getSite(u'nl'))
+ comment = u'Updating interwiki\'s from [[' + wikipediaPage.site().language() + u':' + wikipediaPage.title() + u']]'
+
+ if newtext != oldtext:
+ #This doesn't seem to work. Newtext has some trailing whitespace
+ wikipedia.showDiff(oldtext, newtext)
+ commonsPage.put(newtext=newtext, comment=comment)
+
+
+def addCommonscat (page = None, summary = None, always = False):
+ '''
+ Take a page. Go to all the interwiki page looking for a commonscat template.
+ When all the interwiki's links are checked and a proper category is found add it to the page.
+ '''
+ commonscat = ""
+ commonscatpage = None
+ commonscats = []
+
+ wikipedia.output("Working on " + page.title());
+ if getTemplate(page.site().language()) in page.templates():
+ wikipedia.output("Commonscat template is already on " + page.title());
+ #for template in page.templatesWithParams():
+ # if ((template[0]==getTemplate(page.site().language())) and (len(template[1]) > 0)):
+ # commonscatpage = getCommonscat(template[1][0])
+ # if commonscatpage != None:
+ # updateInterwiki (page, commonscatpage)
+ # #Should remove the template if something is wrong
+
+ else:
+ #Follow the interwiki's
+ for ipage in page.interwiki():
+ #See if commonscat is present
+ if getTemplate(ipage.site().language()) in ipage.templates():
+ #Go through all the templates at the page
+ for template in ipage.templatesWithParams():
+ #We found the template and it has the parameter set.
+ if ((template[0]==getTemplate(ipage.site().language())) and (len(template[1]) > 0)):
+ commonscatpage = getCommonscat(template[1][0])
+ if commonscatpage != None:
+ commonscats.append(commonscatpage);
+ wikipedia.output("Found link for " + page.title() + " at [[" + ipage.site().language() + ":" + ipage.title() + "]] to " + commonscatpage.title() + ".");
+ commonscatpage = None
+ if len(commonscats) > 0:
+ commonscatpage = commonscats.pop();
+ commonscat = commonscatpage.titleWithoutNamespace()
+ #We found one or more commonscat links, build the template and add it to our page
+ #TODO: We should check if we found more than one different link.
+ commonscat = "{{" + getTemplate(page.site().language()) + "|" + commonscat + "}}";
+ add_text.add_text(page, commonscat, summary, None, None, always);
+ #updateInterwiki(page, commonscatpage)
+ return (True, always);
+
+def getCommonscat (name = ""):
+ '''
+ This function will return a page object of the commons page
+ If the page is a redirect this function tries to follow it.
+ If the page doesn't exist the function will return None
+ '''
+ #wikipedia.output("getCommonscat: " + name );
+ result = wikipedia.Page(wikipedia.getSite("commons", "commons"), "Category:" + name);
+ if not result.exists():
+ #wikipedia.output("getCommonscat : The category doesnt exist.");
+ return None
+ elif result.isRedirectPage():
+ #wikipedia.output("getCommonscat : The category is a redirect");
+ return result.getRedirectTarget();
+ elif "Category redirect" in result.templates():
+ #wikipedia.output("getCommonscat : The category is a category redirect");
+ for template in result.templatesWithParams():
+ if ((template[0]=="Category redirect") and (len(template[1]) > 0)):
+ return getCommonscat(template[1][0])
+ elif result.isDisambig():
+ #wikipedia.output("getCommonscat : The category is disambigu");
+ return None
+ else:
+ return result
+
+def main():
+ '''
+ Parse the command line arguments and get a pagegenerator to work on.
+ Iterate through all the pages.
+ '''
+ summary = None; generator = None; always = False
+ # Load a lot of default generators
+ genFactory = pagegenerators.GeneratorFactory()
+
+ for arg in wikipedia.handleArgs():
+ if arg.startswith('-summary'):
+ if len(arg) == 8:
+ summary = wikipedia.input(u'What summary do you want to use?')
+ else:
+ summary = arg[9:]
+ elif arg.startswith('-page'):
+ if len(arg) == 5:
+ generator = [wikipedia.Page(wikipedia.getSite(), wikipedia.input(u'What page do you want to use?'))]
+ else:
+ generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
+ elif arg == '-always':
+ always = True
+ else:
+ generator = genFactory.handleArg(arg)
+ if not generator:
+ raise add_text.NoEnoughData('You have to specify the generator you want to use for the script!')
+
+ pregenerator = pagegenerators.PreloadingGenerator(generator)
+
+ for page in pregenerator:
+ (status, always) = addCommonscat(page, summary, always)
+
+if __name__ == "__main__":
+ try:
+ main()
+ finally:
+ wikipedia.stopme()
Feature Requests item #1941798, was opened at 2008-04-14 00:32
Message generated for change (Tracker Item Submitted) made by Item Submitter
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603141&aid=1941798&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Open
Priority: 5
Private: No
Submitted By: Nobody/Anonymous (nobody)
Assigned to: Nobody/Anonymous (nobody)
Summary: auto-save interwikidump
Initial Comment:
please add an option to automatically save interwikidump files at a specified period, not just when the script is user-interrupted or fails, to save a list of active articles. this will make it possible to resume work (rather than restart from beginning) in case something nasty like power failure or PC reboot happens.
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603141&aid=1941798&group_…
Revision: 5218
Author: filnik
Date: 2008-04-13 14:08:44 +0000 (Sun, 13 Apr 2008)
Log Message:
-----------
Deleting a debug-print, deleting the deleted variable, not working correctly
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-04-12 19:09:12 UTC (rev 5217)
+++ trunk/pywikipedia/wikipedia.py 2008-04-13 14:08:44 UTC (rev 5218)
@@ -828,7 +828,6 @@
def previousRevision(self):
"""Return the revision id for the previous revision of this Page."""
vh = self.getVersionHistory(revCount=2)
- print vh
return vh[1][0]
def exists(self):
@@ -1155,7 +1154,7 @@
force, callback))
def put(self, newtext, comment=None, watchArticle=None, minorEdit=True,
- force=False, deleted = True):
+ force=False):
"""Save the page with the contents of the first argument as the text.
Optional parameters:
@@ -1164,8 +1163,7 @@
watchArticle: a bool, add or remove this Page to/from bot user's
watchlist (if None, leave watchlist status unchanged)
minorEdit: mark this edit as minor if True
- force: ignore botMayEdit() setting
-
+ force: ignore botMayEdit() setting.
"""
# Login
try:
@@ -1209,10 +1207,10 @@
if self.site().lang == 'eo':
newtext = encodeEsperantoX(newtext)
return self._putPage(newtext, comment, watchArticle, minorEdit,
- newPage, self.site().getToken(sysop = sysop), sysop = sysop, deleted = deleted)
+ newPage, self.site().getToken(sysop = sysop), sysop = sysop)
def _putPage(self, text, comment=None, watchArticle=False, minorEdit=True,
- newPage=False, token=None, newToken=False, sysop=False, deleted=True):
+ newPage=False, token=None, newToken=False, sysop=False):
"""Upload 'text' as new content of Page by filling out the edit form.
Don't use this directly, use put() instead.
@@ -1299,7 +1297,7 @@
time.sleep(5)
continue
# A second text area means that an edit conflict has occured.
- if 'id=\'wpTextbox2\' name="wpTextbox2"' in data and deleted == True:
+ if 'id=\'wpTextbox2\' name="wpTextbox2"' in data:
raise EditConflict(u'An edit conflict has occured.')
if self.site().has_mediawiki_message("spamprotectiontitle")\
and self.site().mediawiki_message('spamprotectiontitle') in data: