Bugs item #1944723, was opened at 2008-04-17 16:24
Message generated for change (Tracker Item Submitted) made by Item Submitter
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=1944723&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: shizhao (wikishizhao)
Assigned to: Nobody/Anonymous (nobody)
Summary: fix imagelinks() in wikipedia.py
Initial Comment:
fix imagelinks() bug in wikipedia.py. Add more file types.
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603138&aid=1944723&group_…
Revision: 5225
Author: multichill
Date: 2008-04-16 13:38:04 +0000 (Wed, 16 Apr 2008)
Log Message:
-----------
Rewrite of the program.
Modified Paths:
--------------
trunk/pywikipedia/imagecopy.py
Modified: trunk/pywikipedia/imagecopy.py
===================================================================
--- trunk/pywikipedia/imagecopy.py 2008-04-16 10:40:54 UTC (rev 5224)
+++ trunk/pywikipedia/imagecopy.py 2008-04-16 13:38:04 UTC (rev 5225)
@@ -16,25 +16,39 @@
so he can test at: [[de:Benutzer Diskussion:Magnus Manske]]. You can
write him in German and English.
-Arguments:
+Examples
- -project Project to copy from (default: wikipedia)
- -lang Language to copy from (default: nl)
- -cat Category to copy to Wikimedia Commons (required)
- -start Start at index within category (optional)
+Work on a single image
+ python imagecopy.py -page:Image:<imagename>
+Work on the 100 newest images:
+ python imagecopy.py -newimages:100
+Work on all images in a category:<cat>
+ python imagecopy.py -cat:<cat>
+Work on all images which transclude a template
+ python imagecopy.py -transcludes:<template>
+See pagegenerators.py for more ways to get a list of images.
+By default the bot works on your home wiki (set in user-config)
+
Known issues/FIXMEs (no critical issues known):
* make it use pagegenerators.py
+** Implemented in rewrite
* Some variable names are in Spanish, which makes the code harder to read.
+** Almost all variables are now in English
* Depending on sorting within a file category, the "next batch" is sometimes
not working, leading to an endless loop
+** Using pagegenerators now
* Different wikis can have different exclusion lists. A parameter for the
exclusion list Uploadbot.localskips.txt would probably be nice.
* Bot should probably use API instead of query.php
+** Api? Query? Wikipedia.py!
* Should request alternative name if file name already exists on Commons
+** Implemented in rewrite
* Exits after last file in category was processed, aborting all pending
threads.
+** Implemented proper threading in rewrite
* Should take user-config.py as input for project and lang variables
+** Implemented in rewrite
* Should require a Commons user to be present in user-config.py before
working
* Should probably have an input field for additional categories
@@ -58,6 +72,9 @@
# New bot by:
# (C) Kyle/Orgullomoore, Siebrand Mazeland 2007
#
+# Another rewrite by:
+# (C) Multichill 2008
+#
# Distributed under the terms of the MIT license.
#
__version__='$Id$'
@@ -67,282 +84,12 @@
import os, sys, re, codecs
import urllib, httplib, urllib2
import catlib, thread, webbrowser
+import time, threading
import wikipedia, config
+import pagegenerators, add_text
+from upload import *
NL=''
-
-def pageText(url):
- request=urllib2.Request(url)
- user_agent='Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.12) Gecko/20050915 Firefox/1.0.7'
- print url
- request.add_header("User-Agent", user_agent)
- response=urllib2.urlopen(request)
- text=response.read()
- response.close()
- return text
-
-def post_multipart(host, selector, fields, files, cookies):
- """
- Post fields and files to an http host as multipart/form-data.
- fields is a sequence of (name, value) elements for regular form fields.
- files is a sequence of (name, filename, value) elements for data to be uploaded as files
- Return the server's response page.
- """
- content_type, body = encode_multipart_formdata(fields, files)
- conn = httplib.HTTPConnection(host)
- conn.putrequest('POST', selector)
- conn.putheader('content-type', content_type)
- conn.putheader('content-length', str(len(body)))
- conn.putheader("User-agent", 'RobHooftWikiRobot/1.0')
- if cookies:
- conn.putheader('Cookie',cookies)
- conn.endheaders()
- conn.send(body)
- response = conn.getresponse()
- returned_html = response.read()
- conn.close()
- return response, returned_html
-
-def encode_multipart_formdata(fields, files):
- """
- fields is a sequence of (name, value) elements for regular form fields.
- files is a sequence of (name, filename, value) elements for data to be uploaded as files
- Return (content_type, body) ready for httplib.HTTP instance
- """
- BOUNDARY = '----------ThIs_Is_tHe_bouNdaRY_$'
- CRLF = '\r\n'
- L = []
- for (key, value) in fields:
- L.append('--' + BOUNDARY)
- L.append('Content-Disposition: form-data; name="%s"' % key)
- L.append('')
- L.append(value)
- for (key, filename, value) in files:
- L.append('--' + BOUNDARY)
- L.append('Content-Disposition: form-data; name="%s"; filename="%s"' % (key, filename))
- L.append('Content-Type: %s' % get_content_type(filename))
- L.append('')
- L.append(value)
- L.append('--' + BOUNDARY + '--')
- L.append('')
- body = CRLF.join(L)
- content_type = 'multipart/form-data; boundary=%s' % BOUNDARY
- return content_type, body
-
-def get_content_type(filename):
- import mimetypes
- return mimetypes.guess_type(filename)[0] or 'application/octet-stream'
-
-
-class UploadRobot:
- def __init__(self, url, description = u'', keepFilename = False, verifyDescription = False, ignoreWarning = True, targetSite = None, urlEncoding = None, newname=None):
- """
- ignoreWarning - Set this to True if you want to upload even if another
- file would be overwritten or another mistake would be
- risked.
- Attention: This parameter doesn't work yet for unknown reason.
- """
- self.url = url
- self.urlEncoding = urlEncoding
- self.description = description
- self.keepFilename = keepFilename
- self.verifyDescription = verifyDescription
- self.ignoreWarning = ignoreWarning
- self.newname=newname
- if config.upload_to_commons:
- self.targetSite = targetSite or wikipedia.getSite('commons', 'commons')
- else:
- self.targetSite = targetSite or wikipedia.getSite()
- self.targetSite=wikipedia.Site('commons', 'commons')
- self.targetSite.forceLogin()
-
- def urlOK(self):
- '''
- Returns true iff the URL references an online site or an
- existing local file.
- '''
- return self.url != '' and ('://' in self.url or os.path.exists(self.url))
-
- def upload_image(self, debug=False):
- """Gets the image at URL self.url, and uploads it to the target wiki.
- Returns the filename which was used to upload the image.
- If the upload fails, the user is asked whether to try again or not.
- If the user chooses not to retry, returns null.
- """
- # Get file contents
- if '://' in self.url:
- uo = wikipedia.MyURLopener()
- file = uo.open(self.url,"rb")
- else:
- # Opening local files with MyURLopener would be possible, but we
- # don't do it because it only accepts ASCII characters in the
- # filename.
- file = open(self.url,"rb")
- wikipedia.output(u'Reading file %s' % self.url)
- contents = file.read()
- if contents.find("The requested URL was not found on this server.") != -1:
- print "Couldn't download the file."
- return
- file.close()
- # Isolate the pure name
- filename = self.newname
- if '/' in filename:
- filename = filename.split('/')[-1]
- if '\\' in filename:
- filename = filename.split('\\')[-1]
- if self.urlEncoding:
- filename = urllib.unquote(filename)
- filename = filename.decode(self.urlEncoding)
- if not self.keepFilename:
- wikipedia.output(u"The filename on the target wiki will default to: %s" % filename)
- # ask newfn until it's valid
- ok = False
- # FIXME: these 2 belong somewhere else, presumably in family
- forbidden = '/' # to be extended
- allowed_formats = (u'gif', u'jpg', u'jpeg', u'mid', u'midi', u'ogg', u'png', u'svg', u'xcf')
- while not ok:
- ok = True
- newfn = wikipedia.input(u'Enter a better name, or press enter to accept:')
- if newfn == "":
- newfn = filename
- ext = os.path.splitext(newfn)[1].lower().strip('.')
- for c in forbidden:
- if c in newfn:
- print "Invalid character: %s. Please try again" % c
- ok = False
- if ext not in allowed_formats and ok:
- choice = wikipedia.inputChoice(u"File format is not one of [%s], but %s. Continue?" % (u' '.join(allowed_formats), ext), ['yes', 'no'], ['y', 'N'], 'N')
- if choice == 'n':
- ok = False
- if newfn != '':
- filename = newfn
- # MediaWiki doesn't allow spaces in the file name.
- # Replace them here to avoid an extra confirmation form
- filename = filename.replace(' ', '_')
- # Convert the filename (currently Unicode) to the encoding used on the
- # target wiki
- encodedFilename = filename.encode(self.targetSite.encoding())
- # A proper description for the submission.
- wikipedia.output(u"The suggested description is:")
- wikipedia.output(self.description)
- if self.verifyDescription:
- newDescription = u''
- choice = wikipedia.inputChoice(u'Do you want to change this description?', ['Yes', 'No'], ['y', 'N'], 'n')
- if choice == 'y':
- import editarticle
- editor = editarticle.TextEditor()
- newDescription = editor.edit(self.description)
- # if user saved / didn't press Cancel
- if newDescription:
- self.description = newDescription
-
- formdata = {}
- formdata["wpUploadDescription"] = self.description
- # if self.targetSite.version() >= '1.5':
- # formdata["wpUploadCopyStatus"] = wikipedia.input(u"Copyright status: ")
- # formdata["wpUploadSource"] = wikipedia.input(u"Source of file: ")
- formdata["wpUploadAffirm"] = "1"
- formdata["wpUpload"] = "upload bestand"
- # This somehow doesn't work.
- if self.ignoreWarning:
- formdata["wpIgnoreWarning"] = "1"
- else:
- formdata["wpIgnoreWarning"] = "0"
-
- # try to encode the strings to the encoding used by the target site.
- # if that's not possible (e.g. because there are non-Latin-1 characters and
- # the home Wikipedia uses Latin-1), convert all non-ASCII characters to
- # HTML entities.
- for key in formdata:
- assert isinstance(key, basestring), "ERROR: %s is not a string but %s" % (key, type(key))
- try:
- formdata[key] = formdata[key].encode(self.targetSite.encoding())
- except (UnicodeEncodeError, UnicodeDecodeError):
- formdata[key] = wikipedia.UnicodeToAsciiHtml(formdata[key]).encode(self.targetSite.encoding())
-
- # don't upload if we're in debug mode
- if not debug:
- wikipedia.output(u'Uploading file to %s...' % self.targetSite)
- response, returned_html = post_multipart(self.targetSite.hostname(),
- self.targetSite.upload_address(),
- formdata.items(),
- (('wpUploadFile', encodedFilename, contents),),
- cookies = self.targetSite.cookies()
- )
- returned_html = returned_html.decode(self.targetSite.encoding())
- # There are 2 ways MediaWiki can react on success: either it gives
- # a 200 with a success message, or it gives a 302 (redirection).
- # Do we know how the "success!" HTML page should look like?
- # ATTENTION: if you changed your Wikimedia Commons account not to show
- # an English interface, this detection will fail!
- success_msg = self.targetSite.mediawiki_message('successfulupload')
- if success_msg in returned_html or response.status == 302:
- wikipedia.output(u"Upload successful.")
- # The following is not a good idea, because the server also gives a 200 when
- # something went wrong.
- #if response.status in [200, 302]:
- # wikipedia.output(u"Upload successful.")
-
- else:
- try:
- # Try to find the error message within the HTML page.
- # If we can't find it, we just dump the entire HTML page.
- returned_html = returned_html[returned_html.index('<!-- start content -->') + 22: returned_html.index('<!-- end content -->')]
- except:
- pass
- wikipedia.output(u'%s\n\n' % returned_html)
- wikipedia.output(u'%i %s' % (response.status, response.reason))
- answer = wikipedia.inputChoice(u'Upload of %s probably failed. Above you see the HTML page which was returned by MediaWiki. Try again?' % filename, ['Yes', 'No'], ['y', 'N'], 'N')
- if answer in ["y", "Y"]:
- return upload_image(debug)
- else:
- return
- return filename
-
- def run(self):
- while not self.urlOK():
- if not self.url:
- wikipedia.output(u'No input filename given')
- else:
- wikipedia.output(u'Invalid input filename given. Try again.')
- self.url = wikipedia.input(u'File or URL where file is now:')
- return self.upload_image()
-
-def getcatimgs(catP, cpfrom=''):
- toreturn=[]
- #http://commons.wikimedia.org/w/query.php?what=category&cptitle=GFDL&cplimit=500
- done=0
- while done==0:
- if catP !='':
- path='http://'+catP.site().hostname()+'/w/query.php?what=content|imageinfo|category&cptitle='+catP.urlname()+'&cpfrom='+six[0]+':'+cpfrom+'&cplimit=50&cpnamespace=6&iiurl&format=xml'
- else:
- path='http://'+six[1].hostname()+'/w/query.php?what=content|imageinfo|allpages&apfrom='+cpfrom+'&aplimit=50&apnamespace=6&iiurl&format=xml'
- crudo=pageText(path)
- print 'got'
- if '<category next="' in crudo:
- cpfrom=crudo.split('<category next="')[1].split('"')[0]
- elif '<allpages next="' in crudo:
- cpfrom=crudo.split('<allpages next="')[1].split('"')[0]
- else:
- done=1
- cpfrom=urllib.quote(cpfrom)
- paginas=crudo.split('<page>')
- for pagina in paginas[1:]:
- ns=pagina.split('<ns>')[1].split('</ns>')[0]
- if ns =='6':
- try:
- imageblock=pagina.split('<image ')[1].split('>')[0]
- url=imageblock.split('url="')[1].split('"')[0]
- uploader=imageblock.split('user="')[1].split('"')[0]
- imtit=pagina.split('<title>')[1].split('</title>')[0]
- contentblock=pagina.split('<content ')[1].split('>')[0]
- if contentblock[-1]=='/':
- content=''
- else:
- content=pagina.split('<content '+contentblock+'>')[1].split('</content>')[0]
- toappend=(url, imtit.decode('utf-8'), content, uploader)
- yield toappend
- except:
- continue
+
def pageTextPost(url,postinfo):
print url
m=re.search(ur'http://(.*?)(/.*)',url)
@@ -364,36 +111,51 @@
data = h.getfile().read() # Obtener el HTML en bruto/wiki?title=Special:Userlogin&action=submitlogin&type=signup HTTP/1.1
return data
-def getCH(url, imageP, nn, tenemosuncambio):
- tosend={'language':str(imageP.site()).split(':')[1],
- 'image':imageP.title(),
- 'newname':'',
- 'project':str(imageP.site()).split(':')[0],
- 'commonsense':'1',
- 'doit':'Get+text'}
- for k in tosend.keys():
- tosend[k]=tosend[k].encode('utf-8')
- tosend=urllib.urlencode(tosend)
- print tosend
- CH=pageTextPost('http://tools.wikimedia.de/~magnus/commonshelper.php', tosend)
- print 'Got CH desc.'
- tablock=CH.split('<textarea ')[1].split('>')[0]
- CH=CH.split('<textarea '+tablock+'>')[1].split('</textarea>')[0]
- CH=CH.replace('×', '×')
- CH=CH.decode('utf-8')
- if not '[[category:' in CH.lower():
- CH=u'\n\n{{BotMoveToCommons|'+six[1].hostname().split('.org')[0]+'}}'+CH
- ##add {{NowCommons}}
- bot = UploadRobot(url, CH, keepFilename=True, verifyDescription=False, newname=nn, urlEncoding='utf-8')
- bot.run()
- imtxt=imageP.get()
- if tenemosuncambio==1:
- imageP.put(imtxt+u'\n\n{{NowCommons|'+nn.decode('utf-8')+'}}', u'{{NowCommons}}')
- else:
- imageP.put(imtxt+u'\n\n{{NowCommons}}', u'{{NowCommons}}')
-#-etiqueta ok skip view
-#texto
+class imageTransfer (threading.Thread):
+
+ def __init__ ( self, imagePage, newname):
+ self.imagePage = imagePage
+ self.newname = newname
+ threading.Thread.__init__ ( self )
+
+ def run(self):
+ tosend={'language':str(self.imagePage.site().language()),
+ 'image':self.imagePage.titleWithoutNamespace().encode('utf-8'),
+ 'newname':urllib.quote(self.newname.encode('utf-8')),
+ 'project':str(self.imagePage.site().family.name),
+ 'commonsense':'1',
+ 'doit':'Get+text'}
+ #for k in tosend.keys():
+ # tosend[k]=tosend[k].encode('utf-8')
+ tosend=urllib.urlencode(tosend)
+ print tosend
+ CH=pageTextPost('http://tools.wikimedia.de/~magnus/commonshelper.php', tosend)
+ print 'Got CH desc.'
+ wikipedia.output(CH);
+ tablock=CH.split('<textarea ')[1].split('>')[0]
+ CH=CH.split('<textarea '+tablock+'>')[1].split('</textarea>')[0]
+ CH=CH.replace('×', '×')
+ CH=CH.decode('utf-8')
+ ## if not '[[category:' in CH.lower():
+	# I want every picture to be tagged with the bot template so I can check my contributions later.
+ CH=u'\n\n{{BotMoveToCommons|'+ self.imagePage.site().language() + '.' + self.imagePage.site().family.name +'}}'+CH
+ #urlEncoding='utf-8'
+ bot = UploadRobot(url=self.imagePage.fileUrl(), description=CH, useFilename=self.newname, keepFilename=True, verifyDescription=False, ignoreWarning = True, targetSite = wikipedia.getSite('commons', 'commons'))
+ bot.run()
+
+        #add {{NowCommons}}, first force to get the page so we don't run into edit conflicts
+ imtxt=self.imagePage.get(force=True)
+ if self.newname!=self.imagePage.titleWithoutNamespace():
+ self.imagePage.put(imtxt+u'\n\n{{NowCommons|'+self.newname.decode('utf-8')+'}}', u'{{NowCommons}}')
+ print 'Nowcommons with different name.\n'
+ else:
+ self.imagePage.put(imtxt+u'\n\n{{NowCommons}}', u'{{NowCommons}}')
+ print 'Nowcommons.\n'
+ return
+
+#-label ok skip view
+#textarea
archivo=wikipedia.config.datafilepath("Uploadbot.localskips.txt")
try:
open(archivo, 'r')
@@ -403,90 +165,103 @@
tocreate.close()
def getautoskip():
+ '''
+ Get a list of templates to skip.
+ '''
f=codecs.open(archivo, 'r', 'utf-8')
txt=f.read()
f.close()
toreturn=txt.split('{{')[1:]
return toreturn
-class Tkstuff:
- def __init__(self, nP, contenido, uploader, commonsconflict=0):
+class Tkdialog:
+ def __init__(self, image_title, content, uploader, url, templates, commonsconflict=0):
self.root=Tk()
#"%dx%d%+d%+d" % (width, height, xoffset, yoffset)
#Always appear the same size and in the bottom-left corner
self.root.geometry("600x200+100-100")
- self.nP=wikipedia.Page(six[1], 'Image:'+nP)
- self.root.title(self.nP.titleWithoutNamespace())
+ #self.nP=wikipediaPage
+ self.root.title(image_title)
self.changename=''
self.skip=0
- uploader=uploader.decode('utf-8')
+ self.url=url
+ self.uploader="Unkown"
+ #uploader.decode('utf-8')
scrollbar=Scrollbar(self.root, orient=VERTICAL)
- etiqueta=Label(self.root,text=u"Enter new name or leave blank.")
+ label=Label(self.root,text=u"Enter new name or leave blank.")
imageinfo=Label(self.root, text='Uploaded by '+uploader+'.')
- texto=Text(self.root)
- texto.insert(END, contenido.decode('utf-8'))
- texto.config(state=DISABLED, height=8, width=40, padx=0, pady=0, wrap=WORD, yscrollcommand=scrollbar.set)
- scrollbar.config(command=texto.yview)
- self.entrada=Entry(self.root)
+ textarea=Text(self.root)
+ textarea.insert(END, content.encode('utf-8'))
+ textarea.config(state=DISABLED, height=8, width=40, padx=0, pady=0, wrap=WORD, yscrollcommand=scrollbar.set)
+ scrollbar.config(command=textarea.yview)
+ self.entry=Entry(self.root)
- self.listado=Listbox(self.root, bg="white", height=5)
+ self.templatelist=Listbox(self.root, bg="white", height=5)
+
+ for template in templates:
+ self.templatelist.insert(END, template)
+ autoskipButton=Button(self.root, text="Add to AutoSkip", command=self.add2autoskip)
+ browserButton=Button(self.root, text='View in browser', command=self.openInBrowser)
+ skipButton=Button(self.root, text="Skip", command=self.skipFile)
+ okButton=Button(self.root, text="OK", command=self.okFile)
- self.plantillas=[]
+ ##Start grid
+ label.grid(row=0)
+ okButton.grid(row=0, column=1, rowspan=2)
+ skipButton.grid(row=0, column=2, rowspan=2)
+ browserButton.grid(row=0, column=3, rowspan=2)
- for chuleta in contenido.split('{{')[1:]:
- trytosplit=re.split(ur'(?:\}\}|\|)', chuleta)
- if trytosplit !=[]:
- plantilla=trytosplit[0]
- for char in ['}', ']', '{', '[']:
- if char in plantilla:
- plantilla=''
- if plantilla.lower()=='information':
- plantilla=''
- if plantilla !='':
- self.plantillas.append(plantilla)
- for plantilla in self.plantillas:
- self.listado.insert(END, plantilla)
- addB=Button(self.root, text="Add to AutoSkip", command=self.add2autoskip)
- browser=Button(self.root, text='View in browser', command=self.oib)
- saltar=Button(self.root, text="Skip", command=self.skipF)
- ok=Button(self.root, text="OK", command=self.okF)
-
-##Start grid
- etiqueta.grid(row=0)
- ok.grid(row=0, column=1, rowspan=2)
- saltar.grid(row=0, column=2, rowspan=2)
- browser.grid(row=0, column=3, rowspan=2)
-
- self.entrada.grid(row=1)
+ self.entry.grid(row=1)
- texto.grid(row=2, column=1, columnspan=3)
+ textarea.grid(row=2, column=1, columnspan=3)
scrollbar.grid(row=2, column=5)
- self.listado.grid(row=2, column=0)
+ self.templatelist.grid(row=2, column=0)
- addB.grid(row=3, column=0)
+ autoskipButton.grid(row=3, column=0)
imageinfo.grid(row=3, column=1, columnspan=4)
- def okF(self):
- self.changename=self.entrada.get()
+
+
+ def okFile(self):
+ '''
+ The user pressed the OK button.
+ '''
+ self.changename=self.entry.get()
self.root.destroy()
- def skipF(self):
+ def skipFile(self):
+ '''
+ The user pressed the Skip button.
+ '''
self.skip=1
self.root.destroy()
- def oib(self):
- webbrowser.open('http://'+six[1].hostname()+'/wiki/'+self.nP.urlname())
+ def openInBrowser(self):
+ '''
+ The user pressed the View in browser button.
+ '''
+ webbrowser.open(self.url)
def add2autoskip(self):
- identificador=int(self.listado.curselection()[0])
- template=self.plantillas[identificador]
+ '''
+ The user pressed the Add to AutoSkip button.
+ '''
+ templateid=int(self.templatelist.curselection()[0])
+ template=self.templatelist.get(templateid)
toadd=codecs.open(archivo, 'a', 'utf-8')
toadd.write('{{'+template)
toadd.close()
- self.skipF()
+ self.skipFile()
def getnewname(self):
+ '''
+ Activate the dialog and return the new name and if the image is skipped.
+ '''
self.root.mainloop()
return (self.changename, self.skip)
def doiskip(pagetext):
+ '''
+ Skip this image or not.
+ Returns True if the image is on the skip list, otherwise False
+ '''
saltos=getautoskip()
#print saltos
for salto in saltos:
@@ -496,79 +271,71 @@
return True
return False
-six=['These should', 'both be changed']
-
def main(args):
+ generator = None;
+ #newname = "";
+ imagepage = None;
+ # Load a lot of default generators
+ genFactory = pagegenerators.GeneratorFactory()
- lang=u''
- site=u''
- cat = u''
- startingpoint=u''
- verifyDescription=False
- keepFilename = False
-
- for arg in args:
- if arg.startswith('-start:'):
- startingpoint=arg.split('-start:')[1]
- elif arg.startswith('-cat:'):
- cat=arg.split('-cat:')[1]
- elif arg.startswith('-lang:'):
- lang=arg.split('-lang:')[1]
- elif arg.startswith('-site:'):
- site=arg.split('-site:')[1]
+ for arg in wikipedia.handleArgs():
+ if arg.startswith('-page'):
+ if len(arg) == 5:
+ generator = [wikipedia.Page(wikipedia.getSite(), wikipedia.input(u'What page do you want to use?'))]
+ else:
+ generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
+ elif arg == '-always':
+ always = True
else:
- print 'Argument: '+str([arg])+' is not valid'
- print 'ourcat: '+cat
- if (len(site)>1, len(lang)>1)==(True, True):
- try:
- sitio=wikipedia.Site(lang, site)
- six[1]=sitio
- except:
- print str((site, lang))+' didnt work out. Defaulting to nl.wikipedia.'
- six[1]=wikipedia.Site('nl', 'wikipedia')
- else:
- six[1]=wikipedia.Site('nl', 'wikipedia')
- print "Working from "+str(six[1])
- seis=pageText('http://'+six[1].hostname()+'/w/query.php?what=namespaces&format=xml').split('<ns id="6">')[1].split('</ns>')[0]
- seis=urllib.quote(seis)
- six[0]=seis
- print six
- if cat != u'':
- categ=wikipedia.Page(six[1], 'Category:'+cat.decode('utf-8'))
- #Wikipedia:Verplaats naar Wikimedia Commons
- categorizadas=getcatimgs(categ, startingpoint)
+ generator = genFactory.handleArg(arg)
+ if not generator:
+ raise add_text.NoEnoughData('You have to specify the generator you want to use for the script!')
- elif startingpoint != u'':
- categorizadas=getcatimgs('', startingpoint)
+ pregenerator = pagegenerators.PreloadingGenerator(generator)
+ for page in pregenerator:
+ if page.exists() and (page.namespace() == 6) and (not page.isRedirectPage()) :
+ imagepage = wikipedia.ImagePage(page.site(), page.title())
+
+ #First do autoskip.
+ if doiskip(imagepage.get()):
+ wikipedia.output("Skipping " + page.title())
+ skip = True
+ else:
+ # The first upload is last in the list.
+ (datetime, username, resolution, size, comment) = imagepage.getFileVersionHistory().pop()
+ while True:
- for categorizada in categorizadas:
- #print categorizada
- url=categorizada[0]
- tenemosuncambio=0
- nn=url.split('/')[-1]
- if doiskip(categorizada[2]):
- print "Autoskipping " + nn
- continue
- #changename=wikipedia.input(u'The name on Commons will be '+nn+', ok? Enter a better name or press ENTER to proceed: ')
- changename=Tkstuff(nn, categorizada[2], categorizada[3]).getnewname()
- print ('changename', changename)
- if len(changename[0])!=0:
- nn=changename[0].encode('utf-8')
- tenemosuncambio=1
- elif changename[1]==1:
- print 'skipping this file'
- continue
- imageP=wikipedia.Page(six[1], categorizada[1])
- CP=wikipedia.Page(wikipedia.Site('commons', 'commons'), 'Image:'+nn.decode('utf-8'))
- if CP.exists():
- nn=Tkstuff(nn[0], categorizada[2], categorizada[3], commonsconflict=1).getnewname()
- if nn[1]==1:
- print 'skipping this file'
- continue
-
- thread.start_new_thread(getCH, (url, imageP, nn, tenemosuncambio))
+            # Do the Tkdialog to accept/reject and change the name
+ (newname, skip)=Tkdialog(imagepage.titleWithoutNamespace(), imagepage.get(), username, imagepage.permalink(), imagepage.templates()).getnewname()
+
+ if skip:
+ wikipedia.output('Skipping this image')
+ break
+
+ # Did we enter a new name?
+ if len(newname)==0:
+ #Take the old name
+ newname=imagepage.titleWithoutNamespace()
+
+ # Check if the image already exists
+ CommonsPage=wikipedia.Page(wikipedia.Site('commons', 'commons'), 'Image:'+newname)
+
+ if not CommonsPage.exists():
+ break
+ else:
+ wikipedia.output('Image already exists, pick another name or skip this image')
+                # We don't overwrite images, pick another name, go to the start of the loop
+ if not skip:
+ imageTransfer(imagepage, newname).start()
+
+ wikipedia.output(u'Still ' + str(threading.activeCount()) + u' active threads, lets wait')
+ for openthread in threading.enumerate():
+ if openthread != threading.currentThread():
+ openthread.join()
+ wikipedia.output(u'All threads are done')
+
if __name__ == "__main__":
try:
main(sys.argv[1:])
Revision: 5224
Author: wikipedian
Date: 2008-04-16 10:40:54 +0000 (Wed, 16 Apr 2008)
Log Message:
-----------
Wikimedia server message seems to have been changed
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-04-16 10:36:16 UTC (rev 5223)
+++ trunk/pywikipedia/wikipedia.py 2008-04-16 10:40:54 UTC (rev 5224)
@@ -1350,7 +1350,10 @@
# We might have been using an outdated token
output(u"Changing page has failed. Retrying.")
return self._putPage(text, comment, watchArticle, minorEdit, newPage, token=self.site().getToken(sysop = sysop, getagain = True), newToken = True, sysop = sysop)
- if data.find("<title>Wikimedia Error</title>") > -1:
+ # I think the error message title was changed from "Wikimedia Error"
+ # to "Wikipedia has a problem", but I'm not sure. Maybe we could
+ # just check for HTTP Status 500 (Internal Server Error)?
+ if "<title>Wikimedia Error</title>" in data or "has a problem</title>" in data:
output(
u"Wikimedia has technical problems; will retry in %i minute%s."
% (retry_delay, retry_delay != 1 and "s" or ""))
@@ -1359,7 +1362,7 @@
if retry_delay > 30:
retry_delay = 30
continue
- if data.find(self.site().mediawiki_message('readonly')) or data.find(self.site().mediawiki_message('readonly_lag')):
+ if self.site().mediawiki_message('readonly') in data or self.site().mediawiki_message('readonly_lag') in data:
output(u"The database is currently locked for write access; will retry in %i minute%s."
% (retry_delay, retry_delay != 1 and "s" or ""))
time.sleep(60 * retry_delay)
Revision: 5222
Author: wikipedian
Date: 2008-04-16 10:23:09 +0000 (Wed, 16 Apr 2008)
Log Message:
-----------
slightly lifted the category restriction on de.wikipedia
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-04-15 19:20:01 UTC (rev 5221)
+++ trunk/pywikipedia/wikipedia.py 2008-04-16 10:23:09 UTC (rev 5222)
@@ -3342,8 +3342,8 @@
if site is None:
site = getSite()
- if site.sitename() == 'wikipedia:de':
- raise Error('The PyWikipediaBot is no longer allowed to touch categories on the German Wikipedia. See http://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/bis_2006…')
+ if site.sitename() == 'wikipedia:de' and "{{Personendaten" in oldtext:
+ raise Error('The PyWikipediaBot is no longer allowed to touch categories on the German Wikipedia on pages that contain the person data template because of the non-standard placement of that template. See http://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/bis_2006…')
s = categoryFormat(new, insite = site)
if addOnly:
Revision: 5221
Author: multichill
Date: 2008-04-15 19:20:01 +0000 (Tue, 15 Apr 2008)
Log Message:
-----------
First version of commonscat.py, a tool to add the commonscat template to wikipedia categories.
Added Paths:
-----------
trunk/pywikipedia/commonscat.py
Added: trunk/pywikipedia/commonscat.py
===================================================================
--- trunk/pywikipedia/commonscat.py (rev 0)
+++ trunk/pywikipedia/commonscat.py 2008-04-15 19:20:01 UTC (rev 5221)
@@ -0,0 +1,228 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+With this tool you can add the template {{commonscat}} to categories.
+The tool works by following the interwiki links. If the template is present on
+another language page, the bot will use it.
+
+You could probably use it at articles as well, but this isn't tested.
+
+This bot uses pagegenerators to get a list of pages. For example to go through all categories:
+commonscat.py -start:Category:!
+
+Commonscat bot:
+
+Take a page. Follow the interwiki's and look for the commonscat template
+*Found zero templates. Done.
+*Found one template. Add this template
+*Found more templates. Ask the user <- still have to implement this
+
+TODO:
+*Update interwiki's at commons
+*Collect all possibilities also if local wiki already has link.
+*Better support for other templates (translations) / redundant templates.
+*Check mode, only check pages which already have the template
+*More efficient like interwiki.py
+*Possibility to update other languages in the same run
+
+"""
+
+#
+# (C) Multichill, 2008
+#
+# Distributed under the terms of the MIT license.
+#
+
+import wikipedia, config, pagegenerators, add_text
+
+commonscatTemplates = {
+ 'af' : u'CommonsKategorie',
+ 'als' : u'Commonscat',
+ 'az' : u'CommonsKat',
+ 'bg' : u'Commonscat',
+ 'ca' : u'Commonscat',
+ 'cs' : u'Commonscat',
+ 'da' : u'Commonscat',
+ 'de' : u'Commonscat',
+ 'en' : u'Commonscat',
+ 'eo' : u'Commonscat',
+ 'es' : u'Commonscat',
+ 'eu' : u'Commonskat',
+ 'fi' : u'Commonscat',
+ 'fr' : u'Commonscat',
+ 'hr' : u'Commonscat',
+ 'hu' : u'Közvagyonkat',
+ 'id' : u'Commonscat',
+ 'io' : u'Commonscat',
+ 'is' : u'CommonsCat',
+ 'it' : u'Commonscat',
+ 'ja' : u'Commonscat',
+ 'ko' : u'Commonscat',
+ 'lt' : u'Commonscat',
+ 'lv' : u'Commonscat',
+ 'mk' : u'Ризница-врска',
+ 'ms' : u'Commonscat',
+ 'nl' : u'Commonscat',
+ 'nn' : u'Commonscat',
+ 'no' : u'Commonscat',
+ 'oc' : u'Commonscat',
+ 'os' : u'Commonscat',
+ 'pl' : u'Commonscat',
+ 'pt' : u'Commonscat',
+ 'ro' : u'Commonscat',
+ 'ru' : u'Commonscat',
+ 'scn' : u'Commonscat',
+ 'sh' : u'Commonscat',
+ 'simple' : u'Commonscat',
+ 'sk' : u'Commonscat',
+ 'sl' : u'Kategorija v Zbirki',
+ 'sr' : u'Commonscat',
+ 'su' : u'Commonscat',
+ 'sv' : u'Commonscat',
+ 'th' : u'Commonscat',
+ 'tr' : u'CommonsKat',
+ 'uk' : u'Commonscat',
+ 'vi' : u'Commonscat',
+ 'zh' : u'Commonscat',
+ 'zh-yue' : u'同享類'
+}
+
+def getTemplate (lang = None):
+ '''
+ Get the template name in a language. Expects the language code, returns the translation.
+ '''
+ if commonscatTemplates.has_key(lang):
+ return commonscatTemplates[lang]
+ else:
+ return u'Commonscat'
+
+def updateInterwiki (wikipediaPage = None, commonsPage = None):
+ '''
+ Update the interwiki's at commons from a wikipedia page. The bot just replaces the interwiki links at the commons page with the interwiki's from the wikipedia page.
+ This should probably be more intelligent. We could use add all the interwiki's and remove duplicates. Or only remove language links if multiple language links to the same language exist.
+
+ This function is disabled for the moment until I figure out what the best way is to update the interwiki's.
+ '''
+ interwikis = {}
+ comment= u''
+ interwikilist = wikipediaPage.interwiki()
+ interwikilist.append(wikipediaPage)
+
+ for interwikiPage in interwikilist:
+ interwikis[interwikiPage.site()]=interwikiPage
+ oldtext = commonsPage.get()
+ # The commonssite object doesn't work with interwiki's
+ newtext = wikipedia.replaceLanguageLinks(oldtext, interwikis, wikipedia.getSite(u'nl'))
+ comment = u'Updating interwiki\'s from [[' + wikipediaPage.site().language() + u':' + wikipediaPage.title() + u']]'
+
+ if newtext != oldtext:
+ #This doesn't seem to work. Newtext has some trailing whitespace
+ wikipedia.showDiff(oldtext, newtext)
+ commonsPage.put(newtext=newtext, comment=comment)
+
+
+def addCommonscat (page = None, summary = None, always = False):
+ '''
+ Take a page. Go to all the interwiki page looking for a commonscat template.
+ When all the interwiki's links are checked and a proper category is found add it to the page.
+ '''
+ commonscat = ""
+ commonscatpage = None
+ commonscats = []
+
+ wikipedia.output("Working on " + page.title());
+ if getTemplate(page.site().language()) in page.templates():
+ wikipedia.output("Commonscat template is already on " + page.title());
+ #for template in page.templatesWithParams():
+ # if ((template[0]==getTemplate(page.site().language())) and (len(template[1]) > 0)):
+ # commonscatpage = getCommonscat(template[1][0])
+ # if commonscatpage != None:
+ # updateInterwiki (page, commonscatpage)
+ # #Should remove the template if something is wrong
+
+ else:
+ #Follow the interwiki's
+ for ipage in page.interwiki():
+ #See if commonscat is present
+ if getTemplate(ipage.site().language()) in ipage.templates():
+ #Go through all the templates at the page
+ for template in ipage.templatesWithParams():
+ #We found the template and it has the parameter set.
+ if ((template[0]==getTemplate(ipage.site().language())) and (len(template[1]) > 0)):
+ commonscatpage = getCommonscat(template[1][0])
+ if commonscatpage != None:
+ commonscats.append(commonscatpage);
+ wikipedia.output("Found link for " + page.title() + " at [[" + ipage.site().language() + ":" + ipage.title() + "]] to " + commonscatpage.title() + ".");
+ commonscatpage = None
+ if len(commonscats) > 0:
+ commonscatpage = commonscats.pop();
+ commonscat = commonscatpage.titleWithoutNamespace()
+ #We found one or more commonscat links, build the template and add it to our page
+ #TODO: We should check if we found more than one different link.
+ commonscat = "{{" + getTemplate(page.site().language()) + "|" + commonscat + "}}";
+ add_text.add_text(page, commonscat, summary, None, None, always);
+ #updateInterwiki(page, commonscatpage)
+ return (True, always);
+
+def getCommonscat (name = ""):
+ '''
+ This function will return a page object of the commons page
+ If the page is a redirect this function tries to follow it.
+ If the page doesn't exist the function will return None
+ '''
+ #wikipedia.output("getCommonscat: " + name );
+ result = wikipedia.Page(wikipedia.getSite("commons", "commons"), "Category:" + name);
+ if not result.exists():
+ #wikipedia.output("getCommonscat : The category doesnt exist.");
+ return None
+ elif result.isRedirectPage():
+ #wikipedia.output("getCommonscat : The category is a redirect");
+ return result.getRedirectTarget();
+ elif "Category redirect" in result.templates():
+ #wikipedia.output("getCommonscat : The category is a category redirect");
+ for template in result.templatesWithParams():
+ if ((template[0]=="Category redirect") and (len(template[1]) > 0)):
+ return getCommonscat(template[1][0])
+ elif result.isDisambig():
+ #wikipedia.output("getCommonscat : The category is disambigu");
+ return None
+ else:
+ return result
+
+def main():
+ '''
+ Parse the command line arguments and get a pagegenerator to work on.
+ Iterate through all the pages.
+ '''
+ summary = None; generator = None; always = False
+ # Load a lot of default generators
+ genFactory = pagegenerators.GeneratorFactory()
+
+ for arg in wikipedia.handleArgs():
+ if arg.startswith('-summary'):
+ if len(arg) == 8:
+ summary = wikipedia.input(u'What summary do you want to use?')
+ else:
+ summary = arg[9:]
+ elif arg.startswith('-page'):
+ if len(arg) == 5:
+ generator = [wikipedia.Page(wikipedia.getSite(), wikipedia.input(u'What page do you want to use?'))]
+ else:
+ generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
+ elif arg == '-always':
+ always = True
+ else:
+ generator = genFactory.handleArg(arg)
+ if not generator:
+ raise add_text.NoEnoughData('You have to specify the generator you want to use for the script!')
+
+ pregenerator = pagegenerators.PreloadingGenerator(generator)
+
+ for page in pregenerator:
+ (status, always) = addCommonscat(page, summary, always)
+
+if __name__ == "__main__":
+ try:
+ main()
+ finally:
+ wikipedia.stopme()
Feature Requests item #1941798, was opened at 2008-04-14 00:32
Message generated for change (Tracker Item Submitted) made by Item Submitter
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603141&aid=1941798&group_…
Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: None
Group: None
Status: Open
Priority: 5
Private: No
Submitted By: Nobody/Anonymous (nobody)
Assigned to: Nobody/Anonymous (nobody)
Summary: auto-save interwikidump
Initial Comment:
please add an option to automatically save interwikidump files at a specified period, not just when the script is user-interrupted or fails, to save a list of active articles. this will make it possible to resume work (rather than restart from beginning) in case something nasty like power failure or PC reboot happens.
----------------------------------------------------------------------
You can respond by visiting:
https://sourceforge.net/tracker/?func=detail&atid=603141&aid=1941798&group_…
Revision: 5218
Author: filnik
Date: 2008-04-13 14:08:44 +0000 (Sun, 13 Apr 2008)
Log Message:
-----------
Deleting a debug-print, deleting the deleted variable, not working correctly
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2008-04-12 19:09:12 UTC (rev 5217)
+++ trunk/pywikipedia/wikipedia.py 2008-04-13 14:08:44 UTC (rev 5218)
@@ -828,7 +828,6 @@
def previousRevision(self):
"""Return the revision id for the previous revision of this Page."""
vh = self.getVersionHistory(revCount=2)
- print vh
return vh[1][0]
def exists(self):
@@ -1155,7 +1154,7 @@
force, callback))
def put(self, newtext, comment=None, watchArticle=None, minorEdit=True,
- force=False, deleted = True):
+ force=False):
"""Save the page with the contents of the first argument as the text.
Optional parameters:
@@ -1164,8 +1163,7 @@
watchArticle: a bool, add or remove this Page to/from bot user's
watchlist (if None, leave watchlist status unchanged)
minorEdit: mark this edit as minor if True
- force: ignore botMayEdit() setting
-
+ force: ignore botMayEdit() setting.
"""
# Login
try:
@@ -1209,10 +1207,10 @@
if self.site().lang == 'eo':
newtext = encodeEsperantoX(newtext)
return self._putPage(newtext, comment, watchArticle, minorEdit,
- newPage, self.site().getToken(sysop = sysop), sysop = sysop, deleted = deleted)
+ newPage, self.site().getToken(sysop = sysop), sysop = sysop)
def _putPage(self, text, comment=None, watchArticle=False, minorEdit=True,
- newPage=False, token=None, newToken=False, sysop=False, deleted=True):
+ newPage=False, token=None, newToken=False, sysop=False):
"""Upload 'text' as new content of Page by filling out the edit form.
Don't use this directly, use put() instead.
@@ -1299,7 +1297,7 @@
time.sleep(5)
continue
# A second text area means that an edit conflict has occured.
- if 'id=\'wpTextbox2\' name="wpTextbox2"' in data and deleted == True:
+ if 'id=\'wpTextbox2\' name="wpTextbox2"' in data:
raise EditConflict(u'An edit conflict has occured.')
if self.site().has_mediawiki_message("spamprotectiontitle")\
and self.site().mediawiki_message('spamprotectiontitle') in data: