Revision: 8388 Author: multichill Date: 2010-08-07 16:08:55 +0000 (Sat, 07 Aug 2010)
Log Message: ----------- Implemented 3 different threads: 1. The fetcher thread gets all information. 2. The user thread does the user interaction. 3. The uploader thread does the actual uploading.
This speeds up the bot a lot.
Modified Paths: -------------- trunk/pywikipedia/imagecopy_enwp.py
Modified: trunk/pywikipedia/imagecopy_enwp.py =================================================================== --- trunk/pywikipedia/imagecopy_enwp.py 2010-08-07 13:37:23 UTC (rev 8387) +++ trunk/pywikipedia/imagecopy_enwp.py 2010-08-07 16:08:55 UTC (rev 8388) @@ -51,6 +51,7 @@ import os, sys, re, codecs import urllib, httplib, urllib2 import webbrowser +from Queue import Queue import time, threading import wikipedia, config, socket import pagegenerators, add_text @@ -267,255 +268,252 @@ return (self.filename, self.description, self.date, self.source, self.author, self.licensetemplate, self.categories, self.skip)
-def doiskip(imagepage): +class imageFetcher(threading.Thread): ''' - Skip this image or not. - Returns True if the image is on the skip list, otherwise False + Tries to fetch information for all images in the generator ''' - for template in imagepage.templates(): - if template in skipTemplates: - wikipedia.output(u'Found ' + template + u' which is on the template skip list') - return True - return False + def __init__ ( self, pagegenerator, prefetchQueue): + self.pagegenerator = pagegenerator + self.prefetchQueue = prefetchQueue + imagerecat.initLists() + threading.Thread.__init__ ( self )
-def getNewFields(imagepage): - ''' - Build a new description based on the imagepage - ''' - if u'{{Information' in imagepage.get() or u'{{information' in imagepage.get(): - (description, date, source, author) = getNewFieldsFromInformation(imagepage) - else: - (description, date, source, author) = getNewFieldsFromFreetext(imagepage) + def run(self): + for page in self.pagegenerator: + self.processImage(page) + self.prefetchQueue.put(None) + wikipedia.output(u'Fetched all images.') + return True
- licensetemplate = getNewLicensetemplate(imagepage) - categories = getNewCategories(imagepage) - return (description, date, source, author, licensetemplate, categories) + def processImage(self, page): + ''' + Work on a single image + ''' + if page.exists() and (page.namespace() == 6) and (not page.isRedirectPage()): + imagepage = wikipedia.ImagePage(page.site(), page.title())
-def getNewFieldsFromInformation(imagepage): - ''' - ''' - description = u'' - date = u'' - source = u'' - author = u'' - permission = u'' - other_versions = u'' - text = imagepage.get() - # Need to add the permission field - # Need to use pywikipedia template parser code - regexes =[u'{{Information[\s\r\n]*|[\s\r\n]*description[\s\r\n]*=(?P<description>.*)|[\s\r\n]*source[\s\r\n]*=(?P<source>.*)|[\s\r\n]*date[\s\r\n]*=(?P<date>.*)|[\s\r\n]*author[\s\r\n]*=(?P<author>.*)|[\s\r\n]*permission.*=(?P<permission>[^}]*)|[\s\r\n]*other_versions.*=(?P<other_versions>[^}]*)}}', - u'{{Information[\s\r\n]*|[\s\r\n]*description[\s\r\n]*=(?P<description>.*)|[\s\r\n]*source[\s\r\n]*=(?P<source>.*)|[\s\r\n]*date[\s\r\n]*=(?P<date>.*)|[\s\r\n]*author[\s\r\n]*=(?P<author>.*)|[\s\r\n]*other_versions.*=(?P<other_versions>[^}]*)}}', - ] + #First do autoskip. + if self.doiskip(imagepage): + wikipedia.output(u'Skipping %s : Got a template on the skip list.' % page.title()) + return False
+ text = imagepage.get() + foundMatch = False + for (regex, replacement) in licenseTemplates: + match = re.search(regex, text, re.IGNORECASE) + if match: + foundMatch = True + if not foundMatch: + wikipedia.output(u'Skipping %s : No suitable license template was found.' % page.title()) + return False + self.prefetchQueue.put(self.getNewFields(imagepage))
- for regex in regexes: - match =re.search(regex, text, re.IGNORECASE|re.DOTALL) - if match: - description = convertLinks(match.group(u'description').strip(), imagepage.site()) - - date = match.group(u'date').strip() - if date == u'': - date = getUploadDate(imagepage) + def doiskip(self, imagepage): + ''' + Skip this image or not. + Returns True if the image is on the skip list, otherwise False + ''' + for template in imagepage.templates(): + if template in skipTemplates: + wikipedia.output(u'Found ' + template + u' which is on the template skip list') + return True + return False
- source = getSource(imagepage, source=convertLinks(match.group(u'source').strip(), imagepage.site())) + def getNewFields(self, imagepage): + ''' + Build a new description based on the imagepage + ''' + if u'{{Information' in imagepage.get() or u'{{information' in imagepage.get(): + (description, date, source, author) = self.getNewFieldsFromInformation(imagepage) + else: + (description, date, source, author) = self.getNewFieldsFromFreetext(imagepage)
- author = convertLinks(match.group(u'author').strip(), imagepage.site()) - if author == u'': - author = getAuthorText(imagepage) - - if u'permission' in match.groupdict(): - permission = convertLinks(match.group(u'permission').strip(), imagepage.site()) - if u'other_versions' in match.groupdict(): - other_versions = convertLinks(match.group(u'other_versions').strip(), imagepage.site()) - # Return the stuff we found - return (description, date, source, author) - - #We didn't find anything, return the empty strings - return (description, date, source, author) + licensetemplate = self.getNewLicensetemplate(imagepage) + categories = self.getNewCategories(imagepage) + return (imagepage, description, date, source, author, licensetemplate, categories)
-def getNewFieldsFromFreetext(imagepage): - ''' - ''' - text = imagepage.get() - #text = re.sub(u'== Summary ==', u'', text, re.IGNORECASE) - #text = re.sub(u'== Licensing ==', u'', text, re.IGNORECASE) - #text = re.sub(u'{{(self|self2)|[^}]+}}', u'', text, re.IGNORECASE) + def getNewFieldsFromInformation(self, imagepage): + ''' + Try to extract fields from the current information template for the new information template. + ''' + description = u'' + date = u'' + source = u'' + author = u'' + permission = u'' + other_versions = u'' + text = imagepage.get() + # Need to add the permission field + # Need to use pywikipedia template parser code + regexes =[u'{{Information[\s\r\n]*|[\s\r\n]*description[\s\r\n]*=(?P<description>.*)|[\s\r\n]*source[\s\r\n]*=(?P<source>.*)|[\s\r\n]*date[\s\r\n]*=(?P<date>.*)|[\s\r\n]*author[\s\r\n]*=(?P<author>.*)|[\s\r\n]*permission.*=(?P<permission>[^}]*)|[\s\r\n]*other_versions.*=(?P<other_versions>[^}]*)}}', + u'{{Information[\s\r\n]*|[\s\r\n]*description[\s\r\n]*=(?P<description>.*)|[\s\r\n]*source[\s\r\n]*=(?P<source>.*)|[\s\r\n]*date[\s\r\n]*=(?P<date>.*)|[\s\r\n]*author[\s\r\n]*=(?P<author>.*)|[\s\r\n]*other_versions.*=(?P<other_versions>[^}]*)}}', + ] + + for regex in regexes: + match =re.search(regex, text, re.IGNORECASE|re.DOTALL) + if match: + description = self.convertLinks(match.group(u'description').strip(), imagepage.site()) + + date = match.group(u'date').strip() + if date == u'': + date = self.getUploadDate(imagepage)
- for toRemove in sourceGarbage: - text = re.sub(toRemove, u'', text, re.IGNORECASE) - - for (regex, repl) in licenseTemplates: - text = re.sub(regex, u'', text, re.IGNORECASE) + source = self.getSource(imagepage, source=self.convertLinks(match.group(u'source').strip(), imagepage.site()))
- text = wikipedia.removeCategoryLinks(text, imagepage.site()).strip() + author = self.convertLinks(match.group(u'author').strip(), imagepage.site()) + if author == u'': + author = self.getAuthorText(imagepage) + + if u'permission' in match.groupdict(): + permission = self.convertLinks(match.group(u'permission').strip(), imagepage.site()) + if u'other_versions' in match.groupdict(): + other_versions = self.convertLinks(match.group(u'other_versions').strip(), imagepage.site()) + # Return the stuff we found + return (description, date, source, author)
- description = convertLinks(text.strip(), imagepage.site()) - date = getUploadDate(imagepage) - source = getSource(imagepage) - author = getAuthorText(imagepage) - return (description, date, source, author) + #We didn't find anything, return the empty strings + return (description, date, source, author)
-def getUploadDate(imagepage): - # Get the original upload date - uploadtime = imagepage.getFileVersionHistory()[-1][0] - uploadDatetime = datetime.strptime(uploadtime, u'%Y-%m-%dT%H:%M:%SZ') - return u'{{Date|' + str(uploadDatetime.year) + u'|' + str(uploadDatetime.month) + u'|' + str(uploadDatetime.day) + u'}} (original upload date)' + def getNewFieldsFromFreetext(self, imagepage): + ''' + Try to extract fields from free text for the new information template. + ''' + text = imagepage.get() + #text = re.sub(u'== Summary ==', u'', text, re.IGNORECASE) + #text = re.sub(u'== Licensing ==', u'', text, re.IGNORECASE) + #text = re.sub(u'{{(self|self2)|[^}]+}}', u'', text, re.IGNORECASE)
-def getSource(imagepage, source=u''): - site = imagepage.site() - lang = site.language() - family = site.family.name - if source==u'': - source=u'{{Own}}' + for toRemove in sourceGarbage: + text = re.sub(toRemove, u'', text, re.IGNORECASE)
- return source.strip() + u'<BR />Transferred from [http://%(lang)s.%(family)s.org %(lang)s.%(family)s]' % {u'lang' : lang, u'family' : family} + for (regex, repl) in licenseTemplates: + text = re.sub(regex, u'', text, re.IGNORECASE)
-def getAuthorText(imagepage): - site = imagepage.site() - lang = site.language() - family = site.family.name - - firstuploader = getAuthor(imagepage) - #FIXME : Make other sites than Wikipedia work - return u'[[:%(lang)s:User:%(firstuploader)s|%(firstuploader)s]] at [http://%(lang)s.%(family)s.org %(lang)s.%(family)s]' % {u'lang' : lang, u'family' : family , u'firstuploader' : firstuploader} + text = wikipedia.removeCategoryLinks(text, imagepage.site()).strip() + + description = self.convertLinks(text.strip(), imagepage.site()) + date = self.getUploadDate(imagepage) + source = self.getSource(imagepage) + author = self.getAuthorText(imagepage) + return (description, date, source, author)
-def getAuthor(imagepage): - return imagepage.getFileVersionHistory()[-1][1].strip() + def getUploadDate(self, imagepage): + ''' + Get the original upload date to put in the date field of the new information template. If we really have nothing better. + ''' + uploadtime = imagepage.getFileVersionHistory()[-1][0] + uploadDatetime = datetime.strptime(uploadtime, u'%Y-%m-%dT%H:%M:%SZ') + return u'{{Date|' + str(uploadDatetime.year) + u'|' + str(uploadDatetime.month) + u'|' + str(uploadDatetime.day) + u'}} (original upload date)'
-def convertLinks(text, sourceSite): - lang = sourceSite.language() - family = sourceSite.family.name - conversions =[(u'[[([^[]|]+)|([^[]|]+)]]', u'[[:%(lang)s:\1|\2]]'), - (u'[[([^[]|]+)]]', u'[[:%(lang)s:\1|\1]]'), - ] - - for (regex, replacement) in conversions: - text = re.sub(regex, replacement % {u'lang' : lang, u'family' : family}, text) + def getSource(self, imagepage, source=u''): + ''' + Get the text to put in the source field of the new information template. + ''' + site = imagepage.site() + lang = site.language() + family = site.family.name + if source==u'': + source=u'{{Own}}' + + return source.strip() + u'<BR />Transferred from [http://%(lang)s.%(family)s.org %(lang)s.%(family)s]' % {u'lang' : lang, u'family' : family}
- return text + def getAuthorText(self, imagepage): + ''' + Get the original uploader to put in the author field of the new information template. + ''' + site = imagepage.site() + lang = site.language() + family = site.family.name + + firstuploader = self.getAuthor(imagepage) + return u'[[:%(lang)s:User:%(firstuploader)s|%(firstuploader)s]] at [http://%(lang)s.%(family)s.org %(lang)s.%(family)s]' % {u'lang' : lang, u'family' : family , u'firstuploader' : firstuploader}
-def getNewLicensetemplate(imagepage): - ''' - ''' - text = imagepage.get() - - site = imagepage.site() - lang = site.language() - family = site.family.name + def getAuthor(self, imagepage): + ''' + Get the first uploader. + ''' + return imagepage.getFileVersionHistory()[-1][1].strip()
- result = u'' - - for (regex, replacement) in licenseTemplates: - match = re.search(regex, text, re.IGNORECASE) - if match: - result = re.sub(regex, replacement, match.group(0), re.IGNORECASE) - return result % {u'author' : getAuthor(imagepage), - u'lang' : lang, - u'family' : family} + def convertLinks(self, text, sourceSite): + ''' + Convert links from the current wiki to Commons. + ''' + lang = sourceSite.language() + family = sourceSite.family.name + conversions =[(u'[[([^[]|]+)|([^[]|]+)]]', u'[[:%(lang)s:\1|\2]]'), + (u'[[([^[]|]+)]]', u'[[:%(lang)s:\1|\1]]'), + ]
- return result - -def getNewCategories(imagepage): - ''' - Get a categories for the image - Dont forget to filter - ''' - result = u'' - (commonshelperCats, usage, galleries) = imagerecat.getCommonshelperCats(imagepage) - newcats = imagerecat.applyAllFilters(commonshelperCats) - for newcat in newcats: - result = result + u'[[Category:' + newcat + u']] ' - return result + for (regex, replacement) in conversions: + text = re.sub(regex, replacement % {u'lang' : lang, u'family' : family}, text)
-def getOriginalUploadLog(imagepage): - filehistory = imagepage.getFileVersionHistory() - filehistory.reverse() + return text
- site = imagepage.site() - lang = site.language() - family = site.family.name - - sourceimage = imagepage.site().get_address(imagepage.title()).replace(u'&redirect=no&useskin=monobook', u'') - - result = u'== {{Original upload log}} ==\n' - result = result + u'The original description page is/was [http://%(lang)s.%(family)s.org%(sourceimage)s here]. All following user names refer to %(lang)s.%(family)s.\n' % {u'lang' : lang, u'family' : family , u'sourceimage' : sourceimage} - for (timestamp, username, resolution, size, comment) in filehistory: - date = datetime.strptime(timestamp, u'%Y-%m-%dT%H:%M:%SZ').strftime('%Y-%m-%d %H:%M') - result = result + u'* %(date)s [[:%(lang)s:user:%(username)s|%(username)s]] %(resolution)s (%(size)s bytes) ''<nowiki>%(comment)s</nowiki>''\n' % { - u'lang' : lang, - u'family' : family , - u'date' : date, - u'username' : username, - u'resolution': resolution, - u'size': size, - u'comment' : comment} + def getNewLicensetemplate(self, imagepage): + ''' + Get a license template to put on the image to be uploaded + ''' + text = imagepage.get()
- return result + site = imagepage.site() + lang = site.language() + family = site.family.name
- + result = u''
-def buildNewImageDescription(imagepage, description, date, source, author, licensetemplate, categories, checkTemplate): - ''' - Build a new information template - ''' - - site = imagepage.site() - lang = site.language() - family = site.family.name - - cid = u'' - if checkTemplate: - cid = cid + u'\n{{BotMoveToCommons|%(lang)s.%(family)s|year={{subst:CURRENTYEAR}}|month={{subst:CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}}}\n' % {u'lang' : lang, u'family' : family} - cid = cid + u'== {{int:filedesc}} ==\n' - cid = cid + u'{{Information\n' - cid = cid + u'|description={{%(lang)s|1=' % {u'lang' : lang, u'family' : family} - cid = cid + description + u'}}\n' - cid = cid + u'|date=' + date + u'\n' - cid = cid + u'|source=' + source + u'\n' - cid = cid + u'|author=' + author + u'\n' - cid = cid + u'|permission=\n' - cid = cid + u'|other_versions=\n' - cid = cid + u'}}\n' - cid = cid + u'== {{int:license}} ==\n' - cid = cid + licensetemplate + u'\n' - cid = cid + u'\n' - cid = cid + getOriginalUploadLog(imagepage) - cid = cid + u'__NOTOC__\n' - if categories.strip()==u'': - cid = cid + u'{{Subst:Unc}}' - else: - cid = cid + categories - return cid - - -def processImage(page, checkTemplate): - skip = False - if page.exists() and (page.namespace() == 6) and (not page.isRedirectPage()): - imagepage = wikipedia.ImagePage(page.site(), page.title()) - - #First do autoskip. - if doiskip(imagepage): - wikipedia.output(u'Skipping %s : Got a template on the skip list.' % page.title()) - return False - - text = imagepage.get() - foundMatch = False for (regex, replacement) in licenseTemplates: match = re.search(regex, text, re.IGNORECASE) if match: - foundMatch = True - if not foundMatch: - wikipedia.output(u'Skipping %s : No suitable license template was found.' % page.title()) - return False + result = re.sub(regex, replacement, match.group(0), re.IGNORECASE) + return result % {u'author' : self.getAuthor(imagepage), + u'lang' : lang, + u'family' : family} + + return result
- (description, date, source, author, licensetemplate, categories) = getNewFields(imagepage) + def getNewCategories(self, imagepage): + ''' + Get a categories for the image + Dont forget to filter + ''' + result = u'' + (commonshelperCats, usage, galleries) = imagerecat.getCommonshelperCats(imagepage) + newcats = imagerecat.applyAllFilters(commonshelperCats) + for newcat in newcats: + result = result + u'[[Category:' + newcat + u']] ' + return result
+class userInteraction(threading.Thread): + ''' + Prompt all images to the user. + ''' + def __init__ ( self, prefetchQueue, uploadQueue): + self.prefetchQueue = prefetchQueue + self.uploadQueue = uploadQueue + threading.Thread.__init__ ( self ) + + def run(self): while True: + fields = self.prefetchQueue.get() + if fields: + self.processImage(fields) + else: + break + self.uploadQueue.put(None) + wikipedia.output(u'User worked on all images.') + return True + + def processImage(self, fields): + ''' + Work on a single image + ''' + (imagepage, description, date, source, author, licensetemplate, categories) = fields + while True: # Do the Tkdialog to accept/reject and change te name (filename, description, date, source, author, licensetemplate, categories, skip)=Tkdialog(imagepage, description, date, source, author, licensetemplate, categories).getnewmetadata()
if skip: - wikipedia.output(u'Skipping %s : User pressed skip.' % page.title()) + wikipedia.output(u'Skipping %s : User pressed skip.' % imagepage.title()) return False
# Check if the image already exists @@ -524,13 +522,115 @@ break else: wikipedia.output('Image already exists, pick another name or skip this image') - # We dont overwrite images, pick another name, go to the start of the loop - - cid = buildNewImageDescription(imagepage, description, date, source, author, licensetemplate, categories, checkTemplate) + # We dont overwrite images, pick another name, go to the start of the loop + + self.uploadQueue.put((imagepage, filename, description, date, source, author, licensetemplate, categories)) + + +class uploader(threading.Thread): + ''' + Upload all images + ''' + def __init__ ( self, uploadQueue): + self.uploadQueue = uploadQueue + self.checktemplate = True + threading.Thread.__init__ ( self ) + + def run(self): + while True: #Change later + fields = self.uploadQueue.get() + if fields: + self.processImage(fields) + else: + break + return True + + def nochecktemplate(self): + ''' + Don't want to add {{BotMoveToCommons}} + ''' + self.checktemplate = False + return + + def processImage(self, fields): + ''' + Work on a single image + ''' + (imagepage, filename, description, date, source, author, licensetemplate, categories) = fields + cid = self.buildNewImageDescription(imagepage, description, date, source, author, licensetemplate, categories) wikipedia.output(cid) bot = UploadRobot(url=imagepage.fileUrl(), description=cid, useFilename=filename, keepFilename=True, verifyDescription=False, ignoreWarning = True, targetSite = wikipedia.getSite('commons', 'commons')) bot.run()
+ self.tagNowcommons(imagepage, filename) + self.replaceUsage(imagepage, filename) + + + def buildNewImageDescription(self, imagepage, description, date, source, author, licensetemplate, categories): + ''' + Build a new information template + ''' + + site = imagepage.site() + lang = site.language() + family = site.family.name + + cid = u'' + if self.checktemplate: + cid = cid + u'\n{{BotMoveToCommons|%(lang)s.%(family)s|year={{subst:CURRENTYEAR}}|month={{subst:CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}}}\n' % {u'lang' : lang, u'family' : family} + cid = cid + u'== {{int:filedesc}} ==\n' + cid = cid + u'{{Information\n' + cid = cid + u'|description={{%(lang)s|1=' % {u'lang' : lang, u'family' : family} + cid = cid + description + u'}}\n' + cid = cid + u'|date=' + date + u'\n' + cid = cid + u'|source=' + source + u'\n' + cid = cid + u'|author=' + author + u'\n' + cid = cid + u'|permission=\n' + cid = cid + u'|other_versions=\n' + cid = cid + u'}}\n' + cid = cid + u'== {{int:license}} ==\n' + cid = cid + licensetemplate + u'\n' + cid = cid + u'\n' + cid = cid + self.getOriginalUploadLog(imagepage) + cid = cid + u'__NOTOC__\n' + if categories.strip()==u'': + cid = cid + u'{{Subst:Unc}}' + else: + cid = cid + categories + return cid + + def getOriginalUploadLog(self, imagepage): + ''' + Get the original upload log to put at the bottom of the image description page at Commons. + ''' + filehistory = imagepage.getFileVersionHistory() + filehistory.reverse() + + site = imagepage.site() + lang = site.language() + family = site.family.name + + sourceimage = imagepage.site().get_address(imagepage.title()).replace(u'&redirect=no&useskin=monobook', u'') + + result = u'== {{Original upload log}} ==\n' + result = result + u'The original description page is/was [http://%(lang)s.%(family)s.org%(sourceimage)s here]. 
All following user names refer to %(lang)s.%(family)s.\n' % {u'lang' : lang, u'family' : family , u'sourceimage' : sourceimage} + for (timestamp, username, resolution, size, comment) in filehistory: + date = datetime.strptime(timestamp, u'%Y-%m-%dT%H:%M:%SZ').strftime('%Y-%m-%d %H:%M') + result = result + u'* %(date)s [[:%(lang)s:user:%(username)s|%(username)s]] %(resolution)s (%(size)s bytes) ''<nowiki>%(comment)s</nowiki>''\n' % { + u'lang' : lang, + u'family' : family , + u'date' : date, + u'username' : username, + u'resolution': resolution, + u'size': size, + u'comment' : comment} + + return result + + def tagNowcommons(self, imagepage, filename): + ''' + Tagged the imag which has been moved to Commons for deletion. + ''' if wikipedia.Page(wikipedia.getSite('commons', 'commons'), u'File:' + filename).exists(): #Get a fresh copy, force to get the page so we dont run into edit conflicts imtxt=imagepage.get(force=True) @@ -554,27 +654,32 @@ wikipedia.showDiff(imagepage.get(), imtxt + addTemplate) imagepage.put(imtxt + addTemplate, comment = commentText)
+ def replaceUsage(self, imagepage, filename): + ''' + If the image is uploaded under a different name, replace all usage. + ''' + if imagepage.titleWithoutNamespace() != filename: gen = pagegenerators.FileLinksGenerator(imagepage) preloadingGen = pagegenerators.PreloadingGenerator(gen) - - #If the image is uploaded under a different name, replace all instances - if imagepage.titleWithoutNamespace() != filename: - if imagepage.site().language() in imageMoveMessage: - moveSummary = imageMoveMessage[imagepage.site().language()] % (imagepage.titleWithoutNamespace(), filename) - else: - moveSummary = imageMoveMessage['_default'] % (imagepage.titleWithoutNamespace(), filename) - imagebot = ImageRobot(generator = preloadingGen, oldImage = imagepage.titleWithoutNamespace(), newImage = filename, summary = moveSummary, always = True, loose = True) - imagebot.run() + + if imagepage.site().language() in imageMoveMessage: + moveSummary = imageMoveMessage[imagepage.site().language()] % (imagepage.titleWithoutNamespace(), filename) + else: + moveSummary = imageMoveMessage['_default'] % (imagepage.titleWithoutNamespace(), filename) + imagebot = ImageRobot(generator = preloadingGen, oldImage = imagepage.titleWithoutNamespace(), newImage = filename, summary = moveSummary, always = True, loose = True) + imagebot.run()
+def main(args): + wikipedia.output(u'WARNING: This is an experimental bot') + wikipedia.output(u'WARNING: It will only work on self published work images') + wikipedia.output(u'WARNING: This bot is still full of bugs') + wikipedia.output(u'WARNING: Use at your own risk!')
-def main(args): generator = None; - #newname = ""; - imagepage = None; always = False checkTemplate = True - imagerecat.initLists() + # Load a lot of default generators genFactory = pagegenerators.GeneratorFactory()
@@ -590,16 +695,25 @@
pregenerator = pagegenerators.PreloadingGenerator(generator)
- for page in pregenerator: - processImage(page, checkTemplate) + prefetchQueue = Queue(maxsize=50) + uploadQueue = Queue(maxsize=200)
+ imageFetcherThread = imageFetcher(pregenerator, prefetchQueue) + userInteractionThread = userInteraction(prefetchQueue, uploadQueue) + uploaderThread = uploader(uploadQueue)
- wikipedia.output(u'Still ' + str(threading.activeCount()) + u' active threads, lets wait') - for openthread in threading.enumerate(): - if openthread != threading.currentThread(): - openthread.join() - wikipedia.output(u'All threads are done') + imageFetcherThread.daemon=False + userInteractionThread.daemon=False + uploaderThread.daemon=False + + if not checkTemplate: + uploaderThread.nochecktemplate() + + fetchDone = imageFetcherThread.start() + userDone = userInteractionThread.start() + uploadDone = uploaderThread.start()
+ if __name__ == "__main__": try: main(sys.argv[1:])
pywikipedia-svn@lists.wikimedia.org