Revision: 5610 Author: leogregianin Date: 2008-06-23 02:13:25 +0000 (Mon, 23 Jun 2008)
Log Message: ----------- This module is deprecated and has been replaced by new functionality in wikipedia.py. Move module to archive directory.
Added Paths: ----------- trunk/pywikipedia/archive/mediawiki_messages.py
Removed Paths: ------------- trunk/pywikipedia/mediawiki_messages.py
Added: trunk/pywikipedia/archive/mediawiki_messages.py =================================================================== --- trunk/pywikipedia/archive/mediawiki_messages.py (rev 0) +++ trunk/pywikipedia/archive/mediawiki_messages.py 2008-06-23 02:13:25 UTC (rev 5610) @@ -0,0 +1,218 @@ +# -*- coding: utf-8 -*- +""" +Allows access to the MediaWiki messages, that's the label texts of the MediaWiki +software in the current language. These can be used in other bots. + +The function refresh_messages() downloads all the current messages and saves +them to disk. It is run automatically when a bot first tries to access one of +the messages. It can be updated manually by running this script, e.g. when +somebody changed the current message at the wiki. The texts will also be +reloaded automatically once a month. + +Syntax: python mediawiki_messages [-all] + +Command line options: + -refresh - Reloads messages for the home wiki or for the one defined via + the -lang and -family parameters. + + -all - Reloads messages for all wikis where messages are already present + + If another parameter is given, it will be interpreted as a MediaWiki key. + The script will then output the respective value, without refreshing.. + +""" + +# (C) Daniel Herding, 2004 +# +# Distributed under the terms of the MIT license. + +##THIS MODULE IS DEPRECATED AND HAS BEEN REPLACED BY NEW FUNCTIONALITY IN +##WIKIPEDIA.PY. It is being retained solely for compatibility in case any +##custom-written bots rely upon it. 
Bot authors should replace any uses +##of this module as follows: +## +## OLD: mediawiki_messages.get(key, site) +## NEW: site.mediawiki_message(key) +## +## OLD: mediawiki_messages.has(key, site) +## NEW: site.has_mediawiki_message(key) +## +## OLD: mediawiki_messages.makepath(path) +## NEW: wikipedia.makepath(path) +## +########################################################################## + +import warnings +warnings.warn( +"""The mediawiki_messages module is deprecated and no longer +maintained; see the source code for new methods to replace +calls to this module.""", + DeprecationWarning, stacklevel=2) + + +import wikipedia +import re, sys, pickle +import os.path +import time +import codecs +import urllib +from BeautifulSoup import * + +__version__='$Id: mediawiki_messages.py 3731 2007-06-20 14:42:55Z russblau $' + +loaded = {} + +def get(key, site = None, allowreload = True): + site = site or wikipedia.getSite() + if loaded.has_key(site): + # Use cached copy if it exists. + dictionary = loaded[site] + else: + fn = 'mediawiki-messages/mediawiki-messages-%s-%s.dat' % (site.family.name, site.lang) + try: + # find out how old our saved dump is (in seconds) + file_age = time.time() - os.path.getmtime(fn) + # if it's older than 1 month, reload it + if file_age > 30 * 24 * 60 * 60: + print 'Current MediaWiki message dump is one month old, reloading' + refresh_messages(site) + except OSError: + # no saved dumped exists yet + refresh_messages(site) + f = open(fn, 'r') + dictionary = pickle.load(f) + f.close() + loaded[site] = dictionary + key = key[0].lower() + key[1:] + if dictionary.has_key(key): + return dictionary[key] + elif allowreload: + refresh_messages(site = site) + return get(key, site = site, allowreload = False) + else: + raise KeyError('MediaWiki Key %s not found' % key) + +def has(key, site = None, allowreload = True): + try: + get(key, site, allowreload) + return True + except KeyError: + return False + +def makepath(path): + """ creates missing 
directories for the given path and + returns a normalized absolute version of the path. + + - if the given path already exists in the filesystem + the filesystem is not modified. + + - otherwise makepath creates directories along the given path + using the dirname() of the path. You may append + a '/' to the path if you want it to be a directory path. + + from holger@trillke.net 2002/03/18 + """ + from os import makedirs + from os.path import normpath,dirname,exists,abspath + + dpath = normpath(dirname(path)) + if not exists(dpath): makedirs(dpath) + return normpath(abspath(path)) + +def refresh_messages(site = None): + site = site or wikipedia.getSite() + # get 'all messages' special page's path + path = site.allmessages_address() + print 'Retrieving MediaWiki messages for %s' % repr(site) + wikipedia.put_throttle() # It actually is a get, but a heavy one. + allmessages = site.getUrl(path) + + print 'Parsing MediaWiki messages' + soup = BeautifulSoup(allmessages, + convertEntities=BeautifulSoup.HTML_ENTITIES) + # The MediaWiki namespace in URL-encoded format, as it can contain + # non-ASCII characters and spaces. 
+ quotedMwNs = urllib.quote(site.namespace(8).replace(' ', '_').encode(site.encoding())) + mw_url = site.path() + "?title=" + quotedMwNs + ":" + altmw_url = site.path() + "/" + quotedMwNs + ":" + nicemw_url = site.nice_get_address(quotedMwNs + ":") + shortmw_url = "/" + quotedMwNs + ":" + ismediawiki = lambda url:url and (url.startswith(mw_url) + or url.startswith(altmw_url) + or url.startswith(nicemw_url) + or url.startswith(shortmw_url)) + # we will save the found key:value pairs here + dictionary = {} + + try: + for keytag in soup('a', href=ismediawiki): + # Key strings only contain ASCII characters, so we can save them as + # strs + key = str(keytag.find(text=True)) + keyrow = keytag.parent.parent + if keyrow['class'] == "orig": + valrow = keyrow.findNextSibling('tr') + assert valrow['class'] == "new" + value = unicode(valrow.td.string).strip() + elif keyrow['class'] == 'def': + value = unicode(keyrow('td')[1].string).strip() + else: + raise AssertionError("Unknown tr class value: %s" % keyrow['class']) + dictionary[key] = value + except Exception, e: + wikipedia.debugDump( 'MediaWiki_Msg', site, u'%s: %s while processing URL: %s' % (repr(e), str(e), unicode(path)), allmessages) + raise + + # Save the dictionary to disk + # The file is stored in the mediawiki_messages subdir. Create if necessary. 
+ if dictionary == {}: + wikipedia.debugDump( 'MediaWiki_Msg', site, u'Error URL: '+unicode(path), allmessages ) + sys.exit() + else: + f = open(makepath('mediawiki-messages/mediawiki-messages-%s-%s.dat' % (site.family.name, site.lang)), 'w') + pickle.dump(dictionary, f) + f.close() + print "Loaded %i values from %s" % (len(dictionary.keys()), site) + #print dictionary['sitestatstext'] + +def refresh_all_messages(): + import dircache, time + filenames = dircache.listdir('mediawiki-messages') + message_filenameR = re.compile('mediawiki-messages-([a-z:]+)-([a-z:]+).dat') + for filename in filenames: + match = message_filenameR.match(filename) + if match: + family = match.group(1) + lang = match.group(2) + site = wikipedia.getSite(code = lang, fam = family) + refresh_messages(site) + +def main(): + refresh_all = False + refresh = False + key = None + for arg in wikipedia.handleArgs(): + if arg == '-all': + refresh_all = True + elif arg == '-refresh': + refresh = True + else: + key = arg + if key: + wikipedia.output(get(key), toStdout = True) + elif refresh_all: + refresh_all_messages() + elif refresh: + refresh_messages(wikipedia.getSite()) + else: + wikipedia.showHelp('mediawiki_messages') + +if __name__ == "__main__": + try: + main() + except: + wikipedia.stopme() + raise + else: + wikipedia.stopme() +
Deleted: trunk/pywikipedia/mediawiki_messages.py =================================================================== --- trunk/pywikipedia/mediawiki_messages.py 2008-06-23 01:51:43 UTC (rev 5609) +++ trunk/pywikipedia/mediawiki_messages.py 2008-06-23 02:13:25 UTC (rev 5610) @@ -1,218 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Allows access to the MediaWiki messages, that's the label texts of the MediaWiki -software in the current language. These can be used in other bots. - -The function refresh_messages() downloads all the current messages and saves -them to disk. It is run automatically when a bot first tries to access one of -the messages. It can be updated manually by running this script, e.g. when -somebody changed the current message at the wiki. The texts will also be -reloaded automatically once a month. - -Syntax: python mediawiki_messages [-all] - -Command line options: - -refresh - Reloads messages for the home wiki or for the one defined via - the -lang and -family parameters. - - -all - Reloads messages for all wikis where messages are already present - - If another parameter is given, it will be interpreted as a MediaWiki key. - The script will then output the respective value, without refreshing.. - -""" - -# (C) Daniel Herding, 2004 -# -# Distributed under the terms of the MIT license. - -##THIS MODULE IS DEPRECATED AND HAS BEEN REPLACED BY NEW FUNCTIONALITY IN -##WIKIPEDIA.PY. It is being retained solely for compatibility in case any -##custom-written bots rely upon it. 
Bot authors should replace any uses -##of this module as follows: -## -## OLD: mediawiki_messages.get(key, site) -## NEW: site.mediawiki_message(key) -## -## OLD: mediawiki_messages.has(key, site) -## NEW: site.has_mediawiki_message(key) -## -## OLD: mediawiki_messages.makepath(path) -## NEW: wikipedia.makepath(path) -## -########################################################################## - -import warnings -warnings.warn( -"""The mediawiki_messages module is deprecated and no longer -maintained; see the source code for new methods to replace -calls to this module.""", - DeprecationWarning, stacklevel=2) - - -import wikipedia -import re, sys, pickle -import os.path -import time -import codecs -import urllib -from BeautifulSoup import * - -__version__='$Id$' - -loaded = {} - -def get(key, site = None, allowreload = True): - site = site or wikipedia.getSite() - if loaded.has_key(site): - # Use cached copy if it exists. - dictionary = loaded[site] - else: - fn = 'mediawiki-messages/mediawiki-messages-%s-%s.dat' % (site.family.name, site.lang) - try: - # find out how old our saved dump is (in seconds) - file_age = time.time() - os.path.getmtime(fn) - # if it's older than 1 month, reload it - if file_age > 30 * 24 * 60 * 60: - print 'Current MediaWiki message dump is one month old, reloading' - refresh_messages(site) - except OSError: - # no saved dumped exists yet - refresh_messages(site) - f = open(fn, 'r') - dictionary = pickle.load(f) - f.close() - loaded[site] = dictionary - key = key[0].lower() + key[1:] - if dictionary.has_key(key): - return dictionary[key] - elif allowreload: - refresh_messages(site = site) - return get(key, site = site, allowreload = False) - else: - raise KeyError('MediaWiki Key %s not found' % key) - -def has(key, site = None, allowreload = True): - try: - get(key, site, allowreload) - return True - except KeyError: - return False - -def makepath(path): - """ creates missing directories for the given path and - returns a normalized 
absolute version of the path. - - - if the given path already exists in the filesystem - the filesystem is not modified. - - - otherwise makepath creates directories along the given path - using the dirname() of the path. You may append - a '/' to the path if you want it to be a directory path. - - from holger@trillke.net 2002/03/18 - """ - from os import makedirs - from os.path import normpath,dirname,exists,abspath - - dpath = normpath(dirname(path)) - if not exists(dpath): makedirs(dpath) - return normpath(abspath(path)) - -def refresh_messages(site = None): - site = site or wikipedia.getSite() - # get 'all messages' special page's path - path = site.allmessages_address() - print 'Retrieving MediaWiki messages for %s' % repr(site) - wikipedia.put_throttle() # It actually is a get, but a heavy one. - allmessages = site.getUrl(path) - - print 'Parsing MediaWiki messages' - soup = BeautifulSoup(allmessages, - convertEntities=BeautifulSoup.HTML_ENTITIES) - # The MediaWiki namespace in URL-encoded format, as it can contain - # non-ASCII characters and spaces. 
- quotedMwNs = urllib.quote(site.namespace(8).replace(' ', '_').encode(site.encoding())) - mw_url = site.path() + "?title=" + quotedMwNs + ":" - altmw_url = site.path() + "/" + quotedMwNs + ":" - nicemw_url = site.nice_get_address(quotedMwNs + ":") - shortmw_url = "/" + quotedMwNs + ":" - ismediawiki = lambda url:url and (url.startswith(mw_url) - or url.startswith(altmw_url) - or url.startswith(nicemw_url) - or url.startswith(shortmw_url)) - # we will save the found key:value pairs here - dictionary = {} - - try: - for keytag in soup('a', href=ismediawiki): - # Key strings only contain ASCII characters, so we can save them as - # strs - key = str(keytag.find(text=True)) - keyrow = keytag.parent.parent - if keyrow['class'] == "orig": - valrow = keyrow.findNextSibling('tr') - assert valrow['class'] == "new" - value = unicode(valrow.td.string).strip() - elif keyrow['class'] == 'def': - value = unicode(keyrow('td')[1].string).strip() - else: - raise AssertionError("Unknown tr class value: %s" % keyrow['class']) - dictionary[key] = value - except Exception, e: - wikipedia.debugDump( 'MediaWiki_Msg', site, u'%s: %s while processing URL: %s' % (repr(e), str(e), unicode(path)), allmessages) - raise - - # Save the dictionary to disk - # The file is stored in the mediawiki_messages subdir. Create if necessary. 
- if dictionary == {}: - wikipedia.debugDump( 'MediaWiki_Msg', site, u'Error URL: '+unicode(path), allmessages ) - sys.exit() - else: - f = open(makepath('mediawiki-messages/mediawiki-messages-%s-%s.dat' % (site.family.name, site.lang)), 'w') - pickle.dump(dictionary, f) - f.close() - print "Loaded %i values from %s" % (len(dictionary.keys()), site) - #print dictionary['sitestatstext'] - -def refresh_all_messages(): - import dircache, time - filenames = dircache.listdir('mediawiki-messages') - message_filenameR = re.compile('mediawiki-messages-([a-z:]+)-([a-z:]+).dat') - for filename in filenames: - match = message_filenameR.match(filename) - if match: - family = match.group(1) - lang = match.group(2) - site = wikipedia.getSite(code = lang, fam = family) - refresh_messages(site) - -def main(): - refresh_all = False - refresh = False - key = None - for arg in wikipedia.handleArgs(): - if arg == '-all': - refresh_all = True - elif arg == '-refresh': - refresh = True - else: - key = arg - if key: - wikipedia.output(get(key), toStdout = True) - elif refresh_all: - refresh_all_messages() - elif refresh: - refresh_messages(wikipedia.getSite()) - else: - wikipedia.showHelp('mediawiki_messages') - -if __name__ == "__main__": - try: - main() - except: - wikipedia.stopme() - raise - else: - wikipedia.stopme() -