http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9477
Revision: 9477 Author: xqt Date: 2011-08-29 15:09:42 +0000 (Mon, 29 Aug 2011) Log Message: ----------- moved to archive
Removed Paths: ------------- trunk/pywikipedia/archive/README trunk/pywikipedia/archive/WdT.py
Deleted: trunk/pywikipedia/archive/README =================================================================== --- trunk/pywikipedia/archive/README 2011-08-29 15:09:15 UTC (rev 9476) +++ trunk/pywikipedia/archive/README 2011-08-29 15:09:42 UTC (rev 9477) @@ -1 +0,0 @@ -These bot scripts are deprecated. Do not use them!
Deleted: trunk/pywikipedia/archive/WdT.py =================================================================== --- trunk/pywikipedia/archive/WdT.py 2011-08-29 15:09:15 UTC (rev 9476) +++ trunk/pywikipedia/archive/WdT.py 2011-08-29 15:09:42 UTC (rev 9477) @@ -1,122 +0,0 @@ -# -*- coding: utf-8 -*- -""" -(C) 2004 Thomas R. Koll, tomk32@tomk32.de - Distributed under the terms of the MIT license. - -This bot consists of WdT.py and WdTXMLParser.py and -imports XML-files into Wikipedia. -The XML-file contains an automatically generated list -of the most significant words in current events -which the bot uses as article-links and compares to -a local list of all articles. Only the not-yet-written -articles will be saved on wikipedia. - -""" - -__version__='$Id: WdT.py,v 1.9 2005/12/21 17:51:26 wikipedian Exp $' - - -import WdTXMLParser,wikipedia,re,datetime,xml.sax,fileinput -import string - -DEBUG = 0 -host = "http://wortschatz.uni-leipzig.de/wort-des-tages/RDF/WdT/" - -localArticleList = "Stichwortliste_de_Wikipedia_2004-04-17_sortiert.txt" - -XMLfiles = { - "ort.xml" : "Orte", - "ereignis.xml" : "Ereignisse", - "kuenstler.xml" : "Kunst, Kultur und Wissenschaft", - "organisation.xml" : "Organisationen", - "politiker.xml" : "Politiker", - "schlagwort.xml" : u"Schlagwörter", - "sportler.xml" : "Sportler", - "sport.xml" : "Sport", - "person.xml" : "sonstige" - } -article = "Wikipedia:Wort_des_Tages" - -newText = "\n== " + str(datetime.date.today()) + " ==" - -#start the xml parser -ch = WdTXMLParser.WdTXMLParser() -parser = xml.sax.make_parser() -parser.setContentHandler(ch) - -# first we get the XML-File -for file in XMLfiles: - print "\ngetting: " + file, - parser.parse(host + file) - data = ch.result - print " parsing..." 
- # now we parse the file - - - # and make a result text for wikipedia - skip = [] - if localArticleList != "": - import string - add = {} - for a in data: - print "\nchecking: " + a, - userCommand = raw_input('[C]orrect, [S]kip or [K]eep?') - if userCommand == 'c': - b = raw_input('Correct it: ') - if b != a: - add[b] = data[a] - skip.append(a) - a = b - if userCommand == 's': - print "...skipping ", - skip.append(a) - continue - for line in fileinput.input(localArticleList): - if unicode(string.strip(line),"iso-8859-1") == a: - skip.append(a) - print "...skipping ", - break - fileinput.close() - if skip.count(a) == 0: - try: - pl = wikipedia.Page(wikipedia.getSite(), a) - text = pl.get() - if len(text) > 500: - skip.append(a) - print "...skipping ", - break - else: - print "...stub ", - except wikipedia.NoPage: - print "...doesn't exist yet", - continue - except: - skip.append(a) - print "...skipping ", - break - for b in add: - data[b] = add[b] - for a in skip: - del data[a] - - if DEBUG >= 2: - print data - - if data: - newText = newText + "\n* '''" + XMLfiles[file] + ":''' " - for a in data: - newText = newText + "[[" + a + "]] ([" + \ - data[a]['link'] + ' ' + data[a]['count'] + ']) ' - if DEBUG >= 2: - print newText - -pl = wikipedia.Page(wikipedia.getSite(), article) -text = pl.get() -newText = text + newText - - -if DEBUG: - print newText -else: - status, reason, data = pl.put(newText, "WdT: updated") - print status, reason
pywikipedia-svn@lists.wikimedia.org