http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9476
Revision: 9476
Author: xqt
Date: 2011-08-29 15:09:15 +0000 (Mon, 29 Aug 2011)
Log Message:
-----------
moved to archive
Added Paths:
-----------
archive/trunk/README
archive/trunk/WdT.py
Copied: archive/trunk/README (from rev 9461, trunk/pywikipedia/archive/README)
===================================================================
--- archive/trunk/README (rev 0)
+++ archive/trunk/README 2011-08-29 15:09:15 UTC (rev 9476)
@@ -0,0 +1 @@
+These bot scripts are deprecated. Do not use them!
Copied: archive/trunk/WdT.py (from rev 9461, trunk/pywikipedia/archive/WdT.py)
===================================================================
--- archive/trunk/WdT.py (rev 0)
+++ archive/trunk/WdT.py 2011-08-29 15:09:15 UTC (rev 9476)
@@ -0,0 +1,122 @@
+# -*- coding: utf-8 -*-
+"""
+(C) 2004 Thomas R. Koll, <tomk32(a)tomk32.de>
+ Distributed under the terms of the MIT license.
+
+This bot consists of WdT.py and WdTXMLParser.py and
+imports XML files into Wikipedia.
+The XML file contains an automatically generated list
+of the most significant words in current events,
+which the bot uses as article links and compares to
+a local list of all articles. Only the not-yet-written
+articles will be saved on Wikipedia.
+
+"""
+
+__version__='$Id: WdT.py,v 1.9 2005/12/21 17:51:26 wikipedian Exp $'
+
+
+import WdTXMLParser,wikipedia,re,datetime,xml.sax,fileinput
+import string
+
+DEBUG = 0  # non-zero: print the final page text instead of saving it; >= 2 also prints intermediate data
+host = "http://wortschatz.uni-leipzig.de/wort-des-tages/RDF/WdT/"  # each XMLfiles key is appended to this URL
+# File whose lines are compared (stripped, decoded as iso-8859-1) with each candidate word.
+localArticleList = "Stichwortliste_de_Wikipedia_2004-04-17_sortiert.txt"
+# Maps the name of each XML file to fetch to the label printed in front of that file's words.
+XMLfiles = {
+ "ort.xml" : "Orte",
+ "ereignis.xml" : "Ereignisse",
+ "kuenstler.xml" : "Kunst, Kultur und Wissenschaft",
+ "organisation.xml" : "Organisationen",
+ "politiker.xml" : "Politiker",
+ "schlagwort.xml" : u"Schlagwörter",
+ "sportler.xml" : "Sportler",
+ "sport.xml" : "Sport",
+ "person.xml" : "sonstige"
+ }
+article = "Wikipedia:Wort_des_Tages"  # title of the wiki page that is read and then saved with the new text
+# Text appended to the page; it starts with a section heading carrying today's date.
+newText = "\n== " + str(datetime.date.today()) + " =="
+
+# Set up the XML parser: a SAX parser that reports to a WdTXMLParser content handler.
+ch = WdTXMLParser.WdTXMLParser()  # its .result attribute is read after every parse
+parser = xml.sax.make_parser()
+parser.setContentHandler(ch)
+
+# For each configured XML file: fetch and parse it, review the words it yields, then append the kept ones to newText.
+for file in XMLfiles:
+ print "\ngetting: " + file,
+ parser.parse(host + file)  # host + file is the URL handed to the SAX parser
+ data = ch.result  # indexed by word below; each entry provides 'link' and 'count'
+ print " parsing..."
+ # The file has already been parsed above; data holds the content handler's result.
+ # NOTE(review): indentation was collapsed and two long statements near the end were
+ # wrapped in this listing, so the exact nesting below cannot be confirmed from here.
+ # Review every word, collecting in skip the keys that are deleted from data afterwards.
+ skip = []
+ if localArticleList != "":
+ import string
+ add = {}  # corrected word -> entry that was stored under the original word
+ for a in data:
+ print "\nchecking: " + a,
+ userCommand = raw_input('[C]orrect, [S]kip or [K]eep?')  # only lowercase 'c' and 's' are acted on below
+ if userCommand == 'c':
+ b = raw_input('Correct it: ')
+ if b != a:
+ add[b] = data[a]  # re-file the entry under the corrected word
+ skip.append(a)  # and drop the original key
+ a = b  # the remaining checks use the corrected word
+ if userCommand == 's':
+ print "...skipping ",
+ skip.append(a)
+ continue
+ for line in fileinput.input(localArticleList):  # look for the word in the local list
+ if unicode(string.strip(line),"iso-8859-1") == a:
+ skip.append(a)
+ print "...skipping ",
+ break
+ fileinput.close()
+ if skip.count(a) == 0:  # not skipped so far: check the page on the wiki
+ try:
+ pl = wikipedia.Page(wikipedia.getSite(), a)
+ text = pl.get()
+ if len(text) > 500:  # existing text longer than 500 characters: skip the word
+ skip.append(a)
+ print "...skipping ",
+ break  # NOTE(review): break, not continue -- confirm which loop this is meant to leave
+ else:
+ print "...stub ",
+ except wikipedia.NoPage:  # no such page: the word is not added to skip
+ print "...doesn't exist yet",
+ continue
+ except:  # any other exception: skip the word
+ skip.append(a)
+ print "...skipping ",
+ break
+ for b in add:  # apply the corrections collected above
+ data[b] = add[b]
+ for a in skip:  # remove every skipped word
+ del data[a]
+
+ if DEBUG >= 2:
+ print data
+ # Append "* '''<label>:''' " for this file, then "[[word]] ([link count]) " for each remaining word.
+ if data:
+ newText = newText + "\n* '''" + XMLfiles[file] +
":''' "
+ for a in data:
+ newText = newText + "[[" + a + "]] ([" + \
+ data[a]['link'] + ' ' + data[a]['count'] +
']) '
+ if DEBUG >= 2:
+ print newText
+
+pl = wikipedia.Page(wikipedia.getSite(), article)  # the page named by article
+text = pl.get()  # its current text
+newText = text + newText  # the new section goes after the existing text
+# With DEBUG set the combined text is only printed; otherwise it is saved with
+# pl.put(), passing "WdT: updated" (presumably the edit summary -- confirm).
+if DEBUG:
+ print newText
+else:
+ status, reason, data = pl.put(newText, "WdT: updated")
+ print status, reason