http://www.mediawiki.org/wiki/Special:Code/pywikipedia/9476
Revision: 9476
Author:   xqt
Date:     2011-08-29 15:09:15 +0000 (Mon, 29 Aug 2011)

Log Message:
-----------
moved to archive
Added Paths:
-----------
    archive/trunk/README
    archive/trunk/WdT.py
Copied: archive/trunk/README (from rev 9461, trunk/pywikipedia/archive/README)
===================================================================
--- archive/trunk/README	(rev 0)
+++ archive/trunk/README	2011-08-29 15:09:15 UTC (rev 9476)
@@ -0,0 +1 @@
+These bot scripts are deprecated. Do not use them!
Copied: archive/trunk/WdT.py (from rev 9461, trunk/pywikipedia/archive/WdT.py)
===================================================================
--- archive/trunk/WdT.py	(rev 0)
+++ archive/trunk/WdT.py	2011-08-29 15:09:15 UTC (rev 9476)
@@ -0,0 +1,122 @@
+# -*- coding: utf-8 -*-
+"""
+(C) 2004 Thomas R. Koll, tomk32@tomk32.de
+    Distributed under the terms of the MIT license.
+
+This bot consists of WdT.py and WdTXMLParser.py and
+imports XML files into Wikipedia.
+The XML file contains an automatically generated list
+of the most significant words in current events,
+which the bot uses as article links and compares against
+a local list of all articles. Only the not-yet-written
+articles will be saved on Wikipedia.
+
+"""
+
+__version__='$Id: WdT.py,v 1.9 2005/12/21 17:51:26 wikipedian Exp $'
+
+
+import WdTXMLParser,wikipedia,re,datetime,xml.sax,fileinput
+import string
+
+DEBUG = 0
+host = "http://wortschatz.uni-leipzig.de/wort-des-tages/RDF/WdT/"
+
+localArticleList = "Stichwortliste_de_Wikipedia_2004-04-17_sortiert.txt"
+
+XMLfiles = {
+    "ort.xml" : "Orte",
+    "ereignis.xml" : "Ereignisse",
+    "kuenstler.xml" : "Kunst, Kultur und Wissenschaft",
+    "organisation.xml" : "Organisationen",
+    "politiker.xml" : "Politiker",
+    "schlagwort.xml" : u"Schlagwörter",
+    "sportler.xml" : "Sportler",
+    "sport.xml" : "Sport",
+    "person.xml" : "sonstige"
+    }
+article = "Wikipedia:Wort_des_Tages"
+
+newText = "\n== " + str(datetime.date.today()) + " =="
+
+# start the xml parser
+ch = WdTXMLParser.WdTXMLParser()
+parser = xml.sax.make_parser()
+parser.setContentHandler(ch)
+
+# first we get the XML file
+for file in XMLfiles:
+    print "\ngetting: " + file,
+    parser.parse(host + file)
+    data = ch.result
+    print " parsing..."
+    # now we parse the file
+
+
+    # and make a result text for wikipedia
+    skip = []
+    if localArticleList != "":
+        import string
+        add = {}
+        for a in data:
+            print "\nchecking: " + a,
+            userCommand = raw_input('[C]orrect, [S]kip or [K]eep?')
+            if userCommand == 'c':
+                b = raw_input('Correct it: ')
+                if b != a:
+                    add[b] = data[a]
+                    skip.append(a)
+                    a = b
+            if userCommand == 's':
+                print "...skipping ",
+                skip.append(a)
+                continue
+            for line in fileinput.input(localArticleList):
+                if unicode(string.strip(line),"iso-8859-1") == a:
+                    skip.append(a)
+                    print "...skipping ",
+                    break
+            fileinput.close()
+            if skip.count(a) == 0:
+                try:
+                    pl = wikipedia.Page(wikipedia.getSite(), a)
+                    text = pl.get()
+                    if len(text) > 500:
+                        skip.append(a)
+                        print "...skipping ",
+                        break
+                    else:
+                        print "...stub ",
+                except wikipedia.NoPage:
+                    print "...doesn't exist yet",
+                    continue
+                except:
+                    skip.append(a)
+                    print "...skipping ",
+                    break
+        for b in add:
+            data[b] = add[b]
+        for a in skip:
+            del data[a]
+
+    if DEBUG >= 2:
+        print data
+
+    if data:
+        newText = newText + "\n* '''" + XMLfiles[file] + ":''' "
+        for a in data:
+            newText = newText + "[[" + a + "]] ([" + \
+                      data[a]['link'] + ' ' + data[a]['count'] + ']) '
+    if DEBUG >= 2:
+        print newText
+
+pl = wikipedia.Page(wikipedia.getSite(), article)
+text = pl.get()
+newText = text + newText


+if DEBUG:
+    print newText
+else:
+    status, reason, data = pl.put(newText, "WdT: updated")
+    print status, reason
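For readers skimming the archived script above: its core check is simply "does this article already exist on the wiki, and is it more than a stub?", done with the old compat wikipedia module. Below is a minimal sketch of that check using only calls that already appear in the diff (wikipedia.Page, wikipedia.getSite, page.get, wikipedia.NoPage); the helper name, title list, and stub limit are made up for illustration, and per the README these deprecated scripts should not actually be run.

# Illustrative sketch only -- mirrors the deprecated compat API used by WdT.py above.
import wikipedia

def missing_or_stub(titles, stub_limit=500):
    # Keep only titles whose article does not exist yet or is shorter than stub_limit.
    keep = []
    for title in titles:
        page = wikipedia.Page(wikipedia.getSite(), title)
        try:
            text = page.get()
        except wikipedia.NoPage:
            keep.append(title)        # article does not exist yet
            continue
        if len(text) <= stub_limit:   # short page, treat as a stub worth linking
            keep.append(title)
    return keep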
pywikipedia-svn@lists.wikimedia.org