[Pywikipedia-l] SVN: [4905] trunk/pywikipedia/table2wiki.py
wikipedian at svn.wikimedia.org
wikipedian at svn.wikimedia.org
Wed Jan 16 17:24:49 UTC 2008
Revision: 4905
Author: wikipedian
Date: 2008-01-16 17:24:49 +0000 (Wed, 16 Jan 2008)
Log Message:
-----------
Use GeneratorFactory. Now you can run this script with -links, -ref etc.
Modified Paths:
--------------
trunk/pywikipedia/table2wiki.py
Modified: trunk/pywikipedia/table2wiki.py
===================================================================
--- trunk/pywikipedia/table2wiki.py 2008-01-16 16:53:28 UTC (rev 4904)
+++ trunk/pywikipedia/table2wiki.py 2008-01-16 17:24:49 UTC (rev 4905)
@@ -3,7 +3,10 @@
"""
Nifty script to convert HTML-tables to MediaWiki's own syntax.
+These command line parameters can be used to specify which pages to work on:
+&params;
+
-xml - Retrieve information from a local XML dump (pages_current, see
http://download.wikimedia.org).
Argument can also be given as "-xml:filename".
@@ -14,14 +17,8 @@
Searches for pages with HTML tables, and tries to convert them
on the live wiki.
--start: - used as -start:pagename, specifies that the robot should
- go alphabetically through all pages on the home wiki,
- starting at the named page.
+This SQL query can be used to find pages to work on:
--file:filename - Will read any [[wikipedia link]] and use these articles
- This SQL query might be helpful to generate this file, but
- it doesn't work for MediaWiki version 1.5 and above.
-
SELECT CONCAT('[[', cur_title, ']]')
FROM cur
WHERE (cur_text LIKE '%<table%'
@@ -30,7 +27,7 @@
AND cur_namespace=0
ORDER BY cur_title
LIMIT 500
-
+
Example:
python table2wiki.py -xml:20050713_pages_current.xml -lang:de
@@ -52,6 +49,12 @@
import re, sys, time
import wikipedia, config, pagegenerators
+# This is required for the text that is shown when you run this script
+# with the parameter -help.
+docuReplacements = {
+ '&params;': pagegenerators.parameterHelp,
+}
+
msg_no_warnings = {'de':u'Bot: Tabellensyntax konvertiert',
'en':u'User-controlled Bot: table syntax updated',
'es':u'Bot controlado: actualizada sintaxis de tabla',
@@ -86,7 +89,7 @@
'fr':u'Bot: wikification syntaxe tableaux - %d avertissements !',
'he':u'בוט בפיקוח משתמש: עדכון תחביר הטבלה - %d אזהרות!',
'ia':u'Robot controlate: Syntaxe del tabella cambiate - %d advertimentos!',
- 'kk':u'Басқарылмалы бот: Кесте пішімі түзетілді - %d құлақтандыру!',
+ 'kk':u'Басқарылмалы бот: Кесте пішімі түзетілді - %d құлақтандыру!',
'lt':u'kontroliuojamas robotas: atnaujinta lentelės sintaksė - %d įspėjimai!',
'nl':u'Tabel gewijzigd van HTML- naar Wikisyntax - %d waarschuwingen!',
'no':u'bot: Konverterer tabellsyntaks – %d advarsler!',
@@ -501,22 +504,14 @@
debug = False
xmlfilename = None
gen = None
- textfilename = None
- startpage = None
+
+ # This factory is responsible for processing command line arguments
+ # that are also used by other scripts and that determine on which pages
+ # to work on.
+ genFactory = pagegenerators.GeneratorFactory()
+
for arg in wikipedia.handleArgs():
- if arg.startswith('-file:'):
- if len(arg) == 5:
- textfilename = wikipedia.input(u'Please enter the textfile\'s name:')
- else:
- textfilename = arg[6:]
- gen = pagegenerators.TextfilePageGenerator(textfilename)
- elif arg.startswith('-start:'):
- if len(arg) == 6:
- startpage = wikipedia.input(u'Please enter the article to start then:')
- else:
- startpage = arg[7:]
- gen = pagegenerators.AllpagesPageGenerator(startpage)
- elif arg.startswith('-xml'):
+ if arg.startswith('-xml'):
if len(arg) == 4:
xmlfilename = wikipedia.input(u'Please enter the XML dump\'s filename:')
else:
@@ -540,7 +535,11 @@
elif arg.startswith('-debug'):
debug = True
else:
- page_title.append(arg)
+ generator = genFactory.handleArg(arg)
+ if generator:
+ gen = generator
+ else:
+ page_title.append(arg)
# if the page is given as a command line argument,
# connect the title's parts with spaces
More information about the Pywikipedia-l
mailing list