Revision: 3980
Author: wikipedian
Date: 2007-08-06 21:27:56 +0000 (Mon, 06 Aug 2007)
Log Message:
-----------
Threw out -nobacklink and -noshownew. There are config variables for
these, and this is not stuff you want to change every time you run the
bot.
For the same reason, removed -array and as a replacement added the
config variable interwiki_min_subjects.
Modified Paths:
--------------
trunk/pywikipedia/config.py
trunk/pywikipedia/interwiki.py
Modified: trunk/pywikipedia/config.py
===================================================================
--- trunk/pywikipedia/config.py 2007-08-06 21:16:40 UTC (rev 3979)
+++ trunk/pywikipedia/config.py 2007-08-06 21:27:56 UTC (rev 3980)
@@ -178,6 +178,12 @@
# You need pydot for this: http://dkbza.org/pydot.html
interwiki_graph = False
+# Specifies that the robot should process that amount of subjects at a time,
+# only starting to load new pages in the original language when the total
+# falls below that number. Default is to process (at least) 100 subjects at
+# once.
+interwiki_min_subjects = 100
+
# If interwiki graphs are enabled, which format(s) should be used?
# Supported formats include png, jpg, ps, and svg. See:
# http://www.graphviz.org/doc/info/output.html
Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py 2007-08-06 21:16:40 UTC (rev 3979)
+++ trunk/pywikipedia/interwiki.py 2007-08-06 21:27:56 UTC (rev 3980)
@@ -166,19 +166,6 @@
will be changed if there are that number or more links to
change or add
-Other arguments:
-
- -noshownew: don't show the source of every new pagelink found.
-
- -nobacklink: switch off the backlink warnings
-
- -array: used as -array:#, specifies that the robot should process
- that amount of pages at once, only starting to load new
- pages in the original language when the total falls below
- that number. Default is to process (at least) 100 pages at
- once. The number of new ones loaded is equal to the number
- that is loaded at once from another language (default 60)
-
Some configuration options can be used to change the working of this robot:
interwiki_backlink: if set to True, all problems in foreign wikis will
@@ -330,16 +317,13 @@
"""Container class for global settings.
Use of globals outside of this is to be avoided."""
autonomous = False
- backlink = config.interwiki_backlink
confirm = False
select = False
debug = True
followredirect = True
force = False
- minarraysize = 100
maxquerysize = 60
same = False
- shownew = config.interwiki_shownew
skip = set()
skipauto = False
untranslated = False
@@ -659,7 +643,7 @@
else:
if not (self.isIgnored(redirectTargetPage) or self.namespaceMismatch(page, redirectTargetPage) or self.wiktionaryMismatch(redirectTargetPage) or (page.site().family != redirectTargetPage.site().family)):
if self.addIfNew(redirectTargetPage, counter, page):
- if globalvar.shownew:
+ if config.interwiki_shownew:
wikipedia.output(u"%s: %s gives new redirect %s" % (self.originPage.aslink(), page.aslink(True), redirectTargetPage.aslink(True)))
except wikipedia.NoPage:
wikipedia.output(u"NOTE: %s does not exist" % page.aslink(True))
@@ -712,7 +696,7 @@
wikipedia.output(u"NOTE: %s: %s gives duplicate interwiki on same site %s" % (self.originPage.aslink(), page.aslink(True), linkedPage.aslink(True)))
break
else:
- if globalvar.shownew:
+ if config.interwiki_shownew:
wikipedia.output(u"%s: %s gives new interwiki %s"% (self.originPage.aslink(), page.aslink(True), linkedPage.aslink(True)))
# These pages are no longer 'in progress'
@@ -856,7 +840,7 @@
"""Round up the subject, making any necessary changes. This method
should be called exactly once after the todo list has gone empty.
- This contains a shortcut: if a subject array is given in the argument
+ This contains a shortcut: if a subject list is given in the argument
bot, just before submitting a page change to the live wiki it is
checked whether we will have to wait. If that is the case, the bot will
be told to make another get request first."""
@@ -947,7 +931,7 @@
# self.createGraph()
# don't report backlinks for pages we already changed
- if globalvar.backlink:
+ if config.interwiki_backlink:
self.reportBacklinks(new, updatedSites)
def replaceLinks(self, page, newPages, bot):
@@ -1236,7 +1220,7 @@
# Do we still have enough subjects to work on for which the
# home language has been retrieved? This is rough, because
# some subjects may need to retrieve a second home-language page!
- if len(self.subjects) - mycount < globalvar.minarraysize:
+ if len(self.subjects) - mycount < config.interwiki_min_subjects:
# Can we make more home-language queries by adding subjects?
if self.pageGenerator and mycount < globalvar.maxquerysize:
timeout = 60
@@ -1430,10 +1414,6 @@
globalvar.select = True
elif arg == '-autonomous':
globalvar.autonomous = True
- elif arg == '-noshownew':
- globalvar.shownew = False
- elif arg == '-nobacklink':
- globalvar.backlink = False
elif arg == '-noredirect':
globalvar.followredirect = False
elif arg == '-localonly':
@@ -1482,8 +1462,6 @@
# deprecated for consistency with other scripts
elif arg.startswith('-number:'):
number = int(arg[8:])
- elif arg.startswith('-array:'):
- globalvar.minarraysize = int(arg[7:])
elif arg.startswith('-neverlink:'):
globalvar.neverlink += arg[11:].split(",")
elif arg.startswith('-ignore:'):
Revision: 3979
Author: wikipedian
Date: 2007-08-06 21:16:40 +0000 (Mon, 06 Aug 2007)
Log Message:
-----------
ordered command-line help
Modified Paths:
--------------
trunk/pywikipedia/interwiki.py
Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py 2007-08-06 20:39:15 UTC (rev 3978)
+++ trunk/pywikipedia/interwiki.py 2007-08-06 21:16:40 UTC (rev 3979)
@@ -9,27 +9,77 @@
right interwiki links, and if this is unambiguous, the interwiki links in the
original page will be automatically updated and the modified page uploaded.
-This script understands various command-line arguments:
+These command-line arguments can be used to specify which pages to work on:
&pagegenerators_help;
- -force: do not ask permission to make "controversial" changes,
- like removing a language because none of the found
- alternatives actually exists.
+ -days: Like -years, but runs through all date pages. Stops at
+ Dec 31. If the argument is given in the form -days:X,
+ it will start at month no. X through Dec 31. If the
+ argument is simply given as -days, it will run from
+ Jan 1 through Dec 31. E.g. for -days:9 it will run
+ from Sep 1 through Dec 31.
+ -years: run on all year pages in numerical order. Stop at year 2050.
+ If the argument is given in the form -years:XYZ, it
+ will run from [[XYZ]] through [[2050]]. If XYZ is a
+ negative value, it is interpreted as a year BC. If the
+ argument is simply given as -years, it will run from 1
+ through 2050.
+
+ This implies -noredirect.
+
+ -restore: restore a set of "dumped" pages the robot was working on
+ when it terminated.
+
+ -continue: like restore, but after having gone through the dumped pages,
+ continue alphabetically starting at the last of the dumped
+ pages.
+
+ -warnfile: used as -warnfile:filename, reads all warnings from the
+ given file that apply to the home wiki language,
+ and read the rest of the warning as a hint. Then
+ treats all the mentioned pages. A quicker way to
+ implement warnfile suggestions without verifying them
+ against the live wiki is using the warnfile.py
+ script.
+
+Additionally, these arguments can be used to restrict the bot to certain pages:
+
+ -number: used as -number:#, specifies that the robot should process
+ that amount of pages and then stop. This is only useful in
+ combination with -start. The default is not to stop.
+
+ -bracket only work on pages that have (in the home language) parentheses
+ in their title. All other pages are skipped.
+
+ -skipfile: used as -skipfile:filename, skip all links mentioned in
+ the given file. This does not work with -number!
+
+ -skipauto: use to skip all pages that can be translated automatically,
+ like dates, centuries, months, etc.
+
+These arguments are useful to provide hints to the bot:
+
-hint: used as -hint:de:Anweisung to give the robot a hint
where to start looking for translations. This is only
useful if you specify a single page to work on. If no
text is given after the second ':', the name of the page
itself is used as the title for the hint.
- There are some special hints, trying a number of languages at once:
- all: Provides the hint for all languages with at least ca. 100 pages
- 10: Provides the hint for ca. 10 of the largest languages
- 20:, 30:, 50: Analogous to 10: with ca. 20, 30 and 50 languages
- cyril: Provides the hint for all languages that use the cyrillic alphabet
+ There are some special hints, trying a number of languages
+ at once:
+ * all: All languages with at least ca. 100 articles.
+ * 10: The 10 largest languages (sites with most
+ articles). Analogous for any other natural
+ number.
+ * cyril: All languages that use the Cyrillic alphabet.
+ * chinese: All Chinese dialects.
+ * scand: All Scandinavian languages.
- -new Work on the most recent new pages on the wiki
+ -askhints: for each page one or more hints are asked. See hint: above
+ for the format, one can for example give "en:something" or
+ "20:" as hint.
-same: looks over all 'serious' languages for the same title.
-same is equivalent to -hint:all:
@@ -39,12 +89,8 @@
capitalized, it will only go through other wikis without
automatic capitalization.
- -askhints: for each page one or more hints are asked. See hint: above
- for the format, one can for example give "en:something" or
- "20:" as hint.
-
-untranslated: works normally on pages with at least one interlanguage
- link; asks hints for pages that have none.
+ link; asks for hints for pages that have none.
-untranslatedonly: same as -untranslated, but pages which already have a
translation are skipped. Hint: do NOT use this in
@@ -52,77 +98,39 @@
you will go through the whole alphabet before any queries
are performed!
+ -showpage when asking for hints, show the first bit of the text
+ of the page always, rather than doing so only when being
+ asked for (by typing '?'). Only useful in combination
+ with a hint-asking option like -untranslated, -askhints
+ or -untranslatedonly
+
+ -noauto: Do not use the automatic translation feature for years and
+ dates, only use found links and hints.
+
+These arguments define how much user confirmation is required:
+
+ -autonomous: run automatically, do not ask any questions. If a question
+ to an operator is needed, write the name of the page
+ to autonomous_problems.dat and continue on the next page.
+
-confirm: ask for confirmation before any page is changed on the
live wiki. Without this argument, additions and
unambiguous modifications are made without confirmation.
+ -force: do not ask permission to make "controversial" changes,
+ like removing a language because none of the found
+ alternatives actually exists.
+
-select: ask for each link whether it should be include before
changing any page. This is useful if you want to remove
invalid interwiki and if you do multiple hints of which
some might be correct and others incorrect. Combining
-select and -confirm is possible, but seems like overkill.
- -autonomous: run automatically, do not ask any questions. If a question
- to an operator is needed, write the name of the page
- to autonomous_problems.dat and continue on the next page.
+These arguments specify in which way the bot should follow interwiki links:
- -nobacklink: switch off the backlink warnings
-
- -number: used as -number:#, specifies that the robot should process
- that amount of pages and then stop. This is only useful in
- combination with -start. The default is not to stop.
-
- -array: used as -array:#, specifies that the robot should process
- that amount of pages at once, only starting to load new
- pages in the original language when the total falls below
- that number. Default is to process (at least) 100 pages at
- once. The number of new ones loaded is equal to the number
- that is loaded at once from another language (default 60)
-
- -years: run on all year pages in numerical order. Stop at year 2050.
- If the argument is given in the form -years:XYZ, it
- will run from [[XYZ]] through [[2050]]. If XYZ is a
- negative value, it is interpreted as a year BC. If the
- argument is simply given as -years, it will run from 1
- through 2050.
-
- This implies -noredirect.
-
- -noauto: Do not use the automatic translation feature for years and
- dates, only use found links and hints.
-
- -days: Like -years, but runs through all date pages. Stops at
- Dec 31. If the argument is given in the form -days:X,
- it will start at month no. X through Dec 31. If the
- argument is simply given as -days, it will run from
- Jan 1 through Dec 31. E.g. for -days:9 it will run
- from Sep 1 through Dec 31.
-
- -skipfile: used as -skipfile:filename, skip all links mentioned in
- the given file. This does not work with -number!
-
- -skipauto: use to skip all pages that can be translated automatically,
- like dates, centuries, months, etc.
-
- -restore: restore a set of "dumped" pages the robot was working on
- when it terminated.
-
- -continue: as restore, but after having gone through the dumped pages,
- continue alphabetically starting at the last of the dumped
- pages.
-
- -warnfile: used as -warnfile:filename, reads all warnings from the
- given file that apply to the home wiki language,
- and read the rest of the warning as a hint. Then
- treats all the mentioned pages. A quicker way to
- implement warnfile suggestions without verifying them
- against the live wiki is using the warnfile.py
- robot.
-
-noredirect do not follow redirects (note: without ending columns).
- -noshownew: don't show the source of every new pagelink found.
-
-neverlink: used as -neverlink:xx where xx is a language code:
Disregard any links found to language xx. You can also
specify a list of languages to disregard, separated by
@@ -134,11 +142,8 @@
-ignorefile: similar to -ignore, except that the pages are taken from
the given file instead of the command line.
- -showpage when asking for hints, show the first bit of the text
- of the page always, rather than doing so only when being
- asked for (by typing '?'). Only useful in combination
- with a hint-asking option like -untranslated, -askhints
- or -untranslatedonly
+The following arguments are only important for users who have accounts for
+multiple languages, and specify on which sites the bot should modify pages:
-localonly only work on the local wiki, not on other wikis in the family
I have a login at
@@ -161,11 +166,19 @@
will be changed if there are that number or more links to
change or add
- -bracket only work on pages that have (in the home language) a bracket
- in their title. All other pages are skipped.
+Other arguments:
- -withoutinterwiki work on [[Special:Withoutinterwiki]] articles.
+ -noshownew: don't show the source of every new pagelink found.
+ -nobacklink: switch off the backlink warnings
+
+ -array: used as -array:#, specifies that the robot should process
+ that amount of pages at once, only starting to load new
+ pages in the original language when the total falls below
+ that number. Default is to process (at least) 100 pages at
+ once. The number of new ones loaded is equal to the number
+ that is loaded at once from another language (default 60)
+
Some configuration options can be used to change the working of this robot:
interwiki_backlink: if set to True, all problems in foreign wikis will
Revision: 3964
Author: siebrand
Date: 2007-08-05 14:14:48 +0000 (Sun, 05 Aug 2007)
Log Message:
-----------
*Fixed LinksearchPageGenerator (submitted by Filnik)
*Added -linksearch in replace.py (submitted by Filnik)
*Removed EOL whitespace in pagegenerators.py
Modified Paths:
--------------
trunk/pywikipedia/pagegenerators.py
trunk/pywikipedia/replace.py
Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py 2007-08-04 11:42:25 UTC (rev 3963)
+++ trunk/pywikipedia/pagegenerators.py 2007-08-05 14:14:48 UTC (rev 3964)
@@ -30,7 +30,7 @@
namespace = wikipedia.Page(wikipedia.getSite(), start).namespace()
for page in wikipedia.getSite().allpages(start=start, namespace=namespace, includeredirects = includeredirects):
yield page
-
+
def PrefixingPageGenerator(prefix, namespace=None):
for page in AllpagesPageGenerator(prefix, namespace):
if page.titleWithoutNamespace().startswith(prefix):
@@ -43,7 +43,7 @@
site = wikipedia.getSite()
for page in site.newpages(number=number, get_redirect=get_redirect, repeat=repeat):
yield page[0]
-
+
def FileLinksGenerator(referredPage):
for page in referredPage.getFileLinks():
yield page
@@ -56,7 +56,7 @@
if site is None:
site = wikipedia.getSite()
for page in site.unusedfiles(number=number, repeat=repeat):
- yield wikipedia.ImagePage(page.site(), page.title())
+ yield wikipedia.ImagePage(page.site(), page.title())
def WithoutInterwikiPageGenerator(number = 100, repeat = False, site = None):
if site is None:
@@ -186,6 +186,7 @@
site = wikipedia.getSite()
elRX = re.compile('<a .* class="external ?" .*</a>.*<a .*>(.*)</a>') #TODO: de-uglify?
offset = 0
+ pageyeldlist = list()
found = step
while found == step:
found = 0
@@ -194,7 +195,12 @@
data = site.getUrl(url)
for elM in elRX.finditer(data):
found += 1
- yield wikipedia.Page(site,elM.group(1))
+ pagenameofthelink = elM.group(1)
+ if pagenameofthelink in pageyeldlist:
+ continue
+ else:
+ pageyeldlist.append(pagenameofthelink)
+ yield wikipedia.Page(site, pagenameofthelink)
offset += step
class GoogleSearchPageGenerator:
@@ -206,9 +212,9 @@
'''
def __init__(self, query = None):
self.query = query or wikipedia.input(u'Please enter the search query:')
-
+
#########
- # partially commented out because it is probably not in compliance with Google's "Terms of
+ # partially commented out because it is probably not in compliance with Google's "Terms of
# service" (see 5.3, http://www.google.com/accounts/TOS?loc=US)
def queryGoogle(self, query):
#if config.google_key:
@@ -230,7 +236,7 @@
estimatedTotalResultsCount = None
while not estimatedTotalResultsCount or offset < estimatedTotalResultsCount:
while (True):
- # Google often yields 502 errors.
+ # Google often yields 502 errors.
try:
wikipedia.output(u'Querying Google, offset %i' % offset)
data = google.doGoogleSearch(query, start = offset, filter = False)
@@ -255,7 +261,7 @@
offset += 10
#########
- # commented out because it is probably not in compliance with Google's "Terms of
+ # commented out because it is probably not in compliance with Google's "Terms of
# service" (see 5.3, http://www.google.com/accounts/TOS?loc=US)
#def queryViaWeb(self, query):
@@ -460,7 +466,7 @@
if pageNumber < 2:
raise ValueError("PreloadingGenerator needs to load more than 1 page.")
pagequeue = Queue.Queue(min(pageNumber//2, 10))
- preloader = _Preloader(pagequeue, generator, pageNumber)
+ preloader = _Preloader(pagequeue, generator, pageNumber)
preloader.start()
while True:
# Queue.get() blocks the main thread. This means that the
@@ -476,7 +482,7 @@
if p is None:
return
yield p
-
+
class GeneratorFactory:
"""
This factory is responsible for processing command line arguments
@@ -593,4 +599,3 @@
wikipedia.output(page.title(), toStdout = True)
finally:
wikipedia.stopme()
-
Modified: trunk/pywikipedia/replace.py
===================================================================
--- trunk/pywikipedia/replace.py 2007-08-04 11:42:25 UTC (rev 3963)
+++ trunk/pywikipedia/replace.py 2007-08-05 14:14:48 UTC (rev 3964)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# -*- coding: utf-8 -*-
"""
This bot will make direct text replacements. It will retrieve information on
which pages might need changes either from an XML dump or a text file, or only
@@ -19,6 +19,8 @@
parameter multiple times to edit multiple pages.
-ref - Work on all pages that link to a certain page.
Argument can also be given as "-ref:referredpagetitle".
+-linksearch - Retrieve all the results using Special:Linksearch.
+ Argument can also be given as "-linksearch:url".
-filelinks - Works on all pages that link to a certain image.
Argument can also be given as "-filelinks:ImageName".
-links - Work on all pages that are linked to from a certain page.
@@ -350,6 +352,9 @@
summary_commandline = True
elif arg.startswith('-allowoverlap'):
allowoverlap = True
+ elif arg.startswith('-linksearch:'):
+ linkselected = (arg[12:])
+ gen = pagegenerators.LinksearchPageGenerator(linkselected)
else:
generator = genFactory.handleArg(arg)
if generator:
Revision: 3977
Author: wikipedian
Date: 2007-08-06 17:50:31 +0000 (Mon, 06 Aug 2007)
Log Message:
-----------
docu
Modified Paths:
--------------
trunk/pywikipedia/selflink.py
Modified: trunk/pywikipedia/selflink.py
===================================================================
--- trunk/pywikipedia/selflink.py 2007-08-06 17:50:16 UTC (rev 3976)
+++ trunk/pywikipedia/selflink.py 2007-08-06 17:50:31 UTC (rev 3977)
@@ -5,33 +5,22 @@
This bot goes over multiple pages of the home wiki, searches for selflinks, and
allows removing them.
-This script understands various command-line arguments:
+These command line parameters can be used to specify which pages to work on:
- -start: used as -start:page_name, specifies that the robot should
- go alphabetically through all pages on the home wiki,
- starting at the named page.
+¶ms;
- -file: used as -file:file_name, read a list of pages to treat
- from the named textfile. Page titles should be enclosed
- in [[double-squared brackets]].
+ -xml Retrieve information from a local XML dump (pages-articles
+ or pages-meta-current, see http://download.wikimedia.org).
+ Argument can also be given as "-xml:filename".
- -ref: used as -ref:page_name, specifies that the robot should
- work on all pages referring to the named page.
-
- -links: used as -links:page_name, specifies that the robot should
- work on all pages referred to from the named page.
-
- -cat: used as -cat:category_name, specifies that the robot should
- work on all pages in the named category.
-
- -namespace:n - Number of namespace to process. The parameter can be used
+ -namespace:n Number of namespace to process. The parameter can be used
multiple times. It works in combination with all other
parameters, except for the -start parameter. If you e.g.
- want to iterate over all user pages starting at User:M, use
- -start:User:M.
+ want to iterate over all categories starting at M, use
+ -start:Category:M.
-All other parameters will be regarded as a page title; in this case, the bot
-will only work on a single page.
+All other parameters will be regarded as part of the title of a single page,
+and the bot will only work on that single page.
"""
__version__='$Id$'
@@ -40,6 +29,12 @@
import editarticle
import re, sys
+# This is required for the text that is shown when you run this script
+# with the parameter -help.
+docuReplacements = {
+ '¶ms;': pagegenerators.parameterHelp,
+}
+
# Summary messages in different languages
# NOTE: Predefined replacement tasks might use their own dictionary, see 'fixes'
# below.
Revision: 3976
Author: wikipedian
Date: 2007-08-06 17:50:16 +0000 (Mon, 06 Aug 2007)
Log Message:
-----------
formatted docu
Modified Paths:
--------------
trunk/pywikipedia/replace.py
Modified: trunk/pywikipedia/replace.py
===================================================================
--- trunk/pywikipedia/replace.py 2007-08-06 16:38:41 UTC (rev 3975)
+++ trunk/pywikipedia/replace.py 2007-08-06 17:50:16 UTC (rev 3976)
@@ -11,6 +11,7 @@
-xml Retrieve information from a local XML dump (pages-articles
or pages-meta-current, see http://download.wikimedia.org).
Argument can also be given as "-xml:filename".
+
-page Only edit a specific page.
Argument can also be given as "-page:pagetitle". You can
give this parameter multiple times to edit multiple pages.
@@ -19,29 +20,38 @@
-regex Make replacements using regular expressions. If this argument
isn't given, the bot will make simple text replacements.
+
+ -nocase Use case insensitive regular expressions.
+
-except:XYZ Ignore pages which contain XYZ. If the -regex argument is
given, XYZ will be regarded as a regular expression.
+
-summary:XYZ Set the summary message text for the edit to XYZ, bypassing
the predefined message texts with original and replacements
inserted.
+
-fix:XYZ Perform one of the predefined replacements tasks, which are
given in the dictionary 'fixes' defined inside the file
fixes.py.
- The -regex argument and given replacements will be ignored if
- you use -fix.
+ The -regex and -nocase argument and given replacements will
+ be ignored if you use -fix.
Currently available predefined fixes are:
&fixes-help;
+
-namespace:n Number of namespace to process. The parameter can be used
multiple times. It works in combination with all other
parameters, except for the -start parameter. If you e.g.
want to iterate over all categories starting at M, use
-start:Category:M.
+
-always Don't prompt you for each replacement
+
-recursive Recurse replacement until possible. Be careful, this might
lead to an infinite loop.
- -nocase Use case insensitive regular expressions.
+
 -allowoverlap When occurrences of the pattern overlap, replace all of them.
Be careful, this might lead to an infinite loop.
+
other: First argument is the old text, second argument is the new text.
If the -regex argument is given, the first argument will be
regarded as a regular expression, and the second argument might
@@ -50,7 +60,7 @@
Examples:
If you want to change templates from the old syntax, e.g. {{msg:Stub}}, to the
-new syntax, e.g. {{Stub}}, download an XML dump file (cur table) from
+new syntax, e.g. {{Stub}}, download an XML dump file (pages-articles) from
http://download.wikimedia.org, then use this command:
python replace.py -xml -regex "{{msg:(.*?)}}" "{{\\1}}"
Revision: 3975
Author: wikipedian
Date: 2007-08-06 16:38:41 +0000 (Mon, 06 Aug 2007)
Log Message:
-----------
moved docu to fixes.py
Modified Paths:
--------------
trunk/pywikipedia/fixes.py
trunk/pywikipedia/replace.py
Modified: trunk/pywikipedia/fixes.py
===================================================================
--- trunk/pywikipedia/fixes.py 2007-08-06 16:38:08 UTC (rev 3974)
+++ trunk/pywikipedia/fixes.py 2007-08-06 16:38:41 UTC (rev 3975)
@@ -1,6 +1,19 @@
-# -*- coding: utf-8 -*-
+# -*- coding: utf-8 -*-
""" File containing all standard fixes """
__version__ = '$Id$'
+
+help = """
+ * HTML - Convert HTML tags to wiki syntax, and
+ fix XHTML.
+ * syntax - Try to fix bad wiki markup. Do not run
+ this in automatic mode, as the bot may
+ make mistakes.
+ * syntax-safe - Like syntax, but less risky, so you can
+ run this in automatic mode.
+ * case-de - fix upper/lower case errors in German
+ * grammar-de - fix grammar and typography in German
+"""
+
fixes = {
# These replacements will convert HTML to wiki syntax where possible, and
# make remaining tags XHTML compliant.
Modified: trunk/pywikipedia/replace.py
===================================================================
--- trunk/pywikipedia/replace.py 2007-08-06 16:38:08 UTC (rev 3974)
+++ trunk/pywikipedia/replace.py 2007-08-06 16:38:41 UTC (rev 3975)
@@ -30,10 +30,7 @@
The -regex argument and given replacements will be ignored if
you use -fix.
Currently available predefined fixes are:
- * HTML - convert HTML tags to wiki syntax, and fix XHTML
- * syntax - try to fix bad wiki markup.
- * case-de - fix upper/lower case errors in German
- * grammar-de - fix grammar and typography in German
+&fixes-help;
-namespace:n Number of namespace to process. The parameter can be used
multiple times. It works in combination with all other
parameters, except for the -start parameter. If you e.g.
@@ -78,15 +75,16 @@
import sys, re
import wikipedia, pagegenerators,catlib, config
+# Imports predefined replacements tasks from fixes.py
+import fixes
+
# This is required for the text that is shown when you run this script
# with the parameter -help.
docuReplacements = {
- '¶ms;': pagegenerators.parameterHelp
+ '¶ms;': pagegenerators.parameterHelp,
+ '&fixes-help;': fixes.help,
}
-# Imports predefined replacements tasks from fixes.py
-from fixes import fixes
-
__version__='$Id$'
# Summary messages in different languages
@@ -392,9 +390,9 @@
else:
# Perform one of the predefined actions.
try:
- fix = fixes[fix]
+ fix = fixes.fixes[fix]
except KeyError:
- wikipedia.output(u'Available predefined fixes are: %s' % fixes.keys())
+ wikipedia.output(u'Available predefined fixes are: %s' % fixes.fixes.keys())
wikipedia.stopme()
sys.exit()
if fix.has_key('regex'):
Revision: 3973
Author: wikipedian
Date: 2007-08-06 16:25:44 +0000 (Mon, 06 Aug 2007)
Log Message:
-----------
extended docu
Modified Paths:
--------------
trunk/pywikipedia/replace.py
Modified: trunk/pywikipedia/replace.py
===================================================================
--- trunk/pywikipedia/replace.py 2007-08-06 16:13:28 UTC (rev 3972)
+++ trunk/pywikipedia/replace.py 2007-08-06 16:25:44 UTC (rev 3973)
@@ -4,64 +4,52 @@
which pages might need changes either from an XML dump or a text file, or only
change a single page.
-You can run the bot with the following commandline parameters:
+These command line parameters can be used to specify which pages to work on:
--xml - Retrieve information from a local XML dump (pages_current, see
- http://download.wikimedia.org).
- Argument can also be given as "-xml:filename".
--file - Work on all pages given in a local text file.
- Will read any [[wiki link]] and use these articles.
- Argument can also be given as "-file:filename".
--cat - Work on all pages which are in a specific category.
- Argument can also be given as "-cat:categoryname".
--page - Only edit a specific page.
- Argument can also be given as "-page:pagetitle". You can give this
- parameter multiple times to edit multiple pages.
--ref - Work on all pages that link to a certain page.
- Argument can also be given as "-ref:referredpagetitle".
--linksearch - Retrieve all the results using Special:Linksearch.
- Argument can also be given as "-linksearch:url".
--filelinks - Works on all pages that link to a certain image.
- Argument can also be given as "-filelinks:ImageName".
--links - Work on all pages that are linked to from a certain page.
- Argument can also be given as "-links:linkingpagetitle".
--start - Work on all pages in the wiki, starting at a given page. Choose
- "-start:!" to start at the beginning.
- NOTE: You are advised to use -xml instead of this option; this is
- meant for cases where there is no recent XML dump.
--regex - Make replacements using regular expressions. If this argument
- isn't given, the bot will make simple text replacements.
--except:XYZ - Ignore pages which contain XYZ. If the -regex argument is given,
- XYZ will be regarded as a regular expression.
--summary:XYZ - Set the summary message text for the edit to XYZ, bypassing the
- predefined message texts with original and replacements inserted.
--fix:XYZ - Perform one of the predefined replacements tasks, which are given
- in the dictionary 'fixes' defined inside the file fixes.py.
- The -regex argument and given replacements will be ignored if
- you use -fix.
- Currently available predefined fixes are:
- * HTML - convert HTML tags to wiki syntax, and fix XHTML
- * syntax - try to fix bad wiki markup.
- * case-de - fix upper/lower case errors in German
- * grammar-de - fix grammar and typography in German
--namespace:n - Number of namespace to process. The parameter can be used
- multiple times. It works in combination with all other
- parameters, except for the -start parameter. If you e.g. want to
- iterate over all user pages starting at User:M, use
- -start:User:M.
--always - Don't prompt you for each replacement
--recursive - Recurse replacement until possible.
--nocase - Use case insensitive regular expressions.
--allowoverlap - When occurences of the pattern overlap, replace all of them.
- Warning! Don't use this option if you don't know what you're
- doing, because it might easily lead to infinite loops then.
-other: - First argument is the old text, second argument is the new text.
- If the -regex argument is given, the first argument will be
- regarded as a regular expression, and the second argument might
- contain expressions like \\1 or \g<name>.
+¶ms;
-NOTE: Only use either -xml or -file or -page, but don't mix them.
+ -xml Retrieve information from a local XML dump (pages-articles
+ or pages-meta-current, see http://download.wikimedia.org).
+ Argument can also be given as "-xml:filename".
+ -page Only edit a specific page.
+ Argument can also be given as "-page:pagetitle". You can
+ give this parameter multiple times to edit multiple pages.
+Furthermore, the following command line parameters are supported:
+
+ -regex Make replacements using regular expressions. If this argument
+ isn't given, the bot will make simple text replacements.
+ -except:XYZ Ignore pages which contain XYZ. If the -regex argument is
+ given, XYZ will be regarded as a regular expression.
+ -summary:XYZ Set the summary message text for the edit to XYZ, bypassing
+ the predefined message texts with original and replacements
+ inserted.
+ -fix:XYZ Perform one of the predefined replacements tasks, which are
+ given in the dictionary 'fixes' defined inside the file
+ fixes.py.
+ The -regex argument and given replacements will be ignored if
+ you use -fix.
+ Currently available predefined fixes are:
+ * HTML - convert HTML tags to wiki syntax, and fix XHTML
+ * syntax - try to fix bad wiki markup.
+ * case-de - fix upper/lower case errors in German
+ * grammar-de - fix grammar and typography in German
+ -namespace:n Number of namespace to process. The parameter can be used
+ multiple times. It works in combination with all other
+ parameters, except for the -start parameter. If you e.g.
+ want to iterate over all categories starting at M, use
+ -start:Category:M.
+ -always Don't prompt you for each replacement
+ -recursive Recurse replacement until possible. Be careful, this might
+ lead to an infinite loop.
+ -nocase Use case insensitive regular expressions.
+ -allowoverlap When occurrences of the pattern overlap, replace all of them.
+ Be careful, this might lead to an infinite loop.
+ other: First argument is the old text, second argument is the new text.
+ If the -regex argument is given, the first argument will be
+ regarded as a regular expression, and the second argument might
+ contain expressions like \\1 or \g<name>.
+
Examples:
If you want to change templates from the old syntax, e.g. {{msg:Stub}}, to the
@@ -70,10 +58,10 @@
python replace.py -xml -regex "{{msg:(.*?)}}" "{{\\1}}"
-If you have a dump called foobar.xml and want to fix typos, e.g.
+If you have a dump called foobar.xml and want to fix typos in articles, e.g.
Errror -> Error, use this:
- python replace.py -xml:foobar.xml "Errror" "Error"
+ python replace.py -xml:foobar.xml "Errror" "Error" -namespace:0
If you have a page called 'John Doe' and want to convert HTML tags to wiki
syntax, use:
@@ -90,6 +78,12 @@
import sys, re
import wikipedia, pagegenerators,catlib, config
+# This is required for the text that is shown when you run this script
+# with the parameter -help.
+docuReplacements = {
+ '¶ms;': pagegenerators.parameterHelp
+}
+
# Imports predefined replacements tasks from fixes.py
from fixes import fixes