Revision: 8760
Author: purodha
Date: 2010-12-05 22:07:02 +0000 (Sun, 05 Dec 2010)
Log Message:
-----------
titletranslate() better documented, saner and slightly quicker code, made more flexible.
Modified Paths:
--------------
trunk/pywikipedia/interwiki.py
trunk/pywikipedia/titletranslate.py
Modified: trunk/pywikipedia/interwiki.py
===================================================================
--- trunk/pywikipedia/interwiki.py 2010-12-05 21:09:39 UTC (rev 8759)
+++ trunk/pywikipedia/interwiki.py 2010-12-05 22:07:02 UTC (rev 8760)
@@ -108,10 +108,9 @@
These arguments are useful to provide hints to the bot:
- -hint: used as -hint:de:Anweisung to give the robot a hint
- where to start looking for translations. This is only
- useful if you specify a single page to work on. If no
- text is given after the second ':', the name of the page
+ -hint: used as -hint:de:Anweisung to give the robot a hint
+ where to start looking for translations. If no text
+ is given after the second ':', the name of the page
itself is used as the title for the hint, unless the
-hintnobracket command line option (see there) is also
selected.
@@ -122,12 +121,15 @@
* 10: The 10 largest languages (sites with most
articles). Analogous for any other natural
number.
- * arab: All languages useing the Arabic alphabet.
+ * arab: All languages using the Arabic alphabet.
* cyril: All languages that use the Cyrillic alphabet.
* chinese: All Chinese dialects.
* latin: All languages using the Latin script.
* scand: All Scandinavian languages.
+ Languages and groups having the same page title can be
+ combined, as in -hint:5,scand,sr,pt:New_York
+
-hintfile: similar to -hint, except that hints are taken from the given
file, enclosed in [[]] each, instead of the command line.
Modified: trunk/pywikipedia/titletranslate.py
===================================================================
--- trunk/pywikipedia/titletranslate.py 2010-12-05 21:09:39 UTC (rev 8759)
+++ trunk/pywikipedia/titletranslate.py 2010-12-05 22:07:02 UTC (rev 8760)
@@ -13,14 +13,34 @@
import wikipedia as pywikibot
import date
-def translate(page, hints = None, auto = True, removebrackets = False):
+def _join_to_(result, join):
+ for x in join:
+ if x not in result:
+ result.append(x)
+
+def translate(page, hints = None, auto = True, removebrackets = False, site = None, family = None):
"""
Please comment your source code! --Daniel
Does some magic stuff. Returns a list of pages.
+
+ Goes through all entries in 'hints'. Returns a list of pages.
+
+ Entries for single page titles list those pages. Page titles for entries
+ such as "all:" or "xyz:" or "20:" are first built from the page title of
+ 'page' and then listed. When 'removebrackets' is True, a trailing pair of
+ brackets and the text between them is removed from the page title.
+ If 'auto' is true, known year and date page titles are autotranslated
+ to all known target languages and inserted into the list.
+
"""
result = []
- site = page.site()
+ if site is None and page:
+ site = page.site()
+ if family is None and site:
+ family = site.family
+ if site:
+ sitelang = site.language()
if hints:
for h in hints:
if ':' not in h:
@@ -33,9 +53,11 @@
# if given as -hint:xy or -hint:xy:, assume that there should
# be a page in language xy with the same title as the page
# we're currently working on ...
+ if page is None:
+ continue
ns = page.namespace()
if ns:
- newname = u'%s:%s' % (site.family.namespace('_default', ns),
+ newname = u'%s:%s' % (family.namespace('_default', ns),
page.titleWithoutNamespace())
else:
# article in the main namespace
@@ -43,45 +65,49 @@
# ... unless we do want brackets
if removebrackets:
newname = re.sub(re.compile(ur"\W*?\(.*?\)\W*?",
re.UNICODE), u" ", newname)
- try:
- number = int(codes)
- codes = site.family.languages_by_size[:number]
- except ValueError:
- if codes == 'all':
- codes = site.family.languages_by_size
- elif codes in site.family.language_groups:
- codes = site.family.language_groups[codes]
- else:
- codes = codes.split(',')
+ codesplit = codes.split(',')
+ codes = []
+ for code in codesplit:
+ try:
+ number = int(code)
+ _join_to_(codes, family.languages_by_size[:number] )
+ except ValueError:
+ if code == 'all':
+ _join_to_(codes, family.languages_by_size )
+ elif code in family.language_groups:
+ _join_to_(codes, family.language_groups[code] )
+ elif code:
+ _join_to_(codes, [ code ] )
for newcode in codes:
- if newcode in site.languages():
- if newcode != site.language():
- x = pywikibot.Page(site.getSite(code=newcode), newname)
- if x not in result:
- result.append(x)
+ x = None
+ if newcode in family.langs.keys():
+ if ( page is None ) or ( newcode != sitelang ):
+ x = pywikibot.Page(pywikibot.getSite(fam=family, code=newcode), newname)
+# elif newcode in family.interwiki_forwarded_from:
+# x = pywikibot.Page(pywikibot.getSite(fam=newcode, code=newcode), newname)
else:
if pywikibot.verbose:
- pywikibot.output(u"Ignoring unknown language code %s"
- % newcode)
+ pywikibot.output(u"Ignoring the unknown language code %s" % newcode)
+ if x:
+ _join_to_(result, [ x ] )
# Autotranslate dates into all other languages, the rest will come from
# existing interwiki links.
- if auto:
+ if auto and page:
# search inside all dictionaries for this link
- dictName, value = date.getAutoFormat(page.site().language(),
- page.title())
+ dictName, value = date.getAutoFormat(sitelang, page.title())
if dictName:
if not (dictName == 'yearsBC' and
- page.site().language() in date.maxyearBC and
- value > date.maxyearBC[page.site().language()]) or \
+ sitelang in date.maxyearBC and
+ value > date.maxyearBC[sitelang]) or \
(dictName == 'yearsAD' and
- page.site().language() in date.maxyearAD and
- value > date.maxyearAD[page.site().language()]):
+ sitelang in date.maxyearAD and
+ value > date.maxyearAD[sitelang]):
pywikibot.output(
u'TitleTranslate: %s was recognized as %s with value %d'
% (page.title(), dictName, value))
for entryLang, entry in date.formats[dictName].iteritems():
- if entryLang != page.site().language():
+ if entryLang != sitelang:
if dictName == 'yearsBC' and \
entryLang in date.maxyearBC and \
value > date.maxyearBC[entryLang]:
@@ -94,9 +120,8 @@
newname = entry(value)
x = pywikibot.Page(
pywikibot.getSite(code=entryLang,
- fam=site.family), newname)
- if x not in result:
- result.append(x) # add new page
+ fam=family), newname)
+ _join_to_(result, [ x ] )
return result
bcDateErrors = [u'[[ko:%d년]]']