jenkins-bot has submitted this change and it was merged.
Change subject: [PEP8] changes
......................................................................
[PEP8] changes
Change-Id: I7f0d85a1c4fc4c7f2902d9835a9ea51108975775
---
M simple_family.py
1 file changed, 30 insertions(+), 30 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/simple_family.py b/simple_family.py
index 357fede..6424cdb 100644
--- a/simple_family.py
+++ b/simple_family.py
@@ -7,15 +7,17 @@
# ============================================
# (C) Kim Bruning for Wikiation, sponsored by Kennisnet, 2009
+# (C) Pywikibot team, 2009-2013
#
# Distributed under the terms of the MIT license.
#
-import sys #, settings # No such module settings
-#if settings.pywikipedia_path not in sys.path:
-# sys.path.append(settings.pywikipedia_path)
+import sys
+import urllib
+import config
+import family
-import config, family, urllib
+
class Family(family.Family):
"""Friendlier version of the pywikipedia family class.
We can use this in conjunction with non-pywikipedia
@@ -27,16 +29,16 @@
"""
def __init__(self,
- name='MY_NAME_FOR_THIS_SERVER',
- protocol='http',
- server='www.my_server.com',
- scriptpath='/my/script/path',
- version='1.13.2',
- lang='en',
- encoding='utf-8',
- api_supported=False,
- RversionTab=None # very rare beast, you probably won't need it.
- ):
+ name='MY_NAME_FOR_THIS_SERVER',
+ protocol='http',
+ server='www.my_server.com',
+ scriptpath='/my/script/path',
+ version='1.13.2',
+ lang='en',
+ encoding='utf-8',
+ api_supported=False,
+ RversionTab=None # very rare beast, you probably won't need it.
+ ):
"""name: arbitrary name. Pick something easy to remember
protocol: http|https
server: dns address or ip address
@@ -47,6 +49,7 @@
encoding: should (almost) always be utf-8
api_supported: Does this mediawiki instance support the mediawiki api?
RversionTab: Magic. See superclass for information.
+
"""
family.Family.__init__(self)
@@ -55,34 +58,31 @@
self.langs = { # REQUIRED
lang: server, # Include one line for each wiki in family
}
- self._protocol=protocol
- self._scriptpath=scriptpath
- self._version=version
- self._encoding=encoding
+ self._protocol = protocol
+ self._scriptpath = scriptpath
+ self._version = version
+ self._encoding = encoding
# may as well add these here, so we can have a 1 stop shop
- self._lang=lang
- self._server=server
- self._api_supported=api_supported
- self._RversionTab=RversionTab
+ self._lang = lang
+ self._server = server
+ self._api_supported = api_supported
+ self._RversionTab = RversionTab
def protocol(self, code):
- """
- returns "http" or "https"
- """
+ """ returns "http" or "https" """
return self._protocol
def scriptpath(self, code):
- """returns the prefix used to locate scripts on this wiki.
- """
+ """ returns the prefix used to locate scripts on this wiki. """
return self._scriptpath
def apipath(self, code):
- """returns whether or not this wiki
- """
+ """returns whether or not this wiki """
if self._api_supported:
return '%s/api.php' % self.scriptpath(code)
else:
- raise NotImplementedError, "%s wiki family does not support api.php" % self.name
+ raise NotImplementedError(
+ "%s wiki family does not support api.php" % self.name)
# Which version of MediaWiki is used?
def version(self, code):
--
To view, visit https://gerrit.wikimedia.org/r/102916
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I7f0d85a1c4fc4c7f2902d9835a9ea51108975775
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
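For context, the Family class touched above is the configuration hook that points the compat framework at a wiki. Below is a minimal sketch, not part of the change itself, of a concrete family module built on it; the module name 'mywiki', the server, and the script path are assumed placeholder values:

    import simple_family

    class Family(simple_family.Family):
        # Hypothetical family for a third-party wiki; all values are examples.
        def __init__(self):
            simple_family.Family.__init__(
                self,
                name='mywiki',              # arbitrary, easy-to-remember name
                protocol='https',
                server='wiki.example.org',
                scriptpath='/w',
                version='1.13.2',
                lang='en',
                encoding='utf-8',
                api_supported=True)         # apipath() then gives '/w/api.php'

With api_supported=False, apipath() instead raises NotImplementedError, via the raise statement the diff rewrites from the old comma syntax to the call syntax that Python 3 requires.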
jenkins-bot has submitted this change and it was merged.
Change subject: [PEP8] changes
......................................................................
[PEP8] changes
Change-Id: I0bffe2ae3d09dfa7fa9dad014bae5bace3bce00b
---
M featured.py
1 file changed, 5 insertions(+), 4 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/featured.py b/featured.py
index f437a33..a052f96 100644
--- a/featured.py
+++ b/featured.py
@@ -6,7 +6,7 @@
Task commands:
Without additional task command this script is used for
- featured articles as default.
+ featured articles as default.
-good use this script for good articles.
@@ -64,13 +64,14 @@
from copy import copy
import wikipedia as pywikibot
from pywikibot import i18n
-import catlib, config
from pagegenerators import PreloadingGenerator
+import catlib
+import config
def CAT(site, name, hide):
name = site.namespace(14) + ':' + name
- cat=catlib.Category(site, name)
+ cat = catlib.Category(site, name)
for article in cat.articles(endsort=hide):
yield article
if hide:
@@ -679,7 +680,7 @@
pywikibot.output('\nQuitting program...')
finally:
if not nocache:
- pickle.dump(cache,file(filename,"wb"))
+ pickle.dump(cache, file(filename, "wb"))
if __name__ == "__main__":
--
To view, visit https://gerrit.wikimedia.org/r/102888
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I0bffe2ae3d09dfa7fa9dad014bae5bace3bce00b
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
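The CAT helper reformatted above is a generator: namespace 14 is MediaWiki's Category namespace, so it builds a fully qualified category title and yields the category's member articles. A sketch of how it might be driven, assuming a working compat checkout; the category title is a placeholder:

    import wikipedia as pywikibot  # compat-era module name

    site = pywikibot.getSite()
    # 'Featured articles' stands in for a real category name.
    for article in CAT(site, 'Featured articles', hide=None):
        pywikibot.output(article.title())

Note that the pickle.dump call fixed at the end of the diff still uses the Python 2 file() builtin, so the script remains Python 2 only.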
jenkins-bot has submitted this change and it was merged.
Change subject: [PEP8] changes, code improvements
......................................................................
[PEP8] changes, code improvements
Change-Id: Ibfa8741849c0c59d38963afac94ba92a2765bdf7
---
M standardize_notes.py
1 file changed, 447 insertions(+), 308 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/standardize_notes.py b/standardize_notes.py
index fec9af1..2682e21 100644
--- a/standardize_notes.py
+++ b/standardize_notes.py
@@ -35,8 +35,9 @@
"""
# Derived from replace.py
#
-# (C) Daniel Herding, 2004
-# Copyright Scot E. Wilcoxon 2005
+# (c) Daniel Herding, 2004
+# (c) Scot E. Wilcoxon, 2005
+# (c) pywikibot team, 2006-2013
#
# Distributed under the terms of the MIT license.
#
@@ -49,11 +50,19 @@
# doFindAllCitationSections(). (SEWilco)
#
-import subprocess, sys, re, random
-import socket, urllib, robotparser
+import subprocess
+import sys
+import re
+import random
+import socket
+import urllib
+import robotparser
from datetime import date
+import string
+
import wikipedia as pywikibot
-import pagegenerators, config
+import pagegenerators
+import config
# httpcache is optional
have_httpcache = True
@@ -64,38 +73,38 @@
# Summary messages in different languages
msg = {
- 'ar':u'روبوت: معالجة مراجع تلقائية %s',
- 'de':u'Bot: Automatisierte Textersetzung %s',
- 'en':u'Robot: Automated reference processing %s',
- 'es':u'Robot: Reemplazo automático de texto %s',
- 'fr':u'Robot : Remplacement de texte automatisé %s',
- 'he':u'בוט: הופך את הערת השוליים %s לאוטומטית',
- 'hu':u'Robot: Automatikus szövegcsere %s',
- 'ia':u'Robot: Reimplaciamento automatic de texto %s',
- 'is':u'Vélmenni: breyti texta %s',
- 'nl':u'Bot: geautomatiseerde verwerking van referenties %s',
- 'pl':u'Robot automatycznie przetwarza źródła %s',
- 'pt':u'Bot: Mudança automática %s',
- }
+ 'ar': u'روبوت: معالجة مراجع تلقائية %s',
+ 'de': u'Bot: Automatisierte Textersetzung %s',
+ 'en': u'Robot: Automated reference processing %s',
+ 'es': u'Robot: Reemplazo automático de texto %s',
+ 'fr': u'Robot : Remplacement de texte automatisé %s',
+ 'he': u'בוט: הופך את הערת השוליים %s לאוטומטית',
+ 'hu': u'Robot: Automatikus szövegcsere %s',
+ 'ia': u'Robot: Reimplaciamento automatic de texto %s',
+ 'is': u'Vélmenni: breyti texta %s',
+ 'nl': u'Bot: geautomatiseerde verwerking van referenties %s',
+ 'pl': u'Robot automatycznie przetwarza źródła %s',
+ 'pt': u'Bot: Mudança automática %s',
+}
fixes = {
- # These replacements will convert alternate reference formats to format used
- # by this tool.
+ # These replacements will convert alternate reference formats to format
+ # used by this tool.
'ALTREFS': {
'regex': True,
# We don't want to mess up pages which discuss HTML tags, so we skip
# all pages which contain nowiki tags.
'exceptions': ['<nowiki>[^<]{3,}</nowiki>'],
'msg': {
- 'ar':u'روبوت: إضافة/ترتيب المراجع.',
- 'en':u'Robot: Adding/sorting references.',
- 'ar':u'روبوت: إضافة/ترتيب المراجع.',
- 'fr':u'Robot : Ajoute/trie les références.',
- 'he':u'בוט: מוסיף/מסדר הערות שוליים',
- 'ia':u'Robot: Addition/assortimento de referentias',
- 'nl':u'Bot: referenties toegevoegd/gesorteerd',
- 'pl':u'Robot dodaje/sortuje źródła',
- },
+ 'ar': u'روبوت: إضافة/ترتيب المراجع.',
+ 'en': u'Robot: Adding/sorting references.',
+ 'fr': u'Robot : Ajoute/trie les références.',
+ 'he': u'בוט: מוסיף/מסדר הערות שוליים',
+ 'ia': u'Robot: Addition/assortimento de referentias',
+ 'nl': u'Bot: referenties toegevoegd/gesorteerd',
+ 'pl': u'Robot dodaje/sortuje źródła',
+ },
'replacements': [
# Everything case-insensitive (?i)
# These translate variations of footnote templates to ref|note
@@ -141,7 +150,7 @@
'references',
'source',
'sources',
- ]
+]
# news sites for which to generate 'news reference' citations, the org name, and prefix to strip
newssites = [
@@ -218,7 +227,9 @@
"""
- def __init__(self, source, replacements, exceptions, regex = False, namespace = -1, textfilename = None, sqlfilename = None, categoryname = None, pagenames = None):
+ def __init__(self, source, replacements, exceptions, regex=False,
+ namespace=-1, textfilename=None, sqlfilename=None,
+ categoryname=None, pagenames=None):
self.source = source
self.replacements = replacements
self.exceptions = exceptions
@@ -286,14 +297,14 @@
"""
import catlib
category = catlib.Category(pywikibot.getSite(), self.categoryname)
- for page in category.articles(recurse = False):
+ for page in category.articles(recurse=False):
yield page
def read_pages_from_text_file(self):
"""
- Generator which will yield pages that are listed in a text file created by
- the bot operator. Will regard everything inside [[double brackets]] as a
- page name, and yield Pages for these pages.
+ Generator which will yield pages that are listed in a text file created
+ by the bot operator. Will regard everything inside [[double brackets]]
+ as a page name, and yield Pages for these pages.
Arguments:
* textfilename - the textfile's path, either absolute or relative
@@ -306,13 +317,13 @@
for line in f.readlines():
# BUG: this will only find one link per line.
# TODO: use findall() instead.
- m=R.match(line)
+ m = R.match(line)
if m:
yield pywikibot.Page(pywikibot.getSite(), m.group(1))
f.close()
def read_pages_from_wiki_page(self):
- '''
+ """
Generator which will yield pages that are listed in a wiki page. Will
regard everything inside [[double brackets]] as a page name, except for
interwiki and category links, and yield Pages for these pages.
@@ -320,16 +331,14 @@
Arguments:
* pagetitle - the title of a page on the home wiki
- '''
+ """
listpage = pywikibot.Page(pywikibot.getSite(), self.pagetitle)
list = pywikibot.get(listpage)
# TODO - UNFINISHED
# TODO: Make MediaWiki's search feature available.
def __iter__(self):
- '''
- Starts the generator.
- '''
+ """ Starts the generator. """
if self.source == 'sqldump':
for pl in self.read_pages_from_sql_dump():
yield pl
@@ -343,10 +352,11 @@
for pagename in self.pagenames:
yield pywikibot.Page(pywikibot.getSite(), pagename)
+
class ReplaceRobot:
def __init__(self, generator, replacements, refsequence, references,
- refusage, exceptions = [], regex = False, acceptall = False,
- summary = ''):
+ refusage, exceptions=[], regex=False, acceptall=False,
+ summary=''):
self.generator = generator
self.replacements = replacements
self.exceptions = exceptions
@@ -361,6 +371,7 @@
"""
If one of the exceptions applies for the given text, returns the
substring. which matches the exception. Otherwise it returns None.
+
"""
for exception in self.exceptions:
if self.regex:
@@ -372,12 +383,12 @@
hit = original_text.find(exception)
if hit != -1:
return original_text[hit:hit + len(exception)]
- return None
def doReplacements(self, new_text):
"""
Returns the text which is generated by applying all replacements to the
given text.
+
"""
# For any additional replacements, loop through them
@@ -396,15 +407,18 @@
refsectionname)
# Read existing Notes section contents into references list
pywikibot.output(u"Reading existing Notes section")
- self.doReadReferencesSection( new_text, refsectionname )
- while self.references and self.references[len(self.references)-1] == u'\n':
- del self.references[len(self.references)-1] # delete trailing empty lines
+ self.doReadReferencesSection(new_text, refsectionname)
+ while self.references and \
+ self.references[len(self.references) - 1] == u'\n':
+ # delete trailing empty lines
+ del self.references[len(self.references) - 1]
# Convert any external links to footnote references
- pywikibot.output(u"Converting external links" )
+ pywikibot.output(u"Converting external links")
new_text = self.doConvertExternalLinks(new_text)
# Accumulate ordered list of all references
pywikibot.output(u"Collecting references")
- (duplicatefound, self.refusage) = self.doBuildSequenceListOfReferences( new_text )
+ (duplicatefound,
+ self.refusage) = self.doBuildSequenceListOfReferences(new_text)
# Rewrite references, including dealing with duplicates.
pywikibot.output(u"Rewriting references")
new_text = self.doRewriteReferences(new_text, self.refusage,
@@ -414,7 +428,7 @@
self.references = self.doReorderReferences(self.references,
self.refusage)
# Rebuild Notes section
- pywikibot.output(u"Rebuilding References section" )
+ pywikibot.output(u"Rebuilding References section")
new_text = self.doUpdateReferencesSection(new_text, self.refusage,
refsectionname)
return new_text
@@ -424,71 +438,93 @@
References. Adds References to reference list.
"""
- new_text = '' # Default is no text
+ new_text = ''
skipsection = False
- for text_line in original_text.splitlines(True): # Scan all text line by line
+ # Scan all text line by line
+ for text_line in original_text.splitlines(True):
# Check for protected sections
m = re.search("== *(?P<sectionname>[^\]\|=]*) *==", text_line)
# TODO: support subheadings within Notes section
# TODO: support Notes in alphabetic order
# TODO: support Notes in other orders
- if m: # if in a section, check if should skip this section
- if m.group('sectionname').lower().strip() in referencesectionnames:
- skipsection = True # skipsection left True so no further links converted
+ if m: # if in a section, check if should skip this section
+ if m.group('sectionname').lower().strip() in \
+ referencesectionnames:
+ # skipsection left True so no further links converted
+ skipsection = True
if skipsection:
- new_text = new_text + text_line # skip section, so retain text.
+ new_text += text_line # skip section, so retain text.
else:
- # TODO: recognize {{inline}} invisible footnotes when something can be done with them
- #
+ # TODO: recognize {{inline}} invisible footnotes when something
+ # can be done with them
+
# Ignore lines within comments
- if not text_line.startswith( u'<!--'):
+ if not text_line.startswith(u'<!--'):
# Fix erroneous external links in double brackets
- Rextlink = re.compile(r'(?i)\[\[(?P<linkname>http://[^\]]+?)\]\]')
+ Rextlink = re.compile(
+ r'(?i)\[\[(?P<linkname>http://[^\]]+?)\]\]')
# TODO: compiling the regex each time might be inefficient
text_lineR = re.compile(Rextlink)
MOextlink = text_lineR.search(text_line)
- while MOextlink: # find all links on line
+ while MOextlink: # find all links on line
extlink_linkname = MOextlink.group('linkname')
# Rewrite double brackets to single ones
- text_line=text_line[:MOextlink.start()] + '[%s]' % extlink_linkname + text_line[MOextlink.end(0):]
- MOextlink = text_lineR.search(text_line,MOextlink.start(0)+1)
- # Regular expression to look for external link [linkname linktext] - linktext is optional.
+ text_line = text_line[:MOextlink.start()] + \
+ '[%s]' % extlink_linkname + \
+ text_line[MOextlink.end(0):]
+ MOextlink = text_lineR.search(text_line,
+ MOextlink.start(0) + 1)
+ # Regular expression to look for external link
+ # [linkname linktext] - linktext is optional.
# Also accepts erroneous pipe symbol as separator.
# Accepts wikilinks within <linktext>
- #Rextlink = re.compile(r'[^\[]\[(?P<linkname>[h]*[ft]+tp:[^ [\]\|]+?)(?P<linktext>[ \|]+(( *[^\]\|]*)|( *\[\[.+?\]\])*)+)*\][^\]]')
- #Rextlink = re.compile(r'\[(?P<linkname>[h]*[ft]+tp:[^ [\]\|]+?)(?P<linktext>[ \|]+(( *[^\]\|]*)|( *\[\[.+?\]\])*)+)*\]')
- Rextlink = re.compile(r'(?i)\[(?P<linkname>[h]*[ft]+tp:[^ [\]\|]+?)(?P<linktext>[ \|]+(( *[^\]\|]*)|( *\[\[.+?\]\])*)+)*\]')
+ Rextlink = re.compile(
+ r'(?i)\[(?P<linkname>[h]*[ft]+tp:[^ [\]\|]+?)(?P<linktext>[ \|]+(( *[^\]\|]*)|( *\[\[.+?\]\])*)+)*\]')
# TODO: compiling the regex each time might be inefficient
text_lineR = re.compile(Rextlink)
MOextlink = text_lineR.search(text_line)
- while MOextlink: # find all links on line
+ while MOextlink: # find all links on line
extlink_linkname = MOextlink.group('linkname')
extlink_linktext = MOextlink.group('linktext')
self.refsequence += 1
- ( refname, reftext ) = self.doConvertLinkTextToReference(self.refsequence, extlink_linkname, extlink_linktext)
- self.references.append( reftext ) # append new entry to References
+ (refname, reftext) = self.doConvertLinkTextToReference(
+ self.refsequence, extlink_linkname,
+ extlink_linktext)
+ # append new entry to References
+ self.references.append(reftext)
if extlink_linktext:
- # If there was text as part of link, reinsert text before footnote.
- text_line=text_line[:MOextlink.start(0)] + '%s{{ref|%s}}' % (extlink_linktext, refname) + text_line[MOextlink.end(0):]
+ # If there was text as part of link, reinsert text
+ # before footnote.
+ text_line = (text_line[:MOextlink.start(0)] +
+ '%s{{ref|%s}}' % (extlink_linktext,
+ refname) +
+ text_line[MOextlink.end(0):])
else:
- text_line=text_line[:MOextlink.start(0)] + '{{ref|%s}}' % refname + text_line[MOextlink.end(0):]
- MOextlink = text_lineR.search(text_line,MOextlink.start(0)+1)
+ text_line = (text_line[:MOextlink.start(0)] +
+ '{{ref|%s}}' % refname +
+ text_line[MOextlink.end(0):])
+ MOextlink = text_lineR.search(text_line,
+ MOextlink.start(0) + 1)
# Search for {{doi}}
Rdoi = re.compile(r'(?i){{doi\|(?P<doilink>[^}|]*)}}')
# TODO: compiling the regex each time might be inefficient
doiR = re.compile(Rdoi)
MOdoi = doiR.search(text_line)
- while MOdoi: # find all doi on line
+ while MOdoi: # find all doi on line
doi_link = MOdoi.group('doilink')
if doi_link:
self.refsequence += 1
- ( refname, reftext ) = self.doConvertDOIToReference( self.refsequence, doi_link )
- self.references.append( reftext ) # append new entry to References
- text_line=text_line[:MOdoi.start(0)] + '{{ref|%s}}' % refname + text_line[MOdoi.end(0):]
- MOdoi = doiR.search(text_line, MOdoi.start(0)+1)
- new_text = new_text + text_line # append new line to new text
+ (refname, reftext) = self.doConvertDOIToReference(
+ self.refsequence, doi_link)
+ # append new entry to References
+ self.references.append(reftext)
+ text_line = text_line[:MOdoi.start(0)] + \
+ '{{ref|%s}}' % refname + \
+ text_line[MOdoi.end(0):]
+ MOdoi = doiR.search(text_line, MOdoi.start(0) + 1)
+ new_text += text_line # append new line to new text
if new_text == '':
- new_text = original_text # If somehow no new text, return original text
+ new_text = original_text # If no new text, return original text
return new_text
def doFindRefSection(self, original_text):
@@ -498,121 +534,157 @@
"""
refsectionname = ''
sectionname = ''
- for text_line in original_text.splitlines(True): # Scan all text line by line
- if refsectionname == '': # if ref section not found
+ # Scan all text line by line
+ for text_line in original_text.splitlines(True):
+ if not refsectionname:
# Check if line has a section name
- m = re.search( r'==+(?P<sectionname>[^=]+)==', text_line )
- if m: # if in a section, remember section name
+ m = re.search(r'==+(?P<sectionname>[^=]+)==', text_line)
+ if m: # if in a section, remember section name
sectionname = m.group('sectionname').strip()
- pywikibot.output( u'Section: %s' % sectionname )
- else: # else not a section name so look for reference
- n = re.search( r'(i?){{(note|ibid)[|]', text_line )
- if n: # if reference found
- refsectionname = sectionname # found reference section
- pywikibot.output( u'Ref section: %s' % refsectionname )
- break # stop looking
+ pywikibot.output(u'Section: %s' % sectionname)
+ else: # else not a section name so look for reference
+ n = re.search(r'(i?){{(note|ibid)[|]', text_line)
+ if n:
+ refsectionname = sectionname # found reference section
+ pywikibot.output(u'Ref section: %s' % refsectionname)
+ break
return refsectionname
def doFindAllCitationSections(self, original_text, refsectionname):
""" Returns list of sections which may contain citations. """
- refsectionlist = [ ( refsectionname) ]
+ refsectionlist = [refsectionname]
sectionname = ''
- for text_line in original_text.splitlines(True): # Scan all text line by line
+ # Scan all text line by line
+ for text_line in original_text.splitlines(True):
# Check if line has a section name
- m = re.search( "==[ ]*(?P<sectionname>[^=]+)[ ]*==", text_line )
- if m: # if in a section, remember section name
+ m = re.search("==[ ]*(?P<sectionname>[^=]+)[ ]*==", text_line)
+ if m:
sectionname = m.group('sectionname').strip()
if sectionname.lower().strip() in referencesectionnames:
- if sectionname not in refsectionlist: # if not already in list, add to list.
- refsectionlist.extend( sectionname )
+ # if not already in list, add to list.
+ if sectionname not in refsectionlist:
+ refsectionlist.append(sectionname)
return refsectionlist
def doRewriteReferences(self, original_text, refusage, refsectionname):
"""
- Returns the text which is generated by rewriting references, including duplicate refs.
+ Returns the text which is generated by rewriting references, including
+ duplicate refs.
+
"""
- new_text = '' # Default is no text
+ new_text = '' # Default is no text
skipsection = False
- for text_line in original_text.splitlines(True): # Scan all text line by line
+ # Scan all text line by line
+ for text_line in original_text.splitlines(True):
# Check for protected sections
- m = re.search( r'==+(?P<sectionname>[^=]+)==', text_line )
- if m: # if in a section, check if should skip this section
- if refsectionname != '': # if a certain section name has been identified
+ m = re.search(r'==+(?P<sectionname>[^=]+)==', text_line)
+ if m: # if in a section, check if should skip this section
+ # if a certain section name has been identified
+ if refsectionname != '':
m_section = m.group('sectionname')
- pywikibot.output( u'Looking for "%s": "%s"' % (refsectionname,unicode(m_section)) )
+ pywikibot.output(u'Looking for "%s": "%s"'
+ % (refsectionname, unicode(m_section)))
if unicode(m_section.strip()) == unicode(refsectionname):
- pywikibot.output( u'Found Ref section.')
- skipsection = True # skipsection left True so no further links converted
- else: # else grab all possible sections
- if m.group('sectionname').lower().strip() in referencesectionnames:
- pywikibot.output('RefSection found by default names: %s' % m.group('sectionname') )
- skipsection = True # skipsection left True so no further links converted
+ pywikibot.output(u'Found Ref section.')
+ skipsection = True
+ else: # else grab all possible sections
+ if m.group('sectionname').lower().strip() in \
+ referencesectionnames:
+ pywikibot.output(
+ 'RefSection found by default names: %s'
+ % m.group('sectionname'))
+ skipsection = True
if skipsection:
- new_text = new_text + text_line # skip section, so retain text.
+ new_text += text_line
else:
- # TODO: recognize {{inline}} invisible footnotes when something can be done with them
+ # TODO: recognize {{inline}} invisible footnotes when something
+ # can be done with them
#
- # Data structure: refusage[reference_key] = [ sequence_in_document, count, count_during_dup_handling ]
+ # Data structure:
+ # refusage[reference_key] = [sequence_in_document,
+ # count, count_during_dup_handling]
# Check for various references
# TODO: compiling the regex each time might be inefficient
- Rtext_line = re.compile(r'(?i){{(?P<reftype>ref|ref_num|ref_label)\|(?P<refname>[^}|]+?)}}')
- m = Rtext_line.search( text_line )
- alphabet26 = u'abcdefghijklmnopqrstuvwxyz'
+ Rtext_line = re.compile(
+ r'(?i){{(?P<reftype>ref|ref_num|ref_label)\|(?P<refname>[^}|]+?)}}')
+ m = Rtext_line.search(text_line)
while m: # if found a reference
- if m.group('reftype').lower() in ('ref', 'ref_num', 'ref_label'): # confirm ref
+ if m.group('reftype').lower() in ('ref', 'ref_num',
+ 'ref_label'):
refkey = m.group('refname').strip()
if refkey != '':
if refkey in refusage:
- # pywikibot.output( u'refusage[%s] = %s' % (refkey,refusage[refkey]) )
- if refusage[refkey][2] == 0: # if first use of reference
- text_line=text_line[:m.start(0)] + '{{ref|%s}}' % (refkey) + text_line[m.end(0):]
- refusage[refkey][2] += 1 # count use of reference
- else: # else not first use of reference
- text_line=text_line[:m.start(0)] + '{{ref_label|%s|%d|%s}}' % (refkey,(refusage[refkey][0])+1,alphabet26[((refusage[refkey][2])-1)%26]) + text_line[m.end(0):]
- refusage[refkey][2] += 1 # count use of reference
+ # pywikibot.output(u'refusage[%s] = %s' % (refkey,refusage[refkey]))
+ # if first use of reference
+ if refusage[refkey][2] == 0:
+ text_line = (
+ text_line[:m.start(0)] +
+ '{{ref|%s}}' % (refkey) +
+ text_line[m.end(0):])
+ # count use of reference
+ refusage[refkey][2] += 1
+ else: # else not first use of reference
+ text_line = (
+ text_line[:m.start(0)] +
+ '{{ref_label|%s|%d|%s}}'
+ % (refkey, (refusage[refkey][0]) + 1,
+ string.ascii_lowercase[
+ ((refusage[refkey][2]) - 1) % 26
+ ]) + text_line[m.end(0):])
+ # count use of reference
+ refusage[refkey][2] += 1
else:
- # Odd, because refusage list is populated the key should exist already.
- refusage[refkey] = [len(refusage),1,1] # remember this reference
- text_line=text_line[:m.start(0)] + '{{ref|%s}}' % refkey + text_line[m.end(0):]
- m = Rtext_line.search( text_line, m.start(0)+1 )
- new_text = new_text + text_line # append new line to new text
+ # Odd, because refusage list is populated the
+ # key should exist already.
+
+ # remember this reference
+ refusage[refkey] = [len(refusage), 1, 1]
+ text_line = (text_line[:m.start(0)] +
+ '{{ref|%s}}' % refkey +
+ text_line[m.end(0):])
+ m = Rtext_line.search(text_line, m.start(0) + 1)
+ new_text += text_line
if new_text == '':
- new_text = original_text # If somehow no new text, return original text
+ # If somehow no new text, return original text
+ new_text = original_text
return new_text
- def doGetTitleFromURL(self, extlink_linkname ):
+ def doGetTitleFromURL(self, extlink_linkname):
"""
Returns text derived from between <title>...</title> tags through a URL.
Obeys robots.txt restrictions.
+
"""
# if no descriptive text get from web site, if not PDF
urltitle = u''
urlfile = None
urlheaders = None
if len(extlink_linkname) > 5:
- socket.setdefaulttimeout(20) # timeout in seconds
- pywikibot.get_throttle() # throttle down to Wikipedia rate
+ socket.setdefaulttimeout(20) # timeout in seconds
+ pywikibot.get_throttle() # throttle down to Wikipedia rate
# Obey robots.txt restrictions
rp = robotparser.RobotFileParser()
- rp.set_url( extlink_linkname )
+ rp.set_url(extlink_linkname)
try:
- rp.read() # read robots.txt
+ rp.read() # read robots.txt
except (IOError, socket.timeout):
pywikibot.output(u'Error accessing URL: %s'
% unicode(extlink_linkname))
else:
urlobj = None
- if not rp.can_fetch( "*", extlink_linkname ):
+ if not rp.can_fetch("*", extlink_linkname):
pywikibot.output(u'Robot prohibited: %s'
% unicode(extlink_linkname))
else: # else access allowed
try:
if have_httpcache:
cache = HTTPCache(extlink_linkname)
- urlfile = cache.filename() # filename of cached date
+ # filename of cached data
+ urlfile = cache.filename()
urlheaders = cache.info()
else:
- (urlfile, urlheaders) = urllib.urlretrieve(extlink_linkname)
+ (urlfile,
+ urlheaders) = urllib.urlretrieve(extlink_linkname)
except IOError:
pywikibot.output(u'Error accessing URL. %s'
% unicode(extlink_linkname))
@@ -626,13 +698,17 @@
unicode(extlink_linkname)))
except: # Ignore other errors
pass
- if urlfile != None:
- urlobj = open( urlfile )
+ if urlfile:
+ urlobj = open(urlfile)
if extlink_linkname.lower().endswith('.pdf'):
# If file has a PDF suffix
- pywikibot.output( u'PDF file.')
+ pywikibot.output(u'PDF file.')
try:
- pdfinfo_out = subprocess.Popen([r"pdfinfo","/dev/stdin"], stdin=urlobj, stdout=subprocess.PIPE, shell=False).communicate()[0]
+ pdfinfo_out = subprocess.Popen(
+ [r"pdfinfo", "/dev/stdin"],
+ stdin=urlobj,
+ stdout=subprocess.PIPE,
+ shell=False).communicate()[0]
for aline in pdfinfo_out.splitlines():
if aline.lower().startswith('title'):
urltitle = aline.split(None)[1:]
@@ -646,66 +722,78 @@
urlauthor = ' '.join(urlauthor)
if urlauthor:
pywikibot.output(u'author: %s'
- % urlauthor )
+ % urlauthor)
except ValueError:
- pywikibot.output( u'pdfinfo value error.')
+ pywikibot.output(u'pdfinfo value error.')
except OSError:
- pywikibot.output( u'pdfinfo OS error.')
- except: # Ignore errors
- pywikibot.output( u'PDF processing error.')
+ pywikibot.output(u'pdfinfo OS error.')
+ except: # Ignore errors
+ pywikibot.output(u'PDF processing error.')
pass
- pywikibot.output( u'PDF done.')
+ pywikibot.output(u'PDF done.')
if urlobj:
urlobj.close()
else:
# urlinfo = urlobj.info()
aline = urlobj.read()
maxalines = 100
- while maxalines > 0 and aline and urltitle == '':
- maxalines -= 1 # reduce number of lines left to consider
- titleRE = re.search("(?i)<title>(?P<HTMLtitle>[^<>]+)", aline)
+ while maxalines > 0 and aline and not urltitle:
+ # reduce number of lines left to consider
+ maxalines -= 1
+ titleRE = re.search(
+ "(?i)<title>(?P<HTMLtitle>[^<>]+)", aline)
if titleRE:
try:
- urltitle = unicode(titleRE.group('HTMLtitle'), 'utf-8')
+ urltitle = unicode(titleRE.group('HTMLtitle'),
+ 'utf-8')
except:
- urltitle = u' ' # error, no title
- urltitle = u' '.join(urltitle.split()) # merge whitespace
- pywikibot.output( u'::::Title: %s' % urltitle )
- break # found a title so stop looking
+ urltitle = u' ' # error, no title
+ urltitle = u' '.join(urltitle.split())
+ pywikibot.output(u'::::Title: %s' % urltitle)
+ break # found a title so stop looking
else:
if maxalines < 1:
pywikibot.output(
u'No title in URL. %s'
- % unicode(extlink_linkname) )
+ % unicode(extlink_linkname))
else:
- if urlobj != None:
- pywikibot.output( u'::+URL: ' + extlink_linkname )
+ if urlobj:
+ pywikibot.output(u'::+URL: ' + extlink_linkname)
# urlinfo = urlobj.info()
aline = urlobj.read()
full_page = ''
# while aline and urltitle == '':
while aline:
full_page = full_page + aline
- titleRE = re.search("(?i)<title>(?P<HTMLtitle>[^<>]+)", aline)
+ titleRE = re.search(
+ "(?i)<title>(?P<HTMLtitle>[^<>]+)",
+ aline)
if titleRE:
if titleRE.group('HTMLtitle'):
urltitle = u''
try:
- urltitle = unicode(titleRE.group('HTMLtitle'), 'utf-8')
- urltitle = u' '.join(urltitle.split()) # merge whitespace
- pywikibot.output( u'::::Title: %s' % urltitle )
+ urltitle = unicode(
+ titleRE.group('HTMLtitle'),
+ 'utf-8')
+ urltitle = u' '.join(
+ urltitle.split())
+ pywikibot.output(
+ u'::::Title: %s'
+ % urltitle)
except:
aline = urlobj.read()
continue
else:
aline = urlobj.read()
continue
- break # found a title so stop looking
+ # found a title so stop looking
+ break
else:
aline = urlobj.read()
else:
aline = urlobj.read()
- if urltitle != '': pywikibot.output( u'title: ' + urltitle )
+ if urltitle:
+ pywikibot.output(u'title: ' + urltitle)
# Try a more advanced search
##from nltk.parser.probabilistic import *
##from nltk.tokenizer import *
@@ -734,37 +822,43 @@
##WhitespaceTokenizer(SUBTOKENS='WORDS').tokenize(text_token)
#unitagger.tag(text_token)
#britagger.tag(text_token)
- ### pywikibot.output( unicode(text_token) )
+ ### pywikibot.output(unicode(text_token))
else:
- pywikibot.output( u'No data retrieved.')
+ pywikibot.output(u'No data retrieved.')
socket.setdefaulttimeout(200)
- urltitle = urltitle.replace(u'|',u':')
+ urltitle = urltitle.replace(u'|', u':')
return urltitle.strip()
- def doConvertLinkTextToReference(self, refsequence, extlink_linkname, extlink_linktext):
- """
- Returns the text which is generated by converting a link to
+ def doConvertLinkTextToReference(self, refsequence, extlink_linkname,
+ extlink_linktext):
+ """ Returns the text which is generated by converting a link to
a format suitable for the References section.
+
"""
refname = u'refbot.%d' % refsequence
- m = re.search("[\w]+://([\w]\.)*(?P<siteend>[\w.]+)[/\Z]", extlink_linkname)
+ m = re.search("[\w]+://([\w]\.)*(?P<siteend>[\w.]+)[/\Z]",
+ extlink_linkname)
if m:
- refname = m.group('siteend') + u'.%d' % refsequence # use end of site URL as reference name
- new_text = u'# {{note|%s}} %s' % (refname, self.doConvertRefToCitation( extlink_linktext, extlink_linkname, refname ) ) + '\n'
+ # use end of site URL as reference name
+ refname = m.group('siteend') + u'.%d' % refsequence
+ new_text = u'# {{note|%s}} %s' % (refname,
+ self.doConvertRefToCitation(
+ extlink_linktext,
+ extlink_linkname,
+ refname)) + '\n'
return (refname, new_text)
- def doConvertRefToCitation(self, extlink_linktext, extlink_linkname, refname ):
- """
- Returns text with a citation created from link information
- """
+ def doConvertRefToCitation(self, extlink_linktext, extlink_linkname,
+ refname):
+ """ Returns text with a citation created from link information """
new_text = u''
now = date.today()
- if extlink_linktext == None or len(extlink_linktext.strip()) < 20:
- pywikibot.output( u'Fetching URL: %s' % unicode(extlink_linkname) )
- urltitle = self.doGetTitleFromURL( extlink_linkname ) # try to get title from URL
- if urltitle == None or urltitle == '':
+ if not extlink_linktext or len(extlink_linktext.strip()) < 20:
+ pywikibot.output(u'Fetching URL: %s' % unicode(extlink_linkname))
+ urltitle = self.doGetTitleFromURL(extlink_linkname) # try to get title from URL
+ if not urltitle:
urltitle = extlink_linkname
- pywikibot.output( u'Title is: %s' % urltitle )
+ pywikibot.output(u'Title is: %s' % urltitle)
extlink_linktext = urltitle
for newref in self.references: # scan through all references
if extlink_linkname in newref: # if undescribed linkname same as a previous entry
@@ -773,27 +867,29 @@
else:
extlink_linktext = extlink_linkname + ' (See above)'
break # found a matching previous linkname so stop looking
- if extlink_linktext == None or len(extlink_linktext) < 20:
+ if not extlink_linktext or len(extlink_linktext) < 20:
extlink_linktext = urltitle
# Look for a news web site
for (sitename, newscompany, stripprefix) in newssites:
- if refname.startswith( sitename ):
+ if refname.startswith(sitename):
# If there is a prefix to strip from the title
if stripprefix and extlink_linktext.startswith(stripprefix):
extlink_linktext = extlink_linktext[len(stripprefix):]
- new_text = u'{{news reference | title=%s | url=%s | urldate=%s | org=%s }}' % ( extlink_linktext, extlink_linkname, now.isoformat(), newscompany ) + '\n'
+ new_text = u'{{news reference | title=%s | url=%s | urldate=%s | org=%s }}' % (extlink_linktext, extlink_linkname, now.isoformat(), newscompany) + '\n'
break
else: # else no special site found
- new_text = u'{{web reference | title=%s | url=%s | date=%s }}' % ( extlink_linktext, extlink_linkname, now.isoformat() )
+ new_text = u'{{web reference | title=%s | url=%s | date=%s }}' % (extlink_linktext, extlink_linkname, now.isoformat())
return (new_text)
def doConvertDOIToReference(self, refsequence, doi_linktext):
"""
Returns the text which is generated by converting a DOI reference to
a format suitable for the Notes section.
+
"""
# TODO: look up DOI info and create full reference
- urltitle = self.doGetTitleFromURL('http://dx.doi.org/' + doi_linktext ) # try to get title from URL
+ # try to get title from URL
+ urltitle = self.doGetTitleFromURL('http://dx.doi.org/' + doi_linktext)
refname = 'refbot%d' % refsequence
if urltitle:
new_text = '# {{note|%s}} %s {{doi|%s}}\n' \
@@ -806,6 +902,7 @@
def doBuildSequenceListOfReferences(self, original_text):
"""
Returns a list with all found references and sequence numbers.
+
"""
duplicatefound = False
refusage = {}
@@ -813,18 +910,20 @@
for text_line in original_text.splitlines(True): # Scan all text line by line
# Check for various references
Rtext_line = re.compile(r'(?i){{(?P<reftype>ref|ref_num|ref_label)\|(?P<refname>[^}|]+?)}}')
- m = Rtext_line.search( text_line )
+ m = Rtext_line.search(text_line)
while m: # if found a reference
- if m.group('reftype').lower() in ('ref', 'ref_num', 'ref_label'): # confirm ref
+ if m.group('reftype').lower() in ('ref', 'ref_num', 'ref_label'):
refkey = m.group('refname').strip()
if refkey != '':
if refkey in refusage:
- refusage[refkey][1] += 1 # duplicate use of reference
+ # duplicate use of reference
+ refusage[refkey][1] += 1
duplicatefound = True
else:
- refusage[refkey] = [len(refusage),0,0] # remember this reference
- m = Rtext_line.search( text_line, m.end() )
- pywikibot.output( u'Number of refs: %d' % (len(refusage)) )
+ # remember this reference
+ refusage[refkey] = [len(refusage), 0, 0]
+ m = Rtext_line.search(text_line, m.end())
+ pywikibot.output(u'Number of refs: %d' % (len(refusage)))
return (duplicatefound, refusage)
def doReadReferencesSection(self, original_text, refsectionname):
@@ -832,180 +931,212 @@
Returns the text which is generated by reading the Notes section.
Also appends references to self.references.
Contents of all Notes sections will be read.
+
"""
# TODO: support subsections within Notes
new_text = ''
intargetsection = False
- for text_line in original_text.splitlines(True): # Scan all text line by line
+ for text_line in original_text.splitlines(True):
# Check for target section
- m = re.search( r'==+(?P<sectionname>[^=]+)==', text_line )
- if m: # if in a section, check if Notes section
- if refsectionname != '': # if a certain section name has been identified
+ m = re.search(r'==+(?P<sectionname>[^=]+)==', text_line)
+ if m: # if in a section, check if Notes section
+ # if a certain section name has been identified
+ if refsectionname != '':
m_section = m.group('sectionname')
pywikibot.output(u'Looking for "%s": "%s"'
- % (refsectionname,m_section) )
+ % (refsectionname, m_section))
if unicode(m_section.strip()) == unicode(refsectionname):
pywikibot.output(u'Read Ref section.')
intargetsection = True
- new_text = new_text + text_line
+ new_text += text_line
else:
intargetsection = False
- else: # else grab all possible sections
- if m.group('sectionname').lower().strip() in referencesectionnames:
+ else: # else grab all possible sections
+ if m.group('sectionname').lower().strip() in \
+ referencesectionnames:
intargetsection = True
- new_text = new_text + text_line
+ new_text += text_line
else:
intargetsection = False
else:
- if intargetsection: # if inside target section, remember this reference line
- if text_line.strip() != '':
- if text_line.lstrip()[0] in u'[{': # if line starts with non-Ref WikiSyntax
- intargetsection = False # flag as not being in section
+ # if inside target section, remember this reference line
+ if intargetsection:
+ if text_line.strip():
+ if text_line.lstrip()[0] in u'[{': # if line starts with non-Ref WikiSyntax
+ intargetsection = False
# TODO: need better way to handle special cases at end of refs
if text_line.strip() == u'<!--READ ME!! PLEASE DO NOT JUST ADD NEW NOTES AT THE BOTTOM. See the instructions above on ordering. -->': # This line ends some Notes sections
- intargetsection = False # flag as not being in section
- if text_line.strip() == u'</div>': # This line ends some Notes sections
- intargetsection = False # flag as not being in section
+ intargetsection = False
+ if text_line.strip() == u'</div>': # This line ends some Notes sections
+ intargetsection = False
if intargetsection: # if still inside target section
# Convert any # wiki list to *; will be converted later if a reference
if text_line[0] == '#':
text_line = '*' + text_line[1:]
self.references.append(text_line.rstrip() + u'\n')
- new_text = new_text + text_line.rstrip() + u'\n'
+ new_text += text_line.rstrip() + u'\n'
return new_text
def doReorderReferences(self, references, refusage):
"""
Returns the new references list after reordering to match refusage list
Non-references are moved to top, unused references to bottom.
+
"""
# TODO: add tests for duplicate references/Ibid handling.
newreferences = references
- if references != [] and refusage != {}:
+ if references and refusage:
newreferences = []
- for i in range(len(references)): # move nonrefs to top of list
+ for i in xrange(len(references)): # move nonrefs to top of list
text_line = references[i]
# TODO: compile search?
- m = re.search(r'(?i)[*#][\s]*{{(?P<reftype>note)\|(?P<refname>[^}|]+?)}}', text_line)
+ m = re.search(
+ r'(?i)[*#][\s]*{{(?P<reftype>note)\|(?P<refname>[^}|]+?)}}',
+ text_line)
# Special test to ignore Footnote instructions comment.
text_line_stripped = text_line.strip()
- if text_line_stripped.startswith(u'4) Add ') or not m: # if no ref found
- newreferences.append(text_line) # add nonref to new list
+ # if no ref found
+ if text_line_stripped.startswith(u'4) Add ') or not m:
+ newreferences.append(text_line) # add nonref to new list
references[i] = None
refsort = {}
- for refkey in refusage.keys(): # build list of keys in document order
- refsort[ refusage[refkey][0] ] = refkey # refsort contains reference key names
- alphabet26 = u'abcdefghijklmnopqrstuvwxyz'
- for i in range(len(refsort)): # collect references in document order
- for search_num in range(len(references)): # find desired entry
+ # build list of keys in document order
+ for refkey in refusage.keys():
+ # refsort contains reference key names
+ refsort[refusage[refkey][0]] = refkey
+ # collect references in document order
+ for i in xrange(len(refsort)):
+ for search_num in range(len(references)): # find desired entry
search_line = references[search_num]
if search_line:
# TODO: compile search?
- # Note that the expression finds all neighboring note|note_label expressions.
- m2 = re.search(r'(?i)[*#]([\s]*{{(?P<reftype>note|note_label)\|(?P<refname>[^}|]+?)}})+', search_line)
+ # Note that the expression finds all neighboring
+ # note|note_label expressions.
+ m2 = re.search(
+ r'(?i)[*#]([\s]*{{(?P<reftype>note|note_label)\|(?P<refname>[^}|]+?)}})+',
+ search_line)
if m2:
refkey = m2.group('refname').strip()
- if refkey == refsort[i]: # if expected ref found
+ if refkey == refsort[i]: # if expected ref found
# Rewrite references
- note_text = '# {{note|%s}}' % refkey # rewrite note tag
- if refusage[refkey][1] > 1: # if more than one reference to citation
- for n in range(refusage[refkey][1]): # loop through all repetitions
- note_text = note_text + '{{note_label|%s|%d|%s}}' % (refkey,(refusage[refkey][0])+1,alphabet26[n%26])
- search_line=search_line[:m2.start(0)] + note_text + search_line[m2.end(0):]
- newreferences.append(search_line) # found, add entry
- del references[search_num] # delete used reference
- break # stop the search loop after entry found
- newreferences = newreferences + references # append any unused references
+ note_text = '# {{note|%s}}' % refkey
+ # if more than one reference to citation
+ if refusage[refkey][1] > 1:
+ # loop through all repetitions
+ for n in xrange(refusage[refkey][1]):
+ note_text += (
+ '{{note_label|%s|%d|%s}}'
+ % (refkey,
+ (refusage[refkey][0]) + 1,
+ string.ascii_lowercase[n % 26]))
+ search_line = search_line[:m2.start(0)] + \
+ note_text + \
+ search_line[m2.end(0):]
+ newreferences.append(search_line)
+ # delete used reference
+ del references[search_num]
+ break # stop the search loop after entry found
+ newreferences += references # append any unused references
return newreferences
- def doUpdateReferencesSection(self, original_text, refusage, refsectionname):
+ def doUpdateReferencesSection(self, original_text, refusage,
+ refsectionname):
"""
Returns the text which is generated by rebuilding the Notes section.
Rewrite Notes section from references list.
+
"""
new_text = ''
intargetsection = False
- for text_line in original_text.splitlines(True): # Scan all text line by line
+ # Scan all text line by line
+ for text_line in original_text.splitlines(True):
# Check for target section
- m = re.search( r'==+(?P<sectionname>[^=]+)==', text_line )
- if m: # if in a section, check if Notes section
- if refsectionname != '': # if a certain section name has been identified
+ m = re.search(r'==+(?P<sectionname>[^=]+)==', text_line)
+ if m:
+ if refsectionname != '':
m_section = m.group('sectionname')
- pywikibot.output( u'Looking for "%s": "%s"' % (refsectionname,m_section) )
+ pywikibot.output(u'Looking for "%s": "%s"'
+ % (refsectionname, m_section))
if unicode(m_section.strip()) == unicode(refsectionname):
- pywikibot.output( u'Updating Ref section.')
- intargetsection = True # flag as being in section
+ pywikibot.output(u'Updating Ref section.')
+ intargetsection = True
else:
- intargetsection = False # flag as not being in section
- else: # else grab all possible sections
- if m.group('sectionname').lower().strip() in referencesectionnames:
- intargetsection = True # flag as being in section
- else:
- intargetsection = False # flag as not being in section
+ intargetsection = False
+ else: # else grab all possible sections
+ intargetsection = (m.group('sectionname').lower().strip()
+ in referencesectionnames)
if intargetsection:
- new_text = new_text + text_line # append new line to new text
+ new_text += text_line
if self.references != []:
- for newref in self.references: # scan through all references
- if newref != None:
- new_text = new_text + newref.rstrip() + u'\n' # insert references
- new_text = new_text + u'\n' # one trailing blank line
- self.references = [] # empty references
+ # scan through all references
+ for newref in self.references:
+ if newref:
+ # insert references
+ new_text += newref.rstrip() + u'\n'
+ new_text += u'\n'
+ self.references = []
else:
- new_text = new_text + text_line # copy section headline
+ new_text += text_line # copy section headline
else:
if intargetsection:
- if text_line.strip() != '':
- if text_line.lstrip()[0] in u'[{': # if line starts with non-Ref WikiSyntax
- intargetsection = False # flag as not being in section
+ if text_line.strip():
+ # if line starts with non-Ref WikiSyntax
+ if text_line.lstrip()[0] in u'[{':
+ # flag as not being in section
+ intargetsection = False
# TODO: need better way to handle special cases at end of refs
- if text_line.strip() == u'<!--READ ME!! PLEASE DO NOT JUST ADD NEW NOTES AT THE BOTTOM. See the instructions above on ordering. -->': # This line ends some Notes sections
- intargetsection = False # flag as not being in section
- if text_line.strip() == u'</div>': # This line ends some Notes sections
- intargetsection = False # flag as not being in section
- if not intargetsection: # if not in Notes section, remember line
- new_text = new_text + text_line # append new line to new text
+ if text_line.strip() == u'<!--READ ME!! PLEASE DO NOT JUST ADD NEW NOTES AT THE BOTTOM. See the instructions above on ordering. -->':
+ intargetsection = False
+ if text_line.strip() == u'</div>':
+ intargetsection = False
+ if not intargetsection:
+ new_text += text_line
# If references list not emptied, there was no Notes section found
- if self.references != []:
+ if self.references:
# New Notes section needs to be created at bottom.
- text_line_counter = 0 # current line
- last_text_line_counter_value = 0 # number of last line of possible text
- for text_line in original_text.splitlines(True): # Search for last normal text line
- text_line_counter += 1 # count this line
- if text_line.strip() != '':
- if text_line.lstrip()[0].isalnum(): # if line starts with alphanumeric
- last_text_line_counter = text_line_counter # number of last line of possible text
+ text_line_counter = 0
+ # number of last line of possible text
+ last_text_line_counter = 0
+ # Search for last normal text line
+ for text_line in original_text.splitlines(True):
+ text_line_counter += 1
+ if text_line.strip():
+ if text_line.lstrip()[0].isalnum():
+ # number of last line of possible text
+ last_text_line_counter = text_line_counter
else:
- if text_line.lstrip()[0] in u'<=!|*#': # if line starts with recognized wiki char
- if not text_line.startswith(u'<!--'): # if line not start with a comment
- last_text_line_counter = text_line_counter # number of last line of possible content
- new_text = '' # erase previous new_text
- text_line_counter = 0 # current line
- for text_line in original_text.splitlines(True): # Search for last normal text line
- text_line_counter += 1 # count this line
- if last_text_line_counter == text_line_counter: # if found insertion point
- new_text = new_text + text_line # append new line to new text
- new_text = new_text + '\n== Notes ==\n' # set to standard name
- new_text = new_text + u'{{subst:Footnote3text}}\n'
- if self.references != []:
- for newref in self.references: # scan through all references
+ # if line starts with recognized wiki char
+ if text_line.lstrip()[0] in u'<=!|*#':
+ if not text_line.startswith(u'<!--'):
+ # number of last line of possible content
+ last_text_line_counter = text_line_counter
+ new_text = ''
+ text_line_counter = 0
+ # Search for last normal text line
+ for text_line in original_text.splitlines(True):
+ text_line_counter += 1
+ # if found insertion point
+ if last_text_line_counter == text_line_counter:
+ new_text += text_line
+ new_text += '\n== Notes ==\n' # set to standard name
+ new_text += u'{{subst:Footnote3text}}\n'
+ if self.references:
+ for newref in self.references:
if newref is not None:
- new_text = new_text + newref # insert references
- new_text = new_text + u'\n' # one trailing blank line
- self.references = [] # empty references
+ new_text += newref
+ new_text += u'\n'
+ self.references = []
else:
- new_text = new_text + text_line # append new line to new text
- if new_text == '':
- new_text = original_text # If somehow no new text, return original text
+ new_text += text_line
+ if not new_text:
+ new_text = original_text
return new_text
def run(self):
- """
- Starts the robot.
- """
+ """ Starts the robot. """
# Run the generator which will yield Pages to pages which might need to be
# changed.
for pl in self.generator:
- print ''
try:
# Load the page's text from the wiki
original_text = pl.get()
@@ -1038,6 +1169,7 @@
if self.acceptall or choice in ['y', 'Y']:
pl.put(new_text, self.summary)
+
def main():
# How we want to retrieve information on which pages need to be changed.
# Can either be 'sqldump', 'textfile' or 'userinput'.
@@ -1045,15 +1177,16 @@
# Array which will collect commandline parameters.
# First element is original text, second element is replacement text.
commandline_replacements = []
- # A dictionary where keys are original texts and values are replacement texts.
+ # A dictionary where keys are original texts and values are replacement
+ # texts.
replacements = {}
# Don't edit pages which contain certain texts.
exceptions = []
# Should the elements of 'replacements' and 'exceptions' be interpreted
# as regular expressions?
regex = False
- # the dump's path, either absolute or relative, which will be used when source
- # is 'sqldump'.
+ # the dump's path, either absolute or relative, which will be used when
+ # source is 'sqldump'.
sqlfilename = None
# the textfile's path, either absolute or relative, which will be used when
# source is 'textfile'.
@@ -1062,8 +1195,8 @@
categoryname = None
# a list of pages which will be used when source is 'userinput'.
pagenames = []
- # will become True when the user presses a ('yes to all') or uses the -always
- # commandline paramater.
+ # will become True when the user presses a ('yes to all') or uses the
+ # -always commandline parameter.
acceptall = False
# Which namespace should be processed when using a SQL dump
# default to -1 which means all namespaces will be processed
@@ -1120,20 +1253,26 @@
else:
commandline_replacements.append(arg)
- if source == None or len(commandline_replacements) not in [0, 2]:
- # syntax error, show help text from the top of this file
- pywikibot.output(__doc__, 'utf-8')
+ if not (source and len(commandline_replacements) in (0, 2)):
+ # show help text from the top of this file
+ pywikibot.showHelp()
return
- if (len(commandline_replacements) == 2):
+
+ if len(commandline_replacements) == 2:
replacements[commandline_replacements[0]] = commandline_replacements[1]
- editSummary = pywikibot.translate(pywikibot.getSite(), msg) % ' (-' + commandline_replacements[0] + ' +' + commandline_replacements[1] + ')'
+ editSummary = pywikibot.translate(pywikibot.getSite(), msg,
+ ' (-' + commandline_replacements[0] +
+ ' +' + commandline_replacements[1] +
+ ')')
else:
change = ''
- default_summary_message = pywikibot.translate(pywikibot.getSite(), msg) % change
+ default_summary_message = pywikibot.translate(pywikibot.getSite(), msg,
+ change)
pywikibot.output(u'The summary message will default to: %s'
% default_summary_message)
summary_message = pywikibot.input(
- u'Press Enter to use this default message, or enter a description of the changes your bot will make:')
+ u'Press Enter to use this default message, or enter a\n'
+ u'description of the changes your bot will make:')
if summary_message == '':
summary_message = default_summary_message
editSummary = summary_message
@@ -1156,7 +1295,7 @@
gen = ReplacePageGenerator(source, replacements, exceptions, regex,
namespace, textfilename, sqlfilename,
categoryname, pagenames)
- preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber = 20)
+ preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=20)
bot = ReplaceRobot(preloadingGen, replacements, refsequence, references,
refusage, exceptions, regex, acceptall, editSummary)
bot.run()
--
To view, visit https://gerrit.wikimedia.org/r/100146
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ibfa8741849c0c59d38963afac94ba92a2765bdf7
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Siebrand <siebrand(a)wikimedia.org>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
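One behavioural detail worth pulling out of this large diff: doRewriteReferences keeps the first occurrence of a reference key as {{ref|key}} and rewrites every repeat as {{ref_label|key|N|letter}}, cycling the letter through string.ascii_lowercase, which replaces the hand-written alphabet26 constant. A standalone toy sketch of that bookkeeping, not code taken from the script:

    import string

    # refusage[key] = [sequence_in_document, count, count_during_dup_handling]
    refusage = {'smith2004': [0, 3, 0]}  # hypothetical reference key

    def label_use(refkey):
        seq, count, used = refusage[refkey]
        if used == 0:                    # first use keeps a plain {{ref}}
            tag = '{{ref|%s}}' % refkey
        else:                            # repeats become lettered ref_labels
            tag = '{{ref_label|%s|%d|%s}}' % (
                refkey, seq + 1, string.ascii_lowercase[(used - 1) % 26])
        refusage[refkey][2] += 1
        return tag

    print(label_use('smith2004'))   # {{ref|smith2004}}
    print(label_use('smith2004'))   # {{ref_label|smith2004|1|a}}
    print(label_use('smith2004'))   # {{ref_label|smith2004|1|b}}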
jenkins-bot has submitted this change and it was merged.
Change subject: [PEP8] strip trailing whitespace
......................................................................
[PEP8] strip trailing whitespace
Change-Id: I8b979e9c4433ab67ec87f1d765638f23f9f7b52c
---
M wikipedia.py
1 file changed, 1 insertion(+), 1 deletion(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/wikipedia.py b/wikipedia.py
index 60199e8..eed2049 100644
--- a/wikipedia.py
+++ b/wikipedia.py
@@ -856,7 +856,7 @@
if 'textmissing' in lastRev:
# Maybe we could use a new error exception.
# Now we just pass and got a server error
- pass
+ pass
elif isinstance(lastRev['*'], basestring):
textareaFound = True
# I got page date with 'revisions' in pageInfo but
--
To view, visit https://gerrit.wikimedia.org/r/102895
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I8b979e9c4433ab67ec87f1d765638f23f9f7b52c
Gerrit-PatchSet: 2
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot
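None of these commits record which style checker produced the warnings. A plausible way to reproduce checks such as the trailing-whitespace fix above is pycodestyle (the tool formerly distributed as pep8); a minimal sketch using its documented Python API:

    import pycodestyle  # assumes: pip install pycodestyle

    # W291 is pycodestyle's code for trailing whitespace.
    style = pycodestyle.StyleGuide(select=['W291'])
    report = style.check_files(['wikipedia.py'])
    print('%d problem(s) found' % report.total_errors)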