Revision: 7487 Author: xqt Date: 2009-10-17 12:09:43 +0000 (Sat, 17 Oct 2009)
Log Message: ----------- solve some indentation errors coming with r6540 (bug #2840435)
Modified Paths: -------------- trunk/pywikipedia/standardize_notes.py
Modified: trunk/pywikipedia/standardize_notes.py =================================================================== --- trunk/pywikipedia/standardize_notes.py 2009-10-17 11:10:07 UTC (rev 7486) +++ trunk/pywikipedia/standardize_notes.py 2009-10-17 12:09:43 UTC (rev 7487) @@ -46,7 +46,7 @@ # 2005-07-15: Build list of all sections which may contain citations: doFindAllCitationSections(). (SEWilco) #
-from __future__ import generators +#from __future__ import generators import subprocess, sys, re, random import socket, urllib, robotparser import wikipedia, pagegenerators, config @@ -378,7 +378,7 @@ wikipedia.output( u"Reading existing Notes section" ) self.doReadReferencesSection( new_text, refsectionname ) while self.references and self.references[len(self.references)-1] == u'\n': - del self.references[len(self.references)-1] # delete trailing empty lines + del self.references[len(self.references)-1] # delete trailing empty lines # Convert any external links to footnote references wikipedia.output( u"Converting external links" ) new_text = self.doConvertExternalLinks( new_text ) @@ -416,53 +416,53 @@ new_text = new_text + text_line # skip section, so retain text. else: # TODO: recognize {{inline}} invisible footnotes when something can be done with them - # - # Ignore lines within comments - if not text_line.startswith( u'<!--' ): - # Fix erroneous external links in double brackets - Rextlink = re.compile(r'(?i)[[(?P<linkname>http://%5B%5E%5C%5D%5D+?)%5C%5D%5C]') - # TODO: compiling the regex each time might be inefficient - text_lineR = re.compile(Rextlink) - MOextlink = text_lineR.search(text_line) - while MOextlink: # find all links on line - extlink_linkname = MOextlink.group('linkname') - # Rewrite double brackets to single ones - text_line=text_line[:MOextlink.start()] + '[%s]' % extlink_linkname + text_line[MOextlink.end(0):] - MOextlink = text_lineR.search(text_line,MOextlink.start(0)+1) - # Regular expression to look for external link [linkname linktext] - linktext is optional. - # Also accepts erroneous pipe symbol as separator. 
- # Accepts wikilinks within <linktext> - #Rextlink = re.compile(r'[^[][(?P<linkname>[h]*[ft]+tp:[^ []|]+?)(?P<linktext>[ |]+(( *[^]|]*)|( *[[.+?]])*)+)*][^]]') - #Rextlink = re.compile(r'[(?P<linkname>[h]*[ft]+tp:[^ []|]+?)(?P<linktext>[ |]+(( *[^]|]*)|( *[[.+?]])*)+)*]') - Rextlink = re.compile(r'(?i)[(?P<linkname>[h]*[ft]+tp:[^ []|]+?)(?P<linktext>[ |]+(( *[^]|]*)|( *[[.+?]])*)+)*]') - # TODO: compiling the regex each time might be inefficient - text_lineR = re.compile(Rextlink) - MOextlink = text_lineR.search(text_line) - while MOextlink: # find all links on line - extlink_linkname = MOextlink.group('linkname') - extlink_linktext = MOextlink.group('linktext') - self.refsequence += 1 - ( refname, reftext ) = self.doConvertLinkTextToReference(self.refsequence, extlink_linkname, extlink_linktext) - self.references.append( reftext ) # append new entry to References - if extlink_linktext: - # If there was text as part of link, reinsert text before footnote. - text_line=text_line[:MOextlink.start(0)] + '%s{{ref|%s}}' % (extlink_linktext, refname) + text_line[MOextlink.end(0):] - else: - text_line=text_line[:MOextlink.start(0)] + '{{ref|%s}}' % refname + text_line[MOextlink.end(0):] - MOextlink = text_lineR.search(text_line,MOextlink.start(0)+1) - # Search for {{doi}} - Rdoi = re.compile(r'(?i){{doi|(?P<doilink>[^}|]*)}}') - # TODO: compiling the regex each time might be inefficient - doiR = re.compile(Rdoi) - MOdoi = doiR.search(text_line) - while MOdoi: # find all doi on line - doi_link = MOdoi.group('doilink') - if doi_link: - self.refsequence += 1 - ( refname, reftext ) = self.doConvertDOIToReference( self.refsequence, doi_link ) - self.references.append( reftext ) # append new entry to References - text_line=text_line[:MOdoi.start(0)] + '{{ref|%s}}' % refname + text_line[MOdoi.end(0):] - MOdoi = doiR.search(text_line, MOdoi.start(0)+1) + # + # Ignore lines within comments + if not text_line.startswith( u'<!--' ): + # Fix erroneous external links in double 
brackets + Rextlink = re.compile(r'(?i)[[(?P<linkname>http://%5B%5E%5C%5D%5D+?)%5C%5D%5C]') + # TODO: compiling the regex each time might be inefficient + text_lineR = re.compile(Rextlink) + MOextlink = text_lineR.search(text_line) + while MOextlink: # find all links on line + extlink_linkname = MOextlink.group('linkname') + # Rewrite double brackets to single ones + text_line=text_line[:MOextlink.start()] + '[%s]' % extlink_linkname + text_line[MOextlink.end(0):] + MOextlink = text_lineR.search(text_line,MOextlink.start(0)+1) + # Regular expression to look for external link [linkname linktext] - linktext is optional. + # Also accepts erroneous pipe symbol as separator. + # Accepts wikilinks within <linktext> + #Rextlink = re.compile(r'[^[][(?P<linkname>[h]*[ft]+tp:[^ []|]+?)(?P<linktext>[ |]+(( *[^]|]*)|( *[[.+?]])*)+)*][^]]') + #Rextlink = re.compile(r'[(?P<linkname>[h]*[ft]+tp:[^ []|]+?)(?P<linktext>[ |]+(( *[^]|]*)|( *[[.+?]])*)+)*]') + Rextlink = re.compile(r'(?i)[(?P<linkname>[h]*[ft]+tp:[^ []|]+?)(?P<linktext>[ |]+(( *[^]|]*)|( *[[.+?]])*)+)*]') + # TODO: compiling the regex each time might be inefficient + text_lineR = re.compile(Rextlink) + MOextlink = text_lineR.search(text_line) + while MOextlink: # find all links on line + extlink_linkname = MOextlink.group('linkname') + extlink_linktext = MOextlink.group('linktext') + self.refsequence += 1 + ( refname, reftext ) = self.doConvertLinkTextToReference(self.refsequence, extlink_linkname, extlink_linktext) + self.references.append( reftext ) # append new entry to References + if extlink_linktext: + # If there was text as part of link, reinsert text before footnote. 
+ text_line=text_line[:MOextlink.start(0)] + '%s{{ref|%s}}' % (extlink_linktext, refname) + text_line[MOextlink.end(0):] + else: + text_line=text_line[:MOextlink.start(0)] + '{{ref|%s}}' % refname + text_line[MOextlink.end(0):] + MOextlink = text_lineR.search(text_line,MOextlink.start(0)+1) + # Search for {{doi}} + Rdoi = re.compile(r'(?i){{doi|(?P<doilink>[^}|]*)}}') + # TODO: compiling the regex each time might be inefficient + doiR = re.compile(Rdoi) + MOdoi = doiR.search(text_line) + while MOdoi: # find all doi on line + doi_link = MOdoi.group('doilink') + if doi_link: + self.refsequence += 1 + ( refname, reftext ) = self.doConvertDOIToReference( self.refsequence, doi_link ) + self.references.append( reftext ) # append new entry to References + text_line=text_line[:MOdoi.start(0)] + '{{ref|%s}}' % refname + text_line[MOdoi.end(0):] + MOdoi = doiR.search(text_line, MOdoi.start(0)+1) new_text = new_text + text_line # append new line to new text if new_text == '': new_text = original_text # If somehow no new text, return original text
pywikipedia-svn@lists.wikimedia.org