Revision: 4237 Author: wikipedian Date: 2007-09-11 15:22:56 +0000 (Tue, 11 Sep 2007)
Log Message: ----------- solved the problem where the bot doesn't know where to put the references section
Modified Paths: -------------- trunk/pywikipedia/noreferences.py
Modified: trunk/pywikipedia/noreferences.py =================================================================== --- trunk/pywikipedia/noreferences.py 2007-09-11 13:46:05 UTC (rev 4236) +++ trunk/pywikipedia/noreferences.py 2007-09-11 15:22:56 UTC (rev 4237) @@ -59,7 +59,8 @@ 'de': [ # no explicit policy on where to put the references u'Literatur', u'Weblinks', - u'Siehe auch' + u'Siehe auch', + u'Weblink', # bad, but common singular form of Weblinks ], 'en': [ # no explicit policy on where to put the references u'Further reading', @@ -170,7 +171,7 @@ oldText = page.get()
# Is there an existing section where we can add the references tag? - for section in wikipedia.translate(wikipedia.getSite(), referencesSections): + for section in wikipedia.translate(page.site(), referencesSections): sectionR = re.compile(r'\r\n=+ *%s *=+\r\n' % section) index = 0 while index < len(oldText): @@ -188,7 +189,7 @@ break
# Create a new section for the references tag - for section in wikipedia.translate(wikipedia.getSite(), placeBeforeSections): + for section in wikipedia.translate(page.site(), placeBeforeSections): # Find out where to place the new section sectionR = re.compile(r'\r\n=+ *%s *=+\r\n' % section) index = 0 @@ -200,16 +201,44 @@ index = match.end() else: wikipedia.output(u'Adding references section before %s section...\n' % section) - pos = match.start() - newSection = u'\n== %s ==\n\n<references/>\n' % wikipedia.translate(wikipedia.getSite(), referencesSections)[0] - newText = oldText[:match.start()] + newSection + oldText[match.start():] - self.save(page, newText) + index = match.start() + self.createReferenceSection(page, index) return else: break - # TODO: Make up a clever way of handling this. - wikipedia.output(u'Found no section that can be preceeded by a new references section. Please add a references section manually.') + # This gets complicated: we want to place the new references + # section over the interwiki links and categories, but also + # over all navigation bars, persondata, and other templates + # that are at the bottom of the page. So we need some advanced + # regex magic. + # The strategy is: create a temporary copy of the text. From that, + # keep removing interwiki links, templates etc. from the bottom. + # At the end, look at the length of the temp text. That's the position + # where we'll insert the references section. + catNamespaces = '|'.join(page.site().category_namespaces()) + categoryPattern = r'\[\[\s*(%s)\s*:[^\n]*\]\]\s*' % catNamespaces + interwikiPattern = r'\[\[([a-zA-Z-]+)\s?:([^\[\]\n]*)\]\]\s*' + # won't work with nested templates + templatePattern = r'{{((?!}}).)+?}}\s*' # the negative lookahead assures that we'll match the last template occurence in the temp text. 
+ commentPattern = r'<!--((?!-->).)*?-->\s*' + metadataR = re.compile(r'(\r\n)?(%s|%s|%s|%s)$' % (categoryPattern, interwikiPattern, templatePattern, commentPattern), re.DOTALL) + tmpText = oldText + while True: + match = metadataR.search(tmpText) + if match: + tmpText = tmpText[:match.start()] + else: + break + wikipedia.output(u'Found no section that can be preceeded by a new references section. Placing it before interwiki links, categories, and bottom templates.') + index = len(tmpText) + self.createReferenceSection(page, index)
+ def createReferenceSection(self, page, index): + oldText = page.get() + newSection = u'\n== %s ==\n\n<references/>\n' % wikipedia.translate(page.site(), referencesSections)[0] + newText = oldText[:index] + newSection + oldText[index:] + self.save(page, newText) + def save(self, page, newText): """ Saves the page to the wiki, if the user accepts the changes made.