Revision: 5779 Author: valhallasw Date: 2008-08-01 20:36:19 +0000 (Fri, 01 Aug 2008)
Log Message: ----------- Updated replace.py with support for the -requiretitle: argument. This is a somewhat hackish implementation, but a cleaner one would require major refactoring. The title requirement is now stored in the exceptions dict and handled like an exception (i.e. if the requirement is not met, the page is treated as if it were on the exceptions list).
Modified Paths: -------------- trunk/pywikipedia/replace.py
Modified: trunk/pywikipedia/replace.py =================================================================== --- trunk/pywikipedia/replace.py 2008-08-01 19:29:29 UTC (rev 5778) +++ trunk/pywikipedia/replace.py 2008-08-01 20:36:19 UTC (rev 5779) @@ -33,6 +33,10 @@ argument is given, XYZ will be regarded as a regular expression.
+-requiretitle:XYZ Only do pages with titles that contain XYZ. If the -regex + argument is given, XYZ will be regarded as a regular + expression. + -excepttext:XYZ Skip pages which contain the text XYZ. If the -regex argument is given, XYZ will be regarded as a regular expression. @@ -226,6 +230,11 @@ for exc in self.exceptions['title']: if exc.search(title): return True + if self.exceptions.has_key('require-title'): + for req in self.exceptions['require-title']: + if not req.search(title): # if not all requirements are met: + return True + return False
def isTextExcepted(self, text): @@ -298,6 +307,10 @@ for exc in self.exceptions['title']: if exc.search(title): return True + if self.exceptions.has_key('require-title'): + for req in self.exceptions['require-title']: + if not req.search(title): + return True return False
def isTextExcepted(self, original_text): @@ -454,7 +467,9 @@ 'text-contains': [], 'inside': [], 'inside-tags': [], - } + 'require-title': [], # using a seperate requirements dict needs some + } # major refactoring of code. + # Should the elements of 'replacements' and 'exceptions' be interpreted # as regular expressions? regex = False @@ -514,6 +529,8 @@ PageTitles.append(arg[6:]) elif arg.startswith('-excepttitle:'): exceptions['title'].append(arg[13:]) + elif arg.startswith('-requiretitle:'): + exceptions['require-title'].append(arg[14:]) elif arg.startswith('-excepttext:'): exceptions['text-contains'].append(arg[12:]) elif arg.startswith('-exceptinside:'): @@ -627,7 +644,7 @@ oldR = re.compile(old, re.UNICODE) replacements[i] = oldR, new
- for exceptionCategory in ['title', 'text-contains', 'inside']: + for exceptionCategory in ['title', 'require-title', 'text-contains', 'inside']: if exceptions.has_key(exceptionCategory): patterns = exceptions[exceptionCategory] if not regex: