Revision: 6780 Author: cosoleto Date: 2009-04-30 22:35:53 +0000 (Thu, 30 Apr 2009)
Log Message: ----------- Reduces ugliness a bit in URLExclusion.scan() by removing two re.sub() calls. And the for loop with range() is just pointless now.
Modified Paths: -------------- trunk/pywikipedia/copyright.py
Modified: trunk/pywikipedia/copyright.py =================================================================== --- trunk/pywikipedia/copyright.py 2009-04-30 15:27:26 UTC (rev 6779) +++ trunk/pywikipedia/copyright.py 2009-04-30 22:35:53 UTC (rev 6780) @@ -382,8 +382,7 @@ list2 += entry.split("and ") for entry in list2: # Remove unnecessary part of URL - entry = re.sub("http://", "", entry) - entry = re.sub("www.", "", entry) + entry = re.sub("(http://|www\.)", "", entry) entry = re.sub("</?nowiki>", "", entry) if entry: if '/' in entry: @@ -399,8 +398,8 @@ cut_comment = True, cut_newlines = True ).splitlines()
- for i in range(len(result_list)): - cleaned = re.sub('\s+$', '', result_list[i]) + for item in result_list: + cleaned = item.strip() if cleaned: self.URLlist.add(cleaned)