Revision: 4519
Author:   cosoleto
Date:     2007-11-09 11:40:26 +0000 (Fri, 09 Nov 2007)

Log Message:
-----------
fixed line ending style

Modified Paths:
--------------
    trunk/pywikipedia/copyright_clean.py
    trunk/pywikipedia/copyright_put.py
Modified: trunk/pywikipedia/copyright_clean.py
===================================================================
--- trunk/pywikipedia/copyright_clean.py	2007-11-09 11:34:57 UTC (rev 4518)
+++ trunk/pywikipedia/copyright_clean.py	2007-11-09 11:40:26 UTC (rev 4519)
@@ -1,159 +1,159 @@
# -*- coding: utf-8 -*-
"""
"""

#
# (C) Francesco Cosoleto, 2006
#
# Distributed under the terms of the MIT license.
#

import httplib, socket, simplejson, re, time
import config, wikipedia, catlib, pagegenerators, query

from urllib import urlencode
from copyright import mysplit, put, reports_cat

import sys

summary_msg = {
    'en': u'Removing',
    'it': u'Rimozione',
}

headC = re.compile("(?m)^=== (?:<strike>)?(?:<s>)?(?:<del>)?\[\[(?::)?(.*?)\]\]")
separatorC = re.compile('(?m)^== +')
next_headC = re.compile("(?m)^=+.*?=+")

#
# {{botbox|title|newid|oldid|author|...}}
rev_templateC = re.compile("(?m)^(?:\{\{/t\|.*?\}\}\n?)?\{\{(?:/box|botbox)\|.*?\|(.*?)\|")

def query_yurik_api(data):

    predata = [
        ('format', 'json'),
        ('what', 'revisions'),
        ('rvlimit', '1'),
        data]

    data = urlencode(predata)
    host = wikipedia.getSite().hostname()
    address = wikipedia.getSite().query_address()
    conn = httplib.HTTPConnection(host)
    conn.request("GET", address + data)
    response = conn.getresponse()
    data = response.read()
    conn.close()

    return data

def page_exist(title):
    for pageobjs in query_results_titles:
        for key in pageobjs['pages']:
            if pageobjs['pages'][key]['title'] == title:
                if int(key) >= 0:
                    return True
    wikipedia.output('* ' + title)
    return False

def revid_exist(revid):
    for pageobjs in query_results_revids:
        for id in pageobjs['pages']:
            for rv in range(len(pageobjs['pages'][id]['revisions'])):
                if pageobjs['pages'][id]['revisions'][rv]['revid'] == int(revid):
                    # print rv
                    return True
    wikipedia.output('* ' + revid)
    return False

cat = catlib.Category(wikipedia.getSite(), 'Category:%s' % wikipedia.translate(wikipedia.getSite(), reports_cat))
gen = pagegenerators.CategorizedPageGenerator(cat, recurse = True)

for page in gen:
    data = page.get()
    wikipedia.output(page.aslink())
    output = ''

    #
    # Preserve text before the sections
    #

    m = re.search("(?m)^==\s*[^=]*?\s*==", data)
    if m:
        output = data[:m.end() + 1]
    else:
        m = re.search("(?m)^===\s*[^=]*?", data)
        if not m:
            continue
        output = data[:m.start()]

    titles = headC.findall(data)
    revids = rev_templateC.findall(data)

    query_results_titles = list()
    query_results_revids = list()

    # No more than 100 titles at a time using Yurik's API
    for s in mysplit(query.ListToParam(titles), 100, "|"):
        query_results_titles.append(simplejson.loads(query_yurik_api(('titles', s))))
    for s in mysplit(query.ListToParam(revids), 100, "|"):
        query_results_revids.append(simplejson.loads(query_yurik_api(('revids', s))))

    comment_entry = list()
    add_separator = False
    index = 0

    while True:
        head = headC.search(data, index)
        if not head:
            break
        index = head.end()
        title = head.group(1)
        next_head = next_headC.search(data, index)
        if next_head:
            if separatorC.search(data[next_head.start():next_head.end()]):
                add_separator = True
            stop = next_head.start()
        else:
            stop = len(data)

        exist = True
        if page_exist(title):
            # check {{botbox}}
            revid = re.search("\{\{(?:/box|botbox)\|.*?\|(.*?)\|", data[head.end():stop])
            if revid:
                if not revid_exist(revid.group(1)):
                    exist = False
        else:
            exist = False

        if exist:
            output += "=== [[" + title + "]]" + data[head.end():stop]
        else:
            comment_entry.append("[[%s]]" % title)

        if add_separator:
            output += data[next_head.start():next_head.end()] + '\n'
            add_separator = False

    add_comment = u'%s: %s' % (wikipedia.translate(wikipedia.getSite(), summary_msg), ", ".join(comment_entry))

    # remove useless newlines
    output = re.sub("(?m)^\n", "", output)

    if comment_entry:
        wikipedia.output(add_comment)

        if wikipedia.verbose:
            wikipedia.showDiff(page.get(), output)

        if len(sys.argv) != 1:
            choice = wikipedia.inputChoice(u'Do you want to clean the page?', ['Yes', 'No'], ['y', 'n'], 'n')
            if choice in ['n', 'N']:
                continue

        try:
            put(page, output, add_comment)
        except wikipedia.PageNotSaved:
            raise

wikipedia.stopme()
Modified: trunk/pywikipedia/copyright_put.py
===================================================================
--- trunk/pywikipedia/copyright_put.py	2007-11-09 11:34:57 UTC (rev 4518)
+++ trunk/pywikipedia/copyright_put.py	2007-11-09 11:40:26 UTC (rev 4519)
@@ -1,252 +1,252 @@
# -*- coding: utf-8 -*-
"""
"""

#
# (C) Francesco Cosoleto, 2006
#
# Distributed under the terms of the MIT license.
#

import sys, re, codecs, os, time, shutil
import wikipedia, config, date

from copyright import put, join_family_data, appdir, reports_cat

#
# Month + Year save method
append_date_to_wiki_save_path = True

#
# Add publication date to entries (template:botdate)
append_date_to_entries = False

msg_table = {
    'it': {'_default': [u'Pagine nuove', u'Nuove voci'],
           'feed': [u'Aggiunte a voci esistenti', u'Testo aggiunto in']},
    'en': {'_default': [u'New entries', u'New entries']}
}

wiki_save_path = {
    '_default': u'User:%s/Report' % config.usernames[wikipedia.getSite().family.name][wikipedia.getSite().lang],
    'it': u'Utente:RevertBot/Report'
}

template_cat = {
    '_default': [u'This template is used by copyright.py, a script part of [[:m:Using the python wikipediabot|PyWikipediaBot]].', u''],
    'it': [u'Questo template è usato dallo script copyright.py del [[:m:Using the python wikipediabot|PyWikipediaBot]].', u'Template usati da bot'],
}

stat_msg = {
    'en': [u'Statistics', u'Page', u'Entries', u'Size', u'Total', u'Update'],
    'it': [u'Statistiche', u'Pagina', u'Segnalazioni', u'Lunghezza', u'Totale', u'Ultimo aggiornamento'],
}

wiki_save_path = wikipedia.translate(wikipedia.getSite(), wiki_save_path)
template_cat = wikipedia.translate(wikipedia.getSite(), template_cat)
stat_wiki_save_path = '%s/%s' % (wiki_save_path, wikipedia.translate(wikipedia.getSite(), stat_msg)[0])

if append_date_to_wiki_save_path:
    wiki_save_path += '_' + date.monthName(wikipedia.getSite().language(), time.localtime()[1]) + '_' + str(time.localtime()[0])

separatorC = re.compile('(?m)^== +')

def set_template(name = None):

    site = wikipedia.getSite()
    url = "%s://%s%s" % (site.protocol(), site.hostname(), site.path())

    botdate = u"""
<div style="text-align:right">{{{1}}}</div><noinclude>%s\n[[%s:%s]]</noinclude>
""" % (template_cat[0], site.namespace(14), template_cat[1])

    botbox = """
<div class=plainlinks style="text-align:right">[%s?title={{{1}}}&diff={{{2}}}&oldid={{{3}}} diff] - [%s?title={{{1}}}&action=history cron] - [%s?title=Special:Log&page={{{1}}} log]</div><noinclude>%s\n[[%s:%s]]</noinclude>
""" % (url, url, url, template_cat[0], site.namespace(14), template_cat[1])

    if name == 'botdate':
        p = wikipedia.Page(site, 'Template:botdate')
        if not p.exists():
            p.put(botdate, comment = 'Init.')

    if name == 'botbox':
        p = wikipedia.Page(site, 'Template:botbox')
        if not p.exists():
            p.put(botbox, comment = 'Init.')

def stat_sum(engine, text):
    return len(re.findall('(?im)^\*.*?' + engine + '.*?- ', text))

def get_stats():

    import catlib, pagegenerators

    msg = wikipedia.translate(wikipedia.getSite(), stat_msg)

    cat = catlib.Category(wikipedia.getSite(), 'Category:%s' % wikipedia.translate(wikipedia.getSite(), reports_cat))
    gen = pagegenerators.CategorizedPageGenerator(cat, recurse = True)

    output = u"""{| {{prettytable|width=|align=|text-align=left}}
! %s
! %s
! %s
! %s
! %s
! %s
|-
""" % ( msg[1], msg[2], msg[3], 'Google', 'Yahoo', 'Live Search' )

    gnt = 0 ; ynt = 0 ; mnt = 0 ; ent = 0 ; sn = 0 ; snt = 0

    for page in gen:
        data = page.get()

        gn = stat_sum('google', data)
        yn = stat_sum('yahoo', data)
        mn = stat_sum('(msn|live)', data)

        en = len(re.findall('=== \[\[', data))
        sn = len(data)

        gnt += gn ; ynt += yn ; mnt += mn ; ent += en ; snt += sn

        output += u"|%s||%s||%s KB||%s||%s||%s\n|-\n" % (page.aslink(), en, sn / 1024, gn, yn, mn)

    output += u"""| ||||||||
|-
|'''%s'''||%s||%s KB||%s||%s||%s
|-
|colspan="6" align=right style="background-color:#eeeeee;"|<small>''%s: %s''</small>
|}
""" % (msg[4], ent, snt / 1024, gnt, ynt, mnt, msg[5], time.strftime("%d " + "%s" % (date.monthName(wikipedia.getSite().language(), time.localtime()[1])) + " %Y"))

    return output

def put_stats():
    page = wikipedia.Page(wikipedia.getSite(), stat_wiki_save_path)
    page.put(get_stats(), comment = wikipedia.translate(wikipedia.getSite(), stat_msg)[0])

def output_files_gen():
    for f in os.listdir(appdir):
        if 'output' in f and not '_pending' in f:
            m = re.search('output_(.*?).txt', f)
            if m:
                tag = m.group(1)
            else:
                tag = '_default'

            section_name_and_summary = wikipedia.translate(wikipedia.getSite(), msg_table)[tag]

            section = section_name_and_summary[0]
            summary = section_name_and_summary[1]

            yield os.path.join(appdir, f), section, summary

def read_output_file(filename):
    if os.path.isfile(filename + '_pending'):
        shutil.move(filename, filename + '_temp')
        ap = codecs.open(filename + '_pending', 'a', 'utf-8')
        ot = codecs.open(filename + '_temp', 'r', 'utf-8')
        ap.write(ot.read())
        ap.close()
        ot.close()
        os.remove(filename + '_temp')
    else:
        shutil.move(filename, filename + '_pending')

    f = codecs.open(filename + '_pending', 'r', 'utf-8')
    data = f.read()
    f.close()

    return data

def run(send_stats = False):
    page = wikipedia.Page(wikipedia.getSite(), wiki_save_path)

    try:
        wikitext = page.get()
    except wikipedia.NoPage:
        wikipedia.output("%s not found." % page.aslink())
        wikitext = '[[%s:%s]]\n' % (wikipedia.getSite().namespace(14), wikipedia.translate(wikipedia.getSite(), reports_cat))

    final_summary = u''
    output_files = list()

    for f, section, summary in output_files_gen():
        wikipedia.output("File: '%s'\nSection: %s\n" % (f, section))

        output_data = read_output_file(f)
        output_files.append(f)

        entries = re.findall('=== (.*?) ===', output_data)

        if not entries:
            continue

        if append_date_to_entries:
            dt = time.strftime('%d-%m-%Y %H:%M', time.localtime())
            output_data = re.sub("(?m)^(=== \[\[.*?\]\] ===\n)", r"\1{{botdate|%s}}\n" % dt, output_data)

        m = re.search('(?m)^==\s*%s\s*==' % section, wikitext)
        if m:
            m_end = re.search(separatorC, wikitext[m.end():])
            if m_end:
                wikitext = wikitext[:m_end.start() + m.end()] + output_data + wikitext[m_end.start() + m.end():]
            else:
                wikitext += '\n' + output_data
        else:
            wikitext += '\n' + output_data

        if final_summary:
            final_summary += ' '
        final_summary += u'%s: %s' % (summary, ', '.join(entries))

    if final_summary:
        wikipedia.output(final_summary + '\n')

        # if a page in 'Image' or 'Category' namespace is checked then fix
        # title section by adding ':' in order to avoid wiki code effects.

        wikitext = re.sub(u'(?i)=== \[\[%s:' % join_family_data('Image', 6), ur'=== [[:\1:', wikitext)
        wikitext = re.sub(u'(?i)=== \[\[%s:' % join_family_data('Category', 14), ur'=== [[:\1:', wikitext)

        # TODO:
        # List of frequently rejected addresses to improve upload process.

        wikitext = re.sub('http://(.*?)((forumcommunity|forumfree).net)', r'<blacklist>\1\2', wikitext)

        if len(final_summary) >= 200:
            final_summary = final_summary[:200]
            final_summary = final_summary[:final_summary.rindex("[") - 3] + "..."

        try:
            put(page, wikitext, comment = final_summary)

            for f in output_files:
                os.remove(f + '_pending')
                wikipedia.output("'%s' deleted." % f)
        except wikipedia.PageNotSaved:
            raise

    if append_date_to_entries:
        set_template(name = 'botdate')

    if '{{botbox' in wikitext:
        set_template(name = 'botbox')

    if send_stats:
        put_stats()

def main():
    #
    # Send statistics
    send_stats = False

    for arg in wikipedia.handleArgs():
        if arg == "-stats":
            send_stats = True

    run(send_stats = send_stats)

if __name__ == "__main__":
    try:
        main()
    finally:
        wikipedia.stopme()
\ No newline at end of file