http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11440
Revision: 11440
Author: amir
Date: 2013-04-23 16:02:18 +0000 (Tue, 23 Apr 2013)
Log Message:
-----------
fixing bug #3610818 in a way that doesn't cause bug #3455789. It's not very good coding though
Modified Paths:
--------------
trunk/pywikipedia/pywikibot/textlib.py
Modified: trunk/pywikipedia/pywikibot/textlib.py
===================================================================
--- trunk/pywikipedia/pywikibot/textlib.py 2013-04-23 15:20:48 UTC (rev 11439)
+++ trunk/pywikipedia/pywikibot/textlib.py 2013-04-23 16:02:18 UTC (rev 11440)
@@ -844,7 +844,9 @@
# Note: While allowing dots inside URLs, MediaWiki will regard
# dots at the end of the URL as not part of that URL.
# The same applies to comma, colon and some other characters.
- notAtEnd = '\]\s\.:;,<>"\|'
+ notAtEnd = '\]\s\.:;,<>"\|\)'
+ #This is specially set for bracketed links
+ notAtEndb = '\]\s\.:;,<>"\|'
# So characters inside the URL can be anything except whitespace,
# closing squared brackets, quotation marks, greater than and less
# than, and the last character also can't be parenthesis or another
@@ -857,11 +859,15 @@
regex = r'(?P<url>http[s]?://[^%(notInside)s]*?[^%(notAtEnd)s]' \
r'(?=[%(notAtEnd)s]*\'\')|http[s]?://[^%(notInside)s]*' \
r'[^%(notAtEnd)s])' % {'notInside': notInside, 'notAtEnd': notAtEnd}
-
+ regexb = r'(?P<url>http[s]?://[^%(notInside)s]*?[^%(notAtEnd)s]' \
+ r'(?=[%(notAtEnd)s]*\'\')|http[s]?://[^%(notInside)s]*' \
+ r'[^%(notAtEnd)s])' % {'notInside': notInside, 'notAtEnd': notAtEndb}
if withoutBracketed:
regex = r'(?<!\[)' + regex
elif onlyBracketed:
- regex = r'\[' + regex
+ regex = r'\[' + regexb
+ else:
+ regex = r'(?:(?<!\[)' + regex + r'|\[' + regexb + r')'
linkR = re.compile(regex)
return linkR
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11436
Revision: 11436
Author: drtrigon
Date: 2013-04-22 22:23:48 +0000 (Mon, 22 Apr 2013)
Log Message:
-----------
bug fix; do NOT shutdown logger in case of additional/accidental output
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2013-04-22 20:57:35 UTC (rev 11435)
+++ trunk/pywikipedia/wikipedia.py 2013-04-22 22:23:48 UTC (rev 11436)
@@ -9539,7 +9539,8 @@
not slow down other bots any more.
"""
get_throttle.drop()
- logging.shutdown()
+ logger.flush()
+ #logging.shutdown()
def _flush():
"""Wait for the page-putter to flush its queue.
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11434
Revision: 11434
Author: drtrigon
Date: 2013-04-22 18:57:15 +0000 (Mon, 22 Apr 2013)
Log Message:
-----------
improvement; adapted wikidata template page format to #switch syntax
Modified Paths:
--------------
trunk/pywikipedia/subster.py
Modified: trunk/pywikipedia/subster.py
===================================================================
--- trunk/pywikipedia/subster.py 2013-04-22 18:40:18 UTC (rev 11433)
+++ trunk/pywikipedia/subster.py 2013-04-22 18:57:15 UTC (rev 11434)
@@ -528,14 +528,14 @@
def data_convertContent(self, substed_content):
"""Converts the substed content to Wikidata format in order to save.
- Template page format:
+ Template page format (adapted from #switch):
<pre>
| key1 = value1
| key2 = value2
...
</pre>
- (1 line of wiki text is converted to 1 claim/statement, the lines
- have to be embedded into pre-tags and start with '|')
+ every entry has to start with a '|' and contain a '=', the entries
+ have to be embedded into pre-tags (entries may share the same line)
@param substed_content: New/Changed content (including tags).
@type substed_content: string
@@ -545,14 +545,10 @@
# DRTRIGON-130: convert talk page result to wikidata(base)
data = u'\n'.join(re.findall('<pre>(.*?)</pre>', substed_content,
re.S | re.I))
+ data = self.get_var_regex('.*?', '(.*?)').sub('\g<1>', data)
res = {}
- for line in data.splitlines():
- #line = self.get_var_regex('(.*?)', '(.*?)').findall(line)
- line = self.get_var_regex('.*?', '(.*?)').sub('\g<1>', line)
- line = line.strip()
- if (not line) or (line[0] != u'|'):
- continue
- line = line.lstrip(u'|').split(u'=', 1)
+ for line in data.split(u'|'):
+ line = line.strip().split(u'=', 1)
if len(line) != 2:
continue
res[line[0].strip()] = line[1].strip()
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11433
Revision: 11433
Author: drtrigon
Date: 2013-04-22 18:40:18 +0000 (Mon, 22 Apr 2013)
Log Message:
-----------
bug fix; wikidata recognition for unchanged data
improvement; wikidata template page format further refined
Modified Paths:
--------------
trunk/pywikipedia/subster.py
Modified: trunk/pywikipedia/subster.py
===================================================================
--- trunk/pywikipedia/subster.py 2013-04-22 18:10:03 UTC (rev 11432)
+++ trunk/pywikipedia/subster.py 2013-04-22 18:40:18 UTC (rev 11433)
@@ -527,24 +527,35 @@
def data_convertContent(self, substed_content):
"""Converts the substed content to Wikidata format in order to save.
- (1 line of wiki text is converted to 1 claim/statement)
- @param substed_content: New content (with tags).
+ Template page format:
+ <pre>
+ | key1 = value1
+ | key2 = value2
+ ...
+ </pre>
+ (1 line of wiki text is converted to 1 claim/statement, the lines
+ have to be embedded into pre-tags and start with '|')
+
+ @param substed_content: New/Changed content (including tags).
@type substed_content: string
+
+ Returns the extracted and converted data.
"""
# DRTRIGON-130: convert talk page result to wikidata(base)
- # TODO: consider format; every line starting with "|" is data
- # TODO: combine with 'outputContentDiff' in order to update changed only
+ data = u'\n'.join(re.findall('<pre>(.*?)</pre>', substed_content,
+ re.S | re.I))
res = {}
- for line in substed_content.splitlines():
- #data = self.get_var_regex('(.*?)', '(.*?)').findall(line)
- data = self.get_var_regex('.*?', '(.*?)').sub('\g<1>', line)
- #if not data:
- if data == line:
+ for line in data.splitlines():
+ #line = self.get_var_regex('(.*?)', '(.*?)').findall(line)
+ line = self.get_var_regex('.*?', '(.*?)').sub('\g<1>', line)
+ line = line.strip()
+ if (not line) or (line[0] != u'|'):
continue
- data = data.lstrip(u'|')
- key, value = data.split(u'=')
- res[key.strip()] = value.strip()
+ line = line.lstrip(u'|').split(u'=', 1)
+ if len(line) != 2:
+ continue
+ res[line[0].strip()] = line[1].strip()
return res
@@ -562,14 +573,14 @@
datapage = pywikibot.DataPage(self.site, page.title())
links = datapage.searchentities(u'%s:%s' % (self._bot_config['BotName'], datapage.title().split(u':')[1]))
for element in links:
- propid = self._bot_config['data_PropertyId']
+ propid = int(self._bot_config['data_PropertyId'])
el = element[u'aliases'][0].split(u':')
item = el[2]
if item not in data:
pywikibot.output(u'Value "%s" not found.' % (item,))
data[item] = u'%s: N/A' % self._bot_config['BotName']
if len(el) > 3:
- propid = el[3]
+ propid = int(el[3])
dataoutpage = pywikibot.DataPage(self.site, element['id'])
@@ -579,8 +590,8 @@
claim = [ claim for claim in buf[u'claims'] if (claim['m'][1] == propid) ]
# TODO: does this check (if) work with multiple claims per property?
if (not claim) or (claim[0]['m'][3] != data[item]):
- pywikibot.output(u'%s in %s <--- %s = %s' %\
- (element[u'aliases'][0], dataoutpage.title(asLink=True), item, data[item]))
+ pywikibot.output(u'%s in %s changed to "%s"' %\
+ (element[u'aliases'][0], dataoutpage.title(asLink=True), data[item]))
dataoutpage.editclaim(u'p%s' % propid, data[item],
refs={"p%s" % propid:
[{"snaktype": "value",