http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11436
Revision: 11436
Author: drtrigon
Date: 2013-04-22 22:23:48 +0000 (Mon, 22 Apr 2013)
Log Message:
-----------
bug fix; do NOT shut down logger in case of additional/accidental output
Modified Paths:
--------------
trunk/pywikipedia/wikipedia.py
Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py 2013-04-22 20:57:35 UTC (rev 11435)
+++ trunk/pywikipedia/wikipedia.py 2013-04-22 22:23:48 UTC (rev 11436)
@@ -9539,7 +9539,8 @@
not slow down other bots any more.
"""
get_throttle.drop()
- logging.shutdown()
+ logger.flush()
+ #logging.shutdown()
def _flush():
"""Wait for the page-putter to flush its queue.
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11434
Revision: 11434
Author: drtrigon
Date: 2013-04-22 18:57:15 +0000 (Mon, 22 Apr 2013)
Log Message:
-----------
improvement; adapted wikidata template page format to #switch syntax
Modified Paths:
--------------
trunk/pywikipedia/subster.py
Modified: trunk/pywikipedia/subster.py
===================================================================
--- trunk/pywikipedia/subster.py 2013-04-22 18:40:18 UTC (rev 11433)
+++ trunk/pywikipedia/subster.py 2013-04-22 18:57:15 UTC (rev 11434)
@@ -528,14 +528,14 @@
def data_convertContent(self, substed_content):
"""Converts the substed content to Wikidata format in order to save.
- Template page format:
+ Template page format (adopted from #switch):
<pre>
| key1 = value1
| key2 = value2
...
</pre>
- (1 line of wiki text is converted to 1 claim/statement, the lines
- have to be embedded into pre-tags and start with '|')
+ every entry has to start with a '|' and contain a '=', the entries
+ have to be embedded into pre-tags (entries may share the same line)
@param substed_content: New/Changed content (including tags).
@type substed_content: string
@@ -545,14 +545,10 @@
# DRTRIGON-130: convert talk page result to wikidata(base)
data = u'\n'.join(re.findall('<pre>(.*?)</pre>', substed_content,
re.S | re.I))
+ data = self.get_var_regex('.*?', '(.*?)').sub('\g<1>', data)
res = {}
- for line in data.splitlines():
- #line = self.get_var_regex('(.*?)', '(.*?)').findall(line)
- line = self.get_var_regex('.*?', '(.*?)').sub('\g<1>', line)
- line = line.strip()
- if (not line) or (line[0] != u'|'):
- continue
- line = line.lstrip(u'|').split(u'=', 1)
+ for line in data.split(u'|'):
+ line = line.strip().split(u'=', 1)
if len(line) != 2:
continue
res[line[0].strip()] = line[1].strip()
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11433
Revision: 11433
Author: drtrigon
Date: 2013-04-22 18:40:18 +0000 (Mon, 22 Apr 2013)
Log Message:
-----------
bug fix; wikidata recognition for unchanged data
improvement; wikidata template page format further refined
Modified Paths:
--------------
trunk/pywikipedia/subster.py
Modified: trunk/pywikipedia/subster.py
===================================================================
--- trunk/pywikipedia/subster.py 2013-04-22 18:10:03 UTC (rev 11432)
+++ trunk/pywikipedia/subster.py 2013-04-22 18:40:18 UTC (rev 11433)
@@ -527,24 +527,35 @@
def data_convertContent(self, substed_content):
"""Converts the substed content to Wikidata format in order to save.
- (1 line of wiki text is converted to 1 claim/statement)
- @param substed_content: New content (with tags).
+ Template page format:
+ <pre>
+ | key1 = value1
+ | key2 = value2
+ ...
+ </pre>
+ (1 line of wiki text is converted to 1 claim/statement, the lines
+ have to be embedded into pre-tags and start with '|')
+
+ @param substed_content: New/Changed content (including tags).
@type substed_content: string
+
+ Returns the extracted and converted data.
"""
# DRTRIGON-130: convert talk page result to wikidata(base)
- # TODO: consider format; every line starting with "|" is data
- # TODO: combine with 'outputContentDiff' in order to update changed only
+ data = u'\n'.join(re.findall('<pre>(.*?)</pre>', substed_content,
+ re.S | re.I))
res = {}
- for line in substed_content.splitlines():
- #data = self.get_var_regex('(.*?)', '(.*?)').findall(line)
- data = self.get_var_regex('.*?', '(.*?)').sub('\g<1>', line)
- #if not data:
- if data == line:
+ for line in data.splitlines():
+ #line = self.get_var_regex('(.*?)', '(.*?)').findall(line)
+ line = self.get_var_regex('.*?', '(.*?)').sub('\g<1>', line)
+ line = line.strip()
+ if (not line) or (line[0] != u'|'):
continue
- data = data.lstrip(u'|')
- key, value = data.split(u'=')
- res[key.strip()] = value.strip()
+ line = line.lstrip(u'|').split(u'=', 1)
+ if len(line) != 2:
+ continue
+ res[line[0].strip()] = line[1].strip()
return res
@@ -562,14 +573,14 @@
datapage = pywikibot.DataPage(self.site, page.title())
links = datapage.searchentities(u'%s:%s' % (self._bot_config['BotName'], datapage.title().split(u':')[1]))
for element in links:
- propid = self._bot_config['data_PropertyId']
+ propid = int(self._bot_config['data_PropertyId'])
el = element[u'aliases'][0].split(u':')
item = el[2]
if item not in data:
pywikibot.output(u'Value "%s" not found.' % (item,))
data[item] = u'%s: N/A' % self._bot_config['BotName']
if len(el) > 3:
- propid = el[3]
+ propid = int(el[3])
dataoutpage = pywikibot.DataPage(self.site, element['id'])
@@ -579,8 +590,8 @@
claim = [ claim for claim in buf[u'claims'] if (claim['m'][1] == propid) ]
# TODO: does this check (if) work with multiple claims per property?
if (not claim) or (claim[0]['m'][3] != data[item]):
- pywikibot.output(u'%s in %s <--- %s = %s' %\
- (element[u'aliases'][0], dataoutpage.title(asLink=True), item, data[item]))
+ pywikibot.output(u'%s in %s changed to "%s"' %\
+ (element[u'aliases'][0], dataoutpage.title(asLink=True), data[item]))
dataoutpage.editclaim(u'p%s' % propid, data[item],
refs={"p%s" % propid:
[{"snaktype": "value",
http://www.mediawiki.org/wiki/Special:Code/pywikipedia/11430
Revision: 11430
Author: legoktm
Date: 2013-04-22 09:52:39 +0000 (Mon, 22 Apr 2013)
Log Message:
-----------
Expose the sortkey options in Category.articles so pagegenerators can use it.
This fixes the breakage that occurred in r11267 and completes the fixme from r11401.
Modified Paths:
--------------
branches/rewrite/pywikibot/page.py
branches/rewrite/pywikibot/pagegenerators.py
Modified: branches/rewrite/pywikibot/page.py
===================================================================
--- branches/rewrite/pywikibot/page.py 2013-04-22 09:37:03 UTC (rev 11429)
+++ branches/rewrite/pywikibot/page.py 2013-04-22 09:52:39 UTC (rev 11430)
@@ -1722,7 +1722,9 @@
@deprecate_arg("startFrom", None)
def articles(self, recurse=False, step=None, total=None,
- content=False, namespaces=None):
+ content=False, namespaces=None, sortby="",
+ starttime=None, endtime=None,startsort=None,
+ endsort=None):
"""
Yields all articles in the current category.
@@ -1741,6 +1743,23 @@
@type namespace: int or list of ints
@param content: if True, retrieve the content of the current version
of each page (default False)
+ @param sortby: determines the order in which results are generated,
+ valid values are "sortkey" (default, results ordered by category
+ sort key) or "timestamp" (results ordered by time page was
+ added to the category). This applies recursively.
+ @type sortby: str
+ @param starttime: if provided, only generate pages added after this
+ time; not valid unless sortby="timestamp"
+ @type starttime: pywikibot.Timestamp
+ @param endtime: if provided, only generate pages added before this
+ time; not valid unless sortby="timestamp"
+ @type endtime: pywikibot.Timestamp
+ @param startsort: if provided, only generate pages >= this title
+ lexically; not valid if sortby="timestamp"
+ @type startsort: str
+ @param endsort: if provided, only generate pages <= this title
+ lexically; not valid if sortby="timestamp"
+ @type endsort: str
"""
if namespaces is None:
@@ -1749,7 +1768,10 @@
for member in self.site.categorymembers(self,
namespaces=namespaces,
step=step, total=total,
- content=content):
+ content=content, sortby=sortby,
+ starttime=starttime, endtime=endtime,
+ startsort=startsort, endsort=endsort,
+ ):
yield member
if total is not None:
total -= 1
@@ -1761,7 +1783,10 @@
for subcat in self.subcategories(step=step):
for article in subcat.articles(recurse, step=step, total=total,
content=content,
- namespaces=namespaces):
+ namespaces=namespaces, sortby=sortby,
+ starttime=starttime, endtime=endtime,
+ startsort=startsort, endsort=endsort,
+ ):
yield article
if total is not None:
total -= 1
Modified: branches/rewrite/pywikibot/pagegenerators.py
===================================================================
--- branches/rewrite/pywikibot/pagegenerators.py 2013-04-22 09:37:03 UTC (rev 11429)
+++ branches/rewrite/pywikibot/pagegenerators.py 2013-04-22 09:52:39 UTC (rev 11430)
@@ -604,18 +604,14 @@
retrieved page will be downloaded.
"""
- kwargs = dict(recurse=recurse,
- step=step,
- total=total,
- content=content,
- )
+ kwargs = dict(recurse=recurse, step=step, total=total,
+ content=content)
if start:
kwargs['sortby'] = 'sortkey'
kwargs['startsort'] = start
- for a in category.site.categorymembers(category, **kwargs):
+ for a in category.articles(**kwargs):
yield a
-
def SubCategoriesPageGenerator(category, recurse=False, start=None,
step=None, total=None, content=False):
"""Yield all subcategories in a specific category.