jenkins-bot has submitted this change and it was merged.
Change subject: use site.versionnumber() instead of family.versionnumber(site.lang)
......................................................................
use site.versionnumber() instead of family.versionnumber(site.lang)
Change-Id: I834b79374ddf850d01326bffffd3470b4eaa26c8
---
M scripts/cosmetic_changes.py
1 file changed, 1 insertion(+), 1 deletion(-)
Approvals:
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/cosmetic_changes.py b/scripts/cosmetic_changes.py
index e7836d2..d261459 100755
--- a/scripts/cosmetic_changes.py
+++ b/scripts/cosmetic_changes.py
@@ -338,7 +338,7 @@
thisNs = namespaces.pop(0)
if nsNumber == 6 and family.name == 'wikipedia':
if self.site.lang in ('en', 'fr') and \
- family.versionnumber(self.site.lang) >= 14:
+ self.site.versionnumber() >= 14:
# do not change "Image" on en-wiki and fr-wiki
assert u'Image' in namespaces
namespaces.remove(u'Image')
--
To view, visit https://gerrit.wikimedia.org/r/129956
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I834b79374ddf850d01326bffffd3470b4eaa26c8
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Alex S.H. Lin <alexsh(a)mail2000.com.tw>
Gerrit-Reviewer: Huji <huji.huji(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: jenkins-bot <>
jenkins-bot has submitted this change and it was merged.
Change subject: use categoryinfo for the number of articles and subcats, pep8 changes
......................................................................
use categoryinfo for the number of articles and subcats, pep8 changes
categoryinfo gives the number of articles and subcategories. We use
this information for the messages and use the generator functions
to process each article or subcat. This speeds up processing
(e.g. 10% for "tree" with depth:3)
Made some pep8 changes.
Change-Id: I32e23ccfdc4a9e623fde93fcd161dd910879e9af
---
M scripts/category.py
1 file changed, 15 insertions(+), 20 deletions(-)
Approvals:
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/category.py b/scripts/category.py
index 8922efe..d032c3a 100755
--- a/scripts/category.py
+++ b/scripts/category.py
@@ -323,7 +323,7 @@
comment = self.editSummary
if not comment:
comment = i18n.twtranslate(page.site, 'category-adding',
- {'newcat': newcatTitle})
+ {'newcat': newcatTitle})
pywikibot.output(u'Comment: %s' % comment)
if not self.dry:
if not self.always:
@@ -449,16 +449,14 @@
self.comment = comment
if not self.comment:
if self.newcat:
- template_vars = { \
- 'oldcat': self.oldcat.title(
- withNamespace=False),
- 'newcat': self.newcat.title(
- withNamespace=False)}
+ template_vars = {
+ 'oldcat': self.oldcat.title(withNamespace=False),
+ 'newcat': self.newcat.title(withNamespace=False)}
self.comment = i18n.twtranslate(self.site,
'category-replacing',
template_vars)
else:
- template_vars = {'oldcat': self.oldcat.title( \
+ template_vars = {'oldcat': self.oldcat.title(
withNamespace=False)}
self.comment = i18n.twtranslate(self.site,
'category-removing',
@@ -475,8 +473,7 @@
self._hist()
self._change(pagegenerators.CategorizedPageGenerator(self.oldcat))
if not self.pagesonly:
- self._change(pagegenerators.SubCategoriesPageGenerator( \
- self.oldcat))
+ self._change(pagegenerators.SubCategoriesPageGenerator(self.oldcat))
if self.oldcat.isEmptyCategory() and self.delete_oldcat and \
((self.newcat and self.move_oldcat) or not self.newcat):
self._delete()
@@ -524,7 +521,8 @@
match = re.compile(REGEX,
re.IGNORECASE | re.MULTILINE | re.DOTALL)
self.newcat.text = match.sub('', self.newcat.text)
- site_templates = pywikibot.translate(self.site, cfd_templates, fallback=False)
+ site_templates = pywikibot.translate(self.site, cfd_templates,
+ fallback=False)
for template_name in site_templates:
match = re.compile(r"{{%s.*?}}" % template_name, re.IGNORECASE)
self.newcat.text = match.sub('', self.newcat.text)
@@ -641,12 +639,11 @@
{'oldcat': self.cat.title()})
def run(self):
- articles = set(self.cat.articles())
- if len(articles) == 0:
+ if self.cat.categoryinfo['pages'] == 0:
pywikibot.output(u'There are no articles in category %s'
% self.cat.title())
else:
- for article in articles:
+ for article in self.cat.articles():
if not self.titleRegex or re.search(self.titleRegex,
article.title()):
article.change_category(self.cat, None,
@@ -656,12 +653,11 @@
return
# Also removes the category tag from subcategories' pages
- subcategories = set(self.cat.subcategories())
- if len(subcategories) == 0:
+ if self.cat.categoryinfo['subcats'] == 0:
pywikibot.output(u'There are no subcategories in category %s'
% self.cat.title())
else:
- for subcategory in subcategories:
+ for subcategory in self.cat.subcategories():
subcategory.change_category(self.cat, None,
comment=self.editSummary,
inPlace=self.inPlace)
@@ -831,12 +827,11 @@
def run(self):
cat = pywikibot.Category(self.site, self.catTitle)
- articles = set(cat.articles())
- if len(articles) == 0:
+ if cat.categoryinfo['pages'] == 0:
pywikibot.output(u'There are no articles in category ' +
self.catTitle)
else:
- preloadingGen = pagegenerators.PreloadingGenerator(iter(articles))
+ preloadingGen = pagegenerators.PreloadingGenerator(cat.articles())
for article in preloadingGen:
pywikibot.output('')
pywikibot.output(u'=' * 67)
@@ -880,7 +875,7 @@
if currentDepth > 0:
result += u' '
result += cat.title(asLink=True, textlink=True, withNamespace=False)
- result += ' (%d)' % len(self.catDB.getArticles(cat))
+ result += ' (%d)' % cat.categoryinfo['pages']
if currentDepth < self.maxDepth / 2:
# noisy dots
pywikibot.output('.', newline=False)
--
To view, visit https://gerrit.wikimedia.org/r/130301
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I32e23ccfdc4a9e623fde93fcd161dd910879e9af
Gerrit-PatchSet: 2
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: jenkins-bot <>
jenkins-bot has submitted this change and it was merged.
Change subject: categoryinfo is only available for categories, pep8 changes
......................................................................
categoryinfo is only available for categories, pep8 changes
categoryinfo is now a Category attribute. Assigning it from a page
object raises an AttributeError as suggested in the comment. There
is no need to try calling the site method. It always returns a dict.
Change-Id: I0b73f4f07dbb079a42345290ebf6c259a23442b9
---
M pywikibot/page.py
1 file changed, 31 insertions(+), 35 deletions(-)
Approvals:
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/page.py b/pywikibot/page.py
index 92607f7..5e5d09c 100644
--- a/pywikibot/page.py
+++ b/pywikibot/page.py
@@ -1333,12 +1333,13 @@
pywikibot.output(u'Deleting %s.' % (self.title(asLink=True)))
reason = pywikibot.input(u'Please enter a reason for the deletion:')
- if self.site.username(sysop=True): # If user is a sysop, delete the page
+ # If user is a sysop, delete the page
+ if self.site.username(sysop=True):
answer = u'y'
if prompt and not hasattr(self.site, '_noDeletePrompt'):
answer = pywikibot.inputChoice(
- u'Do you want to delete %s?' % self.title(asLink=True,
- forceInterwiki=True),
+ u'Do you want to delete %s?' % self.title(
+ asLink=True, forceInterwiki=True),
['Yes', 'No', 'All'],
['Y', 'N', 'A'],
'N')
@@ -1572,20 +1573,6 @@
% (self.title(asLink=True), error.message))
return False
- @property
- def categoryinfo(self):
- """If supported, return a dict containing category content values:
-
- Numbers of pages, subcategories, files, and total contents.
-
- """
- if not self.isCategory():
- return # should this raise an exception??
- try:
- return self.site.categoryinfo(self)
- except NotImplementedError:
- return
-
######## DEPRECATED METHODS ########
@deprecated("Site.encoding()")
@@ -1726,13 +1713,13 @@
username, resolution, filesize, comment).
"""
- #TODO; return value may need to change
+ # TODO; return value may need to change
return self.site.loadimageinfo(self, history=True)
def getFileVersionHistoryTable(self):
"""Return the version history in the form of a wiki table."""
lines = []
- #TODO: if getFileVersionHistory changes, make sure this follows it
+ # TODO: if getFileVersionHistory changes, make sure this follows it
for (datetime, username, resolution, size, comment) \
in self.getFileVersionHistory():
lines.append(u'| %s || %s || %s || %s || <nowiki>%s</nowiki>'
@@ -2048,6 +2035,15 @@
targetCat.put(newtext, creationSummary)
return True
+ @property
+ def categoryinfo(self):
+ """return a dict containing category content values:
+
+ Numbers of pages, subcategories, files, and total contents.
+
+ """
+ return self.site.categoryinfo(self)
+
#### DEPRECATED METHODS ####
@deprecated("list(Category.subcategories(...))")
def subcategoriesList(self, recurse=False):
@@ -2224,8 +2220,8 @@
@type subpage: unicode
"""
if self._isAutoblock:
- #This user is probably being queried for purpose of lifting
- #an autoblock, so has no user pages per se.
+ # This user is probably being queried for purpose of lifting
+ # an autoblock, so has no user pages per se.
raise AutoblockUser(
"This is an autoblock ID, you can only use to unblock it.")
if subpage:
@@ -2241,8 +2237,8 @@
@type subpage: unicode
"""
if self._isAutoblock:
- #This user is probably being queried for purpose of lifting
- #an autoblock, so has no user talk pages per se.
+ # This user is probably being queried for purpose of lifting
+ # an autoblock, so has no user talk pages per se.
raise AutoblockUser(
"This is an autoblock ID, you can only use to unblock it.")
if subpage:
@@ -2438,12 +2434,12 @@
id = 'ids'
site = 'sites'
title = 'titles'
- #id overrides all
+ # id overrides all
if hasattr(self, 'id'):
params[id] = self.id
return params
- #the rest only applies to ItemPages, but is still needed here.
+ # the rest only applies to ItemPages, but is still needed here.
if hasattr(self, '_site') and hasattr(self, '_title'):
params[site] = self._site.dbName()
params[title] = self._title
@@ -2476,7 +2472,7 @@
self.lastrevid = self._content['lastrevid']
else:
raise pywikibot.NoPage(self)
- #aliases
+ # aliases
self.aliases = {}
if 'aliases' in self._content:
for lang in self._content['aliases']:
@@ -2484,14 +2480,14 @@
for value in self._content['aliases'][lang]:
self.aliases[lang].append(value['value'])
- #labels
+ # labels
self.labels = {}
if 'labels' in self._content:
for lang in self._content['labels']:
if 'removed' not in self._content['labels'][lang]: # Bug 54767
self.labels[lang] = self._content['labels'][lang]['value']
- #descriptions
+ # descriptions
self.descriptions = {}
if 'descriptions' in self._content:
for lang in self._content['descriptions']:
@@ -2648,7 +2644,7 @@
"""
super(ItemPage, self).get(force=force, *args)
- #claims
+ # claims
self.claims = {}
if 'claims' in self._content:
for pid in self._content['claims']:
@@ -2658,7 +2654,7 @@
c.on_item = self
self.claims[pid].append(c)
- #sitelinks
+ # sitelinks
self.sitelinks = {}
if 'sitelinks' in self._content:
for dbname in self._content['sitelinks']:
@@ -2749,7 +2745,7 @@
dbName = self.getdbName(obj.site)
data[dbName] = {'site': dbName, 'title': obj.title()}
else:
- #TODO: Do some verification here
+ # TODO: Do some verification here
dbName = obj['site']
data[dbName] = obj
data = {'sitelinks': data}
@@ -2965,7 +2961,7 @@
data = self.repo.changeClaimTarget(self, snaktype=snaktype,
**kwargs)
- #TODO: Re-create the entire item from JSON, not just id
+ # TODO: Re-create the entire item from JSON, not just id
self.snak = data['claim']['id']
def getTarget(self):
@@ -3326,7 +3322,7 @@
# Pages with "/./" or "/../" appearing in the URLs will
# often be unreachable due to the way web browsers deal
- #* with 'relative' URLs. Forbid them explicitly.
+ # * with 'relative' URLs. Forbid them explicitly.
if u'.' in t and (
t == u'.' or t == u'..'
@@ -3546,8 +3542,8 @@
158: 382, # ž
159: 376 # Ÿ
}
- #ensuring that illegal   and , which have no known values,
- #don't get converted to unichr(129), unichr(141) or unichr(157)
+ # ensuring that illegal   and , which have no known values,
+ # don't get converted to unichr(129), unichr(141) or unichr(157)
ignore = set(ignore) | set([129, 141, 157])
result = u''
i = 0
--
To view, visit https://gerrit.wikimedia.org/r/130295
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I0b73f4f07dbb079a42345290ebf6c259a23442b9
Gerrit-PatchSet: 2
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: jenkins-bot <>
jenkins-bot has submitted this change and it was merged.
Change subject: New site method expand_text() which uses api to parse text.
......................................................................
New site method expand_text() which uses api to parse text.
Change Page.expand_text() by calling this site method.
Change Site.getcurrenttimestamp() by calling this site method.
Change-Id: I88ad007a015787d93d2c00fe4b4acc1956ff2d0c
---
M pywikibot/page.py
M pywikibot/site.py
2 files changed, 40 insertions(+), 14 deletions(-)
Approvals:
Ricordisamoa: Looks good to me, but someone else must approve
Merlijn van Deen: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/page.py b/pywikibot/page.py
index 92607f7..754aba9 100644
--- a/pywikibot/page.py
+++ b/pywikibot/page.py
@@ -419,14 +419,22 @@
"""
return self.properties(force=force).get('defaultsort')
- def expand_text(self, refresh=False):
- """Return the page text with all templates expanded."""
- if not hasattr(self, "_expanded_text") or (
- self._expanded_text is None) or refresh:
- req = pywikibot.data.api.Request(
- action="expandtemplates", text=self.text,
- title=self.title(withSection=False), site=self.site)
- self._expanded_text = req.submit()["expandtemplates"]["*"]
+ @deprecate_arg('refresh', 'force')
+ def expand_text(self, force=False, includecomments=False):
+ """Return the page text with all templates and parser words expanded.
+
+ @param force: force updating from the live site
+ @param includecomments: Also strip comments if includecomments
+ parameter is not True.
+ @return: unicode or None
+
+ """
+ if not hasattr(self, '_expanded_text') or (
+ self._expanded_text is None) or force:
+ self._expanded_text = self.site.expand_text(
+ self.text,
+ title=self.title(withSection=False),
+ includecomments=includecomments)
return self._expanded_text
def userName(self):
diff --git a/pywikibot/site.py b/pywikibot/site.py
index fccf2f0..0132716 100644
--- a/pywikibot/site.py
+++ b/pywikibot/site.py
@@ -927,18 +927,36 @@
return self._months_names
+ def expand_text(self, text, title=None, includecomments=None):
+ """ Parses the given text for preprocessing and rendering
+ e.g expand templates and strip comments if includecomments
+ parameter is not True. Keeps text inside
+ <nowiki></nowiki> tags unchanges etc. Can be used to parse
+ magic parser words like {{CURRENTTIMESTAMP}}.
+
+ @param text: text to be expanded
+ @type text: unicode
+ @param title: page title without section
+ @type title: unicode
+ @param includecomments: if True do not strip comments
+ @type includecomments: bool
+ @return: unicode
+
+ """
+ req = api.Request(site=self, action='expandtemplates', text=text)
+ if title is not None:
+ req['title'] = title
+ if includecomments is True:
+ req['includecomments'] = u''
+ return req.submit()['expandtemplates']['*']
+
def getcurrenttimestamp(self):
"""Return server time, {{CURRENTTIMESTAMP}}, as a string.
Format is 'yyyymmddhhmmss'
"""
- r = api.Request(site=self,
- action="parse",
- text="{{CURRENTTIMESTAMP}}",
- contentmodel="wikitext")
- result = r.submit()
- return re.search('\d+', result['parse']['text']['*']).group()
+ return self.expand_text("{{CURRENTTIMESTAMP}}")
def getcurrenttime(self):
"""Return a Timestamp object representing the current server time."""
--
To view, visit https://gerrit.wikimedia.org/r/129710
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I88ad007a015787d93d2c00fe4b4acc1956ff2d0c
Gerrit-PatchSet: 14
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Legoktm <legoktm.wikipedia(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Pyfisch <pyfisch(a)gmail.com>
Gerrit-Reviewer: Ricordisamoa <ricordisamoa(a)live.it>
Gerrit-Reviewer: Russell Blau <russblau(a)imapmail.org>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>
jenkins-bot has submitted this change and it was merged.
Change subject: fix typos about Wikidata
......................................................................
fix typos about Wikidata
replaced 'wikidata' with 'Wikibase' when appropriate,
capitalized 'wikidata' into 'Wikidata' otherwise
Change-Id: Icf35735400eb023a61ded75a35781eb1d8d9f560
---
M pywikibot/family.py
M pywikibot/site.py
M pywikibot/textlib.py
3 files changed, 6 insertions(+), 6 deletions(-)
Approvals:
Xqt: Looks good to me, approved
jenkins-bot: Verified
diff --git a/pywikibot/family.py b/pywikibot/family.py
index 8d38a11..ca98266 100644
--- a/pywikibot/family.py
+++ b/pywikibot/family.py
@@ -1022,7 +1022,7 @@
return (None, None)
def shared_data_repository(self, code, transcluded=False):
- """Return the shared wikidata repository, if any."""
+ """Return the shared Wikibase repository, if any."""
return (None, None)
@pywikibot.deprecated("Site.getcurrenttime()")
diff --git a/pywikibot/site.py b/pywikibot/site.py
index fccf2f0..e994bc6 100644
--- a/pywikibot/site.py
+++ b/pywikibot/site.py
@@ -1109,19 +1109,19 @@
@property
def has_image_repository(self):
- """Return True if site has a shared image repository like commons"""
+ """Return True if site has a shared image repository like Commons"""
code, fam = self.shared_image_repository()
return bool(code or fam)
@property
def has_data_repository(self):
- """Return True if site has a shared image repository like wikidata"""
+ """Return True if site has a shared data repository like Wikidata"""
code, fam = self.shared_data_repository()
return bool(code or fam)
@property
def has_transcluded_data(self):
- """Return True if site has a shared image repository like wikidata"""
+ """Return True if site has a shared data repository like Wikidata"""
code, fam = self.shared_data_repository(True)
return bool(code or fam)
@@ -1133,7 +1133,7 @@
return pywikibot.Site(code, fam, self.username())
def data_repository(self):
- """Return Site object for data repository e.g. wikidata."""
+ """Return Site object for data repository e.g. Wikidata."""
code, fam = self.shared_data_repository()
if bool(code or fam):
diff --git a/pywikibot/textlib.py b/pywikibot/textlib.py
index 4535b4d..b1de87f 100644
--- a/pywikibot/textlib.py
+++ b/pywikibot/textlib.py
@@ -97,7 +97,7 @@
'interwiki': re.compile(r'(?i)\[\[:?(%s)\s?:[^\]]*\]\][\s]*'
% '|'.join(site.validLanguageLinks() +
list(site.family.obsolete.keys()))),
- # Wikidata property inclusions
+ # Wikibase property inclusions
'property': re.compile(r'(?i)\{\{\s*#property:\s*p\d+\s*\}\}'),
# Module invocations (currently only Lua)
'invoke': re.compile(r'(?i)\{\{\s*#invoke:.*?}\}'),
--
To view, visit https://gerrit.wikimedia.org/r/130559
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Icf35735400eb023a61ded75a35781eb1d8d9f560
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Ricordisamoa <ricordisamoa(a)live.it>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>
jenkins-bot has submitted this change and it was merged.
Change subject: (bug 64186) Decrease exhausting memory usage, speed up processing
......................................................................
(bug 64186) Decrease exhausting memory usage, speed up processing
- delete local list after we are done with it
- decrease processing time: list.remove() consumes a lot of time
because lists are not indexed and removing an item needs to
search inside the sequence. There is an easy way for more speed
when the parent category is just skipped inside the loop. This
decreases time consumption by 65% for depth:3
Change-Id: I067c2a0c95e86fa7367d6a3795962ec31fd5208c
---
M scripts/category.py
1 file changed, 19 insertions(+), 22 deletions(-)
Approvals:
Ladsgroup: Looks good to me, approved
jenkins-bot: Verified
diff --git a/scripts/category.py b/scripts/category.py
index e12fd6c..8922efe 100755
--- a/scripts/category.py
+++ b/scripts/category.py
@@ -876,41 +876,37 @@
"""
- result = u'#' * currentDepth + ' '
+ result = u'#' * currentDepth
+ if currentDepth > 0:
+ result += u' '
result += cat.title(asLink=True, textlink=True, withNamespace=False)
result += ' (%d)' % len(self.catDB.getArticles(cat))
- # We will remove an element of supercats, but need the original set
- # later, so we create a list from the catDB.getSupercats(cat) set
- supercats = list(self.catDB.getSupercats(cat))
+ if currentDepth < self.maxDepth / 2:
+ # noisy dots
+ pywikibot.output('.', newline=False)
# Find out which other cats are supercats of the current cat
- try:
- supercats.remove(parent)
- except:
- pass
- if supercats:
- if currentDepth < self.maxDepth / 2:
- # noisy dots
- pywikibot.output('.', newline=False)
- supercat_names = []
- for i, cat in enumerate(supercats):
- # create a list of wiki links to the supercategories
+ supercat_names = []
+ for cat in self.catDB.getSupercats(cat):
+ # create a list of wiki links to the supercategories
+ if cat != parent:
supercat_names.append(cat.title(asLink=True,
textlink=True,
withNamespace=False))
- # print this list, separated with commas, using translations
- # given in also_in_cats
+ if supercat_names:
+ # print this list, separated with commas, using translations
+ # given in also_in_cats
result += ' ' + i18n.twtranslate(self.site, 'category-also-in',
{'alsocat': ', '.join(
supercat_names)})
+ del supercat_names
result += '\n'
if currentDepth < self.maxDepth:
for subcat in self.catDB.getSubcats(cat):
# recurse into subdirectories
result += self.treeview(subcat, currentDepth + 1, parent=cat)
- else:
- if self.catDB.getSubcats(cat):
- # show that there are more categories beyond the depth limit
- result += '#' * (currentDepth + 1) + ' [...]\n'
+ elif self.catDB.getSubcats(cat):
+ # show that there are more categories beyond the depth limit
+ result += '#' * (currentDepth + 1) + ' [...]\n'
return result
def run(self):
@@ -1097,4 +1093,5 @@
except pywikibot.Error:
pywikibot.error("Fatal error:", exc_info=True)
finally:
- catDB.dump()
+ if 'catDB' in globals():
+ catDB.dump()
--
To view, visit https://gerrit.wikimedia.org/r/128433
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I067c2a0c95e86fa7367d6a3795962ec31fd5208c
Gerrit-PatchSet: 4
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Merlijn van Deen <valhallasw(a)arctus.nl>
Gerrit-Reviewer: Pyfisch <pyfisch(a)gmail.com>
Gerrit-Reviewer: Ricordisamoa <ricordisamoa(a)live.it>
Gerrit-Reviewer: Russell Blau <russblau(a)imapmail.org>
Gerrit-Reviewer: jenkins-bot <>
jenkins-bot has submitted this change and it was merged.
Change subject: (bug 64186) Decrease exhausting memory usage, speed up processing
......................................................................
(bug 64186) Decrease exhausting memory usage, speed up processing
update and synchronized from/with core
- delete local list after we are done with it
- decrease processing time: list.remove() consumes a lot of time
because lists are not indexed and removing an item needs to
search inside the sequence. There is an easy way for more speed
when the parent category is just skipped inside the loop. This
decreases time consumption by 65% with depth:3.
Change-Id: I5117a9d6071ab7f9d069b870f1a3e158274965ca
---
M category.py
1 file changed, 56 insertions(+), 52 deletions(-)
Approvals:
Ladsgroup: Looks good to me, approved
jenkins-bot: Verified
diff --git a/category.py b/category.py
index 3d06ca0..e2bf0c1 100644
--- a/category.py
+++ b/category.py
@@ -63,6 +63,8 @@
and do not remove them
* -match - Only work on pages whose titles match the given regex (for
move and remove actions).
+ * -depth: - The max depth limit beyond which no subcategories will be
+ listed.
For the actions tidy and tree, the bot will store the category structure
locally in category.dump. This saves time and server load, but if it uses
@@ -82,7 +84,6 @@
This will move all pages in the category US to the category United States.
"""
-
#
# (C) Rob W.W. Hooft, 2004
# (C) Daniel Herding, 2004
@@ -90,12 +91,12 @@
# (C) leogregianin, 2004-2008
# (C) Cyde, 2006-2010
# (C) Anreas J Schwab, 2007
-# (C) xqt, 2009-2013
-# (C) Pywikipedia team, 2008-2013
-#
-__version__ = '$Id$'
+# (C) xqt, 2009-2014
+# (C) Pywikibot team, 2008-2013
#
# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id$'
#
import os
@@ -120,7 +121,7 @@
'fi': [u'roskaa', u'poistettava', u'korjattava/nimi',
u'yhdistettäväLuokka'],
'he': [u'הצבעת מחיקה', u'למחוק'],
- 'nl': [u'categorieweg', u'catweg', u'wegcat', u'weg2']
+ 'nl': [u'categorieweg', u'catweg', u'wegcat', u'weg2'],
},
'commons': {
'commons': [u'cfd', u'move']
@@ -309,7 +310,7 @@
else:
pywikibot.output(u"Page %s does not exist; skipping."
% page.title(asLink=True))
- except pywikibot.IsRedirectPage, arg:
+ except pywikibot.IsRedirectPage as arg:
redirTarget = pywikibot.Page(self.site, arg.args[0])
if self.follow_redirects:
text = redirTarget.get()
@@ -354,7 +355,7 @@
pywikibot.output(
u'Skipping %s because of edit conflict'
% (page.title()))
- except pywikibot.SpamfilterError, error:
+ except pywikibot.SpamfilterError as error:
pywikibot.output(
u'Cannot change %s because of spam blacklist entry '
u'%s' % (page.title(), error.url))
@@ -390,8 +391,8 @@
% page.title(asLink=True))
-class CategoryMoveRobot:
- """Robot to move pages from one category to another."""
+class CategoryMoveRobot(object):
+ """Bot to move pages from one category to another."""
def __init__(self, oldCatTitle, newCatTitle, batchMode=False,
editSummary='', inPlace=False, moveCatPage=True,
@@ -410,6 +411,7 @@
self.withHistory = withHistory
def run(self):
+ """The main bot function that does all the work."""
site = pywikibot.getSite()
newCat = catlib.Category(site, self.newCatTitle)
# set edit summary message
@@ -689,9 +691,8 @@
contextLength = full_text.find('\n\n', contextLength + 2)
if contextLength > 1000 or contextLength < 0:
contextLength = 500
- print
- pywikibot.output(full_text[:contextLength])
- print
+
+ pywikibot.output('\n' + full_text[:contextLength] + '\n')
subcatlist = self.catDB.getSubcats(current_cat)
supercatlist = self.catDB.getSupercats(current_cat)
@@ -775,15 +776,13 @@
longchoice = True
elif choice in ['m', 'M', '?']:
contextLength += 500
- print
- pywikibot.output(full_text[:contextLength])
- print
+ pywikibot.output('\n' + full_text[:contextLength] + '\n')
# if categories possibly weren't visible, show them additionally
# (maybe this should always be shown?)
if len(full_text) > contextLength:
- print ''
- print 'Original categories: '
+ pywikibot.output('')
+ pywikibot.output('Original categories: ')
for cat in article.categories():
pywikibot.output(u'* %s' % cat.title())
elif choice[0] == 'u':
@@ -810,7 +809,8 @@
articles = cat.articlesList(recurse=False)
if len(articles) == 0:
- pywikibot.output(u'There are no articles in category ' + catTitle)
+ pywikibot.output(u'There are no articles in category ' +
+ self.catTitle)
else:
preloadingGen = pagegenerators.PreloadingGenerator(iter(articles))
for article in preloadingGen:
@@ -820,8 +820,7 @@
class CategoryTreeRobot:
- '''
- Robot to create tree overviews of the category structure.
+ """ Robot to create tree overviews of the category structure.
Parameters:
* catTitle - The category which will be the tree's root.
@@ -831,7 +830,8 @@
won't be a problem.
* filename - The textfile where the tree should be saved; None to print
the tree to stdout.
- '''
+
+ """
def __init__(self, catTitle, catDB, filename=None, maxDepth=10):
self.catTitle = catTitle
@@ -839,53 +839,51 @@
if filename and not os.path.isabs(filename):
filename = config.datafilepath(filename)
self.filename = filename
- # TODO: make maxDepth changeable with a parameter or config file entry
self.maxDepth = maxDepth
self.site = pywikibot.getSite()
def treeview(self, cat, currentDepth=0, parent=None):
- '''
- Returns a multi-line string which contains a tree view of all
+ """ Return a multi-line string which contains a tree view of all
subcategories of cat, up to level maxDepth. Recursively calls itself.
Parameters:
* cat - the Category of the node we're currently opening
* currentDepth - the current level in the tree (for recursion)
* parent - the Category of the category we're coming from
- '''
+
+ """
result = u'#' * currentDepth
- result += '[[:%s|%s]]' % (cat.title(), cat.title().split(':', 1)[1])
+ if currentDepth > 0:
+ result += u' '
+ result += '[[:%s|%s]]' % (cat.title(), cat.title(withNamespace=False))
result += ' (%d)' % len(self.catDB.getArticles(cat))
- # We will remove an element of this array, but will need the original
- # array later, so we create a shallow copy with [:]
- supercats = self.catDB.getSupercats(cat)[:]
+ if currentDepth < self.maxDepth / 2:
+ # noisy dots
+ pywikibot.output('.', newline=False)
# Find out which other cats are supercats of the current cat
- try:
- supercats.remove(parent)
- except:
- pass
- if supercats != []:
- supercat_names = []
- for i in range(len(supercats)):
- # create a list of wiki links to the supercategories
+ supercat_names = []
+ for cat in self.catDB.getSupercats(cat):
+ # create a list of wiki links to the supercategories
+ if cat != parent:
supercat_names.append('[[:%s|%s]]'
- % (supercats[i].title(),
- supercats[i].title().split(':', 1)[1]))
- # print this list, separated with commas, using translations
- # given in also_in_cats
+ % (cat.title(),
+ cat.title(withNamespace=False)))
+ if supercat_names:
+ # print this list, separated with commas, using translations
+ # given in also_in_cats
result += ' ' + i18n.twtranslate(self.site, 'category-also-in',
{'alsocat': ', '.join(
supercat_names)})
+ del supercat_names
result += '\n'
if currentDepth < self.maxDepth:
for subcat in self.catDB.getSubcats(cat):
# recurse into subdirectories
result += self.treeview(subcat, currentDepth + 1, parent=cat)
- else:
- if self.catDB.getSubcats(cat) != []:
- # show that there are more categories beyond the depth limit
- result += '#' * (currentDepth + 1) + '[...]\n'
+ elif self.catDB.getSubcats(cat):
+ # show that there are more categories beyond the depth limit
+ result += '#' * (currentDepth + 1) + ' [...]\n'
return result
def run(self):
@@ -898,7 +896,9 @@
"""
cat = catlib.Category(self.site, 'Category:' + self.catTitle)
+ pywikibot.output('Generating tree...', newline=False)
tree = self.treeview(cat)
+ pywikibot.output(u'')
if self.filename:
pywikibot.output(u'Saving results in %s' % self.filename)
import codecs
@@ -921,9 +921,11 @@
showImages = False
talkPages = False
recurse = False
- withHistory = False
titleRegex = None
pagesonly = False
+ withHistory = False
+ rebuild = False
+ depth = 5
# This factory is responsible for processing command line arguments
# that are also used by other scripts and that determine on which pages
@@ -935,10 +937,8 @@
# If this is set to true then the custom edit summary given for removing
# categories from articles will also be used as the deletion reason.
useSummaryForDeletion = True
- catDB = CategoryDatabase()
action = None
sort_by_last_name = False
- restore = False
create_pages = False
follow_redirects = False
deleteEmptySourceCat = True
@@ -960,7 +960,7 @@
elif arg == '-person':
sort_by_last_name = True
elif arg == '-rebuild':
- catDB.rebuild()
+ rebuild = True
elif arg.startswith('-from:'):
oldCatTitle = arg[len('-from:'):].replace('_', ' ')
fromGiven = True
@@ -997,9 +997,12 @@
follow_redirects = True
elif arg == '-hist':
withHistory = True
+ elif arg.startswith('-depth:'):
+ depth = int(arg[len('-depth:'):])
else:
genFactory.handleArg(arg)
+ catDB = CategoryDatabase(rebuild=rebuild)
if action == 'add':
# Note that the add functionality is the only bot that actually
# uses the the generator factory. Every other bot creates its own
@@ -1035,7 +1038,8 @@
bot = CategoryMoveRobot(oldCatTitle, newCatTitle, batchMode,
editSummary, inPlace,
deleteEmptySourceCat=deleteEmptySourceCat,
- titleRegex=titleRegex, withHistory=withHistory)
+ titleRegex=titleRegex,
+ withHistory=withHistory)
bot.run()
elif action == 'tidy':
catTitle = pywikibot.input(u'Which category do you want to tidy up?')
@@ -1047,7 +1051,7 @@
filename = pywikibot.input(
u'Please enter the name of the file where the tree should be saved,'
u'\nor press enter to simply show the tree:')
- bot = CategoryTreeRobot(catTitle, catDB, filename)
+ bot = CategoryTreeRobot(catTitle, catDB, filename, depth)
bot.run()
elif action == 'listify':
if not fromGiven:
--
To view, visit https://gerrit.wikimedia.org/r/129229
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I5117a9d6071ab7f9d069b870f1a3e158274965ca
Gerrit-PatchSet: 2
Gerrit-Project: pywikibot/compat
Gerrit-Branch: master
Gerrit-Owner: Xqt <info(a)gno.de>
Gerrit-Reviewer: Andre Engels <andreengels(a)gmail.com>
Gerrit-Reviewer: Huji <huji.huji(a)gmail.com>
Gerrit-Reviewer: Ladsgroup <ladsgroup(a)gmail.com>
Gerrit-Reviewer: Rotem Liss <rotemliss(a)gmail.com>
Gerrit-Reviewer: Xqt <info(a)gno.de>
Gerrit-Reviewer: jenkins-bot <>