jenkins-bot submitted this change.

View Change

Approvals:
  Xqt: Looks good to me, approved
  jenkins-bot: Verified

[IMPR]: pagefromfile.py: compute regex only once

Do not compute regexes for each page.

Change-Id: If97d7ce2a52572066909474f75ac4cb6e576811a
---
M scripts/pagefromfile.py
1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/scripts/pagefromfile.py b/scripts/pagefromfile.py
index 8705099..6b506f8 100755
--- a/scripts/pagefromfile.py
+++ b/scripts/pagefromfile.py
@@ -195,10 +195,26 @@
super().__init__(**kwargs)
self.filename = filename
self.site = site or pywikibot.Site()
+ self.page_regex, self.title_regex = self._make_regexes()
+
+ def _make_regexes(self):
+ """Make regex from options."""
+ if self.opt.textonly:
+ pattern = '^(.*)$'
+ else:
+ pattern = (re.escape(self.opt.begin) + '(.*?)'
+ + re.escape(self.opt.end))
+ page_regex = re.compile(pattern, re.DOTALL)
+ title_regex = re.compile(
+ re.escape(self.opt.titlestart) + '(.*?)'
+ + re.escape(self.opt.titleend))
+ return page_regex, title_regex

@property
def generator(self) -> Iterator[pywikibot.Page]:
- """Read file and yield a tuple of page title and content.
+ """Read file and yield a page with content from file.
+
+ content is stored as a page attribute defined by CTX_ATTR.

.. versionchanged:: 7.6
changed from iterator method to generator property
@@ -216,7 +232,7 @@
length = 0
while text:
try:
- length, title, contents = self.findpage(text)
+ length, title, contents = self.find_page(text)
except TypeError:
if not length:
pywikibot.info('\nStart or end marker not found.')
@@ -233,18 +249,9 @@
yield page
text = text[length:]

- def findpage(self, text) -> Tuple[int, str, str]:
+ def find_page(self, text) -> Tuple[int, str, str]:
"""Find page to work on."""
- if self.opt.textonly:
- pattern = '^(.*)$'
- else:
- pattern = (re.escape(self.opt.begin) + '(.*?)'
- + re.escape(self.opt.end))
- page_regex = re.compile(pattern, re.DOTALL)
- title_regex = re.compile(
- re.escape(self.opt.titlestart) + '(.*?)'
- + re.escape(self.opt.titleend))
- location = page_regex.search(text)
+ location = self.page_regex.search(text)
if self.opt.include:
contents = location[0]
else:
@@ -253,10 +260,10 @@
title = self.opt.title
if not title:
try:
- title = title_regex.search(contents)[1]
+ title = self.title_regex.search(contents)[1]
if self.opt.notitle:
# Remove title (to allow creation of redirects)
- contents = title_regex.sub('', contents, count=1)
+ contents = self.title_regex.sub('', contents, count=1)
except TypeError:
raise NoTitleError(location.end())


To view, visit change 837207. To unsubscribe, or for help writing mail filters, visit settings.

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Change-Id: If97d7ce2a52572066909474f75ac4cb6e576811a
Gerrit-Change-Number: 837207
Gerrit-PatchSet: 1
Gerrit-Owner: Mpaa <mpaa.wiki@gmail.com>
Gerrit-Reviewer: D3r1ck01 <xsavitar.wiki@aol.com>
Gerrit-Reviewer: Xqt <info@gno.de>
Gerrit-Reviewer: jenkins-bot
Gerrit-MessageType: merged