jenkins-bot has submitted this change. ( https://gerrit.wikimedia.org/r/c/pywikibot/core/+/1052375?usp=email )
Change subject: [IMPr] use f-string instead of str.format in scripts ......................................................................
[IMPr] use f-string instead of str.format in scripts
using:

    ruff check --fix --select UP scripts
    autopep8 --in-place -aa -r scripts/.
Change-Id: I134b9379618ce62fa1a759e27b5e0d19661921af --- M scripts/archivebot.py M scripts/blockpageschecker.py M scripts/category.py M scripts/checkimages.py M scripts/clean_sandbox.py M scripts/commonscat.py M scripts/coordinate_import.py M scripts/create_isbn_edition.py M scripts/dataextend.py M scripts/delete.py M scripts/delinker.py M scripts/djvutext.py M scripts/download_dump.py M scripts/harvest_template.py M scripts/image.py M scripts/imagetransfer.py M scripts/interwiki.py M scripts/interwikidata.py M scripts/listpages.py M scripts/maintenance/cache.py M scripts/movepages.py M scripts/newitem.py M scripts/noreferences.py M scripts/nowcommons.py M scripts/parser_function_count.py M scripts/patrol.py M scripts/redirect.py M scripts/reflinks.py M scripts/replace.py M scripts/solve_disambiguation.py M scripts/speedy_delete.py M scripts/transferbot.py M scripts/transwikiimport.py M scripts/unusedfiles.py M scripts/watchlist.py M scripts/weblinkchecker.py M scripts/welcome.py 37 files changed, 1,678 insertions(+), 1,072 deletions(-)
Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified
diff --git a/scripts/archivebot.py b/scripts/archivebot.py index f9c72ad..3cc0a78 100755 --- a/scripts/archivebot.py +++ b/scripts/archivebot.py @@ -117,7 +117,7 @@ KeyboardInterrupt was enabled with ``-async`` option. """ # -# (C) Pywikibot team, 2006-2023 +# (C) Pywikibot team, 2006-2024 # # Distributed under the terms of the MIT license. # @@ -611,8 +611,9 @@ if not (title.startswith(page_title + '/') or self.force or self.key_ok()): raise ArchiveSecurityError( - 'Archive page {} does not start with page title ({})!' - .format(archive_link, page_title)) + f'Archive page {archive_link} does not start with page ' + f'title ({page_title})!' + ) self.archives[title] = DiscussionPage(archive_link, self, params)
return self.archives[title] @@ -936,8 +937,8 @@
if not templates: templates = ['User:MiszaBot/config'] - pywikibot.info('No template was specified, using default {{{{{}}}}}.' - .format(templates[0])) + pywikibot.info('No template was specified, using default ' + f'{{{{{templates[0]}}}}}.')
if asynchronous: signal.signal(signal.SIGINT, signal_handler) @@ -994,5 +995,5 @@ if __name__ == '__main__': start = datetime.datetime.now() main() - pywikibot.info('\nExecution time: {} seconds' - .format((datetime.datetime.now() - start).seconds)) + pywikibot.info('\nExecution time: ' + f'{(datetime.datetime.now() - start).seconds} seconds') diff --git a/scripts/blockpageschecker.py b/scripts/blockpageschecker.py index 154dba0..afb9eb1 100755 --- a/scripts/blockpageschecker.py +++ b/scripts/blockpageschecker.py @@ -291,6 +291,8 @@ ttmp = i18n.translate(self.site, template_total_move_protection) tnr = i18n.translate(self.site, template_no_regex) tu = i18n.translate(self.site, template_unique) + missing_l10n = ('This script is not localized to use it on ' + f'{self.site.sitename}.\nMissing ')
while True: text, restrictions = yield @@ -312,11 +314,9 @@ # page is not edit-protected # Deleting the template because the page doesn't need it. if not (ttp or tsp): - raise Error( - 'This script is not localized to use it on {}.\n' - 'Missing "template_semi_protection" or' - '"template_total_protection"' - .format(self.site.sitename)) + raise Error(missing_l10n + + ('"template_semi_protection" ' + 'or "template_total_protection"'))
replacement = '|'.join(ttp + tsp + (tu or [])) text, changes = re.subn( @@ -341,10 +341,7 @@ pywikibot.info(msg) else: if not tnr or tu and not tnr[4] or not (tu or tnr[1]): - raise Error( - 'This script is not localized to use it on \n{}. ' - 'Missing "template_no_regex"' - .format(self.site.sitename)) + raise Error(missing_l10n + '"template_no_regex"')
pywikibot.info( 'The page is protected to the sysop, but the template ' @@ -364,10 +361,7 @@ pywikibot.info(msg) else: if not tnr or tu and not tnr[4] or not (tu or tnr[1]): - raise Error( - 'This script is not localized to use it on \n' - '{}. Missing "template_no_regex"' - .format(self.site.sitename)) + raise Error(missing_l10n + '"template_no_regex"') pywikibot.info( 'The page is editable only for the autoconfirmed ' 'users, but the template seems not correct. Fixing...') diff --git a/scripts/category.py b/scripts/category.py index 60a203f..756a800 100755 --- a/scripts/category.py +++ b/scripts/category.py @@ -143,7 +143,7 @@ :mod:`pagegenerators` are supported with "move" and "remove" action. """ # -# (C) Pywikibot team, 2004-2023 +# (C) Pywikibot team, 2004-2024 # # Distributed under the terms of the MIT license. # @@ -441,8 +441,8 @@ else: with suppress(EnvironmentError): os.remove(filename) - pywikibot.info('Database is empty. {} removed' - .format(config.shortpath(filename))) + pywikibot.info( + f'Database is empty. {config.shortpath(filename)} removed')
class CategoryAddBot(CategoryPreprocess): @@ -512,8 +512,8 @@ pywikibot.info('* ' + cat.title()) catpl = pywikibot.Category(self.current_page.site, self.newcat) if catpl in cats: - pywikibot.info('{} is already in {}.' - .format(self.current_page.title(), catpl.title())) + pywikibot.info( + f'{self.current_page.title()} is already in {catpl.title()}.') else: if self.sort: catpl = self.sorted_by_last_name(catpl, self.current_page) @@ -539,8 +539,7 @@ else: if self.includeonly == ['includeonly']: text += '\n\n' - text += '<{0}>\n{1}\n</{0}>'.format( - tagname, categorytitle) + text += f'<{tagname}>\n{categorytitle}\n</{tagname}>' else: cats.append(catpl) text = textlib.replaceCategoryLinks( @@ -663,8 +662,8 @@ if self.wikibase and repo.username() is None: # The bot can't move categories nor update the Wikibase repo raise NoUsernameError( - "The 'wikibase' option is turned on and {} has no " - 'registered username.'.format(repo)) + f"The 'wikibase' option is turned on and {repo} has no " + 'registered username.')
template_vars = {'oldcat': self.oldcat.title(with_ns=False)} if self.newcat: @@ -860,15 +859,15 @@ """ move_possible = True if new_page and new_page.exists(): - pywikibot.warning("The {} target '{}' already exists." - .format(name, new_page.title())) + pywikibot.warning( + f"The {name} target '{new_page.title()}' already exists.") move_possible = False if not old_page.exists(): # only warn if not a talk page log = (pywikibot.log if old_page.namespace() % 2 else pywikibot.warning) - log("Moving {} '{}' requested, but the page doesn't exist." - .format(name, old_page.title())) + log(f"Moving {name} '{old_page.title()}' requested, but the page" + " doesn't exist.") move_possible = False return move_possible
@@ -883,8 +882,8 @@ Do not use this function from outside the class. """ # Some preparing - pywikibot.info('Moving text from {} to {}.'.format( - self.oldcat.title(), self.newcat.title())) + pywikibot.info(f'Moving text from {self.oldcat.title()} to ' + f'{self.newcat.title()}.') comma = self.site.mediawiki_message('comma-separator') authors = comma.join(self.oldcat.contributors().keys()) template_vars = {'oldcat': self.oldcat.title(), 'authors': authors} @@ -1021,8 +1020,10 @@ list_string = '' for article in sorted(set_of_articles): textlink = not (article.is_filepage() and self.show_images) - list_string += '{} {}'.format( - self.prefix, article.title(as_link=True, textlink=textlink)) + list_string += ( + f'{self.prefix} ' + f'{article.title(as_link=True, textlink=textlink)}' + ) if self.talk_pages and not article.isTalkPage(): list_string += ( f' -- [[{article.toggleTalkPage().title()}|talk]]') @@ -1117,8 +1118,11 @@ if cat != original_cat: text += cat.title(as_link=True) else: - text += '<<lightpurple>>{}<<default>>'.format( - current_cat.title(as_link=True)) + text += ( + '<<lightpurple>>' + f'{current_cat.title(as_link=True)}' + '<<default>>' + ) text += '\n' return text
@@ -1155,9 +1159,11 @@ # columnify i2 = i + new_column if i2 < count: - lines.append('[{0}{1}] {2:35}[{0}{3}] {4}' - .format(prefix, index % i, cat, - index % i2, cat_list[i2])) + lines.append( + f'[{prefix}{index % i}] ' + f'{cat:35}[{prefix}{index % i2}] ' + f'{cat_list[i2]}' + ) else: lines.append(f'[{prefix}{index % i}] {cat}') else: @@ -1223,18 +1229,22 @@
# show possible options for the user pywikibot.info() - options = (supercat_option, - subcat_option, - StandardOption( - 'save page to category <<lightpurple>>{}<<default>>' - .format(current_cat.title(with_ns=False)), 'c'), - StandardOption('remove the category from page', 'r'), - StandardOption('skip page', 's'), - context_option, - StandardOption('jump to custom category', 'j')) + options = ( + supercat_option, + subcat_option, + StandardOption('save page to category <<lightpurple>>' + f'{current_cat.title(with_ns=False)}<<default>>', + 'c'), + StandardOption('remove the category from page', 'r'), + StandardOption('skip page', 's'), + context_option, + StandardOption('jump to custom category', 'j') + ) choice = pywikibot.input_choice( - 'Choice for page <<lightpurple>>{}<<default>>:' - .format(member.title()), options, default='c') + f'Choice for page <<lightpurple>>{member.title()}<<default>>:', + options, + default='c' + )
if choice == 'c': pywikibot.info(f'Saving page to {current_cat.title()}') @@ -1457,18 +1467,17 @@ return
if config.verbose_output: - pywikibot.info('Subcategory "{}" is parent for:' - .format(child.title(with_ns=False))) + pywikibot.info( + f'Subcategory "{child.title(with_ns=False)}" is parent for:')
for grandchild in overcategorized: pywikibot.info(f'\t{grandchild.title()}')
for grandchild in overcategorized: - msg = ('Remove "<<lightpurple>>{}<<default>>" from "{}" because ' - 'it is already under subcategory "<<green>>{}<<default>>"?' - .format(grandchild.title(with_ns=False), - self.cat.title(with_ns=False), - child.title(with_ns=False))) + msg = (f'Remove "<<lightpurple>>{grandchild.title(with_ns=False)}' + f'<<default>>" from "{self.cat.title(with_ns=False)}"' + ' because it is already under subcategory ' + f'"<<green>>{child.title(with_ns=False)}<<default>>"?')
if not self.user_confirm(msg): continue diff --git a/scripts/checkimages.py b/scripts/checkimages.py index e5d0d87..67bea43 100755 --- a/scripts/checkimages.py +++ b/scripts/checkimages.py @@ -961,8 +961,8 @@ fp = pywikibot.FilePage(self.site, images_to_tag_list[-1]) already_reported_in_past = fp.revision_count(self.bots) image_title = re.escape(self.image.title(as_url=True)) - from_regex = (r'\n*[[:{}{}]]' - .format(self.image_namespace, image_title)) + from_regex = ( + rf'\n*[[:{self.image_namespace}{image_title}]]') # Delete the image in the list where we're write on text_for_the_report = re.sub(from_regex, '', text_for_the_report) @@ -992,8 +992,8 @@ == self.image.title(as_url=True): # the image itself, not report also this as duplicate continue - repme += '\n** [[:{}{}]]'.format(self.image_namespace, - dup_page.title(as_url=True)) + repme += (f'\n** [[:{self.image_namespace}' + f'{dup_page.title(as_url=True)}]]')
result = self.report_image(self.image_name, self.rep_page, self.com, repme, addings=False) @@ -1037,14 +1037,12 @@ pywikibot.info(f'{image_to_report} is already in the report page.') reported = False elif len(text_get) >= self.log_full_number: + msg = (f'The log page ({another_page.title()}) is full! Please' + ' delete the old files reported.') if self.log_full_error: - raise LogIsFull( - 'The log page ({}) is full! Please delete the old files ' - 'reported.'.format(another_page.title())) + raise LogIsFull(msg)
- pywikibot.info( - 'The log page ({}) is full! Please delete the old files ' - ' reported. Skip!'.format(another_page.title())) + pywikibot.info(msg + ' Skip!') # Don't report, but continue with the check # (we don't know if this is the first time we check this file # or not) diff --git a/scripts/clean_sandbox.py b/scripts/clean_sandbox.py index d48f4c3..6b450cb 100755 --- a/scripts/clean_sandbox.py +++ b/scripts/clean_sandbox.py @@ -238,8 +238,9 @@ + sandbox_page.title(as_link=True)) if sandbox_page.isRedirectPage(): pywikibot.warning( - '{} is a redirect page, cleaning it anyway' - .format(sandbox_page.title(as_link=True))) + f'{sandbox_page.title(as_link=True)} is a redirect' + ' page, cleaning it anyway' + ) try: text = sandbox_page.text if self.opt.summary: @@ -274,8 +275,10 @@ 'sandbox cleaned.') else: # wait for the rest pywikibot.info( - 'Sandbox edited {:.1f} minutes ago...' - .format(edit_delta.seconds / 60.0)) + 'Sandbox edited ' + f'{edit_delta.seconds / 60.0:.1f} minutes' + ' ago...' + ) pywikibot.info( f'Sleeping for {delta.seconds // 60} minutes.') pywikibot.sleep(delta.seconds) diff --git a/scripts/commonscat.py b/scripts/commonscat.py index bbfc466..9536a96 100755 --- a/scripts/commonscat.py +++ b/scripts/commonscat.py @@ -250,8 +250,8 @@ """Skip category redirects.""" if page.isCategoryRedirect(): pywikibot.warning( - 'Page {page} on {page.site} is a category redirect. ' - 'Skipping.'.format(page=page)) + f'Page {page} on {page.site} is a category redirect. ' + 'Skipping.') return True return super().skip_page(page)
@@ -301,8 +301,8 @@
if currentCommonscatTarget == checkedCommonscatTarget: # The current commonscat link is good - pywikibot.info('Commonscat link at {} to Category:{} is ok' - .format(page.title(), currentCommonscatTarget)) + pywikibot.info(f'Commonscat link at {page.title()} to ' + f'Category:{currentCommonscatTarget} is ok') return
if checkedCommonscatTarget: @@ -403,10 +403,9 @@
checkedCommonscat = self.checkCommonscatLink(commonscatLink[1]) if checkedCommonscat: - pywikibot.info( - 'Found link for {} at [[{}:{}]] to {}.' - .format(page.title(), ipage.site.code, ipage.title(), - checkedCommonscat)) + pywikibot.info(f'Found link for {page.title()} at ' + f'[[{ipage.site.code}:{ipage.title()}]] to ' + f'{checkedCommonscat}.') return checkedCommonscat return ''
@@ -496,9 +495,9 @@
if not m: pywikibot.info( - "getCommonscat: {} deleted by {}. Couldn't find " - 'move target in "{}"' - .format(commonsPage, loguser, logcomment)) + f'getCommonscat: {commonsPage} deleted by {loguser}. ' + f'Couldn\'t find move target in "{logcomment}"' + ) break
if m['newcat1']: diff --git a/scripts/coordinate_import.py b/scripts/coordinate_import.py index de4f6d2..25813ed 100755 --- a/scripts/coordinate_import.py +++ b/scripts/coordinate_import.py @@ -100,15 +100,14 @@ """ claims = item.get().get('claims') if self.prop in claims: - pywikibot.info('Item {} already contains coordinates ({})' - .format(item.title(), self.prop)) + pywikibot.info(f'Item {item.title()} already contains coordinates ' + f'({self.prop})') return True
prop = self.has_coord_qualifier(claims) if prop: - pywikibot.info( - 'Item {} already contains coordinates ({}) as qualifier for {}' - .format(item.title(), self.prop, prop)) + pywikibot.info(f'Item {item.title()} already contains coordinates ' + f'({self.prop}) as qualifier for {prop}') return True return False
diff --git a/scripts/create_isbn_edition.py b/scripts/create_isbn_edition.py index 7e2b19a..f5dcb5c 100755 --- a/scripts/create_isbn_edition.py +++ b/scripts/create_isbn_edition.py @@ -224,7 +224,7 @@ .. versionadded:: 7.7 """ # noqa: E501, W505, W605 # -# (C) Pywikibot team, 2022-2023 +# (C) Pywikibot team, 2022-2024 # # Distributed under the terms of the MIT license. # @@ -459,9 +459,11 @@ targetx[propty] = pywikibot.ItemPage(repo, target[propty])
try: - pywikibot.warning('Add {} ({}): {} ({})'.format( - proptyx[propty].labels[booklang], propty, - targetx[propty].labels[booklang], target[propty])) + pywikibot.warning( + f'Add {proptyx[propty].labels[booklang]} ' + f'({propty}): {targetx[propty].labels[booklang]} ' + f'({target[propty]})' + ) except: # noqa: B001, E722, H201 pywikibot.warning(f'Add {propty}:{target[propty]}')
@@ -797,8 +799,8 @@ if propty in propreqinst and ( 'P31' not in targetx[propty].claims or not is_in_list( targetx[propty].claims['P31'], propreqinst[propty])): - pywikibot.critical('{} ({}) is not a language'.format( - targetx[propty].labels[mainlang], target[propty])) + pywikibot.critical(f'{targetx[propty].labels[mainlang]} ' + f'({target[propty]}) is not a language') return
# check dependencies diff --git a/scripts/dataextend.py b/scripts/dataextend.py index 676affa..4ce1227 100755 --- a/scripts/dataextend.py +++ b/scripts/dataextend.py @@ -458,12 +458,11 @@ if claim.getTarget() is None: pywikibot.info(f'{self.label(prop)}: unknown') else: - pywikibot.info('{}: {}'.format( - self.label(prop), - self.label(claim.getTarget().title()))) + pywikibot.info( + f'{self.label(prop)}: {self.label(claim.getTarget().title())}') elif claim.type == 'time': - pywikibot.info('{}: {}'.format( - self.label(prop), self.showtime(claim.getTarget()))) + pywikibot.info( + f'{self.label(prop)}: {self.showtime(claim.getTarget())}') elif claim.type in ['external-id', 'commonsMedia']: pywikibot.info(f'{self.label(prop)}: {claim.getTarget()}') elif claim.type == 'quantity': @@ -472,8 +471,8 @@ claim.getTarget().amount, self.label(claim.getTarget().unit.split('/')[-1]))) else: - pywikibot.info('Unknown type {} for property {}'.format( - claim.type, self.label(prop))) + pywikibot.info( + f'Unknown type {claim.type} for property {self.label(prop)}')
MONTHNUMBER = { '1': 1, @@ -956,9 +955,8 @@ if sourcepart is not None ]
- pywikibot.info('Sourcing {}: {}' - .format(self.label(claim[0]), - self.label(claim[1]))) + pywikibot.info( + f'Sourcing {self.label(claim[0])}: {self.label(claim[1])}')
# probably means the sourcing is already there with suppress(APIError): @@ -980,9 +978,8 @@ target = self.createdateclaim(claim[1][6:]) except ValueError as ex: pywikibot.info( - 'Unable to analyze date "{}" for {}: {}' - .format(claim[1][6:], - self.label(claim[0]), ex)) + f'Unable to analyze date "{claim[1][6:]}" for {self.label(claim[0])}: {ex}' + ) pywikibot.input('Press enter to continue') target = None
@@ -1007,8 +1004,8 @@ else: createdclaim.setTarget(claim[1])
- pywikibot.info('Adding {}: {}'.format( - self.label(claim[0]), self.label(claim[1]))) + pywikibot.info( + f'Adding {self.label(claim[0])}: {self.label(claim[1])}')
try: item.addClaim(createdclaim) @@ -1074,9 +1071,8 @@ sourcepart for sourcepart in sourcedata if sourcepart is not None ] - pywikibot.info('Sourcing {}: {}' - .format(self.label(claim[0]), - self.label(claim[1]))) + pywikibot.info( + f'Sourcing {self.label(claim[0])}: {self.label(claim[1])}')
try: createdclaim.addSources([ @@ -1235,9 +1231,13 @@ if claim.type == 'wikibase-item' \ and claim.getTarget().title() == value: return True - if claim.type == 'commonsMedia' \ - and claim.getTarget().title().split( - ':', 1)[1].replace('_', ' ') == value.replace('_', ' '): + if claim.type == 'commonsMedia' and claim.getTarget().title().split( + ':', + 1)[1].replace( + '_', + ' ') == value.replace( + '_', + ' '): return True if claim.type == 'time' \ and self.showtime(claim.getTarget()) == self.showtime( @@ -1959,281 +1959,241 @@ def finddefaultmixedrefs(self, html, includesocial=True): defaultmixedrefs = [ ('P214', self.findbyre(r'viaf.org/(?:viaf/)?(\d+)', html)), - ('P227', self.findbyre(r'd-nb.info/(?:gnd/)?([\d-xX]+)', html)), - ('P244', self.findbyre(r'id.loc.gov/authorities/\w+/(\w+)', - html)), + ('P227', self.findbyre( + r'd-nb.info/(?:gnd/)?([\d-xX]+)', html)), + ('P244', self.findbyre( + r'id.loc.gov/authorities/\w+/(\w+)', html)), ('P244', self.findbyre(r'https?://lccn.loc.gov/(\w+)', html)), - ('P245', - self.findbyre(r'https?://www.getty.edu/[^"'\s]+subjectid=(\w+)', - html)), + ('P245', self.findbyre( + r'https?://www.getty.edu/[^"'\s]+subjectid=(\w+)', html)), ('P245', self.findbyre(r'getty.edu/page/ulan/(\w+)', html)), - ('P268', - self.findbyre(r'https?://catalogue.bnf.fr/ark./\d+/(?:cb)?(\w+)', - html)), + ('P268', self.findbyre( + r'https?://catalogue.bnf.fr/ark./\d+/(?:cb)?(\w+)', html)), ('P268', self.findbyre(r'data.bnf.fr/ark:/\d+/cb(\w+)', html)), ('P269', self.findbyre(r'https?://\w+.idref.fr/(\w+)', html)), - ('P345', self.findbyre(r'https?://www.imdb.com/\w+/(\w+)', html)), - ('P349', - self.findbyre(r'https?://id.ndl.go.jp/auth/[^"'\s]+/(\w+)', - html)), - ('P396', - self.findbyre( - r'opac.sbn.it/opacsbn/opac/[^<>'"\s]+?bid=([^\s'"<>]+)', - html)), - ('P409', - self.findbyre(r'https?://nla.gov.au/anbd.aut-an(\w+)', html)), - ('P434', - self.findbyre(r'https?://musicbrainz.org/\w+/([\w-]+)', html)), + ('P345', self.findbyre( + 
r'https?://www.imdb.com/\w+/(\w+)', html)), + ('P349', self.findbyre( + r'https?://id.ndl.go.jp/auth/[^"'\s]+/(\w+)', html)), + ('P396', self.findbyre( + r'opac.sbn.it/opacsbn/opac/[^<>'"\s]+?bid=([^\s'"<>]+)', + html)), + ('P409', self.findbyre( + r'https?://nla.gov.au/anbd.aut-an(\w+)', html)), + ('P434', self.findbyre( + r'https?://musicbrainz.org/\w+/([\w-]+)', html)), ('P496', self.findbyre(r'https?://orcid.org/([\d-]+)', html)), - ('P535', - self.findbyre(r'https?://www.findagrave.com/memorial/(\w+)', - html)), - ('P535', - self.findbyre( - r'https?://www.findagrave.com/cgi-bin/fg.cgi?[^<>"']*id=(\w+)', - html)), - ('P549', - self.findbyre(r'genealogy.math.ndsu.nodak.edu/id.php?id=(\w+)', - html)), - ('P650', - self.findbyre(r'https?://rkd.nl(?:/\w+)?/explore/artists/(\w+)', - html)), - ('P651', - self.findbyre(r'biografischportaal.nl/persoon/(\w+)', html)), - ('P723', - self.findbyre(r'dbnl.(?:nl|org)/auteurs/auteur.php?id=(\w+)', - html)), - ('P723', self.findbyre(r'data.bibliotheken.nl/id/dbnla/(\w+)', - html)), - ('P866', self.findbyre(r'perlentaucher.de/autor/([\w-]+)', html)), - ('P902', - self.findbyre(r'hls-dhs-dss.ch/textes/\w/[A-Z]?(\d+).php', html)), - ('P906', - self.findbyre(r'libris.kb.se/(?:resource/)?auth/(\w+)', html)), - ('P950', - self.findbyre(r'catalogo.bne.es/[^"'\s]+authority.id=(\w+)', - html)), - ('P1006', - self.findbyre(r'data.bibliotheken.nl/id/thes/p(\d+X?)', html)), - ('P1047', - self.findbyre(r'catholic-hierarchy.org/\w+/b(.+?).html', html)), - ('P1220', self.findbyre(r'//ibdb.com/person.php?id=(\d+)', html)), - ('P1233', - self.findbyre(r'https?://www.isfdb.org/cgi-bin/ea.cgi?(\d+)', - html)), - ('P1415', self.findbyre(r'doi.org/\d+.\d+/ref:odnb/(\d+)', html)), - ('P1417', - self.findbyre(r'https://www.britannica.com/(%5B%5Cw%5C-/%5D+)', html)), + ('P535', self.findbyre( + r'https?://www.findagrave.com/memorial/(\w+)', html)), + ('P535', self.findbyre( + r'https?://www.findagrave.com/cgi-bin/fg.cgi?[^<>"']*id=(\w+)', + html)), 
+ ('P549', self.findbyre( + r'genealogy.math.ndsu.nodak.edu/id.php?id=(\w+)', html)), + ('P650', self.findbyre( + r'https?://rkd.nl(?:/\w+)?/explore/artists/(\w+)', html)), + ('P651', self.findbyre( + r'biografischportaal.nl/persoon/(\w+)', html)), + ('P723', self.findbyre( + r'dbnl.(?:nl|org)/auteurs/auteur.php?id=(\w+)', html)), + ('P723', self.findbyre( + r'data.bibliotheken.nl/id/dbnla/(\w+)', html)), + ('P866', self.findbyre( + r'perlentaucher.de/autor/([\w-]+)', html)), + ('P902', self.findbyre( + r'hls-dhs-dss.ch/textes/\w/[A-Z]?(\d+).php', html)), + ('P906', self.findbyre( + r'libris.kb.se/(?:resource/)?auth/(\w+)', html)), + ('P950', self.findbyre( + r'catalogo.bne.es/[^"'\s]+authority.id=(\w+)', html)), + ('P1006', self.findbyre( + r'data.bibliotheken.nl/id/thes/p(\d+X?)', html)), + ('P1047', self.findbyre( + r'catholic-hierarchy.org/\w+/b(.+?).html', html)), + ('P1220', self.findbyre( + r'//ibdb.com/person.php?id=(\d+)', html)), + ('P1233', self.findbyre( + r'https?://www.isfdb.org/cgi-bin/ea.cgi?(\d+)', html)), + ('P1415', self.findbyre( + r'doi.org/\d+.\d+/ref:odnb/(\d+)', html)), + ('P1417', self.findbyre( + r'https://www.britannica.com/(%5B%5Cw%5C-/%5D+)', html)), ('P1422', self.findbyre(r'ta.sandrartnet/-person-(\w+)', html)), - ('P1563', - self.findbyre( - r'https?://www-history.mcs.st-andrews.ac.uk/Biographies/([^'"<>\s]+)', - html)), - ('P1728', - self.findbyre(r'https?://www.allmusic.com/artist/[\w-]*?(mn/d+)', - html)), - ('P1749', - self.findbyre( - r'https?://www.parlement(?:airdocumentatiecentrum)?.(?:com|nl)/id/(\w+)', - html)), - ('P1788', - self.findbyre( - r'huygens.knaw.nl/vrouwenlexicon/lemmata/data/([^"'<>\s]+)', - html)), - ('P1802', - self.findbyre( - r'https?://emlo.bodleian.ox.ac.uk/profile/person/([\w-]+)', - html)), - ('P1842', - self.findbyre(r'https?://gameo.org/index.php?title=([^'"\s]+)', - html)), - ('P1871', - self.findbyre( - r'https?://(?:data|thesaurus).cerl.org/(?:thesaurus|record)/(\w+)', - html)), - ('P1871', - 
self.findbyre(r'thesaurus.cerl.org/cgi-bin/record.pl?rid=(\w+)', - html)), - ('P1902', - self.findbyre(r'https?://open.spotify.com/artist/(\w+)', html)), - ('P1907', - self.findbyre(r'https?://adb.anu.edu.au/biography/([\w-]+)', - html)), - ('P1938', - self.findbyre(r'https?://www.gutenberg.org/ebooks/author/(\d+)', - html)), - ('P1953', - self.findbyre(r'https?://www.discogs.com/(\w+/)?artist/(\d+)', - html)), - ('P1986', - self.findbyre( - r'treccani.it/enciclopedia/([\w-_]+)_(Dizionario-Biografico)', - html)), - ('P2016', - self.findbyre(r'hoogleraren.ub.rug.nl/hoogleraren/(\w+)', - html)), - ('P2038', - self.findbyre( - r'https?://www.researchgate.net/profile/([^'"<>\s?]+)', - html)), + ('P1563', self.findbyre( + r'https?://www-history.mcs.st-andrews.ac.uk/Biographies/([^'"<>\s]+)', + html)), + ('P1728', self.findbyre( + r'https?://www.allmusic.com/artist/[\w-]*?(mn/d+)', html)), + ('P1749', self.findbyre( + r'https?://www.parlement(?:airdocumentatiecentrum)?.(?:com|nl)/id/(\w+)', + html)), + ('P1788', self.findbyre( + r'huygens.knaw.nl/vrouwenlexicon/lemmata/data/([^"'<>\s]+)', + html)), + ('P1802', self.findbyre( + r'https?://emlo.bodleian.ox.ac.uk/profile/person/([\w-]+)', + html)), + ('P1842', self.findbyre( + r'https?://gameo.org/index.php?title=([^'"\s]+)', html)), + ('P1871', self.findbyre( + r'https?://(?:data|thesaurus).cerl.org/(?:thesaurus|record)/(\w+)', + html)), + ('P1871', self.findbyre( + r'thesaurus.cerl.org/cgi-bin/record.pl?rid=(\w+)', html)), + ('P1902', self.findbyre( + r'https?://open.spotify.com/artist/(\w+)', html)), + ('P1907', self.findbyre( + r'https?://adb.anu.edu.au/biography/([\w-]+)', html)), + ('P1938', self.findbyre( + r'https?://www.gutenberg.org/ebooks/author/(\d+)', html)), + ('P1953', self.findbyre( + r'https?://www.discogs.com/(\w+/)?artist/(\d+)', html)), + ('P1986', self.findbyre( + r'treccani.it/enciclopedia/([\w-_]+)_(Dizionario-Biografico)', + html)), + ('P2016', self.findbyre( + 
r'hoogleraren.ub.rug.nl/hoogleraren/(\w+)', html)), + ('P2038', self.findbyre( + r'https?://www.researchgate.net/profile/([^'"<>\s?]+)', + html)), ('P2163', self.findbyre(r'id.worldcat.org/fast/(\d+)', html)), ('P2332', self.findbyre(r'/arthistorians.info/(\w+)', html)), ('P2372', self.findbyre(r'odis.be/lnk/([\w_]+)', html)), - ('P2373', - self.findbyre(r'https?://genius.com/artists/([^\s'"]*)', html)), - ('P2397', self.findbyre(r'youtube.com/channel/([\w-_]+)', html)), - ('P2454', - self.findbyre(r'https?://www.dwc.knaw.nl/[^'"\s]+=(\w+)', html)), - ('P2456', - self.findbyre(r'https?://dblp.uni-trier.de/pid/([\w/]+)', html)), + ('P2373', self.findbyre( + r'https?://genius.com/artists/([^\s'"]*)', html)), + ('P2397', self.findbyre( + r'youtube.com/channel/([\w-_]+)', html)), + ('P2454', self.findbyre( + r'https?://www.dwc.knaw.nl/[^'"\s]+=(\w+)', html)), + ('P2456', self.findbyre( + r'https?://dblp.uni-trier.de/pid/([\w/]+)', html)), ('P2469', self.findbyre(r'theatricalia.com/person/(\w+)', html)), - ('P2639', (self.findbyre(r'filmportal.de/person/(\w+)', html) - or '').lower() or None), + ('P2639', + (self.findbyre(r'filmportal.de/person/(\w+)', html) or ''). 
+ lower() or None), ('P2722', self.findbyre(r'deezer.com/artist/(\w+)', html)), - ('P2799', self.findbyre(r'cervantesvirtual.com/person/(\d+)', - html)), - ('P2850', - self.findbyre( - r'https?://itunes.apple.com(?:/\w{2})?/(?:id)?(\d+)', html)), - ('P2909', - self.findbyre(r'https?://www.secondhandsongs.com/artist/(\w+)', - html)), - ('P2915', - self.findbyre(r'vondel.humanities.uva.nl/ecartico/persons/(\d+)', - html)), - ('P2941', - self.findbyre( - r'munksroll.rcplondon.ac.uk/Biography/Details/(\d+)', html)), - ('P2949', self.findbyre(r'www.wikitree.com/wiki/(\w+-\d+)', - html)), - ('P2963', self.findbyre(r'goodreads.com/author/show/(\d+)', html)), - ('P2969', self.findbyre(r'goodreads.com/book/show/(\d+)', html)), - ('P3040', self.findbyre(r'https?://soundcloud.com/([\w-]+)', - html)), - ('P3192', - self.findbyre(r'https?://www.last.fm/music/([^'"\s]+)', html)), - ('P3217', - self.findbyre( - r'https?://sok.riksarkivet.se/sbl/Presentation.aspx?id=(\d+)', - html)), - ('P3217', - self.findbyre(r'https?://sok.riksarkivet.se/sbl/artikel/(\d+)', - html)), - ('P3241', - self.findbyre(r'https?://www.newadvent.org/cathen/(\w+).htm', - html)), + ('P2799', self.findbyre( + r'cervantesvirtual.com/person/(\d+)', html)), + ('P2850', self.findbyre( + r'https?://itunes.apple.com(?:/\w{2})?/(?:id)?(\d+)', html)), + ('P2909', self.findbyre( + r'https?://www.secondhandsongs.com/artist/(\w+)', html)), + ('P2915', self.findbyre( + r'vondel.humanities.uva.nl/ecartico/persons/(\d+)', html)), + ('P2941', self.findbyre( + r'munksroll.rcplondon.ac.uk/Biography/Details/(\d+)', html)), + ('P2949', self.findbyre( + r'www.wikitree.com/wiki/(\w+-\d+)', html)), + ('P2963', self.findbyre( + r'goodreads.com/author/show/(\d+)', html)), + ('P2969', self.findbyre( + r'goodreads.com/book/show/(\d+)', html)), + ('P3040', self.findbyre( + r'https?://soundcloud.com/([\w-]+)', html)), + ('P3192', self.findbyre( + r'https?://www.last.fm/music/([^'"\s]+)', html)), + ('P3217', self.findbyre( + 
r'https?://sok.riksarkivet.se/sbl/Presentation.aspx?id=(\d+)', + html)), + ('P3217', self.findbyre( + r'https?://sok.riksarkivet.se/sbl/artikel/(\d+)', html)), + ('P3241', self.findbyre( + r'https?://www.newadvent.org/cathen/(\w+).htm', html)), ('P3265', self.findbyre( r'https?://myspace.com/([\w-_/]+)', html)), - ('P3365', - self.findbyre(r'treccani.it/enciclopedia/([\w-_]+)', html)), - ('P3368', - self.findbyre(r'https?://prabook.com/web/[^/<>"']+/(\d+)', html)), - ('P3368', - self.findbyre( - r'prabook.com/web/person-view.html?profileId=(\d+)', html)), + ('P3365', self.findbyre( + r'treccani.it/enciclopedia/([\w-_]+)', html)), + ('P3368', self.findbyre( + r'https?://prabook.com/web/[^/<>"']+/(\d+)', html)), + ('P3368', self.findbyre( + r'prabook.com/web/person-view.html?profileId=(\d+)', html)), ('P3435', self.findbyre(r'vgmdb.net/artist/(\w+)', html)), ('P3478', self.findbyre(r'songkick.com/artists/(\w+)', html)), - ('P3630', - self.findbyre( - r'https?://www.babelio.com/auteur/[^<>'"\s]+/(\d+)', html)), - ('P3854', self.findbyre(r'soundtrackcollector.com/\w+/(\w+)', - html)), + ('P3630', self.findbyre( + r'https?://www.babelio.com/auteur/[^<>'"\s]+/(\d+)', html)), + ('P3854', self.findbyre( + r'soundtrackcollector.com/\w+/(\w+)', html)), ('P4013', self.findbyre(r'https?://giphy.com/(\w+)', html)), ('P4073', self.findbyre(r'(\w+).wikia.com', html)), - ('P4198', - self.findbyre(r'play.google.com/store/music/artist?id=(\w+)', - html)), - ('P4223', - self.findbyre( - r'treccani.it/enciclopedia/([\w-_]+)_(Enciclopedia-Italiana)', - html)), - ('P4228', self.findbyre(r'www.eoas.info/biogs/([^\s]+).html', - html)), - ('P4228', - self.findbyre(r'www.eoas.info%2Fbiogs%2F([^\s]+).html', html)), - ('P4252', - self.findbyre(r'www.mathnet.ru/[\w/.]+?.*?personid=(\w+)', - html)), - ('P4862', - self.findbyre(r'https?://www.amazon.com/[\w-]*/e/(\w+)', html)), - ('P5357', self.findbyre(r'sf-encyclopedia.com/entry/([\w_]+)', - html)), - ('P5404', - 
self.findbyre(r'rateyourmusic.com/artist/([^'"<>\s]+)', html)), - ('P5431', - self.findbyre( - r'https?://www.setlist.fm/setlists/[\w-]*?(\w+).html', html)), - ('P5570', - self.findbyre(r'www.noosfere.org/[\w./]+?numauteur=(\w+)', - html)), + ('P4198', self.findbyre( + r'play.google.com/store/music/artist?id=(\w+)', html)), + ('P4223', self.findbyre( + r'treccani.it/enciclopedia/([\w-_]+)_(Enciclopedia-Italiana)', + html)), + ('P4228', self.findbyre( + r'www.eoas.info/biogs/([^\s]+).html', html)), + ('P4228', self.findbyre( + r'www.eoas.info%2Fbiogs%2F([^\s]+).html', html)), + ('P4252', self.findbyre( + r'www.mathnet.ru/[\w/.]+?.*?personid=(\w+)', html)), + ('P4862', self.findbyre( + r'https?://www.amazon.com/[\w-]*/e/(\w+)', html)), + ('P5357', self.findbyre( + r'sf-encyclopedia.com/entry/([\w_]+)', html)), + ('P5404', self.findbyre( + r'rateyourmusic.com/artist/([^'"<>\s]+)', html)), + ('P5431', self.findbyre( + r'https?://www.setlist.fm/setlists/[\w-]*?(\w+).html', + html)), + ('P5570', self.findbyre( + r'www.noosfere.org/[\w./]+?numauteur=(\w+)', html)), ('P5882', self.findbyre(r'www.muziekweb.nl/\w+/(\w+)', html)), - ('P5924', - self.findbyre(r'lyrics.wikia.com/wiki/([^'"<>\s]*)', html)), - ('P6194', - self.findbyre(r'biographien.ac.\at/oebl/oebl_\w/[^\s.]+.', - html)), + ('P5924', self.findbyre( + r'lyrics.wikia.com/wiki/([^'"<>\s]*)', html)), + ('P6194', self.findbyre( + r'biographien.ac.\at/oebl/oebl_\w/[^\s.]+.', html)), ('P6517', self.findbyre(r'whosampled.com/([^'"<>/\s]+)', html)), - ('P6594', - self.findbyre(r'gf.org/fellows/all-fellows/([\w-]+)', html)), - ('P7032', - self.findbyre( - r'historici.nl/Onderzoek/Projecten/Repertorium/app/personen/(\d+)', - html)), - ('P7032', - self.findbyre( - r'repertoriumambtsdragersambtenaren1428-1861/app/personen/(\d+)', - html)), - ('P7195', - self.findbyre(r'https?://www.bandsintown.com/\w+/(\d+)', html)), - ('P7545', - self.findbyre(r'https?://www.askart.com/artist/[\w_]*/(\d+)/', - html)), - ('P7620', - 
self.findbyre( - r'treccani.it/enciclopedia/([\w-]+)_(Enciclopedia_dei_Papi)', - html)), - ('P7902', - self.findbyre(r'www.deutsche-biographie.de/pnd(\w+).html', html)), - ('P8034', self.findbyre(r'viaf.org/viaf/sourceID/BAV|(\w+)', - html)), - ('P9029', - self.findbyre(r'viceversalitterature.ch/author/(\d+)', html)), - ] + ('P6594', self.findbyre( + r'gf.org/fellows/all-fellows/([\w-]+)', html)), + ('P7032', self.findbyre( + r'historici.nl/Onderzoek/Projecten/Repertorium/app/personen/(\d+)', + html)), + ('P7032', self.findbyre( + r'repertoriumambtsdragersambtenaren1428-1861/app/personen/(\d+)', + html)), + ('P7195', self.findbyre( + r'https?://www.bandsintown.com/\w+/(\d+)', html)), + ('P7545', self.findbyre( + r'https?://www.askart.com/artist/[\w_]*/(\d+)/', html)), + ('P7620', self.findbyre( + r'treccani.it/enciclopedia/([\w-]+)_(Enciclopedia_dei_Papi)', + html)), + ('P7902', self.findbyre( + r'www.deutsche-biographie.de/pnd(\w+).html', html)), + ('P8034', self.findbyre( + r'viaf.org/viaf/sourceID/BAV|(\w+)', html)), + ('P9029', self.findbyre( + r'viceversalitterature.ch/author/(\d+)', html))] if includesocial: defaultmixedrefs += [ - ('P2002', - self.findbyre(r'https?://(?:www.)?twitter.com/#?(\w+)', - html)), - ('P2003', - self.findbyre( - r'https?://(?:\w+.)?instagram.com/([^/\s'"]{2,})', - html)), - ('P2013', - self.findbyre( - r'https?://www.facebook.com/(?:pg/)?([^/\s'"<>?]+)', - html)), - ('P2847', - self.findbyre(r'https?://plus.google.com/(+?\w+)', html)), - ('P2850', - self.findbyre( - r'https?://itunes.apple.com/(?:\w+/)?artist/(?:\w*/)?[a-z]{0,2}(\d{3,})', - html)), - ('P3258', - self.findbyre(r'https?://([\w-]+).livejournal.com', html)), - ('P3258', - self.findbyre(r'https?://users.livejournal.com/(\w+)', html)), - ('P3265', - self.findbyre(r'https?://www.myspace.com/([\w-]+)', html)), - ('P3283', - self.findbyre(r'https?://([^/"']+).bandcamp.com', html)), - ('P4003', - self.findbyre( - r'https?://www.facebook.com/pages/([^\s'"<>?]+)', html)), - 
('P4175', - self.findbyre(r'https://www.patreon.com/(%5B%5Cw%5C-%5D+)', html)), + ('P2002', self.findbyre( + r'https?://(?:www.)?twitter.com/#?(\w+)', html)), + ('P2003', self.findbyre( + r'https?://(?:\w+.)?instagram.com/([^/\s'"]{2,})', + html)), + ('P2013', self.findbyre( + r'https?://www.facebook.com/(?:pg/)?([^/\s'"<>?]+)', + html)), + ('P2847', self.findbyre( + r'https?://plus.google.com/(+?\w+)', html)), + ('P2850', self.findbyre( + r'https?://itunes.apple.com/(?:\w+/)?artist/(?:\w*/)?[a-z]{0,2}(\d{3,})', + html)), + ('P3258', self.findbyre( + r'https?://([\w-]+).livejournal.com', html)), + ('P3258', self.findbyre( + r'https?://users.livejournal.com/(\w+)', html)), + ('P3265', self.findbyre( + r'https?://www.myspace.com/([\w-]+)', html)), + ('P3283', self.findbyre( + r'https?://([^/"']+).bandcamp.com', html)), + ('P4003', self.findbyre( + r'https?://www.facebook.com/pages/([^\s'"<>?]+)', + html)), + ('P4175', self.findbyre( + r'https://www.patreon.com/(%5B%5Cw%5C-%5D+)', html)), ('P6634', self.findbyre( - r'.linkedin.com/in/([\w-]+)', html)), - ] + r'.linkedin.com/in/([\w-]+)', html))] result = [ pair for pair in defaultmixedrefs if pair[0] != self.dbproperty ] @@ -2614,8 +2574,14 @@ ]
def findlongtext(self, html: str): - return re.sub(r'\s', ' ', self.findbyre(r'(?s)(<table id="fullRecordTable" .*?</table>)', html) or ''). \ - replace('<tr>', '\n') + return re.sub( + r'\s', + ' ', + self.findbyre( + r'(?s)(<table id="fullRecordTable" .*?</table>)', + html) or ''). replace( + '<tr>', + '\n')
def findnames(self, html) -> list[str]: result = [] @@ -2670,8 +2636,13 @@ return self.findallbyre(r'([\w\s]+)(', section, 'country')
def findbirthplace(self, html: str): - return self.findbyre(r'(?s)Geburtsort:\s*(?:<[^<>]*>)?([^<>&]*)', html, 'city') or\ - self.findbyre(r'(?s)([\s\w]+)(Geburtsort)', html, 'city') + return self.findbyre( + r'(?s)Geburtsort:\s*(?:<[^<>]*>)?([^<>&]*)', + html, + 'city') or self.findbyre( + r'(?s)([\s\w]+)(Geburtsort)', + html, + 'city')
def finddeathplace(self, html: str): return self.findbyre(r'(?s)Sterbeort:\s*(?:<[^<>]*>)?([^<>&]*)', html, @@ -2810,8 +2781,7 @@
def findemployers(self, html: str): section = self.findbyre( - r'(?s)<strong>Beziehungen zu Organisationen</strong>.*?(<td.*?</td>)', - html) + r'(?s)<strong>Beziehungen zu Organisationen</strong>.*?(<td.*?</td>)', html) if section: return self.findallbyre(r'(?s)[>;]([^<>;]*)[<;]', section, 'employer', alt=['university']) @@ -2830,8 +2800,7 @@
def findmemberships(self, html: str): section = self.findbyre( - r'(?s)<strong>Beziehungen zu Organisationen</strong>.*?(<td.*?</td>)', - html) + r'(?s)<strong>Beziehungen zu Organisationen</strong>.*?(<td.*?</td>)', html) if section: return self.findallbyre( r'>([^<>]*)</a>', section, 'organization', @@ -2839,8 +2808,7 @@
def findrelorder(self, html: str): section = self.findbyre( - r'(?s)<strong>Beziehungen zu Organisationen</strong>.*?(<td.*?</td>)', - html) + r'(?s)<strong>Beziehungen zu Organisationen</strong>.*?(<td.*?</td>)', html) if section: return self.findbyre( r'>([^<>]*)</a>', section, 'religious order', @@ -3260,9 +3228,11 @@ self.escapehtml = True
def finddescriptions(self, html: str): - return self.findallbyre(r'(?s)Notice de type</span>.*?([^<>]*)</span>', html) \ - + self.findallbyre(r'(?s)<span class="detail_label">Note publique d'information.*?"detail_value">(.*?)<', - html) + return self.findallbyre( + r'(?s)Notice de type</span>.*?([^<>]*)</span>', + html) + self.findallbyre( + r'(?s)<span class="detail_label">Note publique d'information.*?"detail_value">(.*?)<', + html)
def findnames(self, html) -> list[str]: result = [] @@ -3288,9 +3258,13 @@ html, 'instanceof')
def findlanguagesspoken(self, html: str): - result = self.findallbyre("Traducteur de l['ea](.*?)vers", html, 'language') +\ - self.findallbyre( - "Traducteur de .*? vers l['ea](.*?)<", html, 'language') + result = self.findallbyre( + "Traducteur de l['ea](.*?)vers", + html, + 'language') + self.findallbyre( + "Traducteur de .*? vers l['ea](.*?)<", + html, + 'language') section = self.findbyre( r'(?s)<span id="Langues" class="DataCoded">(.*?)</span>', html) if section: @@ -3560,8 +3534,13 @@ self.findbyre(r'Nato ad? ([^<>.]+)', html, 'city')
def finddeathplace(self, html: str): - return self.findbyre(r'[mM]orto ad? ([^<>.(]+) nel', html, 'city') or \ - self.findbyre(r'[mM]orto ad? ([^<>.(]+)', html, 'city') + return self.findbyre( + r'[mM]orto ad? ([^<>.(]+) nel', + html, + 'city') or self.findbyre( + r'[mM]orto ad? ([^<>.(]+)', + html, + 'city')
def findlanguagesspoken(self, html: str): section = self.findbyre(r'Lingua.*?detail_value">(.*?)<', html) @@ -3698,9 +3677,16 @@
def findformationlocation(self, html: str): if not self.isperson: - return self.findbyre(r'(?s)<dt>Founded in:</dt>.*?<bdi>(\w+)', html, 'city') \ - or self.findbyre(r'(?s)<dt>Founded in:</dt>.*?<bdi>(.*?)</bdi>', html, 'city') \ - or self.findbyre(r'(?s)<dt>Area:</dt>.*?<bdi>(.*?)</bdi>', html, 'city') + return self.findbyre( + r'(?s)<dt>Founded in:</dt>.*?<bdi>(\w+)', + html, + 'city') or self.findbyre( + r'(?s)<dt>Founded in:</dt>.*?<bdi>(.*?)</bdi>', + html, + 'city') or self.findbyre( + r'(?s)<dt>Area:</dt>.*?<bdi>(.*?)</bdi>', + html, + 'city')
def findorigcountry(self, html: str): if not self.isperson: @@ -3719,8 +3705,11 @@ return self.findbyre(r'"https://viaf.org/viaf/(%5Cw+)/?"', html)
def findwebsite(self, html: str): - return self.findbyre(r'(?s)<th>offici.le website:.*?<bdi>(.*?)<', html) or \ - self.findbyre(r'<li class="home-favicon"><a href="(.*?)">', html) + return self.findbyre( + r'(?s)<th>offici.le website:.*?<bdi>(.*?)<', + html) or self.findbyre( + r'<li class="home-favicon"><a href="(.*?)">', + html)
def findtwitter(self, html: str): return self.findbyre( @@ -3759,9 +3748,13 @@ return None
def findmixedrefs(self, html: str): - return self.finddefaultmixedrefs(html, includesocial=False) + \ - [('P4862', self.findbyre(r'<li class="amazon-favicon"><a href="[^"]*amazon[^"?]*/(B\w+)[?"]', html))] +\ - [('P3453', result) for result in self.findallbyre( + return self.finddefaultmixedrefs( + html, includesocial=False) + [ + ('P4862', self.findbyre( + r'<li class="amazon-favicon"><a href="[^"]*amazon[^"?]*/(B\w+)[?"]', + html))] + [ + ('P3453', result) + for result in self.findallbyre( r'<dd class="ipi-code">(.*?)</dd>', html)]
@@ -3891,8 +3884,11 @@ return self.findbyre(r'<h1>[^<>]+,\s*([\w-]+)', html, 'firstname')
def findbirthdate(self, html: str): - result = self.findbyre(r'(?s)Año de nacimiento:\s*<span>(.*?)<', html) or \ - self.findbyre(r'<h1>[^<>]+((?:n.\s*)?([^)<>-]+?)[–-)]', html) + result = self.findbyre( + r'(?s)Año de nacimiento:\s*<span>(.*?)<', + html) or self.findbyre( + r'<h1>[^<>]+((?:n.\s*)?([^)<>-]+?)[–-)]', + html) if result and 'fl.' not in result and not result.strip().startswith( 'm.') and '1' in result: return result @@ -4252,8 +4248,9 @@
def getvalue(self, field, html, dtype=None): return self.findbyre( - r'(?s)>\s*{}\s*<.*?<TD[^<>]*>(?:<[^<>]*>|\s)*([^<>]+)</'.format( - field), html, dtype) + rf'(?s)>\s*{field}\s*<.*?<TD[^<>]*>(?:<[^<>]*>|\s)*([^<>]+)</', + html, + dtype)
def findinstanceof(self, html: str): return 'Q5' @@ -4352,15 +4349,25 @@ r'">([^<>]+)</span>', section, 'occupation')
def findbirthplace(self, html: str): - return self.findbyre(r'itemprop="birthPlace">([^<>]*),', html, 'city') or \ - self.findbyre(r'itemprop="birthPlace">([^<>]*)<', html, 'city') + return self.findbyre( + r'itemprop="birthPlace">([^<>]*),', + html, + 'city') or self.findbyre( + r'itemprop="birthPlace">([^<>]*)<', + html, + 'city')
def findbirthdate(self, html: str): return self.findbyre(r'itemprop="birthDate">([^<>]*?)[</]', html)
def finddeathplace(self, html: str): - return self.findbyre(r'itemprop="deathPlace">([^<>]*),', html, 'city') or \ - self.findbyre(r'itemprop="deathPlace">([^<>]*)<', html, 'city') + return self.findbyre( + r'itemprop="deathPlace">([^<>]*),', + html, + 'city') or self.findbyre( + r'itemprop="deathPlace">([^<>]*)<', + html, + 'city')
def finddeathdate(self, html: str): return self.findbyre(r'itemprop="deathDate">([^<>]*?)[</]', html) @@ -4407,14 +4414,26 @@ self.findallbyre(r'[zZ]us(?:ter)? van ([^<>]*)', html, 'person')
def findfather(self, html: str): - return self.findbyre(r'[zZ]oon van ([^<>]*)', html, 'male-person', skips=['female-person']) or \ - self.findbyre(r'[dD]ochter van ([^<>]*)', html, - 'male-person', skips=['female-person']) + return self.findbyre( + r'[zZ]oon van ([^<>]*)', + html, + 'male-person', + skips=['female-person']) or self.findbyre( + r'[dD]ochter van ([^<>]*)', + html, + 'male-person', + skips=['female-person'])
def findmother(self, html: str): - return self.findbyre(r'[zZ]oon van ([^<>]*)', html, 'female-person', skips=['male-person']) or \ - self.findbyre(r'[dD]ochter van ([^<>]*)', html, - 'female-person', skips=['male-person']) + return self.findbyre( + r'[zZ]oon van ([^<>]*)', + html, + 'female-person', + skips=['male-person']) or self.findbyre( + r'[dD]ochter van ([^<>]*)', + html, + 'female-person', + skips=['male-person'])
def findmemberships(self, html: str): return self.findallbyre(r'Lid van[^<>]*<em>(.*?)<', html, @@ -5207,8 +5226,7 @@
def findoccupations(self, html: str): section = self.findbyre( - r'(?s)<div class="s12 wrapper tag-block-compact extramarg">(.*?)</div>', - html) + r'(?s)<div class="s12 wrapper tag-block-compact extramarg">(.*?)</div>', html) if section: return self.findallbyre(r'>([^<>]*)<', section, 'theater-occupation', alt=['occupation']) @@ -5221,8 +5239,7 @@ def findbirthplace(self, html: str): return self.findbyre( r'(?s)<div class="xt-lable">Born</div>\s*<div class="xt-main-title">' - r'[^<>]*</div>\s*<div class="xt-main-moreinfo">(.*?)</div>', html, - 'city') + r'[^<>]*</div>\s*<div class="xt-main-moreinfo">(.*?)</div>', html, 'city')
def finddeathdate(self, html: str): return self.findbyre( @@ -5452,14 +5469,16 @@
def getvalue(self, field, html, dtype=None, alt=None): return self.findbyre( - '(?s)<td[^<>]*class="eti">{}</td>.*?<td[^<>]*class="sub">(.*?)<' - .format(field), html, dtype, alt=alt) + f'(?s)<td[^<>]*class="eti">{field}</td>.*?<td[^<>]*class="sub">(.*?)<', + html, dtype, alt=alt)
def getvalues(self, field, html, dtype=None, alt=None) -> list[str]: result = [] for preresult in self.findallbyre( - '(?s)<td[^<>]*class="eti">{}</td>.*?<td[^<>]*class="sub">(.*?)<' - .format(field), html, dtype, alt=alt): + f'(?s)<td[^<>]*class="eti">{field}</td>.*?<td[^<>]*class="sub">(.*?)<', + html, + dtype, + alt=alt): result += preresult.split('|') return result
@@ -5888,8 +5907,10 @@
def getvalue(self, field, html, dtype=None, alt=None): return self.findbyre( - r'(?s)<td[^<>]*>[^<>]*{}[^<>]*</td>\s*<td[^<>]*>(.*?)</td>'.format( - field), html, dtype, alt=alt) + rf'(?s)<td[^<>]*>[^<>]*{field}[^<>]*</td>\s*<td[^<>]*>(.*?)</td>', + html, + dtype, + alt=alt)
def getvalues(self, field, html, dtype=None, alt=None) -> list[str]: parts = re.findall('(?s)<td[^<>]*>(.*?)</td>', html) @@ -6914,8 +6935,12 @@ html)
def findlongtext(self, html: str): - parts = [self.findbyre(r'(?s)<dt>Titles or roles</dt>\s*<dd>(.*?)</dd>', html) or ''] + \ - self.findallbyre(r'(?s)<div class="relations">(.*?)</div>', html) + parts = [ + self.findbyre( + r'(?s)<dt>Titles or roles</dt>\s*<dd>(.*?)</dd>', + html) or ''] + self.findallbyre( + r'(?s)<div class="relations">(.*?)</div>', + html) return '\n'.join(parts)
def findoccupations(self, html: str): @@ -6995,18 +7020,18 @@
def getvalue(self, field, html, dtype=None): return self.findbyre( - r'(?s)"fieldname">\s*{}\s*</span></td>\s*<td[^<>]*>(?:<[^<>]*>)*(.+?)<' - .format(field), html, dtype) + rf'(?s)"fieldname">\s*{field}\s*</span></td>\s*<td[^<>]*>(?:<[^<>]*>)*(.+?)<', + html, dtype)
def getallvalues(self, field, html, dtype=None): return self.findallbyre( - r'(?s)"fieldname">\s*{}\s*</span></td>\s*<td[^<>]*>(?:<[^<>]*>)*(.+?)<' - .format(field), html, dtype) + rf'(?s)"fieldname">\s*{field}\s*</span></td>\s*<td[^<>]*>(?:<[^<>]*>)*(.+?)<', + html, dtype)
def getfullvalue(self, field, html, dtype=None): return self.findbyre( - r'(?s)"fieldname">\s*{}\s*</span></td>\s*<td[^<>]*>(.*?)</td>' - .format(field), html, dtype) + rf'(?s)"fieldname">\s*{field}\s*</span></td>\s*<td[^<>]*>(.*?)</td>', + html, dtype)
def getsecondvalue(self, field, html, dtype=None): section = self.findbyre( @@ -7165,9 +7190,11 @@ html, dtype)
def getvalues(self, field, html, dtype=None, link=False) -> list[str]: - section = self.findbyre(r'(?s)>{}</span>(.*?>)[^<>]+</span><span' - .format(field), html) or \ - self.findbyre(fr'(?s)>{field}</span>(.*)', html) + section = self.findbyre( + rf'(?s)>{field}</span>(.*?>)[^<>]+</span><span', + html) or self.findbyre( + fr'(?s)>{field}</span>(.*)', + html) if section: return self.findallbyre( r'<{}[^<>]*>(.*?)[(<]'.format('a ' if link else 'span'), @@ -7243,8 +7270,8 @@
def getvalue(self, field, html, dtype=None, alt=None): return self.findbyre( - r'(?s)<dt>{}:</dt>\s*<dd[^<>]*>(?:<[^<>]*>)*(.+?)(?:<[^<>]*>)*</dd>' - .format(field), html, dtype, alt=alt) + rf'(?s)<dt>{field}:</dt>\s*<dd[^<>]*>(?:<[^<>]*>)*(.+?)(?:<[^<>]*>)*</dd>', + html, dtype, alt=alt)
def findinstanceof(self, html: str): return 'Q215380' @@ -7381,8 +7408,11 @@ ]
def findmixedrefs(self, html: str): - section = self.findbyre(r'(?s)<div class="head">Sites:</div>\s*<div[^<>]*>(.*?)</div>', html) or \ - self.findbyre(r'(?s)"sameAs": [(.*?)]', html) + section = self.findbyre( + r'(?s)<div class="head">Sites:</div>\s*<div[^<>]*>(.*?)</div>', + html) or self.findbyre( + r'(?s)"sameAs": [(.*?)]', + html) if section: return self.finddefaultmixedrefs(section)
@@ -7420,17 +7450,29 @@ 'gender')
def findbirthdate(self, html: str): - return self.findbyre(r'datetime="([\w-]*)" itemprop="birthDate"', html) or\ - self.findbyre(r'(?s)birthDate">(.*?)[<(]', html) + return self.findbyre( + r'datetime="([\w-]*)" itemprop="birthDate"', + html) or self.findbyre( + r'(?s)birthDate">(.*?)[<(]', + html)
def finddeathdate(self, html: str): - return self.findbyre(r'datetime="([\w-]*)" itemprop="deathDate"', html) or\ - self.findbyre(r'(?s)deathDate">(.*?)<', html) + return self.findbyre( + r'datetime="([\w-]*)" itemprop="deathDate"', + html) or self.findbyre( + r'(?s)deathDate">(.*?)<', + html)
def findoccupations(self, html: str): - return self.findallbyre(r'"metier-(.*?)"', html, 'theater-occupation', alt=['occupation']) +\ - self.findallbyre(r'(?s)-metier">(.*?)<', html, - 'theater-occupation', alt=['occupation']) + return self.findallbyre( + r'"metier-(.*?)"', + html, + 'theater-occupation', + alt=['occupation']) + self.findallbyre( + r'(?s)-metier">(.*?)<', + html, + 'theater-occupation', + alt=['occupation'])
class ItalianPeopleAnalyzer(Analyzer): @@ -7461,8 +7503,13 @@ return self.findbyre(r'<strong>(.*?)<', html, 'lastname')
def findfirstname(self, html: str): - return self.findbyre(r'<span class="sc">(.*?)<', html, 'firstname') or \ - self.findbyre(r'<span class="sc">\s*([\w-]+)', html, 'firstname') + return self.findbyre( + r'<span class="sc">(.*?)<', + html, + 'firstname') or self.findbyre( + r'<span class="sc">\s*([\w-]+)', + html, + 'firstname')
def findbirthplace(self, html: str): return self.findbyre( @@ -7719,9 +7766,13 @@ return result
def findoccupations(self, html: str): - return self.findallbyre(r'"jobTitle":\s*"(.*?)"', html, 'occupation') +\ - self.findallbyre( - r'[pP]osition(?:<[^<>]*>\s)*?([^<>]*?)</', html, 'occupation') + return self.findallbyre( + r'"jobTitle":\s*"(.*?)"', + html, + 'occupation') + self.findallbyre( + r'[pP]osition(?:<[^<>]*>\s)*?([^<>]*?)</', + html, + 'occupation')
class NgvAnalyzer(Analyzer): @@ -8081,8 +8132,11 @@
def getvalue(self, field, html, dtype=None): return self.findbyre( - '(?s)<TR><TD[^<>]*keyColumn[^<>]*>[^<>]*{}[^<>]*</TD>[^<>]*<TD[^<>]*valueColumn[^<>]*>(.*?)<' - .format(field), html.replace(' ', ' '), dtype) + f'(?s)<TR><TD[^<>]*keyColumn[^<>]*>[^<>]*{field}[^<>]*</TD>[^<>]*<TD[^<>]*valueColumn[^<>]*>(.*?)<', + html.replace( + ' ', + ' '), + dtype)
def findnames(self, html) -> list[str]: return [ @@ -8163,13 +8217,13 @@
def getvalue(self, field, html, dtype=None): return self.findbyre( - r'<label[^<>]*>\s*{}\s*<.*?"form_input_element">(.*?)<'.format( - field), html, dtype) + rf'<label[^<>]*>\s*{field}\s*<.*?"form_input_element">(.*?)<', + html, + dtype)
def getvalues(self, field, html, dtype=None) -> list[str]: sections = self.findallbyre( - r'<label[^<>]*>\s*{}\s*<.*?"form_input_element">(.*?)<'.format( - field), html) + rf'<label[^<>]*>\s*{field}\s*<.*?"form_input_element">(.*?)<', html) result = [] for section in sections: result += self.findallbyre('([^;]*)', section, dtype) @@ -8268,9 +8322,13 @@ 'lastname')
def findfirstname(self, html: str): - return self.findbyre(r'(?s)<td[^<>]*>roepnaam</td>\s*<td[^<>]*>(.*?)<', html, 'firstname') or \ - self.findbyre( - r'(?s)<td[^<>]*>eerste voornaam</td>\s*<td[^<>]*>(.*?)<', html, 'firstname') + return self.findbyre( + r'(?s)<td[^<>]*>roepnaam</td>\s*<td[^<>]*>(.*?)<', + html, + 'firstname') or self.findbyre( + r'(?s)<td[^<>]*>eerste voornaam</td>\s*<td[^<>]*>(.*?)<', + html, + 'firstname')
def findpseudonyms(self, html: str): return self.findallbyre( @@ -8294,8 +8352,7 @@
def findoccupations(self, html: str): section = self.findbyre( - r'(?s)<b>Professionele loopbaan</b>\s*</td>\s*</tr>\s*<tr>(.*?)</tbody>', - html) + r'(?s)<b>Professionele loopbaan</b>\s*</td>\s*</tr>\s*<tr>(.*?)</tbody>', html) if section: return self.findallbyre(r'(?s)<tr[^<>]*>\s*<td[^<>]*>(.*?)</td>', section, 'occupation') @@ -8305,10 +8362,12 @@ r'(?s)<b>Engagementen in organisaties en instellingen</b>\s*</td>\s*</tr>\s*<tr>(.*?)</tbody>', html) if section: - return self.findallbyre(r'(?s)<tr[^<>]*>\s*<td[^<>]*>[^<>]*</td>\s*<td[^<>]*>([^<>]*)</td>', section, - 'employer', alt=['university'], skips=['organization']) + \ - self.findallbyre(r'<a[^<>]*>(.*?)<', section, 'employer', - alt=['university'], skips=['organization']) + return self.findallbyre( + r'(?s)<tr[^<>]*>\s*<td[^<>]*>[^<>]*</td>\s*<td[^<>]*>([^<>]*)</td>', + section, 'employer', alt=['university'], + skips=['organization']) + self.findallbyre( + r'<a[^<>]*>(.*?)<', section, 'employer', alt=['university'], + skips=['organization']) return None
def findmemberships(self, html: str): @@ -8316,15 +8375,23 @@ r'(?s)<b>Engagementen in organisaties en instellingen</b>\s*</td>\s*</tr>\s*<tr>(.*?)</tbody>', html) if section: - return self.findallbyre(r'(?s)<tr[^<>]*>\s*<td[^<>]*>[^<>]*</td>\s*<td[^<>]*>([^<>]*)</td>', section, - 'organization', skips=['employer', 'university']) + \ - self.findallbyre( - r'<a[^<>]*>(.*?)<', section, 'organization', skips=['employer', 'university']) + return self.findallbyre( + r'(?s)<tr[^<>]*>\s*<td[^<>]*>[^<>]*</td>\s*<td[^<>]*>([^<>]*)</td>', + section, + 'organization', + skips=[ + 'employer', + 'university']) + self.findallbyre( + r'<a[^<>]*>(.*?)<', + section, + 'organization', + skips=[ + 'employer', + 'university'])
def findpositions(self, html: str): section = self.findbyre( - r'(?s)<b>Politieke mandaten</b>\s*</td>\s*</tr>\s*<tr>(.*?)</tbody>', - html) + r'(?s)<b>Politieke mandaten</b>\s*</td>\s*</tr>\s*<tr>(.*?)</tbody>', html) if section: result = [] for subsection in self.findallbyre(r'(?s)<tr[^<>]*>(.*?)</tr>', @@ -8890,7 +8957,8 @@ def getvalue(self, field, html, dtype=None): return self.findbyre( fr'(?s)<div class="InformationBoxTitle">{field}:</div>\s*<div class="InformationBoxContents">(.*?)</div>', - html, dtype) + html, + dtype)
def getvalues(self, field, html, dtype=None) -> list[str]: section = self.getvalue(field, html) @@ -9025,9 +9093,17 @@ html.replace('<br />', ' '), 'city')
def findworkplaces(self, html: str): - return self.findallbyre(r'(?s)Active in[^<>]*</h4>\s*<p>(.*?)<', html.replace('<br />', ' '), 'city') + \ - self.findallbyre( - r'(?s)Studio or Business[^<>]*</h4>\s*<p>(.*?)<', html.replace('<br />', ' '), 'city') + return self.findallbyre( + r'(?s)Active in[^<>]*</h4>\s*<p>(.*?)<', + html.replace( + '<br />', + ' '), + 'city') + self.findallbyre( + r'(?s)Studio or Business[^<>]*</h4>\s*<p>(.*?)<', + html.replace( + '<br />', + ' '), + 'city')
def findincollections(self, html: str): section = self.findbyre( @@ -9215,8 +9291,13 @@ return 'Q5'
def findgender(self, html: str): - return self.findbyre(r'schema:gender"[^<>]+resource="schema:([^<>]+?)"', html, 'gender') or \ - self.findbyre(r'Gender:</td><td[^<>]*>([^<>]+)', html, 'gender') + return self.findbyre( + r'schema:gender"[^<>]+resource="schema:([^<>]+?)"', + html, + 'gender') or self.findbyre( + r'Gender:</td><td[^<>]*>([^<>]+)', + html, + 'gender')
def findbirthplace(self, html: str): return self.findbyre(r'schema:birthPlace"[^<>]*>(.*?)<', html, 'city') @@ -9277,9 +9358,14 @@ [('P245', self.findbyre(r'page/ulan/(\w+)', html))]
def findgenres(self, html: str): - return self.findallbyre(r'<td>Subject of[^<>]*</td>\s*<td>(.*?)<', html, 'genre') + \ - self.findallbyre( - r'<td>Subject of[^<>]*</td>\s*<td>[^<>]+</td>\s*<td>(.*?)<', html, 'genre', alt=['art-genre']) + return self.findallbyre( + r'<td>Subject of[^<>]*</td>\s*<td>(.*?)<', + html, + 'genre') + self.findallbyre( + r'<td>Subject of[^<>]*</td>\s*<td>[^<>]+</td>\s*<td>(.*?)<', + html, + 'genre', + alt=['art-genre'])
def findlanguagesspoken(self, html: str): return self.findallbyre( @@ -9369,8 +9455,7 @@
def findmemberships(self, html: str): section = self.findbyre( - r'(?s)>wissenschaftliche\s*Mitgliedschaften:<.*?<tbody>(.*?)</tbody>', - html) + r'(?s)>wissenschaftliche\s*Mitgliedschaften:<.*?<tbody>(.*?)</tbody>', html) if section: return self.findallbyre( r'(?s)>(?:Korrespondierendes Mitglied, )?([^<>]*)<?td>\s*</tr>', @@ -9517,14 +9602,18 @@ return self.findbyre(r'"familyName">(.*?)<', html, 'lastname')
def findbirthdate(self, html: str): - return self.findbyre(r'itemprop="birthDate" datetime="(\d{4})-00-00"', html) or \ - self.findbyre( - r'itemprop="birthDate" datetime="(\d{4}-\d{2}-\d{2})"', html) + return self.findbyre( + r'itemprop="birthDate" datetime="(\d{4})-00-00"', + html) or self.findbyre( + r'itemprop="birthDate" datetime="(\d{4}-\d{2}-\d{2})"', + html)
def finddeathdate(self, html: str): - return self.findbyre(r'itemprop="deathDate" datetime="(\d{4})-00-00"', html) or \ - self.findbyre( - r'itemprop="deathDate" datetime="(\d{4}-\d{2}-\d{2})"', html) + return self.findbyre( + r'itemprop="deathDate" datetime="(\d{4})-00-00"', + html) or self.findbyre( + r'itemprop="deathDate" datetime="(\d{4}-\d{2}-\d{2})"', + html)
def findbirthplace(self, html: str): return self.findbyre(r'"birthPlace"[^<>]*>(?:<[^<>]*>)*([^<>]+)', html, @@ -9587,13 +9676,18 @@ + [self.findbyre(r'<title>([^<>()]*)', html)]
def finddescriptions(self, html: str): - return self.findallbyre(r'name="[^"]*description" content="(.*?)"', html) + \ - self.findallbyre(r"content='(.*?)' name='[^']*description", html) + return self.findallbyre( + r'name="[^"]*description" content="(.*?)"', + html) + self.findallbyre( + r"content='(.*?)' name='[^']*description", + html)
def findlongtext(self, html: str): - return self.findbyre(r'(?s)<span id="freeTextauthor\d+"[^<>]*>(.*?)</span>', html) or \ - self.findbyre( - r'(?s)<span id="freeTextContainerauthor\d+"[^<>]*>(.*?)</span>', html) + return self.findbyre( + r'(?s)<span id="freeTextauthor\d+"[^<>]*>(.*?)</span>', + html) or self.findbyre( + r'(?s)<span id="freeTextContainerauthor\d+"[^<>]*>(.*?)</span>', + html)
def findbirthplace(self, html: str): return self.findbyre(r'(?s)Born</div>\s*(?:in )?(.*?)<', html, 'city') @@ -9930,8 +10024,7 @@
def finddirectorsphotography(self, html: str): section = self.findbyre( - r'(?s)<i>Director of Photography (Kamera)</i>.*?(<table>.*?</table>)', - html) + r'(?s)<i>Director of Photography (Kamera)</i>.*?(<table>.*?</table>)', html) if section: return self.findallbyre(r'<b>([^<>]*)</b>', section, 'filmmaker')
@@ -10110,9 +10203,8 @@ 'chesstitle')
def findmixedrefs(self, html: str): - return [('P1440', - self.findbyre( - r'http://ratings.fide.com/card.phtml\?event=(\d+)', html))] + return [('P1440', self.findbyre( + r'http://ratings.fide.com/card.phtml\?event=(\d+)', html))]
def findparticipations(self, html: str): names = self.findallbyre( @@ -10137,9 +10229,11 @@ self.hrtre = '(<TABLE WIDTH="90%".*?)<TABLE CELLPADDING="3"'
def findlanguagenames(self, html: str): - result = [('en', name) for name in self.findallbyre(r'(?s)<font size="[^"]+"><b>(.*?)[<(]', html)] + \ - [('zh', name) for name in self.findallbyre( - r'(?s)<font size="[^"]+"><b>(.*?)[<(]', html)] + result = [ + ('en', name) for name in self.findallbyre( + r'(?s)<font size="[^"]+"><b>(.*?)[<(]', html)] + [ + ('zh', name) for name in self.findallbyre( + r'(?s)<font size="[^"]+"><b>(.*?)[<(]', html)] section = self.findbyre(r'(?s)Aliases:(.*?<TR>)', html) if section: section = section.replace(' ', ' ') @@ -10673,8 +10767,8 @@
def getvalue(self, field, html, dtype=None): return self.findbyre( - r'(?s)<td class="td1"[^<>]*>\s*<strong>{}</strong>\s*</td>\s*<td[^<>]*>(.*?)</td>' - .format(field), html, dtype) + rf'(?s)<td class="td1"[^<>]*>\s*<strong>{field}</strong>\s*</td>\s*<td[^<>]*>(.*?)</td>', + html, dtype)
def getvalues(self, field, html, dtype=None) -> list[str]: section = self.getvalue(field, html) @@ -11334,18 +11428,20 @@ return 'Q5'
def findbirthdate(self, html: str): - return (self.findbyre( - r'"description" content="[^"]+(([^"]*?)–', html - ) or self.findbyre( - r'(?:<span style="font-family:\'Times New Roman\';">|</b>)[^<>]*?((?:\d+.\w+.)?\d+) –', - html)) + return ( + self.findbyre( + r'"description" content="[^"]+(([^"]*?)–', + html) or self.findbyre( + r'(?:<span style="font-family:\'Times New Roman\';">|</b>)[^<>]*?((?:\d+.\w+.)?\d+) –', + html))
def finddeathdate(self, html: str): - return (self.findbyre( - r'"description" content="[^"]+–([^"]*?))', html - ) or self.findbyre( - r'(?:<span style="font-family:\'Times New Roman\';">|</b>)[^<>]*? – [^<>]*?((?:\d+.\w+.)?\d+)', - html)) + return ( + self.findbyre( + r'"description" content="[^"]+–([^"]*?))', + html) or self.findbyre( + r'(?:<span style="font-family:\'Times New Roman\';">|</b>)[^<>]*? – [^<>]*?((?:\d+.\w+.)?\d+)', + html))
def findbirthplace(self, html: str): return self.findbyre( @@ -11685,9 +11781,12 @@ return []
def findlongtext(self, html: str): - return (self.findbyre( - '(?s)<table cellpadding="5" cellspacing="0" border="0" bgcolor="#E0E0E0" width="100%">(.*?)<h1>', - html) or '').replace(' ', ' ') + return ( + self.findbyre( + '(?s)<table cellpadding="5" cellspacing="0" border="0" bgcolor="#E0E0E0" width="100%">(.*?)<h1>', + html) or '').replace( + ' ', + ' ')
def findbirthdate(self, html: str): return self.findbyre(r'Dates:\s*</td><td[^<>]*>(.*?)[-<]', @@ -11957,8 +12056,8 @@
def getvalue(self, field, html, category=None): return self.findbyre( - r'(?s)<td class="caption">{}</td>\s*<td class="value">(.*?)</td>' - .format(field), html, category) + rf'(?s)<td class="caption">{field}</td>\s*<td class="value">(.*?)</td>', + html, category)
def findlongtext(self, html: str): return self.findbyre( @@ -11977,14 +12076,12 @@ return result
def findbirthdate(self, html: str): - return self.getvalue('dáta breithe', html) or \ - self.findbyre( - r'"article-title">(?:<[^<>]*>)*[^<>]*(<[^<>]*>(\d+)</a>-', html) + return self.getvalue('dáta breithe', html) or self.findbyre( + r'"article-title">(?:<[^<>]*>)*[^<>]*(<[^<>]*>(\d+)</a>-', html)
def finddeathdate(self, html: str): - return self.getvalue('dáta báis', html) or \ - self.findbyre( - r'"article-title">(?:<[^<>]*>)*[^<>]*(.*?-<[^<>]*>(\d+)</a>', html) + return self.getvalue('dáta báis', html) or self.findbyre( + r'"article-title">(?:<[^<>]*>)*[^<>]*(.*?-<[^<>]*>(\d+)</a>', html)
def findbirthplace(self, html: str): section = self.getvalue('áit bhreithe', html) @@ -12227,8 +12324,13 @@ section = self.findbyre( r'(?s)(<dt>Address.*?<dd[^<>]*>.*?</dd>)', html) if section: - result = self.findbyre(r'(?s)>([^,<>]*),[^<>]*</li>', section, 'city') or \ - self.findbyre(r'(?s)>([^,<>]*)</li>', section, 'city') + result = self.findbyre( + r'(?s)>([^,<>]*),[^<>]*</li>', + section, + 'city') or self.findbyre( + r'(?s)>([^,<>]*)</li>', + section, + 'city') return [result]
def findnationality(self, html: str): @@ -12255,8 +12357,13 @@ def findgenres(self, html: str): section1 = self.findbyre(r'(?s)Genre:\s*</span>(.*?)<', html) or '' section2 = self.findbyre(r'(?s)Style:\s*</span>(.*?)<', html) or '' - return self.findallbyre('([^,]+)', section1, 'muziekgenre', alt=['genre']) +\ - self.findallbyre('([^,]+)', section2, 'muziekgenre', alt=['genre']) + return self.findallbyre('([^,]+)', + section1, + 'muziekgenre', + alt=['genre']) + self.findallbyre('([^,]+)', + section2, + 'muziekgenre', + alt=['genre'])
def findparts(self, html: str): section = self.findbyre(r'(?s)>Member(s)</li>(.*?)</ul>', html) or '' @@ -12288,11 +12395,13 @@ self.showurl = False
def getvalue(self, field, html, dtype=None): - return self.findbyre(r'(?si)itemprop="{}"[^<>]*>(.*?)<' - .format(field), html, dtype) \ - or self.findbyre( - r'(?si)"infoPiece"><span>{}:</span>(?:\s|<[^<>]*>)*([^<>]*)' - .format(field), html, dtype) + return self.findbyre( + rf'(?si)itemprop="{field}"[^<>]*>(.*?)<', + html, + dtype) or self.findbyre( + rf'(?si)"infoPiece"><span>{field}:</span>(?:\s|<[^<>]*>)*([^<>]*)', + html, + dtype)
def findnames(self, html) -> list[str]: return [self.findbyre(r'(?s)<h1>(.*?)<', html)] @@ -12727,8 +12836,7 @@
def getvalue(self, field, html, stype=None): section = self.findbyre( - r'(?s)"field-label">[^<>]*{}:[^<>]*</div>(.*?)</div><div>'.format( - field), html) + rf'(?s)"field-label">[^<>]*{field}:[^<>]*</div>(.*?)</div><div>', html) if section: return self.findbyre(r'>\s*(\w[^<>]+)<', section, stype) return None @@ -13259,8 +13367,8 @@
def getvalue(self, field, html, dtype=None): return self.findbyre( - '<span class="label">{}</span>: <span class="value">(.*?)</span>' - .format(field), html, dtype) + f'<span class="label">{field}</span>: <span class="value">(.*?)</span>', + html, dtype)
def getvalues(self, field, html, dtype=None, splitter=',') -> list[str]: field = self.getvalue(field, html) @@ -13325,9 +13433,12 @@ return result
def finddescriptions(self, html: str): - return self.findallbyre(r'<meta content="(.*?)"', html) + \ - [self.findbyre( - r'(?s)<div class="parent-breadcrumb">.*?</div>\s*<h2>[^<>]*</h2>\s*<h3>(.*?)</h3>', html)] + return self.findallbyre( + r'<meta content="(.*?)"', + html) + [ + self.findbyre( + r'(?s)<div class="parent-breadcrumb">.*?</div>\s*<h2>[^<>]*</h2>\s*<h3>(.*?)</h3>', + html)]
def findlongtext(self, html: str): return self.findbyre(r'(?s)<p id="abstract">(.*?)</div>', html) @@ -13382,8 +13493,9 @@
def getvalue(self, field, html, dtype=None): return self.findbyre( - r'(?is)<span class="pornstar_more_details_label">{}</span>\s*<span class="pornstar_more_details_data">(.*?)<' - .format(field), html, dtype) + rf'(?is)<span class="pornstar_more_details_label">{field}</span>\s*<span class="pornstar_more_details_data">(.*?)<', + html, + dtype)
def findnames(self, html) -> list[str]: result = self.findallbyre('<h1[^<>]*>(.*?)<', html) @@ -13393,9 +13505,11 @@ return result
def findlongtext(self, html: str): - return self.findbyre(r'(?s)<div class="pornstar_info_bio[^<>]*long_description">(?:\s|<[^<>]*>)*(.*?)<', html) or\ - self.findbyre( - r'(?s)<div class="pornstar_info_bio[^<>]*description">(?:\s|<[^<>]*>)*(.*?)<', html) + return self.findbyre( + r'(?s)<div class="pornstar_info_bio[^<>]*long_description">(?:\s|<[^<>]*>)*(.*?)<', + html) or self.findbyre( + r'(?s)<div class="pornstar_info_bio[^<>]*description">(?:\s|<[^<>]*>)*(.*?)<', + html)
def findheight(self, html: str): section = self.getvalue('Height', html) @@ -13447,9 +13561,23 @@ self.escapeunicode = True
def prepare(self, html: str): - return html.replace('\n', '\n').replace('\t', ' ').replace('\r', '').replace("\'", "'").\ - replace('\xe9', 'é').replace('\xe8', 'è').replace( - '\xea', 'ê').replace(' ', ' ') + return html.replace( + '\n', + '\n').replace( + '\t', + ' ').replace( + '\r', + '').replace( + "\'", + "'"). replace( + '\xe9', + 'é').replace( + '\xe8', + 'è').replace( + '\xea', + 'ê').replace( + ' ', + ' ')
def findinstanceof(self, html: str): return 'Q5' @@ -13629,8 +13757,8 @@
def getcode(self, code, html): return self.findbyre( - r'(?s)<b>Source of number or code:</b>\s*{}</p>\s*<p><b>Standard number or code:</b>\s*(.*?)</p>' - .format(code), html) + rf'(?s)<b>Source of number or code:</b>\s*{code}</p>\s*<p><b>Standard number or code:</b>\s*(.*?)</p>', + html)
def findmixedrefs(self, html: str): return [ @@ -13691,8 +13819,9 @@
def getvalue(self, field, html, dtype=None): return self.findbyre( - '<label>{}:</label>(.*?)<(?:label|<h3 class="underscratch)'.format( - field), html, dtype) + f'<label>{field}:</label>(.*?)<(?:label|<h3 class="underscratch)', + html, + dtype)
def getvalues(self, field, html, dtype=None, alt=None) -> list[str]: section = self.getvalue(field, html) @@ -14192,8 +14321,11 @@ self.language = 'en'
def findnames(self, html) -> list[str]: - section = self.findbyre(r'(?s)(<h1.*?)<script>', html) or self.findbyre( - r'(?s)(.*?)<script>', html) or html + section = self.findbyre( + r'(?s)(<h1.*?)<script>', + html) or self.findbyre( + r'(?s)(.*?)<script>', + html) or html return (self.findallbyre( r'itemprop="\w*[nN]ame"[^<>]*>(.*?)<', section) + self.findallbyre( r'itemprop="sameAs"[^<>]*>(.*?)<', section)) @@ -14230,8 +14362,8 @@
def getvalue(self, field, html, dtype=None): return self.findbyre( - r'(?s){}.\s*:\s*</strong>.*?<td[^<>]*>(?:<[^<>]*>)*([^<>]+)<' - .format(field), html, dtype) + rf'(?s){field}.\s*:\s*</strong>.*?<td[^<>]*>(?:<[^<>]*>)*([^<>]+)<', + html, dtype)
def findinstanceof(self, html: str): return 'Q5' @@ -14686,9 +14818,8 @@ parts = self.findallbyre(r'(?s)functie:(.*?<br.*?)<br', section) parts = [self.TAGRE.sub(' ', part) for part in parts] parts = [part.replace('instelling:', '') for part in parts] - result += [ - self.findbyre(r'(?s)(.*)', part, 'occupation') for part in parts - ] + result += [self.findbyre(r'(?s)(.*)', part, 'occupation') + for part in parts] result += self.findallbyre(r'(?s)<span class="functie">(.*?)[(<]', html, 'occupation') return result @@ -14881,8 +15012,8 @@
def getvalue(self, field, html, dtype=None): return self.findbyre( - r'(?s)<dt class="indexlabel">{}</dt>\s*<dd class="indexvalue">(.*?)</dd>' - .format(field), html, dtype) + rf'(?s)<dt class="indexlabel">{field}</dt>\s*<dd class="indexvalue">(.*?)</dd>', + html, dtype)
def findnames(self, html) -> list[str]: section = self.getvalue('Namensvarianten', html) or '' @@ -15124,13 +15255,14 @@
def getvalue(self, field, html, dtype=None): return self.findbyre( - r'(?s)<p class="ltr (?:notice-label|text-muted)">\s*{}.*?<[^<>]* class="ltr"\s*>(.*?)<' - .format(field), html, dtype) + rf'(?s)<p class="ltr (?:notice-label|text-muted)">\s*{field}.*?<[^<>]* class="ltr"\s*>(.*?)<', + html, + dtype)
def getvalues(self, field, html, dtype=None) -> list[str]: return self.findallbyre( - r'(?s)<p class="ltr (?:notice-label|text-muted)">\s*{}.*?<[^<>]* class="ltr"\s*>(.*?)<' - .format(field), html, dtype) + rf'(?s)<p class="ltr (?:notice-label|text-muted)">\s*{field}.*?<[^<>]* class="ltr"\s*>(.*?)<', + html, dtype)
def instanceof(self, html: str): return 'Q5' @@ -15259,8 +15391,8 @@
def getvalue(self, field, html, dtype=None): return self.findbyre( - r"(?s)<div class='metadata-row'><span [^<>]*>\s*%s\s*</span>\s*<span [^<>]*>(.*?)</span>" - % field, html, dtype) + r"(?s)<div class='metadata-row'><span [^<>]*>\s*%s\s*</span>\s*<span [^<>]*>(.*?)</span>" % + field, html, dtype)
def getvalues(self, field, html, dtype=None) -> list[str]: section = self.getvalue(field, html) @@ -15327,8 +15459,8 @@
def getvalue(self, field, html, dtype=None): result = self.findbyre( - r'(?s)<div class="field-label">\s*{}\s*<.*?<div class=[^<>]*field-item[^<>]*>(.*?)</div>' - .format(field), html) + rf'(?s)<div class="field-label">\s*{field}\s*<.*?<div class=[^<>]*field-item[^<>]*>(.*?)</div>', + html) if result: result = self.TAGRE.sub('', result) if dtype: @@ -15535,8 +15667,7 @@ site = 'wikimedia' self.dbname = f'{site.title()} {self.language.upper()}' self.urlbase = f'https://%7Bself.language%7D.%7Bsite%7D.org/wiki/%7B%7Bid%7D%7D' - self.urlbase3 = 'https://%7B%7D.%7B%7D.org/w/index.php?title=%7B%7Bid%7D%7D&veswitched=1&... - self.language, site) + self.urlbase3 = f'https://%7Bself.language%7D.%7Bsite%7D.org/w/index.php?title=%7B%7Bid%7D%7D&...' self.hrtre = '{{(.*?)}}' self.mainRE = '(?s)<textarea[^<>]*name="wpTextbox1">(.*?)</textarea>' self.escapehtml = True @@ -15589,12 +15720,12 @@ continue if not splitters: result += self.findallbyre( - r'(?is)[\b|_\s]%s\s*=((?:[^|、{}]|{{[^{}]*}})+)' - % name, box, dtype, alt=alt) + r'(?is)[\b|_\s]%s\s*=((?:[^|、{}]|{{[^{}]*}})+)' % + name, box, dtype, alt=alt) else: sections = self.findallbyre( - r'(?is)[\b|_\s]%s\s*=((?:[^|、{}]|{{[^{}]*}})+)' - % name, box, alt=alt) + r'(?is)[\b|_\s]%s\s*=((?:[^|、{}]|{{[^{}]*}})+)' % + name, box, alt=alt) for section in sections: result += self.findallbyre(fr'([^{splitters}]+)', section, dtype) @@ -15613,13 +15744,13 @@ continue if not splitters: result = self.findallbyre( - r'(?is)[\b|_\s]%s\s*=((?:[^|{}]|{{[^{}]*}})+)' - % name, box, dtype, alt=alt) + r'(?is)[\b|_\s]%s\s*=((?:[^|{}]|{{[^{}]*}})+)' % + name, box, dtype, alt=alt) else: result = [] preresult = self.findallbyre( - r'(?is)[\b|_\s]%s\s*=((?:[^|{}]|{{[^{}]*}})+)' - % name, box, alt=alt) + r'(?is)[\b|_\s]%s\s*=((?:[^|{}]|{{[^{}]*}})+)' % + name, box, alt=alt) for section in preresult: result += self.findallbyre(fr'([^{splitters}]+)', section, dtype) @@ -15763,78 +15894,135 @@ "''", '')
def finddescriptions(self, html: str): - return self.getinfos([ - 'fineincipit', 'commentaire', 'kurzbeschreibung', 'fets_destacables', 'описание', 'bekendvan', - 'postcognome', 'postnazionalità', 'known_for', 'description', 'başlık', 'известен как', - ], html) \ - + [self.removewiki( - self.findbyre(r" %s(?: stato)? (?:e[eiu]?n |an? |u[nm][ea]? |eine[nr]? |'n |ne |e |unha |ett? |o |một )?(.+?)[.;]" % word, html)) for - word in [ - 'is', 'w[aio]s', 'ist', 'wao?r', 'este?', 'était', 'fu', 'fou', '—', '-', 'era', 'е', 'היה', 'by[łl]', - 'foi', 'был', 'fue', 'oli', 'bio', 'wie', 'var', 'je', 'იყო', 'adalah', 'é', 'ήταν', 'هو', 'стала', - '[eé]s', 'er', 'est[ia]s', 'एक', 'یک', 'كان', 'è', 'бил', 'là', 'on', ',', 'on', 'egy', 'sono', - 'är', 'are', 'fuit', 'وهو', 'esas', 'は、', 'ni', 'là']] \ - + self.findallbyre(r'{{short description|(.*?)}', html) \ - + self.findallbyre(r'[[[^[]|]+?:([^[]|]+)]]', html) \ - + [x.replace('_', ' ') - for x in self.findallbyre(r'((.*?))', self.id)] + return self.getinfos( + ['fineincipit', 'commentaire', 'kurzbeschreibung', + 'fets_destacables', 'описание', 'bekendvan', 'postcognome', + 'postnazionalità', 'known_for', 'description', 'başlık', + 'известен как'], + html) + [ + self.removewiki( + self.findbyre( + r" %s(?: stato)? (?:e[eiu]?n |an? |u[nm][ea]? |eine[nr]? |'n |ne |e |unha |ett? |o |một )?(.+?)[.;]" + % word, html)) + for word + in + ['is', 'w[aio]s', 'ist', 'wao?r', 'este?', 'était', 'fu', 'fou', + '—', '-', 'era', 'е', 'היה', 'by[łl]', 'foi', 'был', 'fue', 'oli', + 'bio', 'wie', 'var', 'je', 'იყო', 'adalah', 'é', 'ήταν', 'هو', + 'стала', '[eé]s', 'er', 'est[ia]s', 'एक', 'یک', 'كان', 'è', 'бил', + 'là', 'on', ',', 'on', 'egy', 'sono', 'är', 'are', 'fuit', 'وهو', + 'esas', 'は、', 'ni', 'là']] + self.findallbyre( + r'{{short description|(.*?)}', html) + self.findallbyre( + r'[[[^[]|]+?:([^[]|]+)]]', html) + [ + x.replace('_', ' ') for x in self.findallbyre( + r'((.*?))', self.id)]
def findoccupations(self, html: str): - return self.getinfos([ - 'charte', r'attività\s*(?:altre)?\d*', 'occupation', 'zawód', 'functie', 'spfunc', 'beroep', - 'рід_діяльності', 'المهنة', 'ocupación', 'עיסוק', '職業', 'ocupação', 'ιδιότητα', 'мамандығы', - 'zanimanje', 'meslek', 'mesleği', 'activités?', 'پیشه', 'профессия', 'profesión', '직업', - 'asa', 'kazi yake', r'(?:antaŭ|aliaj)?okupoj?\d*', 'работил', 'ocupacio', 'aktywność zawodowa', - 'funkcja', 'profesio', 'ocupație', 'povolání', 'töökoht', 'szakma', 'profession', - ], html, 'occupation') + \ - self.findallbyre(r'(?i)info(?:box|boks|taula|kast)(?:\s*-\s*)?([\w\s]+)', html, 'occupation') \ - + self.findallbyre( - r'基礎情報([\w\s]+)', html, 'occupation') \ - + self.findallbyre(r'{([\w\s]+)infobox', html, 'occupation') + self.findallbyre( - r'Categorie:\s*(\w+) (?:van|der) ', html, 'occupation') \ - + self.findallbyre(r'(?i)inligtingskas([\w\s]+)', html, 'occupation') + return self.getinfos(['charte', r'attività\s*(?:altre)?\d*', + 'occupation', 'zawód', 'functie', 'spfunc', + 'beroep', 'рід_діяльності', 'المهنة', + 'ocupación', 'עיסוק', '職業', 'ocupação', + 'ιδιότητα', 'мамандығы', 'zanimanje', 'meslek', + 'mesleği', 'activités?', 'پیشه', 'профессия', + 'profesión', '직업', 'asa', 'kazi yake', + r'(?:antaŭ|aliaj)?okupoj?\d*', 'работил', + 'ocupacio', 'aktywność zawodowa', 'funkcja', + 'profesio', 'ocupație', 'povolání', 'töökoht', + 'szakma', 'profession'], + html, 'occupation') + self.findallbyre( + r'(?i)info(?:box|boks|taula|kast)(?:\s*-\s*)?([\w\s]+)', html, + 'occupation') + self.findallbyre(r'基礎情報([\w\s]+)', html, + 'occupation') + self.findallbyre(r'{([\w\s]+)infobox', html, + 'occupation') + self.findallbyre(r'Categorie:\s*(\w+) (?:van|der) ', + html, 'occupation') + self.findallbyre( + r'(?i)inligtingskas([\w\s]+)', html, 'occupation')
def findpositions(self, html: str): - return self.getinfos([ - r'functie\d?', r'titre\d', r'stanowisko\d*', r'(?:\d+. )?funkcja', r'должность(?:_\d+)?', r'títulos?\d*', - 'tytuł', 'titles', 'chức vị', r'amt\d*', 'jabatan', 'carica', '(?:altri)?titol[oi]', r'титул_?\d*', - 'anderwerk', 'titels', 'autres fonctions', 'апісанне выявы', r'titul(?:y|as)?\d*', 'title', - r'\w*ambt(?:en)?', 'carica', 'other_post', 'посада', '事務所', '最高職務', - ], html, 'position') \ - + self.findallbyre(r'S-ttl|title\s*=(.*?)|', html, 'position') + self.findallbyre( - r"Categor[ií]a:((?:Re[iy]e?|Conde)s d['e].*?)(?:]|del siglo)", html, 'position') \ - + self.findallbyre(r'Kategorie:\s*(König ([^[]]*))', html, 'position') + self.findallbyre( - r'Category:([^[]]+ king)', html, 'position') \ - + self.findallbyre(r'Catégorie:\s*(Roi .*?)]', html, 'position') + self.findallbyre( - r'Kategoria:(Królowie .*?)]', html, 'position') \ - + self.findallbyre(r'Kategori:(Raja .*?)]', html, 'position') + self.findallbyre( - r'[cC]ategorie:\s*((?:Heerser|Bisschop|Grootmeester|Abdis|Koning|Drost) .*?)]', html, 'position') + return self.getinfos([r'functie\d?', + r'titre\d', + r'stanowisko\d*', + r'(?:\d+. 
)?funkcja', + r'должность(?:_\d+)?', + r'títulos?\d*', + 'tytuł', + 'titles', + 'chức vị', + r'amt\d*', + 'jabatan', + 'carica', + '(?:altri)?titol[oi]', + r'титул_?\d*', + 'anderwerk', + 'titels', + 'autres fonctions', + 'апісанне выявы', + r'titul(?:y|as)?\d*', + 'title', + r'\w*ambt(?:en)?', + 'carica', + 'other_post', + 'посада', + '事務所', + '最高職務', + ], + html, + 'position') + self.findallbyre(r'S-ttl|title\s*=(.*?)|', + html, + 'position') + self.findallbyre(r"Categor[ií]a:((?:Re[iy]e?|Conde)s d['e].*?)(?:]|del siglo)", + html, + 'position') + self.findallbyre(r'Kategorie:\s*(König ([^[]]*))', + html, + 'position') + self.findallbyre(r'Category:([^[]]+ king)', + html, + 'position') + self.findallbyre(r'Catégorie:\s*(Roi .*?)]', + html, + 'position') + self.findallbyre(r'Kategoria:(Królowie .*?)]', + html, + 'position') + self.findallbyre(r'Kategori:(Raja .*?)]', + html, + 'position') + self.findallbyre(r'[cC]ategorie:\s*((?:Heerser|Bisschop|Grootmeester|Abdis|Koning|Drost) .*?)]', + html, + 'position')
def findtitles(self, html: str): - return self.getinfos( - [r'titre\d*', r'титул_?\d*', r'tước vị[\w\s]*', '爵位', - 'titels', 'titles', 'títuloas', r'titul(?:y|as|ai)?\d*', - '(?:altri)?titol[oi]'], html, 'title') + \ - self.findallbyre(r'Categorie:\s*((?:Heer|Vorst|Graaf) van.*?)]', html, 'title') + self.findallbyre( - r'Kategorie:\s*((?:Herzog|Herr|Graf|Vizegraf) ([^[]]*))\s*]', html, 'title') + \ - self.findallbyre(r'Catégorie:\s*((?:Duc|Prince|Comte) de.*?)]', html, 'title') + \ - self.findallbyre(r'Category:' - '((?:Du(?:k|chess)e|Princ(?:ess)?e|Lord|Margrav(?:in)?e|Grand Master|Count|Viscount)s' - r' of.*?)]', html, 'title') \ - + self.findallbyre(r'Categoría:((?:Prínciple|Señore|Conde|Duque)s de .*?)]', html, - 'title') + self.findallbyre(r'Kategória:([^[]]+ királyai)', html, 'title') + return self.getinfos([r'titre\d*', + r'титул_?\d*', + r'tước vị[\w\s]*', + '爵位', + 'titels', + 'titles', + 'títuloas', + r'titul(?:y|as|ai)?\d*', + '(?:altri)?titol[oi]'], + html, + 'title') + self.findallbyre(r'Categorie:\s*((?:Heer|Vorst|Graaf) van.*?)]', + html, + 'title') + self.findallbyre(r'Kategorie:\s*((?:Herzog|Herr|Graf|Vizegraf) ([^[]]*))\s*]', + html, + 'title') + self.findallbyre(r'Catégorie:\s*((?:Duc|Prince|Comte) de.*?)]', + html, + 'title') + self.findallbyre(r'Category:' + '((?:Du(?:k|chess)e|Princ(?:ess)?e|Lord|Margrav(?:in)?e|Grand Master|Count|Viscount)s' + r' of.*?)]', + html, + 'title') + self.findallbyre(r'Categoría:((?:Prínciple|Señore|Conde|Duque)s de .*?)]', + html, + 'title') + self.findallbyre(r'Kategória:([^[]]+ királyai)', + html, + 'title')
def findspouses(self, html: str): return self.getinfos( - ['spouse', 'consorte', 'conjoint', 'małżeństwo', 'mąż', 'супруга', 'съпруга на', r'[\w\s]*брак', - 'echtgenoot', 'echtgenote', r'配偶者\d*', r'(?:\d+. )?związek(?: z)?', 'чоловік', 'phối ngẫu', - 'vợ', 'chồng', 'الزوج', 'жонка', 'královna', 'sutuoktin(?:ė|is)', 'partners?', 'supružnik', - 'gade', 'cónyuge', 'conjoint', 'házastárs', 'дружина', 'cônjuge', 'σύζυγος', 'همسر', - 'współmałżonek', 'c[ôo]njuge', 'cónxuxe', '배우자', 'ndoa', 'supruga?', '配偶', 'abikaasa', - 'maire', - ], html, 'person', splitters='<>,;)') + \ - self.findallbyre( - r'{(?:marriage|matrimonio)|(.*?)[|}]', html, 'person') + ['spouse', 'consorte', 'conjoint', 'małżeństwo', 'mąż', 'супруга', + 'съпруга на', r'[\w\s]*брак', 'echtgenoot', 'echtgenote', + r'配偶者\d*', r'(?:\d+. )?związek(?: z)?', 'чоловік', 'phối ngẫu', + 'vợ', 'chồng', 'الزوج', 'жонка', 'královna', 'sutuoktin(?:ė|is)', + 'partners?', 'supružnik', 'gade', 'cónyuge', 'conjoint', + 'házastárs', 'дружина', 'cônjuge', 'σύζυγος', 'همسر', + 'współmałżonek', 'c[ôo]njuge', 'cónxuxe', '배우자', 'ndoa', + 'supruga?', '配偶', 'abikaasa', 'maire'], + html, 'person', splitters='<>,;)') + self.findallbyre( + r'{(?:marriage|matrimonio)|(.*?)[|}]', html, 'person')
def findpartners(self, html: str): return self.getinfos([ @@ -15850,21 +16038,26 @@ ], html, 'person')
def findfather(self, html: str): - return self.getinfo(['father', 'padre', 'vader', 'père', 'far', 'ojciec', 'отец', 'баща', '父親', 'батько', - 'cha', 'الأب', 'per', 'бацька', 'pai', 'otec', 'tėvas', 'батько', 'nome_pai', - ], html, 'person') or \ - self.getinfo(['rodzice', 'parents', 'roditelji', 'γονείς', 'والدین', 'parella', '부모', 'wazazi', 'ouers', - ], html, 'male-person') or \ - self.findbyre(r'|otec|([^|{}]*)}', html, 'person') + return self.getinfo( + ['father', 'padre', 'vader', 'père', 'far', 'ojciec', 'отец', + 'баща', '父親', 'батько', 'cha', 'الأب', 'per', 'бацька', 'pai', + 'otec', 'tėvas', 'батько', 'nome_pai'], + html, 'person') or self.getinfo( + ['rodzice', 'parents', 'roditelji', 'γονείς', 'والدین', 'parella', + '부모', 'wazazi', 'ouers'], + html, 'male-person') or self.findbyre( + r'|otec|([^|{}]*)}', html, 'person')
def findmother(self, html: str): - return self.getinfo(['mother', 'madre', 'moeder', 'mère', 'mor', - 'matka', 'мать', 'майка', '母親', 'матір', 'mẹ', - 'الأم', 'mer', 'маці', 'mãe', 'motina', 'мати', - 'nome_mãe'], html, 'person') or \ - self.getinfo(['rodzice', 'parents', 'roditelji', 'γονείς', 'والدین', 'parella', '부모', 'wazazi', 'ouers', - ], html, 'female-person') or \ - self.findbyre(r'|matka|([^|{}]*)}', html, 'person') + return self.getinfo( + ['mother', 'madre', 'moeder', 'mère', 'mor', 'matka', 'мать', + 'майка', '母親', 'матір', 'mẹ', 'الأم', 'mer', 'маці', 'mãe', + 'motina', 'мати', 'nome_mãe'], + html, 'person') or self.getinfo( + ['rodzice', 'parents', 'roditelji', 'γονείς', 'والدین', 'parella', + '부모', 'wazazi', 'ouers'], + html, 'female-person') or self.findbyre( + r'|matka|([^|{}]*)}', html, 'person')
def findchildren(self, html: str): return self.getinfos([ @@ -15897,26 +16090,67 @@ ], html, 'person')
def findsiblings(self, html: str): - return self.getinfos(['broerzus', 'rodzeństwo', 'rodbina', 'broer', 'zuster', 'αδέλφια', '형제자매', - ], html, 'person') + \ - self.findallbyre(r'|(?:bratr|sestra)|([^|{}]*)}', html, 'person') + \ - self.findallbyre(r'[[([^[]]*)]] (brat)', html, 'person') + return self.getinfos(['broerzus', + 'rodzeństwo', + 'rodbina', + 'broer', + 'zuster', + 'αδέλφια', + '형제자매', + ], + html, + 'person') + self.findallbyre(r'|(?:bratr|sestra)|([^|{}]*)}', + html, + 'person') + self.findallbyre(r'[[([^[]]*)]] (brat)', + html, + 'person')
def findkins(self, html: str): return self.getinfos(['родичі', 'famille', '著名な家族', '친척'], html, 'person')
def findfamily(self, html: str): - return self.getinfo(['house', 'd[iy]nast[iyí]j?[ae]?', 'famille', 'noble family', 'rodzina', 'род', 'династия', - '王家', '王朝', 'hoàng tộc', 'casa', '家名・爵位', 'рід'], html, 'family') or \ - self.findbyre(r'Categorie:\s*Huis(.*?)]]', html, 'family') or self.findbyre( - r'Catégorie:\s*Maison (.*?)]]', html, 'family') or \ - self.findbyre(r'Category:([^[]]*)(?:dynasty|family)', html, 'family') or self.findbyre( - r'Kategorie:\s*Haus(.*?)]', html, 'family') or \ - self.findbyre(r'Categor[ií]a:Casa(?:to)? d[ei](.*?)]', html, 'family') or self.findbyre( - r'Kategory:Hûs(.*?)]', html, 'family') or \ - self.findbyre(r'Categorie:\s*([^[]]*)dynastie]', html, 'family') or self.findbyre( - r'Category:House of(.*?)]', html, 'family') + return self.getinfo( + [ + 'house', + 'd[iy]nast[iyí]j?[ae]?', + 'famille', + 'noble family', + 'rodzina', + 'род', + 'династия', + '王家', + '王朝', + 'hoàng tộc', + 'casa', + '家名・爵位', + 'рід'], + html, + 'family') or self.findbyre( + r'Categorie:\s*Huis(.*?)]]', + html, + 'family') or self.findbyre( + r'Catégorie:\s*Maison (.*?)]]', + html, + 'family') or self.findbyre( + r'Category:([^[]]*)(?:dynasty|family)', + html, + 'family') or self.findbyre( + r'Kategorie:\s*Haus(.*?)]', + html, + 'family') or self.findbyre( + r'Categor[ií]a:Casa(?:to)? d[ei](.*?)]', + html, + 'family') or self.findbyre( + r'Kategory:Hûs(.*?)]', + html, + 'family') or self.findbyre( + r'Categorie:\s*([^[]]*)dynastie]', + html, + 'family') or self.findbyre( + r'Category:House of(.*?)]', + html, + 'family')
def findgens(self, html: str): return self.findbyre(r'Categorie:\s*Gens(.*?)]]', html, 'gens') @@ -15926,116 +16160,401 @@ r'{{(?:[bB]irth[-\s]date(?: and age)?|dni|[dD]oğum tarihi ve yaşı|출생일(?:과 나이)?|[gG]eboortedatum(?: en ouderdom)?|' 'Data naixement|[dD]atum narození a věk|Naskiĝdato|[dD]atum rođenja|生年月日と年齢|死亡年月日と没年齢|' 'роден на|[dD]ate de naissance|' - r')\s*(?:|df=\w+)?|(\d+|\d+|\d+)', html) or \ - self.findbyre(r'{{[dD]ate de naissance|([\w\s]+)}}', html) or\ - self.findbyre(r'{{(?:[bB]irth year and age)|(\d+)', html) or\ - self.getinfo(['geburtsdatum', 'birth[_ ]?date', 'data di nascita', 'annonascita', 'geboren?', 'født', - 'data urodzenia', 'data_naixement', 'gbdat', 'data_nascimento', 'дата рождения', '出生日', - 'дата_народження', 'geboortedatum', 'sinh', 'fecha de nacimiento', 'تاريخ الولادة', - 'date de naissance', 'дата нараджэння', 'data de nascimento', 'datum narození', 'gimė', - 'תאריך לידה', 'születési dátum', 'дата народження', 'jaiotza data', 'nascimento_data', - 'ημερομηνία γέννησης', 'туған күні', 'datum_rođenja', 'تاریخ تولد', 'teraka', 'alizaliwa', - 'naskiĝjaro', 'rođenje', '出生日期', 'dato de naskiĝo', 'sünnlaeg', 'syntymäaika', - 'туған күні', '태어난 날', 'datadenaissença', 'doğum_tarihi'], - html) \ - or self.findbyre(r'Category:\s*(\d+) births', html) or self.findbyre(r'Kategorie:\s*Geboren (\d+)', html) or \ - self.findbyre(r'Catégorie:\s*Naissance en ([^[]]*)]', html) or self.findbyre( - r'Categorie:\s*Nașteri în (.*?)]', html) or \ - self.findbyre(r'(.*)-', self.getinfo(['leven'], html) or '') or self.findbyre( - r'Kategory:Persoan berne yn(.*?)]', html) or \ - self.findbyre(r'{{bd|([^{}]*?)|', - html) or self.findbyre(r'(\d+)年生', html) + r')\s*(?:|df=\w+)?|(\d+|\d+|\d+)', + html) or self.findbyre( + r'{{[dD]ate de naissance|([\w\s]+)}}', + html) or self.findbyre( + r'{{(?:[bB]irth year and age)|(\d+)', + html) or self.getinfo( + [ + 'geburtsdatum', + 'birth[_ ]?date', + 'data di nascita', + 'annonascita', + 'geboren?', + 'født', + 'data 
urodzenia', + 'data_naixement', + 'gbdat', + 'data_nascimento', + 'дата рождения', + '出生日', + 'дата_народження', + 'geboortedatum', + 'sinh', + 'fecha de nacimiento', + 'تاريخ الولادة', + 'date de naissance', + 'дата нараджэння', + 'data de nascimento', + 'datum narození', + 'gimė', + 'תאריך לידה', + 'születési dátum', + 'дата народження', + 'jaiotza data', + 'nascimento_data', + 'ημερομηνία γέννησης', + 'туған күні', + 'datum_rođenja', + 'تاریخ تولد', + 'teraka', + 'alizaliwa', + 'naskiĝjaro', + 'rođenje', + '出生日期', + 'dato de naskiĝo', + 'sünnlaeg', + 'syntymäaika', + 'туған күні', + '태어난 날', + 'datadenaissença', + 'doğum_tarihi'], + html) or self.findbyre( + r'Category:\s*(\d+) births', + html) or self.findbyre( + r'Kategorie:\s*Geboren (\d+)', + html) or self.findbyre( + r'Catégorie:\s*Naissance en ([^[]]*)]', + html) or self.findbyre( + r'Categorie:\s*Nașteri în (.*?)]', + html) or self.findbyre( + r'(.*)-', + self.getinfo( + ['leven'], + html) or '') or self.findbyre( + r'Kategory:Persoan berne yn(.*?)]', + html) or self.findbyre( + r'{{bd|([^{}]*?)|', + html) or self.findbyre( + r'(\d+)年生', + html)
def finddeathdate(self, html: str): - return self.findbyre(r'{{(?:[dD]eath (?:date|year)[\w\s]*|morte|사망일과 나이|Data defunció i edat|[dD]atum smrti i godine|' - '[dD]atum úmrtí a věk|[dD]ate de décès|починал на|[sS]terfdatum(?: en ouderdom)?|' - r')|(\d+|\d+|\d+)[}|]', html) or \ - self.findbyre(r'{{(?:死亡年月日と没年齢)|\d+|\d+|\d+|(\d+|\d+|\d+)[}|]', html) or\ + return self.findbyre( + r'{{(?:[dD]eath (?:date|year)[\w\s]*|morte|사망일과 나이|Data defunció i edat|[dD]atum smrti i godine|' + '[dD]atum úmrtí a věk|[dD]ate de décès|починал на|[sS]terfdatum(?: en ouderdom)?|' + r')|(\d+|\d+|\d+)[}|]', + html) or self.findbyre( + r'{{(?:死亡年月日と没年齢)|\d+|\d+|\d+|(\d+|\d+|\d+)[}|]', + html) or self.getinfo( + [ + 'sterbedatum', + 'death[_ ]?date', + 'data di morte', + 'annomorte', + 'date de décès', + 'gestorven', + 'død', + 'data śmierci', + 'data_defuncio', + r'sterf(?:te)?dat\w*', + 'data_morte', + 'дата смерти', + '死亡日', + 'дата_смерті', + 'mất', + 'overlijdensdatum', + 'overleden', + 'fecha de defunción', + 'تاريخ الوفاة', + 'datum_smrti', + 'дата смерці', + 'dta da morte', + 'datum úmrtí', + 'mirė', + 'oorlede', + 'fecha de fallecimiento', + 'תאריך פטירה', + 'halál dátuma', + 'дата смерті', + 'heriotza data', + 'morte_data', + 'ημερομηνία θανάτου', + 'қайтыс болған күн7і', + 'datum_smrti', + 'ölüm_tarihi', + 'تاریخ مرگ', + 'falecimento', + 'maty', + 'alikufa', + 'mortjaro', + 'smrt', + '逝世日期', + 'surmaaeg', + 'kuolinaika', + 'қайтыс болған күні', + '죽은 날', + 'datadedecès', + 'ölüm_tarihi', + ], + html) or self.findbyre( + r'Category:\s*(\d+) deaths', + html) or self.findbyre( + r'Catégorie:\s*Décès en ([^[]]*)]', + html) or self.findbyre( + r'Kategorie:\s*Gestorben (\d+)', + html) or self.findbyre( + r'{{death year and age|(.*?)|', + html) or self.findbyre( + r'Categoria:Mortos em (.*?)]', + html) or self.findbyre( + r'Category:(\d+)年没]', + html) or self.findbyre( + r'Categorie:\s*Decese în (.*?)]', + html) or self.findbyre( + r'Kategori:Kematian(.*?)]', + html) or self.findbyre( + 
r'Kategory:Persoan stoarn yn (.*?)]', + html) or self.findbyre( + r'-(.*)', self.getinfo( - ['sterbedatum', 'death[_ ]?date', 'data di morte', 'annomorte', 'date de décès', 'gestorven', 'død', - 'data śmierci', 'data_defuncio', r'sterf(?:te)?dat\w*', 'data_morte', 'дата смерти', '死亡日', - 'дата_смерті', 'mất', 'overlijdensdatum', 'overleden', 'fecha de defunción', 'تاريخ الوفاة', - 'datum_smrti', 'дата смерці', 'dta da morte', 'datum úmrtí', 'mirė', 'oorlede', - 'fecha de fallecimiento', 'תאריך פטירה', 'halál dátuma', 'дата смерті', 'heriotza data', - 'morte_data', 'ημερομηνία θανάτου', 'қайтыс болған күн7і', 'datum_smrti', 'ölüm_tarihi', - 'تاریخ مرگ', 'falecimento', 'maty', 'alikufa', 'mortjaro', 'smrt', '逝世日期', 'surmaaeg', - 'kuolinaika', 'қайтыс болған күні', '죽은 날', 'datadedecès', 'ölüm_tarihi', - ], html) or \ - self.findbyre(r'Category:\s*(\d+) deaths', html) or \ - self.findbyre(r'Catégorie:\s*Décès en ([^[]]*)]', html) or \ - self.findbyre(r'Kategorie:\s*Gestorben (\d+)', html) or \ - self.findbyre(r'{{death year and age|(.*?)|', html) or \ - self.findbyre(r'Categoria:Mortos em (.*?)]', html) or self.findbyre(r'Category:(\d+)年没]', html) or \ - self.findbyre(r'Categorie:\s*Decese în (.*?)]', html) or\ - self.findbyre(r'Kategori:Kematian(.*?)]', html) or \ - self.findbyre(r'Kategory:Persoan stoarn yn (.*?)]', html) or \ - self.findbyre(r'-(.*)', self.getinfo(['leven'], html) or '') or self.findbyre(r'(\d+)年没', html) or \ - self.findbyre(r'{{bd|[^[|{}]*|[^[|{}]*|([^[|{}]*)|', html) + ['leven'], + html) or '') or self.findbyre( + r'(\d+)年没', + html) or self.findbyre( + r'{{bd|[^[|{}]*|[^[|{}]*|([^[|{}]*)|', + html)
def findburialdate(self, html: str): return self.getinfo(['埋葬日', 'datum pohřbení'], html)
def findbirthplace(self, html: str): return self.getinfo( - ['birth[_ ]?place', 'luogo di nascita', r'luogonascita\w*', 'geboren_in', 'geburtsort', 'fødested', - 'geboorteplaats', 'miejsce urodzenia', 'lloc_naixement', 'gbplaats', 'место рождения', 'място на раждане', - '生地', 'місце_народження', r'lugar\s*de\s*nac[ei]mi?ento', 'مكان الولادة', 'lieu de naissance', - 'месца нараджэння', 'local de nascimento', 'místo narození', 'gimimo vieta', 'geboortestad', - 'geboorteplek', 'תאריך לידה', 'születési hely', '出生地點?', 'місце народження', 'nascimento_local', - 'τόπος γέννησης', 'туған жері', r'mj?esto[\s_]rođenja', 'doğum_yeri', 'محل تولد', 'local_nascimento', - 'роден-място', '출생지', 'naskiĝloko', 'loko de naskiĝo', 'sünnikoht', 'syntymäpaikka', 'туған жері', - '태어난 곳', 'luòcdenaissença', 'doğum_yeri', - ], html, 'city') or \ - self.findbyre(r'Category:Births in(.*?)]', html, 'city') or \ - self.findbyre(r'Categoria:Naturais de(.*?)]', html, 'city') + [ + 'birth[_ ]?place', + 'luogo di nascita', + r'luogonascita\w*', + 'geboren_in', + 'geburtsort', + 'fødested', + 'geboorteplaats', + 'miejsce urodzenia', + 'lloc_naixement', + 'gbplaats', + 'место рождения', + 'място на раждане', + '生地', + 'місце_народження', + r'lugar\s*de\s*nac[ei]mi?ento', + 'مكان الولادة', + 'lieu de naissance', + 'месца нараджэння', + 'local de nascimento', + 'místo narození', + 'gimimo vieta', + 'geboortestad', + 'geboorteplek', + 'תאריך לידה', + 'születési hely', + '出生地點?', + 'місце народження', + 'nascimento_local', + 'τόπος γέννησης', + 'туған жері', + r'mj?esto[\s_]rođenja', + 'doğum_yeri', + 'محل تولد', + 'local_nascimento', + 'роден-място', + '출생지', + 'naskiĝloko', + 'loko de naskiĝo', + 'sünnikoht', + 'syntymäpaikka', + 'туған жері', + '태어난 곳', + 'luòcdenaissença', + 'doğum_yeri', + ], + html, + 'city') or self.findbyre( + r'Category:Births in(.*?)]', + html, + 'city') or self.findbyre( + r'Categoria:Naturais de(.*?)]', + html, + 'city')
def finddeathplace(self, html: str): return self.getinfo( - ['death[_ ]?place', 'luogo di morte', 'luogomorte', 'lieu de décès', 'gestorven_in', 'sterbeort', - 'dødested', 'miejsce śmierci', 'lloc_defuncio', 'sterfplaats', 'место смерти', 'място на смърт(?:та)?', - '没地', 'місце_смерті', 'nơi mất', 'overlijdensplaats', 'lugar de defunción', 'مكان الوفاة', - 'месца смерці', 'local da morte', 'místo úmrtí', 'mirties vieta', 'stadvanoverlijden', 'מקום פטירה', - 'sterfteplek', 'lugar de fallecimiento', 'halál helye', '死没地', 'місце смерті', 'morte_local', - 'τόπος θανάτου', 'қайтыс болған жері', r'mj?esto_[\s_]mrti', 'ölüm_yeri', 'محل مرگ', 'починал-място', - 'lugardefalecemento', '사망지', 'mortloko', '逝世地點', 'surmakoht', 'kuolinpaikka', 'қайтыс болған жері', - '죽은 곳', 'plaatsvanoverlijden', 'luòcdedecès', 'ölüm_sebebi', - ], html, 'city') or \ - self.findbyre(r'{{МестоСмерти|([^{}|]*)', html, 'city') or \ - self.findbyre(r'Category:Deaths in(.*?)]', html, 'city') + [ + 'death[_ ]?place', + 'luogo di morte', + 'luogomorte', + 'lieu de décès', + 'gestorven_in', + 'sterbeort', + 'dødested', + 'miejsce śmierci', + 'lloc_defuncio', + 'sterfplaats', + 'место смерти', + 'място на смърт(?:та)?', + '没地', + 'місце_смерті', + 'nơi mất', + 'overlijdensplaats', + 'lugar de defunción', + 'مكان الوفاة', + 'месца смерці', + 'local da morte', + 'místo úmrtí', + 'mirties vieta', + 'stadvanoverlijden', + 'מקום פטירה', + 'sterfteplek', + 'lugar de fallecimiento', + 'halál helye', + '死没地', + 'місце смерті', + 'morte_local', + 'τόπος θανάτου', + 'қайтыс болған жері', + r'mj?esto_[\s_]mrti', + 'ölüm_yeri', + 'محل مرگ', + 'починал-място', + 'lugardefalecemento', + '사망지', + 'mortloko', + '逝世地點', + 'surmakoht', + 'kuolinpaikka', + 'қайтыс болған жері', + '죽은 곳', + 'plaatsvanoverlijden', + 'luòcdedecès', + 'ölüm_sebebi', + ], + html, + 'city') or self.findbyre( + r'{{МестоСмерти|([^{}|]*)', + html, + 'city') or self.findbyre( + r'Category:Deaths in(.*?)]', + html, + 'city')
def findburialplace(self, html: str): return self.getinfo( - ['place of burial', 'sepoltura', 'begraven', 'gravsted', 'resting_place', 'miejsce spoczynku', 'sepultura', - 'похоронен', 'погребан', '埋葬地', '陵墓', 'burial_place', 'lugar de entierro', 'مكان الدفن', - 'local de enterro', 'místo pohřbení', 'palaidotas', 'поховання', 'مدفن', '墓葬'], - html, 'cemetery', - alt=['city']) \ - or self.findbyre(r'Category:Burials at (.*?)]', html, 'cemetery') + [ + 'place of burial', + 'sepoltura', + 'begraven', + 'gravsted', + 'resting_place', + 'miejsce spoczynku', + 'sepultura', + 'похоронен', + 'погребан', + '埋葬地', + '陵墓', + 'burial_place', + 'lugar de entierro', + 'مكان الدفن', + 'local de enterro', + 'místo pohřbení', + 'palaidotas', + 'поховання', + 'مدفن', + '墓葬'], + html, + 'cemetery', + alt=['city']) or self.findbyre( + r'Category:Burials at (.*?)]', + html, + 'cemetery')
def findreligions(self, html: str): - return self.getinfos(['religione?', '宗教', 'wyznanie', 'religij?[ea]', 'الديانة', r'церковь_?\d*', 'church', - 'конфесія', 'religião', '종교', 'uskonto', 'dini', - ], html, 'religion') + \ - self.findallbyre(r'Catégorie:Religieux(.*?)]', html, 'religion') + return self.getinfos(['religione?', + '宗教', + 'wyznanie', + 'religij?[ea]', + 'الديانة', + r'церковь_?\d*', + 'church', + 'конфесія', + 'religião', + '종교', + 'uskonto', + 'dini', + ], + html, + 'religion') + self.findallbyre(r'Catégorie:Religieux(.*?)]', + html, + 'religion')
def findnationalities(self, html: str): return self.getinfos( - [r'nazionalità[\w\s_]*', 'allégeance', 'land', 'nationality', 'narodowość', 'państwo', 'громадянство', - 'нац[іи]ональ?н[іо]сть?', 'الجنسية', 'nacionalnost', 'nationalité', 'na[ts]ionaliteit', 'citizenship', - 'geboorteland', 'nacionalidade?', 'מדינה', '国籍', 'підданство', 'εθνικότητα', 'υπηκοότητα', - R'nazione\d*', 'азаматтығы', 'ملیت', 'гражданство', 'nacionalitat', 'firenena', 'nchi', - r'nationalteam\d*', 'ŝtato', '國家', 'občanství', 'kodakondsus', 'rahvus', 'kansalaisuus', - 'nationalité', 'állampolgárság', 'азаматтығы', '국적', 'paísdorigina', 'milliyeti', - ], html, 'country') or \ - self.findallbyre(r'Category:\d+th-century people of (.*?)]]', html, 'country') or \ - self.findallbyre(r'Categorie:\s*Persoon in([^[]]+)in de \d+e eeuw', html, 'country') or \ - self.findallbyre(r'Category:\d+th-century ([^[]]+) people]]', html, 'country') or \ - self.findallbyre(r'Category:([^[]]+) do século [IVX]+]]', html, 'country') or \ - self.findallbyre(r'Kategorie:\s*Person (([^[]]*))]', html, 'country') or \ - self.findallbyre(r'Kategori:Tokoh(.*?)]', html, 'country') or \ - self.findallbyre(r'Categoria:([^[]]+) del Segle [IVX]+', html, 'country') or \ - self.findallbyre( - r'Categorie:\s*([^[]]*) persoon]', html, 'country') + [ + r'nazionalità[\w\s_]*', + 'allégeance', + 'land', + 'nationality', + 'narodowość', + 'państwo', + 'громадянство', + 'нац[іи]ональ?н[іо]сть?', + 'الجنسية', + 'nacionalnost', + 'nationalité', + 'na[ts]ionaliteit', + 'citizenship', + 'geboorteland', + 'nacionalidade?', + 'מדינה', + '国籍', + 'підданство', + 'εθνικότητα', + 'υπηκοότητα', + R'nazione\d*', + 'азаматтығы', + 'ملیت', + 'гражданство', + 'nacionalitat', + 'firenena', + 'nchi', + r'nationalteam\d*', + 'ŝtato', + '國家', + 'občanství', + 'kodakondsus', + 'rahvus', + 'kansalaisuus', + 'nationalité', + 'állampolgárság', + 'азаматтығы', + '국적', + 'paísdorigina', + 'milliyeti', + ], + html, + 'country') or self.findallbyre( + 
r'Category:\d+th-century people of (.*?)]]', + html, + 'country') or self.findallbyre( + r'Categorie:\s*Persoon in([^[]]+)in de \d+e eeuw', + html, + 'country') or self.findallbyre( + r'Category:\d+th-century ([^[]]+) people]]', + html, + 'country') or self.findallbyre( + r'Category:([^[]]+) do século [IVX]+]]', + html, + 'country') or self.findallbyre( + r'Kategorie:\s*Person (([^[]]*))]', + html, + 'country') or self.findallbyre( + r'Kategori:Tokoh(.*?)]', + html, + 'country') or self.findallbyre( + r'Categoria:([^[]]+) del Segle [IVX]+', + html, + 'country') or self.findallbyre( + r'Categorie:\s*([^[]]*) persoon]', + html, + 'country')
def findorigcountries(self, html: str): return self.getinfos([ @@ -16043,13 +16562,25 @@ ], html, 'country')
def findlastname(self, html: str): - return self.getinfo(['cognome', 'surnom', 'familinomo', 'priezvisko', 'lastname'], html, 'lastname') or \ - self.findbyre( - r'(?:DEFAULTSORT|SORTIERUNG):([^{},]+),', html, 'lastname') + return self.getinfo( + [ + 'cognome', + 'surnom', + 'familinomo', + 'priezvisko', + 'lastname'], + html, + 'lastname') or self.findbyre( + r'(?:DEFAULTSORT|SORTIERUNG):([^{},]+),', + html, + 'lastname')
def findfirstname(self, html: str): - return self.getinfo(['antaŭnomo', 'nome', 'meno', 'firstname'], html, 'firstname') \ - or self.findbyre(r'(?:DEFAULTSORT|SORTIERUNG|ORDENA):[^{},]+,\s*([\w-]+)', html, 'firstname') + return self.getinfo( + ['antaŭnomo', 'nome', 'meno', 'firstname'], + html, 'firstname') or self.findbyre( + r'(?:DEFAULTSORT|SORTIERUNG|ORDENA):[^{},]+,\s*([\w-]+)', html, + 'firstname')
def findgender(self, html: str): return self.getinfo(['sesso'], html, 'gender') or \ @@ -16057,10 +16588,13 @@ html, 'gender')
def findmemberships(self, html: str): - return self.getinfos(['org', 'groep'], html, 'organization') + \ - self.getinfos(['associated_acts', 'artistas_relacionados'], html, 'group') + \ - self.findallbyre( - r'Categor(?:ie|y):\s*(?:Lid van|Members of)(.*?)]]', html, 'organization') + return self.getinfos( + ['org', 'groep'], + html, 'organization') + self.getinfos( + ['associated_acts', 'artistas_relacionados'], + html, 'group') + self.findallbyre( + r'Categor(?:ie|y):\s*(?:Lid van|Members of)(.*?)]]', html, + 'organization')
def findmixedrefs(self, html: str): imdb = self.findbyre(r'IMDb name|([^{}]*)|', html) or self.getinfo([ @@ -16086,22 +16620,39 @@ ]
def findschools(self, html: str): - return self.getinfos(['education', 'alma[ _]?m[aá]ter', 'edukacja', r'[\w\s]*uczelnia', 'formation', 'skool', - 'universiteit', 'educacio', 'альма-матер', 'diplôme', 'iskolái', '출신 대학', - ], html, 'university') + \ - self.findallbyre(r'Kategorie:\s*Absolvent de[rs] (.*?)]', html, 'university') + \ - self.findallbyre(r'Category:\s*Alumni of(?: the)?(.*?)]', html, 'university') + \ - self.findallbyre(r'Category:People educated at(.*?)]', html, 'university') + \ - self.findallbyre(r'Category:([^[]]+) alumni]', html, 'university') +\ - self.findallbyre(r'Categoria:Alunos do (.*?)]', - html, 'university') + return self.getinfos(['education', + 'alma[ _]?m[aá]ter', + 'edukacja', + r'[\w\s]*uczelnia', + 'formation', + 'skool', + 'universiteit', + 'educacio', + 'альма-матер', + 'diplôme', + 'iskolái', + '출신 대학', + ], + html, + 'university') + self.findallbyre(r'Kategorie:\s*Absolvent de[rs] (.*?)]', + html, + 'university') + self.findallbyre(r'Category:\s*Alumni of(?: the)?(.*?)]', + html, + 'university') + self.findallbyre(r'Category:People educated at(.*?)]', + html, + 'university') + self.findallbyre(r'Category:([^[]]+) alumni]', + html, + 'university') + self.findallbyre(r'Categoria:Alunos do (.*?)]', + html, + 'university')
def findemployers(self, html: str): return self.getinfos( - ['employer', 'pracodawca', 'institutions', 'empleador', r'jednostka podrz\d* nazwa', - 'workplaces', 'instituutti', 'жұмыс орны', '소속', 'çalıştığı_yerler'], - html, 'employer', alt=['university']) \ - + self.findallbyre(r'Category:([^[]]+) faculty', html, 'university') + ['employer', 'pracodawca', 'institutions', 'empleador', + r'jednostka podrz\d* nazwa', 'workplaces', 'instituutti', + 'жұмыс орны', '소속', 'çalıştığı_yerler'], + html, 'employer', alt=['university']) + self.findallbyre( + r'Category:([^[]]+) faculty', html, 'university')
def findteachers(self, html: str): return self.getinfos( @@ -16140,15 +16691,35 @@ return self.getinfos(['קישור'], html)
def findmannerdeath(self, html: str): - return self.getinfo(['przyczyna śmierci', 'причина_смерті', 'سبب الوفاة', 'doodsoorzaak', 'death_cause', - 'причина смерті'], html, 'mannerdeath') or \ - self.findbyre(r'Categoría:Fallecidos por(.*?)]', - html, 'mannerdeath') + return self.getinfo( + [ + 'przyczyna śmierci', + 'причина_смерті', + 'سبب الوفاة', + 'doodsoorzaak', + 'death_cause', + 'причина смерті'], + html, + 'mannerdeath') or self.findbyre( + r'Categoría:Fallecidos por(.*?)]', + html, + 'mannerdeath')
def findcausedeath(self, html: str): - return self.getinfo(['przyczyna śmierci', 'причина_смерті', 'سبب الوفاة', 'doodsoorzaak', 'death_cause', - 'причина смерті', 'vatandaşlığı'], html, 'causedeath') \ - or self.findbyre(r'Categoría:Fallecidos por(.*?)]', html, 'causedeath') + return self.getinfo( + [ + 'przyczyna śmierci', + 'причина_смерті', + 'سبب الوفاة', + 'doodsoorzaak', + 'death_cause', + 'причина смерті', + 'vatandaşlığı'], + html, + 'causedeath') or self.findbyre( + r'Categoría:Fallecidos por(.*?)]', + html, + 'causedeath')
def findresidences(self, html: str): return self.getinfos([ @@ -16322,15 +16893,34 @@ ], html, 'movement')
def findnotableworks(self, html: str): - return self.getinfos([r'notable[\s_]?works?', 'bekende-werken', R'\w+ notables', '主な作品', - 'œuvres principales', 'principais_trabalhos', 'bitna uloga', 'obra-prima', - '著作', 'belangrijke_projecten', 'known_for', 'tuntumad_tööd', 'tunnetut työt', + return self.getinfos([r'notable[\s_]?works?', + 'bekende-werken', + R'\w+ notables', + '主な作品', + 'œuvres principales', + 'principais_trabalhos', + 'bitna uloga', + 'obra-prima', + '著作', + 'belangrijke_projecten', + 'known_for', + 'tuntumad_tööd', + 'tunnetut työt', 'munkái', - ], html, 'work') + \ - self.getinfos(['films notables', 'значими филми', 'millors_films', 'znameniti_filmovi', - 'noemenswaardige rolprente', 'važniji filmovi', - ], html, 'film', alt=['work']) +\ - self.getinfos(['belangrijke_gebouwen'], html, 'building') + ], + html, + 'work') + self.getinfos(['films notables', + 'значими филми', + 'millors_films', + 'znameniti_filmovi', + 'noemenswaardige rolprente', + 'važniji filmovi', + ], + html, + 'film', + alt=['work']) + self.getinfos(['belangrijke_gebouwen'], + html, + 'building')
def findworkfields(self, html: str): return self.getinfos([ @@ -16395,11 +16985,22 @@ return self.getinfo(['hlasový obor'], html, 'voice')
def findlabels(self, html: str): - return self.getinfos([ - 'label', 'etichetta', 'discográfica', 'levy-yhtiö' - 'լեյբլեր', 'gravadora', 'pla[dt]eselska[bp]', 'selo', 'skivbolag', - 'wytwórnia płytowa', 'casă de discuri', 'лейблы', 'vydavatel', 'レーベル' - ], html, 'label') + return self.getinfos(['label', + 'etichetta', + 'discográfica', + 'levy-yhtiö' + 'լեյբլեր', + 'gravadora', + 'pla[dt]eselska[bp]', + 'selo', + 'skivbolag', + 'wytwórnia płytowa', + 'casă de discuri', + 'лейблы', + 'vydavatel', + 'レーベル'], + html, + 'label')
def findstudents(self, html: str): return self.getinfos([ @@ -16599,8 +17200,7 @@ if alt is None: alt = [] prevalue = self.findbyre( - r'(?s)<h3[^<>]*>\s*{}\s*</h3>(.*?)(?:<h3|<div class="scholar__)' - .format(field), html) + rf'(?s)<h3[^<>]*>\s*{field}\s*</h3>(.*?)(?:<h3|<div class="scholar__)', html) if prevalue: return self.findbyre( r'(?s)^(?:<[^<>]*>|\s)*(.*?)(?:<[^<>]*>|\s)*$', prevalue, @@ -16611,8 +17211,7 @@ if alt is None: alt = [] section = self.findbyre( - r'(?s)<h3[^<>]*>\s*{}\s*</h3>(.*?)(?:<h3|<div class="scholar__)' - .format(field), html) + rf'(?s)<h3[^<>]*>\s*{field}\s*</h3>(.*?)(?:<h3|<div class="scholar__)', html) if section: return self.findallbyre(r'(?s)>([^<>]*)<', section, dtype, alt=alt) or [] @@ -16622,12 +17221,11 @@ if alt is None: alt = [] section = self.findbyre( - r'(?s)<h3[^<>]*>\s*{}\s*</h3>(.*?)(?:<h3|<div class="scholar__)' - .format(field), html) + rf'(?s)<h3[^<>]*>\s*{field}\s*</h3>(.*?)(?:<h3|<div class="scholar__)', html) if section: return self.findallbyre( - r'(?s)<div class="[^"]*{}[^"]*"><div class="field__item">(.*?)</div>' - .format(secondfield), section, dtype, alt=alt) or [] + rf'(?s)<div class="[^"]*{secondfield}[^"]*"><div class="field__item">(.*?)</div>', + section, dtype, alt=alt) or [] return []
def findinstanceof(self, html: str): @@ -16659,9 +17257,14 @@ return self.getsubvalues('Honors', 'honor-description', html, 'award')
def findemployers(self, html: str): - return self.getvalues('Home Institution', html, 'employer', alt=['university']) +\ - self.getsubvalues('Appointments', 'organization', - html, 'employer', alt=['university']) + return self.getvalues('Home Institution', + html, + 'employer', + alt=['university']) + self.getsubvalues('Appointments', + 'organization', + html, + 'employer', + alt=['university'])
def findworkfields(self, html: str): return self.getvalues('Field of Study', html, 'subject') @@ -17033,7 +17636,8 @@ def finddeathplace(self, html: str): return self.findbyre( r'(?s)<strong>†</strong>\s*</div>\s*<div[^<>]*>\s*<span>[^<>]*</span>\s*<span>\s*(?:in )([^<>]*)<', - html, 'city') + html, + 'city')
def findoccupations(self, html: str): section = self.findbyre( @@ -17072,8 +17676,7 @@ def getrelations(self, relation, html): return [ x.upper() for x in self.findallbyre( - r'statement/([qQ]\d+)[^{{}}]+statement/{}[^\d]'.format( - relation), html) + rf'statement/([qQ]\d+)[^{{}}]+statement/{relation}[^\d]', html) ]
def findlongtext(self, html: str): diff --git a/scripts/delete.py b/scripts/delete.py index 8417e37..d9b334e 100755 --- a/scripts/delete.py +++ b/scripts/delete.py @@ -51,7 +51,7 @@ python pwb.py delete -cat:"To delete" -always """ # -# (C) Pywikibot team, 2013-2023 +# (C) Pywikibot team, 2013-2024 # # Distributed under the terms of the MIT license. # @@ -159,8 +159,8 @@
total = sum(len(v) for v in refs.values()) if total > 1: - pywikibot.warning('There are {} pages that link to {}.' - .format(total, self.current_page)) + pywikibot.warning( + f'There are {total} pages that link to {self.current_page}.') else: pywikibot.warning( f'There is a page that links to {self.current_page}.') @@ -206,9 +206,8 @@ ns_with_ref = sorted(ns_with_ref) if ns_with_ref: ns_names = ', '.join(str(ns.id) for ns in ns_with_ref) - pywikibot.info( - 'Skipping: {} is not orphan in ns: {}.'.format( - self.current_page, ns_names)) + pywikibot.info(f'Skipping: {self.current_page} is not ' + f'orphan in ns: {ns_names}.') return # Not an orphan, do not delete.
if self.current_page.site.user() is None: diff --git a/scripts/delinker.py b/scripts/delinker.py index 05daeed..cb78714 100755 --- a/scripts/delinker.py +++ b/scripts/delinker.py @@ -30,7 +30,7 @@ This script is completely rewriten from compat branch. """ # -# (C) Pywikibot team, 2006-2023 +# (C) Pywikibot team, 2006-2024 # # Distributed under the terms of the MIT license. # @@ -113,9 +113,8 @@ shown = False for page in file_page.using_pages(content=True, namespaces=0): if not shown: - pywikibot.info( - '\n>>> <<lightgreen>>Delinking {}<<default>> <<<' - .format(file_page.title())) + pywikibot.info('\n>>> <<lightgreen>>Delinking ' + f'{file_page.title()}<<default>> <<<') shown = True super().treat(page)
diff --git a/scripts/djvutext.py b/scripts/djvutext.py index 564dcd8..eda0d92 100755 --- a/scripts/djvutext.py +++ b/scripts/djvutext.py @@ -123,9 +123,9 @@
if page.exists() and not self.opt.force: pywikibot.info( - 'Page {} already exists, not adding!\n' + f'Page {page} already exists, not adding!\n' 'Use -force option to overwrite the output page.' - .format(page)) + ) else: self.userPut(page, old_text, new_text, summary=self.opt.summary)
diff --git a/scripts/download_dump.py index f374923..46a5890 100755 --- a/scripts/download_dump.py +++ b/scripts/download_dump.py @@ -108,9 +108,8 @@ remove(file_final_storepath) symlink(toolforge_dump_filepath, file_current_storepath) else: - url = 'https://dumps.wikimedia.org/{}/{}/{}'.format( - self.opt.wikiname, self.opt.dumpdate, - download_filename) + url = (f'https://dumps.wikimedia.org/{self.opt.wikiname}/' + f'{self.opt.dumpdate}/{download_filename}') pywikibot.info('Downloading file from ' + url) response = fetch(url, stream=True)
diff --git a/scripts/harvest_template.py b/scripts/harvest_template.py index 55f0344..6c89c49 100755 --- a/scripts/harvest_template.py +++ b/scripts/harvest_template.py @@ -306,8 +306,8 @@ template = pywikibot.Page(page.site, template, ns=10) except InvalidTitleError: pywikibot.error( - 'Failed parsing template; {!r} should be ' - 'the template name.'.format(template)) + f'Failed parsing template; {template!r} should be ' + 'the template name.') continue
if template.title(with_ns=False) not in self.templateTitles: @@ -362,8 +362,8 @@ and isinstance(target, pywikibot.ItemPage)): inverse_ppage = pywikibot.PropertyPage(self.repo, inverse_prop) if inverse_ppage.type != 'wikibase-item': - raise ValueError("{} does not have 'wikibase-item' type" - .format(inverse_ppage)) + raise ValueError( + f"{inverse_ppage} does not have 'wikibase-item' type") inverse_claim = inverse_ppage.newClaim() inverse_claim.setTarget(item) self.user_add_claim_unless_exists( @@ -502,8 +502,8 @@ image = pywikibot.FilePage(image.getRedirectTarget())
if not image.exists(): - pywikibot.info("{} doesn't exist so it cannot be linked" - .format(image.title(as_link=True))) + pywikibot.info(f"{image.title(as_link=True)} doesn't exist so it" + ' cannot be linked') return
yield image diff --git a/scripts/image.py b/scripts/image.py index 328c22a..5be3a3b 100755 --- a/scripts/image.py +++ b/scripts/image.py @@ -37,7 +37,7 @@
""" # -# (C) Pywikibot team, 2013-2022 +# (C) Pywikibot team, 2013-2024 # # Distributed under the terms of the MIT license. # @@ -99,11 +99,11 @@
replacements = [] if not self.opt.loose and self.new_image: - replacements.append((image_regex, - '[[{}:{}\g<parameters>]]' - .format( - self.site.namespaces.FILE.custom_name, - self.new_image))) + replacements.append( + (image_regex, + f'[[{self.site.namespaces.FILE.custom_name}:{self.new_image}' + '\g<parameters>]]') + ) else: replacements.append((image_regex, self.new_image))
diff --git a/scripts/imagetransfer.py b/scripts/imagetransfer.py index 7cf4836..c990df2 100755 --- a/scripts/imagetransfer.py +++ b/scripts/imagetransfer.py @@ -231,8 +231,9 @@
sourceSite = sourceImagePage.site pywikibot.info( - '\n>>> Transfer {source} from {source.site} to {target}\n' - .format(source=sourceImagePage, target=self.opt.target)) + f'\n>>> Transfer {sourceImagePage} from {sourceImagePage.site} ' + f'to {self.opt.target}\n' + ) url = sourceImagePage.get_file_url() pywikibot.info('URL should be: ' + url) # localize the text that should be printed on image description page @@ -398,9 +399,9 @@
if target_code or target_family: site = pywikibot.Site() - options.setdefault('target', - '{}:{}'.format(target_family or site.family, - target_code or site.lang)) + options.setdefault( + 'target', + f'{target_family or site.family}:{target_code or site.lang}')
bot = ImageTransferBot(generator=gen, **options) bot.run() diff --git a/scripts/interwiki.py b/scripts/interwiki.py index 3e3e8f0..0b0bb48 100755 --- a/scripts/interwiki.py +++ b/scripts/interwiki.py @@ -857,10 +857,10 @@
if self.conf.autonomous: pywikibot.info( - 'NOTE: Ignoring link from page {} in namespace' - ' {} to page {} in namespace {}.' - .format(linkingPage, linkingPage.namespace(), linkedPage, - linkedPage.namespace())) + f'NOTE: Ignoring link from page {linkingPage} in namespace' + f' {linkingPage.namespace()} to page {linkedPage} in ' + f'namespace {linkedPage.namespace()}.' + ) # Fill up found_in, so that we will not write this notice self.found_in[linkedPage] = [linkingPage] return True @@ -868,21 +868,20 @@ preferredPage = self.getFoundInCorrectNamespace(linkedPage.site) if preferredPage: pywikibot.info( - 'NOTE: Ignoring link from page {} in namespace {} to ' - 'page {} in namespace {} because page {} in the ' - 'correct namespace has already been found.' - .format(linkingPage, linkingPage.namespace(), - linkedPage, linkedPage.namespace(), - preferredPage)) + f'NOTE: Ignoring link from page {linkingPage} in ' + f'namespace {linkingPage.namespace()} to page ' + f'{linkedPage} in namespace {linkedPage.namespace()} ' + f'because page {preferredPage} in the correct namespace' + ' has already been found.' + ) return True
choice = pywikibot.input_choice( - 'WARNING: {} is in namespace "{}", but {} is in ' - 'namespace "{}". Follow it anyway?' - .format(self.origin, self.origin.namespace(), - linkedPage, linkedPage.namespace()), - [('Yes', 'y'), ('No', 'n'), - ('Add an alternative', 'a'), ('give up', 'g')], + f'WARNING: {self.origin} is in namespace ' + f'"{self.origin.namespace()}", but {linkedPage} is in ' + f'namespace "{linkedPage.namespace()}". Follow it anyway?', + [('Yes', 'y'), ('No', 'n'), ('Add an alternative', 'a'), + ('give up', 'g')], automatic_quit=False)
if choice != 'y': @@ -916,9 +915,8 @@ and self.origin.namespace().case == 'case-sensitive' and page.namespace().case == 'case-sensitive'): pywikibot.info( - 'NOTE: Ignoring {} for {} in wiktionary mode because both ' - 'languages are uncapitalized.' - .format(page, self.origin)) + f'NOTE: Ignoring {page} for {self.origin} in wiktionary' + ' mode because both languages are uncapitalized.') return True
return False @@ -942,14 +940,16 @@ if self.conf.autonomous: if self.origin.isDisambig() and not page.isDisambig(): pywikibot.info( - 'NOTE: Ignoring link from disambiguation page {} to ' - 'non-disambiguation {}'.format(self.origin, page)) + 'NOTE: Ignoring link from disambiguation page ' + f'{self.origin} to non-disambiguation {page}' + ) return (True, None)
if not self.origin.isDisambig() and page.isDisambig(): pywikibot.info( - 'NOTE: Ignoring link from non-disambiguation page {} to ' - 'disambiguation {}'.format(self.origin, page)) + 'NOTE: Ignoring link from non-disambiguation page ' + f'{self.origin} to disambiguation {page}' + ) return (True, None)
else: @@ -958,36 +958,38 @@ disambig = self.getFoundDisambig(page.site) if disambig: pywikibot.info( - 'NOTE: Ignoring non-disambiguation page {} for {} ' - 'because disambiguation page {} has already been ' - 'found.' - .format(page, self.origin, disambig)) + f'NOTE: Ignoring non-disambiguation page {page} for ' + f'{self.origin} because disambiguation page ' + f'{disambig} has already been found.' + ) return (True, None)
choice = pywikibot.input_choice( - "WARNING: {} is a disambiguation page, but {} doesn't " - 'seem to be one. Follow it anyway?' - .format(self.origin, page), + f'WARNING: {self.origin} is a disambiguation page, but ' + f"{page} doesn't seem to be one. Follow it anyway?", [('Yes', 'y'), ('No', 'n'), ('Add an alternative', 'a'), ('give up', 'g')], - automatic_quit=False) + automatic_quit=False + )
elif not self.origin.isDisambig() and page.isDisambig(): nondisambig = self.getFoundNonDisambig(page.site) if nondisambig: pywikibot.info( - 'NOTE: Ignoring disambiguation page {} for {} because ' - 'non-disambiguation page {} has already been found.' - .format(page, self.origin, nondisambig)) + f'NOTE: Ignoring disambiguation page {page} for ' + f'{self.origin} because non-disambiguation page ' + f'{nondisambig} has already been found.' + ) return (True, None)
choice = pywikibot.input_choice( - "WARNING: {} doesn't seem to be a disambiguation " - 'page, but {} is one. Follow it anyway?' - .format(self.origin, page), + f"WARNING: {self.origin} doesn't seem to be a " + f'disambiguation page, but {page} is one. Follow it' + ' anyway?', [('Yes', 'y'), ('No', 'n'), ('Add an alternative', 'a'), ('give up', 'g')], - automatic_quit=False) + automatic_quit=False + )
if choice == 'n': return (True, None) @@ -1202,8 +1204,9 @@ with codecs.open( pywikibot.config.datafilepath('autonomous_problems.dat'), 'a', 'utf-8') as f: - f.write('* {} {{Found more than one link for {}}}' - .format(self.origin, page.site)) + f.write( + f'* {self.origin} ' + f'{{Found more than one link for {page.site}}}') if config.interwiki_graph and config.interwiki_graph_url: filename = interwiki_graph.getFilename( self.origin, @@ -1222,9 +1225,8 @@ for link in iw: linkedPage = pywikibot.Page(link) if self.conf.hintsareright and linkedPage.site in self.hintedsites: - pywikibot.info( - 'NOTE: {}: {} extra interwiki on hinted site ignored {}' - .format(self.origin, page, linkedPage)) + pywikibot.info(f'NOTE: {self.origin}: {page} extra interwiki ' + f'on hinted site ignored {linkedPage}') break
if not self.skipPage(page, linkedPage, counter) \ @@ -1239,8 +1241,9 @@ # either may be a redirect to the other. # No way to find out quickly! pywikibot.info( - 'NOTE: {}: {} gives duplicate interwiki on same ' - 'site {}'.format(self.origin, page, linkedPage)) + f'NOTE: {self.origin}: {page} gives duplicate ' + f'interwiki on same site {linkedPage}' + ) break else: if config.interwiki_shownew: @@ -1623,8 +1626,8 @@ or str(rmPage) not in self.conf.remove): new[rmsite] = rmPage pywikibot.warning( - '{} is either deleted or has a mismatching ' - 'disambiguation state.'.format(rmPage)) + f'{rmPage} is either deleted or has a mismatching ' + 'disambiguation state.') # Re-Check what needs to get done mods, mcomment, adding, removing, modifying = compareLanguages( old, new, page.site, self.conf.summary) @@ -1780,14 +1783,15 @@ try: linkedPage = linkedPagesDict[expectedPage.site] pywikibot.warning( - '{}: {} does not link to {} but to {}' - .format(page.site.family.name, - page, expectedPage, linkedPage)) + f'{page.site.family.name}: {page} does not link to ' + f'{expectedPage} but to {linkedPage}' + ) except KeyError: if not expectedPage.site.is_data_repository(): - pywikibot.warning('{}: {} does not link to {}' - .format(page.site.family.name, - page, expectedPage)) + pywikibot.warning( + f'{page.site.family.name}: {page} does not link ' + f'to {expectedPage}' + ) # Check for superfluous links for linkedPage in linkedPages: if linkedPage in expectedPages: @@ -1796,9 +1800,8 @@ # that language. # In this case, it was already reported above. if linkedPage.site not in expectedSites: - pywikibot.warning('{}: {} links to incorrect {}' - .format(page.site.family.name, - page, linkedPage)) + pywikibot.warning(f'{page.site.family.name}: {page} links ' + f'to incorrect {linkedPage}')
class InterwikiBot: @@ -1857,9 +1860,8 @@ fs = self.firstSubject() if fs: self.conf.note(f'The first unfinished subject is {fs.origin}') - pywikibot.info( - 'NOTE: Number of pages queued is {}, trying to add {} more.' - .format(len(self.subjects), number)) + pywikibot.info('NOTE: Number of pages queued is ' + f'{len(self.subjects)}, trying to add {number} more.') for _ in range(number): for page in self.pageGenerator: if page in self.conf.skip: @@ -1954,8 +1956,8 @@ except ServerError: # Could not extract allpages special page? pywikibot.error('could not retrieve more pages. ' - 'Will try again in {} seconds' - .format(timeout)) + f'Will try again in {timeout} seconds' + ) pywikibot.sleep(timeout) timeout *= 2 else: diff --git a/scripts/interwikidata.py b/scripts/interwikidata.py index 74bd226..1af5d86 100755 --- a/scripts/interwikidata.py +++ b/scripts/interwikidata.py @@ -74,8 +74,9 @@ """Initialize the bot.""" super().__init__(**kwargs) if not self.site.has_data_repository: - raise ValueError('{site} does not have a data repository, use ' - 'interwiki.py instead.'.format(site=self.site)) + raise ValueError( + f'{self.site} does not have a data repository, use ' + 'interwiki.py instead.')
self.repo = self.site.data_repository() if not self.opt.summary: @@ -162,8 +163,8 @@ dbnames = [iw_site.dbName() for iw_site in self.iwlangs] if set(dbnames) - set(self.current_item.sitelinks.keys()) \ and not self.handle_complicated(): - warning('Interwiki conflict in {}, skipping...' - .format(self.current_page.title(as_link=True))) + warning('Interwiki conflict in ' + f'{self.current_page.title(as_link=True)}, skipping...') return
info('Cleaning up the page') @@ -176,8 +177,8 @@ wd_data = set() for iw_page in self.iwlangs.values(): if not iw_page.exists(): - warning('Interwiki {} does not exist, skipping...' - .format(iw_page.title(as_link=True))) + warning(f'Interwiki {iw_page.title(as_link=True)} does not' + ' exist, skipping...') continue try: wd_data.add(pywikibot.ItemPage.fromPage(iw_page)) @@ -193,8 +194,8 @@ return None
if len(wd_data) > 1: - warning('Interwiki conflict in {}, skipping...' - .format(self.current_page.title(as_link=True))) + warning('Interwiki conflict in ' + f'{self.current_page.title(as_link=True)}, skipping...') return False
item = list(wd_data).pop() diff --git a/scripts/listpages.py b/scripts/listpages.py index 3efeaf6..fcb1735 100755 --- a/scripts/listpages.py +++ b/scripts/listpages.py @@ -240,8 +240,8 @@ base_dir = None elif not os.path.isdir(base_dir): # base_dir is a file. - pywikibot.warning('Not a directory: "{}"\nSkipping saving ...' - .format(base_dir)) + pywikibot.warning( + f'Not a directory: "{base_dir}"\nSkipping saving ...') base_dir = None self.opt.save = base_dir
@@ -309,10 +309,10 @@ if page_target: page_target = pywikibot.Page(site, page_target) if not options.get('overwrite') and page_target.exists(): - additional_text = ('Page {} already exists.\n' + additional_text = (f'Page {page_target} already exists.\n' 'You can use the -overwrite argument to ' 'replace the content of this page.' - .format(page_target)) + )
gen = gen_factory.getCombinedGenerator() options['preloading'] = gen_factory.is_preloading diff --git a/scripts/maintenance/cache.py b/scripts/maintenance/cache.py index c76097a..bc338a5 100755 --- a/scripts/maintenance/cache.py +++ b/scripts/maintenance/cache.py @@ -270,9 +270,8 @@ # Skip foreign python specific directory *_, version = cache_path.partition('-') if version and version[-1] != str(PYTHON_VERSION[0]): - pywikibot.error( - "Skipping {} directory, can't read content with python {}" - .format(cache_path, PYTHON_VERSION[0])) + pywikibot.error(f"Skipping {cache_path} directory, can't read " + f'content with python {PYTHON_VERSION[0]}') continue
try: @@ -302,9 +301,8 @@ try: entry._rebuild() except Exception: - pywikibot.error('Problems loading {} with key {}, {!r}' - .format(entry.filename, entry.key, - entry._parsed_key)) + pywikibot.error(f'Problems loading {entry.filename} with key ' + f'{entry.key}, {entry._parsed_key!r}') pywikibot.exception() continue
diff --git a/scripts/movepages.py b/scripts/movepages.py index b73ce6e..1b2940a 100755 --- a/scripts/movepages.py +++ b/scripts/movepages.py @@ -94,8 +94,8 @@ """Treat only non-redirect pages if 'skipredirects' is set.""" if self.opt.skipredirects and page.isRedirectPage(): pywikibot.warning( - 'Page {page} on {page.site} is a redirect; skipping' - .format(page=page)) + f'Page {page} on {page.site} is a redirect; skipping' + ) return True return super().skip_page(page)
@@ -129,7 +129,7 @@ """Manage interactive choices for namespace prefix.""" namespace = page.site.namespace(page.namespace()) q = pywikibot.input_yn('Do you want to remove the ' - 'namespace prefix "{}:"?'.format(namespace), + f'namespace prefix "{namespace}:"?', automatic_quit=False) return None if q else namespace
diff --git a/scripts/newitem.py b/scripts/newitem.py index 97a604e..6a05b84 100755 --- a/scripts/newitem.py +++ b/scripts/newitem.py @@ -20,7 +20,7 @@
""" # -# (C) Pywikibot team, 2014-2023 +# (C) Pywikibot team, 2014-2024 # # Distributed under the terms of the MIT license. # @@ -70,14 +70,13 @@ days=self.opt.pageage) self.lastEditBefore = self.repo.server_time() - timedelta( days=self.opt.lastedit) - pywikibot.info('Page age is set to {} days so only pages created' - '\nbefore {} will be considered.\n' - .format(self.opt.pageage, - self.pageAgeBefore.isoformat())) pywikibot.info( - 'Last edit is set to {} days so only pages last edited' - '\nbefore {} will be considered.\n' - .format(self.opt.lastedit, self.lastEditBefore.isoformat())) + f'Page age is set to {self.opt.pageage} days so only pages created' + f'\nbefore {self.pageAgeBefore.isoformat()} will be considered.\n' + f'\nLast edit is set to {self.opt.lastedit} days so only pages ' + f'last edited\nbefore {self.lastEditBefore.isoformat()} will be' + ' considered.\n' + )
@staticmethod def _touch_page(page) -> None: diff --git a/scripts/noreferences.py b/scripts/noreferences.py index 7e30984..5ad450b 100755 --- a/scripts/noreferences.py +++ b/scripts/noreferences.py @@ -678,17 +678,17 @@ # Create a new section for the references tag for section in i18n.translate(self.site, placeBeforeSections) or []: # Find out where to place the new section - sectionR = re.compile(r'\r?\n(?P<ident>=+) *{} *(?P=ident) *\r?\n' - .format(section)) + sectionR = re.compile( + rf'\r?\n(?P<ident>=+) *{section} *(?P=ident) *\r?\n') index = 0 while index < len(oldText): match = sectionR.search(oldText, index) if match: if textlib.isDisabled(oldText, match.start()): pywikibot.info( - 'Existing {} section is commented out, ' + f'Existing {section} section is commented out, ' "won't add the references in front of it." - .format(section)) + ) index = match.end() else: pywikibot.info(f'Adding references section before ' @@ -720,10 +720,12 @@ # so templatePattern must be fixed templatePattern = r'\r?\n{{((?!}}).)+?}}\s*' commentPattern = r'<!--((?!-->).)*?-->\s*' - metadataR = re.compile(r'(\r?\n)?({}|{}|{}|{})$' - .format(categoryPattern, interwikiPattern, - templatePattern, commentPattern), - re.DOTALL) + metadataR = re.compile( + r'(\r?\n)?' + f'({categoryPattern}|{interwikiPattern}|{templatePattern}|' + f'{commentPattern})$', + re.DOTALL + ) tmpText = oldText while True: match = metadataR.search(tmpText) diff --git a/scripts/nowcommons.py b/scripts/nowcommons.py index bea7b45..db8d202 100755 --- a/scripts/nowcommons.py +++ b/scripts/nowcommons.py @@ -43,7 +43,7 @@ can be set within a settings file which is scripts.ini by default. """ # -# (C) Pywikibot team, 2006-2022 +# (C) Pywikibot team, 2006-2024 # # Distributed under the terms of the MIT license. # @@ -291,16 +291,18 @@
if using_pages and using_pages != [local_file_page]: pywikibot.info( - '"<<lightred>>{}<<default>>" is still used in {} pages.' - .format(local_file_page.title(with_ns=False), - len(using_pages))) + f'"<<lightred>>{local_file_page.title(with_ns=False)}' + f'<<default>>" is still used in {len(using_pages)} pages.' + )
if self.opt.replace: pywikibot.info( - 'Replacing "<<lightred>>{}<<default>>" by ' - '"<<lightgreen>>{}<<default>>".' - .format(local_file_page.title(with_ns=False), - commons_file_page.title(with_ns=False))) + 'Replacing "<<lightred>>' + f'{local_file_page.title(with_ns=False)}' + '<<default>>" by "<<lightgreen>>' + f'{commons_file_page.title(with_ns=False)}' + '<<default>>".' + )
bot = ImageBot(local_file_page.using_pages(), local_file_page.title(with_ns=False), @@ -325,8 +327,9 @@ return
pywikibot.info( - 'No page is using "<<lightgreen>>{}<<default>>" anymore.' - .format(local_file_page.title(with_ns=False))) + 'No page is using "<<lightgreen>>' + f'{local_file_page.title(with_ns=False)}<<default>>" anymore.' + )
try: commons_text = commons_file_page.get() @@ -373,8 +376,8 @@ def teardown(self): """Show a message if no files were found.""" if self.generator_completed and not self.counter['read']: - pywikibot.info('No transcluded files found for {}.' - .format(self.nc_templates_list()[0])) + pywikibot.info('No transcluded files found for ' + f'{self.nc_templates_list()[0]}.')
def main(*args: str) -> None: diff --git a/scripts/parser_function_count.py b/scripts/parser_function_count.py index cdaaafc..09a6aab 100755 --- a/scripts/parser_function_count.py +++ b/scripts/parser_function_count.py @@ -156,8 +156,8 @@ def teardown(self) -> None: """Final processing.""" resultlist = '\n'.join( - '# [[{result[0]}]] ({result[1]})' - .format(result=result) + f'# [[{result[0]}]] ({result[1]})' + for result in self.results.most_common(self.opt.first)) pywikibot.info(resultlist) pywikibot.info(f'{len(self.results)} templates were found.') diff --git a/scripts/patrol.py b/scripts/patrol.py index c0d422f..5712ba4 100755 --- a/scripts/patrol.py +++ b/scripts/patrol.py @@ -42,7 +42,7 @@
""" # -# (C) Pywikibot team, 2011-2023 +# (C) Pywikibot team, 2011-2024 # # Distributed under the terms of the MIT license. # @@ -100,9 +100,10 @@ else: local_whitelist_subpage_name = pywikibot.translate( self.site, self.whitelist_subpage_name, fallback=True) - self.whitelist_pagename = '{}:{}/{}'.format( - self.site.namespace(2), self.site.username(), - local_whitelist_subpage_name) + self.whitelist_pagename = ( + f'{self.site.namespace(2)}:{self.site.username()}/' + f'{local_whitelist_subpage_name}' + ) self.whitelist = None self.whitelist_ts = 0 self.whitelist_load_ts = 0 diff --git a/scripts/redirect.py b/scripts/redirect.py index 5305a29..61a0d6c 100755 --- a/scripts/redirect.py +++ b/scripts/redirect.py @@ -178,15 +178,16 @@ except SiteDefinitionError as e: pywikibot.log(e) pywikibot.info( - 'NOTE: Ignoring {} which is a redirect ({}) to an ' - 'unknown site.'.format(entry.title, target)) + f'NOTE: Ignoring {entry.title} which is a redirect ' + f'({target}) to an unknown site.' + ) target_link = None else: if target_link.site != self.site: pywikibot.info( - 'NOTE: Ignoring {} which is a redirect to ' - 'another site {}.' - .format(entry.title, target_link.site)) + f'NOTE: Ignoring {entry.title} which is a ' + f'redirect to another site {target_link.site}.' + ) target_link = None # if the redirect does not link to another wiki if target_link and target_link.title: @@ -540,10 +541,9 @@ ignore_save_related_errors=True, ignore_server_errors=True) if not done and self.user_confirm( - 'Redirect target {} does not exist.\n' - 'Do you want to delete {}?' 
- .format(targetPage.title(as_link=True), - redir_page.title(as_link=True))): + f'Redirect target {targetPage.title(as_link=True)} does not' + ' exist.\nDo you want to delete ' + f'{redir_page.title(as_link=True)}?'): self.delete_redirect(redir_page, 'redirect-remove-broken') elif not (self.opt.delete or movedTarget): pywikibot.info('Cannot fix or delete the broken redirect') @@ -608,9 +608,8 @@ newRedir = redir = self.current_page redirList = [] # bookkeeping to detect loops while True: - redirList.append('{}:{}' - .format(newRedir.site.lang, - newRedir.title(with_section=False))) + redirList.append( + f'{newRedir.site.lang}:{newRedir.title(with_section=False)}') try: targetPage = self.get_redirect_target(newRedir) except Exception as e: @@ -631,9 +630,8 @@ break
# watch out for redirect loops - if redirList.count('{}:{}'.format( - targetPage.site.lang, - targetPage.title(with_section=False))): + if redirList.count(f'{targetPage.site.lang}:' + f'{targetPage.title(with_section=False)}'): pywikibot.warning( f'Redirect target {targetPage} forms a redirect loop.') break # FIXME: doesn't work. edits twice! diff --git a/scripts/reflinks.py b/scripts/reflinks.py index 2eceb36..9dbfb42 100755 --- a/scripts/reflinks.py +++ b/scripts/reflinks.py @@ -42,7 +42,7 @@
&params; """ -# (C) Pywikibot team, 2008-2023 +# (C) Pywikibot team, 2008-2024 # # Distributed under the terms of the MIT license. # @@ -214,12 +214,12 @@
def refTitle(self) -> str: """Return the <ref> with its new title.""" - return '<ref{r.name}>[{r.link} {r.title}<!-- {r.comment} -->]</ref>' \ - .format(r=self) + return (f'<ref{self.name}>[{self.link} {self.title}' + f'<!-- {self.comment} -->]</ref>')
def refLink(self) -> str: """No title has been found, return the unbracketed link.""" - return '<ref{r.name}>{r.link}</ref>'.format(r=self) + return f'<ref{self.name}>{self.link}</ref>'
def refDead(self): """Dead link, tag it with a {{dead link}}.""" @@ -407,10 +407,8 @@ if v[IX.reflist]: name = f'"{name}"'
- text = re.sub( - r'<ref name\s*=\s*(?P<quote>["\']?)\s*{}\s*(?P=quote)\s*/>' - .format(ref), - f'<ref name={name} />', text) + text = re.sub(rf'<ref name\s*=\s*(?P<quote>["\']?)\s*{ref}\s*' + r'(?P=quote)\s*/>', f'<ref name={name} />', text) return text
@@ -466,8 +464,10 @@ if self.stop_page.exists(): self.stop_page_rev_id = self.stop_page.latest_revision_id else: - pywikibot.warning('The stop page {} does not exist' - .format(self.stop_page.title(as_link=True))) + pywikibot.warning( + f'The stop page {self.stop_page.title(as_link=True)} does' + ' not exist' + )
# Regex to grasp content-type meta HTML tag in HTML source self.META_CONTENT = re.compile( @@ -610,9 +610,10 @@ continue
if r.status_code != HTTPStatus.OK: - pywikibot.stdout('HTTP error ({}) for {} on {}' - .format(r.status_code, ref.url, - page.title(as_link=True))) + pywikibot.stdout( + f'HTTP error ({r.status_code}) for {ref.url} on ' + f'{page.title(as_link=True)}' + ) # 410 Gone, indicates that the resource has been # purposely removed if r.status_code == HTTPStatus.GONE \ diff --git a/scripts/replace.py b/scripts/replace.py index a1786d1..5b04af1 100755 --- a/scripts/replace.py +++ b/scripts/replace.py @@ -477,9 +477,8 @@ except KeyboardInterrupt: with suppress(NameError): if not self.skipping: - pywikibot.info( - 'To resume, use "-xmlstart:{}" on the command line.' - .format(entry.title)) + pywikibot.info(f'To resume, use "-xmlstart:{entry.title}"' + ' on the command line.')
def isTitleExcepted(self, title) -> bool: """Return True if one of the exceptions applies for the given title.""" @@ -571,9 +570,8 @@ for i, replacement in enumerate(replacements): if isinstance(replacement, Sequence): if len(replacement) != 2: - raise ValueError('Replacement number {} does not have ' - 'exactly two elements: {}'.format( - i, replacement)) + raise ValueError(f'Replacement number {i} does not have ' + f'exactly two elements: {replacement}') # Replacement assumes it gets strings but it's already compiled replacements[i] = Replacement.from_compiled(replacement[0], replacement[1]) @@ -631,16 +629,18 @@ page.title(), replacement.exceptions): if replacement.container: pywikibot.info( - 'Skipping fix "{}" on {} because the title is on ' - 'the exceptions list.'.format( - replacement.container.name, - page.title(as_link=True))) + f'Skipping fix "{replacement.container.name}" on ' + f'{page.title(as_link=True)} because the title is on ' + 'the exceptions list.' + ) skipped_containers.add(replacement.container.name) else: pywikibot.info( - 'Skipping unnamed replacement ({}) on {} because ' - 'the title is on the exceptions list.'.format( - replacement.description, page.title(as_link=True))) + 'Skipping unnamed replacement ' + f'({replacement.description}) on ' + f'{page.title(as_link=True)} because the title is on' + ' the exceptions list.' + ) continue
if self.isTextExcepted(original_text, replacement.exceptions): @@ -891,14 +891,14 @@ """ if not sql: where_clause = 'WHERE ({})'.format(' OR '.join( - "old_text RLIKE '{}'" - .format(prepareRegexForMySQL(repl.old_regex.pattern)) + f"old_text RLIKE '{prepareRegexForMySQL(repl.old_regex.pattern)}'" + for repl in replacements))
if exceptions: except_clause = 'AND NOT ({})'.format(' OR '.join( - "old_text RLIKE '{}'" - .format(prepareRegexForMySQL(exc.pattern)) + f"old_text RLIKE '{prepareRegexForMySQL(exc.pattern)}'" + for exc in exceptions)) else: except_clause = '' @@ -1073,14 +1073,17 @@ missing_fix_summaries.append( f'"{fix_name}" (replacement #{index})') if chars.contains_invisible(replacement[0]): - pywikibot.warning('The old string "{}" contains formatting ' - 'characters like U+200E'.format( - chars.replace_invisible(replacement[0]))) + pywikibot.warning( + 'The old string ' + f'"{chars.replace_invisible(replacement[0])}"' + ' contains formatting characters like U+200E' + ) if (not callable(replacement[1]) and chars.contains_invisible(replacement[1])): - pywikibot.warning('The new string "{}" contains formatting ' - 'characters like U+200E'.format( - chars.replace_invisible(replacement[1]))) + pywikibot.warning( + 'The new string ' + f'"{chars.replace_invisible(replacement[1])}"' + ' contains formatting characters like U+200E') replacement_set.append(ReplacementListEntry( old=replacement[0], new=replacement[1], diff --git a/scripts/solve_disambiguation.py b/scripts/solve_disambiguation.py index f0173ef..90efbb6c 100755 --- a/scripts/solve_disambiguation.py +++ b/scripts/solve_disambiguation.py @@ -673,11 +673,11 @@ # group linktrail is the link trail, that's letters after ]] which # are part of the word. # note: the definition of 'letter' varies from language to language. - self.linkR = re.compile(r""" + self.linkR = re.compile(rf""" \[\[ (?P<title> [^\[\]\|#]*) (?P<section> \#[^\]\|]*)? (\|(?P<label> [^\]]*))? \]\] - (?P<linktrail>{})""".format(linktrail), flags=re.X) + (?P<linktrail>{linktrail})""", flags=re.X)
@staticmethod def firstlinks(page) -> Generator[str, None, None]: @@ -759,13 +759,13 @@ try: text = ref_page.get() except IsRedirectPageError: - pywikibot.info('{} is a redirect to {}' - .format(ref_page.title(), disamb_page.title())) + pywikibot.info( + f'{ref_page.title()} is a redirect to {disamb_page.title()}') if disamb_page.isRedirectPage(): target = self.opt.pos[0] if pywikibot.input_yn( - 'Do you want to make redirect {} point to {}?' - .format(ref_page.title(), target), + f'Do you want to make redirect {ref_page.title()} point ' + f'to {target}?', default=False, automatic_quit=False): redir_text = f'#{self.site.redirect()} [[{target}]]' try: @@ -796,9 +796,8 @@ else: ignore_reason = self.checkContents(text) if ignore_reason: - pywikibot.info( - '\n\nSkipping {} because it contains {}.\n\n' - .format(ref_page.title(), ignore_reason)) + pywikibot.info(f'\n\nSkipping {ref_page.title()} because it ' + f'contains {ignore_reason}.\n\n') else: include = True
@@ -999,9 +998,8 @@ '', link_text[len(new_page_title):]) == '') and (not section) ): - newlink = '[[{}]]{}'.format( - link_text[:len(new_page_title)], - link_text[len(new_page_title):]) + newlink = (f'[[{link_text[:len(new_page_title)]}]]' + f'{link_text[len(new_page_title):]}') else: newlink = f'[[{new_page_title}{section}|{link_text}]]' text = text[:m.start()] + newlink + text[m.end():] @@ -1097,7 +1095,7 @@ except NoPageError: pywikibot.info( 'Page does not exist; using first ' - 'link in page {}.'.format(page.title())) + f'link in page {page.title()}.') links = page.linkedPages()[:1] links = [correctcap(link, page.get()) for link in links] @@ -1260,9 +1258,9 @@ else: page = pywikibot.Page(pywikibot.Link(value, site)) if page.exists() or pywikibot.input_yn( - 'Possibility {} does not actually exist. Use it anyway?' - .format(page.title()), default=False, - automatic_quit=False): + f'Possibility {page.title()} does not actually exist.' + ' Use it anyway?', + default=False, automatic_quit=False): alternatives.append(page.title()) elif arg == '-just': options['just'] = False diff --git a/scripts/speedy_delete.py b/scripts/speedy_delete.py index 0f4d0c3..1c753ed 100755 --- a/scripts/speedy_delete.py +++ b/scripts/speedy_delete.py @@ -479,8 +479,8 @@ bot = SpeedyBot(site=site) bot.run() elif site.logged_in(): - pywikibot.info("{} does not have 'delete' right for site {}" - .format(site.username(), site)) + pywikibot.info( + f"{site.username()} does not have 'delete' right for site {site}") else: pywikibot.info('Login first.')
diff --git a/scripts/transferbot.py b/scripts/transferbot.py index 99c87fb..172513f 100755 --- a/scripts/transferbot.py +++ b/scripts/transferbot.py @@ -45,7 +45,7 @@ -wantedtemplates:10 -target """ # -# (C) Pywikibot team, 2014-2023 +# (C) Pywikibot team, 2014-2024 # # Distributed under the terms of the MIT license. # @@ -135,17 +135,14 @@ if targetpage.exists(): if not overwrite: pywikibot.warning( - 'Skipped {} (target page {} exists)'.format( - page.title(as_link=True, force_interwiki=True), - targetpage.title(as_link=True) - ) + f'Skipped {page.title(as_link=True, force_interwiki=True)}' + f' (target page {targetpage.title(as_link=True)} exists)' ) continue if not targetpage.botMayEdit(): pywikibot.warning( - 'Target page {} is not editable by bots'.format( - targetpage.title(as_link=True) - ) + f'Target page {targetpage.title(as_link=True)} is not' + ' editable by bots' ) continue
diff --git a/scripts/transwikiimport.py b/scripts/transwikiimport.py index 4c28ae9..dc493cc 100755 --- a/scripts/transwikiimport.py +++ b/scripts/transwikiimport.py @@ -138,7 +138,7 @@ .. versionadded:: 8.2 """ # -# (C) Pywikibot team, 2023 +# (C) Pywikibot team, 2023-2024 # # Distributed under the terms of the MIT license. # @@ -284,18 +284,17 @@ if not overwrite: if targetpage.exists(): pywikibot.warning( - 'Skipped {} (target page {} exists)'.format( - page.title(as_link=True, force_interwiki=True), - targetpage.title(as_link=True) - ) + 'Skipped ' + f'{page.title(as_link=True, force_interwiki=True)} ' + f'(target page {targetpage.title(as_link=True)}' + ' exists)' ) continue else: if not targetpage.botMayEdit(): pywikibot.warning( - 'Target page {} is not editable by bots'.format( - targetpage.title(as_link=True) - ) + f'Target page {targetpage.title(as_link=True)} is not' + ' editable by bots' ) continue
diff --git a/scripts/unusedfiles.py b/scripts/unusedfiles.py index 5051bc0..bf831b3 100755 --- a/scripts/unusedfiles.py +++ b/scripts/unusedfiles.py @@ -105,8 +105,8 @@ and (self.opt.usertemplate or self.opt.nouserwarning)): # if no templates are given raise TranslationError( - 'This script is not localized for {} site;\n' - 'try using -filetemplate:<template name>.'.format(self.site)) + f'This script is not localized for {self.site} site;\n' + 'try using -filetemplate:<template name>.')
def treat(self, image) -> None: """Process one image page.""" diff --git a/scripts/watchlist.py b/scripts/watchlist.py index 22077ab..6618d52 100755 --- a/scripts/watchlist.py +++ b/scripts/watchlist.py @@ -23,7 +23,7 @@ watchlist is retrieved in parallel tasks. """ # -# (C) Pywikibot team, 2005-2022 +# (C) Pywikibot team, 2005-2024 # # Distributed under the terms of the MIT license. # @@ -67,8 +67,8 @@ wl_count_all = sum(len(future.result()) for future in as_completed(futures)) if not quiet: - pywikibot.info('There are a total of {} page(s) in the watchlists for ' - 'all wikis.'.format(wl_count_all)) + pywikibot.info(f'There are a total of {wl_count_all} page(s) in the' + ' watchlists for all wikis.')
def isWatched(pageName, site=None): # noqa: N802, N803 @@ -157,5 +157,5 @@ if __name__ == '__main__': start = datetime.datetime.now() main() - pywikibot.info('\nExecution time: {} seconds' - .format((datetime.datetime.now() - start).seconds)) + pywikibot.info('\nExecution time: ' + f'{(datetime.datetime.now() - start).seconds} seconds') diff --git a/scripts/weblinkchecker.py b/scripts/weblinkchecker.py index 8a19ec9..bd58caf 100755 --- a/scripts/weblinkchecker.py +++ b/scripts/weblinkchecker.py @@ -104,7 +104,7 @@ python pwb.py weblinkchecker -repeat """ # -# (C) Pywikibot team, 2005-2022 +# (C) Pywikibot team, 2005-2024 # # Distributed under the terms of the MIT license. # @@ -377,11 +377,10 @@ iso_date = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(date)) error_report += f'** In [[{page_title}]] on {iso_date}, {error}\n' pywikibot.info('** Logging link for deletion.') - txtfilename = pywikibot.config.datafilepath('deadlinks', - 'results-{}-{}.txt' - .format( - self.site.family.name, - self.site.lang)) + txtfilename = pywikibot.config.datafilepath( + 'deadlinks', + f'results-{self.site.family.name}-{self.site.lang}.txt' + ) with codecs.open(txtfilename, 'a', 'utf-8') as txtfile: self.log_count += 1 if self.log_count % 30 == 0: @@ -531,8 +530,8 @@ except SpamblacklistError as error: pywikibot.info( '<<lightaqua>>** SpamblacklistError while trying to ' - 'change {}: {}<<default>>' - .format(talk_page, error.url)) + f'change {talk_page}: {error.url}<<default>>' + )
class WeblinkCheckerRobot(SingleSiteBot, ExistingPageBot): @@ -585,18 +584,18 @@ """Finish remaining threads and save history file.""" num = self.count_link_check_threads() if num: - pywikibot.info('<<lightblue>>Waiting for remaining {} threads ' - 'to finish, please wait...'.format(num)) + pywikibot.info(f'<<lightblue>>Waiting for remaining {num} threads ' + 'to finish, please wait...')
while self.count_link_check_threads(): try: time.sleep(0.1) except KeyboardInterrupt: # Threads will die automatically because they are daemonic. - if pywikibot.input_yn('There are {} pages remaining in the ' - 'queue. Really exit?' - .format(self.count_link_check_threads()), - default=False, automatic_quit=False): + if pywikibot.input_yn( + f'There are {self.count_link_check_threads()} pages' + ' remaining in the queue. Really exit?', + default=False, automatic_quit=False): break
num = self.count_link_check_threads() diff --git a/scripts/welcome.py b/scripts/welcome.py index 4a48426..7ea7954 100755 --- a/scripts/welcome.py +++ b/scripts/welcome.py @@ -156,7 +156,7 @@ badwords at all but can be used for some bad-nickname. """ # -# (C) Pywikibot team, 2006-2023 +# (C) Pywikibot team, 2006-2024 # # Distributed under the terms of the MIT license. # @@ -602,9 +602,8 @@ """Add bad account to queue.""" if globalvar.confirm: answer = pywikibot.input_choice( - '{} may have an unwanted username, do you want to report ' - 'this user?' - .format(name), [('Yes', 'y'), ('No', 'n'), ('All', 'a')], + f'{name} may have an unwanted username, do you want to report ' + 'this user?', [('Yes', 'y'), ('No', 'n'), ('All', 'a')], 'n', automatic_quit=False) if answer in ['a', 'all']: answer = 'y' @@ -800,8 +799,8 @@ elif user.editCount() < globalvar.attach_edit_count: if user.editCount() != 0: self.show_status(Msg.IGNORE) - pywikibot.info('{} has only {} contributions.' - .format(user.username, user.editCount())) + pywikibot.info(f'{user.username} has only {user.editCount()}' + ' contributions.') elif not globalvar.quiet: self.show_status(Msg.IGNORE) pywikibot.info(f'{user.username} has no contributions.')