Is there a place to send new pywikibot scripts?
I wrote one, "cathash", which is rather useful to me, and I think it might be useful to other people as well:
#!/usr/bin/env python3
r"""Compares the contents of a Mediawiki files category to local files.
Usage:
pwb.py cathash category [localfiles...]
where:
category is the name of a MediaWiki category
localfiles is an optional list of local files
Hereinafter, "local files" means any file whose name is in "localfiles".
"Remote files" means any file which is a member of the named category.
We normalise each name as follows:
- Spaces in remote names are replaced with underscores.
- "File:" at the start of remote names is stripped.
- The pathname of local files is ignored.
Then we print the union of all local and remote filenames to stdout,
each preceded by one of these prefixes:
remote: there is a remote file by this name;
the name was either not given as a local filename,
or such a local file does not exist.
local: there is a local file by this name,
but no corresponding remote file.
neither: the name was given as a local filename, but neither a remote
file nor a local file exists by this name.
same: there are both a remote file and a local file by this name,
and their SHA1 hashes match
differ: there are both a remote file and a local file by this name,
but their SHA1 hashes do not match
If you need to pipe the output to another command, you can use
cut -d: -f2
to remove the prefixes.
"""
#
# (C) Pywikibot team, 2022-2025
#
# Distributed under the terms of the MIT license.
#
from __future__ import annotations
import argparse
import hashlib
import glob
import os
from collections import defaultdict
from contextlib import suppress
from pathlib import Path
import pywikibot
from pywikibot import config
from pywikibot.bot import SingleSiteBot
class CathashBot(SingleSiteBot):

    """Compare the files of a wiki category with local files by SHA1.

    The category title and the local paths come from the parsed command
    line arguments. :meth:`run` prints one ``status:name`` line to stdout
    for every name seen remotely or requested locally.
    """

    # Local files are hashed in chunks of this many bytes so that large
    # media files are never held in memory in one piece.
    _CHUNK_SIZE = 1 << 20

    @staticmethod
    def setup_args(ap: argparse.ArgumentParser) -> None:
        """Declare the positional arguments of the script on *ap*.

        :param ap: parser that receives ``cat`` and ``localfiles``
        """
        ap.add_argument('cat')
        ap.add_argument('localfiles', nargs='*')

    def __init__(self, args: argparse.Namespace) -> None:
        """Initializer.

        :param args: parsed arguments; ``cat`` and ``localfiles`` are read
        :raises ValueError: two local paths share the same base name
        """
        super().__init__()
        self.args = args
        cat_title = args.cat
        # Maps the base name of each requested local file to the path as
        # it was given; the base name is what gets matched remotely.
        self.locals: dict[str, str] = {}
        for localfile in args.localfiles:
            localname = os.path.basename(localfile)
            if localname in self.locals:
                raise ValueError(
                    f"{localname} was requested more than once"
                )
            self.locals[localname] = localfile
        pywikibot.info(f'Scanning {cat_title!r}')
        self.cat = pywikibot.Category(self.site, cat_title)

    @classmethod
    def _sha1_of(cls, path: str) -> str:
        """Return the hexadecimal SHA1 digest of the file at *path*."""
        digest = hashlib.sha1()
        with open(path, 'rb') as stream:
            for chunk in iter(lambda: stream.read(cls._CHUNK_SIZE), b''):
                digest.update(chunk)
        return digest.hexdigest()

    def run(self) -> None:
        """Classify every remote and local name and print the result.

        Each output line is ``status:name`` where status is one of
        ``same``, ``differ``, ``remote``, ``local`` or ``neither``; lines
        are sorted by name.
        """
        remotes = {}
        for member in self.cat.members():
            # A category can also contain articles and subcategories;
            # only file pages carry a file revision with a SHA1.
            if not isinstance(member, pywikibot.FilePage):
                continue
            # with_ns=False drops the namespace prefix whatever its
            # localized spelling is, not only a literal "File:".
            name = member.title(with_ns=False, underscore=True)
            remotes[name] = member.latest_file_info.sha1

        results = {}
        for name, remote_sha1 in remotes.items():
            path = self.locals.get(name)
            # isfile() rather than exists(): a directory of that name
            # cannot be hashed and is not a local file.
            if path is None or not os.path.isfile(path):
                results[name] = 'remote'
            elif self._sha1_of(path) == remote_sha1:
                results[name] = 'same'
            else:
                results[name] = 'differ'

        # Names requested locally that have no remote counterpart.
        for name, path in self.locals.items():
            if name not in remotes:
                results[name] = 'local' if os.path.isfile(path) else 'neither'

        for name, status in sorted(results.items()):
            print(f'{status}:{name}')
def main(*args: str) -> None:
    """Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    :param args: command line arguments
    """
    ap = argparse.ArgumentParser(add_help=False)
    CathashBot.setup_args(ap)
    # Pass the caller's arguments on; previously they were ignored and
    # the command line was always used.
    local_args = pywikibot.handle_args(args)
    options, unknown = ap.parse_known_args(local_args)
    # Report options that nothing recognised instead of silently
    # dropping them, and stop before any wiki access.
    if pywikibot.bot.suggest_help(unknown_parameters=unknown):
        return
    bot = CathashBot(options)
    bot.run()
# Run the script only when executed directly, not when imported.
if __name__ == '__main__':
    main()