From 7533bd0b94c122a6f4043b3ed778324dfa79b2e0 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Fri, 17 May 2024 15:11:56 +0200 Subject: [PATCH 001/107] First skeleton of metadatavalidator --- python-scripts/metadatavalidator/.gitignore | 177 ++++++++++++++++++ python-scripts/metadatavalidator/README.rst | 0 .../metadatavalidator/metadatavalidator.py | 146 +++++++++++++++ .../metadatavalidator/pyproject.toml | 51 +++++ 4 files changed, 374 insertions(+) create mode 100644 python-scripts/metadatavalidator/.gitignore create mode 100644 python-scripts/metadatavalidator/README.rst create mode 100755 python-scripts/metadatavalidator/metadatavalidator.py create mode 100644 python-scripts/metadatavalidator/pyproject.toml diff --git a/python-scripts/metadatavalidator/.gitignore b/python-scripts/metadatavalidator/.gitignore new file mode 100644 index 000000000..dea5c8d6b --- /dev/null +++ b/python-scripts/metadatavalidator/.gitignore @@ -0,0 +1,177 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env* +.venv* +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +### https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +!.vscode/*.code-snippets + +# Local History for Visual Studio Code +.history/ + +# Built Visual Studio Code Extensions +*.vsix + + +#### Specific to this project diff --git a/python-scripts/metadatavalidator/README.rst b/python-scripts/metadatavalidator/README.rst new file mode 100644 index 000000000..e69de29bb diff --git a/python-scripts/metadatavalidator/metadatavalidator.py b/python-scripts/metadatavalidator/metadatavalidator.py new file mode 100755 index 000000000..faf269ac8 --- /dev/null +++ b/python-scripts/metadatavalidator/metadatavalidator.py @@ -0,0 +1,146 @@ +#!/usr/bin/python3 +""" + +""" + +import argparse +import configparser +import logging +from logging.config import dictConfig +import sys + +try: + from lxml import etree +except ImportError: + print("Cannot import lxml. 
", file=sys.stderr) + sys.exit(10) + +__version__ = "0.2.0" +__author__ = "Tom Schraitle " + +#: The logger name; can also set to "__name__" +LOGGERNAME = "metadata" + +#: The dictionary, passed to :class:`logging.config.dictConfig`, +#: is used to setup your logging formatters, handlers, and loggers +#: For details, see https://docs.python.org/3.4/library/logging.config.html#configuration-dictionary-schema +DEFAULT_LOGGING_DICT = { + 'version': 1, + 'disable_existing_loggers': True, + 'formatters': { + 'standard': {'format': '[%(levelname)s] %(funcName)s: %(message)s'}, + # 'file': {'format': '[%(levelname)s] %(asctime)s (%(funcName)s): %(message)s', + # #: Depending on your wanted precision, disable this line + # 'datefmt': '%Y-%m-%d %H:%M:%S', + # }, + }, + 'handlers': { + 'console': { + 'level': 'DEBUG', # will be set later + 'formatter': 'standard', + 'class': 'logging.StreamHandler', + }, + # 'fh': { + # 'level': 'DEBUG', # we want all in the log file + # # Change the formatting here, if you want a different output in your log file + # 'formatter': 'file', + # 'class': 'logging.FileHandler', + # 'filename': '/tmp/log.txt', + # 'mode': 'w', # use "a" if you want to append log output or remove this lien + # }, + }, + 'loggers': { + LOGGERNAME: { + 'handlers': ['console', ], + 'level': 'DEBUG', + 'propagate': False, + }, + # Set the root logger's log level: + '': { + 'level': 'WARNING', + 'handlers': ["console"], + } + } +} + + +#: Map verbosity level (int) to log level +LOGLEVELS = {None: logging.WARNING, # 0 + 0: logging.WARNING, + 1: logging.INFO, + 2: logging.DEBUG, + } + + +#: Change root logger level from WARNING (default) to NOTSET +#: in order for all messages to be delegated. 
+logging.getLogger().setLevel(logging.NOTSET) + +#: Instantiate our logger +log = logging.getLogger(LOGGERNAME) + + +def parsecli(cliargs=None) -> argparse.Namespace: + """Parse CLI with :class:`argparse.ArgumentParser` and return parsed result + :param cliargs: Arguments to parse or None (=use sys.argv) + :return: parsed CLI result + """ + parser = argparse.ArgumentParser(description=__doc__, + epilog="Version %s written by %s " % (__version__, __author__) + ) + + parser.add_argument('-v', '--verbose', + action='count', + default=0, # emit warnings, errors, and critical + help="increase verbosity level") + + parser.add_argument('--version', + action='version', + version='%(prog)s ' + __version__ + ) + parser.add_argument("XMLFILE", + help="Searches for metadata in the XML file" + ) + + args = parser.parse_args(args=cliargs) + # Setup logging and the log level according to the "-v" option + dictConfig(DEFAULT_LOGGING_DICT) + # Setup logging and the log level according to the "-v" option + # If user requests more, cut it and return always DEBUG + loglevel = LOGLEVELS.get(args.verbose, logging.DEBUG) + + # Set console logger to the requested log level + for handler in log.handlers: + if handler.name == "console": + handler.setLevel(loglevel) + + log.debug("CLI result: %s", args) + return args + + +def main(cliargs=None) -> int: + """Entry point for the application script + :param cliargs: Arguments to parse or None (=use :class:`sys.argv`) + :return: error code + """ + try: + args = parsecli(cliargs) + # do some useful things here... + # If everything was good, return without error: + log.debug("I'm a debug message.") + log.info("I'm an info message") + log.warning("I'm a warning message.") + log.error("I'm an error message.") + log.fatal("I'm a really fatal massage!") + + return 0 + + # List possible exceptions here and return error codes + except Exception as error: # FIXME: add a more specific exception here! 
+ log.fatal(error) + # Use whatever return code is appropriate for your specific exception + return 10 + + +if __name__ == "__main__": + sys.exit(main()) \ No newline at end of file diff --git a/python-scripts/metadatavalidator/pyproject.toml b/python-scripts/metadatavalidator/pyproject.toml new file mode 100644 index 000000000..bb0a10dce --- /dev/null +++ b/python-scripts/metadatavalidator/pyproject.toml @@ -0,0 +1,51 @@ +# https://packaging.python.org/en/latest/specifications/declaring-project-metadata/#declaring-project-metadata +[build-system] +requires = [ + # Don't forget to install the "build" package in your + # environment! + # pyproject-build ... + "setuptools>=61.0.0", + # "setuptools >=63.2.0", + # "setuptools @ git+https://github.com/pypa/setuptools.git@main" + # "git+https://github.com/pypa/setuptools.git@v67.0.0", + # "setuptools-scm", + "wheel ~=0.37.1", +] +build-backend = "setuptools.build_meta" + +[project] +name = "metadatavalidator" +readme = "README.rst" +description = "Validates DocBook metadata (mainly in tags)" +# readme = "README.rst" +license = {file = "LICENSE.rst"} +authors = [ + {name = "Tom Schraitle", email = "toms@suse.de"}, +] +requires-python = ">=3.11" +classifiers = [ + "Development Status :: 2 - Pre-Alpha", + "Intended Audience :: System Administrators", + "Framework :: Litestar", + "License :: OSI Approved", + "Operating System :: POSIX :: Linux", + "Environment :: Web Environment", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.11", + "Topic :: Documentation", + "Topic :: Text Processing", + "Topic :: Text Processing :: Markup :: XML", +] +dynamic = ["version"] + + +[project.scripts] +metavalidator = "metadatavalidator:main" + +[tool.setuptools.dynamic] +version = {attr = "metadatavalidator.__version__"} + +[tool.ruff] +line-length = 88 +target-version = "py311" +include = [ "pyproject.toml", "metadatavalidator.py", ] From 68bbb706056fff2b077a3466885ff78504f393ff Mon Sep 17 00:00:00 2001 From: 
Tom Schraitle Date: Fri, 17 May 2024 15:34:02 +0200 Subject: [PATCH 002/107] Remove comments --- .../metadatavalidator/metadatavalidator.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/python-scripts/metadatavalidator/metadatavalidator.py b/python-scripts/metadatavalidator/metadatavalidator.py index faf269ac8..696dad84b 100755 --- a/python-scripts/metadatavalidator/metadatavalidator.py +++ b/python-scripts/metadatavalidator/metadatavalidator.py @@ -29,10 +29,6 @@ 'disable_existing_loggers': True, 'formatters': { 'standard': {'format': '[%(levelname)s] %(funcName)s: %(message)s'}, - # 'file': {'format': '[%(levelname)s] %(asctime)s (%(funcName)s): %(message)s', - # #: Depending on your wanted precision, disable this line - # 'datefmt': '%Y-%m-%d %H:%M:%S', - # }, }, 'handlers': { 'console': { @@ -40,23 +36,15 @@ 'formatter': 'standard', 'class': 'logging.StreamHandler', }, - # 'fh': { - # 'level': 'DEBUG', # we want all in the log file - # # Change the formatting here, if you want a different output in your log file - # 'formatter': 'file', - # 'class': 'logging.FileHandler', - # 'filename': '/tmp/log.txt', - # 'mode': 'w', # use "a" if you want to append log output or remove this lien - # }, }, 'loggers': { LOGGERNAME: { - 'handlers': ['console', ], + 'handlers': ['console',], 'level': 'DEBUG', 'propagate': False, }, # Set the root logger's log level: - '': { + 'root': { 'level': 'WARNING', 'handlers': ["console"], } From 82b73c3da3b45da89d2a8fae64f2b999c5cf56dc Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Fri, 17 May 2024 15:34:32 +0200 Subject: [PATCH 003/107] Define the config directories to read config --- .../metadatavalidator/metadatavalidator.py | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/python-scripts/metadatavalidator/metadatavalidator.py b/python-scripts/metadatavalidator/metadatavalidator.py index 696dad84b..45b929f75 100755 --- 
a/python-scripts/metadatavalidator/metadatavalidator.py +++ b/python-scripts/metadatavalidator/metadatavalidator.py @@ -7,10 +7,13 @@ import configparser import logging from logging.config import dictConfig +import os.path import sys +import typing as t try: from lxml import etree + except ImportError: print("Cannot import lxml. ", file=sys.stderr) sys.exit(10) @@ -21,6 +24,17 @@ #: The logger name; can also set to "__name__" LOGGERNAME = "metadata" +#: The configuration paths where to search for the config +CONFIGDIRS: t.Sequence = [ + # Search in the current directory: + "metadatavalidator.ini", + # In the users' home directory: + "~/.config/metadatavalidator/config.ini", + # In the system + "/etc/metadatavalidator/config.ini" + ] +CONFIGDIRS = tuple(os.path.expanduser(i) for i in CONFIGDIRS) + #: The dictionary, passed to :class:`logging.config.dictConfig`, #: is used to setup your logging formatters, handlers, and loggers #: For details, see https://docs.python.org/3.4/library/logging.config.html#configuration-dictionary-schema @@ -106,6 +120,11 @@ def parsecli(cliargs=None) -> argparse.Namespace: return args +def readconfig(): + """ + """ + + def main(cliargs=None) -> int: """Entry point for the application script :param cliargs: Arguments to parse or None (=use :class:`sys.argv`) @@ -113,6 +132,7 @@ def main(cliargs=None) -> int: """ try: args = parsecli(cliargs) + log.debug("CLI args %s", args) # do some useful things here... 
# If everything was good, return without error: log.debug("I'm a debug message.") From e3d244aa256ba7bb4e2e357ddd54eb5ca7d7ad20 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Fri, 17 May 2024 15:58:06 +0200 Subject: [PATCH 004/107] Introduce config parsing and exception handling * Configs are searched in the current directory ("config.ini"), in the users' home directory ("~/.config/metadatavalidator/config.ini") and in the system directory ("/etc/metadatavalidator/config.ini") * Introduce NoConfigFilesFoundError; raised when no config file could be found * Correct return codes --- python-scripts/metadatavalidator/README.rst | 25 ++++++++++++ .../metadatavalidator/metadatavalidator.ini | 1 + .../metadatavalidator/metadatavalidator.py | 39 +++++++++++++++---- 3 files changed, 57 insertions(+), 8 deletions(-) create mode 100644 python-scripts/metadatavalidator/metadatavalidator.ini diff --git a/python-scripts/metadatavalidator/README.rst b/python-scripts/metadatavalidator/README.rst index e69de29bb..64fb37630 100644 --- a/python-scripts/metadatavalidator/README.rst +++ b/python-scripts/metadatavalidator/README.rst @@ -0,0 +1,25 @@ +Metadata validator for DocBook +============================== + +The script in this directory checks several metadata definitions for DocBook. +Metadata can be found in the ```` and ```` tags. + + +Requirements +------------ + +* lxml (the more recent, the better) +* Python >=3.11 (only required for installing with :file:`pyproject.toml`) + + +Configuration +------------- + +The configuration file is searched for in the following order (first has the highest priority): + +1. Environment variable :envvar:`METAVALIDATOR_CONFIG`. +1. In the current directory: :file:`metadatavalidator.ini` +1. In the users' home directory: :file:`~/.config/metadatavalidator/config.ini` +1. 
In the system: :file:`/etc/metadatavalidator/config.ini` + + diff --git a/python-scripts/metadatavalidator/metadatavalidator.ini b/python-scripts/metadatavalidator/metadatavalidator.ini new file mode 100644 index 000000000..7c4f6ffb4 --- /dev/null +++ b/python-scripts/metadatavalidator/metadatavalidator.ini @@ -0,0 +1 @@ +[validator] diff --git a/python-scripts/metadatavalidator/metadatavalidator.py b/python-scripts/metadatavalidator/metadatavalidator.py index 45b929f75..5c33f7a2b 100755 --- a/python-scripts/metadatavalidator/metadatavalidator.py +++ b/python-scripts/metadatavalidator/metadatavalidator.py @@ -26,6 +26,7 @@ #: The configuration paths where to search for the config CONFIGDIRS: t.Sequence = [ + # "Reserve" first place for environment variable 'METAVALIDATOR_CONFIG' # Search in the current directory: "metadatavalidator.ini", # In the users' home directory: @@ -33,6 +34,10 @@ # In the system "/etc/metadatavalidator/config.ini" ] +METAVALIDATOR_CONFIG = os.environ.get('METAVALIDATOR_CONFIG') +if METAVALIDATOR_CONFIG is not None: + CONFIGDIRS.insert(0, os.path.expanduser(METAVALIDATOR_CONFIG)) + CONFIGDIRS = tuple(os.path.expanduser(i) for i in CONFIGDIRS) #: The dictionary, passed to :class:`logging.config.dictConfig`, @@ -82,6 +87,11 @@ log = logging.getLogger(LOGGERNAME) +#---------------- +class NoConfigFilesFoundError(FileNotFoundError): + pass + + def parsecli(cliargs=None) -> argparse.Namespace: """Parse CLI with :class:`argparse.ArgumentParser` and return parsed result :param cliargs: Arguments to parse or None (=use sys.argv) @@ -100,7 +110,9 @@ def parsecli(cliargs=None) -> argparse.Namespace: action='version', version='%(prog)s ' + __version__ ) - parser.add_argument("XMLFILE", + parser.add_argument("xmlfiles", + metavar="XMLFILES", + nargs="+", help="Searches for metadata in the XML file" ) @@ -115,14 +127,21 @@ def parsecli(cliargs=None) -> argparse.Namespace: for handler in log.handlers: if handler.name == "console": 
handler.setLevel(loglevel) - - log.debug("CLI result: %s", args) return args -def readconfig(): - """ +def readconfig(dirs: t.Sequence) -> configparser.ConfigParser: + """Read config data from config files + + :param dirs: the directories to search for config files + :return: a :class:`configparser.ConfigParser` object """ + config = configparser.ConfigParser() + configfiles = config.read(dirs) + if not configfiles: + raise NoConfigFilesFoundError("Config files not found") + setattr(config, "configfiles", configfiles) + return config def main(cliargs=None) -> int: @@ -132,6 +151,7 @@ def main(cliargs=None) -> int: """ try: args = parsecli(cliargs) + config = readconfig(CONFIGDIRS) log.debug("CLI args %s", args) # do some useful things here... # If everything was good, return without error: @@ -143,11 +163,14 @@ def main(cliargs=None) -> int: return 0 - # List possible exceptions here and return error codes + except NoConfigFilesFoundError as error: + log.critical("No config files found") + return 100 + except Exception as error: # FIXME: add a more specific exception here! 
- log.fatal(error) + log.exception("Some unknown exception occured", error) # Use whatever return code is appropriate for your specific exception - return 10 + return 200 if __name__ == "__main__": From 11220a1ca902535add1a30e4976b33a357dd90d0 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Wed, 22 May 2024 10:03:39 +0200 Subject: [PATCH 005/107] Snapshot --- .../metadatavalidator/metadatavalidator.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/python-scripts/metadatavalidator/metadatavalidator.py b/python-scripts/metadatavalidator/metadatavalidator.py index 5c33f7a2b..538fcc891 100755 --- a/python-scripts/metadatavalidator/metadatavalidator.py +++ b/python-scripts/metadatavalidator/metadatavalidator.py @@ -127,6 +127,10 @@ def parsecli(cliargs=None) -> argparse.Namespace: for handler in log.handlers: if handler.name == "console": handler.setLevel(loglevel) + + args.config = readconfig(CONFIGDIRS) + log.debug("Reading these config files %s", + getattr(args.config, "configfiles", "n/a")) return args @@ -144,6 +148,15 @@ def readconfig(dirs: t.Sequence) -> configparser.ConfigParser: return config +# def check_ + + +def process(args: argparse.Namespace, config: configparser.ConfigParser): + """ + """ + pass + + def main(cliargs=None) -> int: """Entry point for the application script :param cliargs: Arguments to parse or None (=use :class:`sys.argv`) @@ -153,6 +166,8 @@ def main(cliargs=None) -> int: args = parsecli(cliargs) config = readconfig(CONFIGDIRS) log.debug("CLI args %s", args) + + process(args, config) # do some useful things here... 
# If everything was good, return without error: log.debug("I'm a debug message.") From 9499b9a8100a97dc865c8370adfdb709083c4325 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Wed, 22 May 2024 10:04:05 +0200 Subject: [PATCH 006/107] pyproject.toml: Add tool.pytest.ini_option section --- .../metadatavalidator/pyproject.toml | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/python-scripts/metadatavalidator/pyproject.toml b/python-scripts/metadatavalidator/pyproject.toml index bb0a10dce..023609006 100644 --- a/python-scripts/metadatavalidator/pyproject.toml +++ b/python-scripts/metadatavalidator/pyproject.toml @@ -45,6 +45,26 @@ metavalidator = "metadatavalidator:main" [tool.setuptools.dynamic] version = {attr = "metadatavalidator.__version__"} + +[tool.pytest.ini_options] +testpaths = ["tests"] +norecursedirs = [ + "build", + ".git", + ".eggs/", + ".env/", + ".pyenv/", + ".tmp/", + ] +pythonpath = ["bin/", "tests/"] +# addopts = """ +# --no-cov-on-fail +# --cov=docr +# --cov-report=term-missing +# """ + + + [tool.ruff] line-length = 88 target-version = "py311" From 0d06751398ac6e5db18a841db5d3333b46b79f98 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Wed, 22 May 2024 10:04:21 +0200 Subject: [PATCH 007/107] Move script into bin/ folder --- python-scripts/metadatavalidator/{ => bin}/metadatavalidator.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename python-scripts/metadatavalidator/{ => bin}/metadatavalidator.py (100%) diff --git a/python-scripts/metadatavalidator/metadatavalidator.py b/python-scripts/metadatavalidator/bin/metadatavalidator.py similarity index 100% rename from python-scripts/metadatavalidator/metadatavalidator.py rename to python-scripts/metadatavalidator/bin/metadatavalidator.py From 9aadc40a533ccb0c70833ce6f7364fb72fb38033 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Wed, 22 May 2024 10:19:16 +0200 Subject: [PATCH 008/107] Correct pyproject.toml using bin directory --- 
python-scripts/metadatavalidator/pyproject.toml | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/python-scripts/metadatavalidator/pyproject.toml b/python-scripts/metadatavalidator/pyproject.toml index 023609006..d4da614b6 100644 --- a/python-scripts/metadatavalidator/pyproject.toml +++ b/python-scripts/metadatavalidator/pyproject.toml @@ -17,8 +17,7 @@ build-backend = "setuptools.build_meta" name = "metadatavalidator" readme = "README.rst" description = "Validates DocBook metadata (mainly in tags)" -# readme = "README.rst" -license = {file = "LICENSE.rst"} +# license = {file = "LICENSE.rst"} authors = [ {name = "Tom Schraitle", email = "toms@suse.de"}, ] @@ -40,11 +39,10 @@ dynamic = ["version"] [project.scripts] -metavalidator = "metadatavalidator:main" +metavalidator = "bin.metadatavalidator:main" [tool.setuptools.dynamic] -version = {attr = "metadatavalidator.__version__"} - +version = {attr = "bin.metadatavalidator.__version__"} [tool.pytest.ini_options] testpaths = ["tests"] @@ -64,7 +62,6 @@ pythonpath = ["bin/", "tests/"] # """ - [tool.ruff] line-length = 88 target-version = "py311" From 856612d820aea5989b49a705a9cc327a8d922ad6 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Wed, 22 May 2024 10:45:41 +0200 Subject: [PATCH 009/107] Switch to a src layout --- .../metadatavalidator/pyproject.toml | 27 +++--- .../src/metadatavalidator/__init__.py | 2 + .../src/metadatavalidator/_meta.py | 4 + .../metadatavalidator/cli.py} | 92 ++----------------- .../src/metadatavalidator/common.py | 20 ++++ .../src/metadatavalidator/config.py | 21 +++++ .../src/metadatavalidator/logging.py | 47 ++++++++++ 7 files changed, 111 insertions(+), 102 deletions(-) create mode 100644 python-scripts/metadatavalidator/src/metadatavalidator/__init__.py create mode 100644 python-scripts/metadatavalidator/src/metadatavalidator/_meta.py rename python-scripts/metadatavalidator/{bin/metadatavalidator.py => src/metadatavalidator/cli.py} (56%) mode change 100755 => 
100644 create mode 100644 python-scripts/metadatavalidator/src/metadatavalidator/common.py create mode 100644 python-scripts/metadatavalidator/src/metadatavalidator/config.py create mode 100644 python-scripts/metadatavalidator/src/metadatavalidator/logging.py diff --git a/python-scripts/metadatavalidator/pyproject.toml b/python-scripts/metadatavalidator/pyproject.toml index d4da614b6..bfee4f899 100644 --- a/python-scripts/metadatavalidator/pyproject.toml +++ b/python-scripts/metadatavalidator/pyproject.toml @@ -1,14 +1,8 @@ # https://packaging.python.org/en/latest/specifications/declaring-project-metadata/#declaring-project-metadata [build-system] requires = [ - # Don't forget to install the "build" package in your - # environment! - # pyproject-build ... + # "pyproject-build", "setuptools>=61.0.0", - # "setuptools >=63.2.0", - # "setuptools @ git+https://github.com/pypa/setuptools.git@main" - # "git+https://github.com/pypa/setuptools.git@v67.0.0", - # "setuptools-scm", "wheel ~=0.37.1", ] build-backend = "setuptools.build_meta" @@ -39,10 +33,11 @@ dynamic = ["version"] [project.scripts] -metavalidator = "bin.metadatavalidator:main" +metadatavalidator = "metadatavalidator.cli:main" + [tool.setuptools.dynamic] -version = {attr = "bin.metadatavalidator.__version__"} +version = {attr = "metadatavalidator.__version__"} [tool.pytest.ini_options] testpaths = ["tests"] @@ -54,15 +49,15 @@ norecursedirs = [ ".pyenv/", ".tmp/", ] -pythonpath = ["bin/", "tests/"] -# addopts = """ -# --no-cov-on-fail -# --cov=docr -# --cov-report=term-missing -# """ +pythonpath = ["src", "tests"] +addopts = """ + --no-cov-on-fail + --cov=docr + --cov-report=term-missing +""" [tool.ruff] line-length = 88 target-version = "py311" -include = [ "pyproject.toml", "metadatavalidator.py", ] +include = [ "pyproject.toml", "src/metadatavalidator", ] diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/__init__.py b/python-scripts/metadatavalidator/src/metadatavalidator/__init__.py new 
file mode 100644 index 000000000..76535c807 --- /dev/null +++ b/python-scripts/metadatavalidator/src/metadatavalidator/__init__.py @@ -0,0 +1,2 @@ + +from ._meta import __author__, __version__ \ No newline at end of file diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/_meta.py b/python-scripts/metadatavalidator/src/metadatavalidator/_meta.py new file mode 100644 index 000000000..608335db3 --- /dev/null +++ b/python-scripts/metadatavalidator/src/metadatavalidator/_meta.py @@ -0,0 +1,4 @@ +"""Version and author""" + +__version__ = "0.2.0" +__author__ = "Tom Schraitle " \ No newline at end of file diff --git a/python-scripts/metadatavalidator/bin/metadatavalidator.py b/python-scripts/metadatavalidator/src/metadatavalidator/cli.py old mode 100755 new mode 100644 similarity index 56% rename from python-scripts/metadatavalidator/bin/metadatavalidator.py rename to python-scripts/metadatavalidator/src/metadatavalidator/cli.py index 538fcc891..a980c5084 --- a/python-scripts/metadatavalidator/bin/metadatavalidator.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/cli.py @@ -1,10 +1,4 @@ -#!/usr/bin/python3 -""" - -""" - import argparse -import configparser import logging from logging.config import dictConfig import os.path @@ -18,73 +12,16 @@ print("Cannot import lxml. 
", file=sys.stderr) sys.exit(10) -__version__ = "0.2.0" -__author__ = "Tom Schraitle " - -#: The logger name; can also set to "__name__" -LOGGERNAME = "metadata" - -#: The configuration paths where to search for the config -CONFIGDIRS: t.Sequence = [ - # "Reserve" first place for environment variable 'METAVALIDATOR_CONFIG' - # Search in the current directory: - "metadatavalidator.ini", - # In the users' home directory: - "~/.config/metadatavalidator/config.ini", - # In the system - "/etc/metadatavalidator/config.ini" - ] -METAVALIDATOR_CONFIG = os.environ.get('METAVALIDATOR_CONFIG') -if METAVALIDATOR_CONFIG is not None: - CONFIGDIRS.insert(0, os.path.expanduser(METAVALIDATOR_CONFIG)) - -CONFIGDIRS = tuple(os.path.expanduser(i) for i in CONFIGDIRS) - -#: The dictionary, passed to :class:`logging.config.dictConfig`, -#: is used to setup your logging formatters, handlers, and loggers -#: For details, see https://docs.python.org/3.4/library/logging.config.html#configuration-dictionary-schema -DEFAULT_LOGGING_DICT = { - 'version': 1, - 'disable_existing_loggers': True, - 'formatters': { - 'standard': {'format': '[%(levelname)s] %(funcName)s: %(message)s'}, - }, - 'handlers': { - 'console': { - 'level': 'DEBUG', # will be set later - 'formatter': 'standard', - 'class': 'logging.StreamHandler', - }, - }, - 'loggers': { - LOGGERNAME: { - 'handlers': ['console',], - 'level': 'DEBUG', - 'propagate': False, - }, - # Set the root logger's log level: - 'root': { - 'level': 'WARNING', - 'handlers': ["console"], - } - } -} - - -#: Map verbosity level (int) to log level -LOGLEVELS = {None: logging.WARNING, # 0 - 0: logging.WARNING, - 1: logging.INFO, - 2: logging.DEBUG, - } +from . import __author__, __version__ +from .config import readconfig +from .common import CONFIGDIRS +from .logging import DEFAULT_LOGGING_DICT, LOGLEVELS, log #: Change root logger level from WARNING (default) to NOTSET #: in order for all messages to be delegated. 
logging.getLogger().setLevel(logging.NOTSET) -#: Instantiate our logger -log = logging.getLogger(LOGGERNAME) #---------------- @@ -134,27 +71,10 @@ def parsecli(cliargs=None) -> argparse.Namespace: return args -def readconfig(dirs: t.Sequence) -> configparser.ConfigParser: - """Read config data from config files - - :param dirs: the directories to search for config files - :return: a :class:`configparser.ConfigParser` object +def process(args, config): """ - config = configparser.ConfigParser() - configfiles = config.read(dirs) - if not configfiles: - raise NoConfigFilesFoundError("Config files not found") - setattr(config, "configfiles", configfiles) - return config - - -# def check_ - - -def process(args: argparse.Namespace, config: configparser.ConfigParser): """ - """ - pass + log.info("I'm the process function!") def main(cliargs=None) -> int: diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/common.py b/python-scripts/metadatavalidator/src/metadatavalidator/common.py new file mode 100644 index 000000000..d4cf5c5fc --- /dev/null +++ b/python-scripts/metadatavalidator/src/metadatavalidator/common.py @@ -0,0 +1,20 @@ +import os.path +import typing as t + + +#: The configuration paths where to search for the config +CONFIGDIRS: t.Sequence = [ + # "Reserve" first place for environment variable 'METAVALIDATOR_CONFIG' + # Search in the current directory: + "metadatavalidator.ini", + # In the users' home directory: + "~/.config/metadatavalidator/config.ini", + # In the system + "/etc/metadatavalidator/config.ini" + ] +METAVALIDATOR_CONFIG = os.environ.get('METAVALIDATOR_CONFIG') +if METAVALIDATOR_CONFIG is not None: + CONFIGDIRS.insert(0, os.path.expanduser(METAVALIDATOR_CONFIG)) + +CONFIGDIRS = tuple(os.path.expanduser(i) for i in CONFIGDIRS) + diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/config.py b/python-scripts/metadatavalidator/src/metadatavalidator/config.py new file mode 100644 index 000000000..e279b1831 --- /dev/null 
+++ b/python-scripts/metadatavalidator/src/metadatavalidator/config.py @@ -0,0 +1,21 @@ +import configparser +import typing as t + + +class NoConfigFilesFoundError(FileNotFoundError): + pass + + +def readconfig(dirs: t.Sequence) -> configparser.ConfigParser: + """Read config data from config files + + :param dirs: the directories to search for config files + :return: a :class:`configparser.ConfigParser` object + """ + config = configparser.ConfigParser() + configfiles = config.read(dirs) + if not configfiles: + raise NoConfigFilesFoundError("Config files not found") + setattr(config, "configfiles", configfiles) + return config + diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/logging.py b/python-scripts/metadatavalidator/src/metadatavalidator/logging.py new file mode 100644 index 000000000..e6605811d --- /dev/null +++ b/python-scripts/metadatavalidator/src/metadatavalidator/logging.py @@ -0,0 +1,47 @@ +import logging + +#: The logger name; can also set to "__name__" +LOGGERNAME = "metadata" + + +#: The dictionary, passed to :class:`logging.config.dictConfig`, +#: is used to setup your logging formatters, handlers, and loggers +#: For details, see https://docs.python.org/3.4/library/logging.config.html#configuration-dictionary-schema +DEFAULT_LOGGING_DICT = { + 'version': 1, + 'disable_existing_loggers': True, + 'formatters': { + 'standard': {'format': '[%(levelname)s] %(funcName)s: %(message)s'}, + }, + 'handlers': { + 'console': { + 'level': 'DEBUG', # will be set later + 'formatter': 'standard', + 'class': 'logging.StreamHandler', + }, + }, + 'loggers': { + LOGGERNAME: { + 'handlers': ['console',], + 'level': 'DEBUG', + 'propagate': False, + }, + # Set the root logger's log level: + 'root': { + 'level': 'WARNING', + 'handlers': ["console"], + } + } +} + + +#: Map verbosity level (int) to log level +LOGLEVELS = {None: logging.WARNING, # 0 + 0: logging.WARNING, + 1: logging.INFO, + 2: logging.DEBUG, + } + + +#: Instantiate our logger +log = 
logging.getLogger(LOGGERNAME) \ No newline at end of file From d69646bc4dd75488a77bf05092675aeac17b5d53 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Wed, 22 May 2024 11:56:38 +0200 Subject: [PATCH 010/107] Add simple test cases --- .../metadatavalidator/src/metadatavalidator/cli.py | 7 +------ .../metadatavalidator/src/metadatavalidator/process.py | 9 +++++++++ python-scripts/metadatavalidator/tests/conftest.py | 5 +++++ .../metadatavalidator/tests/test_script_meta.py | 9 +++++++++ 4 files changed, 24 insertions(+), 6 deletions(-) create mode 100644 python-scripts/metadatavalidator/src/metadatavalidator/process.py create mode 100644 python-scripts/metadatavalidator/tests/conftest.py create mode 100644 python-scripts/metadatavalidator/tests/test_script_meta.py diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/cli.py b/python-scripts/metadatavalidator/src/metadatavalidator/cli.py index a980c5084..f8880f3d4 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/cli.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/cli.py @@ -16,7 +16,7 @@ from .config import readconfig from .common import CONFIGDIRS from .logging import DEFAULT_LOGGING_DICT, LOGLEVELS, log - +from .process import process #: Change root logger level from WARNING (default) to NOTSET #: in order for all messages to be delegated. 
@@ -71,11 +71,6 @@ def parsecli(cliargs=None) -> argparse.Namespace: return args -def process(args, config): - """ - """ - log.info("I'm the process function!") - def main(cliargs=None) -> int: """Entry point for the application script diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/process.py b/python-scripts/metadatavalidator/src/metadatavalidator/process.py new file mode 100644 index 000000000..5671ac094 --- /dev/null +++ b/python-scripts/metadatavalidator/src/metadatavalidator/process.py @@ -0,0 +1,9 @@ +import asyncio + +from .logging import log + + +def process(args, config): + """ + """ + log.info("I'm the process function!") \ No newline at end of file diff --git a/python-scripts/metadatavalidator/tests/conftest.py b/python-scripts/metadatavalidator/tests/conftest.py new file mode 100644 index 000000000..2a1446174 --- /dev/null +++ b/python-scripts/metadatavalidator/tests/conftest.py @@ -0,0 +1,5 @@ +import sys +import os, os.path + +os.environ.setdefault("PYTHONPATH", + os.path.normpath(os.path.join(os.path.dirname(__file__), ".."))) diff --git a/python-scripts/metadatavalidator/tests/test_script_meta.py b/python-scripts/metadatavalidator/tests/test_script_meta.py new file mode 100644 index 000000000..d87f90249 --- /dev/null +++ b/python-scripts/metadatavalidator/tests/test_script_meta.py @@ -0,0 +1,9 @@ +from metadatavalidator import __author__, __version__ + + +def test_version(): + assert __version__ + + +def test_author(): + assert __author__ \ No newline at end of file From 1185655b28a2173921df82f919cc18d554a94e48 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Wed, 22 May 2024 12:14:39 +0200 Subject: [PATCH 011/107] Define custom exceptions --- .../metadatavalidator/src/metadatavalidator/cli.py | 8 ++------ .../src/metadatavalidator/exceptions.py | 10 ++++++++++ 2 files changed, 12 insertions(+), 6 deletions(-) create mode 100644 python-scripts/metadatavalidator/src/metadatavalidator/exceptions.py diff --git 
a/python-scripts/metadatavalidator/src/metadatavalidator/cli.py b/python-scripts/metadatavalidator/src/metadatavalidator/cli.py index f8880f3d4..38ac27735 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/cli.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/cli.py @@ -1,7 +1,7 @@ import argparse +import asyncio import logging from logging.config import dictConfig -import os.path import sys import typing as t @@ -15,6 +15,7 @@ from . import __author__, __version__ from .config import readconfig from .common import CONFIGDIRS +from .exceptions import NoConfigFilesFoundError from .logging import DEFAULT_LOGGING_DICT, LOGLEVELS, log from .process import process @@ -24,11 +25,6 @@ -#---------------- -class NoConfigFilesFoundError(FileNotFoundError): - pass - - def parsecli(cliargs=None) -> argparse.Namespace: """Parse CLI with :class:`argparse.ArgumentParser` and return parsed result :param cliargs: Arguments to parse or None (=use sys.argv) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/exceptions.py b/python-scripts/metadatavalidator/src/metadatavalidator/exceptions.py new file mode 100644 index 000000000..4e1f24644 --- /dev/null +++ b/python-scripts/metadatavalidator/src/metadatavalidator/exceptions.py @@ -0,0 +1,10 @@ +""" +Our custom exception classes +""" +class NoConfigFilesFoundError(FileNotFoundError): + pass + + +class BaseMetadataError(ValueError): + pass + From 2a6e0b38cf6e249d7388771df232bdd2bcc8fc6b Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Wed, 22 May 2024 13:26:26 +0200 Subject: [PATCH 012/107] Add process skeleton --- .../src/metadatavalidator/cli.py | 9 +--- .../src/metadatavalidator/config.py | 3 +- .../src/metadatavalidator/process.py | 42 ++++++++++++++++++- 3 files changed, 42 insertions(+), 12 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/cli.py b/python-scripts/metadatavalidator/src/metadatavalidator/cli.py index 38ac27735..1ccfd374a 100644 --- 
a/python-scripts/metadatavalidator/src/metadatavalidator/cli.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/cli.py @@ -78,14 +78,7 @@ def main(cliargs=None) -> int: config = readconfig(CONFIGDIRS) log.debug("CLI args %s", args) - process(args, config) - # do some useful things here... - # If everything was good, return without error: - log.debug("I'm a debug message.") - log.info("I'm an info message") - log.warning("I'm a warning message.") - log.error("I'm an error message.") - log.fatal("I'm a really fatal massage!") + asyncio.run(process(args, config)) return 0 diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/config.py b/python-scripts/metadatavalidator/src/metadatavalidator/config.py index e279b1831..0b73b7f1f 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/config.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/config.py @@ -1,9 +1,8 @@ import configparser import typing as t +from .exceptions import NoConfigFilesFoundError -class NoConfigFilesFoundError(FileNotFoundError): - pass def readconfig(dirs: t.Sequence) -> configparser.ConfigParser: diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/process.py b/python-scripts/metadatavalidator/src/metadatavalidator/process.py index 5671ac094..f4b6ef9ad 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/process.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/process.py @@ -1,9 +1,47 @@ import asyncio +from argparse import Namespace +from configparser import ConfigParser + +from lxml import etree from .logging import log -def process(args, config): +# Example check functions +def check_root_tag(tree): + if tree.getroot().tag in ("article", "book", "topic"): + raise ValueError("Root tag is not 'expected_root'") + +def check_element_exists(tree, element_name): + if tree.find(element_name) is None: + raise ValueError(f"Element '{element_name}' not found") + + +async def process_xml_file(xmlfile: str): 
+ """Process a single XML file + """ + try: + # loop = asyncio.get_running_loop() + # tree = await loop.run_in_executor(None, etree.parse, xmlfile) + tree = etree.parse(xmlfile) + + # Apply check functions + check_root_tag(tree) + # check_element_exists(tree, 'required_element') + + # Add calls to more check functions here... + + log.info("File %s processed successfully.", xmlfile) + + except etree.XMLSyntaxError as e: + log.fatal("Problem with %r: %s", xmlfile, e) + # print(f"Error in file {xmlfile}: {e}") + + +async def process(args: Namespace, config: ConfigParser): """ """ - log.info("I'm the process function!") \ No newline at end of file + log.debug("Process all XML files...") + async with asyncio.TaskGroup() as tg: + for xmlfile in args.xmlfiles: + tg.create_task(process_xml_file(xmlfile)) \ No newline at end of file From 4bd2a12694cadeb574401145c9e28b2ecdb0bcae Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Wed, 22 May 2024 13:26:50 +0200 Subject: [PATCH 013/107] Add dependencies and optional-dependencies --- python-scripts/metadatavalidator/pyproject.toml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/python-scripts/metadatavalidator/pyproject.toml b/python-scripts/metadatavalidator/pyproject.toml index bfee4f899..046614925 100644 --- a/python-scripts/metadatavalidator/pyproject.toml +++ b/python-scripts/metadatavalidator/pyproject.toml @@ -30,7 +30,16 @@ classifiers = [ "Topic :: Text Processing :: Markup :: XML", ] dynamic = ["version"] +dependencies = [ + "lxml", +] +[project.optional-dependencies] +test = [ + "pytest", + "pytest-cov", + "pytest-asyncio", +] [project.scripts] metadatavalidator = "metadatavalidator.cli:main" From 6fd948ddcb672ea5b9a897b862c4fd5558c59045 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Wed, 22 May 2024 15:14:15 +0200 Subject: [PATCH 014/107] Start with a two small check functions --- .../metadatavalidator/metadatavalidator.ini | 2 + .../src/metadatavalidator/checks/__init__.py | 5 ++ 
.../metadatavalidator/checks/check_root.py | 22 ++++++++ .../src/metadatavalidator/cli.py | 1 - .../src/metadatavalidator/common.py | 1 + .../src/metadatavalidator/exceptions.py | 5 ++ .../src/metadatavalidator/process.py | 50 +++++++++---------- 7 files changed, 60 insertions(+), 26 deletions(-) create mode 100644 python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py create mode 100644 python-scripts/metadatavalidator/src/metadatavalidator/checks/check_root.py diff --git a/python-scripts/metadatavalidator/metadatavalidator.ini b/python-scripts/metadatavalidator/metadatavalidator.ini index 7c4f6ffb4..010a28619 100644 --- a/python-scripts/metadatavalidator/metadatavalidator.ini +++ b/python-scripts/metadatavalidator/metadatavalidator.ini @@ -1 +1,3 @@ [validator] +file_extension = .xml +check_root_elements = book article topic \ No newline at end of file diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py new file mode 100644 index 000000000..c2c02e60c --- /dev/null +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py @@ -0,0 +1,5 @@ +from .check_root import check_root_tag, check_namespace + + +__all__ = [] + diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_root.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_root.py new file mode 100644 index 000000000..71b466f56 --- /dev/null +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_root.py @@ -0,0 +1,22 @@ +from lxml import etree + +from ..common import DOCBOOK_NS +from ..exceptions import InvalidValueError +from ..logging import log + + +def check_root_tag(tree): + """ + """ + tag = etree.QName(tree.getroot().tag) + log.debug("Found tag <%s>", tag) + if tag.localname not in ("topic"): + raise InvalidValueError("Root tag is not ") + + +def check_namespace(tree): + """Checks the 
namespace""" + tag = etree.QName(tree.getroot().tag) + log.debug("Found namespace %s", tag.namespace) + if tag.namespace != DOCBOOK_NS: + raise InvalidValueError("Root element doesn't belong to DocBook 5") diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/cli.py b/python-scripts/metadatavalidator/src/metadatavalidator/cli.py index 1ccfd374a..7b5e2d34f 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/cli.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/cli.py @@ -67,7 +67,6 @@ def parsecli(cliargs=None) -> argparse.Namespace: return args - def main(cliargs=None) -> int: """Entry point for the application script :param cliargs: Arguments to parse or None (=use :class:`sys.argv`) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/common.py b/python-scripts/metadatavalidator/src/metadatavalidator/common.py index d4cf5c5fc..0c0e72cb4 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/common.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/common.py @@ -18,3 +18,4 @@ CONFIGDIRS = tuple(os.path.expanduser(i) for i in CONFIGDIRS) +DOCBOOK_NS = "http://docbook.org/ns/docbook" diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/exceptions.py b/python-scripts/metadatavalidator/src/metadatavalidator/exceptions.py index 4e1f24644..58317247a 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/exceptions.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/exceptions.py @@ -8,3 +8,8 @@ class NoConfigFilesFoundError(FileNotFoundError): class BaseMetadataError(ValueError): pass +class InvalidElementError(BaseMetadataError): + pass + +class InvalidValueError(BaseMetadataError): + pass \ No newline at end of file diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/process.py b/python-scripts/metadatavalidator/src/metadatavalidator/process.py index f4b6ef9ad..20543aca3 100644 --- 
a/python-scripts/metadatavalidator/src/metadatavalidator/process.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/process.py @@ -4,44 +4,44 @@ from lxml import etree +from .checks.check_root import check_root_tag, check_namespace +from .exceptions import InvalidValueError from .logging import log -# Example check functions -def check_root_tag(tree): - if tree.getroot().tag in ("article", "book", "topic"): - raise ValueError("Root tag is not 'expected_root'") - -def check_element_exists(tree, element_name): - if tree.find(element_name) is None: - raise ValueError(f"Element '{element_name}' not found") - - -async def process_xml_file(xmlfile: str): +async def process_xml_file(xmlfile: str, config: ConfigParser): """Process a single XML file + + :param xmlfile: the XML file to check for meta data + :param config: read-only configuration from INI file """ - try: - # loop = asyncio.get_running_loop() - # tree = await loop.run_in_executor(None, etree.parse, xmlfile) - tree = etree.parse(xmlfile) + for checkfunc in [check_namespace, check_root_tag]: + try: + # loop = asyncio.get_running_loop() + # tree = await loop.run_in_executor(None, etree.parse, xmlfile) + tree = etree.parse(xmlfile, + parser=etree.XMLParser(encoding="UTF-8")) - # Apply check functions - check_root_tag(tree) - # check_element_exists(tree, 'required_element') + # Apply check function + checkfunc(tree, config) - # Add calls to more check functions here... 
+ except etree.XMLSyntaxError as e: + log.fatal("Syntax error in %r: %s", xmlfile, e) - log.info("File %s processed successfully.", xmlfile) + except InvalidValueError as e: + log.fatal("Invalid value in %r for %s: %s", + xmlfile, checkfunc.__name__, e) - except etree.XMLSyntaxError as e: - log.fatal("Problem with %r: %s", xmlfile, e) - # print(f"Error in file {xmlfile}: {e}") + log.info("File %r checked successfully.", xmlfile) async def process(args: Namespace, config: ConfigParser): - """ + """Process all XML files that are give on CLI + + :param args: the arguments parsed by argparse + :param config: read-only configuration from INI file """ log.debug("Process all XML files...") async with asyncio.TaskGroup() as tg: for xmlfile in args.xmlfiles: - tg.create_task(process_xml_file(xmlfile)) \ No newline at end of file + tg.create_task(process_xml_file(xmlfile, config)) \ No newline at end of file From 24f4d684744e21a4a769ce8c14dc378578bdeb84 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Thu, 23 May 2024 14:51:37 +0200 Subject: [PATCH 015/107] Validate and convert ConfigParser obj into dict --- .../metadatavalidator/checks/check_root.py | 14 ++-- .../src/metadatavalidator/config.py | 64 ++++++++++++++++++- .../src/metadatavalidator/process.py | 3 +- 3 files changed, 72 insertions(+), 9 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_root.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_root.py index 71b466f56..65d5bd9c6 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_root.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_root.py @@ -1,3 +1,5 @@ +import typing as t + from lxml import etree from ..common import DOCBOOK_NS @@ -5,18 +7,20 @@ from ..logging import log -def check_root_tag(tree): +def check_root_tag(tree: etree.ElementTree, config: dict[t.Any, t.Any]): """ """ + allowed_root_elements = config.get("validator", 
{}).get("check_root_elements") + # log.debug("Found config: %s", allowed_root_elements) tag = etree.QName(tree.getroot().tag) - log.debug("Found tag <%s>", tag) - if tag.localname not in ("topic"): + # log.debug("Found tag <%s>", tag) + if tag.localname not in allowed_root_elements: raise InvalidValueError("Root tag is not ") -def check_namespace(tree): +def check_namespace(tree: etree.ElementTree, config: dict[t.Any, t.Any]): """Checks the namespace""" tag = etree.QName(tree.getroot().tag) - log.debug("Found namespace %s", tag.namespace) + # log.debug("Found namespace %s", tag.namespace) if tag.namespace != DOCBOOK_NS: raise InvalidValueError("Root element doesn't belong to DocBook 5") diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/config.py b/python-scripts/metadatavalidator/src/metadatavalidator/config.py index 0b73b7f1f..718c8de7a 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/config.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/config.py @@ -1,11 +1,68 @@ import configparser +from pathlib import Path +import re +import tomllib import typing as t -from .exceptions import NoConfigFilesFoundError +from .exceptions import MissingKeyError, MissingSectionError, NoConfigFilesFoundError +from .logging import log +def read_and_merge_toml_files(*paths: str) -> dict[t.Any, t.Any]: + """Read and merge TOML files from given paths. -def readconfig(dirs: t.Sequence) -> configparser.ConfigParser: + :param paths: + :return: the merged configuration as dictionary. + """ + merged_config = {} + for path in paths: + full_path = Path(path).resolve() + if full_path.exists(): + with open(full_path, mode='rb') as f: + config = tomllib.load(f) + # merged_config.update(config) + merged_config |= config + return merged_config + + +def as_dict(config: configparser.ConfigParser): + """ + Converts a ConfigParser object into a dictionary. 
+ + The resulting dictionary has sections as keys which point to a dict of the + sections options as key => value pairs. + """ + the_dict = {} + for section in config.sections(): + the_dict[section] = {} + for key, val in config.items(section): + the_dict[section][key] = val + return the_dict + + + +def validate_and_convert_config(config: configparser.ConfigParser): + """Validate sections, keys, and their values of the config + """ + split = re.compile(r"[;, ]") + theconfig = as_dict(config) + + if not config.has_section("validator"): + raise MissingSectionError("validator") + + # Validate "validator" section + check_root_elements = config.get("validator", "check_root_elements", fallback=None) + if check_root_elements is None: + raise MissingKeyError("validator.check_root_elements") + theconfig["validator"]["check_root_elements"] = split.split(check_root_elements) + + # Store the configfiles + theconfig["configfiles"] = getattr(config, "configfiles") + log.debug("The config: %s", theconfig) + return theconfig + + +def readconfig(dirs: t.Sequence) -> dict[t.Any, t.Any]: # configparser.ConfigParser """Read config data from config files :param dirs: the directories to search for config files @@ -16,5 +73,6 @@ def readconfig(dirs: t.Sequence) -> configparser.ConfigParser: if not configfiles: raise NoConfigFilesFoundError("Config files not found") setattr(config, "configfiles", configfiles) - return config + + return validate_and_convert_config(config) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/process.py b/python-scripts/metadatavalidator/src/metadatavalidator/process.py index 20543aca3..f95de91c2 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/process.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/process.py @@ -1,6 +1,7 @@ import asyncio from argparse import Namespace from configparser import ConfigParser +import typing as t from lxml import etree @@ -35,7 +36,7 @@ async def process_xml_file(xmlfile: str, 
config: ConfigParser): log.info("File %r checked successfully.", xmlfile) -async def process(args: Namespace, config: ConfigParser): +async def process(args: Namespace, config: dict[t.Any, t.Any]): """Process all XML files that are give on CLI :param args: the arguments parsed by argparse From c92a579c0a95a2c333cccb9436ac2147fa84f648 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Thu, 23 May 2024 14:52:12 +0200 Subject: [PATCH 016/107] Find checks_* functions semi-automatically --- .../src/metadatavalidator/checks/__init__.py | 5 ++++- .../src/metadatavalidator/process.py | 20 +++++++++++++++---- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py index c2c02e60c..7d25d5460 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py @@ -1,5 +1,8 @@ from .check_root import check_root_tag, check_namespace -__all__ = [] +__all__ = [ + "check_root_tag", + "check_namespace", +] diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/process.py b/python-scripts/metadatavalidator/src/metadatavalidator/process.py index f95de91c2..1fa8620b6 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/process.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/process.py @@ -1,22 +1,34 @@ import asyncio from argparse import Namespace -from configparser import ConfigParser +# from configparser import ConfigParser import typing as t from lxml import etree -from .checks.check_root import check_root_tag, check_namespace +# from .checks.check_root import check_root_tag, check_namespace +from . 
import checks from .exceptions import InvalidValueError from .logging import log -async def process_xml_file(xmlfile: str, config: ConfigParser): +def get_all_check_functions(name): + """"Yield a check function from the :mod:`metadatavalidator.checks` + package + """ + import importlib + module = importlib.import_module(name) + for name, obj in module.__dict__.items(): + if callable(obj) and name.startswith("check_"): + yield obj + + +async def process_xml_file(xmlfile: str, config: dict[t.Any, t.Any]): """Process a single XML file :param xmlfile: the XML file to check for meta data :param config: read-only configuration from INI file """ - for checkfunc in [check_namespace, check_root_tag]: + for checkfunc in get_all_check_functions(checks.__package__): try: # loop = asyncio.get_running_loop() # tree = await loop.run_in_executor(None, etree.parse, xmlfile) From ca644bcc3d066bf74a093ce2dd19cfa0de998378 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Thu, 23 May 2024 14:53:20 +0200 Subject: [PATCH 017/107] Introduce exception classes for config errors --- .../src/metadatavalidator/exceptions.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/exceptions.py b/python-scripts/metadatavalidator/src/metadatavalidator/exceptions.py index 58317247a..ca1d77011 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/exceptions.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/exceptions.py @@ -1,10 +1,25 @@ """ Our custom exception classes """ + class NoConfigFilesFoundError(FileNotFoundError): pass +# --- Configuration exceptions +class BaseConfigError(ValueError): + pass + + +class MissingSectionError(BaseConfigError): + """A missing section could not be found in the config""" + + +class MissingKeyError(BaseConfigError): + """A missing key could not be found in the config""" + + +# --- Validator exceptions class BaseMetadataError(ValueError): pass From 
55ae26de01931ba7b85cdca0ea74aa1623fe4475 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Fri, 24 May 2024 13:43:03 +0200 Subject: [PATCH 018/107] pyproject.toml: Use correct coverage module --- python-scripts/metadatavalidator/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-scripts/metadatavalidator/pyproject.toml b/python-scripts/metadatavalidator/pyproject.toml index 046614925..ad7eca8f7 100644 --- a/python-scripts/metadatavalidator/pyproject.toml +++ b/python-scripts/metadatavalidator/pyproject.toml @@ -61,7 +61,7 @@ norecursedirs = [ pythonpath = ["src", "tests"] addopts = """ --no-cov-on-fail - --cov=docr + --cov=metadatavalidator --cov-report=term-missing """ From c969c8a168b342c9f0ec0f1502ef7db527feaadd Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Fri, 24 May 2024 14:02:37 +0200 Subject: [PATCH 019/107] Introduce valid_languages --- .../metadatavalidator/metadatavalidator.ini | 3 +- .../src/metadatavalidator/config.py | 38 ++++++------ .../tests/data/metadatavalidator.ini | 4 ++ .../tests/test_script_config.py | 61 +++++++++++++++++++ 4 files changed, 88 insertions(+), 18 deletions(-) create mode 100644 python-scripts/metadatavalidator/tests/data/metadatavalidator.ini create mode 100644 python-scripts/metadatavalidator/tests/test_script_config.py diff --git a/python-scripts/metadatavalidator/metadatavalidator.ini b/python-scripts/metadatavalidator/metadatavalidator.ini index 010a28619..e86803e9a 100644 --- a/python-scripts/metadatavalidator/metadatavalidator.ini +++ b/python-scripts/metadatavalidator/metadatavalidator.ini @@ -1,3 +1,4 @@ [validator] file_extension = .xml -check_root_elements = book article topic \ No newline at end of file +check_root_elements = book article topic +valid_languages = ar-ar cs-cz de-de en-us es-es fr-fr hu-hu it-it ja-jp ko-kr nl-nl pl-pl pt-br ru-ru sv-se zh-cn zh-tw \ No newline at end of file diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/config.py 
b/python-scripts/metadatavalidator/src/metadatavalidator/config.py index 718c8de7a..e2d0111aa 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/config.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/config.py @@ -1,28 +1,28 @@ import configparser from pathlib import Path import re -import tomllib +# import tomllib import typing as t from .exceptions import MissingKeyError, MissingSectionError, NoConfigFilesFoundError from .logging import log -def read_and_merge_toml_files(*paths: str) -> dict[t.Any, t.Any]: - """Read and merge TOML files from given paths. +# def read_and_merge_toml_files(*paths: str) -> dict[t.Any, t.Any]: +# """Read and merge TOML files from given paths. - :param paths: - :return: the merged configuration as dictionary. - """ - merged_config = {} - for path in paths: - full_path = Path(path).resolve() - if full_path.exists(): - with open(full_path, mode='rb') as f: - config = tomllib.load(f) - # merged_config.update(config) - merged_config |= config - return merged_config +# :param paths: +# :return: the merged configuration as dictionary. 
+# """ +# merged_config = {} +# for path in paths: +# full_path = Path(path).resolve() +# if full_path.exists(): +# with open(full_path, mode='rb') as f: +# config = tomllib.load(f) +# # merged_config.update(config) +# merged_config |= config +# return merged_config def as_dict(config: configparser.ConfigParser): @@ -40,7 +40,6 @@ def as_dict(config: configparser.ConfigParser): return the_dict - def validate_and_convert_config(config: configparser.ConfigParser): """Validate sections, keys, and their values of the config """ @@ -56,9 +55,14 @@ def validate_and_convert_config(config: configparser.ConfigParser): raise MissingKeyError("validator.check_root_elements") theconfig["validator"]["check_root_elements"] = split.split(check_root_elements) + valid_languages = config.get("validator", "valid_languages", fallback=None) + if valid_languages is None: + raise MissingKeyError("validator.valid_languages") + + theconfig["validator"]["valid_languages"] = split.split(valid_languages) + # Store the configfiles theconfig["configfiles"] = getattr(config, "configfiles") - log.debug("The config: %s", theconfig) return theconfig diff --git a/python-scripts/metadatavalidator/tests/data/metadatavalidator.ini b/python-scripts/metadatavalidator/tests/data/metadatavalidator.ini new file mode 100644 index 000000000..0a4e3147e --- /dev/null +++ b/python-scripts/metadatavalidator/tests/data/metadatavalidator.ini @@ -0,0 +1,4 @@ +[validator] +file_extension = .xml +check_root_elements = book article topic +valid_languages = de-de en-us es-es fr-fr \ No newline at end of file diff --git a/python-scripts/metadatavalidator/tests/test_script_config.py b/python-scripts/metadatavalidator/tests/test_script_config.py new file mode 100644 index 000000000..55d52ff6c --- /dev/null +++ b/python-scripts/metadatavalidator/tests/test_script_config.py @@ -0,0 +1,61 @@ +from configparser import ConfigParser +import os.path + +import pytest + +from metadatavalidator.config import readconfig, 
validate_and_convert_config +from metadatavalidator.exceptions import MissingKeyError, MissingSectionError, NoConfigFilesFoundError + +def create_config(): + config = ConfigParser() + config.add_section("validator") + config.set("validator", "check_root_elements", "book article") + config.set("validator", "file_extension", ".xml") + config.set("validator", "valid_languages", "en-us de-de") + setattr(config, "configfiles", None) + return config + + +def test_valid_validate_and_convert_config(): + config = create_config() + result = validate_and_convert_config(config) + assert result == { + "validator": { + "check_root_elements": ["book", "article"], + "file_extension": ".xml", + "valid_languages": ["en-us", "de-de",] + }, + "configfiles": None, + } + +def test_missing_validator_section(): + config = ConfigParser() + with pytest.raises(MissingSectionError, match=".*validator.*"): + validate_and_convert_config(config) + + +def test_missing_key_check_root_elements(): + config = create_config() + config.remove_option("validator", "check_root_elements") + with pytest.raises(MissingKeyError, match=".*validator.check_root_elements.*"): + validate_and_convert_config(config) + + +def test_missing_key_valid_languages(): + config = create_config() + config.remove_option("validator", "valid_languages") + with pytest.raises(MissingKeyError, match=".*validator.valid_languages.*"): + validate_and_convert_config(config) + + +def test_readconfig(): + configfile = os.path.join(os.path.dirname(__file__), "data/metadatavalidator.ini") + result = readconfig([configfile]) + assert result == { + "validator": { + "check_root_elements": ["book", "article", "topic"], + "file_extension": ".xml", + "valid_languages": ["de-de", "en-us", "es-es", "fr-fr"] + }, + "configfiles": [configfile], + } From 28a739460ed628e0ba70d699deff5eac28de4089 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Fri, 24 May 2024 15:13:56 +0200 Subject: [PATCH 020/107] Define __str__ for exception classes --- 
.../src/metadatavalidator/exceptions.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/exceptions.py b/python-scripts/metadatavalidator/src/metadatavalidator/exceptions.py index ca1d77011..d66182b98 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/exceptions.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/exceptions.py @@ -8,16 +8,24 @@ class NoConfigFilesFoundError(FileNotFoundError): # --- Configuration exceptions class BaseConfigError(ValueError): - pass + def __init__(self, error, *args, **kwargs): + self.error = error + super().__init__(*args, **kwargs) class MissingSectionError(BaseConfigError): """A missing section could not be found in the config""" + def __str__(self) -> str: + return f"Missing section in config file: {self.error}" + class MissingKeyError(BaseConfigError): """A missing key could not be found in the config""" + def __str__(self) -> str: + return f"Missing key in config file: {self.error}" + # --- Validator exceptions class BaseMetadataError(ValueError): From 0b118d3fe23a426faaca6adb47d0a9317e3dde44 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Fri, 24 May 2024 15:14:15 +0200 Subject: [PATCH 021/107] Catch BaseConfigError in cli.py --- .../metadatavalidator/src/metadatavalidator/cli.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/cli.py b/python-scripts/metadatavalidator/src/metadatavalidator/cli.py index 7b5e2d34f..3d9cb0df8 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/cli.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/cli.py @@ -15,7 +15,7 @@ from . 
import __author__, __version__ from .config import readconfig from .common import CONFIGDIRS -from .exceptions import NoConfigFilesFoundError +from .exceptions import BaseConfigError, NoConfigFilesFoundError from .logging import DEFAULT_LOGGING_DICT, LOGLEVELS, log from .process import process @@ -85,6 +85,10 @@ def main(cliargs=None) -> int: log.critical("No config files found") return 100 + except BaseConfigError as error: + log.critical(error) + return 150 + except Exception as error: # FIXME: add a more specific exception here! log.exception("Some unknown exception occured", error) # Use whatever return code is appropriate for your specific exception From 32b94489d180ae6efc9f5974fae0198976927058 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Fri, 24 May 2024 15:53:16 +0200 Subject: [PATCH 022/107] Collect the result of each XML file --- .../metadatavalidator/checks/check_root.py | 9 +-- .../src/metadatavalidator/process.py | 67 +++++++++++++++++-- 2 files changed, 64 insertions(+), 12 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_root.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_root.py index 65d5bd9c6..963d57e0d 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_root.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_root.py @@ -8,19 +8,16 @@ def check_root_tag(tree: etree.ElementTree, config: dict[t.Any, t.Any]): - """ + """Checks if root element is in the list of allowed elements """ allowed_root_elements = config.get("validator", {}).get("check_root_elements") - # log.debug("Found config: %s", allowed_root_elements) tag = etree.QName(tree.getroot().tag) - # log.debug("Found tag <%s>", tag) if tag.localname not in allowed_root_elements: - raise InvalidValueError("Root tag is not ") + raise InvalidValueError(f"Root tag {tag.localname!r} is not allowed. 
Expected {', '.join(allowed_root_elements)}.") def check_namespace(tree: etree.ElementTree, config: dict[t.Any, t.Any]): """Checks the namespace""" tag = etree.QName(tree.getroot().tag) - # log.debug("Found namespace %s", tag.namespace) if tag.namespace != DOCBOOK_NS: - raise InvalidValueError("Root element doesn't belong to DocBook 5") + raise InvalidValueError(f"Root element {tag.localname!r} doesn't belong to DocBook 5.") diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/process.py b/python-scripts/metadatavalidator/src/metadatavalidator/process.py index 1fa8620b6..dab2f4b0c 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/process.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/process.py @@ -2,6 +2,7 @@ from argparse import Namespace # from configparser import ConfigParser import typing as t +import os.path from lxml import etree @@ -28,6 +29,7 @@ async def process_xml_file(xmlfile: str, config: dict[t.Any, t.Any]): :param xmlfile: the XML file to check for meta data :param config: read-only configuration from INI file """ + errors = [] for checkfunc in get_all_check_functions(checks.__package__): try: # loop = asyncio.get_running_loop() @@ -39,22 +41,75 @@ async def process_xml_file(xmlfile: str, config: dict[t.Any, t.Any]): checkfunc(tree, config) except etree.XMLSyntaxError as e: - log.fatal("Syntax error in %r: %s", xmlfile, e) + # log.fatal("Syntax error in %r: %s", xmlfile, e) + errors.append({ + 'checkfunc': checkfunc.__name__, + 'message': str(e) + }) except InvalidValueError as e: - log.fatal("Invalid value in %r for %s: %s", - xmlfile, checkfunc.__name__, e) + #log.fatal("Invalid value in %r for %s: %s", + # xmlfile, checkfunc.__name__, e) + errors.append({ + 'checkfunc': checkfunc.__name__, + 'message': str(e) + }) + else: + # log.info("Passed check %r for %r", checkfunc.__name__, os.path.basename(xmlfile)) + pass + + log.info("File %r checked.", xmlfile) + return { + "xmlfile": xmlfile, + 
"errors": errors, + "basename": os.path.basename(xmlfile), + } + +def green(text): + return f"\033[32m{text}\033[0m" + +def red(text): + return f"\033[31m{text}\033[0m" + + +def format_results(results: list[t.Any]): + """Format the results for output + + :param results: the results from the checks + """ + error_template = """[{idx}] {xmlfile}:""" + ok_template = f"""[{{idx}}] {{xmlfile}}: {green("OK")}""" + print(">>>", results) + print("==== RESULTS ====") + for allidx, result in enumerate(results, 1): + if not result['errors']: + print(ok_template.format(idx=allidx, **result)) + else: + print(error_template.format(idx=allidx, **result)) + + for idx, error in enumerate(result['errors'], 1): + print(f" {allidx}.{idx}: {error['checkfunc']}: {error['message']}") + print() - log.info("File %r checked successfully.", xmlfile) async def process(args: Namespace, config: dict[t.Any, t.Any]): - """Process all XML files that are give on CLI + """Process all XML files that are passed on CLI :param args: the arguments parsed by argparse :param config: read-only configuration from INI file """ log.debug("Process all XML files...") + tasks = [] async with asyncio.TaskGroup() as tg: for xmlfile in args.xmlfiles: - tg.create_task(process_xml_file(xmlfile, config)) \ No newline at end of file + task = tg.create_task(process_xml_file(xmlfile, config)) + tasks.append(task) + + results = [] + for task in tasks: + maybeissue = await task + if maybeissue: + results.append(maybeissue) + + format_results(results) From 2f609e0af021d3c014bea966e3e6c1066396f5cc Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Fri, 24 May 2024 16:13:28 +0200 Subject: [PATCH 023/107] Improve error output --- .../metadatavalidator/src/metadatavalidator/process.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/process.py b/python-scripts/metadatavalidator/src/metadatavalidator/process.py index dab2f4b0c..0f3ff8c01 100644 --- 
a/python-scripts/metadatavalidator/src/metadatavalidator/process.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/process.py @@ -79,7 +79,7 @@ def format_results(results: list[t.Any]): """ error_template = """[{idx}] {xmlfile}:""" ok_template = f"""[{{idx}}] {{xmlfile}}: {green("OK")}""" - print(">>>", results) + print("==== RESULTS ====") for allidx, result in enumerate(results, 1): if not result['errors']: @@ -88,7 +88,8 @@ def format_results(results: list[t.Any]): print(error_template.format(idx=allidx, **result)) for idx, error in enumerate(result['errors'], 1): - print(f" {allidx}.{idx}: {error['checkfunc']}: {error['message']}") + msg = red(error['message']) + print(f" {allidx}.{idx}: {error['checkfunc']}: {msg}") print() From aef1db2fed5a5603894ce00dbaee41dcba572fc2 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Fri, 24 May 2024 16:20:39 +0200 Subject: [PATCH 024/107] Add test cases and check for info element --- .../src/metadatavalidator/checks/__init__.py | 3 +- .../metadatavalidator/checks/check_info.py | 15 ++++++ .../tests/checks/test_check_info.py | 19 +++++++ .../tests/checks/test_check_root.py | 54 +++++++++++++++++++ 4 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py create mode 100644 python-scripts/metadatavalidator/tests/checks/test_check_info.py create mode 100644 python-scripts/metadatavalidator/tests/checks/test_check_root.py diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py index 7d25d5460..6ff6c22aa 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py @@ -1,8 +1,9 @@ from .check_root import check_root_tag, check_namespace - +from .check_info import check_info __all__ = [ "check_root_tag", "check_namespace", + 
"check_info", ] diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py new file mode 100644 index 000000000..51f2b6c0d --- /dev/null +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py @@ -0,0 +1,15 @@ +import typing as t + +from lxml import etree + +from ..common import DOCBOOK_NS +from ..exceptions import InvalidValueError +from ..logging import log + + +def check_info(tree: etree.ElementTree, config: dict[t.Any, t.Any]): + """Checks the info element""" + root = tree.getroot() + info = root.find(".//{%s}info" % DOCBOOK_NS) + if info is None: + raise InvalidValueError(f"Couldn't find info element in {root.tag}.") \ No newline at end of file diff --git a/python-scripts/metadatavalidator/tests/checks/test_check_info.py b/python-scripts/metadatavalidator/tests/checks/test_check_info.py new file mode 100644 index 000000000..70c91560d --- /dev/null +++ b/python-scripts/metadatavalidator/tests/checks/test_check_info.py @@ -0,0 +1,19 @@ +from metadatavalidator.checks import check_info +from lxml import etree + + +basic_xmlcontent = """
+ + Test + + +
""" + + +def test_check_info(): + tree = etree.ElementTree( + etree.fromstring(basic_xmlcontent, + parser=etree.XMLParser(encoding="UTF-8")) + ) + + assert check_info(tree, {}) is None \ No newline at end of file diff --git a/python-scripts/metadatavalidator/tests/checks/test_check_root.py b/python-scripts/metadatavalidator/tests/checks/test_check_root.py new file mode 100644 index 000000000..24df6a7a0 --- /dev/null +++ b/python-scripts/metadatavalidator/tests/checks/test_check_root.py @@ -0,0 +1,54 @@ + +from lxml import etree +import pytest + +from metadatavalidator.checks import check_root_tag, check_namespace +from metadatavalidator.exceptions import InvalidValueError + +basic_xmlcontent = """
+ + Test + + +
""" + + +def test_check_root_tag(): + tree = etree.ElementTree( + etree.fromstring(basic_xmlcontent, + parser=etree.XMLParser(encoding="UTF-8")) + ) + + assert check_root_tag(tree, {"validator": {"check_root_elements": ["article"]}}) is None + + +def test_check_check_namespace(): + tree = etree.ElementTree( + etree.fromstring(basic_xmlcontent, + parser=etree.XMLParser(encoding="UTF-8")) + ) + + assert check_namespace(tree, {}) is None + + +def test_check_root_tag_invalid(): + tree = etree.ElementTree( + etree.fromstring("""""", + parser=etree.XMLParser(encoding="UTF-8")) + ) + + with pytest.raises(InvalidValueError, + match="Root tag 'not_docbook5'.*"): + check_root_tag(tree, {"validator": {"check_root_elements": ["article"]}}) + + +def test_check_check_namespace_invalid(): + tree = etree.ElementTree( + etree.fromstring("""""", + parser=etree.XMLParser(encoding="UTF-8")) + ) + + with pytest.raises(InvalidValueError, + # match="Root element 'not_docbook5'.*" + ): + check_namespace(tree, {}) \ No newline at end of file From 5543e6d7796a68ec8e7214da0fe5214d0e66beac Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Fri, 24 May 2024 16:24:01 +0200 Subject: [PATCH 025/107] Add test_missing_config_files() --- .../metadatavalidator/tests/checks/test_check_root.py | 2 +- .../metadatavalidator/tests/test_script_config.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/python-scripts/metadatavalidator/tests/checks/test_check_root.py b/python-scripts/metadatavalidator/tests/checks/test_check_root.py index 24df6a7a0..b73f3498d 100644 --- a/python-scripts/metadatavalidator/tests/checks/test_check_root.py +++ b/python-scripts/metadatavalidator/tests/checks/test_check_root.py @@ -49,6 +49,6 @@ def test_check_check_namespace_invalid(): ) with pytest.raises(InvalidValueError, - # match="Root element 'not_docbook5'.*" + match=".*doesn't belong to DocBook 5" ): check_namespace(tree, {}) \ No newline at end of file diff --git 
a/python-scripts/metadatavalidator/tests/test_script_config.py b/python-scripts/metadatavalidator/tests/test_script_config.py index 55d52ff6c..d6d244c68 100644 --- a/python-scripts/metadatavalidator/tests/test_script_config.py +++ b/python-scripts/metadatavalidator/tests/test_script_config.py @@ -28,6 +28,12 @@ def test_valid_validate_and_convert_config(): "configfiles": None, } + +def test_missing_config_files(): + with pytest.raises(NoConfigFilesFoundError, match=".*Config files not found.*"): + readconfig([]) + + def test_missing_validator_section(): config = ConfigParser() with pytest.raises(MissingSectionError, match=".*validator.*"): From bd12e7ebc6a49692add595585b36fccca033c2e7 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Fri, 24 May 2024 16:42:42 +0200 Subject: [PATCH 026/107] Add basic test for CLI/parsecli() --- .../src/metadatavalidator/cli.py | 2 +- .../tests/test_script_cli.py | 20 +++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 python-scripts/metadatavalidator/tests/test_script_cli.py diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/cli.py b/python-scripts/metadatavalidator/src/metadatavalidator/cli.py index 3d9cb0df8..3ffa2a145 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/cli.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/cli.py @@ -41,7 +41,7 @@ def parsecli(cliargs=None) -> argparse.Namespace: parser.add_argument('--version', action='version', - version='%(prog)s ' + __version__ + version=f'%(prog)s {__version__} written by {__author__}' ) parser.add_argument("xmlfiles", metavar="XMLFILES", diff --git a/python-scripts/metadatavalidator/tests/test_script_cli.py b/python-scripts/metadatavalidator/tests/test_script_cli.py new file mode 100644 index 000000000..18efcd864 --- /dev/null +++ b/python-scripts/metadatavalidator/tests/test_script_cli.py @@ -0,0 +1,20 @@ +import argparse +import re +import pytest + +from metadatavalidator.cli import 
parsecli + + +def test_parsecli(): + args = parsecli(["-v", "a.xml", "b.xml"]) + assert args.verbose == 1 + assert args.xmlfiles == ["a.xml", "b.xml"] + + +def test_parsecli_version(capsys): + with pytest.raises(SystemExit): + parsecli(["--version"]) + + captured = capsys.readouterr() + # We can't check for the script name as it's "pytest" + assert re.match(r"[a-z]+ \d+\.\d+(\.\d+)? written by .*\n", captured.out) \ No newline at end of file From a439fd68a7eb39f48c757f7d9679d5d87ae2f00f Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Fri, 24 May 2024 16:57:36 +0200 Subject: [PATCH 027/107] Avoid reading config files two times --- python-scripts/metadatavalidator/src/metadatavalidator/cli.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/cli.py b/python-scripts/metadatavalidator/src/metadatavalidator/cli.py index 3ffa2a145..f2d5ffee7 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/cli.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/cli.py @@ -61,9 +61,6 @@ def parsecli(cliargs=None) -> argparse.Namespace: if handler.name == "console": handler.setLevel(loglevel) - args.config = readconfig(CONFIGDIRS) - log.debug("Reading these config files %s", - getattr(args.config, "configfiles", "n/a")) return args @@ -75,6 +72,7 @@ def main(cliargs=None) -> int: try: args = parsecli(cliargs) config = readconfig(CONFIGDIRS) + args.config = config log.debug("CLI args %s", args) asyncio.run(process(args, config)) From 9696093be3307fb019036ea4045ea74c62870fb2 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Fri, 24 May 2024 17:20:41 +0200 Subject: [PATCH 028/107] Test more info failures, add revhistory check --- .../src/metadatavalidator/checks/__init__.py | 3 +- .../metadatavalidator/checks/check_info.py | 16 ++++- .../tests/checks/test_check_info.py | 60 ++++++++++++++++++- 3 files changed, 74 insertions(+), 5 deletions(-) diff --git 
a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py index 6ff6c22aa..876234b24 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py @@ -1,9 +1,10 @@ from .check_root import check_root_tag, check_namespace -from .check_info import check_info +from .check_info import check_info, check_info_revhistory __all__ = [ "check_root_tag", "check_namespace", "check_info", + "check_info_revhistory", ] diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py index 51f2b6c0d..2f8cfc37a 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py @@ -8,8 +8,20 @@ def check_info(tree: etree.ElementTree, config: dict[t.Any, t.Any]): - """Checks the info element""" + """Checks for an info element""" root = tree.getroot() info = root.find(".//{%s}info" % DOCBOOK_NS) if info is None: - raise InvalidValueError(f"Couldn't find info element in {root.tag}.") \ No newline at end of file + raise InvalidValueError(f"Couldn't find info element in {root.tag}.") + + +def check_info_revhistory(tree: etree.ElementTree, config: dict[t.Any, t.Any]): + """Checks for an info/revhistory element""" + info = tree.find("./d:info", namespaces={"d": DOCBOOK_NS}) + if info is None: + # If couldn't be found, we can't check + return + + revhistory = info.find("./d:revhistory", namespaces={"d": DOCBOOK_NS}) + if revhistory is None: + raise InvalidValueError(f"Couldn't find a revhistory element in {info.tag}.") \ No newline at end of file diff --git a/python-scripts/metadatavalidator/tests/checks/test_check_info.py b/python-scripts/metadatavalidator/tests/checks/test_check_info.py index 
70c91560d..20c01d068 100644 --- a/python-scripts/metadatavalidator/tests/checks/test_check_info.py +++ b/python-scripts/metadatavalidator/tests/checks/test_check_info.py @@ -1,6 +1,9 @@ -from metadatavalidator.checks import check_info +import pytest from lxml import etree +from metadatavalidator.checks import check_info, check_info_revhistory +from metadatavalidator.exceptions import InvalidValueError + basic_xmlcontent = """
@@ -16,4 +19,57 @@ def test_check_info(): parser=etree.XMLParser(encoding="UTF-8")) ) - assert check_info(tree, {}) is None \ No newline at end of file + assert check_info(tree, {}) is None + + +def test_check_info_missing(): + xmlcontent = """
+ +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, + parser=etree.XMLParser(encoding="UTF-8")) + ) + with pytest.raises(InvalidValueError, + match="Couldn't find info element"): + check_info(tree, {}) + + +def test_check_info_revhistory_missing(): + tree = etree.ElementTree( + etree.fromstring(basic_xmlcontent, + parser=etree.XMLParser(encoding="UTF-8")) + ) + + with pytest.raises(InvalidValueError, + match="Couldn't find a revhistory element"): + check_info_revhistory(tree, {}) + + +def test_check_info_revhistory(): + xmlcontent = """
+ + Test + + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, + parser=etree.XMLParser(encoding="UTF-8")) + ) + + assert check_info_revhistory(tree, {}) is None + + +def test_check_info_revhistory_without_info(): + xmlcontent = """
+ +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, + parser=etree.XMLParser(encoding="UTF-8")) + ) + + assert check_info_revhistory(tree, {}) is None + From 4eb8fa62b34ba82e69347ec75403d88b997628a7 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Fri, 24 May 2024 17:22:48 +0200 Subject: [PATCH 029/107] Use log.debug for each check, use base XML name --- .../metadatavalidator/src/metadatavalidator/process.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/process.py b/python-scripts/metadatavalidator/src/metadatavalidator/process.py index 0f3ff8c01..fc187ae6a 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/process.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/process.py @@ -30,7 +30,11 @@ async def process_xml_file(xmlfile: str, config: dict[t.Any, t.Any]): :param config: read-only configuration from INI file """ errors = [] + basexmlfile = os.path.basename(xmlfile) for checkfunc in get_all_check_functions(checks.__package__): + log.debug("Checking %r with %r", + basexmlfile, + checkfunc.__name__) try: # loop = asyncio.get_running_loop() # tree = await loop.run_in_executor(None, etree.parse, xmlfile) @@ -58,7 +62,7 @@ async def process_xml_file(xmlfile: str, config: dict[t.Any, t.Any]): # log.info("Passed check %r for %r", checkfunc.__name__, os.path.basename(xmlfile)) pass - log.info("File %r checked.", xmlfile) + log.info("File %r checked.", basexmlfile) return { "xmlfile": xmlfile, "errors": errors, From 7a06c7308cb4486d01cee458829f6f44837b2b4c Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Fri, 24 May 2024 18:05:34 +0200 Subject: [PATCH 030/107] Add checks and tests for revhistory/@xml:id --- .../src/metadatavalidator/checks/__init__.py | 2 +- .../metadatavalidator/checks/check_info.py | 21 ++++++- .../src/metadatavalidator/common.py | 1 + .../tests/checks/test_check_info.py | 60 ++++++++++++++++++- 4 files changed, 80 
insertions(+), 4 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py index 876234b24..0c1b47691 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py @@ -1,5 +1,5 @@ from .check_root import check_root_tag, check_namespace -from .check_info import check_info, check_info_revhistory +from .check_info import check_info, check_info_revhistory, check_info_revhistory_xmlid __all__ = [ "check_root_tag", diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py index 2f8cfc37a..9af767f69 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py @@ -2,7 +2,7 @@ from lxml import etree -from ..common import DOCBOOK_NS +from ..common import DOCBOOK_NS, XML_NS from ..exceptions import InvalidValueError from ..logging import log @@ -24,4 +24,21 @@ def check_info_revhistory(tree: etree.ElementTree, config: dict[t.Any, t.Any]): revhistory = info.find("./d:revhistory", namespaces={"d": DOCBOOK_NS}) if revhistory is None: - raise InvalidValueError(f"Couldn't find a revhistory element in {info.tag}.") \ No newline at end of file + raise InvalidValueError(f"Couldn't find a revhistory element in {info.tag}.") + + +def check_info_revhistory_xmlid(tree: etree.ElementTree, config: dict[t.Any, t.Any]): + """Checks for an info/revhistory/revision/@xml:id attribute""" + revhistory = tree.find("./d:info/d:revhistory", + namespaces={"d": DOCBOOK_NS}) + + if revhistory is None: + # If couldn't be found, this is checked in check_info_revhistory + return + + xmlid = revhistory.attrib.get(f"{{{XML_NS}}}id") + if xmlid is None: + raise 
InvalidValueError(f"Couldn't find xml:id attribute in info/revhistory.") + + if not xmlid.startswith("rh"): + raise InvalidValueError(f"xml:id attribute in info/revhistory should start with 'rh'.") \ No newline at end of file diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/common.py b/python-scripts/metadatavalidator/src/metadatavalidator/common.py index 0c0e72cb4..0cb7333db 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/common.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/common.py @@ -19,3 +19,4 @@ CONFIGDIRS = tuple(os.path.expanduser(i) for i in CONFIGDIRS) DOCBOOK_NS = "http://docbook.org/ns/docbook" +XML_NS = "http://www.w3.org/XML/1998/namespace" diff --git a/python-scripts/metadatavalidator/tests/checks/test_check_info.py b/python-scripts/metadatavalidator/tests/checks/test_check_info.py index 20c01d068..bc432420c 100644 --- a/python-scripts/metadatavalidator/tests/checks/test_check_info.py +++ b/python-scripts/metadatavalidator/tests/checks/test_check_info.py @@ -1,7 +1,7 @@ import pytest from lxml import etree -from metadatavalidator.checks import check_info, check_info_revhistory +from metadatavalidator.checks import check_info, check_info_revhistory, check_info_revhistory_xmlid from metadatavalidator.exceptions import InvalidValueError @@ -72,4 +72,62 @@ def test_check_info_revhistory_without_info(): ) assert check_info_revhistory(tree, {}) is None + assert check_info_revhistory_xmlid(tree, {}) is None + + +def test_check_info_revhistory_xmlid(): + xmlcontent = """
+ + Test + + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, + parser=etree.XMLParser(encoding="UTF-8")) + ) + + assert check_info_revhistory_xmlid(tree, {}) is None + + +def test_check_info_revhistory_missing_xmlid(): + xmlcontent = """
+ + Test + + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, + parser=etree.XMLParser(encoding="UTF-8")) + ) + + with pytest.raises(InvalidValueError, + match="Couldn't find xml:id attribute"): + check_info_revhistory_xmlid(tree, {}) + + +def test_check_info_revhistory_xmlid_with_wrong_value(): + xmlcontent = """
+ + Test + + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, + parser=etree.XMLParser(encoding="UTF-8")) + ) + + with pytest.raises(InvalidValueError, + match="should start with 'rh'"): + check_info_revhistory_xmlid(tree, {}) + + + + + From 5a45b3b309c1dcfa8296b5006d584f0c78623b00 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Mon, 27 May 2024 14:07:46 +0200 Subject: [PATCH 031/107] Add comment about order in checks.__all__ --- .../src/metadatavalidator/checks/__init__.py | 14 ++++++++++++-- .../src/metadatavalidator/process.py | 1 + 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py index 0c1b47691..666aa439f 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py @@ -1,10 +1,20 @@ -from .check_root import check_root_tag, check_namespace -from .check_info import check_info, check_info_revhistory, check_info_revhistory_xmlid +from .check_root import ( + check_root_tag, + check_namespace, +) +from .check_info import ( + check_info, + check_info_revhistory, + check_info_revhistory_xmlid, +) + +# Keep the order. The next item is dependent on the previous item. 
__all__ = [ "check_root_tag", "check_namespace", "check_info", "check_info_revhistory", + "check_info_revhistory_xmlid", ] diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/process.py b/python-scripts/metadatavalidator/src/metadatavalidator/process.py index fc187ae6a..f6a1ab111 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/process.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/process.py @@ -18,6 +18,7 @@ def get_all_check_functions(name): """ import importlib module = importlib.import_module(name) + # The order of the checks is important and uses it from checks.__all__ for name, obj in module.__dict__.items(): if callable(obj) and name.startswith("check_"): yield obj From 1011939d8d3a3aba77b97520e85b890b96729fd5 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Mon, 27 May 2024 14:08:55 +0200 Subject: [PATCH 032/107] Remove check_info_revhistory_xmlid() Combine it with check_info_revhistory() --- .../src/metadatavalidator/checks/__init__.py | 2 -- .../src/metadatavalidator/checks/check_info.py | 12 +----------- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py index 666aa439f..37cc7831f 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py @@ -5,7 +5,6 @@ from .check_info import ( check_info, check_info_revhistory, - check_info_revhistory_xmlid, ) @@ -15,6 +14,5 @@ "check_namespace", "check_info", "check_info_revhistory", - "check_info_revhistory_xmlid", ] diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py index 9af767f69..e89399301 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py +++ 
b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py @@ -26,19 +26,9 @@ def check_info_revhistory(tree: etree.ElementTree, config: dict[t.Any, t.Any]): if revhistory is None: raise InvalidValueError(f"Couldn't find a revhistory element in {info.tag}.") - -def check_info_revhistory_xmlid(tree: etree.ElementTree, config: dict[t.Any, t.Any]): - """Checks for an info/revhistory/revision/@xml:id attribute""" - revhistory = tree.find("./d:info/d:revhistory", - namespaces={"d": DOCBOOK_NS}) - - if revhistory is None: - # If couldn't be found, this is checked in check_info_revhistory - return - xmlid = revhistory.attrib.get(f"{{{XML_NS}}}id") if xmlid is None: raise InvalidValueError(f"Couldn't find xml:id attribute in info/revhistory.") if not xmlid.startswith("rh"): - raise InvalidValueError(f"xml:id attribute in info/revhistory should start with 'rh'.") \ No newline at end of file + raise InvalidValueError(f"xml:id attribute in info/revhistory should start with 'rh'.") From be1817ca6eba20ae6304a6d71f6efd66fd37096c Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Tue, 28 May 2024 16:26:06 +0200 Subject: [PATCH 033/107] Introduce metadata section --- .../metadatavalidator/metadatavalidator.ini | 6 ++++- .../src/metadatavalidator/config.py | 24 +++++++++++++++++-- .../src/metadatavalidator/exceptions.py | 19 ++++++++++++++- .../tests/checks/test_check_info.py | 12 +++++----- .../tests/test_script_config.py | 15 ++++-------- 5 files changed, 56 insertions(+), 20 deletions(-) diff --git a/python-scripts/metadatavalidator/metadatavalidator.ini b/python-scripts/metadatavalidator/metadatavalidator.ini index e86803e9a..6357ddfcb 100644 --- a/python-scripts/metadatavalidator/metadatavalidator.ini +++ b/python-scripts/metadatavalidator/metadatavalidator.ini @@ -1,4 +1,8 @@ [validator] file_extension = .xml check_root_elements = book article topic -valid_languages = ar-ar cs-cz de-de en-us es-es fr-fr hu-hu it-it ja-jp ko-kr nl-nl pl-pl pt-br ru-ru sv-se 
zh-cn zh-tw \ No newline at end of file +valid_languages = ar-ar cs-cz de-de en-us es-es fr-fr hu-hu it-it ja-jp ko-kr nl-nl pl-pl pt-br ru-ru sv-se zh-cn zh-tw + +[metadata] +revhistory = 0 +require_xmlid_on_revision = 1 \ No newline at end of file diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/config.py b/python-scripts/metadatavalidator/src/metadatavalidator/config.py index e2d0111aa..6adbce881 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/config.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/config.py @@ -40,8 +40,21 @@ def as_dict(config: configparser.ConfigParser): return the_dict -def validate_and_convert_config(config: configparser.ConfigParser): +def truefalse(value: str|bool|int) -> bool: + """Convert a string to a boolean value + """ + if isinstance(value, bool): + return value + + return str(value).lower() in ("true", "yes", "1", "on") + + +def validate_and_convert_config(config: configparser.ConfigParser) -> dict[t.Any, t.Any]: """Validate sections, keys, and their values of the config + + :param config: the :class:`configparser.Configparser` object + :return: a dict that contains converted keys into their + respective datatypes """ split = re.compile(r"[;, ]") theconfig = as_dict(config) @@ -49,7 +62,7 @@ def validate_and_convert_config(config: configparser.ConfigParser): if not config.has_section("validator"): raise MissingSectionError("validator") - # Validate "validator" section + # Section "validator" check_root_elements = config.get("validator", "check_root_elements", fallback=None) if check_root_elements is None: raise MissingKeyError("validator.check_root_elements") @@ -61,6 +74,13 @@ def validate_and_convert_config(config: configparser.ConfigParser): theconfig["validator"]["valid_languages"] = split.split(valid_languages) + # Section "metadata" + require_xmlid_on_revision = truefalse( + theconfig.get("metadata", {}).get("require_xmlid_on_revision", True) + ) + 
theconfig.setdefault("metadata", {})["require_xmlid_on_revision"] = require_xmlid_on_revision + + # Store the configfiles theconfig["configfiles"] = getattr(config, "configfiles") return theconfig diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/exceptions.py b/python-scripts/metadatavalidator/src/metadatavalidator/exceptions.py index d66182b98..3921fa90b 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/exceptions.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/exceptions.py @@ -27,12 +27,29 @@ def __str__(self) -> str: return f"Missing key in config file: {self.error}" -# --- Validator exceptions +# --- Validator exceptions, base classes class BaseMetadataError(ValueError): + """Base class for metadata errors""" pass + +class BaseMetadataWarning(ValueError): + """Base class for metadata warnings""" + pass + + +# --- Warnings +class MissingAttributeWarning(BaseMetadataWarning): + """A warning for a missing attribute that is recommended to have""" + pass + + +# --- Errors class InvalidElementError(BaseMetadataError): + """An element was missing or invalid in the metadata""" pass + class InvalidValueError(BaseMetadataError): + """A value was invalid in the metadata""" pass \ No newline at end of file diff --git a/python-scripts/metadatavalidator/tests/checks/test_check_info.py b/python-scripts/metadatavalidator/tests/checks/test_check_info.py index bc432420c..6ca8fdc55 100644 --- a/python-scripts/metadatavalidator/tests/checks/test_check_info.py +++ b/python-scripts/metadatavalidator/tests/checks/test_check_info.py @@ -1,7 +1,7 @@ import pytest from lxml import etree -from metadatavalidator.checks import check_info, check_info_revhistory, check_info_revhistory_xmlid +from metadatavalidator.checks import check_info, check_info_revhistory from metadatavalidator.exceptions import InvalidValueError @@ -50,7 +50,7 @@ def test_check_info_revhistory(): xmlcontent = """
Test - +
""" @@ -72,7 +72,7 @@ def test_check_info_revhistory_without_info(): ) assert check_info_revhistory(tree, {}) is None - assert check_info_revhistory_xmlid(tree, {}) is None + assert check_info_revhistory(tree, {}) is None def test_check_info_revhistory_xmlid(): @@ -88,7 +88,7 @@ def test_check_info_revhistory_xmlid(): parser=etree.XMLParser(encoding="UTF-8")) ) - assert check_info_revhistory_xmlid(tree, {}) is None + assert check_info_revhistory(tree, {}) is None def test_check_info_revhistory_missing_xmlid(): @@ -106,7 +106,7 @@ def test_check_info_revhistory_missing_xmlid(): with pytest.raises(InvalidValueError, match="Couldn't find xml:id attribute"): - check_info_revhistory_xmlid(tree, {}) + check_info_revhistory(tree, {}) def test_check_info_revhistory_xmlid_with_wrong_value(): @@ -124,7 +124,7 @@ def test_check_info_revhistory_xmlid_with_wrong_value(): with pytest.raises(InvalidValueError, match="should start with 'rh'"): - check_info_revhistory_xmlid(tree, {}) + check_info_revhistory(tree, {}) diff --git a/python-scripts/metadatavalidator/tests/test_script_config.py b/python-scripts/metadatavalidator/tests/test_script_config.py index d6d244c68..2a43ce4dc 100644 --- a/python-scripts/metadatavalidator/tests/test_script_config.py +++ b/python-scripts/metadatavalidator/tests/test_script_config.py @@ -19,14 +19,11 @@ def create_config(): def test_valid_validate_and_convert_config(): config = create_config() result = validate_and_convert_config(config) - assert result == { - "validator": { + assert result.get("validator") == { "check_root_elements": ["book", "article"], "file_extension": ".xml", "valid_languages": ["en-us", "de-de",] - }, - "configfiles": None, - } + } def test_missing_config_files(): @@ -57,11 +54,9 @@ def test_missing_key_valid_languages(): def test_readconfig(): configfile = os.path.join(os.path.dirname(__file__), "data/metadatavalidator.ini") result = readconfig([configfile]) - assert result == { - "validator": { + assert 
result.get("validator") == { "check_root_elements": ["book", "article", "topic"], "file_extension": ".xml", "valid_languages": ["de-de", "en-us", "es-es", "fr-fr"] - }, - "configfiles": [configfile], - } + } + assert result.get("configfiles") == [configfile] From a4527214a56eab0732e90fc42b37f59be2ec8c7e Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Wed, 29 May 2024 09:13:23 +0200 Subject: [PATCH 034/107] Introduce util.py and getfullxpath() --- .../metadatavalidator/checks/check_info.py | 26 +++++++++- .../src/metadatavalidator/common.py | 11 ++++ .../src/metadatavalidator/util.py | 51 +++++++++++++++++++ .../metadatavalidator/tests/test_util.py | 28 ++++++++++ 4 files changed, 114 insertions(+), 2 deletions(-) create mode 100644 python-scripts/metadatavalidator/src/metadatavalidator/util.py create mode 100644 python-scripts/metadatavalidator/tests/test_util.py diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py index e89399301..4c0a5c55d 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py @@ -2,9 +2,13 @@ from lxml import etree -from ..common import DOCBOOK_NS, XML_NS -from ..exceptions import InvalidValueError +from ..common import ( + DOCBOOK_NS, + XML_NS, + ) +from ..exceptions import InvalidValueError, MissingAttributeWarning from ..logging import log +from ..util import getfullxpath def check_info(tree: etree.ElementTree, config: dict[t.Any, t.Any]): @@ -32,3 +36,21 @@ def check_info_revhistory(tree: etree.ElementTree, config: dict[t.Any, t.Any]): if not xmlid.startswith("rh"): raise InvalidValueError(f"xml:id attribute in info/revhistory should start with 'rh'.") + + + +def check_info_revhistory_revision(tree: etree.ElementTree, config: dict[t.Any, t.Any]): + """Checks for an info/revhistory/revision element""" + revhistory 
= tree.find("./d:info/d:revhistory", namespaces={"d": DOCBOOK_NS}) + if revhistory is None: + # If couldn't be found, we can't check + return + + revision = revhistory.find("./d:revision", namespaces={"d": DOCBOOK_NS}) + if revision is None: + raise InvalidValueError(f"Couldn't find a revision element in {revhistory.tag}.") + xmlid = revision.attrib.get(f"{{{XML_NS}}}id") + + if config.get("metadata", {}).get("require_xmlid_on_revision", True) and xmlid is None: + xpath = getfullxpath(revision) + raise MissingAttributeWarning("") \ No newline at end of file diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/common.py b/python-scripts/metadatavalidator/src/metadatavalidator/common.py index 0cb7333db..e88b48127 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/common.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/common.py @@ -20,3 +20,14 @@ DOCBOOK_NS = "http://docbook.org/ns/docbook" XML_NS = "http://www.w3.org/XML/1998/namespace" +XLINK_NS = "http://www.w3.org/1999/xlink" +ITS_NS = "http://www.w3.org/2005/11/its" +XINCLUDE_NS = "http://www.w3.org/2001/XInclude" + +NAMESPACES2PREFIX = { + DOCBOOK_NS: "d", + XML_NS: "xml", + XLINK_NS: "xlink", + ITS_NS: "its", + XINCLUDE_NS: "xi", +} diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/util.py b/python-scripts/metadatavalidator/src/metadatavalidator/util.py new file mode 100644 index 000000000..ae0c488a8 --- /dev/null +++ b/python-scripts/metadatavalidator/src/metadatavalidator/util.py @@ -0,0 +1,51 @@ +from lxml import etree + +from .common import ( + NAMESPACES2PREFIX, + ) + + +def getfullxpath(element: etree._Element, + ns2prefix:dict[str, str]=NAMESPACES2PREFIX) -> str: + """Return the full XPath including predicates to this element + + :param: the element to get an XPath for + :return: the full absolute XPath with optional predicates + """ + tree = element.getroottree() + root = tree.getroot() + + # Get the basic XPath from the root to the 
element + # There are two different cases: + # 1. Your element belongs to no namespace: + # get a regular XPath, no need to add anything + # 2. Your element belongs to a namespace: + # proceed further + path = tree.getpath(element) + if "*" not in path: + return path + path = path.split("/") + + # Get the "full" XPath, but add the missing root element + # Looks like ''{http://docbook.org/ns/docbook}info/{http://docbook.org/ns/docbook}revhistory' + fullpath = "/{}/{}".format( + etree.QName(root), + tree.getelementpath(element) + ) + # Remove some leftovers in case you have just "." from .getelementpath() + fullpath = fullpath.replace("/.", "") + + # Replace a namespace with a prefix + for ns, prefix in ns2prefix.items(): + fullpath = fullpath.replace(f'{{{ns}}}', f'{prefix}:') + fullpath = fullpath.split("/") + + # Check if the two paths have the same length + if len(path) != len(fullpath): + raise RuntimeError("two paths differ") + + # Combine element part and predicate + return "/".join([f"{p2}{p1.replace('*', '')}" + for p1, p2 in zip(path, fullpath) ] + ) + diff --git a/python-scripts/metadatavalidator/tests/test_util.py b/python-scripts/metadatavalidator/tests/test_util.py new file mode 100644 index 000000000..7e953ce87 --- /dev/null +++ b/python-scripts/metadatavalidator/tests/test_util.py @@ -0,0 +1,28 @@ +import pytest +from lxml import etree + +from metadatavalidator.util import getfullxpath + +xmlcontent = """
+ + <para/> + <section> + <title/> + <para/> + <para/> + </section> + </article> +""" + + +def test_getfullpath(): + root = etree.fromstring(xmlcontent) + tree = root.getroottree() + section = tree.find("./section") + assert getfullxpath(section) == "/article/section" + + para = tree.find("./section/para[1]") + assert getfullxpath(para) == "/article/section/para[1]" + + para = tree.find("./section/para[2]") + assert getfullxpath(para) == "/article/section/para[2]" \ No newline at end of file From e7aa370159cfeb6750a79b40cf285526ed783a2d Mon Sep 17 00:00:00 2001 From: Tom Schraitle <toms@suse.de> Date: Wed, 29 May 2024 10:13:30 +0200 Subject: [PATCH 035/107] Improve exception with __str__ --- .../src/metadatavalidator/exceptions.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/exceptions.py b/python-scripts/metadatavalidator/src/metadatavalidator/exceptions.py index 3921fa90b..e46b268c2 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/exceptions.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/exceptions.py @@ -41,15 +41,18 @@ class BaseMetadataWarning(ValueError): # --- Warnings class MissingAttributeWarning(BaseMetadataWarning): """A warning for a missing attribute that is recommended to have""" - pass + def __str__(self) -> str: + return f"Missing recommended attribute in {super().__str__()}" # --- Errors class InvalidElementError(BaseMetadataError): """An element was missing or invalid in the metadata""" - pass + def __str__(self) -> str: + return f"Missing or invalid element in {super().__str__()}" class InvalidValueError(BaseMetadataError): """A value was invalid in the metadata""" - pass \ No newline at end of file + def __str__(self) -> str: + return f"Invalid value in metadata {super().__str__()}" From 11d4cacd1da957eaa6414d67414245f2b04107b5 Mon Sep 17 00:00:00 2001 From: Tom Schraitle <toms@suse.de> Date: Wed, 29 May 2024 10:59:36 +0200 
Subject: [PATCH 036/107] Check and test revhistory/revision/date --- .../metadatavalidator/checks/check_info.py | 24 ++++++++++++++-- .../src/metadatavalidator/common.py | 7 +++++ .../tests/checks/test_check_info.py | 19 ++++++++++--- .../metadatavalidator/tests/test_util.py | 28 +++++++++++++++++-- 4 files changed, 70 insertions(+), 8 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py index 4c0a5c55d..5096c8e3c 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py @@ -3,6 +3,7 @@ from lxml import etree from ..common import ( + DATE_REGEX, DOCBOOK_NS, XML_NS, ) @@ -39,7 +40,8 @@ def check_info_revhistory(tree: etree.ElementTree, config: dict[t.Any, t.Any]): -def check_info_revhistory_revision(tree: etree.ElementTree, config: dict[t.Any, t.Any]): +def check_info_revhistory_revision(tree: etree.ElementTree, + config: dict[t.Any, t.Any]): """Checks for an info/revhistory/revision element""" revhistory = tree.find("./d:info/d:revhistory", namespaces={"d": DOCBOOK_NS}) if revhistory is None: @@ -53,4 +55,22 @@ def check_info_revhistory_revision(tree: etree.ElementTree, config: dict[t.Any, if config.get("metadata", {}).get("require_xmlid_on_revision", True) and xmlid is None: xpath = getfullxpath(revision) - raise MissingAttributeWarning("") \ No newline at end of file + xpath += "/@xml:id" + raise MissingAttributeWarning(path) + + +def check_info_revhistory_revision_date(tree: etree.ElementTree, + config: dict[t.Any, t.Any]): + """Checks for an info/revhistory/revision/date element""" + date = tree.find("./d:info/d:revhistory/d:revision/d:date", + namespaces={"d": DOCBOOK_NS}) + if date is None: + raise InvalidValueError(f"Couldn't find a date element in info/revhistory/revision.") + + if DATE_REGEX.search(date.text) is None: + 
path = getfullxpath(date) + raise InvalidValueError(f"Invalid date format in {date.tag} (XPath={path}).") + + # Check if the date is valid + + diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/common.py b/python-scripts/metadatavalidator/src/metadatavalidator/common.py index e88b48127..edb5b811b 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/common.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/common.py @@ -1,4 +1,5 @@ import os.path +import re import typing as t @@ -16,14 +17,17 @@ if METAVALIDATOR_CONFIG is not None: CONFIGDIRS.insert(0, os.path.expanduser(METAVALIDATOR_CONFIG)) +#: Store the expanded paths in tuple CONFIGDIRS = tuple(os.path.expanduser(i) for i in CONFIGDIRS) +#: The namespaces DOCBOOK_NS = "http://docbook.org/ns/docbook" XML_NS = "http://www.w3.org/XML/1998/namespace" XLINK_NS = "http://www.w3.org/1999/xlink" ITS_NS = "http://www.w3.org/2005/11/its" XINCLUDE_NS = "http://www.w3.org/2001/XInclude" +#: Mapping of namespaces to prefixes NAMESPACES2PREFIX = { DOCBOOK_NS: "d", XML_NS: "xml", @@ -31,3 +35,6 @@ ITS_NS: "its", XINCLUDE_NS: "xi", } + +#: The regex to match a date with year, month and an optional day +DATE_REGEX = re.compile(r"^(\d{4})-(\d{1,2})(?:-(\d{1,2}))?$") \ No newline at end of file diff --git a/python-scripts/metadatavalidator/tests/checks/test_check_info.py b/python-scripts/metadatavalidator/tests/checks/test_check_info.py index 6ca8fdc55..f5708d9cc 100644 --- a/python-scripts/metadatavalidator/tests/checks/test_check_info.py +++ b/python-scripts/metadatavalidator/tests/checks/test_check_info.py @@ -127,7 +127,18 @@ def test_check_info_revhistory_xmlid_with_wrong_value(): check_info_revhistory(tree, {}) - - - - +def test_check_info_revhistory_revision_missing(): + xmlcontent = """<article xmlns="http://docbook.org/ns/docbook" version="5.2"> + <info> + <title>Test + + + + + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, + parser=etree.XMLParser(encoding="UTF-8")) + ) \ No newline at end of file diff --git a/python-scripts/metadatavalidator/tests/test_util.py b/python-scripts/metadatavalidator/tests/test_util.py index 7e953ce87..8bf34084f 100644 --- a/python-scripts/metadatavalidator/tests/test_util.py +++ b/python-scripts/metadatavalidator/tests/test_util.py @@ -1,7 +1,7 @@ import pytest from lxml import etree -from metadatavalidator.util import getfullxpath +from metadatavalidator.util import getfullxpath, parse_date xmlcontent = """
@@ -25,4 +25,28 @@ def test_getfullpath(): assert getfullxpath(para) == "/article/section/para[1]" para = tree.find("./section/para[2]") - assert getfullxpath(para) == "/article/section/para[2]" \ No newline at end of file + assert getfullxpath(para) == "/article/section/para[2]" + + +@pytest.mark.parametrize("date_text, expected", [ + ("2021-01-01", "2021-01-01"), + ("2021-1-1", "2021-01-01"), + ("2021-01", "2021-01-01"), + ("2021-1", "2021-01-01"), + ("2021-01-1", "2021-01-01"), + ("2021-1-01", "2021-01-01"), + ("2024-12-12", "2024-12-12"), +]) +def test_parse_valid_dates(date_text, expected): + assert str(parse_date(date_text)) == expected + + +@pytest.mark.parametrize("date_text", [ + ("foo"), + ("2021"), + ("2024-14"), + (""), +]) +def test_parse_invalid_dates(date_text): + with pytest.raises(ValueError): + parse_date(date_text) From 369b42bcdf7016e39cbd77c64959055cfd16cc42 Mon Sep 17 00:00:00 2001 From: Tom Schraitle <toms@suse.de> Date: Wed, 29 May 2024 12:23:08 +0200 Subject: [PATCH 037/107] Add parse_date --- .../src/metadatavalidator/checks/__init__.py | 6 +- .../metadatavalidator/checks/check_info.py | 11 ++- .../src/metadatavalidator/util.py | 18 +++++ .../tests/checks/test_check_info.py | 74 ++++++++++++++++++- 4 files changed, 102 insertions(+), 7 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py index 37cc7831f..efdc8d977 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py @@ -5,8 +5,10 @@ from .check_info import ( check_info, check_info_revhistory, -) + check_info_revhistory_revision, + check_info_revhistory_revision_date +) # Keep the order. The next item is dependent on the previous item. 
__all__ = [ @@ -14,5 +16,7 @@ "check_namespace", "check_info", "check_info_revhistory", + "check_info_revhistory_revision", + "check_info_revhistory_revision_date", ] diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py index 5096c8e3c..ef5a8a508 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py @@ -9,7 +9,7 @@ ) from ..exceptions import InvalidValueError, MissingAttributeWarning from ..logging import log -from ..util import getfullxpath +from ..util import getfullxpath, parse_date def check_info(tree: etree.ElementTree, config: dict[t.Any, t.Any]): @@ -56,7 +56,7 @@ def check_info_revhistory_revision(tree: etree.ElementTree, if config.get("metadata", {}).get("require_xmlid_on_revision", True) and xmlid is None: xpath = getfullxpath(revision) xpath += "/@xml:id" - raise MissingAttributeWarning(path) + raise MissingAttributeWarning(xpath) def check_info_revhistory_revision_date(tree: etree.ElementTree, @@ -67,10 +67,15 @@ def check_info_revhistory_revision_date(tree: etree.ElementTree, if date is None: raise InvalidValueError(f"Couldn't find a date element in info/revhistory/revision.") + # First check the formal correctness of the date with regex if DATE_REGEX.search(date.text) is None: path = getfullxpath(date) raise InvalidValueError(f"Invalid date format in {date.tag} (XPath={path}).") # Check if the date is valid - + try: + parse_date(date.text.strip()) + except ValueError as e: + xpath = getfullxpath(date) + raise InvalidValueError(f"{e} (XPath={xpath})") diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/util.py b/python-scripts/metadatavalidator/src/metadatavalidator/util.py index ae0c488a8..ed51f5f77 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/util.py +++ 
b/python-scripts/metadatavalidator/src/metadatavalidator/util.py @@ -1,3 +1,4 @@ +from datetime import datetime, date from lxml import etree from .common import ( @@ -49,3 +50,20 @@ def getfullxpath(element: etree._Element, for p1, p2 in zip(path, fullpath) ] ) + +def parse_date(date_text: str) -> date: + """Attempt to parse a date string into a date object + Valid formats are YYYY-MM-DD, YYYY-M-D, YYYY-MM, YYYY-M, + YYYY-MM-D, and YYYY-M-D + """ + # Attempt to parse the date text into a date object + for fmt in ("%Y-%m-%d", "%Y-%m"): + try: + # This will handle all formats + parsed_date = datetime.strptime(date_text, fmt) + return parsed_date.date() + except ValueError: + continue + + # If none of the formats matched, raise an error + raise ValueError(f"Invalid date format: {date_text}") \ No newline at end of file diff --git a/python-scripts/metadatavalidator/tests/checks/test_check_info.py b/python-scripts/metadatavalidator/tests/checks/test_check_info.py index f5708d9cc..f213d3a92 100644 --- a/python-scripts/metadatavalidator/tests/checks/test_check_info.py +++ b/python-scripts/metadatavalidator/tests/checks/test_check_info.py @@ -1,8 +1,12 @@ import pytest from lxml import etree -from metadatavalidator.checks import check_info, check_info_revhistory -from metadatavalidator.exceptions import InvalidValueError +from metadatavalidator.checks import ( + check_info, check_info_revhistory, + check_info_revhistory_revision, + check_info_revhistory_revision_date +) +from metadatavalidator.exceptions import InvalidValueError, MissingAttributeWarning basic_xmlcontent = """<article xmlns="http://docbook.org/ns/docbook" version="5.2"> @@ -127,6 +131,50 @@ def test_check_info_revhistory_xmlid_with_wrong_value(): check_info_revhistory(tree, {}) +def test_check_info_revhistory_revision(): + xmlcontent = """<article xmlns="http://docbook.org/ns/docbook" version="5.2"> + <info> + <title>Test + + + 2021-01-01 + + + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, + parser=etree.XMLParser(encoding="UTF-8")) + ) + assert check_info_revhistory_revision(tree, {}) is None + + +def test_check_info_revhistory_revision_missing_xmlid(): + xmlcontent = """
+ + Test + + + 2021-01-01 + + + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, + parser=etree.XMLParser(encoding="UTF-8")) + ) + + with pytest.raises(MissingAttributeWarning, + match="Missing recommended attribute in"): + check_info_revhistory_revision( + tree, + {"metadata": {"require_xmlid_on_revision": True}}) + + + def test_check_info_revhistory_revision_missing(): xmlcontent = """
@@ -141,4 +189,24 @@ def test_check_info_revhistory_revision_missing(): tree = etree.ElementTree( etree.fromstring(xmlcontent, parser=etree.XMLParser(encoding="UTF-8")) - ) \ No newline at end of file + ) + + +def test_check_info_revhistory_revision_date(): + xmlcontent = """
+ + Test + + + 2021-01-01 + + + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, + parser=etree.XMLParser(encoding="UTF-8")) + ) + + assert check_info_revhistory_revision_date(tree, {}) is None \ No newline at end of file From 34a2103b906211ddc61e22d2fd2f761ac097ad66 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Wed, 29 May 2024 12:33:31 +0200 Subject: [PATCH 038/107] Move basic_xmlcontent and improve tests --- .../src/metadatavalidator/util.py | 4 +- .../tests/checks/test_check_info.py | 91 +++++++++++++++++-- 2 files changed, 84 insertions(+), 11 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/util.py b/python-scripts/metadatavalidator/src/metadatavalidator/util.py index ed51f5f77..7982474c9 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/util.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/util.py @@ -42,8 +42,8 @@ def getfullxpath(element: etree._Element, fullpath = fullpath.split("/") # Check if the two paths have the same length - if len(path) != len(fullpath): - raise RuntimeError("two paths differ") + # if len(path) != len(fullpath): + # raise RuntimeError("two paths differ") # Combine element part and predicate return "/".join([f"{p2}{p1.replace('*', '')}" diff --git a/python-scripts/metadatavalidator/tests/checks/test_check_info.py b/python-scripts/metadatavalidator/tests/checks/test_check_info.py index f213d3a92..5f6c7850a 100644 --- a/python-scripts/metadatavalidator/tests/checks/test_check_info.py +++ b/python-scripts/metadatavalidator/tests/checks/test_check_info.py @@ -9,17 +9,15 @@ from metadatavalidator.exceptions import InvalidValueError, MissingAttributeWarning -basic_xmlcontent = """
+def test_check_info(): + xmlcontent = """
Test
""" - - -def test_check_info(): tree = etree.ElementTree( - etree.fromstring(basic_xmlcontent, + etree.fromstring(xmlcontent, parser=etree.XMLParser(encoding="UTF-8")) ) @@ -40,8 +38,14 @@ def test_check_info_missing(): def test_check_info_revhistory_missing(): + xmlcontent = """
+ + Test + + +
""" tree = etree.ElementTree( - etree.fromstring(basic_xmlcontent, + etree.fromstring(xmlcontent, parser=etree.XMLParser(encoding="UTF-8")) ) @@ -176,11 +180,33 @@ def test_check_info_revhistory_revision_missing_xmlid(): def test_check_info_revhistory_revision_missing(): + xmlcontent = """
+ + Test + + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, + parser=etree.XMLParser(encoding="UTF-8")) + ) + + with pytest.raises(InvalidValueError, + match="Couldn't find a revision element"): + check_info_revhistory_revision( + tree, + {"metadata": {"require_xmlid_on_revision": True}} + ) + + +def test_check_info_revhistory_revision_date(): xmlcontent = """
Test + 2021-01-01 @@ -191,14 +217,36 @@ def test_check_info_revhistory_revision_missing(): parser=etree.XMLParser(encoding="UTF-8")) ) + assert check_info_revhistory_revision_date(tree, {}) is None -def test_check_info_revhistory_revision_date(): + +def test_check_info_revhistory_revision_date_missing(): xmlcontent = """
Test + + + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, + parser=etree.XMLParser(encoding="UTF-8")) + ) + with pytest.raises(InvalidValueError, + match="Couldn't find a date element"): + check_info_revhistory_revision_date(tree, {}) + + + +def test_check_info_revhistory_revision_date_invalid_format(): + xmlcontent = """
+ + Test + - 2021-01-01 + January 2024 @@ -209,4 +257,29 @@ def test_check_info_revhistory_revision_date(): parser=etree.XMLParser(encoding="UTF-8")) ) - assert check_info_revhistory_revision_date(tree, {}) is None \ No newline at end of file + with pytest.raises(InvalidValueError, + match="Invalid date format"): + check_info_revhistory_revision_date(tree, {}) + + +def test_check_info_revhistory_revision_date_invalid_value(): + xmlcontent = """
+ + Test + + + 2024-13 + + + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, + parser=etree.XMLParser(encoding="UTF-8")) + ) + + with pytest.raises(InvalidValueError, + match="Invalid value in metadata" + ): + check_info_revhistory_revision_date(tree, {}) From 74ba8cabaa38478368c1f28e9e8a323c10e1b776 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Wed, 29 May 2024 15:47:04 +0200 Subject: [PATCH 039/107] Check for correct order of revision/date --- .../src/metadatavalidator/checks/__init__.py | 5 +- .../metadatavalidator/checks/check_info.py | 59 +++++++++++++------ .../src/metadatavalidator/util.py | 36 +++++++++-- .../tests/checks/test_check_info.py | 30 +++++++++- 4 files changed, 107 insertions(+), 23 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py index efdc8d977..54d939c44 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py @@ -6,9 +6,11 @@ check_info, check_info_revhistory, check_info_revhistory_revision, - check_info_revhistory_revision_date + check_info_revhistory_revision_date, + check_info_revhistory_revision_order ) +from .check_meta import check_meta_ # Keep the order. The next item is dependent on the previous item. 
__all__ = [ @@ -18,5 +20,6 @@ "check_info_revhistory", "check_info_revhistory_revision", "check_info_revhistory_revision_date", + "check_info_revhistory_revision_order", ] diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py index ef5a8a508..4cf671d24 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py @@ -1,3 +1,5 @@ +import datetime +import itertools import typing as t from lxml import etree @@ -6,13 +8,17 @@ DATE_REGEX, DOCBOOK_NS, XML_NS, - ) +) from ..exceptions import InvalidValueError, MissingAttributeWarning from ..logging import log -from ..util import getfullxpath, parse_date +from ..util import ( + getfullxpath, + validatedate, + validatedatevalue +) -def check_info(tree: etree.ElementTree, config: dict[t.Any, t.Any]): +def check_info(tree: etree._ElementTree, config: dict[t.Any, t.Any]): """Checks for an info element""" root = tree.getroot() info = root.find(".//{%s}info" % DOCBOOK_NS) @@ -20,7 +26,7 @@ def check_info(tree: etree.ElementTree, config: dict[t.Any, t.Any]): raise InvalidValueError(f"Couldn't find info element in {root.tag}.") -def check_info_revhistory(tree: etree.ElementTree, config: dict[t.Any, t.Any]): +def check_info_revhistory(tree: etree._ElementTree, config: dict[t.Any, t.Any]): """Checks for an info/revhistory element""" info = tree.find("./d:info", namespaces={"d": DOCBOOK_NS}) if info is None: @@ -40,7 +46,7 @@ def check_info_revhistory(tree: etree.ElementTree, config: dict[t.Any, t.Any]): -def check_info_revhistory_revision(tree: etree.ElementTree, +def check_info_revhistory_revision(tree: etree._ElementTree, config: dict[t.Any, t.Any]): """Checks for an info/revhistory/revision element""" revhistory = tree.find("./d:info/d:revhistory", namespaces={"d": DOCBOOK_NS}) @@ -59,7 +65,7 @@ def 
check_info_revhistory_revision(tree: etree.ElementTree, raise MissingAttributeWarning(xpath) -def check_info_revhistory_revision_date(tree: etree.ElementTree, +def check_info_revhistory_revision_date(tree: etree._ElementTree, config: dict[t.Any, t.Any]): """Checks for an info/revhistory/revision/date element""" date = tree.find("./d:info/d:revhistory/d:revision/d:date", @@ -67,15 +73,34 @@ def check_info_revhistory_revision_date(tree: etree.ElementTree, if date is None: raise InvalidValueError(f"Couldn't find a date element in info/revhistory/revision.") - # First check the formal correctness of the date with regex - if DATE_REGEX.search(date.text) is None: - path = getfullxpath(date) - raise InvalidValueError(f"Invalid date format in {date.tag} (XPath={path}).") - - # Check if the date is valid - try: - parse_date(date.text.strip()) - except ValueError as e: - xpath = getfullxpath(date) - raise InvalidValueError(f"{e} (XPath={xpath})") + validatedate(date) + + +def check_info_revhistory_revision_order(tree: etree._ElementTree, + config: dict[t.Any, t.Any]): + """Checks for the right order of info/revhistory/revision elements""" + revhistory = tree.find("./d:info/d:revhistory", namespaces={"d": DOCBOOK_NS}) + revisions = revhistory.xpath("d:revision", + namespaces={"d": DOCBOOK_NS}) + xpath = getfullxpath(revhistory) + if not revisions: + return None + + date_elements = [rev.find("./d:date", namespaces={"d": DOCBOOK_NS}) + for rev in revisions] + dates = [ + validatedatevalue(d.text) + for d in date_elements if d is not None + ] + converteddates: list[datetime.date] = [d for d in dates if d is not None] + + # First check: check if we have the same number of dates and revisions + if len(date_elements) != len(revisions): + raise InvalidValueError(f"Couldn't convert all dates. 
Check {xpath}") + + # Second check: we have the same number of dates and revisions, now + # check if the dates are in descending order + for first, second in itertools.pairwise(converteddates): + if first <= second: + raise InvalidValueError("Dates in revhistory/revision are not in descending order.") diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/util.py b/python-scripts/metadatavalidator/src/metadatavalidator/util.py index 7982474c9..2a57f15cd 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/util.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/util.py @@ -1,9 +1,12 @@ -from datetime import datetime, date +import datetime +import typing as t from lxml import etree from .common import ( + DATE_REGEX, NAMESPACES2PREFIX, ) +from .exceptions import InvalidValueError def getfullxpath(element: etree._Element, @@ -51,7 +54,7 @@ def getfullxpath(element: etree._Element, ) -def parse_date(date_text: str) -> date: +def parse_date(date_text: str) -> datetime.date: """Attempt to parse a date string into a date object Valid formats are YYYY-MM-DD, YYYY-M-D, YYYY-MM, YYYY-M, YYYY-MM-D, and YYYY-M-D @@ -60,10 +63,35 @@ def parse_date(date_text: str) -> date: for fmt in ("%Y-%m-%d", "%Y-%m"): try: # This will handle all formats - parsed_date = datetime.strptime(date_text, fmt) + parsed_date = datetime.datetime.strptime(date_text, fmt) return parsed_date.date() except ValueError: continue # If none of the formats matched, raise an error - raise ValueError(f"Invalid date format: {date_text}") \ No newline at end of file + raise ValueError(f"Invalid date format: {date_text}") + + +def validatedate(element: etree._Element): + """Validate the date text from an element""" + # First check the formal correctness of the date with regex + + date = validatedatevalue(element.text.strip()) + if date is None: + path = getfullxpath(element) + raise InvalidValueError(f"Invalid date format in {element.tag} (XPath={path}).") + return date + + 
+def validatedatevalue(date: str) -> t.Optional[datetime.date]: + """Validate the date text from an element""" + # First check the formal correctness of the date with regex + if DATE_REGEX.search(date) is None: + raise InvalidValueError(f"Invalid date format in {date}.") + + # Check if the date is valid + try: + return parse_date(date.strip()) + + except ValueError as e: + return None diff --git a/python-scripts/metadatavalidator/tests/checks/test_check_info.py b/python-scripts/metadatavalidator/tests/checks/test_check_info.py index 5f6c7850a..9d88ff438 100644 --- a/python-scripts/metadatavalidator/tests/checks/test_check_info.py +++ b/python-scripts/metadatavalidator/tests/checks/test_check_info.py @@ -4,7 +4,8 @@ from metadatavalidator.checks import ( check_info, check_info_revhistory, check_info_revhistory_revision, - check_info_revhistory_revision_date + check_info_revhistory_revision_date, + check_info_revhistory_revision_order, ) from metadatavalidator.exceptions import InvalidValueError, MissingAttributeWarning @@ -283,3 +284,30 @@ def test_check_info_revhistory_revision_date_invalid_value(): match="Invalid value in metadata" ): check_info_revhistory_revision_date(tree, {}) + + +def test_check_info_revhistory_revision_order(): + xmlcontent = """
+ + Test + + + 2024-13 + + + 2023-12-12 + + + 2022-04 + + + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, + parser=etree.XMLParser(encoding="UTF-8")) + ) + + assert check_info_revhistory_revision_order(tree, {}) is None + From 3ebfb2216ef40671013baf41679290be0dff45f8 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Wed, 29 May 2024 16:04:22 +0200 Subject: [PATCH 040/107] Use xmlparser fixture --- .../tests/checks/test_check_info.py | 94 +++++++++---------- .../metadatavalidator/tests/conftest.py | 9 ++ 2 files changed, 54 insertions(+), 49 deletions(-) diff --git a/python-scripts/metadatavalidator/tests/checks/test_check_info.py b/python-scripts/metadatavalidator/tests/checks/test_check_info.py index 9d88ff438..eae79b501 100644 --- a/python-scripts/metadatavalidator/tests/checks/test_check_info.py +++ b/python-scripts/metadatavalidator/tests/checks/test_check_info.py @@ -10,7 +10,7 @@ from metadatavalidator.exceptions import InvalidValueError, MissingAttributeWarning -def test_check_info(): +def test_check_info(xmlparser): xmlcontent = """
Test @@ -18,27 +18,25 @@ def test_check_info():
""" tree = etree.ElementTree( - etree.fromstring(xmlcontent, - parser=etree.XMLParser(encoding="UTF-8")) + etree.fromstring(xmlcontent, parser=xmlparser) ) assert check_info(tree, {}) is None -def test_check_info_missing(): +def test_check_info_missing(xmlparser): xmlcontent = """
""" tree = etree.ElementTree( - etree.fromstring(xmlcontent, - parser=etree.XMLParser(encoding="UTF-8")) + etree.fromstring(xmlcontent, parser=xmlparser) ) with pytest.raises(InvalidValueError, match="Couldn't find info element"): check_info(tree, {}) -def test_check_info_revhistory_missing(): +def test_check_info_revhistory_missing(xmlparser): xmlcontent = """
Test @@ -46,8 +44,7 @@ def test_check_info_revhistory_missing():
""" tree = etree.ElementTree( - etree.fromstring(xmlcontent, - parser=etree.XMLParser(encoding="UTF-8")) + etree.fromstring(xmlcontent, parser=xmlparser) ) with pytest.raises(InvalidValueError, @@ -55,7 +52,7 @@ def test_check_info_revhistory_missing(): check_info_revhistory(tree, {}) -def test_check_info_revhistory(): +def test_check_info_revhistory(xmlparser): xmlcontent = """
Test @@ -64,27 +61,25 @@ def test_check_info_revhistory():
""" tree = etree.ElementTree( - etree.fromstring(xmlcontent, - parser=etree.XMLParser(encoding="UTF-8")) + etree.fromstring(xmlcontent, parser=xmlparser) ) assert check_info_revhistory(tree, {}) is None -def test_check_info_revhistory_without_info(): +def test_check_info_revhistory_without_info(xmlparser): xmlcontent = """
""" tree = etree.ElementTree( - etree.fromstring(xmlcontent, - parser=etree.XMLParser(encoding="UTF-8")) + etree.fromstring(xmlcontent, parser=xmlparser) ) assert check_info_revhistory(tree, {}) is None assert check_info_revhistory(tree, {}) is None -def test_check_info_revhistory_xmlid(): +def test_check_info_revhistory_xmlid(xmlparser): xmlcontent = """
Test @@ -93,14 +88,13 @@ def test_check_info_revhistory_xmlid():
""" tree = etree.ElementTree( - etree.fromstring(xmlcontent, - parser=etree.XMLParser(encoding="UTF-8")) + etree.fromstring(xmlcontent, parser=xmlparser) ) assert check_info_revhistory(tree, {}) is None -def test_check_info_revhistory_missing_xmlid(): +def test_check_info_revhistory_missing_xmlid(xmlparser): xmlcontent = """
Test @@ -109,8 +103,7 @@ def test_check_info_revhistory_missing_xmlid():
""" tree = etree.ElementTree( - etree.fromstring(xmlcontent, - parser=etree.XMLParser(encoding="UTF-8")) + etree.fromstring(xmlcontent, parser=xmlparser) ) with pytest.raises(InvalidValueError, @@ -118,7 +111,7 @@ def test_check_info_revhistory_missing_xmlid(): check_info_revhistory(tree, {}) -def test_check_info_revhistory_xmlid_with_wrong_value(): +def test_check_info_revhistory_xmlid_with_wrong_value(xmlparser): xmlcontent = """
Test @@ -127,8 +120,7 @@ def test_check_info_revhistory_xmlid_with_wrong_value():
""" tree = etree.ElementTree( - etree.fromstring(xmlcontent, - parser=etree.XMLParser(encoding="UTF-8")) + etree.fromstring(xmlcontent, parser=xmlparser) ) with pytest.raises(InvalidValueError, @@ -136,7 +128,7 @@ def test_check_info_revhistory_xmlid_with_wrong_value(): check_info_revhistory(tree, {}) -def test_check_info_revhistory_revision(): +def test_check_info_revhistory_revision(xmlparser): xmlcontent = """
Test @@ -149,13 +141,12 @@ def test_check_info_revhistory_revision():
""" tree = etree.ElementTree( - etree.fromstring(xmlcontent, - parser=etree.XMLParser(encoding="UTF-8")) + etree.fromstring(xmlcontent, parser=xmlparser) ) assert check_info_revhistory_revision(tree, {}) is None -def test_check_info_revhistory_revision_missing_xmlid(): +def test_check_info_revhistory_revision_missing_xmlid(xmlparser): xmlcontent = """
Test @@ -168,8 +159,7 @@ def test_check_info_revhistory_revision_missing_xmlid():
""" tree = etree.ElementTree( - etree.fromstring(xmlcontent, - parser=etree.XMLParser(encoding="UTF-8")) + etree.fromstring(xmlcontent, parser=xmlparser) ) with pytest.raises(MissingAttributeWarning, @@ -179,8 +169,21 @@ def test_check_info_revhistory_revision_missing_xmlid(): {"metadata": {"require_xmlid_on_revision": True}}) +def test_check_info_revhistory_missing(xmlparser): + xmlcontent = """
+ + Test + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, parser=xmlparser) + ) + + check_info_revhistory_revision(tree, {}) is None -def test_check_info_revhistory_revision_missing(): + +def test_check_info_revhistory_revision_missing(xmlparser): xmlcontent = """
Test @@ -189,8 +192,7 @@ def test_check_info_revhistory_revision_missing():
""" tree = etree.ElementTree( - etree.fromstring(xmlcontent, - parser=etree.XMLParser(encoding="UTF-8")) + etree.fromstring(xmlcontent, parser=xmlparser) ) with pytest.raises(InvalidValueError, @@ -201,7 +203,7 @@ def test_check_info_revhistory_revision_missing(): ) -def test_check_info_revhistory_revision_date(): +def test_check_info_revhistory_revision_date(xmlparser): xmlcontent = """
Test @@ -214,14 +216,13 @@ def test_check_info_revhistory_revision_date():
""" tree = etree.ElementTree( - etree.fromstring(xmlcontent, - parser=etree.XMLParser(encoding="UTF-8")) + etree.fromstring(xmlcontent, parser=xmlparser) ) assert check_info_revhistory_revision_date(tree, {}) is None -def test_check_info_revhistory_revision_date_missing(): +def test_check_info_revhistory_revision_date_missing(xmlparser): xmlcontent = """
Test @@ -232,16 +233,14 @@ def test_check_info_revhistory_revision_date_missing():
""" tree = etree.ElementTree( - etree.fromstring(xmlcontent, - parser=etree.XMLParser(encoding="UTF-8")) + etree.fromstring(xmlcontent, parser=xmlparser) ) with pytest.raises(InvalidValueError, match="Couldn't find a date element"): check_info_revhistory_revision_date(tree, {}) - -def test_check_info_revhistory_revision_date_invalid_format(): +def test_check_info_revhistory_revision_date_invalid_format(xmlparser): xmlcontent = """
Test @@ -254,8 +253,7 @@ def test_check_info_revhistory_revision_date_invalid_format():
""" tree = etree.ElementTree( - etree.fromstring(xmlcontent, - parser=etree.XMLParser(encoding="UTF-8")) + etree.fromstring(xmlcontent, parser=xmlparser) ) with pytest.raises(InvalidValueError, @@ -263,7 +261,7 @@ def test_check_info_revhistory_revision_date_invalid_format(): check_info_revhistory_revision_date(tree, {}) -def test_check_info_revhistory_revision_date_invalid_value(): +def test_check_info_revhistory_revision_date_invalid_value(xmlparser): xmlcontent = """
Test @@ -276,8 +274,7 @@ def test_check_info_revhistory_revision_date_invalid_value():
""" tree = etree.ElementTree( - etree.fromstring(xmlcontent, - parser=etree.XMLParser(encoding="UTF-8")) + etree.fromstring(xmlcontent, parser=xmlparser) ) with pytest.raises(InvalidValueError, @@ -286,7 +283,7 @@ def test_check_info_revhistory_revision_date_invalid_value(): check_info_revhistory_revision_date(tree, {}) -def test_check_info_revhistory_revision_order(): +def test_check_info_revhistory_revision_order(xmlparser): xmlcontent = """
Test @@ -305,8 +302,7 @@ def test_check_info_revhistory_revision_order():
""" tree = etree.ElementTree( - etree.fromstring(xmlcontent, - parser=etree.XMLParser(encoding="UTF-8")) + etree.fromstring(xmlcontent, parser=xmlparser) ) assert check_info_revhistory_revision_order(tree, {}) is None diff --git a/python-scripts/metadatavalidator/tests/conftest.py b/python-scripts/metadatavalidator/tests/conftest.py index 2a1446174..fcb4b4361 100644 --- a/python-scripts/metadatavalidator/tests/conftest.py +++ b/python-scripts/metadatavalidator/tests/conftest.py @@ -1,5 +1,14 @@ import sys import os, os.path +from lxml import etree + +import pytest + os.environ.setdefault("PYTHONPATH", os.path.normpath(os.path.join(os.path.dirname(__file__), ".."))) + + +@pytest.fixture +def xmlparser(): + return etree.XMLParser(encoding="UTF-8") \ No newline at end of file From aee1f64b9a9b8406332b09cd782af1874d0f11d0 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Wed, 29 May 2024 16:16:47 +0200 Subject: [PATCH 041/107] Catch MissingAttributeWarning --- .../src/metadatavalidator/checks/check_info.py | 4 +++- .../metadatavalidator/src/metadatavalidator/process.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py index 4cf671d24..ef53c7dce 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py @@ -80,7 +80,9 @@ def check_info_revhistory_revision_order(tree: etree._ElementTree, config: dict[t.Any, t.Any]): """Checks for the right order of info/revhistory/revision elements""" revhistory = tree.find("./d:info/d:revhistory", namespaces={"d": DOCBOOK_NS}) - revisions = revhistory.xpath("d:revision", + if revhistory is None: + return + revisions = revhistory.xpath("./d:revision", namespaces={"d": DOCBOOK_NS}) xpath = getfullxpath(revhistory) if not revisions: diff --git 
a/python-scripts/metadatavalidator/src/metadatavalidator/process.py b/python-scripts/metadatavalidator/src/metadatavalidator/process.py index f6a1ab111..368ae56df 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/process.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/process.py @@ -8,7 +8,7 @@ # from .checks.check_root import check_root_tag, check_namespace from . import checks -from .exceptions import InvalidValueError +from .exceptions import InvalidValueError, MissingAttributeWarning from .logging import log @@ -52,7 +52,7 @@ async def process_xml_file(xmlfile: str, config: dict[t.Any, t.Any]): 'message': str(e) }) - except InvalidValueError as e: + except (InvalidValueError, MissingAttributeWarning) as e: #log.fatal("Invalid value in %r for %s: %s", # xmlfile, checkfunc.__name__, e) errors.append({ From af88a31a8e40262042b3daa82c651d93d5dd41d3 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Wed, 29 May 2024 16:23:56 +0200 Subject: [PATCH 042/107] Fix a bug in validatedatevalue --- .../metadatavalidator/src/metadatavalidator/util.py | 4 ++-- .../metadatavalidator/tests/checks/test_check_info.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/util.py b/python-scripts/metadatavalidator/src/metadatavalidator/util.py index 2a57f15cd..3510cabe8 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/util.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/util.py @@ -86,8 +86,8 @@ def validatedate(element: etree._Element): def validatedatevalue(date: str) -> t.Optional[datetime.date]: """Validate the date text from an element""" # First check the formal correctness of the date with regex - if DATE_REGEX.search(date) is None: - raise InvalidValueError(f"Invalid date format in {date}.") + if not date or not DATE_REGEX.search(date): + raise InvalidValueError(f"Date is empty or has invalid format: {date}.") # Check if the date is valid 
try: diff --git a/python-scripts/metadatavalidator/tests/checks/test_check_info.py b/python-scripts/metadatavalidator/tests/checks/test_check_info.py index eae79b501..2d1f26abb 100644 --- a/python-scripts/metadatavalidator/tests/checks/test_check_info.py +++ b/python-scripts/metadatavalidator/tests/checks/test_check_info.py @@ -257,7 +257,7 @@ def test_check_info_revhistory_revision_date_invalid_format(xmlparser): ) with pytest.raises(InvalidValueError, - match="Invalid date format"): + match=".*ate is empty or has invalid format.*"): check_info_revhistory_revision_date(tree, {}) From 5bdf6846efdbfb442d5a54ccd65bcba88b14ac3b Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Wed, 29 May 2024 17:27:43 +0200 Subject: [PATCH 043/107] Add test_check_info_revhistory_revision_order_one_invalid_date --- .../metadatavalidator/checks/check_info.py | 6 ++-- .../tests/checks/test_check_info.py | 29 ++++++++++++++++++- 2 files changed, 32 insertions(+), 3 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py index ef53c7dce..a6bc6725a 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py @@ -97,8 +97,10 @@ def check_info_revhistory_revision_order(tree: etree._ElementTree, converteddates: list[datetime.date] = [d for d in dates if d is not None] # First check: check if we have the same number of dates and revisions - if len(date_elements) != len(revisions): - raise InvalidValueError(f"Couldn't convert all dates. Check {xpath}") + if len(converteddates) != len(revisions): + raise InvalidValueError(f"Couldn't convert all dates " + f"(see position dates={dates.index(None)+1}). 
" + f"Check {xpath}") # Second check: we have the same number of dates and revisions, now # check if the dates are in descending order diff --git a/python-scripts/metadatavalidator/tests/checks/test_check_info.py b/python-scripts/metadatavalidator/tests/checks/test_check_info.py index 2d1f26abb..fbb9d9d16 100644 --- a/python-scripts/metadatavalidator/tests/checks/test_check_info.py +++ b/python-scripts/metadatavalidator/tests/checks/test_check_info.py @@ -289,7 +289,7 @@ def test_check_info_revhistory_revision_order(xmlparser): Test - 2024-13 + 2024-12 2023-12-12 @@ -307,3 +307,30 @@ def test_check_info_revhistory_revision_order(xmlparser): assert check_info_revhistory_revision_order(tree, {}) is None + +def test_check_info_revhistory_revision_order_one_invalid_date(xmlparser): + xmlcontent = """
+ + Test + + + 2024-53 + + + 2023-12-12 + + + 2022-04 + + + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, parser=xmlparser) + ) + + with pytest.raises(InvalidValueError, + match=".*Couldn't convert all dates.*see position dates=1.*" + ): + check_info_revhistory_revision_order(tree, {}) From 2fa1fdd06c8586c3ca98115b6d36f36fcca51947 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Mon, 3 Jun 2024 11:08:10 +0200 Subject: [PATCH 044/107] Add first & second date in comparing dates --- .../src/metadatavalidator/checks/check_info.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py index a6bc6725a..190dc70e7 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py @@ -106,5 +106,8 @@ def check_info_revhistory_revision_order(tree: etree._ElementTree, # check if the dates are in descending order for first, second in itertools.pairwise(converteddates): if first <= second: - raise InvalidValueError("Dates in revhistory/revision are not in descending order.") + raise InvalidValueError( + "Dates in revhistory/revision are not in descending order: " + f"{first} <= {second}." + ) From 35d34eecaec4c6ec22796e5bb71be169aad04a2e Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Mon, 3 Jun 2024 11:24:35 +0200 Subject: [PATCH 045/107] Check * Add metadata.require_meta_title and metadata_meta_title_length in config. 
* Add tests * Update README.rst --- python-scripts/metadatavalidator/README.rst | 22 +++++++ .../metadatavalidator/metadatavalidator.ini | 4 +- .../src/metadatavalidator/checks/__init__.py | 5 +- .../metadatavalidator/checks/check_meta.py | 25 ++++++++ .../src/metadatavalidator/config.py | 9 +++ .../tests/checks/test_check_meta.py | 64 +++++++++++++++++++ .../tests/data/metadatavalidator.ini | 8 ++- .../tests/test_script_config.py | 43 ++++++++++++- 8 files changed, 176 insertions(+), 4 deletions(-) create mode 100644 python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py create mode 100644 python-scripts/metadatavalidator/tests/checks/test_check_meta.py diff --git a/python-scripts/metadatavalidator/README.rst b/python-scripts/metadatavalidator/README.rst index 64fb37630..862f972e9 100644 --- a/python-scripts/metadatavalidator/README.rst +++ b/python-scripts/metadatavalidator/README.rst @@ -23,3 +23,25 @@ The configuration file is search in the following order (first is the highest): 1. In the system: :file:`/etc/metadatavalidator/config.ini` +Configuration values +-------------------- + +The configuration file is a standard INI file. The following values are +recognized: + +* :var:`validator`: Global options to configure the validator. + * :var:`file_extension`: The file extension to search for. Default is + ``.xml``. + + * :var:`check_root_elements`: List of allowed root elements (space separated by local DocBook name). Default is ``article book topic``. + + * :var:`valid_languages`: List of valid languages (space separated by ISO 639-1 code). Default is ``ar-ar cs-cz de-de en-us es-es fr-fr hu-hu it-it ja-jp ko-kr nl-nl pl-pl pt-br ru-ru sv-se zh-cn zh-tw``. + +* :var:`metadata`: Options to change behaviour of specific `` tags. + * :var:`revhistory`: Requires a ```` tag or not. + + * :var:`require_xmlid_on_revision`: Requires a ``xml:id`` attribute on each ```` tag or not. + + * :var:`require_meta_title`: Requires a ```` tag or not. 
+ + * :var:`meta_title_length`: Checks the length of the text content in ````. Default is 55. \ No newline at end of file diff --git a/python-scripts/metadatavalidator/metadatavalidator.ini b/python-scripts/metadatavalidator/metadatavalidator.ini index 6357ddfcb..ecdaaff7c 100644 --- a/python-scripts/metadatavalidator/metadatavalidator.ini +++ b/python-scripts/metadatavalidator/metadatavalidator.ini @@ -5,4 +5,6 @@ valid_languages = ar-ar cs-cz de-de en-us es-es fr-fr hu-hu it-it ja-jp ko-kr nl [metadata] revhistory = 0 -require_xmlid_on_revision = 1 \ No newline at end of file +require_xmlid_on_revision = 1 +require_meta_title = off +meta_title_length = 55 \ No newline at end of file diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py index 54d939c44..0fb6afc5e 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py @@ -10,16 +10,19 @@ check_info_revhistory_revision_order ) -from .check_meta import check_meta_ +from .check_meta import check_meta_title # Keep the order. The next item is dependent on the previous item. 
__all__ = [ "check_root_tag", "check_namespace", "check_info", + # "check_info_revhistory", "check_info_revhistory_revision", "check_info_revhistory_revision_date", "check_info_revhistory_revision_order", + # + "check_meta_title", ] diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py new file mode 100644 index 000000000..2e2c2a77a --- /dev/null +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py @@ -0,0 +1,25 @@ +import typing as t + +from lxml import etree + +from ..common import DOCBOOK_NS +from ..exceptions import InvalidValueError +from ..logging import log + + +def check_meta_title(tree: etree.ElementTree, config: dict[t.Any, t.Any]): + """Checks for a meta element""" + root = tree.getroot() + meta = root.find(".//{%s}meta[@name='title']" % DOCBOOK_NS) + required = config.get("metadata", {}).get("meta_title_required", False) + if meta is None: + if required: + raise InvalidValueError( + f"Couldn't find required meta element in {root.tag}." + ) + return + + length = config.get("metadata", {}).get("meta_title_length", 55) + if len(meta.text) > length: + raise InvalidValueError(f"Meta title is too long. 
Max length is {length} characters.") + diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/config.py b/python-scripts/metadatavalidator/src/metadatavalidator/config.py index 6adbce881..50c5efe07 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/config.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/config.py @@ -80,6 +80,15 @@ def validate_and_convert_config(config: configparser.ConfigParser) -> dict[t.Any ) theconfig.setdefault("metadata", {})["require_xmlid_on_revision"] = require_xmlid_on_revision + try: + meta_title_length = int(theconfig.get("metadata", {}).get("meta_title_length")) + if meta_title_length < 0: + raise ValueError("meta_title_length should be a positive integer") + theconfig.setdefault("metadata", {})["meta_title_length"] = meta_title_length + + except TypeError: + raise MissingKeyError("metadata.meta_title_length") + # Store the configfiles theconfig["configfiles"] = getattr(config, "configfiles") diff --git a/python-scripts/metadatavalidator/tests/checks/test_check_meta.py b/python-scripts/metadatavalidator/tests/checks/test_check_meta.py new file mode 100644 index 000000000..86fe8db7e --- /dev/null +++ b/python-scripts/metadatavalidator/tests/checks/test_check_meta.py @@ -0,0 +1,64 @@ +from lxml import etree +import pytest + +from metadatavalidator.checks.check_meta import check_meta_title +from metadatavalidator.exceptions import InvalidValueError + + +def test_check_meta_title(xmlparser): + xmlcontent = """
+ + Test + The SEO title + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, parser=xmlparser) + ) + + assert check_meta_title(tree, {}) is None + + +def test_check_meta_title_wrong_length(xmlparser): + xmlcontent = """
+ + Test + The SEO title that is too long + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, parser=xmlparser) + ) + + with pytest.raises(InvalidValueError, match=".*too long.*"): + check_meta_title(tree, dict(metadata=dict(meta_title_length=10))) + + +def test_check_required_meta_title(xmlparser): + xmlcontent = """
+ + Test + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, parser=xmlparser) + ) + with pytest.raises(InvalidValueError, match=".*required.*"): + check_meta_title(tree, dict(metadata=dict(meta_title_required=True))) + + +def test_check_optional_meta_title(xmlparser): + xmlcontent = """
+ + Test + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, parser=xmlparser) + ) + assert check_meta_title(tree, + dict(metadata=dict(meta_title_required=False))) is None \ No newline at end of file diff --git a/python-scripts/metadatavalidator/tests/data/metadatavalidator.ini b/python-scripts/metadatavalidator/tests/data/metadatavalidator.ini index 0a4e3147e..f771ddeea 100644 --- a/python-scripts/metadatavalidator/tests/data/metadatavalidator.ini +++ b/python-scripts/metadatavalidator/tests/data/metadatavalidator.ini @@ -1,4 +1,10 @@ [validator] file_extension = .xml check_root_elements = book article topic -valid_languages = de-de en-us es-es fr-fr \ No newline at end of file +valid_languages = de-de en-us es-es fr-fr + +[metadata] +revhistory = 0 +require_xmlid_on_revision = 1 +require_meta_title = 1 +meta_title_length = 55 \ No newline at end of file diff --git a/python-scripts/metadatavalidator/tests/test_script_config.py b/python-scripts/metadatavalidator/tests/test_script_config.py index 2a43ce4dc..62744eee8 100644 --- a/python-scripts/metadatavalidator/tests/test_script_config.py +++ b/python-scripts/metadatavalidator/tests/test_script_config.py @@ -3,7 +3,7 @@ import pytest -from metadatavalidator.config import readconfig, validate_and_convert_config +from metadatavalidator.config import readconfig, validate_and_convert_config, truefalse from metadatavalidator.exceptions import MissingKeyError, MissingSectionError, NoConfigFilesFoundError def create_config(): @@ -12,10 +12,36 @@ def create_config(): config.set("validator", "check_root_elements", "book article") config.set("validator", "file_extension", ".xml") config.set("validator", "valid_languages", "en-us de-de") + # + config.add_section("metadata") + config.set("metadata", "revhistory", "0") + config.set("metadata", "require_xmlid_on_revision", "true") + config.set("metadata", "meta_title_length", "50") + # setattr(config, "configfiles", None) return config +@pytest.mark.parametrize("value, 
expected", [ + ("true", True), + ("True", True), + ("false", False), + ("False", False), + ("1", True), + ("0", False), + ("on", True), + ("off", False), + ("On", True), + ("Off", False), + (True, True), + (False, False), + (1, True), + (0, False), +]) +def test_truefalse(value, expected): + assert truefalse(value) == expected + + def test_valid_validate_and_convert_config(): config = create_config() result = validate_and_convert_config(config) @@ -51,6 +77,20 @@ def test_missing_key_valid_languages(): validate_and_convert_config(config) +def test_missing_key_meta_title_length(): + config = create_config() + config.remove_option("metadata", "meta_title_length") + with pytest.raises(MissingKeyError, match=".*metadata.meta_title_length.*"): + validate_and_convert_config(config) + + +def test_meta_title_length_not_positive(): + config = create_config() + config.set("metadata", "meta_title_length", "-1") + with pytest.raises(ValueError, match=".*meta_title_length should be a positive integer.*"): + validate_and_convert_config(config) + + def test_readconfig(): configfile = os.path.join(os.path.dirname(__file__), "data/metadatavalidator.ini") result = readconfig([configfile]) @@ -60,3 +100,4 @@ def test_readconfig(): "valid_languages": ["de-de", "en-us", "es-es", "fr-fr"] } assert result.get("configfiles") == [configfile] + From bf5cc6276a7713c93cbdc7c18c2f197034c91322 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Mon, 3 Jun 2024 12:48:58 +0200 Subject: [PATCH 046/107] Check * Add metadata.require_meta_description and metadata.meta_description_length in the config * Add tests * Update README.rst --- python-scripts/metadatavalidator/README.rst | 6 +- .../metadatavalidator/metadatavalidator.ini | 8 ++- .../src/metadatavalidator/checks/__init__.py | 6 +- .../metadatavalidator/checks/check_meta.py | 20 +++++- .../src/metadatavalidator/config.py | 9 +++ .../tests/checks/test_check_meta.py | 66 ++++++++++++++++++- .../tests/data/metadatavalidator.ini | 9 ++- 
.../tests/test_script_config.py | 15 +++++ 8 files changed, 130 insertions(+), 9 deletions(-) diff --git a/python-scripts/metadatavalidator/README.rst b/python-scripts/metadatavalidator/README.rst index 862f972e9..8c12845ce 100644 --- a/python-scripts/metadatavalidator/README.rst +++ b/python-scripts/metadatavalidator/README.rst @@ -44,4 +44,8 @@ recognized: * :var:`require_meta_title`: Requires a ```` tag or not. - * :var:`meta_title_length`: Checks the length of the text content in ````. Default is 55. \ No newline at end of file + * :var:`meta_title_length`: Checks the length of the text content in ````. Default is 55. + + * :var:`require_meta_description`: Requires a ```` tag or not. + + * :var:`meta_description_length`: Checks the length of the text content in ````. Default is 155. \ No newline at end of file diff --git a/python-scripts/metadatavalidator/metadatavalidator.ini b/python-scripts/metadatavalidator/metadatavalidator.ini index ecdaaff7c..690a49f53 100644 --- a/python-scripts/metadatavalidator/metadatavalidator.ini +++ b/python-scripts/metadatavalidator/metadatavalidator.ini @@ -6,5 +6,11 @@ valid_languages = ar-ar cs-cz de-de en-us es-es fr-fr hu-hu it-it ja-jp ko-kr nl [metadata] revhistory = 0 require_xmlid_on_revision = 1 + +# require_meta_title = off -meta_title_length = 55 \ No newline at end of file +meta_title_length = 55 + +# +require_meta_description = off +meta_description_length = 150 \ No newline at end of file diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py index 0fb6afc5e..9c1b9a04e 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py @@ -10,7 +10,10 @@ check_info_revhistory_revision_order ) -from .check_meta import check_meta_title +from .check_meta import ( + check_meta_title, + check_meta_description, +) # 
Keep the order. The next item is dependent on the previous item. __all__ = [ @@ -24,5 +27,6 @@ "check_info_revhistory_revision_order", # "check_meta_title", + "check_meta_description", ] diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py index 2e2c2a77a..61f36dfe9 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py @@ -8,14 +8,14 @@ def check_meta_title(tree: etree.ElementTree, config: dict[t.Any, t.Any]): - """Checks for a meta element""" + """Checks for a element""" root = tree.getroot() meta = root.find(".//{%s}meta[@name='title']" % DOCBOOK_NS) required = config.get("metadata", {}).get("meta_title_required", False) if meta is None: if required: raise InvalidValueError( - f"Couldn't find required meta element in {root.tag}." + f"Couldn't find required meta[@name='title'] element in {root.tag}." ) return @@ -23,3 +23,19 @@ def check_meta_title(tree: etree.ElementTree, config: dict[t.Any, t.Any]): if len(meta.text) > length: raise InvalidValueError(f"Meta title is too long. Max length is {length} characters.") + +def check_meta_description(tree: etree.ElementTree, config: dict[t.Any, t.Any]): + """Checks for a element""" + root = tree.getroot() + meta = root.find(".//{%s}meta[@name='description']" % DOCBOOK_NS) + required = config.get("metadata", {}).get("meta_description_required", False) + if meta is None: + if required: + raise InvalidValueError( + f"Couldn't find required meta[@name='description'] element in {root.tag}." + ) + return + + length = config.get("metadata", {}).get("meta_description_length", 150) + if len(meta.text) > length: + raise InvalidValueError(f"Meta description is too long. 
Max length is {length} characters.") \ No newline at end of file diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/config.py b/python-scripts/metadatavalidator/src/metadatavalidator/config.py index 50c5efe07..f331b5b0f 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/config.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/config.py @@ -89,6 +89,15 @@ def validate_and_convert_config(config: configparser.ConfigParser) -> dict[t.Any except TypeError: raise MissingKeyError("metadata.meta_title_length") + try: + meta_description_length = int(theconfig.get("metadata", {}).get("meta_description_length")) + if meta_description_length < 0: + raise ValueError("meta_description_length should be a positive integer") + theconfig.setdefault("metadata", {})["meta_description_length"] = meta_description_length + + except TypeError: + raise MissingKeyError("metadata.meta_description_length") + # Store the configfiles theconfig["configfiles"] = getattr(config, "configfiles") diff --git a/python-scripts/metadatavalidator/tests/checks/test_check_meta.py b/python-scripts/metadatavalidator/tests/checks/test_check_meta.py index 86fe8db7e..dc5afef28 100644 --- a/python-scripts/metadatavalidator/tests/checks/test_check_meta.py +++ b/python-scripts/metadatavalidator/tests/checks/test_check_meta.py @@ -1,7 +1,10 @@ from lxml import etree import pytest -from metadatavalidator.checks.check_meta import check_meta_title +from metadatavalidator.checks.check_meta import ( + check_meta_title, + check_meta_description, +) from metadatavalidator.exceptions import InvalidValueError @@ -61,4 +64,63 @@ def test_check_optional_meta_title(xmlparser): etree.fromstring(xmlcontent, parser=xmlparser) ) assert check_meta_title(tree, - dict(metadata=dict(meta_title_required=False))) is None \ No newline at end of file + dict(metadata=dict(meta_title_required=False))) is None + + +def test_check_meta_description(xmlparser): + xmlcontent = """
+ + Test + The SEO description + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, parser=xmlparser) + ) + + assert check_meta_description(tree, {}) is None + + +def test_check_meta_description_wrong_length(xmlparser): + xmlcontent = """
+ + Test + The SEO description that is too long + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, parser=xmlparser) + ) + + with pytest.raises(InvalidValueError, match=".*too long.*"): + check_meta_description(tree, dict(metadata=dict(meta_description_length=10))) + + +def test_check_required_meta_description(xmlparser): + xmlcontent = """
+ + Test + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, parser=xmlparser) + ) + with pytest.raises(InvalidValueError, match=".*required.*"): + check_meta_description(tree, dict(metadata=dict(meta_description_required=True))) + + +def test_check_optional_meta_description(xmlparser): + xmlcontent = """
+ + Test + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, parser=xmlparser) + ) + assert check_meta_description(tree, + dict(metadata=dict(meta_description_required=False))) is None \ No newline at end of file diff --git a/python-scripts/metadatavalidator/tests/data/metadatavalidator.ini b/python-scripts/metadatavalidator/tests/data/metadatavalidator.ini index f771ddeea..789fd418a 100644 --- a/python-scripts/metadatavalidator/tests/data/metadatavalidator.ini +++ b/python-scripts/metadatavalidator/tests/data/metadatavalidator.ini @@ -6,5 +6,10 @@ valid_languages = de-de en-us es-es fr-fr [metadata] revhistory = 0 require_xmlid_on_revision = 1 -require_meta_title = 1 -meta_title_length = 55 \ No newline at end of file +# +require_meta_title = off +meta_title_length = 55 + +# +require_meta_description = off +meta_description_length = 150 \ No newline at end of file diff --git a/python-scripts/metadatavalidator/tests/test_script_config.py b/python-scripts/metadatavalidator/tests/test_script_config.py index 62744eee8..189204547 100644 --- a/python-scripts/metadatavalidator/tests/test_script_config.py +++ b/python-scripts/metadatavalidator/tests/test_script_config.py @@ -17,6 +17,7 @@ def create_config(): config.set("metadata", "revhistory", "0") config.set("metadata", "require_xmlid_on_revision", "true") config.set("metadata", "meta_title_length", "50") + config.set("metadata", "meta_description_length", "150") # setattr(config, "configfiles", None) return config @@ -91,6 +92,20 @@ def test_meta_title_length_not_positive(): validate_and_convert_config(config) +def test_meta_description_length_not_positive(): + config = create_config() + config.set("metadata", "meta_description_length", "-1") + with pytest.raises(ValueError, match=".*meta_description_length should be a positive integer.*"): + validate_and_convert_config(config) + + +def test_missing_key_meta_description_length(): + config = create_config() + config.remove_option("metadata", 
"meta_description_length") + with pytest.raises(MissingKeyError, match=".*metadata.meta_description_length.*"): + validate_and_convert_config(config) + + def test_readconfig(): configfile = os.path.join(os.path.dirname(__file__), "data/metadatavalidator.ini") result = readconfig([configfile]) From 34a045566f801ed61ab440083f1a63a8313c9080 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Mon, 3 Jun 2024 13:57:36 +0200 Subject: [PATCH 047/107] Remove obsolete structures to raise coverage --- .../metadatavalidator/src/metadatavalidator/cli.py | 14 +++++++------- .../src/metadatavalidator/common.py | 2 +- .../src/metadatavalidator/exceptions.py | 8 ++++---- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/cli.py b/python-scripts/metadatavalidator/src/metadatavalidator/cli.py index f2d5ffee7..18e68be34 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/cli.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/cli.py @@ -5,13 +5,6 @@ import sys import typing as t -try: - from lxml import etree - -except ImportError: - print("Cannot import lxml. ", file=sys.stderr) - sys.exit(10) - from . 
import __author__, __version__ from .config import readconfig from .common import CONFIGDIRS @@ -70,6 +63,9 @@ def main(cliargs=None) -> int: :return: error code """ try: + # just try to import lxml.etree + from lxml import etree # noqa: F401 + args = parsecli(cliargs) config = readconfig(CONFIGDIRS) args.config = config @@ -79,6 +75,10 @@ def main(cliargs=None) -> int: return 0 + except ImportError as error: + log.critical("lxml is not installed") + return 50 + except NoConfigFilesFoundError as error: log.critical("No config files found") return 100 diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/common.py b/python-scripts/metadatavalidator/src/metadatavalidator/common.py index edb5b811b..5d4c3b094 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/common.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/common.py @@ -15,7 +15,7 @@ ] METAVALIDATOR_CONFIG = os.environ.get('METAVALIDATOR_CONFIG') if METAVALIDATOR_CONFIG is not None: - CONFIGDIRS.insert(0, os.path.expanduser(METAVALIDATOR_CONFIG)) + CONFIGDIRS.insert(0, os.path.expanduser(METAVALIDATOR_CONFIG)) # pragma: no cover #: Store the expanded paths in tuple CONFIGDIRS = tuple(os.path.expanduser(i) for i in CONFIGDIRS) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/exceptions.py b/python-scripts/metadatavalidator/src/metadatavalidator/exceptions.py index e46b268c2..8a6392eee 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/exceptions.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/exceptions.py @@ -46,10 +46,10 @@ def __str__(self) -> str: # --- Errors -class InvalidElementError(BaseMetadataError): - """An element was missing or invalid in the metadata""" - def __str__(self) -> str: - return f"Missing or invalid element in {super().__str__()}" +# class InvalidElementError(BaseMetadataError): +# """An element was missing or invalid in the metadata""" +# def __str__(self) -> str: +# return f"Missing or 
invalid element in {super().__str__()}" class InvalidValueError(BaseMetadataError): From db22009916d5d5da68918c864f01d084176142c4 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Mon, 3 Jun 2024 14:06:11 +0200 Subject: [PATCH 048/107] Support --config option for a config file This option will replace any other config files that might be available. --- python-scripts/metadatavalidator/README.rst | 1 + .../metadatavalidator/src/metadatavalidator/cli.py | 11 ++++++++++- .../metadatavalidator/tests/test_script_cli.py | 8 +++++++- 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/python-scripts/metadatavalidator/README.rst b/python-scripts/metadatavalidator/README.rst index 8c12845ce..cc84f4a10 100644 --- a/python-scripts/metadatavalidator/README.rst +++ b/python-scripts/metadatavalidator/README.rst @@ -17,6 +17,7 @@ Configuration The configuration file is search in the following order (first is the highest): +1. Command line with :option:`--config`. This doesn't search for other configuration files. 1. Environment variable :envar:`METAVALIDATOR_CONFIG`. 1. In the current directory: :file:`metadatavalidator.ini` 1. 
In the users' home directory: :file:`~/.config/metadatavalidator/config.ini` diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/cli.py b/python-scripts/metadatavalidator/src/metadatavalidator/cli.py index 18e68be34..9427202a2 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/cli.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/cli.py @@ -32,6 +32,10 @@ def parsecli(cliargs=None) -> argparse.Namespace: default=0, # emit warnings, errors, and critical help="increase verbosity level") + parser.add_argument('--config', + help="The configuration file to use (disables other systems or home configuration files)", + metavar="CONFIGFILE",) + parser.add_argument('--version', action='version', version=f'%(prog)s {__version__} written by {__author__}' @@ -67,7 +71,12 @@ def main(cliargs=None) -> int: from lxml import etree # noqa: F401 args = parsecli(cliargs) - config = readconfig(CONFIGDIRS) + # Either use the config file from the CLI or the default config files + if args.config is None: + configfiles = CONFIGDIRS + else: + configfiles = [args.config] + config = readconfig(configfiles) args.config = config log.debug("CLI args %s", args) diff --git a/python-scripts/metadatavalidator/tests/test_script_cli.py b/python-scripts/metadatavalidator/tests/test_script_cli.py index 18efcd864..c0a12fc86 100644 --- a/python-scripts/metadatavalidator/tests/test_script_cli.py +++ b/python-scripts/metadatavalidator/tests/test_script_cli.py @@ -17,4 +17,10 @@ def test_parsecli_version(capsys): captured = capsys.readouterr() # We can't check for the script name as it's "pytest" - assert re.match(r"[a-z]+ \d+\.\d+(\.\d+)? written by .*\n", captured.out) \ No newline at end of file + assert re.match(r"[a-z]+ \d+\.\d+(\.\d+)? 
written by .*\n", captured.out) + + +def test_parsecli_config(): + args = parsecli(["--config", "config.ini", "a.xml", "b.xml"]) + assert args.config == "config.ini" + assert args.xmlfiles == ["a.xml", "b.xml"] \ No newline at end of file From 782b0c60cb9ce3c53b091f1778503026f2199ce9 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Mon, 3 Jun 2024 14:08:41 +0200 Subject: [PATCH 049/107] Group existing tests under tests/unit --- .../metadatavalidator/tests/{ => unit}/checks/test_check_info.py | 0 .../metadatavalidator/tests/{ => unit}/checks/test_check_meta.py | 0 .../metadatavalidator/tests/{ => unit}/checks/test_check_root.py | 0 .../metadatavalidator/tests/{ => unit}/data/metadatavalidator.ini | 0 .../metadatavalidator/tests/{ => unit}/test_script_cli.py | 0 .../metadatavalidator/tests/{ => unit}/test_script_config.py | 0 .../metadatavalidator/tests/{ => unit}/test_script_meta.py | 0 python-scripts/metadatavalidator/tests/{ => unit}/test_util.py | 0 8 files changed, 0 insertions(+), 0 deletions(-) rename python-scripts/metadatavalidator/tests/{ => unit}/checks/test_check_info.py (100%) rename python-scripts/metadatavalidator/tests/{ => unit}/checks/test_check_meta.py (100%) rename python-scripts/metadatavalidator/tests/{ => unit}/checks/test_check_root.py (100%) rename python-scripts/metadatavalidator/tests/{ => unit}/data/metadatavalidator.ini (100%) rename python-scripts/metadatavalidator/tests/{ => unit}/test_script_cli.py (100%) rename python-scripts/metadatavalidator/tests/{ => unit}/test_script_config.py (100%) rename python-scripts/metadatavalidator/tests/{ => unit}/test_script_meta.py (100%) rename python-scripts/metadatavalidator/tests/{ => unit}/test_util.py (100%) diff --git a/python-scripts/metadatavalidator/tests/checks/test_check_info.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_info.py similarity index 100% rename from python-scripts/metadatavalidator/tests/checks/test_check_info.py rename to 
python-scripts/metadatavalidator/tests/unit/checks/test_check_info.py diff --git a/python-scripts/metadatavalidator/tests/checks/test_check_meta.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py similarity index 100% rename from python-scripts/metadatavalidator/tests/checks/test_check_meta.py rename to python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py diff --git a/python-scripts/metadatavalidator/tests/checks/test_check_root.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_root.py similarity index 100% rename from python-scripts/metadatavalidator/tests/checks/test_check_root.py rename to python-scripts/metadatavalidator/tests/unit/checks/test_check_root.py diff --git a/python-scripts/metadatavalidator/tests/data/metadatavalidator.ini b/python-scripts/metadatavalidator/tests/unit/data/metadatavalidator.ini similarity index 100% rename from python-scripts/metadatavalidator/tests/data/metadatavalidator.ini rename to python-scripts/metadatavalidator/tests/unit/data/metadatavalidator.ini diff --git a/python-scripts/metadatavalidator/tests/test_script_cli.py b/python-scripts/metadatavalidator/tests/unit/test_script_cli.py similarity index 100% rename from python-scripts/metadatavalidator/tests/test_script_cli.py rename to python-scripts/metadatavalidator/tests/unit/test_script_cli.py diff --git a/python-scripts/metadatavalidator/tests/test_script_config.py b/python-scripts/metadatavalidator/tests/unit/test_script_config.py similarity index 100% rename from python-scripts/metadatavalidator/tests/test_script_config.py rename to python-scripts/metadatavalidator/tests/unit/test_script_config.py diff --git a/python-scripts/metadatavalidator/tests/test_script_meta.py b/python-scripts/metadatavalidator/tests/unit/test_script_meta.py similarity index 100% rename from python-scripts/metadatavalidator/tests/test_script_meta.py rename to python-scripts/metadatavalidator/tests/unit/test_script_meta.py diff --git 
a/python-scripts/metadatavalidator/tests/test_util.py b/python-scripts/metadatavalidator/tests/unit/test_util.py similarity index 100% rename from python-scripts/metadatavalidator/tests/test_util.py rename to python-scripts/metadatavalidator/tests/unit/test_util.py From f007573d60181f401ebe00cdd218045ab025ea50 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Mon, 3 Jun 2024 14:25:40 +0200 Subject: [PATCH 050/107] Add first integration test --- .../tests/integration/case1/article.xml | 7 +++++++ .../tests/integration/case1/config-test.ini | 16 ++++++++++++++ .../integration/case1/test_integration.py | 21 +++++++++++++++++++ 3 files changed, 44 insertions(+) create mode 100644 python-scripts/metadatavalidator/tests/integration/case1/article.xml create mode 100644 python-scripts/metadatavalidator/tests/integration/case1/config-test.ini create mode 100644 python-scripts/metadatavalidator/tests/integration/case1/test_integration.py diff --git a/python-scripts/metadatavalidator/tests/integration/case1/article.xml b/python-scripts/metadatavalidator/tests/integration/case1/article.xml new file mode 100644 index 000000000..abc8bdac3 --- /dev/null +++ b/python-scripts/metadatavalidator/tests/integration/case1/article.xml @@ -0,0 +1,7 @@ +
+ + Test + The SEO title + + +
diff --git a/python-scripts/metadatavalidator/tests/integration/case1/config-test.ini b/python-scripts/metadatavalidator/tests/integration/case1/config-test.ini new file mode 100644 index 000000000..08afdc93d --- /dev/null +++ b/python-scripts/metadatavalidator/tests/integration/case1/config-test.ini @@ -0,0 +1,16 @@ +[validator] +file_extension = .xml +check_root_elements = book article topic +valid_languages = en-us + +[metadata] +revhistory = 0 +require_xmlid_on_revision = 0 + +# +require_meta_title = on +meta_title_length = 55 + +# +require_meta_description = off +meta_description_length = 150 diff --git a/python-scripts/metadatavalidator/tests/integration/case1/test_integration.py b/python-scripts/metadatavalidator/tests/integration/case1/test_integration.py new file mode 100644 index 000000000..28af4746f --- /dev/null +++ b/python-scripts/metadatavalidator/tests/integration/case1/test_integration.py @@ -0,0 +1,21 @@ +import os.path +import pytest + +from metadatavalidator.cli import main + +BASEDIR = os.path.dirname(os.path.realpath(__file__)) + + +def test_case1_integration(capsys): + cli = ["--config", f"{BASEDIR}/config-test.ini", + # "--json", + f"{BASEDIR}/article.xml"] + + result = main(cli) + captured = capsys.readouterr() + assert result == 0 + assert "RESULTS" in captured.out + assert f"{BASEDIR}/article.xml" in captured.out + assert "check_info_revhistory" in captured.out + # assert "check_info_revhistory_revision_date" in captured.out + From af46b9a81b1bd51ed9d5260134f19824b39c622b Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Mon, 3 Jun 2024 14:46:43 +0200 Subject: [PATCH 051/107] Don't check date when there is no revhistory Take into account metadata.require_revhistory --- .../src/metadatavalidator/checks/check_info.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py 
b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py index 190dc70e7..7233948bc 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py @@ -33,9 +33,13 @@ def check_info_revhistory(tree: etree._ElementTree, config: dict[t.Any, t.Any]): # If couldn't be found, we can't check return + required = config.get("metadata", {}).get("require_revhistory", False) + revhistory = info.find("./d:revhistory", namespaces={"d": DOCBOOK_NS}) if revhistory is None: - raise InvalidValueError(f"Couldn't find a revhistory element in {info.tag}.") + if required: + raise InvalidValueError(f"Couldn't find a revhistory element in {info.tag}.") + return None xmlid = revhistory.attrib.get(f"{{{XML_NS}}}id") if xmlid is None: @@ -45,7 +49,6 @@ def check_info_revhistory(tree: etree._ElementTree, config: dict[t.Any, t.Any]): raise InvalidValueError(f"xml:id attribute in info/revhistory should start with 'rh'.") - def check_info_revhistory_revision(tree: etree._ElementTree, config: dict[t.Any, t.Any]): """Checks for an info/revhistory/revision element""" @@ -70,6 +73,10 @@ def check_info_revhistory_revision_date(tree: etree._ElementTree, """Checks for an info/revhistory/revision/date element""" date = tree.find("./d:info/d:revhistory/d:revision/d:date", namespaces={"d": DOCBOOK_NS}) + + revhistory = tree.find("./d:info/d:revhistory", namespaces={"d": DOCBOOK_NS}) + if revhistory is None: + return None if date is None: raise InvalidValueError(f"Couldn't find a date element in info/revhistory/revision.") From a43d1e591f7c2c4bc113eaaff3f008c25e57cee0 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Mon, 3 Jun 2024 14:52:22 +0200 Subject: [PATCH 052/107] Allow -C for --config option --- python-scripts/metadatavalidator/src/metadatavalidator/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/python-scripts/metadatavalidator/src/metadatavalidator/cli.py b/python-scripts/metadatavalidator/src/metadatavalidator/cli.py index 9427202a2..301516c9f 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/cli.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/cli.py @@ -32,7 +32,7 @@ def parsecli(cliargs=None) -> argparse.Namespace: default=0, # emit warnings, errors, and critical help="increase verbosity level") - parser.add_argument('--config', + parser.add_argument('-C', '--config', help="The configuration file to use (disables other systems or home configuration files)", metavar="CONFIGFILE",) From 2dcb70f73f240d79e5af125275160a509e454301 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Mon, 3 Jun 2024 14:52:56 +0200 Subject: [PATCH 053/107] Use strategy pattern to format output as JSON or text --- .../src/metadatavalidator/cli.py | 6 ++++++ .../src/metadatavalidator/process.py | 16 +++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/cli.py b/python-scripts/metadatavalidator/src/metadatavalidator/cli.py index 301516c9f..e5dfa8c39 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/cli.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/cli.py @@ -36,6 +36,12 @@ def parsecli(cliargs=None) -> argparse.Namespace: help="The configuration file to use (disables other systems or home configuration files)", metavar="CONFIGFILE",) + parser.add_argument('-F', '--format', + help="Output format (default %(default)r)", + choices=["text", "json"], + default="text" + ) + parser.add_argument('--version', action='version', version=f'%(prog)s {__version__} written by {__author__}' diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/process.py b/python-scripts/metadatavalidator/src/metadatavalidator/process.py index 368ae56df..37a293996 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/process.py +++ 
b/python-scripts/metadatavalidator/src/metadatavalidator/process.py @@ -66,6 +66,7 @@ async def process_xml_file(xmlfile: str, config: dict[t.Any, t.Any]): log.info("File %r checked.", basexmlfile) return { "xmlfile": xmlfile, + "absxmlfilename": os.path.abspath(xmlfile), "errors": errors, "basename": os.path.basename(xmlfile), } @@ -97,6 +98,13 @@ def format_results(results: list[t.Any]): print(f" {allidx}.{idx}: {error['checkfunc']}: {msg}") print() +def format_results_json(results: list[t.Any]): + """Format the results for output + + :param results: the results from the checks + """ + import json + print(json.dumps(results, indent=2)) async def process(args: Namespace, config: dict[t.Any, t.Any]): @@ -118,4 +126,10 @@ async def process(args: Namespace, config: dict[t.Any, t.Any]): if maybeissue: results.append(maybeissue) - format_results(results) + # Use strategy pattern to format the results + formatmap = { + "text": format_results, + "json": format_results_json, + } + + formatmap[args.format](results) From 6ce94f19f455864fca50877362ea24408e3e80d1 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Mon, 3 Jun 2024 14:53:09 +0200 Subject: [PATCH 054/107] Use JSON output for first integration test --- .../tests/integration/case1/test_integration.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/python-scripts/metadatavalidator/tests/integration/case1/test_integration.py b/python-scripts/metadatavalidator/tests/integration/case1/test_integration.py index 28af4746f..a769f5446 100644 --- a/python-scripts/metadatavalidator/tests/integration/case1/test_integration.py +++ b/python-scripts/metadatavalidator/tests/integration/case1/test_integration.py @@ -1,21 +1,23 @@ import os.path +import json import pytest from metadatavalidator.cli import main + BASEDIR = os.path.dirname(os.path.realpath(__file__)) +RELATIVE_PATH = os.path.relpath(BASEDIR, os.getcwd()) def test_case1_integration(capsys): cli = ["--config", 
f"{BASEDIR}/config-test.ini", - # "--json", - f"{BASEDIR}/article.xml"] + "--format", "json", # needed to avoid formatting issues + f"{RELATIVE_PATH}/article.xml"] result = main(cli) captured = capsys.readouterr() assert result == 0 - assert "RESULTS" in captured.out - assert f"{BASEDIR}/article.xml" in captured.out - assert "check_info_revhistory" in captured.out - # assert "check_info_revhistory_revision_date" in captured.out + result = json.loads(captured.out) + assert result[0]['errors'] == [] + assert result[0]['xmlfile'] == f"{RELATIVE_PATH}/article.xml" From 83c4d713cb80d6f8c87c3996831f8657e2487844 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Mon, 3 Jun 2024 15:51:39 +0200 Subject: [PATCH 055/107] Replace namespace URL with prefix, use d:info/... --- .../src/metadatavalidator/checks/check_meta.py | 12 +++++++++--- .../src/metadatavalidator/common.py | 17 ++++++++++------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py index 61f36dfe9..4a8d1df4a 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py @@ -1,8 +1,14 @@ +""" +Checks elements in a element of a DocBook5 XML file. 
+ +Source: https://confluence.suse.com/x/aQDWNg +""" + import typing as t from lxml import etree -from ..common import DOCBOOK_NS +from ..common import NAMESPACES from ..exceptions import InvalidValueError from ..logging import log @@ -10,7 +16,7 @@ def check_meta_title(tree: etree.ElementTree, config: dict[t.Any, t.Any]): """Checks for a element""" root = tree.getroot() - meta = root.find(".//{%s}meta[@name='title']" % DOCBOOK_NS) + meta = root.find("./d:info/d:meta[@name='title']", namespaces=NAMESPACES) required = config.get("metadata", {}).get("meta_title_required", False) if meta is None: if required: @@ -27,7 +33,7 @@ def check_meta_title(tree: etree.ElementTree, config: dict[t.Any, t.Any]): def check_meta_description(tree: etree.ElementTree, config: dict[t.Any, t.Any]): """Checks for a element""" root = tree.getroot() - meta = root.find(".//{%s}meta[@name='description']" % DOCBOOK_NS) + meta = root.find("./d:info/d:meta[@name='description']", namespaces=NAMESPACES) required = config.get("metadata", {}).get("meta_description_required", False) if meta is None: if required: diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/common.py b/python-scripts/metadatavalidator/src/metadatavalidator/common.py index 5d4c3b094..598d914c6 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/common.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/common.py @@ -27,14 +27,17 @@ ITS_NS = "http://www.w3.org/2005/11/its" XINCLUDE_NS = "http://www.w3.org/2001/XInclude" -#: Mapping of namespaces to prefixes -NAMESPACES2PREFIX = { - DOCBOOK_NS: "d", - XML_NS: "xml", - XLINK_NS: "xlink", - ITS_NS: "its", - XINCLUDE_NS: "xi", +#: Mapping of prefixes to namespaces +NAMESPACES = { + "d": DOCBOOK_NS, + "xml": XML_NS, + "xlink": XLINK_NS, + "its": ITS_NS, + "xi": XINCLUDE_NS, } +#: Mapping of namespaces to prefixes +NAMESPACES2PREFIX = {v: k for k, v in NAMESPACES.items()} + #: The regex to match a date with year, month and an optional day 
DATE_REGEX = re.compile(r"^(\d{4})-(\d{1,2})(?:-(\d{1,2}))?$") \ No newline at end of file From 94d970cf537428dbeea8c093c4b58476a9ba406f Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Mon, 3 Jun 2024 16:02:58 +0200 Subject: [PATCH 056/107] Function: format_results -> format_results_text --- .../metadatavalidator/src/metadatavalidator/process.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/process.py b/python-scripts/metadatavalidator/src/metadatavalidator/process.py index 37a293996..f914dda66 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/process.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/process.py @@ -78,7 +78,7 @@ def red(text): return f"\033[31m{text}\033[0m" -def format_results(results: list[t.Any]): +def format_results_text(results: list[t.Any]): """Format the results for output :param results: the results from the checks @@ -128,7 +128,7 @@ async def process(args: Namespace, config: dict[t.Any, t.Any]): # Use strategy pattern to format the results formatmap = { - "text": format_results, + "text": format_results_text, "json": format_results_json, } From 78874faf98ba78f54ecc3f057d2b1e4fd0a6bde9 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Mon, 3 Jun 2024 16:22:12 +0200 Subject: [PATCH 057/107] Test wrong --format argument --- .../tests/unit/test_script_cli.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/python-scripts/metadatavalidator/tests/unit/test_script_cli.py b/python-scripts/metadatavalidator/tests/unit/test_script_cli.py index c0a12fc86..24740df66 100644 --- a/python-scripts/metadatavalidator/tests/unit/test_script_cli.py +++ b/python-scripts/metadatavalidator/tests/unit/test_script_cli.py @@ -23,4 +23,20 @@ def test_parsecli_version(capsys): def test_parsecli_config(): args = parsecli(["--config", "config.ini", "a.xml", "b.xml"]) assert args.config == "config.ini" - assert 
args.xmlfiles == ["a.xml", "b.xml"] \ No newline at end of file + assert args.xmlfiles == ["a.xml", "b.xml"] + + +def test_parsecli_format(): + args = parsecli(["--format", "json", "a.xml", "b.xml"]) + assert args.format == "json" + assert args.xmlfiles == ["a.xml", "b.xml"] + + +def test_parsecli_wrong_format(capsys): + with pytest.raises(SystemExit, + # match=r".*invalid choice.*" + ): + parsecli(["--format", "unknown", "a.xml", "b.xml"]) + + caputured = capsys.readouterr() + assert "invalid choice: 'unknown'" in caputured.err \ No newline at end of file From f411721234b4fa137afba89e83df3dd12e435b42 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Mon, 3 Jun 2024 15:52:45 +0200 Subject: [PATCH 058/107] Check * Add metadata.require_meta_series and metadata.valid_meta_series in config * Add tests * Update README.rst --- python-scripts/metadatavalidator/README.rst | 6 +- .../metadatavalidator/metadatavalidator.ini | 5 +- .../src/metadatavalidator/checks/__init__.py | 2 + .../metadatavalidator/checks/check_meta.py | 24 +++++++- .../src/metadatavalidator/config.py | 8 +++ .../tests/unit/checks/test_check_meta.py | 55 ++++++++++++++++++- .../tests/unit/data/metadatavalidator.ini | 5 +- 7 files changed, 100 insertions(+), 5 deletions(-) diff --git a/python-scripts/metadatavalidator/README.rst b/python-scripts/metadatavalidator/README.rst index cc84f4a10..99bd6977a 100644 --- a/python-scripts/metadatavalidator/README.rst +++ b/python-scripts/metadatavalidator/README.rst @@ -49,4 +49,8 @@ recognized: * :var:`require_meta_description`: Requires a ```` tag or not. - * :var:`meta_description_length`: Checks the length of the text content in ````. Default is 155. \ No newline at end of file + * :var:`meta_description_length`: Checks the length of the text content in ````. Default is 155. + + * :var:`require_meta_series`: Requires a ```` tag or not. + + * :var:`valid_meta_series`: Lists the valid series names for ````. 
diff --git a/python-scripts/metadatavalidator/metadatavalidator.ini b/python-scripts/metadatavalidator/metadatavalidator.ini index 690a49f53..66a733f0f 100644 --- a/python-scripts/metadatavalidator/metadatavalidator.ini +++ b/python-scripts/metadatavalidator/metadatavalidator.ini @@ -13,4 +13,7 @@ meta_title_length = 55 # require_meta_description = off -meta_description_length = 150 \ No newline at end of file +meta_description_length = 150 +# +require_meta_series = off +valid_meta_series = Products & Solutions, Best Practices, Technical References \ No newline at end of file diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py index 9c1b9a04e..011a4aabe 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py @@ -13,6 +13,7 @@ from .check_meta import ( check_meta_title, check_meta_description, + check_meta_series, ) # Keep the order. The next item is dependent on the previous item. @@ -28,5 +29,6 @@ # "check_meta_title", "check_meta_description", + "check_meta_series", ] diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py index 4a8d1df4a..4ab609a4a 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py @@ -44,4 +44,26 @@ def check_meta_description(tree: etree.ElementTree, config: dict[t.Any, t.Any]): length = config.get("metadata", {}).get("meta_description_length", 150) if len(meta.text) > length: - raise InvalidValueError(f"Meta description is too long. Max length is {length} characters.") \ No newline at end of file + raise InvalidValueError(f"Meta description is too long. 
Max length is {length} characters.") + + +def check_meta_series(tree: etree.ElementTree, config: dict[t.Any, t.Any]): + """Checks for a element""" + root = tree.getroot() + meta = root.find("./d:info/d:meta[@name='series']", namespaces=NAMESPACES) + required = config.get("metadata", {}).get("required_meta_series", False) + if meta is None: + if required: + raise InvalidValueError( + f"Couldn't find required meta[@name='series'] element in {root.tag}." + ) + return + + valid_series = [x.strip() for x in + config.get("metadata", {}).get("valid_meta_series", []) + if x] + if meta.text.strip() not in valid_series: + raise InvalidValueError( + f"Meta series is invalid, got {meta.text.strip()!r}. " + f"Valid series are {valid_series}." + ) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/config.py b/python-scripts/metadatavalidator/src/metadatavalidator/config.py index f331b5b0f..fd0fa247e 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/config.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/config.py @@ -98,6 +98,14 @@ def validate_and_convert_config(config: configparser.ConfigParser) -> dict[t.Any except TypeError: raise MissingKeyError("metadata.meta_description_length") + split = re.compile(r"[;,]") # no space! 
+ valid_meta_series = split.split(theconfig.get("metadata", {}).get("valid_meta_series", "")) + theconfig.setdefault("metadata", {})["valid_meta_series"] = valid_meta_series + + require_meta_series = truefalse( + theconfig.get("metadata", {}).get("require_meta_series", False) + ) + theconfig.setdefault("metadata", {})["require_meta_series"] = require_meta_series # Store the configfiles theconfig["configfiles"] = getattr(config, "configfiles") diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py index dc5afef28..ba36c8de3 100644 --- a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py @@ -4,6 +4,7 @@ from metadatavalidator.checks.check_meta import ( check_meta_title, check_meta_description, + check_meta_series, ) from metadatavalidator.exceptions import InvalidValueError @@ -123,4 +124,56 @@ def test_check_optional_meta_description(xmlparser): etree.fromstring(xmlcontent, parser=xmlparser) ) assert check_meta_description(tree, - dict(metadata=dict(meta_description_required=False))) is None \ No newline at end of file + dict(metadata=dict(meta_description_required=False))) is None + + +def test_check_meta_series(xmlparser): + xmlcontent = """
+ + Test + Products & Solutions + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, parser=xmlparser) + ) + + config = dict(metadata=dict(require_meta_series=True, + valid_meta_series=["Products & Solutions", + "Best Practices", + "Technical References"])) + assert check_meta_series(tree, config) is None + + +def test_check_missing_meta_series(xmlparser): + xmlcontent = """
+ + Test + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, parser=xmlparser) + ) + + check_meta_series(tree, dict(metadata=dict(require_meta_series=False))) is None + + +def test_check_wrong_meta_series(xmlparser): + xmlcontent = """
+ + Test + Foo + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, parser=xmlparser) + ) + + config = dict(metadata=dict(require_meta_series=True, + valid_meta_series=["Best Practices", + "Technical References"])) + with pytest.raises(InvalidValueError, match="Meta series is invalid"): + check_meta_series(tree, config) \ No newline at end of file diff --git a/python-scripts/metadatavalidator/tests/unit/data/metadatavalidator.ini b/python-scripts/metadatavalidator/tests/unit/data/metadatavalidator.ini index 789fd418a..9dcf1e744 100644 --- a/python-scripts/metadatavalidator/tests/unit/data/metadatavalidator.ini +++ b/python-scripts/metadatavalidator/tests/unit/data/metadatavalidator.ini @@ -12,4 +12,7 @@ meta_title_length = 55 # require_meta_description = off -meta_description_length = 150 \ No newline at end of file +meta_description_length = 150 +# +require_meta_series = off +valid_meta_series = Products & Solutions, Best Practices, Technical References \ No newline at end of file From c2d76ace2d2d50a7cb3f068d876ab630cb512ea6 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Mon, 3 Jun 2024 18:30:31 +0200 Subject: [PATCH 059/107] Check * Add metadata.require_meta_techpartner in config * Add tests * Update README.rst --- python-scripts/metadatavalidator/README.rst | 2 + .../metadatavalidator/metadatavalidator.ini | 4 +- .../src/metadatavalidator/checks/__init__.py | 2 + .../metadatavalidator/checks/check_meta.py | 29 +++++++ .../tests/unit/checks/test_check_meta.py | 81 ++++++++++++++++++- 5 files changed, 113 insertions(+), 5 deletions(-) diff --git a/python-scripts/metadatavalidator/README.rst b/python-scripts/metadatavalidator/README.rst index 99bd6977a..9850956e9 100644 --- a/python-scripts/metadatavalidator/README.rst +++ b/python-scripts/metadatavalidator/README.rst @@ -54,3 +54,5 @@ recognized: * :var:`require_meta_series`: Requires a ```` tag or not. * :var:`valid_meta_series`: Lists the valid series names for ````. 
+ + * :var:`require_meta_techpartner`: Requires a ```` tag or not. diff --git a/python-scripts/metadatavalidator/metadatavalidator.ini b/python-scripts/metadatavalidator/metadatavalidator.ini index 66a733f0f..66cbed329 100644 --- a/python-scripts/metadatavalidator/metadatavalidator.ini +++ b/python-scripts/metadatavalidator/metadatavalidator.ini @@ -16,4 +16,6 @@ require_meta_description = off meta_description_length = 150 # require_meta_series = off -valid_meta_series = Products & Solutions, Best Practices, Technical References \ No newline at end of file +valid_meta_series = Products & Solutions, Best Practices, Technical References +# +require_meta_techpartner = off \ No newline at end of file diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py index 011a4aabe..db548ad8b 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py @@ -14,6 +14,7 @@ check_meta_title, check_meta_description, check_meta_series, + check_meta_techpartner, ) # Keep the order. The next item is dependent on the previous item. @@ -30,5 +31,6 @@ "check_meta_title", "check_meta_description", "check_meta_series", + "check_meta_techpartner", ] diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py index 4ab609a4a..5890ce987 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py @@ -67,3 +67,32 @@ def check_meta_series(tree: etree.ElementTree, config: dict[t.Any, t.Any]): f"Meta series is invalid, got {meta.text.strip()!r}. " f"Valid series are {valid_series}." 
) + + +def check_meta_techpartner(tree: etree.ElementTree, config: dict[t.Any, t.Any]): + """Checks for a element""" + root = tree.getroot() + meta = root.find("./d:info/d:meta[@name='techpartner']", namespaces=NAMESPACES) + required = config.get("metadata", {}).get("require_meta_techpartner", False) + if meta is None: + if required: + raise InvalidValueError( + f"Couldn't find required meta[@name='techpartner'] element " + f"in {root.tag}." + ) + return + + # Do we have children? + partners = [tag.text.strip() for tag in meta.iterchildren()] + if not partners: + raise InvalidValueError( + f"Couldn't find any tech partners in meta[@name='techpartner'] element " + f"(line {meta.sourceline})." + ) + + # Are they unique? + if len(partners) != len(set(partners)): + raise InvalidValueError( + f"Duplicate tech partners found in meta[@name='techpartner'] element " + f"(line {meta.sourceline})." + ) diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py index ba36c8de3..99fb94cfd 100644 --- a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py @@ -5,6 +5,7 @@ check_meta_title, check_meta_description, check_meta_series, + check_meta_techpartner, ) from metadatavalidator.exceptions import InvalidValueError @@ -64,8 +65,8 @@ def test_check_optional_meta_title(xmlparser): tree = etree.ElementTree( etree.fromstring(xmlcontent, parser=xmlparser) ) - assert check_meta_title(tree, - dict(metadata=dict(meta_title_required=False))) is None + config = dict(metadata=dict(meta_title_required=False)) + assert check_meta_title(tree, config) is None def test_check_meta_description(xmlparser): @@ -157,7 +158,8 @@ def test_check_missing_meta_series(xmlparser): etree.fromstring(xmlcontent, parser=xmlparser) ) - check_meta_series(tree, dict(metadata=dict(require_meta_series=False))) is None + config = 
dict(metadata=dict(require_meta_series=False)) + assert check_meta_series(tree, config) is None def test_check_wrong_meta_series(xmlparser): @@ -176,4 +178,75 @@ def test_check_wrong_meta_series(xmlparser): valid_meta_series=["Best Practices", "Technical References"])) with pytest.raises(InvalidValueError, match="Meta series is invalid"): - check_meta_series(tree, config) \ No newline at end of file + check_meta_series(tree, config) + + +def test_check_meta_techpartner(xmlparser): + xmlcontent = """
+ + Test + + Acme Inc. + Foo Corp. + + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, parser=xmlparser) + ) + + assert check_meta_techpartner(tree, {}) is None + + +def test_check_missing_meta_techpartner(xmlparser): + xmlcontent = """
+ + Test + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, parser=xmlparser) + ) + + config = dict(metadata=dict(require_meta_techpartner=True)) + with pytest.raises(InvalidValueError, match=".*required.*"): + check_meta_techpartner(tree, config) + + +def test_check_missing_children_in_meta_techpartner(xmlparser): + xmlcontent = """
+ + Test + + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, parser=xmlparser) + ) + + config = dict(metadata=dict(require_meta_techpartner=True)) + with pytest.raises(InvalidValueError, match=".*Couldn't find any tech partners.*"): + check_meta_techpartner(tree, config) + + +def test_check_meta_techpartner_with_nonunique_children(xmlparser): + xmlcontent = """
+ + Test + + Acme Inc. + Acme Inc. + + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, parser=xmlparser) + ) + + config = dict(metadata=dict(require_meta_techpartner=True)) + with pytest.raises(InvalidValueError, match=".*Duplicate tech partners.*"): + check_meta_techpartner(tree, config) \ No newline at end of file From cc5d92e9e6b75d88cb0d9b25a9279405f2d868d6 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Mon, 3 Jun 2024 18:33:38 +0200 Subject: [PATCH 060/107] tests/integration/{case1 => goodcase1} --- .../tests/integration/{case1 => goodcase1}/article.xml | 0 .../tests/integration/{case1 => goodcase1}/config-test.ini | 0 .../tests/integration/{case1 => goodcase1}/test_integration.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename python-scripts/metadatavalidator/tests/integration/{case1 => goodcase1}/article.xml (100%) rename python-scripts/metadatavalidator/tests/integration/{case1 => goodcase1}/config-test.ini (100%) rename python-scripts/metadatavalidator/tests/integration/{case1 => goodcase1}/test_integration.py (100%) diff --git a/python-scripts/metadatavalidator/tests/integration/case1/article.xml b/python-scripts/metadatavalidator/tests/integration/goodcase1/article.xml similarity index 100% rename from python-scripts/metadatavalidator/tests/integration/case1/article.xml rename to python-scripts/metadatavalidator/tests/integration/goodcase1/article.xml diff --git a/python-scripts/metadatavalidator/tests/integration/case1/config-test.ini b/python-scripts/metadatavalidator/tests/integration/goodcase1/config-test.ini similarity index 100% rename from python-scripts/metadatavalidator/tests/integration/case1/config-test.ini rename to python-scripts/metadatavalidator/tests/integration/goodcase1/config-test.ini diff --git a/python-scripts/metadatavalidator/tests/integration/case1/test_integration.py b/python-scripts/metadatavalidator/tests/integration/goodcase1/test_integration.py similarity index 100% rename from 
python-scripts/metadatavalidator/tests/integration/case1/test_integration.py rename to python-scripts/metadatavalidator/tests/integration/goodcase1/test_integration.py From c03abca5391be35be6a0c496875737424c80d39c Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Mon, 3 Jun 2024 18:47:42 +0200 Subject: [PATCH 061/107] Amend README with Installation section --- python-scripts/metadatavalidator/README.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/python-scripts/metadatavalidator/README.rst b/python-scripts/metadatavalidator/README.rst index 9850956e9..9be14ff5d 100644 --- a/python-scripts/metadatavalidator/README.rst +++ b/python-scripts/metadatavalidator/README.rst @@ -12,6 +12,23 @@ Requirements * Python >=3.11 (only due to for installing with :file:`pyproject.toml`.) +Installation +------------ + +To install the script, run the following command: + +.. code-block:: bash + + pip install . + + +For development, you can install the script in editable mode: + +.. 
code-block:: bash + + pip install -e .[test] + + Configuration ------------- From 662098b1d368c61d5c00438daff8019cf12bc59d Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Tue, 4 Jun 2024 08:41:59 +0200 Subject: [PATCH 062/107] Correct etree._ElementTree (with the underscore) --- .../src/metadatavalidator/checks/check_meta.py | 2 +- .../src/metadatavalidator/checks/check_root.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py index 5890ce987..da3a0f78a 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py @@ -13,7 +13,7 @@ from ..logging import log -def check_meta_title(tree: etree.ElementTree, config: dict[t.Any, t.Any]): +def check_meta_title(tree: etree._ElementTree, config: dict[t.Any, t.Any]): """Checks for a element""" root = tree.getroot() meta = root.find("./d:info/d:meta[@name='title']", namespaces=NAMESPACES) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_root.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_root.py index 963d57e0d..5979af2f2 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_root.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_root.py @@ -7,7 +7,7 @@ from ..logging import log -def check_root_tag(tree: etree.ElementTree, config: dict[t.Any, t.Any]): +def check_root_tag(tree: etree._ElementTree, config: dict[t.Any, t.Any]): """Checks if root element is in the list of allowed elements """ allowed_root_elements = config.get("validator", {}).get("check_root_elements") @@ -16,7 +16,7 @@ def check_root_tag(tree: etree.ElementTree, config: dict[t.Any, t.Any]): raise InvalidValueError(f"Root tag {tag.localname!r} is not allowed. 
Expected {', '.join(allowed_root_elements)}.") -def check_namespace(tree: etree.ElementTree, config: dict[t.Any, t.Any]): +def check_namespace(tree: etree._ElementTree, config: dict[t.Any, t.Any]): """Checks the namespace""" tag = etree.QName(tree.getroot().tag) if tag.namespace != DOCBOOK_NS: From 6ad5372d14226aee597ebbbe8104e8998f1ec7d5 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Tue, 4 Jun 2024 15:53:15 +0200 Subject: [PATCH 063/107] Check * Add metadata.require_meta_techpartner in config * Add tests * Update README.rst --- python-scripts/metadatavalidator/README.rst | 2 + .../metadatavalidator/metadatavalidator.ini | 6 ++- .../src/metadatavalidator/checks/__init__.py | 4 +- .../metadatavalidator/checks/check_meta.py | 27 +++++++++-- .../tests/unit/checks/test_check_meta.py | 46 ++++++++++++++++++- 5 files changed, 77 insertions(+), 8 deletions(-) diff --git a/python-scripts/metadatavalidator/README.rst b/python-scripts/metadatavalidator/README.rst index 9be14ff5d..d08f6fc4b 100644 --- a/python-scripts/metadatavalidator/README.rst +++ b/python-scripts/metadatavalidator/README.rst @@ -73,3 +73,5 @@ recognized: * :var:`valid_meta_series`: Lists the valid series names for ````. * :var:`require_meta_techpartner`: Requires a ```` tag or not. + + * :var:`require_meta_platform`: Requires a ```` tag or not. 
\ No newline at end of file diff --git a/python-scripts/metadatavalidator/metadatavalidator.ini b/python-scripts/metadatavalidator/metadatavalidator.ini index 66cbed329..7063918bd 100644 --- a/python-scripts/metadatavalidator/metadatavalidator.ini +++ b/python-scripts/metadatavalidator/metadatavalidator.ini @@ -1,6 +1,6 @@ [validator] file_extension = .xml -check_root_elements = book article topic +check_root_elements = assembly book article topic valid_languages = ar-ar cs-cz de-de en-us es-es fr-fr hu-hu it-it ja-jp ko-kr nl-nl pl-pl pt-br ru-ru sv-se zh-cn zh-tw [metadata] @@ -18,4 +18,6 @@ meta_description_length = 150 require_meta_series = off valid_meta_series = Products & Solutions, Best Practices, Technical References # -require_meta_techpartner = off \ No newline at end of file +require_meta_techpartner = off +# +require_meta_platform = off \ No newline at end of file diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py index db548ad8b..c8bbd5b25 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py @@ -7,7 +7,7 @@ check_info_revhistory, check_info_revhistory_revision, check_info_revhistory_revision_date, - check_info_revhistory_revision_order + check_info_revhistory_revision_order, ) from .check_meta import ( @@ -15,6 +15,7 @@ check_meta_description, check_meta_series, check_meta_techpartner, + check_meta_platform, ) # Keep the order. The next item is dependent on the previous item. 
@@ -32,5 +33,6 @@ "check_meta_description", "check_meta_series", "check_meta_techpartner", + "check_meta_platform", ] diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py index da3a0f78a..284e8ef02 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py @@ -30,7 +30,7 @@ def check_meta_title(tree: etree._ElementTree, config: dict[t.Any, t.Any]): raise InvalidValueError(f"Meta title is too long. Max length is {length} characters.") -def check_meta_description(tree: etree.ElementTree, config: dict[t.Any, t.Any]): +def check_meta_description(tree: etree._ElementTree, config: dict[t.Any, t.Any]): """Checks for a element""" root = tree.getroot() meta = root.find("./d:info/d:meta[@name='description']", namespaces=NAMESPACES) @@ -47,7 +47,7 @@ def check_meta_description(tree: etree.ElementTree, config: dict[t.Any, t.Any]): raise InvalidValueError(f"Meta description is too long. 
Max length is {length} characters.") -def check_meta_series(tree: etree.ElementTree, config: dict[t.Any, t.Any]): +def check_meta_series(tree: etree._ElementTree, config: dict[t.Any, t.Any]): """Checks for a element""" root = tree.getroot() meta = root.find("./d:info/d:meta[@name='series']", namespaces=NAMESPACES) @@ -69,10 +69,11 @@ def check_meta_series(tree: etree.ElementTree, config: dict[t.Any, t.Any]): ) -def check_meta_techpartner(tree: etree.ElementTree, config: dict[t.Any, t.Any]): +def check_meta_techpartner(tree: etree._ElementTree, config: dict[t.Any, t.Any]): """Checks for a element""" root = tree.getroot() - meta = root.find("./d:info/d:meta[@name='techpartner']", namespaces=NAMESPACES) + meta = root.find("./d:info/d:meta[@name='techpartner']", + namespaces=NAMESPACES) required = config.get("metadata", {}).get("require_meta_techpartner", False) if meta is None: if required: @@ -96,3 +97,21 @@ def check_meta_techpartner(tree: etree.ElementTree, config: dict[t.Any, t.Any]): f"Duplicate tech partners found in meta[@name='techpartner'] element " f"(line {meta.sourceline})." ) + + +def check_meta_platform(tree: etree._ElementTree, config: dict[t.Any, t.Any]): + """Checks for a element""" + root = tree.getroot() + meta = root.find("./d:info/d:meta[@name='platform']", + namespaces=NAMESPACES) + required = config.get("metadata", {}).get("require_meta_platform", False) + if meta is None: + if required: + raise InvalidValueError( + f"Couldn't find required meta[@name='platform'] element " + f"in {root.tag}." 
+ ) + return + + if meta.text is None or not meta.text.strip(): + raise InvalidValueError("Empty meta[@name='platform'] element") diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py index 99fb94cfd..64d655066 100644 --- a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py @@ -6,6 +6,7 @@ check_meta_description, check_meta_series, check_meta_techpartner, + check_meta_platform, ) from metadatavalidator.exceptions import InvalidValueError @@ -249,4 +250,47 @@ def test_check_meta_techpartner_with_nonunique_children(xmlparser): config = dict(metadata=dict(require_meta_techpartner=True)) with pytest.raises(InvalidValueError, match=".*Duplicate tech partners.*"): - check_meta_techpartner(tree, config) \ No newline at end of file + check_meta_techpartner(tree, config) + + +def test_check_meta_platform(xmlparser): + xmlcontent = """
+ + Test + Foo + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, parser=xmlparser) + ) + config = dict(metadata=dict(require_meta_platform=True)) + assert check_meta_platform(tree, {}) is None + + +def test_check_missing_meta_platform(xmlparser): + xmlcontent = """
+ + Test + + +
""" + tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) + config = dict(metadata=dict(require_meta_platform=True)) + with pytest.raises(InvalidValueError, + match=r".*Couldn't find required meta.*"): + check_meta_platform(tree, config) + + +def test_check_empty_meta_platform(xmlparser): + xmlcontent = """
+ + Test + + + +
""" + tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) + config = dict(metadata=dict(require_meta_platform=True)) + with pytest.raises(InvalidValueError, match=r".*Empty meta.*"): + check_meta_platform(tree, config) From 5a285417bf858e28debc832f885935a8485ec4bd Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Tue, 4 Jun 2024 15:53:15 +0200 Subject: [PATCH 064/107] Check * Add metadata.require_meta_techpartner in config * Add tests * Update README.rst --- .../metadatavalidator/tests/unit/checks/test_check_meta.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py index 64d655066..345e11796 100644 --- a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py @@ -265,7 +265,7 @@ def test_check_meta_platform(xmlparser): etree.fromstring(xmlcontent, parser=xmlparser) ) config = dict(metadata=dict(require_meta_platform=True)) - assert check_meta_platform(tree, {}) is None + assert check_meta_platform(tree, config) is None def test_check_missing_meta_platform(xmlparser): From 05be611a5073cf1309335f6fbcfbf0ef690dd7ab Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Tue, 4 Jun 2024 16:17:16 +0200 Subject: [PATCH 065/107] Add line breaks in function argument list --- .../src/metadatavalidator/checks/check_meta.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py index 284e8ef02..eee7cf4e3 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py @@ -13,7 +13,8 @@ from ..logging import log -def 
check_meta_title(tree: etree._ElementTree, config: dict[t.Any, t.Any]): +def check_meta_title(tree: etree._ElementTree, + config: dict[t.Any, t.Any]): """Checks for a element""" root = tree.getroot() meta = root.find("./d:info/d:meta[@name='title']", namespaces=NAMESPACES) @@ -30,7 +31,8 @@ def check_meta_title(tree: etree._ElementTree, config: dict[t.Any, t.Any]): raise InvalidValueError(f"Meta title is too long. Max length is {length} characters.") -def check_meta_description(tree: etree._ElementTree, config: dict[t.Any, t.Any]): +def check_meta_description(tree: etree._ElementTree, + config: dict[t.Any, t.Any]): """Checks for a element""" root = tree.getroot() meta = root.find("./d:info/d:meta[@name='description']", namespaces=NAMESPACES) @@ -47,7 +49,8 @@ def check_meta_description(tree: etree._ElementTree, config: dict[t.Any, t.Any]) raise InvalidValueError(f"Meta description is too long. Max length is {length} characters.") -def check_meta_series(tree: etree._ElementTree, config: dict[t.Any, t.Any]): +def check_meta_series(tree: etree._ElementTree, + config: dict[t.Any, t.Any]): """Checks for a element""" root = tree.getroot() meta = root.find("./d:info/d:meta[@name='series']", namespaces=NAMESPACES) @@ -69,7 +72,8 @@ def check_meta_series(tree: etree._ElementTree, config: dict[t.Any, t.Any]): ) -def check_meta_techpartner(tree: etree._ElementTree, config: dict[t.Any, t.Any]): +def check_meta_techpartner(tree: etree._ElementTree, + config: dict[t.Any, t.Any]): """Checks for a element""" root = tree.getroot() meta = root.find("./d:info/d:meta[@name='techpartner']", @@ -99,7 +103,8 @@ def check_meta_techpartner(tree: etree._ElementTree, config: dict[t.Any, t.Any]) ) -def check_meta_platform(tree: etree._ElementTree, config: dict[t.Any, t.Any]): +def check_meta_platform(tree: etree._ElementTree, + config: dict[t.Any, t.Any]): """Checks for a element""" root = tree.getroot() meta = root.find("./d:info/d:meta[@name='platform']", From 
a66cffee2811d15325f8896cbceb557ee423f04e Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Tue, 4 Jun 2024 16:57:04 +0200 Subject: [PATCH 066/107] Correct a typo in require_meta_series --- .../metadatavalidator/checks/check_meta.py | 2 +- .../tests/unit/checks/test_check_meta.py | 22 ++++++++++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py index eee7cf4e3..d6f272fe2 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py @@ -54,7 +54,7 @@ def check_meta_series(tree: etree._ElementTree, """Checks for a element""" root = tree.getroot() meta = root.find("./d:info/d:meta[@name='series']", namespaces=NAMESPACES) - required = config.get("metadata", {}).get("required_meta_series", False) + required = config.get("metadata", {}).get("require_meta_series", False) if meta is None: if required: raise InvalidValueError( diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py index 345e11796..d0c12124a 100644 --- a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py @@ -148,7 +148,7 @@ def test_check_meta_series(xmlparser): assert check_meta_series(tree, config) is None -def test_check_missing_meta_series(xmlparser): +def test_check_missing_optional_meta_series(xmlparser): xmlcontent = """
Test @@ -182,6 +182,26 @@ def test_check_wrong_meta_series(xmlparser): check_meta_series(tree, config) +def test_check_require_meta_series(xmlparser): + xmlcontent = """
+ + Test + + +
""" + tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) + + config = dict( + metadata=dict( + require_meta_series=True, + valid_meta_series=["Best Practices", "Technical References"], + ) + ) + with pytest.raises(InvalidValueError, + match=r".*Couldn't find required meta.*"): + check_meta_series(tree, config) + + def test_check_meta_techpartner(xmlparser): xmlcontent = """
From 1d4a61eabd03981182cba595cc3f7eaf3917c9da Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Tue, 4 Jun 2024 17:02:36 +0200 Subject: [PATCH 067/107] Check * Add metadata.require_meta_architecture and metadata.valid_meta_architecture in config * Add tests * Update README.rst --- python-scripts/metadatavalidator/README.rst | 6 +- .../metadatavalidator/metadatavalidator.ini | 5 +- .../src/metadatavalidator/checks/__init__.py | 2 + .../metadatavalidator/checks/check_meta.py | 47 ++++++++++ .../tests/unit/checks/test_check_meta.py | 91 +++++++++++++++++++ 5 files changed, 149 insertions(+), 2 deletions(-) diff --git a/python-scripts/metadatavalidator/README.rst b/python-scripts/metadatavalidator/README.rst index d08f6fc4b..709207fdd 100644 --- a/python-scripts/metadatavalidator/README.rst +++ b/python-scripts/metadatavalidator/README.rst @@ -74,4 +74,8 @@ recognized: * :var:`require_meta_techpartner`: Requires a ```` tag or not. - * :var:`require_meta_platform`: Requires a ```` tag or not. \ No newline at end of file + * :var:`require_meta_platform`: Requires a ```` tag or not. + + * :var:`require_meta_architecture`: Requires a ```` tag or not. + + * :var:`valid_meta_architecture`: Lists the valid architecture names for ``/``. 
diff --git a/python-scripts/metadatavalidator/metadatavalidator.ini b/python-scripts/metadatavalidator/metadatavalidator.ini index 7063918bd..0d85088e5 100644 --- a/python-scripts/metadatavalidator/metadatavalidator.ini +++ b/python-scripts/metadatavalidator/metadatavalidator.ini @@ -20,4 +20,7 @@ valid_meta_series = Products & Solutions, Best Practices, Technical References # require_meta_techpartner = off # -require_meta_platform = off \ No newline at end of file +require_meta_platform = off +# +require_meta_architecture = off +valid_meta_architecture = Arm, AMD64/Intel\u00a064, POWER, IBM LinuxONE \ No newline at end of file diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py index c8bbd5b25..6b8133c88 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py @@ -16,6 +16,7 @@ check_meta_series, check_meta_techpartner, check_meta_platform, + check_meta_architecture, ) # Keep the order. The next item is dependent on the previous item. 
@@ -34,5 +35,6 @@ "check_meta_series", "check_meta_techpartner", "check_meta_platform", + "check_meta_architecture", ] diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py index d6f272fe2..9a33a2fb7 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py @@ -120,3 +120,50 @@ def check_meta_platform(tree: etree._ElementTree, if meta.text is None or not meta.text.strip(): raise InvalidValueError("Empty meta[@name='platform'] element") + + +def check_meta_architecture(tree: etree._ElementTree, + config: dict[t.Any, t.Any]): + """Checks for a element""" + root = tree.getroot() + meta = root.find("./d:info/d:meta[@name='architecture']", + namespaces=NAMESPACES) + required = config.get("metadata", {}).get("require_meta_architecture", + False) + if meta is None: + if required: + raise InvalidValueError( + f"Couldn't find required meta[@name='architecture'] element " + f"in {root.tag}." + ) + return + + valid_archs = [ + x.strip() for x in config.get("metadata", {} + ).get("valid_meta_architecture", []) + if x + ] + + # Do we have children? + archs = [tag.text.strip() for tag in meta.iterchildren()] + if not archs: + raise InvalidValueError( + f"Couldn't find any child elements in meta[@name='architecture'] " + f"(line {meta.sourceline})." + ) + + # Are they unique? + if len(archs) != len(set(archs)): + raise InvalidValueError( + f"Duplicate architectures found in meta[@name='architecture'] " + f"(line {meta.sourceline})." + ) + + # Do we have items that don't conform to our predefined list? + wrong_items = set(archs) - set(valid_archs) + if wrong_items: + raise InvalidValueError( + f"Unknown architecture(s) {wrong_items}. " + f"Allowed are {valid_archs}." 
+ ) + print(">>>", wrong_items, valid_archs, archs) diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py index d0c12124a..9adb8a189 100644 --- a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py @@ -7,6 +7,7 @@ check_meta_series, check_meta_techpartner, check_meta_platform, + check_meta_architecture, ) from metadatavalidator.exceptions import InvalidValueError @@ -314,3 +315,93 @@ def test_check_empty_meta_platform(xmlparser): config = dict(metadata=dict(require_meta_platform=True)) with pytest.raises(InvalidValueError, match=r".*Empty meta.*"): check_meta_platform(tree, config) + + +def test_check_meta_architecture(xmlparser): + xmlcontent = """
+ + Test + + x86_64 + + + +
""" + tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) + config = dict(metadata=dict(require_meta_architecture=True)) + assert check_meta_architecture(tree, config) is None + + +def test_check_missing_optional_meta_architecture(xmlparser): + xmlcontent = """
+ + Test + + +
""" + tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) + config = dict(metadata=dict(require_meta_architecture=True)) + with pytest.raises(InvalidValueError, + match=r".*Couldn't find required meta.*"): + check_meta_architecture(tree, config) + + +def test_check_missing_child_meta_architecture(xmlparser): + xmlcontent = """
+ + Test + + + +
""" + tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) + config = dict(metadata=dict(require_meta_architecture=True)) + with pytest.raises( + InvalidValueError, + match=r".*Couldn't find any child elements in meta.*" + ): + check_meta_architecture(tree, config) + + +def test_check_duplicate_child_meta_architecture(xmlparser): + xmlcontent = """
+ + Test + + x86_64 + x86_64 + + + +
""" + tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) + config = dict(metadata=dict( + require_meta_architecture=True, + valid_meta_architecture=["x86_64", "POWER"])) + with pytest.raises( + InvalidValueError, match=r".*Duplicate architectures found in meta.*" + ): + check_meta_architecture(tree, config) + + +def test_check_unknown_child_meta_architecture(xmlparser): + xmlcontent = """
+ + Test + + x86_64 + foo + + + +
""" + tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) + config = dict( + metadata=dict( + require_meta_architecture=True, + valid_meta_architecture=["x86_64", "POWER"], + ) + ) + with pytest.raises(InvalidValueError, + match=r".*Unknown architecture.*"): + check_meta_architecture(tree, config) From 1f4839ef58a6d79cf00b74f3b257bf76d7097f26 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Wed, 5 Jun 2024 08:46:35 +0200 Subject: [PATCH 068/107] Add comments in config INI file --- .../metadatavalidator/metadatavalidator.ini | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/python-scripts/metadatavalidator/metadatavalidator.ini b/python-scripts/metadatavalidator/metadatavalidator.ini index 0d85088e5..a502e414c 100644 --- a/python-scripts/metadatavalidator/metadatavalidator.ini +++ b/python-scripts/metadatavalidator/metadatavalidator.ini @@ -14,13 +14,17 @@ meta_title_length = 55 # require_meta_description = off meta_description_length = 150 -# + +# require_meta_series = off valid_meta_series = Products & Solutions, Best Practices, Technical References -# + +# require_meta_techpartner = off -# + +# require_meta_platform = off -# + +# require_meta_architecture = off valid_meta_architecture = Arm, AMD64/Intel\u00a064, POWER, IBM LinuxONE \ No newline at end of file From 3afed3656c26ce36e66823afc5b7676586b3d971 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Wed, 5 Jun 2024 08:46:59 +0200 Subject: [PATCH 069/107] Fix config options in test_check_meta_architecture --- .../metadatavalidator/tests/unit/checks/test_check_meta.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py index 9adb8a189..01e9734a2 100644 --- a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py @@ 
-328,7 +328,8 @@ def test_check_meta_architecture(xmlparser):
""" tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) - config = dict(metadata=dict(require_meta_architecture=True)) + config = dict(metadata=dict(require_meta_architecture=True, + valid_meta_architecture=["x86_64", "POWER"])) assert check_meta_architecture(tree, config) is None From 78f3d3a06cd388b458be52b7fbb0a86e1f73ae88 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Thu, 13 Jun 2024 15:46:42 +0200 Subject: [PATCH 070/107] Check --- python-scripts/metadatavalidator/README.rst | 4 ++ .../metadatavalidator/metadatavalidator.ini | 6 ++- .../src/metadatavalidator/checks/__init__.py | 2 + .../metadatavalidator/checks/check_meta.py | 46 ++++++++++++++++++- .../src/metadatavalidator/config.py | 24 ++++++++++ .../tests/integration/badcase1/article.xml | 7 +++ .../integration/badcase1/config-test.ini | 23 ++++++++++ .../integration/badcase1/test_badcase1.py | 23 ++++++++++ .../integration/goodcase1/config-test.ini | 7 +++ .../tests/unit/checks/test_check_meta.py | 17 +++++++ .../tests/unit/data/metadatavalidator.ini | 18 +++++++- .../tests/unit/test_script_config.py | 17 +++++-- 12 files changed, 187 insertions(+), 7 deletions(-) create mode 100644 python-scripts/metadatavalidator/tests/integration/badcase1/article.xml create mode 100644 python-scripts/metadatavalidator/tests/integration/badcase1/config-test.ini create mode 100644 python-scripts/metadatavalidator/tests/integration/badcase1/test_badcase1.py diff --git a/python-scripts/metadatavalidator/README.rst b/python-scripts/metadatavalidator/README.rst index 709207fdd..9ea4eec46 100644 --- a/python-scripts/metadatavalidator/README.rst +++ b/python-scripts/metadatavalidator/README.rst @@ -79,3 +79,7 @@ recognized: * :var:`require_meta_architecture`: Requires a ```` tag or not. * :var:`valid_meta_architecture`: Lists the valid architecture names for ``/``. + + * :var:`require_meta_category`: Requires a ```` tag or not. 
+ + * :var:`valid_meta_category`: Lists the valid category names for ``/``. \ No newline at end of file diff --git a/python-scripts/metadatavalidator/metadatavalidator.ini b/python-scripts/metadatavalidator/metadatavalidator.ini index a502e414c..540e22b3a 100644 --- a/python-scripts/metadatavalidator/metadatavalidator.ini +++ b/python-scripts/metadatavalidator/metadatavalidator.ini @@ -27,4 +27,8 @@ require_meta_platform = off # require_meta_architecture = off -valid_meta_architecture = Arm, AMD64/Intel\u00a064, POWER, IBM LinuxONE \ No newline at end of file +valid_meta_architecture = Arm, AMD64/Intel\u00a064, POWER, IBM LinuxONE + +# +require_meta_category = off +valid_meta_category = 3rd Party, Cloud, Containerization, Developer Tools, High Availability, Tuning & Performance, SAP, Security, Storage, Systems Management, Virtualization \ No newline at end of file diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py index 6b8133c88..9505d357b 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py @@ -17,6 +17,7 @@ check_meta_techpartner, check_meta_platform, check_meta_architecture, + check_meta_category, ) # Keep the order. The next item is dependent on the previous item. 
@@ -36,5 +37,6 @@ "check_meta_techpartner", "check_meta_platform", "check_meta_architecture", + "check_meta_category", ] diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py index 9a33a2fb7..6b13ff9f7 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py @@ -166,4 +166,48 @@ def check_meta_architecture(tree: etree._ElementTree, f"Unknown architecture(s) {wrong_items}. " f"Allowed are {valid_archs}." ) - print(">>>", wrong_items, valid_archs, archs) + + +def check_meta_category(tree: etree._ElementTree, + config: dict[t.Any, t.Any]): + """Checks for a element""" + root = tree.getroot() + meta = root.find("./d:info/d:meta[@name='category']", + namespaces=NAMESPACES) + required = config.get("metadata", {}).get("require_meta_category", False) + if meta is None: + if required: + raise InvalidValueError( + f"Couldn't find required meta[@name='category'] element " + f"in {root.tag}." + ) + return + + valid_cats = [ + x.strip() for x in config.get("metadata", {} + ).get("valid_meta_category", []) + if x + ] + + # Do we have children? + cats = [tag.text.strip() for tag in meta.iterchildren()] + if not cats: + raise InvalidValueError( + f"Couldn't find any child elements in meta[@name='category'] " + f"(line {meta.sourceline})." + ) + + # Are they unique? + if len(cats) != len(set(cats)): + raise InvalidValueError( + f"Duplicate categories found in meta[@name='category'] " + f"(line {meta.sourceline})." + ) + + # Do we have items that don't conform to our predefined list? + wrong_items = set(cats) - set(valid_cats) + if wrong_items: + raise InvalidValueError( + f"Unknown category(ies) {wrong_items}. " + f"Allowed are {valid_cats}." 
+ ) \ No newline at end of file diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/config.py b/python-scripts/metadatavalidator/src/metadatavalidator/config.py index fd0fa247e..9dee23582 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/config.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/config.py @@ -107,6 +107,30 @@ def validate_and_convert_config(config: configparser.ConfigParser) -> dict[t.Any ) theconfig.setdefault("metadata", {})["require_meta_series"] = require_meta_series + # architectures + require_meta_architecture = truefalse( + theconfig.get("metadata", {}).get("require_meta_architecture", False) + ) + theconfig.setdefault("metadata", {})["require_meta_architecture"] = require_meta_architecture + try: + architectures = split.split(theconfig.get("metadata", {}).get("valid_meta_architecture", [])) + theconfig.setdefault("metadata", {})["valid_meta_architecture"] = architectures + except TypeError: + raise MissingKeyError("metadata.valid_meta_architecture") + + + # categories + require_meta_category = truefalse( + theconfig.get("metadata", {}).get("require_meta_category", False) + ) + theconfig.setdefault("metadata", {})["require_meta_category"] = require_meta_category + try: + categories = split.split(theconfig.get("metadata", {}).get("valid_meta_category", [])) + theconfig.setdefault("metadata", {})["valid_meta_category"] = categories + except TypeError: + raise MissingKeyError("metadata.valid_meta_category") + + # Store the configfiles theconfig["configfiles"] = getattr(config, "configfiles") return theconfig diff --git a/python-scripts/metadatavalidator/tests/integration/badcase1/article.xml b/python-scripts/metadatavalidator/tests/integration/badcase1/article.xml new file mode 100644 index 000000000..d766a4375 --- /dev/null +++ b/python-scripts/metadatavalidator/tests/integration/badcase1/article.xml @@ -0,0 +1,7 @@ +
+ + Test + The very long, long, long long SEO title + + +
diff --git a/python-scripts/metadatavalidator/tests/integration/badcase1/config-test.ini b/python-scripts/metadatavalidator/tests/integration/badcase1/config-test.ini new file mode 100644 index 000000000..636b78b88 --- /dev/null +++ b/python-scripts/metadatavalidator/tests/integration/badcase1/config-test.ini @@ -0,0 +1,23 @@ +[validator] +file_extension = .xml +check_root_elements = book article topic +valid_languages = en-us + +[metadata] +revhistory = 0 +require_xmlid_on_revision = 0 + +# +require_meta_title = on +meta_title_length = 55 + +# +require_meta_description = off +meta_description_length = 150 + +require_meta_architecture = off +valid_meta_architecture = Arm, AMD64/Intel\u00a064, POWER, IBM LinuxONE + +# +require_meta_category = off +valid_meta_category = 3rd Party, Cloud, Containerization, Developer Tools, High Availability, Tuning & Performance, SAP, Security, Storage, Systems Management, Virtualization \ No newline at end of file diff --git a/python-scripts/metadatavalidator/tests/integration/badcase1/test_badcase1.py b/python-scripts/metadatavalidator/tests/integration/badcase1/test_badcase1.py new file mode 100644 index 000000000..a769f5446 --- /dev/null +++ b/python-scripts/metadatavalidator/tests/integration/badcase1/test_badcase1.py @@ -0,0 +1,23 @@ +import os.path +import json +import pytest + +from metadatavalidator.cli import main + + +BASEDIR = os.path.dirname(os.path.realpath(__file__)) +RELATIVE_PATH = os.path.relpath(BASEDIR, os.getcwd()) + + +def test_case1_integration(capsys): + cli = ["--config", f"{BASEDIR}/config-test.ini", + "--format", "json", # needed to avoid formatting issues + f"{RELATIVE_PATH}/article.xml"] + + result = main(cli) + captured = capsys.readouterr() + assert result == 0 + result = json.loads(captured.out) + assert result[0]['errors'] == [] + assert result[0]['xmlfile'] == f"{RELATIVE_PATH}/article.xml" + diff --git a/python-scripts/metadatavalidator/tests/integration/goodcase1/config-test.ini 
b/python-scripts/metadatavalidator/tests/integration/goodcase1/config-test.ini index 08afdc93d..636b78b88 100644 --- a/python-scripts/metadatavalidator/tests/integration/goodcase1/config-test.ini +++ b/python-scripts/metadatavalidator/tests/integration/goodcase1/config-test.ini @@ -14,3 +14,10 @@ meta_title_length = 55 # require_meta_description = off meta_description_length = 150 + +require_meta_architecture = off +valid_meta_architecture = Arm, AMD64/Intel\u00a064, POWER, IBM LinuxONE + +# +require_meta_category = off +valid_meta_category = 3rd Party, Cloud, Containerization, Developer Tools, High Availability, Tuning & Performance, SAP, Security, Storage, Systems Management, Virtualization \ No newline at end of file diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py index 01e9734a2..c3e51ecce 100644 --- a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py @@ -8,6 +8,7 @@ check_meta_techpartner, check_meta_platform, check_meta_architecture, + check_meta_category, ) from metadatavalidator.exceptions import InvalidValueError @@ -406,3 +407,19 @@ def test_check_unknown_child_meta_architecture(xmlparser): with pytest.raises(InvalidValueError, match=r".*Unknown architecture.*"): check_meta_architecture(tree, config) + + +def test_meta_category(xmlparser): + xmlcontent = """
+ + Test + + Systems Management + + + +
""" + tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) + config = dict(metadata=dict(require_meta_category=True, + valid_meta_category=["Systems Management"])) + assert check_meta_category(tree, config) is None \ No newline at end of file diff --git a/python-scripts/metadatavalidator/tests/unit/data/metadatavalidator.ini b/python-scripts/metadatavalidator/tests/unit/data/metadatavalidator.ini index 9dcf1e744..e9cc4c1b8 100644 --- a/python-scripts/metadatavalidator/tests/unit/data/metadatavalidator.ini +++ b/python-scripts/metadatavalidator/tests/unit/data/metadatavalidator.ini @@ -13,6 +13,20 @@ meta_title_length = 55 # require_meta_description = off meta_description_length = 150 -# + +# require_meta_series = off -valid_meta_series = Products & Solutions, Best Practices, Technical References \ No newline at end of file +valid_meta_series = Products & Solutions, Best Practices, Technical References + +# +require_meta_techpartner = off + +# +require_meta_platform = off + +# +require_meta_architecture = off +valid_meta_architecture = Arm, AMD64/Intel\u00a064, POWER, IBM LinuxONE + +require_meta_category = off +valid_meta_category = 3rd Party, Cloud, Containerization, Developer Tools, High Availability, Tuning & Performance, SAP, Security, Storage, Systems Management, Virtualization \ No newline at end of file diff --git a/python-scripts/metadatavalidator/tests/unit/test_script_config.py b/python-scripts/metadatavalidator/tests/unit/test_script_config.py index 189204547..b9dbdd7f1 100644 --- a/python-scripts/metadatavalidator/tests/unit/test_script_config.py +++ b/python-scripts/metadatavalidator/tests/unit/test_script_config.py @@ -3,8 +3,16 @@ import pytest -from metadatavalidator.config import readconfig, validate_and_convert_config, truefalse -from metadatavalidator.exceptions import MissingKeyError, MissingSectionError, NoConfigFilesFoundError +from metadatavalidator.config import ( + readconfig, + validate_and_convert_config, + truefalse, 
+ ) +from metadatavalidator.exceptions import ( + MissingKeyError, + MissingSectionError, + NoConfigFilesFoundError, + ) def create_config(): config = ConfigParser() @@ -19,6 +27,8 @@ def create_config(): config.set("metadata", "meta_title_length", "50") config.set("metadata", "meta_description_length", "150") # + config.set("metadata", "valid_meta_architecture", "A, B, C") + config.set("metadata", "valid_meta_category", "D, E, F") setattr(config, "configfiles", None) return config @@ -49,7 +59,8 @@ def test_valid_validate_and_convert_config(): assert result.get("validator") == { "check_root_elements": ["book", "article"], "file_extension": ".xml", - "valid_languages": ["en-us", "de-de",] + "valid_languages": ["en-us", "de-de",], + # "" } From a90236640f0f1f19d1c5920f5d57158b18316e4d Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Fri, 21 Jun 2024 11:24:49 +0200 Subject: [PATCH 071/107] Refactor validate_and_convert_config * Put everything in smaller validate_*() functions * Convert the value and catch problems in these validate_*() functions --- .../metadatavalidator/checks/check_info.py | 1 + .../src/metadatavalidator/config.py | 217 ++++++++++++++---- .../src/metadatavalidator/process.py | 2 + .../metadatavalidator/tests/conftest.py | 34 ++- .../tests/unit/test_script_config.py | 54 ++--- 5 files changed, 226 insertions(+), 82 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py index 7233948bc..0b985ccd7 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py @@ -1,3 +1,4 @@ +import asyncio import datetime import itertools import typing as t diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/config.py b/python-scripts/metadatavalidator/src/metadatavalidator/config.py index 9dee23582..d028bf5de 100644 --- 
a/python-scripts/metadatavalidator/src/metadatavalidator/config.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/config.py @@ -24,8 +24,10 @@ # merged_config |= config # return merged_config +SPLIT = re.compile(r"[;, ]") -def as_dict(config: configparser.ConfigParser): + +def as_dict(config: configparser.ConfigParser) -> dict[str, t.Any]: """ Converts a ConfigParser object into a dictionary. @@ -49,88 +51,211 @@ def truefalse(value: str|bool|int) -> bool: return str(value).lower() in ("true", "yes", "1", "on") -def validate_and_convert_config(config: configparser.ConfigParser) -> dict[t.Any, t.Any]: - """Validate sections, keys, and their values of the config +# def retrievekey(config: configparser.ConfigParser, +# section: str, key: str, default: t.Any = None) -> t.Any: +# """Retrieve a key from a section in a config file - :param config: the :class:`configparser.Configparser` object - :return: a dict that contains converted keys into their - respective datatypes - """ - split = re.compile(r"[;, ]") - theconfig = as_dict(config) +# :param config: the configuration object +# :param section: the section to look for +# :param key: the key to look for +# :param default: the default value if the key is not found +# :return: the value of the key +# """ +# if not config.has_section(section): +# raise MissingSectionError(section) +# return config.get(section, key, fallback=default) - if not config.has_section("validator"): - raise MissingSectionError("validator") - # Section "validator" - check_root_elements = config.get("validator", "check_root_elements", fallback=None) +# def get_metadata(config: configparser.ConfigParser, key) -> dict[str, t.Any]: +# """Retrieve the metadata section from the config + +# :param config: the configuration object +# :return: a dictionary with the metadata section +# """ +# return retrievekey(config, "metadata", key) + + +def validate_check_root_elements(config: dict) -> list[str]: + """Validate the language section of the 
config + + :param config: the configuration object + :return: a list of valid languages + """ + check_root_elements = config.get("validator", {}).get("check_root_elements") if check_root_elements is None: raise MissingKeyError("validator.check_root_elements") - theconfig["validator"]["check_root_elements"] = split.split(check_root_elements) - valid_languages = config.get("validator", "valid_languages", fallback=None) + return SPLIT.split(check_root_elements) + + +def validate_valid_languages(config: dict) -> list[str]: + """Validate the language section of the config + + :param config: the configuration object + :return: a list of valid languages + """ + + # valid_languages = retrievekey(config, "validator", "valid_languages") + valid_languages = config.get("validator", {}).get("valid_languages") if valid_languages is None: raise MissingKeyError("validator.valid_languages") - theconfig["validator"]["valid_languages"] = split.split(valid_languages) + return SPLIT.split(valid_languages) - # Section "metadata" - require_xmlid_on_revision = truefalse( - theconfig.get("metadata", {}).get("require_xmlid_on_revision", True) - ) - theconfig.setdefault("metadata", {})["require_xmlid_on_revision"] = require_xmlid_on_revision +def validate_meta_title_length(config: dict) -> int: + """Validate the meta title length + + :param config: the configuration object + :return: the meta title length + """ try: - meta_title_length = int(theconfig.get("metadata", {}).get("meta_title_length")) + meta_title_length = int(config.get("metadata", {}).get("meta_title_length")) if meta_title_length < 0: raise ValueError("meta_title_length should be a positive integer") - theconfig.setdefault("metadata", {})["meta_title_length"] = meta_title_length + return meta_title_length except TypeError: raise MissingKeyError("metadata.meta_title_length") + +def validate_meta_description_length(config: dict) -> int: + """Validate the meta description length + + :param config: the configuration object + 
:return: the meta description length + """ try: - meta_description_length = int(theconfig.get("metadata", {}).get("meta_description_length")) + meta_description_length = int(config.get("metadata", {}).get("meta_description_length")) if meta_description_length < 0: raise ValueError("meta_description_length should be a positive integer") - theconfig.setdefault("metadata", {})["meta_description_length"] = meta_description_length + return meta_description_length except TypeError: raise MissingKeyError("metadata.meta_description_length") - split = re.compile(r"[;,]") # no space! - valid_meta_series = split.split(theconfig.get("metadata", {}).get("valid_meta_series", "")) - theconfig.setdefault("metadata", {})["valid_meta_series"] = valid_meta_series - require_meta_series = truefalse( - theconfig.get("metadata", {}).get("require_meta_series", False) - ) - theconfig.setdefault("metadata", {})["require_meta_series"] = require_meta_series +def validate_valid_meta_series(config: dict) -> list[str]: + """Validate the meta series - # architectures - require_meta_architecture = truefalse( - theconfig.get("metadata", {}).get("require_meta_architecture", False) - ) - theconfig.setdefault("metadata", {})["require_meta_architecture"] = require_meta_architecture + :param config: the configuration object + :return: a list of valid meta series + """ + # split = re.compile(r"[;,]") # no space! 
+ return [x.strip() for x in re.split(r"[;,]", + config.get("metadata", {}).get("valid_meta_series", "") + ) + if x + ] + + +def validate_valid_meta_architecture(config: dict) -> list[str]: + """Validate the meta architecture + + :param config: the configuration object + :return: a list of valid meta architecture + """ try: - architectures = split.split(theconfig.get("metadata", {}).get("valid_meta_architecture", [])) - theconfig.setdefault("metadata", {})["valid_meta_architecture"] = architectures + # architectures = re.split(r"[;,]", config.get("metadata", {}).get("valid_meta_architecture", [])) + return [x.strip() for x in re.split(r"[;,]", + config.get("metadata", {}).get("valid_meta_architecture", "") + ) + if x + ] except TypeError: raise MissingKeyError("metadata.valid_meta_architecture") - # categories - require_meta_category = truefalse( - theconfig.get("metadata", {}).get("require_meta_category", False) - ) - theconfig.setdefault("metadata", {})["require_meta_category"] = require_meta_category +def validate_valid_meta_category(config: dict) -> list[str]: + """Validate the meta category + + :param config: the configuration object + :return: a list of valid meta category + """ try: - categories = split.split(theconfig.get("metadata", {}).get("valid_meta_category", [])) - theconfig.setdefault("metadata", {})["valid_meta_category"] = categories + return [x.strip() for x in re.split(r"[;,]", + config.get("metadata", {}).get("valid_meta_category", "") + ) + if x + ] except TypeError: raise MissingKeyError("metadata.valid_meta_category") +def validate_and_convert_config(config: configparser.ConfigParser) -> dict[t.Any, t.Any]: + """Validate sections, keys, and their values of the config + + :param config: the :class:`configparser.Configparser` object + :return: a dict that contains converted keys into their + respective datatypes + """ + # TODO: This should be better used with pydantic + if not config.has_section("validator"): + raise 
MissingSectionError("validator") + if not config.has_section("metadata"): + raise MissingSectionError("metadata") + + theconfig = as_dict(config) + # Section "validator" + theconfig["validator"]["check_root_elements"] = validate_check_root_elements(theconfig) + theconfig["validator"]["valid_languages"] = validate_valid_languages(theconfig) + + # Section "metadata" + theconfig.setdefault("metadata", {})[ + "require_xmlid_on_revision" + ] = truefalse(theconfig.get("metadata", {}).get("require_xmlid_on_revision", True)) + + # + theconfig.setdefault("metadata", {})[ + "meta_title_length" + ] = validate_meta_title_length(theconfig) + + # + theconfig.setdefault("metadata", {})[ + "require_meta_description" + ] = truefalse(theconfig.get("metadata", {}).get("require_meta_description", False)) + theconfig.setdefault("metadata", {})[ + "meta_description_length" + ] = validate_meta_description_length(theconfig) + + # + theconfig.setdefault("metadata", {})[ + "require_meta_series" + ] = truefalse(theconfig.get("metadata", {}).get("require_meta_series", False)) + theconfig.setdefault("metadata", {})[ + "valid_meta_series" + ] = validate_valid_meta_series(theconfig) + + # + theconfig.setdefault("metadata", {})[ + "require_meta_architecture" + ] = truefalse(theconfig.get("metadata", {}).get("require_meta_architecture", + False)) + theconfig.setdefault("metadata", {})[ + "valid_meta_architecture" + ] = validate_valid_meta_architecture(theconfig) + + # + require_meta_techpartner = truefalse( + theconfig.get("metadata", {}).get("require_meta_techpartner", False) + ) + theconfig.setdefault("metadata", {})[ + "require_meta_techpartner" + ] = require_meta_techpartner + + # + theconfig.setdefault("metadata", {})[ + "require_meta_platform" + ] = truefalse(theconfig.get("metadata", {}).get("require_meta_platform", False)) + + # + theconfig.setdefault("metadata", {})[ + "require_meta_category" + ] = truefalse(theconfig.get("metadata", {}).get("require_meta_category", False)) + 
theconfig.setdefault("metadata", {})[ + "valid_meta_category" + ] = validate_valid_meta_category(theconfig) + # Store the configfiles theconfig["configfiles"] = getattr(config, "configfiles") return theconfig diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/process.py b/python-scripts/metadatavalidator/src/metadatavalidator/process.py index f914dda66..60cfa76f8 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/process.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/process.py @@ -32,6 +32,7 @@ async def process_xml_file(xmlfile: str, config: dict[t.Any, t.Any]): """ errors = [] basexmlfile = os.path.basename(xmlfile) + # log.debug("Config %s", config) for checkfunc in get_all_check_functions(checks.__package__): log.debug("Checking %r with %r", basexmlfile, @@ -44,6 +45,7 @@ async def process_xml_file(xmlfile: str, config: dict[t.Any, t.Any]): # Apply check function checkfunc(tree, config) + # await asyncio.sleep(0.1) except etree.XMLSyntaxError as e: # log.fatal("Syntax error in %r: %s", xmlfile, e) diff --git a/python-scripts/metadatavalidator/tests/conftest.py b/python-scripts/metadatavalidator/tests/conftest.py index fcb4b4361..caf982592 100644 --- a/python-scripts/metadatavalidator/tests/conftest.py +++ b/python-scripts/metadatavalidator/tests/conftest.py @@ -1,14 +1,42 @@ -import sys +from configparser import ConfigParser import os, os.path +import typing as t from lxml import etree - import pytest +from metadatavalidator.config import as_dict + + os.environ.setdefault("PYTHONPATH", os.path.normpath(os.path.join(os.path.dirname(__file__), ".."))) @pytest.fixture def xmlparser(): - return etree.XMLParser(encoding="UTF-8") \ No newline at end of file + return etree.XMLParser(encoding="UTF-8") + + +@pytest.fixture(scope="function") +def config() -> ConfigParser: + config = ConfigParser() + config.add_section("validator") + config.set("validator", "check_root_elements", "book article") + config.set("validator", 
"file_extension", ".xml") + config.set("validator", "valid_languages", "en-us de-de") + # + config.add_section("metadata") + config.set("metadata", "revhistory", "0") + config.set("metadata", "require_xmlid_on_revision", "true") + config.set("metadata", "meta_title_length", "50") + config.set("metadata", "meta_description_length", "150") + # + config.set("metadata", "valid_meta_architecture", "A, B, C") + config.set("metadata", "valid_meta_category", "D, E, F") + setattr(config, "configfiles", None) + return config + + +@pytest.fixture(scope="function") +def dict_config(config) -> dict[str, t.Any]: + return as_dict(config) diff --git a/python-scripts/metadatavalidator/tests/unit/test_script_config.py b/python-scripts/metadatavalidator/tests/unit/test_script_config.py index b9dbdd7f1..6960d0cf2 100644 --- a/python-scripts/metadatavalidator/tests/unit/test_script_config.py +++ b/python-scripts/metadatavalidator/tests/unit/test_script_config.py @@ -5,8 +5,10 @@ from metadatavalidator.config import ( readconfig, - validate_and_convert_config, truefalse, + validate_check_root_elements, + validate_and_convert_config, + validate_valid_meta_architecture, ) from metadatavalidator.exceptions import ( MissingKeyError, @@ -14,24 +16,6 @@ NoConfigFilesFoundError, ) -def create_config(): - config = ConfigParser() - config.add_section("validator") - config.set("validator", "check_root_elements", "book article") - config.set("validator", "file_extension", ".xml") - config.set("validator", "valid_languages", "en-us de-de") - # - config.add_section("metadata") - config.set("metadata", "revhistory", "0") - config.set("metadata", "require_xmlid_on_revision", "true") - config.set("metadata", "meta_title_length", "50") - config.set("metadata", "meta_description_length", "150") - # - config.set("metadata", "valid_meta_architecture", "A, B, C") - config.set("metadata", "valid_meta_category", "D, E, F") - setattr(config, "configfiles", None) - return config - 
@pytest.mark.parametrize("value, expected", [ ("true", True), @@ -53,8 +37,7 @@ def test_truefalse(value, expected): assert truefalse(value) == expected -def test_valid_validate_and_convert_config(): - config = create_config() +def test_valid_validate_and_convert_config(config): result = validate_and_convert_config(config) assert result.get("validator") == { "check_root_elements": ["book", "article"], @@ -75,43 +58,37 @@ def test_missing_validator_section(): validate_and_convert_config(config) -def test_missing_key_check_root_elements(): - config = create_config() +def test_missing_key_check_root_elements(config): config.remove_option("validator", "check_root_elements") with pytest.raises(MissingKeyError, match=".*validator.check_root_elements.*"): validate_and_convert_config(config) -def test_missing_key_valid_languages(): - config = create_config() +def test_missing_key_valid_languages(config): config.remove_option("validator", "valid_languages") with pytest.raises(MissingKeyError, match=".*validator.valid_languages.*"): validate_and_convert_config(config) -def test_missing_key_meta_title_length(): - config = create_config() +def test_missing_key_meta_title_length(config): config.remove_option("metadata", "meta_title_length") with pytest.raises(MissingKeyError, match=".*metadata.meta_title_length.*"): validate_and_convert_config(config) -def test_meta_title_length_not_positive(): - config = create_config() +def test_meta_title_length_not_positive(config): config.set("metadata", "meta_title_length", "-1") with pytest.raises(ValueError, match=".*meta_title_length should be a positive integer.*"): validate_and_convert_config(config) -def test_meta_description_length_not_positive(): - config = create_config() +def test_meta_description_length_not_positive(config): config.set("metadata", "meta_description_length", "-1") with pytest.raises(ValueError, match=".*meta_description_length should be a positive integer.*"): validate_and_convert_config(config) -def 
test_missing_key_meta_description_length(): - config = create_config() +def test_missing_key_meta_description_length(config): config.remove_option("metadata", "meta_description_length") with pytest.raises(MissingKeyError, match=".*metadata.meta_description_length.*"): validate_and_convert_config(config) @@ -127,3 +104,14 @@ def test_readconfig(): } assert result.get("configfiles") == [configfile] + +def test_validate_check_root_elements(dict_config): + assert validate_check_root_elements(dict_config) == ["book", "article"] + +def test_validate_check_root_elements_missing_key(dict_config): + dict_config["validator"].pop("check_root_elements") + with pytest.raises(MissingKeyError, match=".*validator.check_root_elements.*"): + validate_check_root_elements(dict_config) + +def test_validate_valid_meta_architecture(dict_config): + assert validate_valid_meta_architecture(dict_config) == ["A", "B", "C"] \ No newline at end of file From 1df2894c0d624b9da2a959c62dcde98b695a24ac Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Fri, 21 Jun 2024 14:28:18 +0200 Subject: [PATCH 072/107] Add additional test cases --- .../metadatavalidator/checks/check_root.py | 1 + .../src/metadatavalidator/config.py | 15 ++-- .../tests/unit/checks/test_check_info.py | 52 ++++++++------ .../tests/unit/checks/test_check_meta.py | 68 ++++++++++++++++++- .../tests/unit/test_script_config.py | 7 ++ 5 files changed, 112 insertions(+), 31 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_root.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_root.py index 5979af2f2..008247a1a 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_root.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_root.py @@ -21,3 +21,4 @@ def check_namespace(tree: etree._ElementTree, config: dict[t.Any, t.Any]): tag = etree.QName(tree.getroot().tag) if tag.namespace != DOCBOOK_NS: raise InvalidValueError(f"Root 
element {tag.localname!r} doesn't belong to DocBook 5.") + diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/config.py b/python-scripts/metadatavalidator/src/metadatavalidator/config.py index d028bf5de..a96892750 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/config.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/config.py @@ -155,15 +155,11 @@ def validate_valid_meta_architecture(config: dict) -> list[str]: :param config: the configuration object :return: a list of valid meta architecture """ - try: - # architectures = re.split(r"[;,]", config.get("metadata", {}).get("valid_meta_architecture", [])) - return [x.strip() for x in re.split(r"[;,]", + return [x.strip() for x in re.split(r"[;,]", config.get("metadata", {}).get("valid_meta_architecture", "") ) if x - ] - except TypeError: - raise MissingKeyError("metadata.valid_meta_architecture") + ] def validate_valid_meta_category(config: dict) -> list[str]: @@ -172,14 +168,11 @@ def validate_valid_meta_category(config: dict) -> list[str]: :param config: the configuration object :return: a list of valid meta category """ - try: - return [x.strip() for x in re.split(r"[;,]", + return [x.strip() for x in re.split(r"[;,]", config.get("metadata", {}).get("valid_meta_category", "") ) if x - ] - except TypeError: - raise MissingKeyError("metadata.valid_meta_category") + ] def validate_and_convert_config(config: configparser.ConfigParser) -> dict[t.Any, t.Any]: diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_info.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_info.py index fbb9d9d16..fa6a79e88 100644 --- a/python-scripts/metadatavalidator/tests/unit/checks/test_check_info.py +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_info.py @@ -2,7 +2,8 @@ from lxml import etree from metadatavalidator.checks import ( - check_info, check_info_revhistory, + check_info, + check_info_revhistory, 
check_info_revhistory_revision, check_info_revhistory_revision_date, check_info_revhistory_revision_order, @@ -49,7 +50,7 @@ def test_check_info_revhistory_missing(xmlparser): with pytest.raises(InvalidValueError, match="Couldn't find a revhistory element"): - check_info_revhistory(tree, {}) + check_info_revhistory(tree, {"metadata": {"require_revhistory": True}}) def test_check_info_revhistory(xmlparser): @@ -76,7 +77,6 @@ def test_check_info_revhistory_without_info(xmlparser): ) assert check_info_revhistory(tree, {}) is None - assert check_info_revhistory(tree, {}) is None def test_check_info_revhistory_xmlid(xmlparser): @@ -94,11 +94,11 @@ def test_check_info_revhistory_xmlid(xmlparser): assert check_info_revhistory(tree, {}) is None -def test_check_info_revhistory_missing_xmlid(xmlparser): +def test_info_revhistory_missing_xmlid(xmlparser): xmlcontent = """
Test - +
""" @@ -169,20 +169,6 @@ def test_check_info_revhistory_revision_missing_xmlid(xmlparser): {"metadata": {"require_xmlid_on_revision": True}}) -def test_check_info_revhistory_missing(xmlparser): - xmlcontent = """
- - Test - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) - - check_info_revhistory_revision(tree, {}) is None - - def test_check_info_revhistory_revision_missing(xmlparser): xmlcontent = """
@@ -334,3 +320,31 @@ def test_check_info_revhistory_revision_order_one_invalid_date(xmlparser): match=".*Couldn't convert all dates.*see position dates=1.*" ): check_info_revhistory_revision_order(tree, {}) + + +def test_check_info_revhistory_revision_wrong_order(xmlparser): + xmlcontent = """
+ + Test + + + 2024-12 + + + 2023-12-12 + + + 2026-04 + + + + +
""" + tree = etree.ElementTree( + etree.fromstring(xmlcontent, parser=xmlparser) + ) + + with pytest.raises(InvalidValueError, + match=".*Dates in revhistory/revision are not in descending order.*" + ): + check_info_revhistory_revision_order(tree, {}) \ No newline at end of file diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py index c3e51ecce..bf6377dfe 100644 --- a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py @@ -422,4 +422,70 @@ def test_meta_category(xmlparser): tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) config = dict(metadata=dict(require_meta_category=True, valid_meta_category=["Systems Management"])) - assert check_meta_category(tree, config) is None \ No newline at end of file + assert check_meta_category(tree, config) is None + + +def test_missing_optional_meta_category(xmlparser): + xmlcontent = """
+ + Test + + +
""" + tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) + config = dict(metadata=dict(require_meta_category=True)) + with pytest.raises(InvalidValueError, + match=r".*Couldn't find required meta.*"): + check_meta_category(tree, config) + + +def test_missing_child_meta_category(xmlparser): + xmlcontent = """
+ + Test + + + +
""" + tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) + config = dict(metadata=dict(require_meta_category=True)) + with pytest.raises(InvalidValueError, + match=r".*Couldn't find any child elements in meta.*"): + check_meta_category(tree, config) + + +def test_duplicate_child_meta_category(xmlparser): + xmlcontent = """
+ + Test + + Systems Management + Systems Management + + + +
""" + tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) + config = dict(metadata=dict(require_meta_category=True)) + with pytest.raises(InvalidValueError, + match=r".*Duplicate categories found in meta.*"): + check_meta_category(tree, config) + + +def test_unknown_category_meta_category(xmlparser): + xmlcontent = """
+ + Test + + Systems Management + Foo + + + +
""" + tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) + config = dict(metadata=dict(require_meta_category=True, + valid_meta_category=["Systems Management"])) + with pytest.raises(InvalidValueError, + match=r".*Unknown category.*"): + check_meta_category(tree, config) \ No newline at end of file diff --git a/python-scripts/metadatavalidator/tests/unit/test_script_config.py b/python-scripts/metadatavalidator/tests/unit/test_script_config.py index 6960d0cf2..59d94701d 100644 --- a/python-scripts/metadatavalidator/tests/unit/test_script_config.py +++ b/python-scripts/metadatavalidator/tests/unit/test_script_config.py @@ -58,6 +58,13 @@ def test_missing_validator_section(): validate_and_convert_config(config) +def test_missing_metadata_section(): + config = ConfigParser() + config.add_section("validator") + with pytest.raises(MissingSectionError, match=".*metadata.*"): + validate_and_convert_config(config) + + def test_missing_key_check_root_elements(config): config.remove_option("validator", "check_root_elements") with pytest.raises(MissingKeyError, match=".*validator.check_root_elements.*"): From b2b3a5f069498f4698da7d258edb8c9b7b2ecac4 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Thu, 27 Jun 2024 17:48:32 +0200 Subject: [PATCH 073/107] Check --- python-scripts/metadatavalidator/README.rst | 31 ++++++++++- .../metadatavalidator/metadatavalidator.ini | 4 ++ .../src/metadatavalidator/checks/__init__.py | 2 + .../metadatavalidator/checks/check_meta.py | 45 ++++++++++++++++ .../src/metadatavalidator/config.py | 20 +++++++ .../tests/unit/checks/test_check_meta.py | 52 ++++++++++++++++++- 6 files changed, 151 insertions(+), 3 deletions(-) diff --git a/python-scripts/metadatavalidator/README.rst b/python-scripts/metadatavalidator/README.rst index 9ea4eec46..9708911d3 100644 --- a/python-scripts/metadatavalidator/README.rst +++ b/python-scripts/metadatavalidator/README.rst @@ -19,14 +19,37 @@ To install the script, run the following command: .. 
code-block:: bash - pip install . + $ pip install . For development, you can install the script in editable mode: .. code-block:: bash - pip install -e .[test] + $ pip install -e .[test] + + +Usage +----- + +Call the script with the following command: + +.. code-block:: bash + + $ metadatavalidator PATH_TO_DOCBOOK_FILES + +The script will show all problems with metadata + +.. code-block:: + + $ metadatavalidator a.xml b.xml + ==== RESULTS ==== + [1] a.xml: + 1.1: check_info_revhistory_revision: Missing recommended attribute in /d:article/d:info[2]/d:revhistory[12]/d:revision/@xml:id + + [2] b.xml: + 2.1: + Configuration @@ -35,9 +58,13 @@ Configuration The configuration file is search in the following order (first is the highest): 1. Command line with :option:`--config`. This doesn't search for other configuration files. + 1. Environment variable :envar:`METAVALIDATOR_CONFIG`. + 1. In the current directory: :file:`metadatavalidator.ini` + 1. In the users' home directory: :file:`~/.config/metadatavalidator/config.ini` + 1. 
In the system: :file:`/etc/metadatavalidator/config.ini` diff --git a/python-scripts/metadatavalidator/metadatavalidator.ini b/python-scripts/metadatavalidator/metadatavalidator.ini index 540e22b3a..b4a2dd384 100644 --- a/python-scripts/metadatavalidator/metadatavalidator.ini +++ b/python-scripts/metadatavalidator/metadatavalidator.ini @@ -29,6 +29,10 @@ require_meta_platform = off require_meta_architecture = off valid_meta_architecture = Arm, AMD64/Intel\u00a064, POWER, IBM LinuxONE +# +require_meta_task = off +valid_meta_task = Administration, Artificial Intelligence, Auditing, Authentication, Automation, Backup & Recovery, Certification, Cloud, Clustering, Compliance, Configuration, Containerization, Container Management, Data Intelligence, Deployment, Design, Encryption, High Availability, Image Building, Implementation, Installation, Integration, Maintenance, Migration, Monitoring, Packaging, Security, Storage, Subscription Management, Troubleshooting, Upgrade & Update, Virtualization, Vulnerability, Web + # require_meta_category = off valid_meta_category = 3rd Party, Cloud, Containerization, Developer Tools, High Availability, Tuning & Performance, SAP, Security, Storage, Systems Management, Virtualization \ No newline at end of file diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py index 9505d357b..281be3c19 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/__init__.py @@ -18,6 +18,7 @@ check_meta_platform, check_meta_architecture, check_meta_category, + check_meta_task, ) # Keep the order. The next item is dependent on the previous item. 
@@ -38,5 +39,6 @@ "check_meta_platform", "check_meta_architecture", "check_meta_category", + "check_meta_task", ] diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py index 6b13ff9f7..e8ace5e29 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py @@ -210,4 +210,49 @@ def check_meta_category(tree: etree._ElementTree, raise InvalidValueError( f"Unknown category(ies) {wrong_items}. " f"Allowed are {valid_cats}." + ) + + +def check_meta_task(tree: etree._ElementTree, + config: dict[t.Any, t.Any]): + """Checks for a element""" + root = tree.getroot() + meta = root.find("./d:info/d:meta[@name='task']", + namespaces=NAMESPACES) + required = config.get("metadata", {}).get("require_meta_task", False) + if meta is None: + if required: + raise InvalidValueError( + f"Couldn't find required meta[@name='task'] element " + f"in {root.tag}." + ) + return + + valid_tasks = [ + x.strip() for x in config.get("metadata", {} + ).get("valid_meta_task", []) + if x + ] + + # Do we have children? + tasks = [tag.text.strip() for tag in meta.iterchildren()] + if not tasks: + raise InvalidValueError( + f"Couldn't find any child elements in meta[@name='task'] " + f"(line {meta.sourceline})." + ) + + # Are they unique? + if len(tasks) != len(set(tasks)): + raise InvalidValueError( + f"Duplicate tasks found in meta[@name='task'] " + f"(line {meta.sourceline})." + ) + + # Do we have items that don't conform to our predefined list? + wrong_items = set(tasks) - set(valid_tasks) + if wrong_items: + raise InvalidValueError( + f"Unknown task(s) {wrong_items}. " + f"Allowed are {valid_tasks}." 
) \ No newline at end of file diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/config.py b/python-scripts/metadatavalidator/src/metadatavalidator/config.py index a96892750..b99c7cfec 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/config.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/config.py @@ -174,6 +174,18 @@ def validate_valid_meta_category(config: dict) -> list[str]: if x ] +def validate_valid_meta_task(config: dict) -> list[str]: + """Validate the meta task + + :param config: the configuration object + :return: a list of valid meta task + """ + return [x.strip() for x in re.split(r"[;,]", + config.get("metadata", {}).get("valid_meta_task", "") + ) + if x + ] + def validate_and_convert_config(config: configparser.ConfigParser) -> dict[t.Any, t.Any]: """Validate sections, keys, and their values of the config @@ -249,6 +261,14 @@ def validate_and_convert_config(config: configparser.ConfigParser) -> dict[t.Any "valid_meta_category" ] = validate_valid_meta_category(theconfig) + # + theconfig.setdefault("metadata", {})[ + "require_meta_task" + ] = truefalse(theconfig.get("metadata", {}).get("require_meta_task", False)) + theconfig.setdefault("metadata", {})[ + "valid_meta_task" + ] = validate_valid_meta_task(theconfig) + # Store the configfiles theconfig["configfiles"] = getattr(config, "configfiles") return theconfig diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py index bf6377dfe..dca91ff09 100644 --- a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py @@ -9,6 +9,7 @@ check_meta_platform, check_meta_architecture, check_meta_category, + check_meta_task, ) from metadatavalidator.exceptions import InvalidValueError @@ -488,4 +489,53 @@ def test_unknown_category_meta_category(xmlparser): 
valid_meta_category=["Systems Management"])) with pytest.raises(InvalidValueError, match=r".*Unknown category.*"): - check_meta_category(tree, config) \ No newline at end of file + check_meta_category(tree, config) + + +def test_meta_task(xmlparser): + xmlcontent = """
+ + Test + + Configuration + + +
""" + tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) + config = dict(metadata=dict(require_meta_task=True, + valid_meta_task=["Configuration"])) + assert check_meta_task(tree, config) is None + + +def test_missing_child_meta_task(xmlparser): + xmlcontent = """
+ + Test + + +
""" + tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) + config = dict(metadata=dict(require_meta_task=True), + valid_meta_task=["Configuration"]) + with pytest.raises(InvalidValueError, + match=r".*Couldn't find any child elements in meta.*"): + check_meta_task(tree, config) + + + +def test_duplicate_child_meta_task(xmlparser): + xmlcontent = """
+ + Test + + Configuration + Configuration + + +
""" + tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) + config = dict(metadata=dict(require_meta_task=True), + valid_meta_task=["Configuration"]) + with pytest.raises(InvalidValueError, + match=r".*Duplicate tasks found in meta.*"): + check_meta_task(tree, config) \ No newline at end of file From 54787cc929ec113ad4355b3d5ac9a52a98c89e61 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Thu, 27 Jun 2024 19:23:39 +0200 Subject: [PATCH 074/107] Simplify tests * Add additional functions: - createmeta(attribs: dict, text: str|None=None, nsmap:dict|None=None) - appendmeta(tree: etree._ElementTree, meta: etree._Element) - addphrase(meta: etree._Element, phrases: list) All these help to simpliy the creation of and the addition to . * Add two additional fixtures: - xmlcontent(): returns a string XML
structure with but without additional content - tree(): returns a ElementTree based on xmlcontent --- .../metadatavalidator/tests/conftest.py | 15 + .../tests/unit/checks/test_check_meta.py | 718 ++++++++++-------- 2 files changed, 397 insertions(+), 336 deletions(-) diff --git a/python-scripts/metadatavalidator/tests/conftest.py b/python-scripts/metadatavalidator/tests/conftest.py index caf982592..2ac0c52cb 100644 --- a/python-scripts/metadatavalidator/tests/conftest.py +++ b/python-scripts/metadatavalidator/tests/conftest.py @@ -17,6 +17,21 @@ def xmlparser(): return etree.XMLParser(encoding="UTF-8") +@pytest.fixture +def xmlcontent(): + return """
+ + Test + + +
""" + + +@pytest.fixture +def tree(xmlcontent, xmlparser): + return etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) + + @pytest.fixture(scope="function") def config() -> ConfigParser: config = ConfigParser() diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py index dca91ff09..df2c7b2d4 100644 --- a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py @@ -1,6 +1,7 @@ from lxml import etree import pytest +from metadatavalidator.common import NAMESPACES from metadatavalidator.checks.check_meta import ( check_meta_title, check_meta_description, @@ -13,136 +14,158 @@ ) from metadatavalidator.exceptions import InvalidValueError - -def test_check_meta_title(xmlparser): - xmlcontent = """
- - Test - The SEO title - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def createmeta(attribs: dict, + text: str|None=None, + nsmap: dict|None=None): + """Create a element with the given attributes and text.""" + if nsmap is None: + nsmap = {None: NAMESPACES['d']} + + meta = etree.Element(f"{{{NAMESPACES['d']}}}meta", + attrib=attribs, + nsmap=nsmap) + meta.text = text + return meta + + +def appendmeta(tree: etree._ElementTree, meta: etree._Element): + """Append the given element to the element in the tree.""" + root = tree.getroot() + info = root.find("./d:info", namespaces=NAMESPACES) + info.append(meta) + return tree + + +def addphrase(meta: etree._Element, phrases: list): + """Add a list of elements to the given element.""" + for phrase in phrases: + etree.SubElement(meta, f"{{{NAMESPACES['d']}}}phrase").text = phrase + return meta + + +def test_check_meta_title(tree): +# xmlcontent = """
+# +# Test +# The SEO title +# +# +#
""" + root = tree.getroot() + meta = createmeta(dict(name='title'), 'The SEO title') + appendmeta(tree, meta) + # info = root.find("./d:info", namespaces=NAMESPACES) + # info.append(meta) assert check_meta_title(tree, {}) is None -def test_check_meta_title_wrong_length(xmlparser): - xmlcontent = """
- - Test - The SEO title that is too long - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def test_check_meta_title_wrong_length(tree): +# xmlcontent = """
+# +# Test +# The SEO title that is too long +# +# +#
""" + root = tree.getroot() + meta = createmeta(dict(name='title'), + 'The SEO title that is too long', + root.nsmap) + appendmeta(tree, meta) + # info = root.find("./d:info", namespaces=NAMESPACES) + # info.append(meta) with pytest.raises(InvalidValueError, match=".*too long.*"): check_meta_title(tree, dict(metadata=dict(meta_title_length=10))) -def test_check_required_meta_title(xmlparser): - xmlcontent = """
- - Test - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def test_check_required_meta_title(tree): +# xmlcontent = """
+# +# Test +# +# +#
""" + with pytest.raises(InvalidValueError, match=".*required.*"): check_meta_title(tree, dict(metadata=dict(meta_title_required=True))) -def test_check_optional_meta_title(xmlparser): - xmlcontent = """
- - Test - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def test_check_optional_meta_title(tree): +# xmlcontent = """
+# +# Test +# +# +#
""" config = dict(metadata=dict(meta_title_required=False)) assert check_meta_title(tree, config) is None -def test_check_meta_description(xmlparser): - xmlcontent = """
- - Test - The SEO description - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) - +def test_check_meta_description(tree): +# xmlcontent = """
+# +# Test +# The SEO description +# +# +#
""" + meta = createmeta(dict(name='description'), 'The SEO description') + appendmeta(tree, meta) assert check_meta_description(tree, {}) is None -def test_check_meta_description_wrong_length(xmlparser): - xmlcontent = """
- - Test - The SEO description that is too long - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def test_check_meta_description_wrong_length(tree): +# xmlcontent = """
+# +# Test +# The SEO description that is too long +# +# +#
""" + meta = createmeta(dict(name='description'), + 'The SEO description that is too long') + appendmeta(tree, meta) with pytest.raises(InvalidValueError, match=".*too long.*"): check_meta_description(tree, dict(metadata=dict(meta_description_length=10))) -def test_check_required_meta_description(xmlparser): - xmlcontent = """
- - Test - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def test_check_required_meta_description(tree): +# xmlcontent = """
+# +# Test +# +# +#
""" + with pytest.raises(InvalidValueError, match=".*required.*"): check_meta_description(tree, dict(metadata=dict(meta_description_required=True))) -def test_check_optional_meta_description(xmlparser): - xmlcontent = """
- - Test - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def test_check_optional_meta_description(tree): +# xmlcontent = """
+# +# Test +# +# +#
""" + assert check_meta_description(tree, dict(metadata=dict(meta_description_required=False))) is None -def test_check_meta_series(xmlparser): - xmlcontent = """
- - Test - Products & Solutions - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def test_check_meta_series(tree): +# xmlcontent = """
+# +# Test +# Products & Solutions +# +# +#
""" + meta = createmeta(dict(name='series'), 'Products & Solutions') + appendmeta(tree, meta) config = dict(metadata=dict(require_meta_series=True, valid_meta_series=["Products & Solutions", @@ -151,32 +174,28 @@ def test_check_meta_series(xmlparser): assert check_meta_series(tree, config) is None -def test_check_missing_optional_meta_series(xmlparser): - xmlcontent = """
- - Test - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def test_check_missing_optional_meta_series(tree): +# xmlcontent = """
+# +# Test +# +# +#
""" config = dict(metadata=dict(require_meta_series=False)) assert check_meta_series(tree, config) is None -def test_check_wrong_meta_series(xmlparser): - xmlcontent = """
- - Test - Foo - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def test_check_wrong_meta_series(tree): +# xmlcontent = """
+# +# Test +# Foo +# +# +#
""" + meta = createmeta(dict(name='series'), 'Foo') + appendmeta(tree, meta) config = dict(metadata=dict(require_meta_series=True, valid_meta_series=["Best Practices", @@ -185,14 +204,13 @@ def test_check_wrong_meta_series(xmlparser): check_meta_series(tree, config) -def test_check_require_meta_series(xmlparser): - xmlcontent = """
- - Test - - -
""" - tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) +def test_check_require_meta_series(tree): +# xmlcontent = """
+# +# Test +# +# +#
""" config = dict( metadata=dict( @@ -205,159 +223,163 @@ def test_check_require_meta_series(xmlparser): check_meta_series(tree, config) -def test_check_meta_techpartner(xmlparser): - xmlcontent = """
- - Test - - Acme Inc. - Foo Corp. - - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def test_check_meta_techpartner(tree): +# xmlcontent = """
+# +# Test +# +# Acme Inc. +# Foo Corp. +# +# +# +#
""" + meta = createmeta(dict(name='techpartner')) + addphrase(meta, ["Acme Inc.", "Foo Corp."]) + appendmeta(tree, meta) assert check_meta_techpartner(tree, {}) is None -def test_check_missing_meta_techpartner(xmlparser): - xmlcontent = """
- - Test - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def test_check_missing_meta_techpartner(tree): +# xmlcontent = """
+# +# Test +# +# +#
""" config = dict(metadata=dict(require_meta_techpartner=True)) with pytest.raises(InvalidValueError, match=".*required.*"): check_meta_techpartner(tree, config) -def test_check_missing_children_in_meta_techpartner(xmlparser): - xmlcontent = """
- - Test - - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def test_check_missing_children_in_meta_techpartner(tree): +# xmlcontent = """
+# +# Test +# +# +# +#
""" + meta = createmeta(dict(name='techpartner')) + appendmeta(tree, meta) config = dict(metadata=dict(require_meta_techpartner=True)) with pytest.raises(InvalidValueError, match=".*Couldn't find any tech partners.*"): check_meta_techpartner(tree, config) -def test_check_meta_techpartner_with_nonunique_children(xmlparser): - xmlcontent = """
- - Test - - Acme Inc. - Acme Inc. - - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def test_check_meta_techpartner_with_nonunique_children(tree): +# xmlcontent = """
+# +# Test +# +# Acme Inc. +# Acme Inc. +# +# +# +#
""" + meta = createmeta(dict(name='techpartner')) + addphrase(meta, ["Acme Inc.", "Acme Inc."]) + appendmeta(tree, meta) config = dict(metadata=dict(require_meta_techpartner=True)) with pytest.raises(InvalidValueError, match=".*Duplicate tech partners.*"): check_meta_techpartner(tree, config) -def test_check_meta_platform(xmlparser): - xmlcontent = """
- - Test - Foo - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def test_check_meta_platform(tree): +# xmlcontent = """
+# +# Test +# Foo +# +# +#
""" + meta = createmeta(dict(name='platform'), 'Foo') + appendmeta(tree, meta) + config = dict(metadata=dict(require_meta_platform=True)) assert check_meta_platform(tree, config) is None -def test_check_missing_meta_platform(xmlparser): - xmlcontent = """
- - Test - - -
""" - tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) +def test_check_missing_meta_platform(tree): +# xmlcontent = """
+# +# Test +# +# +#
""" +# tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) + config = dict(metadata=dict(require_meta_platform=True)) with pytest.raises(InvalidValueError, match=r".*Couldn't find required meta.*"): check_meta_platform(tree, config) -def test_check_empty_meta_platform(xmlparser): - xmlcontent = """
- - Test - - - -
""" - tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) +def test_check_empty_meta_platform(tree): +# xmlcontent = """
+# +# Test +# +# +# +#
""" + meta = createmeta(dict(name='platform')) + appendmeta(tree, meta) + config = dict(metadata=dict(require_meta_platform=True)) with pytest.raises(InvalidValueError, match=r".*Empty meta.*"): check_meta_platform(tree, config) -def test_check_meta_architecture(xmlparser): - xmlcontent = """
- - Test - - x86_64 - - - -
""" - tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) +def test_check_meta_architecture(tree): +# xmlcontent = """
+# +# Test +# +# x86_64 +# +# +# +#
""" + meta = createmeta(dict(name='architecture')) + addphrase(meta, ["x86_64"]) + appendmeta(tree, meta) + config = dict(metadata=dict(require_meta_architecture=True, valid_meta_architecture=["x86_64", "POWER"])) assert check_meta_architecture(tree, config) is None -def test_check_missing_optional_meta_architecture(xmlparser): - xmlcontent = """
- - Test - - -
""" - tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) +def test_check_missing_optional_meta_architecture(tree): +# xmlcontent = """
+# +# Test +# +# +#
""" + config = dict(metadata=dict(require_meta_architecture=True)) with pytest.raises(InvalidValueError, match=r".*Couldn't find required meta.*"): check_meta_architecture(tree, config) -def test_check_missing_child_meta_architecture(xmlparser): - xmlcontent = """
- - Test - - - -
""" - tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) +def test_check_missing_child_meta_architecture(tree): +# xmlcontent = """
+# +# Test +# +# +# +#
""" + meta = createmeta(dict(name='architecture')) + appendmeta(tree, meta) + config = dict(metadata=dict(require_meta_architecture=True)) with pytest.raises( InvalidValueError, @@ -366,18 +388,21 @@ def test_check_missing_child_meta_architecture(xmlparser): check_meta_architecture(tree, config) -def test_check_duplicate_child_meta_architecture(xmlparser): - xmlcontent = """
- - Test - - x86_64 - x86_64 - - - -
""" - tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) +def test_check_duplicate_child_meta_architecture(tree): +# xmlcontent = """
+# +# Test +# +# x86_64 +# x86_64 +# +# +# +#
""" + meta = createmeta(dict(name='architecture')) + addphrase(meta, ["x86_64", "x86_64"]) + appendmeta(tree, meta) + config = dict(metadata=dict( require_meta_architecture=True, valid_meta_architecture=["x86_64", "POWER"])) @@ -387,18 +412,21 @@ def test_check_duplicate_child_meta_architecture(xmlparser): check_meta_architecture(tree, config) -def test_check_unknown_child_meta_architecture(xmlparser): - xmlcontent = """
- - Test - - x86_64 - foo - - - -
""" - tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) +def test_check_unknown_child_meta_architecture(tree): +# xmlcontent = """
+# +# Test +# +# x86_64 +# foo +# +# +# +#
""" + meta = createmeta(dict(name='architecture')) + addphrase(meta, ["x86_64", "foo"]) + appendmeta(tree, meta) + config = dict( metadata=dict( require_meta_architecture=True, @@ -410,81 +438,92 @@ def test_check_unknown_child_meta_architecture(xmlparser): check_meta_architecture(tree, config) -def test_meta_category(xmlparser): - xmlcontent = """
- - Test - - Systems Management - - - -
""" - tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) +def test_meta_category(tree): +# xmlcontent = """
+# +# Test +# +# Systems Management +# +# +# +#
""" + meta = createmeta(dict(name='category')) + addphrase(meta, ["Systems Management"]) + appendmeta(tree, meta) + config = dict(metadata=dict(require_meta_category=True, valid_meta_category=["Systems Management"])) assert check_meta_category(tree, config) is None -def test_missing_optional_meta_category(xmlparser): - xmlcontent = """
- - Test - - -
""" - tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) +def test_missing_optional_meta_category(tree): +# xmlcontent = """
+# +# Test +# +# +#
""" + config = dict(metadata=dict(require_meta_category=True)) with pytest.raises(InvalidValueError, match=r".*Couldn't find required meta.*"): check_meta_category(tree, config) -def test_missing_child_meta_category(xmlparser): - xmlcontent = """
- - Test - - - -
""" - tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) +def test_missing_child_meta_category(tree): +# xmlcontent = """
+# +# Test +# +# +# +#
""" + meta = createmeta(dict(name='category')) + appendmeta(tree, meta) + config = dict(metadata=dict(require_meta_category=True)) with pytest.raises(InvalidValueError, match=r".*Couldn't find any child elements in meta.*"): check_meta_category(tree, config) -def test_duplicate_child_meta_category(xmlparser): - xmlcontent = """
- - Test - - Systems Management - Systems Management - - - -
""" - tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) +def test_duplicate_child_meta_category(tree): +# xmlcontent = """
+# +# Test +# +# Systems Management +# Systems Management +# +# +# +#
""" + meta = createmeta(dict(name='category')) + addphrase(meta, ["Systems Management", "Systems Management"]) + appendmeta(tree, meta) + config = dict(metadata=dict(require_meta_category=True)) with pytest.raises(InvalidValueError, match=r".*Duplicate categories found in meta.*"): check_meta_category(tree, config) -def test_unknown_category_meta_category(xmlparser): - xmlcontent = """
- - Test - - Systems Management - Foo - - - -
""" - tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) +def test_unknown_category_meta_category(tree): +# xmlcontent = """
+# +# Test +# +# Systems Management +# Foo +# +# +# +#
""" + meta = createmeta(dict(name='category')) + addphrase(meta, ["Systems Management", "Foo"]) + appendmeta(tree, meta) + config = dict(metadata=dict(require_meta_category=True, valid_meta_category=["Systems Management"])) with pytest.raises(InvalidValueError, @@ -492,29 +531,34 @@ def test_unknown_category_meta_category(xmlparser): check_meta_category(tree, config) -def test_meta_task(xmlparser): - xmlcontent = """
- - Test - - Configuration - - -
""" - tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) +def test_meta_task(tree): +# xmlcontent = """
+# +# Test +# +# Configuration +# +# +#
""" + meta = createmeta(dict(name='task')) + addphrase(meta, ["Configuration"]) + appendmeta(tree, meta) + config = dict(metadata=dict(require_meta_task=True, valid_meta_task=["Configuration"])) assert check_meta_task(tree, config) is None -def test_missing_child_meta_task(xmlparser): - xmlcontent = """
- - Test - - -
""" - tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) +def test_missing_child_meta_task(tree): +# xmlcontent = """
+# +# Test +# +# +#
""" + meta = createmeta(dict(name='task')) + appendmeta(tree, meta) + config = dict(metadata=dict(require_meta_task=True), valid_meta_task=["Configuration"]) with pytest.raises(InvalidValueError, @@ -522,18 +566,20 @@ def test_missing_child_meta_task(xmlparser): check_meta_task(tree, config) +def test_duplicate_child_meta_task(tree): +# xmlcontent = """
+# +# Test +# +# Configuration +# Configuration +# +# +#
""" + meta = createmeta(dict(name='task')) + addphrase(meta, ["Configuration", "Configuration"]) + appendmeta(tree, meta) -def test_duplicate_child_meta_task(xmlparser): - xmlcontent = """
- - Test - - Configuration - Configuration - - -
""" - tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) config = dict(metadata=dict(require_meta_task=True), valid_meta_task=["Configuration"]) with pytest.raises(InvalidValueError, From d533ca3e1a0207f14a5397a845c79726db0e5c52 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Thu, 27 Jun 2024 19:27:35 +0200 Subject: [PATCH 075/107] Add missing output of script in README --- python-scripts/metadatavalidator/README.rst | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python-scripts/metadatavalidator/README.rst b/python-scripts/metadatavalidator/README.rst index 9708911d3..8dac076ff 100644 --- a/python-scripts/metadatavalidator/README.rst +++ b/python-scripts/metadatavalidator/README.rst @@ -48,8 +48,13 @@ The script will show all problems with metadata 1.1: check_info_revhistory_revision: Missing recommended attribute in /d:article/d:info[2]/d:revhistory[12]/d:revision/@xml:id [2] b.xml: - 2.1: + 2.1: check_meta_task: Invalid value in metadata Unknown task(s) {'Clusering'}. Allowed are ... +If wanted, you can add your own configuration file with the option :option:`--config`: + +.. code-block:: bash + + $ metadatavalidator --config /path/to/config.ini PATH_TO_DOCBOOK_FILES Configuration From f92de25f0d1d8293a477565d7a90cd30e41e5df7 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Thu, 27 Jun 2024 19:28:39 +0200 Subject: [PATCH 076/107] Add return type --- python-scripts/metadatavalidator/tests/conftest.py | 4 ++-- .../metadatavalidator/tests/unit/checks/test_check_meta.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/python-scripts/metadatavalidator/tests/conftest.py b/python-scripts/metadatavalidator/tests/conftest.py index 2ac0c52cb..cc448a684 100644 --- a/python-scripts/metadatavalidator/tests/conftest.py +++ b/python-scripts/metadatavalidator/tests/conftest.py @@ -18,7 +18,7 @@ def xmlparser(): @pytest.fixture -def xmlcontent(): +def xmlcontent() -> str: return """
Test @@ -28,7 +28,7 @@ def xmlcontent(): @pytest.fixture -def tree(xmlcontent, xmlparser): +def tree(xmlcontent, xmlparser) -> etree._ElementTree: return etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py index df2c7b2d4..ff3141e84 100644 --- a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py @@ -14,6 +14,7 @@ ) from metadatavalidator.exceptions import InvalidValueError + def createmeta(attribs: dict, text: str|None=None, nsmap: dict|None=None): From 4ababb3e7cd0d1a17071f0aadb6dea6436506879 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Thu, 27 Jun 2024 19:40:50 +0200 Subject: [PATCH 077/107] Introduce getinfo() and info_or_fail() * getinfo(): returns the <info> element of a regular DocBook root element (book, article) or of an assembly. Returns None if not found.
* info_or_fail(): Get the <info> element from the tree or raise an error --- .../metadatavalidator/checks/check_info.py | 13 +-- .../src/metadatavalidator/util.py | 31 ++++++++ .../metadatavalidator/tests/conftest.py | 22 ++++++ .../tests/unit/checks/test_check_info.py | 79 +++++++++++-------- 4 files changed, 105 insertions(+), 40 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py index 0b985ccd7..2c649ef01 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py @@ -6,13 +6,14 @@ from lxml import etree from ..common import ( - DATE_REGEX, DOCBOOK_NS, XML_NS, ) from ..exceptions import InvalidValueError, MissingAttributeWarning from ..logging import log from ..util import ( + getinfo, + info_or_fail, getfullxpath, validatedate, validatedatevalue @@ -21,18 +22,12 @@ def check_info(tree: etree._ElementTree, config: dict[t.Any, t.Any]): """Checks for an info element""" - root = tree.getroot() - info = root.find(".//{%s}info" % DOCBOOK_NS) - if info is None: - raise InvalidValueError(f"Couldn't find info element in {root.tag}.") + info_or_fail(tree) def check_info_revhistory(tree: etree._ElementTree, config: dict[t.Any, t.Any]): """Checks for an info/revhistory element""" - info = tree.find("./d:info", namespaces={"d": DOCBOOK_NS}) - if info is None: - # If couldn't be found, we can't check - return + info = info_or_fail(tree) required = config.get("metadata", {}).get("require_revhistory", False) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/util.py b/python-scripts/metadatavalidator/src/metadatavalidator/util.py index 3510cabe8..1bd2190a6 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/util.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/util.py @@ -4,11 +4,42 @@ from .common
import ( DATE_REGEX, + NAMESPACES, NAMESPACES2PREFIX, ) from .exceptions import InvalidValueError +def getinfo(tree: etree._ElementTree) -> etree._Element: + """Get the element from a DocBook5 XML tree + + :param tree: the XML tree to get the element from + :return: the element + """ + # Check if we get an element from "normal" root elements or + # from an assembly structure. + a = tree.find("./d:info", namespaces=NAMESPACES) + b = tree.find("./d:structure/d:merge/d:info", namespaces=NAMESPACES) + + if a is not None: + return a + if b is not None: + return b + + +def info_or_fail(tree: etree._ElementTree, raise_on_missing=True) -> etree._Element: + """Get the element from a DocBook5 XML tree or raise an error + + :param tree: the XML tree to get the element from + :param raise_on_missing: whether to raise an error if the element is missing + :return: the element + """ + info = getinfo(tree) + if info is None and raise_on_missing: + raise InvalidValueError("Couldn't find element.") + return info + + def getfullxpath(element: etree._Element, ns2prefix:dict[str, str]=NAMESPACES2PREFIX) -> str: """Return the full XPath including predicates to this element diff --git a/python-scripts/metadatavalidator/tests/conftest.py b/python-scripts/metadatavalidator/tests/conftest.py index cc448a684..003c28246 100644 --- a/python-scripts/metadatavalidator/tests/conftest.py +++ b/python-scripts/metadatavalidator/tests/conftest.py @@ -32,6 +32,28 @@ def tree(xmlcontent, xmlparser) -> etree._ElementTree: return etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) +@pytest.fixture +def assemblystr() -> str: + return """ + + + + + +""" + + +@pytest.fixture +def asmtree(assemblystr, xmlparser) -> etree._ElementTree: + return etree.ElementTree(etree.fromstring(assemblystr, parser=xmlparser)) + + @pytest.fixture(scope="function") def config() -> ConfigParser: config = ConfigParser() diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_info.py 
b/python-scripts/metadatavalidator/tests/unit/checks/test_check_info.py index fa6a79e88..b30ddc60c 100644 --- a/python-scripts/metadatavalidator/tests/unit/checks/test_check_info.py +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_info.py @@ -1,6 +1,7 @@ import pytest from lxml import etree +from metadatavalidator.common import NAMESPACES from metadatavalidator.checks import ( check_info, check_info_revhistory, @@ -8,20 +9,48 @@ check_info_revhistory_revision_date, check_info_revhistory_revision_order, ) +from metadatavalidator.util import getinfo, info_or_fail + from metadatavalidator.exceptions import InvalidValueError, MissingAttributeWarning -def test_check_info(xmlparser): - xmlcontent = """
- - Test - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def test_getinfo_with_regular_tree(tree): + info = getinfo(tree) + assert info is not None + assert info.tag == "{http://docbook.org/ns/docbook}info" + + +def test_getinfo_with_assembly_tree(asmtree): + info = getinfo(asmtree) + assert info is not None + assert info.tag == "{http://docbook.org/ns/docbook}info" + + +def test_info_or_fail_with_regular_tree(tree): + info = info_or_fail(tree) + assert info is not None + assert info.tag == "{http://docbook.org/ns/docbook}info" + + +def test_info_or_fail_with_assembly_tree(asmtree): + info = info_or_fail(asmtree) + assert info is not None + assert info.tag == "{http://docbook.org/ns/docbook}info" + + +def test_info_or_fail_with_raise_on_missing(): + tree = etree.ElementTree(etree.Element("{http://docbook.org/ns/docbook}article")) + info = info_or_fail(tree, raise_on_missing=False) + assert info is None + + +def test_info_or_fail_with_raise_on_missing_and_missing_info(): + tree = etree.ElementTree(etree.Element("{http://docbook.org/ns/docbook}article")) + with pytest.raises(InvalidValueError, match="Couldn't find element."): + info_or_fail(tree) + +def test_check_info(tree): assert check_info(tree, {}) is None @@ -33,21 +62,11 @@ def test_check_info_missing(xmlparser): etree.fromstring(xmlcontent, parser=xmlparser) ) with pytest.raises(InvalidValueError, - match="Couldn't find info element"): + match=".*Couldn't find element."): check_info(tree, {}) -def test_check_info_revhistory_missing(xmlparser): - xmlcontent = """
- - Test - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) - +def test_check_info_revhistory_missing(tree): with pytest.raises(InvalidValueError, match="Couldn't find a revhistory element"): check_info_revhistory(tree, {"metadata": {"require_revhistory": True}}) @@ -68,15 +87,13 @@ def test_check_info_revhistory(xmlparser): assert check_info_revhistory(tree, {}) is None -def test_check_info_revhistory_without_info(xmlparser): - xmlcontent = """
- -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) - - assert check_info_revhistory(tree, {}) is None +def test_check_info_revhistory_without_info(tree): + info = tree.find("./d:info", namespaces=NAMESPACES) + info.getparent().remove(info) + # tree.remove(info) + with pytest.raises(InvalidValueError, + match="Couldn't find element."): + check_info_revhistory(tree, {}) def test_check_info_revhistory_xmlid(xmlparser): From 5f42c1aec1dd3133b8c978b7a583b07cca020e94 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Thu, 27 Jun 2024 21:19:42 +0200 Subject: [PATCH 078/107] Use tree fixture and add subelement of --- .../tests/unit/checks/test_check_info.py | 317 ++++++++++-------- .../tests/unit/checks/test_check_meta.py | 75 ++--- 2 files changed, 197 insertions(+), 195 deletions(-) diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_info.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_info.py index b30ddc60c..536a88912 100644 --- a/python-scripts/metadatavalidator/tests/unit/checks/test_check_info.py +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_info.py @@ -1,6 +1,8 @@ import pytest from lxml import etree +from _utils import createmeta, appendnode, createinfoelement, addsub + from metadatavalidator.common import NAMESPACES from metadatavalidator.checks import ( check_info, @@ -72,17 +74,20 @@ def test_check_info_revhistory_missing(tree): check_info_revhistory(tree, {"metadata": {"require_revhistory": True}}) -def test_check_info_revhistory(xmlparser): - xmlcontent = """
- - Test - - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def test_check_info_revhistory(tree): +# xmlcontent = """
+# +# Test +# +# +# +#
""" +# tree = etree.ElementTree( +# etree.fromstring(xmlcontent, parser=xmlparser) +# ) + revhistory = createinfoelement("revhistory", + {f"{{{NAMESPACES['xml']}}}id": "rh"}) + appendnode(tree, revhistory) assert check_info_revhistory(tree, {}) is None @@ -90,94 +95,102 @@ def test_check_info_revhistory(xmlparser): def test_check_info_revhistory_without_info(tree): info = tree.find("./d:info", namespaces=NAMESPACES) info.getparent().remove(info) - # tree.remove(info) with pytest.raises(InvalidValueError, match="Couldn't find element."): check_info_revhistory(tree, {}) -def test_check_info_revhistory_xmlid(xmlparser): - xmlcontent = """
- - Test - - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def test_check_info_revhistory_xmlid(tree): +# xmlcontent = """
+# +# Test +# +# +# +#
""" + revhistory = createinfoelement("revhistory", + {f"{{{NAMESPACES['xml']}}}id": "rh1"}) + appendnode(tree, revhistory) assert check_info_revhistory(tree, {}) is None -def test_info_revhistory_missing_xmlid(xmlparser): - xmlcontent = """
- - Test - - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def test_info_revhistory_missing_xmlid(tree): +# xmlcontent = """
+# +# Test +# +# +# +#
""" + revhistory = createinfoelement("revhistory") + appendnode(tree, revhistory) with pytest.raises(InvalidValueError, match="Couldn't find xml:id attribute"): check_info_revhistory(tree, {}) -def test_check_info_revhistory_xmlid_with_wrong_value(xmlparser): - xmlcontent = """
- - Test - - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def test_check_info_revhistory_xmlid_with_wrong_value(tree): +# xmlcontent = """
+# +# Test +# +# +# +#
""" + revhistory = createinfoelement("revhistory", + {f"{{{NAMESPACES['xml']}}}id": "wrong_id"}) + appendnode(tree, revhistory) with pytest.raises(InvalidValueError, match="should start with 'rh'"): check_info_revhistory(tree, {}) -def test_check_info_revhistory_revision(xmlparser): - xmlcontent = """
- - Test - - - 2021-01-01 - - - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def test_check_info_revhistory_revision(tree): +# xmlcontent = """
+# +# Test +# +# +# 2021-01-01 +# +# +# +# +#
""" + revhistory = createinfoelement("revhistory") + revision = etree.SubElement(revhistory, + f"{{{NAMESPACES['d']}}}revision", + {f"{{{NAMESPACES['xml']}}}id": "rh"} + ) + date = etree.SubElement(revision, f"{{{NAMESPACES['d']}}}date") + date.text = "2021-01-01" + appendnode(tree, revhistory) + + print(etree.tostring(tree.getroot(), pretty_print=True).decode("utf-8")) + assert check_info_revhistory_revision(tree, {}) is None -def test_check_info_revhistory_revision_missing_xmlid(xmlparser): - xmlcontent = """
- - Test - - - 2021-01-01 - - - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def test_check_info_revhistory_revision_missing_xmlid(tree): +# xmlcontent = """
+# +# Test +# +# +# 2021-01-01 +# +# +# +# +#
""" + revhistory = createinfoelement("revhistory") + revision = etree.SubElement(revhistory, f"{{{NAMESPACES['d']}}}revision") + date = etree.SubElement(revision, f"{{{NAMESPACES['d']}}}date") + date.text = "2021-01-01" + appendnode(tree, revhistory) with pytest.raises(MissingAttributeWarning, match="Missing recommended attribute in"): @@ -186,17 +199,16 @@ def test_check_info_revhistory_revision_missing_xmlid(xmlparser): {"metadata": {"require_xmlid_on_revision": True}}) -def test_check_info_revhistory_revision_missing(xmlparser): - xmlcontent = """
- - Test - - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def test_check_info_revhistory_revision_missing(tree): +# xmlcontent = """
+# +# Test +# +# +# +#
""" + revhistory = createinfoelement("revhistory") + appendnode(tree, revhistory) with pytest.raises(InvalidValueError, match="Couldn't find a revision element"): @@ -206,79 +218,96 @@ def test_check_info_revhistory_revision_missing(xmlparser): ) -def test_check_info_revhistory_revision_date(xmlparser): - xmlcontent = """
- - Test - - - 2021-01-01 - - - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def test_check_info_revhistory_revision_date(tree): +# xmlcontent = """
+# +# Test +# +# +# 2021-01-01 +# +# +# +# +#
""" + revhistory = createinfoelement("revhistory") + revision = etree.SubElement(revhistory, f"{{{NAMESPACES['d']}}}revision") + date = etree.SubElement(revision, f"{{{NAMESPACES['d']}}}date") + date.text = "2021-01-01" + appendnode(tree, revhistory) assert check_info_revhistory_revision_date(tree, {}) is None -def test_check_info_revhistory_revision_date_missing(xmlparser): - xmlcontent = """
- - Test - - - - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def test_check_info_revhistory_revision_date_missing(tree): +# xmlcontent = """
+# +# Test +# +# +# +# +# +#
""" + revhistory = createinfoelement("revhistory") + revision = etree.SubElement(revhistory, f"{{{NAMESPACES['d']}}}revision") + appendnode(tree, revhistory) + with pytest.raises(InvalidValueError, match="Couldn't find a date element"): check_info_revhistory_revision_date(tree, {}) -def test_check_info_revhistory_revision_date_invalid_format(xmlparser): - xmlcontent = """
- - Test - - - January 2024 - - - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def test_check_info_revhistory_revision_date_invalid_format(tree): +# xmlcontent = """
+# +# Test +# +# +# January 2024 +# +# +# +# +#
""" + nsmap = tree.getroot().nsmap + revhistory = createinfoelement("revhistory", + {f"{{{NAMESPACES['xml']}}}id": "rh"}, + nsmap=nsmap) + revision = etree.SubElement(revhistory, + f"{{{NAMESPACES['d']}}}revision", + ) + date = etree.SubElement(revision, + f"{{{NAMESPACES['d']}}}date", + ) + date.text = "January 2024" + appendnode(tree, revhistory) with pytest.raises(InvalidValueError, match=".*ate is empty or has invalid format.*"): check_info_revhistory_revision_date(tree, {}) -def test_check_info_revhistory_revision_date_invalid_value(xmlparser): - xmlcontent = """
- - Test - - - 2024-13 - - - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def test_check_info_revhistory_revision_date_invalid_value(tree): +# xmlcontent = """
+# +# Test +# +# +# 2024-13 +# +# +# +# +#
""" + revhistory = createinfoelement("revhistory", + {f"{{{NAMESPACES['xml']}}}id": "rh"}) + revision = etree.SubElement(revhistory, + f"{{{NAMESPACES['d']}}}revision") + date = etree.SubElement(revision, + f"{{{NAMESPACES['d']}}}date") + date.text = "2024-13" + appendnode(tree, revhistory) with pytest.raises(InvalidValueError, match="Invalid value in metadata" diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py index ff3141e84..9c1dc17bd 100644 --- a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py @@ -1,6 +1,8 @@ from lxml import etree import pytest +from _utils import createmeta, appendnode, addphrase + from metadatavalidator.common import NAMESPACES from metadatavalidator.checks.check_meta import ( check_meta_title, @@ -15,35 +17,6 @@ from metadatavalidator.exceptions import InvalidValueError -def createmeta(attribs: dict, - text: str|None=None, - nsmap: dict|None=None): - """Create a element with the given attributes and text.""" - if nsmap is None: - nsmap = {None: NAMESPACES['d']} - - meta = etree.Element(f"{{{NAMESPACES['d']}}}meta", - attrib=attribs, - nsmap=nsmap) - meta.text = text - return meta - - -def appendmeta(tree: etree._ElementTree, meta: etree._Element): - """Append the given element to the element in the tree.""" - root = tree.getroot() - info = root.find("./d:info", namespaces=NAMESPACES) - info.append(meta) - return tree - - -def addphrase(meta: etree._Element, phrases: list): - """Add a list of elements to the given element.""" - for phrase in phrases: - etree.SubElement(meta, f"{{{NAMESPACES['d']}}}phrase").text = phrase - return meta - - def test_check_meta_title(tree): # xmlcontent = """
# @@ -54,7 +27,7 @@ def test_check_meta_title(tree): #
""" root = tree.getroot() meta = createmeta(dict(name='title'), 'The SEO title') - appendmeta(tree, meta) + appendnode(tree, meta) # info = root.find("./d:info", namespaces=NAMESPACES) # info.append(meta) @@ -73,7 +46,7 @@ def test_check_meta_title_wrong_length(tree): meta = createmeta(dict(name='title'), 'The SEO title that is too long', root.nsmap) - appendmeta(tree, meta) + appendnode(tree, meta) # info = root.find("./d:info", namespaces=NAMESPACES) # info.append(meta) @@ -113,7 +86,7 @@ def test_check_meta_description(tree): # #
""" meta = createmeta(dict(name='description'), 'The SEO description') - appendmeta(tree, meta) + appendnode(tree, meta) assert check_meta_description(tree, {}) is None @@ -127,7 +100,7 @@ def test_check_meta_description_wrong_length(tree): #
""" meta = createmeta(dict(name='description'), 'The SEO description that is too long') - appendmeta(tree, meta) + appendnode(tree, meta) with pytest.raises(InvalidValueError, match=".*too long.*"): check_meta_description(tree, dict(metadata=dict(meta_description_length=10))) @@ -166,7 +139,7 @@ def test_check_meta_series(tree): # #
""" meta = createmeta(dict(name='series'), 'Products & Solutions') - appendmeta(tree, meta) + appendnode(tree, meta) config = dict(metadata=dict(require_meta_series=True, valid_meta_series=["Products & Solutions", @@ -196,7 +169,7 @@ def test_check_wrong_meta_series(tree): # #
""" meta = createmeta(dict(name='series'), 'Foo') - appendmeta(tree, meta) + appendnode(tree, meta) config = dict(metadata=dict(require_meta_series=True, valid_meta_series=["Best Practices", @@ -237,7 +210,7 @@ def test_check_meta_techpartner(tree): #
""" meta = createmeta(dict(name='techpartner')) addphrase(meta, ["Acme Inc.", "Foo Corp."]) - appendmeta(tree, meta) + appendnode(tree, meta) assert check_meta_techpartner(tree, {}) is None @@ -264,7 +237,7 @@ def test_check_missing_children_in_meta_techpartner(tree): # #
""" meta = createmeta(dict(name='techpartner')) - appendmeta(tree, meta) + appendnode(tree, meta) config = dict(metadata=dict(require_meta_techpartner=True)) with pytest.raises(InvalidValueError, match=".*Couldn't find any tech partners.*"): @@ -284,7 +257,7 @@ def test_check_meta_techpartner_with_nonunique_children(tree): #
""" meta = createmeta(dict(name='techpartner')) addphrase(meta, ["Acme Inc.", "Acme Inc."]) - appendmeta(tree, meta) + appendnode(tree, meta) config = dict(metadata=dict(require_meta_techpartner=True)) with pytest.raises(InvalidValueError, match=".*Duplicate tech partners.*"): @@ -300,7 +273,7 @@ def test_check_meta_platform(tree): # #
""" meta = createmeta(dict(name='platform'), 'Foo') - appendmeta(tree, meta) + appendnode(tree, meta) config = dict(metadata=dict(require_meta_platform=True)) assert check_meta_platform(tree, config) is None @@ -330,7 +303,7 @@ def test_check_empty_meta_platform(tree): # #
""" meta = createmeta(dict(name='platform')) - appendmeta(tree, meta) + appendnode(tree, meta) config = dict(metadata=dict(require_meta_platform=True)) with pytest.raises(InvalidValueError, match=r".*Empty meta.*"): @@ -349,7 +322,7 @@ def test_check_meta_architecture(tree): # """ meta = createmeta(dict(name='architecture')) addphrase(meta, ["x86_64"]) - appendmeta(tree, meta) + appendnode(tree, meta) config = dict(metadata=dict(require_meta_architecture=True, valid_meta_architecture=["x86_64", "POWER"])) @@ -379,7 +352,7 @@ def test_check_missing_child_meta_architecture(tree): # # """ meta = createmeta(dict(name='architecture')) - appendmeta(tree, meta) + appendnode(tree, meta) config = dict(metadata=dict(require_meta_architecture=True)) with pytest.raises( @@ -402,7 +375,7 @@ def test_check_duplicate_child_meta_architecture(tree): # """ meta = createmeta(dict(name='architecture')) addphrase(meta, ["x86_64", "x86_64"]) - appendmeta(tree, meta) + appendnode(tree, meta) config = dict(metadata=dict( require_meta_architecture=True, @@ -426,7 +399,7 @@ def test_check_unknown_child_meta_architecture(tree): # """ meta = createmeta(dict(name='architecture')) addphrase(meta, ["x86_64", "foo"]) - appendmeta(tree, meta) + appendnode(tree, meta) config = dict( metadata=dict( @@ -451,7 +424,7 @@ def test_meta_category(tree): # """ meta = createmeta(dict(name='category')) addphrase(meta, ["Systems Management"]) - appendmeta(tree, meta) + appendnode(tree, meta) config = dict(metadata=dict(require_meta_category=True, valid_meta_category=["Systems Management"])) @@ -481,7 +454,7 @@ def test_missing_child_meta_category(tree): # # """ meta = createmeta(dict(name='category')) - appendmeta(tree, meta) + appendnode(tree, meta) config = dict(metadata=dict(require_meta_category=True)) with pytest.raises(InvalidValueError, @@ -502,7 +475,7 @@ def test_duplicate_child_meta_category(tree): # """ meta = createmeta(dict(name='category')) addphrase(meta, ["Systems Management", "Systems 
Management"]) - appendmeta(tree, meta) + appendnode(tree, meta) config = dict(metadata=dict(require_meta_category=True)) with pytest.raises(InvalidValueError, @@ -523,7 +496,7 @@ def test_unknown_category_meta_category(tree): # """ meta = createmeta(dict(name='category')) addphrase(meta, ["Systems Management", "Foo"]) - appendmeta(tree, meta) + appendnode(tree, meta) config = dict(metadata=dict(require_meta_category=True, valid_meta_category=["Systems Management"])) @@ -543,7 +516,7 @@ def test_meta_task(tree): # """ meta = createmeta(dict(name='task')) addphrase(meta, ["Configuration"]) - appendmeta(tree, meta) + appendnode(tree, meta) config = dict(metadata=dict(require_meta_task=True, valid_meta_task=["Configuration"])) @@ -558,7 +531,7 @@ def test_missing_child_meta_task(tree): #
# """ meta = createmeta(dict(name='task')) - appendmeta(tree, meta) + appendnode(tree, meta) config = dict(metadata=dict(require_meta_task=True), valid_meta_task=["Configuration"]) @@ -579,7 +552,7 @@ def test_duplicate_child_meta_task(tree): # """ meta = createmeta(dict(name='task')) addphrase(meta, ["Configuration", "Configuration"]) - appendmeta(tree, meta) + appendnode(tree, meta) config = dict(metadata=dict(require_meta_task=True), valid_meta_task=["Configuration"]) From 86e56a4cb3f118d39b134629f679af886dd90ef0 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Thu, 27 Jun 2024 21:32:23 +0200 Subject: [PATCH 079/107] Use info_or_fail() and NAMESPACES dict --- .../metadatavalidator/checks/check_info.py | 24 ++++++++------- .../metadatavalidator/checks/check_meta.py | 30 +++++++++++-------- 2 files changed, 31 insertions(+), 23 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py index 2c649ef01..d2c14b07a 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py @@ -6,7 +6,7 @@ from lxml import etree from ..common import ( - DOCBOOK_NS, + NAMESPACES, XML_NS, ) from ..exceptions import InvalidValueError, MissingAttributeWarning @@ -31,7 +31,7 @@ def check_info_revhistory(tree: etree._ElementTree, config: dict[t.Any, t.Any]): required = config.get("metadata", {}).get("require_revhistory", False) - revhistory = info.find("./d:revhistory", namespaces={"d": DOCBOOK_NS}) + revhistory = info.find("./d:revhistory", namespaces=NAMESPACES) if revhistory is None: if required: raise InvalidValueError(f"Couldn't find a revhistory element in {info.tag}.") @@ -48,12 +48,14 @@ def check_info_revhistory(tree: etree._ElementTree, config: dict[t.Any, t.Any]): def check_info_revhistory_revision(tree: etree._ElementTree, config: dict[t.Any, 
t.Any]): """Checks for an info/revhistory/revision element""" - revhistory = tree.find("./d:info/d:revhistory", namespaces={"d": DOCBOOK_NS}) + info = info_or_fail(tree) + revhistory = info.find("./d:revhistory", namespaces=NAMESPACES) + if revhistory is None: # If couldn't be found, we can't check return - revision = revhistory.find("./d:revision", namespaces={"d": DOCBOOK_NS}) + revision = revhistory.find("./d:revision", namespaces=NAMESPACES) if revision is None: raise InvalidValueError(f"Couldn't find a revision element in {revhistory.tag}.") xmlid = revision.attrib.get(f"{{{XML_NS}}}id") @@ -67,12 +69,13 @@ def check_info_revhistory_revision(tree: etree._ElementTree, def check_info_revhistory_revision_date(tree: etree._ElementTree, config: dict[t.Any, t.Any]): """Checks for an info/revhistory/revision/date element""" - date = tree.find("./d:info/d:revhistory/d:revision/d:date", - namespaces={"d": DOCBOOK_NS}) + info = info_or_fail(tree) - revhistory = tree.find("./d:info/d:revhistory", namespaces={"d": DOCBOOK_NS}) + revhistory = info.find("./d:revhistory", namespaces=NAMESPACES) if revhistory is None: return None + + date = revhistory.find("./d:revision/d:date", namespaces=NAMESPACES) if date is None: raise InvalidValueError(f"Couldn't find a date element in info/revhistory/revision.") @@ -82,16 +85,17 @@ def check_info_revhistory_revision_date(tree: etree._ElementTree, def check_info_revhistory_revision_order(tree: etree._ElementTree, config: dict[t.Any, t.Any]): """Checks for the right order of info/revhistory/revision elements""" - revhistory = tree.find("./d:info/d:revhistory", namespaces={"d": DOCBOOK_NS}) + info = info_or_fail(tree) + revhistory = info.find("./d:revhistory", namespaces=NAMESPACES) if revhistory is None: return revisions = revhistory.xpath("./d:revision", - namespaces={"d": DOCBOOK_NS}) + namespaces=NAMESPACES) xpath = getfullxpath(revhistory) if not revisions: return None - date_elements = [rev.find("./d:date", namespaces={"d": 
DOCBOOK_NS}) + date_elements = [rev.find("./d:date", namespaces=NAMESPACES) for rev in revisions] dates = [ validatedatevalue(d.text) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py index e8ace5e29..6e9e0e324 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py @@ -11,13 +11,15 @@ from ..common import NAMESPACES from ..exceptions import InvalidValueError from ..logging import log +from ..util import info_or_fail def check_meta_title(tree: etree._ElementTree, config: dict[t.Any, t.Any]): """Checks for a element""" root = tree.getroot() - meta = root.find("./d:info/d:meta[@name='title']", namespaces=NAMESPACES) + info = info_or_fail(tree) + meta = info.find("./d:meta[@name='title']", namespaces=NAMESPACES) required = config.get("metadata", {}).get("meta_title_required", False) if meta is None: if required: @@ -35,7 +37,8 @@ def check_meta_description(tree: etree._ElementTree, config: dict[t.Any, t.Any]): """Checks for a element""" root = tree.getroot() - meta = root.find("./d:info/d:meta[@name='description']", namespaces=NAMESPACES) + info = info_or_fail(tree) + meta = info.find("./d:meta[@name='description']", namespaces=NAMESPACES) required = config.get("metadata", {}).get("meta_description_required", False) if meta is None: if required: @@ -53,7 +56,8 @@ def check_meta_series(tree: etree._ElementTree, config: dict[t.Any, t.Any]): """Checks for a element""" root = tree.getroot() - meta = root.find("./d:info/d:meta[@name='series']", namespaces=NAMESPACES) + info = info_or_fail(tree) + meta = info.find("./d:meta[@name='series']", namespaces=NAMESPACES) required = config.get("metadata", {}).get("require_meta_series", False) if meta is None: if required: @@ -76,8 +80,8 @@ def check_meta_techpartner(tree: etree._ElementTree, config: 
dict[t.Any, t.Any]): """Checks for a element""" root = tree.getroot() - meta = root.find("./d:info/d:meta[@name='techpartner']", - namespaces=NAMESPACES) + info = info_or_fail(tree) + meta = info.find("./d:meta[@name='techpartner']", namespaces=NAMESPACES) required = config.get("metadata", {}).get("require_meta_techpartner", False) if meta is None: if required: @@ -107,8 +111,8 @@ def check_meta_platform(tree: etree._ElementTree, config: dict[t.Any, t.Any]): """Checks for a element""" root = tree.getroot() - meta = root.find("./d:info/d:meta[@name='platform']", - namespaces=NAMESPACES) + info = info_or_fail(tree) + meta = info.find("./d:meta[@name='platform']", namespaces=NAMESPACES) required = config.get("metadata", {}).get("require_meta_platform", False) if meta is None: if required: @@ -126,8 +130,8 @@ def check_meta_architecture(tree: etree._ElementTree, config: dict[t.Any, t.Any]): """Checks for a element""" root = tree.getroot() - meta = root.find("./d:info/d:meta[@name='architecture']", - namespaces=NAMESPACES) + info = info_or_fail(tree) + meta = info.find("./d:meta[@name='architecture']", namespaces=NAMESPACES) required = config.get("metadata", {}).get("require_meta_architecture", False) if meta is None: @@ -172,8 +176,8 @@ def check_meta_category(tree: etree._ElementTree, config: dict[t.Any, t.Any]): """Checks for a element""" root = tree.getroot() - meta = root.find("./d:info/d:meta[@name='category']", - namespaces=NAMESPACES) + info = info_or_fail(tree) + meta = info.find("./d:meta[@name='category']", namespaces=NAMESPACES) required = config.get("metadata", {}).get("require_meta_category", False) if meta is None: if required: @@ -217,8 +221,8 @@ def check_meta_task(tree: etree._ElementTree, config: dict[t.Any, t.Any]): """Checks for a element""" root = tree.getroot() - meta = root.find("./d:info/d:meta[@name='task']", - namespaces=NAMESPACES) + info = info_or_fail(tree) + meta = info.find("./d:meta[@name='task']", namespaces=NAMESPACES) required = 
config.get("metadata", {}).get("require_meta_task", False) if meta is None: if required: From b1a48b8eb4065bc59620fe3d516b40d81a91f9fd Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Sat, 29 Jun 2024 18:44:09 +0200 Subject: [PATCH 080/107] Use ElementMaker and refactor tests --- .../metadatavalidator/tests/_utils.py | 19 ++ .../tests/unit/checks/test_check_info.py | 234 ++++++++---------- .../tests/unit/checks/test_check_meta.py | 101 ++++---- 3 files changed, 173 insertions(+), 181 deletions(-) create mode 100644 python-scripts/metadatavalidator/tests/_utils.py diff --git a/python-scripts/metadatavalidator/tests/_utils.py b/python-scripts/metadatavalidator/tests/_utils.py new file mode 100644 index 000000000..883a65f7f --- /dev/null +++ b/python-scripts/metadatavalidator/tests/_utils.py @@ -0,0 +1,19 @@ +from lxml import etree +from lxml.builder import ElementMaker +from metadatavalidator.common import NAMESPACES, DOCBOOK_NS, XML_NS + + +D = ElementMaker(namespace=NAMESPACES["d"], nsmap={None: DOCBOOK_NS}) + +def dbtag(tag): + return f"{{{DOCBOOK_NS}}}{tag}" + +xmlid = f"{{{XML_NS}}}id" + + +def appendnode(tree: etree._ElementTree, node: etree._Element): + """Append the given element to the element in the tree.""" + root = tree.getroot() + info = root.find("./d:info", namespaces=NAMESPACES) + info.append(node) + return tree diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_info.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_info.py index 536a88912..8889dface 100644 --- a/python-scripts/metadatavalidator/tests/unit/checks/test_check_info.py +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_info.py @@ -1,7 +1,7 @@ import pytest -from lxml import etree +from lxml import etree as ET -from _utils import createmeta, appendnode, createinfoelement, addsub +from _utils import appendnode, dbtag, D, xmlid from metadatavalidator.common import NAMESPACES from metadatavalidator.checks import ( @@ -19,35 +19,35 @@ 
def test_getinfo_with_regular_tree(tree): info = getinfo(tree) assert info is not None - assert info.tag == "{http://docbook.org/ns/docbook}info" + assert info.tag == dbtag("info") def test_getinfo_with_assembly_tree(asmtree): info = getinfo(asmtree) assert info is not None - assert info.tag == "{http://docbook.org/ns/docbook}info" + assert info.tag == dbtag("info") def test_info_or_fail_with_regular_tree(tree): info = info_or_fail(tree) assert info is not None - assert info.tag == "{http://docbook.org/ns/docbook}info" + assert info.tag == dbtag("info") def test_info_or_fail_with_assembly_tree(asmtree): info = info_or_fail(asmtree) assert info is not None - assert info.tag == "{http://docbook.org/ns/docbook}info" + assert info.tag == dbtag("info") def test_info_or_fail_with_raise_on_missing(): - tree = etree.ElementTree(etree.Element("{http://docbook.org/ns/docbook}article")) + tree = D("article").getroottree() info = info_or_fail(tree, raise_on_missing=False) assert info is None def test_info_or_fail_with_raise_on_missing_and_missing_info(): - tree = etree.ElementTree(etree.Element("{http://docbook.org/ns/docbook}article")) + tree = D("article").getroottree() with pytest.raises(InvalidValueError, match="Couldn't find element."): info_or_fail(tree) @@ -56,13 +56,8 @@ def test_check_info(tree): assert check_info(tree, {}) is None -def test_check_info_missing(xmlparser): - xmlcontent = """
- -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def test_check_info_missing(): + tree = D("article").getroottree() with pytest.raises(InvalidValueError, match=".*Couldn't find element."): check_info(tree, {}) @@ -82,11 +77,10 @@ def test_check_info_revhistory(tree): # # # """ -# tree = etree.ElementTree( -# etree.fromstring(xmlcontent, parser=xmlparser) +# tree = ET.ElementTree( +# ET.fromstring(xmlcontent, parser=xmlparser) # ) - revhistory = createinfoelement("revhistory", - {f"{{{NAMESPACES['xml']}}}id": "rh"}) + revhistory = D("revhistory", {xmlid: "rh"}) appendnode(tree, revhistory) assert check_info_revhistory(tree, {}) is None @@ -108,8 +102,7 @@ def test_check_info_revhistory_xmlid(tree): #
# # """ - revhistory = createinfoelement("revhistory", - {f"{{{NAMESPACES['xml']}}}id": "rh1"}) + revhistory = D("revhistory", {xmlid: "rh1"}) appendnode(tree, revhistory) assert check_info_revhistory(tree, {}) is None @@ -123,7 +116,7 @@ def test_info_revhistory_missing_xmlid(tree): #
# # """ - revhistory = createinfoelement("revhistory") + revhistory = D("revhistory") appendnode(tree, revhistory) with pytest.raises(InvalidValueError, @@ -139,9 +132,7 @@ def test_check_info_revhistory_xmlid_with_wrong_value(tree): #
# # """ - revhistory = createinfoelement("revhistory", - {f"{{{NAMESPACES['xml']}}}id": "wrong_id"}) - appendnode(tree, revhistory) + appendnode(tree, D("revhistory", {xmlid: "wrong_id"})) with pytest.raises(InvalidValueError, match="should start with 'rh'"): @@ -160,16 +151,14 @@ def test_check_info_revhistory_revision(tree): #
# # """ - revhistory = createinfoelement("revhistory") - revision = etree.SubElement(revhistory, - f"{{{NAMESPACES['d']}}}revision", - {f"{{{NAMESPACES['xml']}}}id": "rh"} - ) - date = etree.SubElement(revision, f"{{{NAMESPACES['d']}}}date") - date.text = "2021-01-01" + revhistory = D("revhistory", + D("revision", {xmlid: "rh"}, + D("date", "2021") + ) + ) appendnode(tree, revhistory) - print(etree.tostring(tree.getroot(), pretty_print=True).decode("utf-8")) + print(ET.tostring(tree.getroot(), pretty_print=True).decode("utf-8")) assert check_info_revhistory_revision(tree, {}) is None @@ -186,10 +175,11 @@ def test_check_info_revhistory_revision_missing_xmlid(tree): # # # """ - revhistory = createinfoelement("revhistory") - revision = etree.SubElement(revhistory, f"{{{NAMESPACES['d']}}}revision") - date = etree.SubElement(revision, f"{{{NAMESPACES['d']}}}date") - date.text = "2021-01-01" + revhistory = D("revhistory", + D("revision", + D("date", "2021-01-01") + ) + ) appendnode(tree, revhistory) with pytest.raises(MissingAttributeWarning, @@ -207,8 +197,7 @@ def test_check_info_revhistory_revision_missing(tree): # # # """ - revhistory = createinfoelement("revhistory") - appendnode(tree, revhistory) + appendnode(tree, D("revhistory")) with pytest.raises(InvalidValueError, match="Couldn't find a revision element"): @@ -230,10 +219,11 @@ def test_check_info_revhistory_revision_date(tree): # # # """ - revhistory = createinfoelement("revhistory") - revision = etree.SubElement(revhistory, f"{{{NAMESPACES['d']}}}revision") - date = etree.SubElement(revision, f"{{{NAMESPACES['d']}}}date") - date.text = "2021-01-01" + revhistory = D("revhistory", + D("revision", + D("date", "2021-01-01") + ) + ) appendnode(tree, revhistory) assert check_info_revhistory_revision_date(tree, {}) is None @@ -249,8 +239,7 @@ def test_check_info_revhistory_revision_date_missing(tree): # # # """ - revhistory = createinfoelement("revhistory") - revision = etree.SubElement(revhistory, 
f"{{{NAMESPACES['d']}}}revision") + revhistory = D("revhistory", D("revision")) appendnode(tree, revhistory) with pytest.raises(InvalidValueError, @@ -270,17 +259,7 @@ def test_check_info_revhistory_revision_date_invalid_format(tree): # # # """ - nsmap = tree.getroot().nsmap - revhistory = createinfoelement("revhistory", - {f"{{{NAMESPACES['xml']}}}id": "rh"}, - nsmap=nsmap) - revision = etree.SubElement(revhistory, - f"{{{NAMESPACES['d']}}}revision", - ) - date = etree.SubElement(revision, - f"{{{NAMESPACES['d']}}}date", - ) - date.text = "January 2024" + revhistory = D("revhistory", D("revision", D("date", "January 2024"))) appendnode(tree, revhistory) with pytest.raises(InvalidValueError, @@ -300,13 +279,8 @@ def test_check_info_revhistory_revision_date_invalid_value(tree): # # # """ - revhistory = createinfoelement("revhistory", - {f"{{{NAMESPACES['xml']}}}id": "rh"}) - revision = etree.SubElement(revhistory, - f"{{{NAMESPACES['d']}}}revision") - date = etree.SubElement(revision, - f"{{{NAMESPACES['d']}}}date") - date.text = "2024-13" + revhistory = D("revhistory", {xmlid: "rh"}, + D("revision", D("date", "2024-13"))) appendnode(tree, revhistory) with pytest.raises(InvalidValueError, @@ -315,52 +289,57 @@ def test_check_info_revhistory_revision_date_invalid_value(tree): check_info_revhistory_revision_date(tree, {}) -def test_check_info_revhistory_revision_order(xmlparser): - xmlcontent = """
- - Test - - - 2024-12 - - - 2023-12-12 - - - 2022-04 - - - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def test_check_info_revhistory_revision_order(tree): +# xmlcontent = """
+# +# Test +# +# +# 2024-12 +# +# +# 2023-12-12 +# +# +# 2022-04 +# +# +# +# +#
""" + revhistory = D("revhistory", {xmlid: "rh"}, + D("revision", D("date", "2024-12")), + D("revision", D("date", "2023-12-12")), + D("revision", D("date", "2022-04"))) + appendnode(tree, revhistory) assert check_info_revhistory_revision_order(tree, {}) is None -def test_check_info_revhistory_revision_order_one_invalid_date(xmlparser): - xmlcontent = """
- - Test - - - 2024-53 - - - 2023-12-12 - - - 2022-04 - - - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def test_check_info_revhistory_revision_order_one_invalid_date(tree): +# xmlcontent = """
+# +# Test +# +# +# 2024-53 +# +# +# 2023-12-12 +# +# +# 2022-04 +# +# +# +# +#
""" + revhistory = D("revhistory", {xmlid: "rh"}, + D("revision", D("date", "2024-53")), + D("revision", D("date", "2023-12-12")), + D("revision", D("date", "2022-04")) + ) + appendnode(tree, revhistory) with pytest.raises(InvalidValueError, match=".*Couldn't convert all dates.*see position dates=1.*" @@ -368,27 +347,30 @@ def test_check_info_revhistory_revision_order_one_invalid_date(xmlparser): check_info_revhistory_revision_order(tree, {}) -def test_check_info_revhistory_revision_wrong_order(xmlparser): - xmlcontent = """
- - Test - - - 2024-12 - - - 2023-12-12 - - - 2026-04 - - - - -
""" - tree = etree.ElementTree( - etree.fromstring(xmlcontent, parser=xmlparser) - ) +def test_check_info_revhistory_revision_wrong_order(tree): +# xmlcontent = """
+# +# Test +# +# +# 2024-12 +# +# +# 2023-12-12 +# +# +# 2026-04 +# +# +# +# +#
""" + revhistory = D("revhistory", {xmlid: "rh"}, + D("revision", D("date", "2024-12")), + D("revision", D("date", "2023-12-12")), + D("revision", D("date", "2026-04")) + ) + appendnode(tree, revhistory) with pytest.raises(InvalidValueError, match=".*Dates in revhistory/revision are not in descending order.*" diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py index 9c1dc17bd..9bfb25e74 100644 --- a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py @@ -1,7 +1,7 @@ from lxml import etree import pytest -from _utils import createmeta, appendnode, addphrase +from _utils import appendnode, D, xmlid from metadatavalidator.common import NAMESPACES from metadatavalidator.checks.check_meta import ( @@ -25,11 +25,8 @@ def test_check_meta_title(tree): # # # """ - root = tree.getroot() - meta = createmeta(dict(name='title'), 'The SEO title') + meta = D("meta", {"name": "title"}, "The SEO title") appendnode(tree, meta) - # info = root.find("./d:info", namespaces=NAMESPACES) - # info.append(meta) assert check_meta_title(tree, {}) is None @@ -42,37 +39,19 @@ def test_check_meta_title_wrong_length(tree): # # # """ - root = tree.getroot() - meta = createmeta(dict(name='title'), - 'The SEO title that is too long', - root.nsmap) + meta = D("meta", {"name": "title"}, "The SEO title that is too long") appendnode(tree, meta) - # info = root.find("./d:info", namespaces=NAMESPACES) - # info.append(meta) with pytest.raises(InvalidValueError, match=".*too long.*"): check_meta_title(tree, dict(metadata=dict(meta_title_length=10))) def test_check_required_meta_title(tree): -# xmlcontent = """
-# -# Test -# -# -#
""" - with pytest.raises(InvalidValueError, match=".*required.*"): check_meta_title(tree, dict(metadata=dict(meta_title_required=True))) def test_check_optional_meta_title(tree): -# xmlcontent = """
-# -# Test -# -# -#
""" config = dict(metadata=dict(meta_title_required=False)) assert check_meta_title(tree, config) is None @@ -85,7 +64,7 @@ def test_check_meta_description(tree): # # # """ - meta = createmeta(dict(name='description'), 'The SEO description') + meta = D("meta", {"name": "description"}, "The SEO description") appendnode(tree, meta) assert check_meta_description(tree, {}) is None @@ -98,8 +77,7 @@ def test_check_meta_description_wrong_length(tree): # # # """ - meta = createmeta(dict(name='description'), - 'The SEO description that is too long') + meta = D("meta", {"name": "description"}, "The SEO description that is too long") appendnode(tree, meta) with pytest.raises(InvalidValueError, match=".*too long.*"): @@ -138,7 +116,7 @@ def test_check_meta_series(tree): # # # """ - meta = createmeta(dict(name='series'), 'Products & Solutions') + meta = D("meta", {"name": "series"}, "Products & Solutions") appendnode(tree, meta) config = dict(metadata=dict(require_meta_series=True, @@ -168,7 +146,7 @@ def test_check_wrong_meta_series(tree): # # # """ - meta = createmeta(dict(name='series'), 'Foo') + meta = D("meta", {"name": "series"}, "Foo") appendnode(tree, meta) config = dict(metadata=dict(require_meta_series=True, @@ -208,8 +186,10 @@ def test_check_meta_techpartner(tree): # # # """ - meta = createmeta(dict(name='techpartner')) - addphrase(meta, ["Acme Inc.", "Foo Corp."]) + meta = D("meta", {"name": "techpartner"}, + D("phrase", {}, "Acme Inc."), + D("phrase", {}, "Foo Corp."), + ) appendnode(tree, meta) assert check_meta_techpartner(tree, {}) is None @@ -236,7 +216,7 @@ def test_check_missing_children_in_meta_techpartner(tree): # # # """ - meta = createmeta(dict(name='techpartner')) + meta = D("meta", {"name": "techpartner"}) appendnode(tree, meta) config = dict(metadata=dict(require_meta_techpartner=True)) @@ -255,8 +235,10 @@ def test_check_meta_techpartner_with_nonunique_children(tree): # # # """ - meta = createmeta(dict(name='techpartner')) - addphrase(meta, ["Acme 
Inc.", "Acme Inc."]) + meta = D("meta", {"name": "techpartner"}, + D("phrase", "Acme Inc."), + D("phrase", "Acme Inc."), + ) appendnode(tree, meta) config = dict(metadata=dict(require_meta_techpartner=True)) @@ -272,7 +254,7 @@ def test_check_meta_platform(tree): # # # """ - meta = createmeta(dict(name='platform'), 'Foo') + meta = D("meta", {"name": "platform"}, "Foo") appendnode(tree, meta) config = dict(metadata=dict(require_meta_platform=True)) @@ -302,7 +284,7 @@ def test_check_empty_meta_platform(tree): # # # """ - meta = createmeta(dict(name='platform')) + meta = D("meta", {"name": "platform"}) appendnode(tree, meta) config = dict(metadata=dict(require_meta_platform=True)) @@ -320,8 +302,7 @@ def test_check_meta_architecture(tree): # # # """ - meta = createmeta(dict(name='architecture')) - addphrase(meta, ["x86_64"]) + meta = D("meta", {"name": "architecture"}, D("phrase", "x86_64")) appendnode(tree, meta) config = dict(metadata=dict(require_meta_architecture=True, @@ -351,7 +332,7 @@ def test_check_missing_child_meta_architecture(tree): # # # """ - meta = createmeta(dict(name='architecture')) + meta = D("meta", {"name": "architecture"}) appendnode(tree, meta) config = dict(metadata=dict(require_meta_architecture=True)) @@ -373,8 +354,10 @@ def test_check_duplicate_child_meta_architecture(tree): # # # """ - meta = createmeta(dict(name='architecture')) - addphrase(meta, ["x86_64", "x86_64"]) + meta = D("meta", {"name": "architecture"}, + D("phrase", "x86_64"), + D("phrase", "x86_64"), + ) appendnode(tree, meta) config = dict(metadata=dict( @@ -397,8 +380,10 @@ def test_check_unknown_child_meta_architecture(tree): # # # """ - meta = createmeta(dict(name='architecture')) - addphrase(meta, ["x86_64", "foo"]) + meta = D("meta", {"name": "architecture"}, + D("phrase", "x86_64"), + D("phrase", "foo"), + ) appendnode(tree, meta) config = dict( @@ -422,8 +407,8 @@ def test_meta_category(tree): # # # """ - meta = createmeta(dict(name='category')) - addphrase(meta, 
["Systems Management"]) + meta = D("meta", {"name": "category"}, + D("phrase", "Systems Management")) appendnode(tree, meta) config = dict(metadata=dict(require_meta_category=True, @@ -453,7 +438,7 @@ def test_missing_child_meta_category(tree): # # # """ - meta = createmeta(dict(name='category')) + meta = D("meta", {"name": "category"}) appendnode(tree, meta) config = dict(metadata=dict(require_meta_category=True)) @@ -473,8 +458,10 @@ def test_duplicate_child_meta_category(tree): # # # """ - meta = createmeta(dict(name='category')) - addphrase(meta, ["Systems Management", "Systems Management"]) + meta = D("meta", {"name": "category"}, + D("phrase", "Systems Management"), + D("phrase", "Systems Management"), + ) appendnode(tree, meta) config = dict(metadata=dict(require_meta_category=True)) @@ -494,8 +481,10 @@ def test_unknown_category_meta_category(tree): # # # """ - meta = createmeta(dict(name='category')) - addphrase(meta, ["Systems Management", "Foo"]) + meta = D("meta", {"name": "category"}, + D("phrase", "Systems Management"), + D("phrase", "Foo"), + ) appendnode(tree, meta) config = dict(metadata=dict(require_meta_category=True, @@ -514,8 +503,8 @@ def test_meta_task(tree): # # # """ - meta = createmeta(dict(name='task')) - addphrase(meta, ["Configuration"]) + meta = D("meta", {"name": "task"}, + D("phrase", "Configuration")) appendnode(tree, meta) config = dict(metadata=dict(require_meta_task=True, @@ -530,7 +519,7 @@ def test_missing_child_meta_task(tree): # # # """ - meta = createmeta(dict(name='task')) + meta = D("meta", {"name": "task"}) appendnode(tree, meta) config = dict(metadata=dict(require_meta_task=True), @@ -550,8 +539,10 @@ def test_duplicate_child_meta_task(tree): # # # """ - meta = createmeta(dict(name='task')) - addphrase(meta, ["Configuration", "Configuration"]) + meta = D("meta", {"name": "task"}, + D("phrase", "Configuration"), + D("phrase", "Configuration"), + ) appendnode(tree, meta) config = dict(metadata=dict(require_meta_task=True), 
From a2f9df0a9b2275751692e80e4a65010d915a7c4a Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Sat, 29 Jun 2024 19:59:01 +0200 Subject: [PATCH 081/107] Correct geinfo() to get merge from --- .../src/metadatavalidator/checks/check_info.py | 5 ++--- .../src/metadatavalidator/process.py | 5 ++++- .../metadatavalidator/src/metadatavalidator/util.py | 10 ++++++---- .../tests/unit/checks/test_check_info.py | 12 ++++++------ 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py index d2c14b07a..40f4070bd 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py @@ -12,7 +12,6 @@ from ..exceptions import InvalidValueError, MissingAttributeWarning from ..logging import log from ..util import ( - getinfo, info_or_fail, getfullxpath, validatedate, @@ -39,10 +38,10 @@ def check_info_revhistory(tree: etree._ElementTree, config: dict[t.Any, t.Any]): xmlid = revhistory.attrib.get(f"{{{XML_NS}}}id") if xmlid is None: - raise InvalidValueError(f"Couldn't find xml:id attribute in info/revhistory.") + raise InvalidValueError(f"Couldn't find xml:id attribute in revhistory.") if not xmlid.startswith("rh"): - raise InvalidValueError(f"xml:id attribute in info/revhistory should start with 'rh'.") + raise InvalidValueError(f"xml:id attribute in revhistory should start with 'rh'.") def check_info_revhistory_revision(tree: etree._ElementTree, diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/process.py b/python-scripts/metadatavalidator/src/metadatavalidator/process.py index 60cfa76f8..f24331eae 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/process.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/process.py @@ -41,7 +41,10 @@ async def process_xml_file(xmlfile: 
str, config: dict[t.Any, t.Any]): # loop = asyncio.get_running_loop() # tree = await loop.run_in_executor(None, etree.parse, xmlfile) tree = etree.parse(xmlfile, - parser=etree.XMLParser(encoding="UTF-8")) + parser=etree.XMLParser(encoding="UTF-8", + # huge_tree=True, + resolve_entities=True) + ) # Apply check function checkfunc(tree, config) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/util.py b/python-scripts/metadatavalidator/src/metadatavalidator/util.py index 1bd2190a6..609d75273 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/util.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/util.py @@ -10,16 +10,18 @@ from .exceptions import InvalidValueError -def getinfo(tree: etree._ElementTree) -> etree._Element: +def getinfo(tree: etree._ElementTree) -> etree._Element|None: """Get the element from a DocBook5 XML tree :param tree: the XML tree to get the element from :return: the element """ - # Check if we get an element from "normal" root elements or - # from an assembly structure. + # Check if we get all children of element from "normal" root + # elements orfrom an assembly structure. + # Regular structures have , but for an assembly we only have + # (which can be seen as a "virtual" element). 
a = tree.find("./d:info", namespaces=NAMESPACES) - b = tree.find("./d:structure/d:merge/d:info", namespaces=NAMESPACES) + b = tree.find("./d:structure/d:merge", namespaces=NAMESPACES) if a is not None: return a diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_info.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_info.py index 8889dface..035147f3f 100644 --- a/python-scripts/metadatavalidator/tests/unit/checks/test_check_info.py +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_info.py @@ -23,9 +23,9 @@ def test_getinfo_with_regular_tree(tree): def test_getinfo_with_assembly_tree(asmtree): - info = getinfo(asmtree) - assert info is not None - assert info.tag == dbtag("info") + merge = getinfo(asmtree) + assert merge is not None + assert merge.tag == dbtag("merge") def test_info_or_fail_with_regular_tree(tree): @@ -35,9 +35,9 @@ def test_info_or_fail_with_regular_tree(tree): def test_info_or_fail_with_assembly_tree(asmtree): - info = info_or_fail(asmtree) - assert info is not None - assert info.tag == dbtag("info") + merge = info_or_fail(asmtree) + assert merge is not None + assert merge.tag == dbtag("merge") def test_info_or_fail_with_raise_on_missing(): From 364b3b796968f3e1e7ba0d476d26745aa8e12ab6 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Sat, 29 Jun 2024 20:27:07 +0200 Subject: [PATCH 082/107] Remove from assemblystr fixture --- .../metadatavalidator/tests/conftest.py | 2 +- .../metadatavalidator/tests/unit/test_assembly.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 python-scripts/metadatavalidator/tests/unit/test_assembly.py diff --git a/python-scripts/metadatavalidator/tests/conftest.py b/python-scripts/metadatavalidator/tests/conftest.py index 003c28246..b1aa95a43 100644 --- a/python-scripts/metadatavalidator/tests/conftest.py +++ b/python-scripts/metadatavalidator/tests/conftest.py @@ -43,7 +43,7 @@ def assemblystr() -> str: 
xmlns="http://docbook.org/ns/docbook"> - + """ diff --git a/python-scripts/metadatavalidator/tests/unit/test_assembly.py b/python-scripts/metadatavalidator/tests/unit/test_assembly.py new file mode 100644 index 000000000..fc46fdc75 --- /dev/null +++ b/python-scripts/metadatavalidator/tests/unit/test_assembly.py @@ -0,0 +1,15 @@ +from lxml import etree +import pytest + +from _utils import dbtag + +from metadatavalidator.common import NAMESPACES +from metadatavalidator.util import getinfo + + +def test_assembly_for_info(asmtree): + merge = getinfo(asmtree) + assert merge is not None + assert merge.tag == dbtag("merge") + + From dbdea358594056a7fde40e42c9ebb00378a95e5c Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Sat, 29 Jun 2024 20:29:50 +0200 Subject: [PATCH 083/107] Add source line in error message --- .../metadatavalidator/checks/check_info.py | 38 ++++++++--- .../metadatavalidator/checks/check_meta.py | 64 ++++++++++++------- .../metadatavalidator/checks/check_root.py | 4 +- 3 files changed, 72 insertions(+), 34 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py index 40f4070bd..7b8f722b2 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py @@ -33,15 +33,24 @@ def check_info_revhistory(tree: etree._ElementTree, config: dict[t.Any, t.Any]): revhistory = info.find("./d:revhistory", namespaces=NAMESPACES) if revhistory is None: if required: - raise InvalidValueError(f"Couldn't find a revhistory element in {info.tag}.") + raise InvalidValueError( + f"Couldn't find a revhistory element in {info.tag}" + f" (line {info.sourceline})." 
+ ) return None xmlid = revhistory.attrib.get(f"{{{XML_NS}}}id") if xmlid is None: - raise InvalidValueError(f"Couldn't find xml:id attribute in revhistory.") + raise InvalidValueError( + f"Couldn't find xml:id attribute in revhistory" + f" (line {revhistory.sourceline})." + ) if not xmlid.startswith("rh"): - raise InvalidValueError(f"xml:id attribute in revhistory should start with 'rh'.") + raise InvalidValueError( + f"xml:id attribute in revhistory should start with 'rh'" + f" (line {revhistory.sourceline})." + ) def check_info_revhistory_revision(tree: etree._ElementTree, @@ -56,12 +65,16 @@ def check_info_revhistory_revision(tree: etree._ElementTree, revision = revhistory.find("./d:revision", namespaces=NAMESPACES) if revision is None: - raise InvalidValueError(f"Couldn't find a revision element in {revhistory.tag}.") + raise InvalidValueError( + f"Couldn't find a revision element in {revhistory.tag}" + f" (line {revhistory.sourceline})." + ) xmlid = revision.attrib.get(f"{{{XML_NS}}}id") if config.get("metadata", {}).get("require_xmlid_on_revision", True) and xmlid is None: xpath = getfullxpath(revision) xpath += "/@xml:id" + xpath += f" (line {revision.sourceline})." raise MissingAttributeWarning(xpath) @@ -76,7 +89,10 @@ def check_info_revhistory_revision_date(tree: etree._ElementTree, date = revhistory.find("./d:revision/d:date", namespaces=NAMESPACES) if date is None: - raise InvalidValueError(f"Couldn't find a date element in info/revhistory/revision.") + raise InvalidValueError( + f"Couldn't find a date element in revhistory/revision" + f" (line {revhistory.sourceline})." + ) validatedate(date) @@ -104,9 +120,12 @@ def check_info_revhistory_revision_order(tree: etree._ElementTree, # First check: check if we have the same number of dates and revisions if len(converteddates) != len(revisions): - raise InvalidValueError(f"Couldn't convert all dates " - f"(see position dates={dates.index(None)+1}). 
" - f"Check {xpath}") + raise InvalidValueError( + f"Couldn't convert all dates " + f"(see position dates={dates.index(None)+1}). " + f"Check {xpath}" + f" (line {revhistory.sourceline})." + ) # Second check: we have the same number of dates and revisions, now # check if the dates are in descending order @@ -114,6 +133,7 @@ def check_info_revhistory_revision_order(tree: etree._ElementTree, if first <= second: raise InvalidValueError( "Dates in revhistory/revision are not in descending order: " - f"{first} <= {second}." + f"{first} <= {second}" + f" (line {revhistory.sourceline})." ) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py index 6e9e0e324..ec6ad47aa 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py @@ -24,13 +24,17 @@ def check_meta_title(tree: etree._ElementTree, if meta is None: if required: raise InvalidValueError( - f"Couldn't find required meta[@name='title'] element in {root.tag}." + f"Couldn't find required meta[@name='title'] element in {root.tag}" + f" (line {info.sourceline})." ) return length = config.get("metadata", {}).get("meta_title_length", 55) if len(meta.text) > length: - raise InvalidValueError(f"Meta title is too long. Max length is {length} characters.") + raise InvalidValueError( + f"Meta title is too long. Max length is {length} characters" + f" (line {meta.sourceline})." + ) def check_meta_description(tree: etree._ElementTree, @@ -43,13 +47,17 @@ def check_meta_description(tree: etree._ElementTree, if meta is None: if required: raise InvalidValueError( - f"Couldn't find required meta[@name='description'] element in {root.tag}." + f"Couldn't find required meta[@name='description'] element in " + f"{root.tag} (line {info.sourceline})." 
) return length = config.get("metadata", {}).get("meta_description_length", 150) if len(meta.text) > length: - raise InvalidValueError(f"Meta description is too long. Max length is {length} characters.") + raise InvalidValueError( + f"Meta description is too long. Max length is {length} characters." + f" (line {meta.sourceline})." + ) def check_meta_series(tree: etree._ElementTree, @@ -62,7 +70,8 @@ def check_meta_series(tree: etree._ElementTree, if meta is None: if required: raise InvalidValueError( - f"Couldn't find required meta[@name='series'] element in {root.tag}." + f"Couldn't find required meta[@name='series'] element in {root.tag}" + f" (line {info.sourceline})." ) return @@ -72,7 +81,8 @@ def check_meta_series(tree: etree._ElementTree, if meta.text.strip() not in valid_series: raise InvalidValueError( f"Meta series is invalid, got {meta.text.strip()!r}. " - f"Valid series are {valid_series}." + f"Valid series are {valid_series}" + f" (line {meta.sourceline})." ) @@ -87,7 +97,8 @@ def check_meta_techpartner(tree: etree._ElementTree, if required: raise InvalidValueError( f"Couldn't find required meta[@name='techpartner'] element " - f"in {root.tag}." + f"in {root.tag}" + f" (line {info.sourceline})." ) return @@ -96,7 +107,7 @@ def check_meta_techpartner(tree: etree._ElementTree, if not partners: raise InvalidValueError( f"Couldn't find any tech partners in meta[@name='techpartner'] element " - f"(line {meta.sourceline})." + f"(line {meta.sourceline})" ) # Are they unique? @@ -118,7 +129,8 @@ def check_meta_platform(tree: etree._ElementTree, if required: raise InvalidValueError( f"Couldn't find required meta[@name='platform'] element " - f"in {root.tag}." + f"in {root.tag}" + f" (line {info.sourceline})." ) return @@ -138,7 +150,8 @@ def check_meta_architecture(tree: etree._ElementTree, if required: raise InvalidValueError( f"Couldn't find required meta[@name='architecture'] element " - f"in {root.tag}." + f"in {root.tag}" + f" (line {info.sourceline})." 
) return @@ -152,15 +165,15 @@ def check_meta_architecture(tree: etree._ElementTree, archs = [tag.text.strip() for tag in meta.iterchildren()] if not archs: raise InvalidValueError( - f"Couldn't find any child elements in meta[@name='architecture'] " - f"(line {meta.sourceline})." + f"Couldn't find any child elements in meta[@name='architecture']" + f" (line {meta.sourceline})." ) # Are they unique? if len(archs) != len(set(archs)): raise InvalidValueError( - f"Duplicate architectures found in meta[@name='architecture'] " - f"(line {meta.sourceline})." + f"Duplicate architectures found in meta[@name='architecture']" + f" (line {meta.sourceline})." ) # Do we have items that don't conform to our predefined list? @@ -168,7 +181,8 @@ def check_meta_architecture(tree: etree._ElementTree, if wrong_items: raise InvalidValueError( f"Unknown architecture(s) {wrong_items}. " - f"Allowed are {valid_archs}." + f"Allowed are {valid_archs}" + f" (line {meta.sourceline})." ) @@ -197,15 +211,15 @@ def check_meta_category(tree: etree._ElementTree, cats = [tag.text.strip() for tag in meta.iterchildren()] if not cats: raise InvalidValueError( - f"Couldn't find any child elements in meta[@name='category'] " - f"(line {meta.sourceline})." + f"Couldn't find any child elements in meta[@name='category']" + f" (line {meta.sourceline})." ) # Are they unique? if len(cats) != len(set(cats)): raise InvalidValueError( - f"Duplicate categories found in meta[@name='category'] " - f"(line {meta.sourceline})." + f"Duplicate categories found in meta[@name='category']" + f" (line {meta.sourceline})." ) # Do we have items that don't conform to our predefined list? @@ -214,6 +228,7 @@ def check_meta_category(tree: etree._ElementTree, raise InvalidValueError( f"Unknown category(ies) {wrong_items}. " f"Allowed are {valid_cats}." + f" (line {meta.sourceline})." 
) @@ -242,15 +257,15 @@ def check_meta_task(tree: etree._ElementTree, tasks = [tag.text.strip() for tag in meta.iterchildren()] if not tasks: raise InvalidValueError( - f"Couldn't find any child elements in meta[@name='task'] " - f"(line {meta.sourceline})." + f"Couldn't find any child elements in meta[@name='task']" + f" (line {meta.sourceline})." ) # Are they unique? if len(tasks) != len(set(tasks)): raise InvalidValueError( - f"Duplicate tasks found in meta[@name='task'] " - f"(line {meta.sourceline})." + f"Duplicate tasks found in meta[@name='task']" + f" (line {meta.sourceline})." ) # Do we have items that don't conform to our predefined list? @@ -258,5 +273,6 @@ def check_meta_task(tree: etree._ElementTree, if wrong_items: raise InvalidValueError( f"Unknown task(s) {wrong_items}. " - f"Allowed are {valid_tasks}." + f"Allowed are {valid_tasks}" + f" (line {meta.sourceline})." ) \ No newline at end of file diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_root.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_root.py index 008247a1a..6fb88372c 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_root.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_root.py @@ -20,5 +20,7 @@ def check_namespace(tree: etree._ElementTree, config: dict[t.Any, t.Any]): """Checks the namespace""" tag = etree.QName(tree.getroot().tag) if tag.namespace != DOCBOOK_NS: - raise InvalidValueError(f"Root element {tag.localname!r} doesn't belong to DocBook 5.") + raise InvalidValueError( + f"Root element {tag.localname!r} doesn't belong to DocBook 5." + ) From 4dc4d9a2a477d10256111bf7cd5963af3c8796c6 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Sat, 29 Jun 2024 20:50:38 +0200 Subject: [PATCH 084/107] Refactor process_xml_file() * First check if the file is a well-formed XML file. 
If not, don't bother to check further * Introduce returndict with some pre-defined values --- .../src/metadatavalidator/process.py | 48 +++++++++++-------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/process.py b/python-scripts/metadatavalidator/src/metadatavalidator/process.py index f24331eae..bc7b7ec78 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/process.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/process.py @@ -32,7 +32,32 @@ async def process_xml_file(xmlfile: str, config: dict[t.Any, t.Any]): """ errors = [] basexmlfile = os.path.basename(xmlfile) + + returndict = { + "xmlfile": xmlfile, + "absxmlfilename": os.path.abspath(xmlfile), + "basename": basexmlfile, + } # log.debug("Config %s", config) + # First check if the file is a well-formed XML file + # If not, don't bother to check further + try: + tree = etree.parse(xmlfile, + parser=etree.XMLParser( + encoding="UTF-8", + # huge_tree=True, + resolve_entities=True) + ) + + except etree.XMLSyntaxError as e: + # log.fatal("Syntax error in %r: %s", xmlfile, e) + log.fatal("Syntax error in %r: %s", basexmlfile, e) + errors.append({ + 'checkfunc': None, + 'message': str(e) + }) + return { **returndict, "errors": errors } + for checkfunc in get_all_check_functions(checks.__package__): log.debug("Checking %r with %r", basexmlfile, @@ -40,23 +65,10 @@ async def process_xml_file(xmlfile: str, config: dict[t.Any, t.Any]): try: # loop = asyncio.get_running_loop() # tree = await loop.run_in_executor(None, etree.parse, xmlfile) - tree = etree.parse(xmlfile, - parser=etree.XMLParser(encoding="UTF-8", - # huge_tree=True, - resolve_entities=True) - ) - # Apply check function checkfunc(tree, config) # await asyncio.sleep(0.1) - except etree.XMLSyntaxError as e: - # log.fatal("Syntax error in %r: %s", xmlfile, e) - errors.append({ - 'checkfunc': checkfunc.__name__, - 'message': str(e) - }) - except 
(InvalidValueError, MissingAttributeWarning) as e: #log.fatal("Invalid value in %r for %s: %s", # xmlfile, checkfunc.__name__, e) @@ -65,16 +77,12 @@ async def process_xml_file(xmlfile: str, config: dict[t.Any, t.Any]): 'message': str(e) }) else: - # log.info("Passed check %r for %r", checkfunc.__name__, os.path.basename(xmlfile)) + # log.info("Passed check %r for %r", checkfunc.__name__, basexmlfile) pass log.info("File %r checked.", basexmlfile) - return { - "xmlfile": xmlfile, - "absxmlfilename": os.path.abspath(xmlfile), - "errors": errors, - "basename": os.path.basename(xmlfile), - } + return { **returndict, "errors": errors } + def green(text): return f"\033[32m{text}\033[0m" From 393f474badc36e3af53d4ed3bceb335923500dbb Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Sat, 29 Jun 2024 20:56:31 +0200 Subject: [PATCH 085/107] Move red() & green() function to util.py --- .../metadatavalidator/src/metadatavalidator/process.py | 9 ++------- .../metadatavalidator/src/metadatavalidator/util.py | 8 ++++++++ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/process.py b/python-scripts/metadatavalidator/src/metadatavalidator/process.py index bc7b7ec78..8a1dcafd3 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/process.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/process.py @@ -10,6 +10,7 @@ from . 
import checks from .exceptions import InvalidValueError, MissingAttributeWarning from .logging import log +from .util import green, red def get_all_check_functions(name): @@ -84,13 +85,6 @@ async def process_xml_file(xmlfile: str, config: dict[t.Any, t.Any]): return { **returndict, "errors": errors } -def green(text): - return f"\033[32m{text}\033[0m" - -def red(text): - return f"\033[31m{text}\033[0m" - - def format_results_text(results: list[t.Any]): """Format the results for output @@ -111,6 +105,7 @@ def format_results_text(results: list[t.Any]): print(f" {allidx}.{idx}: {error['checkfunc']}: {msg}") print() + def format_results_json(results: list[t.Any]): """Format the results for output diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/util.py b/python-scripts/metadatavalidator/src/metadatavalidator/util.py index 609d75273..7889508ad 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/util.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/util.py @@ -10,6 +10,14 @@ from .exceptions import InvalidValueError + +def green(text): # pragma: no cover + return f"\033[32m{text}\033[0m" + +def red(text): # pragma: no cover + return f"\033[31m{text}\033[0m" + + def getinfo(tree: etree._ElementTree) -> etree._Element|None: """Get the element from a DocBook5 XML tree From 4046e0aa2823b28f7b8f9d875dc8f6d961628406 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Sat, 29 Jun 2024 21:00:55 +0200 Subject: [PATCH 086/107] Rename test_badcase1.py -> test_badcase.py --- .../integration/badcase1/test_badcase.py | 39 +++++++++++++++++++ .../integration/badcase1/test_badcase1.py | 23 ----------- 2 files changed, 39 insertions(+), 23 deletions(-) create mode 100644 python-scripts/metadatavalidator/tests/integration/badcase1/test_badcase.py delete mode 100644 python-scripts/metadatavalidator/tests/integration/badcase1/test_badcase1.py diff --git a/python-scripts/metadatavalidator/tests/integration/badcase1/test_badcase.py 
b/python-scripts/metadatavalidator/tests/integration/badcase1/test_badcase.py new file mode 100644 index 000000000..9450a89c0 --- /dev/null +++ b/python-scripts/metadatavalidator/tests/integration/badcase1/test_badcase.py @@ -0,0 +1,39 @@ +import os.path +import json +import re +import pytest + +from metadatavalidator.cli import main + + +BASEDIR = os.path.dirname(os.path.realpath(__file__)) +RELATIVE_PATH = os.path.relpath(BASEDIR, os.getcwd()) + + +def test_integr_case1(capsys): + cli = ["--config", f"{BASEDIR}/config-test.ini", + "--format", "json", # needed to avoid formatting issues + f"{RELATIVE_PATH}/article.xml"] + + result = main(cli) + captured = capsys.readouterr() + assert result == 0 + result = json.loads(captured.out) + assert result[0]['errors'] == [] + assert result[0]['xmlfile'] == f"{RELATIVE_PATH}/article.xml" + + + +def test_integr_xml_with_syntax_error(capsys): + cli = ["--config", f"{BASEDIR}/config-test.ini", + "--format", "json", # needed to avoid formatting issues + f"{RELATIVE_PATH}/article-invalid.xml"] + + result = main(cli) + captured = capsys.readouterr() + assert result == 0 + result = json.loads(captured.out) + message = result[0]['errors'][0]['message'] + # "Opening and ending tag mismatch: para line 6 and article, line 7, column 11 (article-invalid.xml, line 7)" + assert re.match(r"Opening and ending tag mismatch:", message) + assert result[0]['xmlfile'] == f"{RELATIVE_PATH}/article-invalid.xml" \ No newline at end of file diff --git a/python-scripts/metadatavalidator/tests/integration/badcase1/test_badcase1.py b/python-scripts/metadatavalidator/tests/integration/badcase1/test_badcase1.py deleted file mode 100644 index a769f5446..000000000 --- a/python-scripts/metadatavalidator/tests/integration/badcase1/test_badcase1.py +++ /dev/null @@ -1,23 +0,0 @@ -import os.path -import json -import pytest - -from metadatavalidator.cli import main - - -BASEDIR = os.path.dirname(os.path.realpath(__file__)) -RELATIVE_PATH = 
os.path.relpath(BASEDIR, os.getcwd()) - - -def test_case1_integration(capsys): - cli = ["--config", f"{BASEDIR}/config-test.ini", - "--format", "json", # needed to avoid formatting issues - f"{RELATIVE_PATH}/article.xml"] - - result = main(cli) - captured = capsys.readouterr() - assert result == 0 - result = json.loads(captured.out) - assert result[0]['errors'] == [] - assert result[0]['xmlfile'] == f"{RELATIVE_PATH}/article.xml" - From 488149ad42dadb2de69dc2a230ee464e0b5b9256 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Sat, 29 Jun 2024 21:01:26 +0200 Subject: [PATCH 087/107] Rename test_integration.py -> test_goodcase.py --- .../goodcase1/{test_integration.py => test_goodcase.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename python-scripts/metadatavalidator/tests/integration/goodcase1/{test_integration.py => test_goodcase.py} (100%) diff --git a/python-scripts/metadatavalidator/tests/integration/goodcase1/test_integration.py b/python-scripts/metadatavalidator/tests/integration/goodcase1/test_goodcase.py similarity index 100% rename from python-scripts/metadatavalidator/tests/integration/goodcase1/test_integration.py rename to python-scripts/metadatavalidator/tests/integration/goodcase1/test_goodcase.py From f9b6f2a4ad0c1ad1c2dd2431c9afdcc0806b183f Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Sat, 29 Jun 2024 21:04:58 +0200 Subject: [PATCH 088/107] Add another goodcase: entity in doctype --- .../article-with-entity-in-doctype.xml | 15 ++++++++++++++ .../integration/goodcase1/my-entities.ent | 2 ++ .../integration/goodcase1/test_goodcase.py | 20 ++++++++++++++++--- 3 files changed, 34 insertions(+), 3 deletions(-) create mode 100644 python-scripts/metadatavalidator/tests/integration/goodcase1/article-with-entity-in-doctype.xml create mode 100644 python-scripts/metadatavalidator/tests/integration/goodcase1/my-entities.ent diff --git a/python-scripts/metadatavalidator/tests/integration/goodcase1/article-with-entity-in-doctype.xml 
b/python-scripts/metadatavalidator/tests/integration/goodcase1/article-with-entity-in-doctype.xml new file mode 100644 index 000000000..bcac56a65 --- /dev/null +++ b/python-scripts/metadatavalidator/tests/integration/goodcase1/article-with-entity-in-doctype.xml @@ -0,0 +1,15 @@ + + + + + +]> + +
+ + A XML file with entity + + &foo; +
diff --git a/python-scripts/metadatavalidator/tests/integration/goodcase1/my-entities.ent b/python-scripts/metadatavalidator/tests/integration/goodcase1/my-entities.ent new file mode 100644 index 000000000..1114a8b94 --- /dev/null +++ b/python-scripts/metadatavalidator/tests/integration/goodcase1/my-entities.ent @@ -0,0 +1,2 @@ + + diff --git a/python-scripts/metadatavalidator/tests/integration/goodcase1/test_goodcase.py b/python-scripts/metadatavalidator/tests/integration/goodcase1/test_goodcase.py index a769f5446..b6bbb0979 100644 --- a/python-scripts/metadatavalidator/tests/integration/goodcase1/test_goodcase.py +++ b/python-scripts/metadatavalidator/tests/integration/goodcase1/test_goodcase.py @@ -9,15 +9,29 @@ RELATIVE_PATH = os.path.relpath(BASEDIR, os.getcwd()) -def test_case1_integration(capsys): +def test_integr_article(capsys): + xmlfile = f"{RELATIVE_PATH}/article.xml" cli = ["--config", f"{BASEDIR}/config-test.ini", "--format", "json", # needed to avoid formatting issues - f"{RELATIVE_PATH}/article.xml"] + xmlfile] result = main(cli) captured = capsys.readouterr() assert result == 0 result = json.loads(captured.out) assert result[0]['errors'] == [] - assert result[0]['xmlfile'] == f"{RELATIVE_PATH}/article.xml" + assert result[0]['xmlfile'] == xmlfile + +def test_integr_xml_with_entity_in_doctype(capsys): + xmlfile = f"{RELATIVE_PATH}/article-with-entity-in-doctype.xml" + cli = ["--config", f"{BASEDIR}/config-test.ini", + "--format", "json", # needed to avoid formatting issues + xmlfile] + + result = main(cli) + captured = capsys.readouterr() + assert result == 0 + result = json.loads(captured.out) + assert result[0]['errors'] == [] + assert result[0]['xmlfile'] == xmlfile \ No newline at end of file From dc7936f6a903c32a34d4b17821d6fa944c321ad5 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Sat, 29 Jun 2024 21:06:28 +0200 Subject: [PATCH 089/107] goodcase1 -> goodcases --- .../{goodcase1 => goodcases}/article-with-entity-in-doctype.xml | 0 
.../tests/integration/{goodcase1 => goodcases}/article.xml | 0 .../tests/integration/{goodcase1 => goodcases}/config-test.ini | 0 .../tests/integration/{goodcase1 => goodcases}/my-entities.ent | 0 .../tests/integration/{goodcase1 => goodcases}/test_goodcase.py | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename python-scripts/metadatavalidator/tests/integration/{goodcase1 => goodcases}/article-with-entity-in-doctype.xml (100%) rename python-scripts/metadatavalidator/tests/integration/{goodcase1 => goodcases}/article.xml (100%) rename python-scripts/metadatavalidator/tests/integration/{goodcase1 => goodcases}/config-test.ini (100%) rename python-scripts/metadatavalidator/tests/integration/{goodcase1 => goodcases}/my-entities.ent (100%) rename python-scripts/metadatavalidator/tests/integration/{goodcase1 => goodcases}/test_goodcase.py (100%) diff --git a/python-scripts/metadatavalidator/tests/integration/goodcase1/article-with-entity-in-doctype.xml b/python-scripts/metadatavalidator/tests/integration/goodcases/article-with-entity-in-doctype.xml similarity index 100% rename from python-scripts/metadatavalidator/tests/integration/goodcase1/article-with-entity-in-doctype.xml rename to python-scripts/metadatavalidator/tests/integration/goodcases/article-with-entity-in-doctype.xml diff --git a/python-scripts/metadatavalidator/tests/integration/goodcase1/article.xml b/python-scripts/metadatavalidator/tests/integration/goodcases/article.xml similarity index 100% rename from python-scripts/metadatavalidator/tests/integration/goodcase1/article.xml rename to python-scripts/metadatavalidator/tests/integration/goodcases/article.xml diff --git a/python-scripts/metadatavalidator/tests/integration/goodcase1/config-test.ini b/python-scripts/metadatavalidator/tests/integration/goodcases/config-test.ini similarity index 100% rename from python-scripts/metadatavalidator/tests/integration/goodcase1/config-test.ini rename to 
python-scripts/metadatavalidator/tests/integration/goodcases/config-test.ini diff --git a/python-scripts/metadatavalidator/tests/integration/goodcase1/my-entities.ent b/python-scripts/metadatavalidator/tests/integration/goodcases/my-entities.ent similarity index 100% rename from python-scripts/metadatavalidator/tests/integration/goodcase1/my-entities.ent rename to python-scripts/metadatavalidator/tests/integration/goodcases/my-entities.ent diff --git a/python-scripts/metadatavalidator/tests/integration/goodcase1/test_goodcase.py b/python-scripts/metadatavalidator/tests/integration/goodcases/test_goodcase.py similarity index 100% rename from python-scripts/metadatavalidator/tests/integration/goodcase1/test_goodcase.py rename to python-scripts/metadatavalidator/tests/integration/goodcases/test_goodcase.py From a7dbd957351abfadf01161f4a7ff91cb97ef2f8f Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Sat, 29 Jun 2024 21:06:53 +0200 Subject: [PATCH 090/107] badcase1 -> badcases --- .../tests/integration/{badcase1 => badcases}/article.xml | 0 .../tests/integration/{badcase1 => badcases}/config-test.ini | 0 .../tests/integration/{badcase1 => badcases}/test_badcase.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename python-scripts/metadatavalidator/tests/integration/{badcase1 => badcases}/article.xml (100%) rename python-scripts/metadatavalidator/tests/integration/{badcase1 => badcases}/config-test.ini (100%) rename python-scripts/metadatavalidator/tests/integration/{badcase1 => badcases}/test_badcase.py (100%) diff --git a/python-scripts/metadatavalidator/tests/integration/badcase1/article.xml b/python-scripts/metadatavalidator/tests/integration/badcases/article.xml similarity index 100% rename from python-scripts/metadatavalidator/tests/integration/badcase1/article.xml rename to python-scripts/metadatavalidator/tests/integration/badcases/article.xml diff --git a/python-scripts/metadatavalidator/tests/integration/badcase1/config-test.ini 
b/python-scripts/metadatavalidator/tests/integration/badcases/config-test.ini similarity index 100% rename from python-scripts/metadatavalidator/tests/integration/badcase1/config-test.ini rename to python-scripts/metadatavalidator/tests/integration/badcases/config-test.ini diff --git a/python-scripts/metadatavalidator/tests/integration/badcase1/test_badcase.py b/python-scripts/metadatavalidator/tests/integration/badcases/test_badcase.py similarity index 100% rename from python-scripts/metadatavalidator/tests/integration/badcase1/test_badcase.py rename to python-scripts/metadatavalidator/tests/integration/badcases/test_badcase.py From 6b498de1f41ad28fab735be25166f841857b9fa9 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Sat, 29 Jun 2024 21:25:25 +0200 Subject: [PATCH 091/107] Replace basic_xmlcontent with tree fixture --- .../tests/unit/checks/test_check_root.py | 21 ++----------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_root.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_root.py index b73f3498d..50e3bbb73 100644 --- a/python-scripts/metadatavalidator/tests/unit/checks/test_check_root.py +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_root.py @@ -5,29 +5,12 @@ from metadatavalidator.checks import check_root_tag, check_namespace from metadatavalidator.exceptions import InvalidValueError -basic_xmlcontent = """
- - Test - - -
""" - - -def test_check_root_tag(): - tree = etree.ElementTree( - etree.fromstring(basic_xmlcontent, - parser=etree.XMLParser(encoding="UTF-8")) - ) +def test_check_root_tag(tree): assert check_root_tag(tree, {"validator": {"check_root_elements": ["article"]}}) is None -def test_check_check_namespace(): - tree = etree.ElementTree( - etree.fromstring(basic_xmlcontent, - parser=etree.XMLParser(encoding="UTF-8")) - ) - +def test_check_check_namespace(tree): assert check_namespace(tree, {}) is None From 33bcf359a46490079e8d4c3b01d30be344b46b88 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Sat, 29 Jun 2024 21:30:51 +0200 Subject: [PATCH 092/107] Rename cfg name revhistory -> require_revhistory --- .../metadatavalidator/metadatavalidator.ini | 3 ++- .../src/metadatavalidator/config.py | 4 ++++ .../integration/badcases/article-invalid.xml | 7 +++++++ .../tests/integration/badcases/config-test.ini | 2 +- .../goodcases/article-with-external-entity.xml | 16 ++++++++++++++++ .../tests/integration/goodcases/config-test.ini | 2 +- ...rticle-with-external-entity-in-nested-dir.xml | 16 ++++++++++++++++ .../tests/integration/goodcases/test_goodcase.py | 15 +++++++++++++++ 8 files changed, 62 insertions(+), 3 deletions(-) create mode 100644 python-scripts/metadatavalidator/tests/integration/badcases/article-invalid.xml create mode 100644 python-scripts/metadatavalidator/tests/integration/goodcases/article-with-external-entity.xml create mode 100644 python-scripts/metadatavalidator/tests/integration/goodcases/nested-dir/a/b/article-with-external-entity-in-nested-dir.xml diff --git a/python-scripts/metadatavalidator/metadatavalidator.ini b/python-scripts/metadatavalidator/metadatavalidator.ini index b4a2dd384..bbef4f85f 100644 --- a/python-scripts/metadatavalidator/metadatavalidator.ini +++ b/python-scripts/metadatavalidator/metadatavalidator.ini @@ -4,7 +4,8 @@ check_root_elements = assembly book article topic valid_languages = ar-ar cs-cz de-de en-us es-es fr-fr hu-hu it-it 
ja-jp ko-kr nl-nl pl-pl pt-br ru-ru sv-se zh-cn zh-tw [metadata] -revhistory = 0 +# revhistory = 0 +require_revhistory = on require_xmlid_on_revision = 1 # diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/config.py b/python-scripts/metadatavalidator/src/metadatavalidator/config.py index b99c7cfec..ab990b3b4 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/config.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/config.py @@ -206,6 +206,10 @@ def validate_and_convert_config(config: configparser.ConfigParser) -> dict[t.Any theconfig["validator"]["valid_languages"] = validate_valid_languages(theconfig) # Section "metadata" + # + theconfig.setdefault("metadata", {})[ + "require_revhistory" + ] = truefalse(theconfig.get("metadata", {}).get("require_revhistory", True)) theconfig.setdefault("metadata", {})[ "require_xmlid_on_revision" ] = truefalse(theconfig.get("metadata", {}).get("require_xmlid_on_revision", True)) diff --git a/python-scripts/metadatavalidator/tests/integration/badcases/article-invalid.xml b/python-scripts/metadatavalidator/tests/integration/badcases/article-invalid.xml new file mode 100644 index 000000000..015d964d1 --- /dev/null +++ b/python-scripts/metadatavalidator/tests/integration/badcases/article-invalid.xml @@ -0,0 +1,7 @@ +
+ + Test + The very long, long, long long SEO title + + +
diff --git a/python-scripts/metadatavalidator/tests/integration/badcases/config-test.ini b/python-scripts/metadatavalidator/tests/integration/badcases/config-test.ini index 636b78b88..f166f6384 100644 --- a/python-scripts/metadatavalidator/tests/integration/badcases/config-test.ini +++ b/python-scripts/metadatavalidator/tests/integration/badcases/config-test.ini @@ -4,7 +4,7 @@ check_root_elements = book article topic valid_languages = en-us [metadata] -revhistory = 0 +require_revhistory = 0 require_xmlid_on_revision = 0 # diff --git a/python-scripts/metadatavalidator/tests/integration/goodcases/article-with-external-entity.xml b/python-scripts/metadatavalidator/tests/integration/goodcases/article-with-external-entity.xml new file mode 100644 index 000000000..d6a7c1dfd --- /dev/null +++ b/python-scripts/metadatavalidator/tests/integration/goodcases/article-with-external-entity.xml @@ -0,0 +1,16 @@ + + + + + + %entities; +]> + +
+ + A XML file with an external entity + + &foo; +
diff --git a/python-scripts/metadatavalidator/tests/integration/goodcases/config-test.ini b/python-scripts/metadatavalidator/tests/integration/goodcases/config-test.ini index 636b78b88..f166f6384 100644 --- a/python-scripts/metadatavalidator/tests/integration/goodcases/config-test.ini +++ b/python-scripts/metadatavalidator/tests/integration/goodcases/config-test.ini @@ -4,7 +4,7 @@ check_root_elements = book article topic valid_languages = en-us [metadata] -revhistory = 0 +require_revhistory = 0 require_xmlid_on_revision = 0 # diff --git a/python-scripts/metadatavalidator/tests/integration/goodcases/nested-dir/a/b/article-with-external-entity-in-nested-dir.xml b/python-scripts/metadatavalidator/tests/integration/goodcases/nested-dir/a/b/article-with-external-entity-in-nested-dir.xml new file mode 100644 index 000000000..4e3b77de0 --- /dev/null +++ b/python-scripts/metadatavalidator/tests/integration/goodcases/nested-dir/a/b/article-with-external-entity-in-nested-dir.xml @@ -0,0 +1,16 @@ + + + + + + %entities; +]> + +
+ + A XML file with an external entity + + &foo; +
diff --git a/python-scripts/metadatavalidator/tests/integration/goodcases/test_goodcase.py b/python-scripts/metadatavalidator/tests/integration/goodcases/test_goodcase.py index b6bbb0979..3940091c2 100644 --- a/python-scripts/metadatavalidator/tests/integration/goodcases/test_goodcase.py +++ b/python-scripts/metadatavalidator/tests/integration/goodcases/test_goodcase.py @@ -29,6 +29,21 @@ def test_integr_xml_with_entity_in_doctype(capsys): "--format", "json", # needed to avoid formatting issues xmlfile] + result = main(cli) + captured = capsys.readouterr() + assert result == 0 + result = json.loads(captured.out) + assert result[0]['errors'] == [] + assert result[0]['xmlfile'] == xmlfile + + +@pytest.mark.skip(reason="Works locally but not on CI") +def test_integr_xml_with_external_entity_in_nested_dir(capsys): + xmlfile = f"{RELATIVE_PATH}/nested-dir/a/b/article-with-external-entity.xml" + cli = ["--config", f"{BASEDIR}/config-test.ini", + "--format", "json", # needed to avoid formatting issues + xmlfile] + result = main(cli) captured = capsys.readouterr() assert result == 0 From 90c4e8598a60f937b53504426b42e220b3168b20 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Sat, 29 Jun 2024 21:44:02 +0200 Subject: [PATCH 093/107] Split big test_check_meta.py into other files --- .../tests/unit/checks/test_check_meta.py | 552 ------------------ .../tests/unit/checks/test_check_meta_arch.py | 117 ++++ .../unit/checks/test_check_meta_category.py | 108 ++++ .../checks/test_check_meta_description.py | 63 ++ .../unit/checks/test_check_meta_platform.py | 56 ++ .../unit/checks/test_check_meta_series.py | 77 +++ .../tests/unit/checks/test_check_meta_task.py | 69 +++ .../checks/test_check_meta_techpartner.py | 81 +++ .../unit/checks/test_check_meta_title.py | 50 ++ 9 files changed, 621 insertions(+), 552 deletions(-) delete mode 100644 python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py create mode 100644 
python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_arch.py create mode 100644 python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_category.py create mode 100644 python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_description.py create mode 100644 python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_platform.py create mode 100644 python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_series.py create mode 100644 python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_task.py create mode 100644 python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_techpartner.py create mode 100644 python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_title.py diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py deleted file mode 100644 index 9bfb25e74..000000000 --- a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta.py +++ /dev/null @@ -1,552 +0,0 @@ -from lxml import etree -import pytest - -from _utils import appendnode, D, xmlid - -from metadatavalidator.common import NAMESPACES -from metadatavalidator.checks.check_meta import ( - check_meta_title, - check_meta_description, - check_meta_series, - check_meta_techpartner, - check_meta_platform, - check_meta_architecture, - check_meta_category, - check_meta_task, -) -from metadatavalidator.exceptions import InvalidValueError - - -def test_check_meta_title(tree): -# xmlcontent = """
-# -# Test -# The SEO title -# -# -#
""" - meta = D("meta", {"name": "title"}, "The SEO title") - appendnode(tree, meta) - - assert check_meta_title(tree, {}) is None - - -def test_check_meta_title_wrong_length(tree): -# xmlcontent = """
-# -# Test -# The SEO title that is too long -# -# -#
""" - meta = D("meta", {"name": "title"}, "The SEO title that is too long") - appendnode(tree, meta) - - with pytest.raises(InvalidValueError, match=".*too long.*"): - check_meta_title(tree, dict(metadata=dict(meta_title_length=10))) - - -def test_check_required_meta_title(tree): - with pytest.raises(InvalidValueError, match=".*required.*"): - check_meta_title(tree, dict(metadata=dict(meta_title_required=True))) - - -def test_check_optional_meta_title(tree): - config = dict(metadata=dict(meta_title_required=False)) - assert check_meta_title(tree, config) is None - - -def test_check_meta_description(tree): -# xmlcontent = """
-# -# Test -# The SEO description -# -# -#
""" - meta = D("meta", {"name": "description"}, "The SEO description") - appendnode(tree, meta) - assert check_meta_description(tree, {}) is None - - -def test_check_meta_description_wrong_length(tree): -# xmlcontent = """
-# -# Test -# The SEO description that is too long -# -# -#
""" - meta = D("meta", {"name": "description"}, "The SEO description that is too long") - appendnode(tree, meta) - - with pytest.raises(InvalidValueError, match=".*too long.*"): - check_meta_description(tree, dict(metadata=dict(meta_description_length=10))) - - -def test_check_required_meta_description(tree): -# xmlcontent = """
-# -# Test -# -# -#
""" - - with pytest.raises(InvalidValueError, match=".*required.*"): - check_meta_description(tree, dict(metadata=dict(meta_description_required=True))) - - -def test_check_optional_meta_description(tree): -# xmlcontent = """
-# -# Test -# -# -#
""" - - assert check_meta_description(tree, - dict(metadata=dict(meta_description_required=False))) is None - - -def test_check_meta_series(tree): -# xmlcontent = """
-# -# Test -# Products & Solutions -# -# -#
""" - meta = D("meta", {"name": "series"}, "Products & Solutions") - appendnode(tree, meta) - - config = dict(metadata=dict(require_meta_series=True, - valid_meta_series=["Products & Solutions", - "Best Practices", - "Technical References"])) - assert check_meta_series(tree, config) is None - - -def test_check_missing_optional_meta_series(tree): -# xmlcontent = """
-# -# Test -# -# -#
""" - - config = dict(metadata=dict(require_meta_series=False)) - assert check_meta_series(tree, config) is None - - -def test_check_wrong_meta_series(tree): -# xmlcontent = """
-# -# Test -# Foo -# -# -#
""" - meta = D("meta", {"name": "series"}, "Foo") - appendnode(tree, meta) - - config = dict(metadata=dict(require_meta_series=True, - valid_meta_series=["Best Practices", - "Technical References"])) - with pytest.raises(InvalidValueError, match="Meta series is invalid"): - check_meta_series(tree, config) - - -def test_check_require_meta_series(tree): -# xmlcontent = """
-# -# Test -# -# -#
""" - - config = dict( - metadata=dict( - require_meta_series=True, - valid_meta_series=["Best Practices", "Technical References"], - ) - ) - with pytest.raises(InvalidValueError, - match=r".*Couldn't find required meta.*"): - check_meta_series(tree, config) - - -def test_check_meta_techpartner(tree): -# xmlcontent = """
-# -# Test -# -# Acme Inc. -# Foo Corp. -# -# -# -#
""" - meta = D("meta", {"name": "techpartner"}, - D("phrase", {}, "Acme Inc."), - D("phrase", {}, "Foo Corp."), - ) - appendnode(tree, meta) - - assert check_meta_techpartner(tree, {}) is None - - -def test_check_missing_meta_techpartner(tree): -# xmlcontent = """
-# -# Test -# -# -#
""" - - config = dict(metadata=dict(require_meta_techpartner=True)) - with pytest.raises(InvalidValueError, match=".*required.*"): - check_meta_techpartner(tree, config) - - -def test_check_missing_children_in_meta_techpartner(tree): -# xmlcontent = """
-# -# Test -# -# -# -#
""" - meta = D("meta", {"name": "techpartner"}) - appendnode(tree, meta) - - config = dict(metadata=dict(require_meta_techpartner=True)) - with pytest.raises(InvalidValueError, match=".*Couldn't find any tech partners.*"): - check_meta_techpartner(tree, config) - - -def test_check_meta_techpartner_with_nonunique_children(tree): -# xmlcontent = """
-# -# Test -# -# Acme Inc. -# Acme Inc. -# -# -# -#
""" - meta = D("meta", {"name": "techpartner"}, - D("phrase", "Acme Inc."), - D("phrase", "Acme Inc."), - ) - appendnode(tree, meta) - - config = dict(metadata=dict(require_meta_techpartner=True)) - with pytest.raises(InvalidValueError, match=".*Duplicate tech partners.*"): - check_meta_techpartner(tree, config) - - -def test_check_meta_platform(tree): -# xmlcontent = """
-# -# Test -# Foo -# -# -#
""" - meta = D("meta", {"name": "platform"}, "Foo") - appendnode(tree, meta) - - config = dict(metadata=dict(require_meta_platform=True)) - assert check_meta_platform(tree, config) is None - - -def test_check_missing_meta_platform(tree): -# xmlcontent = """
-# -# Test -# -# -#
""" -# tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) - - config = dict(metadata=dict(require_meta_platform=True)) - with pytest.raises(InvalidValueError, - match=r".*Couldn't find required meta.*"): - check_meta_platform(tree, config) - - -def test_check_empty_meta_platform(tree): -# xmlcontent = """
-# -# Test -# -# -# -#
""" - meta = D("meta", {"name": "platform"}) - appendnode(tree, meta) - - config = dict(metadata=dict(require_meta_platform=True)) - with pytest.raises(InvalidValueError, match=r".*Empty meta.*"): - check_meta_platform(tree, config) - - -def test_check_meta_architecture(tree): -# xmlcontent = """
-# -# Test -# -# x86_64 -# -# -# -#
""" - meta = D("meta", {"name": "architecture"}, D("phrase", "x86_64")) - appendnode(tree, meta) - - config = dict(metadata=dict(require_meta_architecture=True, - valid_meta_architecture=["x86_64", "POWER"])) - assert check_meta_architecture(tree, config) is None - - -def test_check_missing_optional_meta_architecture(tree): -# xmlcontent = """
-# -# Test -# -# -#
""" - - config = dict(metadata=dict(require_meta_architecture=True)) - with pytest.raises(InvalidValueError, - match=r".*Couldn't find required meta.*"): - check_meta_architecture(tree, config) - - -def test_check_missing_child_meta_architecture(tree): -# xmlcontent = """
-# -# Test -# -# -# -#
""" - meta = D("meta", {"name": "architecture"}) - appendnode(tree, meta) - - config = dict(metadata=dict(require_meta_architecture=True)) - with pytest.raises( - InvalidValueError, - match=r".*Couldn't find any child elements in meta.*" - ): - check_meta_architecture(tree, config) - - -def test_check_duplicate_child_meta_architecture(tree): -# xmlcontent = """
-# -# Test -# -# x86_64 -# x86_64 -# -# -# -#
""" - meta = D("meta", {"name": "architecture"}, - D("phrase", "x86_64"), - D("phrase", "x86_64"), - ) - appendnode(tree, meta) - - config = dict(metadata=dict( - require_meta_architecture=True, - valid_meta_architecture=["x86_64", "POWER"])) - with pytest.raises( - InvalidValueError, match=r".*Duplicate architectures found in meta.*" - ): - check_meta_architecture(tree, config) - - -def test_check_unknown_child_meta_architecture(tree): -# xmlcontent = """
-# -# Test -# -# x86_64 -# foo -# -# -# -#
""" - meta = D("meta", {"name": "architecture"}, - D("phrase", "x86_64"), - D("phrase", "foo"), - ) - appendnode(tree, meta) - - config = dict( - metadata=dict( - require_meta_architecture=True, - valid_meta_architecture=["x86_64", "POWER"], - ) - ) - with pytest.raises(InvalidValueError, - match=r".*Unknown architecture.*"): - check_meta_architecture(tree, config) - - -def test_meta_category(tree): -# xmlcontent = """
-# -# Test -# -# Systems Management -# -# -# -#
""" - meta = D("meta", {"name": "category"}, - D("phrase", "Systems Management")) - appendnode(tree, meta) - - config = dict(metadata=dict(require_meta_category=True, - valid_meta_category=["Systems Management"])) - assert check_meta_category(tree, config) is None - - -def test_missing_optional_meta_category(tree): -# xmlcontent = """
-# -# Test -# -# -#
""" - - config = dict(metadata=dict(require_meta_category=True)) - with pytest.raises(InvalidValueError, - match=r".*Couldn't find required meta.*"): - check_meta_category(tree, config) - - -def test_missing_child_meta_category(tree): -# xmlcontent = """
-# -# Test -# -# -# -#
""" - meta = D("meta", {"name": "category"}) - appendnode(tree, meta) - - config = dict(metadata=dict(require_meta_category=True)) - with pytest.raises(InvalidValueError, - match=r".*Couldn't find any child elements in meta.*"): - check_meta_category(tree, config) - - -def test_duplicate_child_meta_category(tree): -# xmlcontent = """
-# -# Test -# -# Systems Management -# Systems Management -# -# -# -#
""" - meta = D("meta", {"name": "category"}, - D("phrase", "Systems Management"), - D("phrase", "Systems Management"), - ) - appendnode(tree, meta) - - config = dict(metadata=dict(require_meta_category=True)) - with pytest.raises(InvalidValueError, - match=r".*Duplicate categories found in meta.*"): - check_meta_category(tree, config) - - -def test_unknown_category_meta_category(tree): -# xmlcontent = """
-# -# Test -# -# Systems Management -# Foo -# -# -# -#
""" - meta = D("meta", {"name": "category"}, - D("phrase", "Systems Management"), - D("phrase", "Foo"), - ) - appendnode(tree, meta) - - config = dict(metadata=dict(require_meta_category=True, - valid_meta_category=["Systems Management"])) - with pytest.raises(InvalidValueError, - match=r".*Unknown category.*"): - check_meta_category(tree, config) - - -def test_meta_task(tree): -# xmlcontent = """
-# -# Test -# -# Configuration -# -# -#
""" - meta = D("meta", {"name": "task"}, - D("phrase", "Configuration")) - appendnode(tree, meta) - - config = dict(metadata=dict(require_meta_task=True, - valid_meta_task=["Configuration"])) - assert check_meta_task(tree, config) is None - - -def test_missing_child_meta_task(tree): -# xmlcontent = """
-# -# Test -# -# -#
""" - meta = D("meta", {"name": "task"}) - appendnode(tree, meta) - - config = dict(metadata=dict(require_meta_task=True), - valid_meta_task=["Configuration"]) - with pytest.raises(InvalidValueError, - match=r".*Couldn't find any child elements in meta.*"): - check_meta_task(tree, config) - - -def test_duplicate_child_meta_task(tree): -# xmlcontent = """
-# -# Test -# -# Configuration -# Configuration -# -# -#
""" - meta = D("meta", {"name": "task"}, - D("phrase", "Configuration"), - D("phrase", "Configuration"), - ) - appendnode(tree, meta) - - config = dict(metadata=dict(require_meta_task=True), - valid_meta_task=["Configuration"]) - with pytest.raises(InvalidValueError, - match=r".*Duplicate tasks found in meta.*"): - check_meta_task(tree, config) \ No newline at end of file diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_arch.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_arch.py new file mode 100644 index 000000000..33ecb8278 --- /dev/null +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_arch.py @@ -0,0 +1,117 @@ +from lxml import etree +import pytest + +from _utils import appendnode, D, xmlid + +from metadatavalidator.common import NAMESPACES +from metadatavalidator.checks.check_meta import ( + check_meta_architecture, + check_meta_category, + check_meta_task, +) +from metadatavalidator.exceptions import InvalidValueError + + +def test_check_meta_architecture(tree): +# xmlcontent = """
+# +# Test +# +# x86_64 +# +# +# +#
""" + meta = D("meta", {"name": "architecture"}, D("phrase", "x86_64")) + appendnode(tree, meta) + + config = dict(metadata=dict(require_meta_architecture=True, + valid_meta_architecture=["x86_64", "POWER"])) + assert check_meta_architecture(tree, config) is None + + +def test_check_missing_optional_meta_architecture(tree): +# xmlcontent = """
+# +# Test +# +# +#
""" + + config = dict(metadata=dict(require_meta_architecture=True)) + with pytest.raises(InvalidValueError, + match=r".*Couldn't find required meta.*"): + check_meta_architecture(tree, config) + + +def test_check_missing_child_meta_architecture(tree): +# xmlcontent = """
+# +# Test +# +# +# +#
""" + meta = D("meta", {"name": "architecture"}) + appendnode(tree, meta) + + config = dict(metadata=dict(require_meta_architecture=True)) + with pytest.raises( + InvalidValueError, + match=r".*Couldn't find any child elements in meta.*" + ): + check_meta_architecture(tree, config) + + +def test_check_duplicate_child_meta_architecture(tree): +# xmlcontent = """
+# +# Test +# +# x86_64 +# x86_64 +# +# +# +#
""" + meta = D("meta", {"name": "architecture"}, + D("phrase", "x86_64"), + D("phrase", "x86_64"), + ) + appendnode(tree, meta) + + config = dict(metadata=dict( + require_meta_architecture=True, + valid_meta_architecture=["x86_64", "POWER"])) + with pytest.raises( + InvalidValueError, match=r".*Duplicate architectures found in meta.*" + ): + check_meta_architecture(tree, config) + + +def test_check_unknown_child_meta_architecture(tree): +# xmlcontent = """
+# +# Test +# +# x86_64 +# foo +# +# +# +#
""" + meta = D("meta", {"name": "architecture"}, + D("phrase", "x86_64"), + D("phrase", "foo"), + ) + appendnode(tree, meta) + + config = dict( + metadata=dict( + require_meta_architecture=True, + valid_meta_architecture=["x86_64", "POWER"], + ) + ) + with pytest.raises(InvalidValueError, + match=r".*Unknown architecture.*"): + check_meta_architecture(tree, config) diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_category.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_category.py new file mode 100644 index 000000000..5b1803329 --- /dev/null +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_category.py @@ -0,0 +1,108 @@ +from lxml import etree +import pytest + +from _utils import appendnode, D, xmlid + +from metadatavalidator.common import NAMESPACES +from metadatavalidator.checks.check_meta import ( + check_meta_category, +) +from metadatavalidator.exceptions import InvalidValueError + + +def test_meta_category(tree): +# xmlcontent = """
+# +# Test +# +# Systems Management +# +# +# +#
""" + meta = D("meta", {"name": "category"}, + D("phrase", "Systems Management")) + appendnode(tree, meta) + + config = dict(metadata=dict(require_meta_category=True, + valid_meta_category=["Systems Management"])) + assert check_meta_category(tree, config) is None + + +def test_missing_optional_meta_category(tree): +# xmlcontent = """
+# +# Test +# +# +#
""" + + config = dict(metadata=dict(require_meta_category=True)) + with pytest.raises(InvalidValueError, + match=r".*Couldn't find required meta.*"): + check_meta_category(tree, config) + + +def test_missing_child_meta_category(tree): +# xmlcontent = """
+# +# Test +# +# +# +#
""" + meta = D("meta", {"name": "category"}) + appendnode(tree, meta) + + config = dict(metadata=dict(require_meta_category=True)) + with pytest.raises(InvalidValueError, + match=r".*Couldn't find any child elements in meta.*"): + check_meta_category(tree, config) + + +def test_duplicate_child_meta_category(tree): +# xmlcontent = """
+# +# Test +# +# Systems Management +# Systems Management +# +# +# +#
""" + meta = D("meta", {"name": "category"}, + D("phrase", "Systems Management"), + D("phrase", "Systems Management"), + ) + appendnode(tree, meta) + + config = dict(metadata=dict(require_meta_category=True)) + with pytest.raises(InvalidValueError, + match=r".*Duplicate categories found in meta.*"): + check_meta_category(tree, config) + + +def test_unknown_category_meta_category(tree): +# xmlcontent = """
+# +# Test +# +# Systems Management +# Foo +# +# +# +#
""" + meta = D("meta", {"name": "category"}, + D("phrase", "Systems Management"), + D("phrase", "Foo"), + ) + appendnode(tree, meta) + + config = dict(metadata=dict(require_meta_category=True, + valid_meta_category=["Systems Management"])) + with pytest.raises(InvalidValueError, + match=r".*Unknown category.*"): + check_meta_category(tree, config) + diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_description.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_description.py new file mode 100644 index 000000000..9c2e93dc9 --- /dev/null +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_description.py @@ -0,0 +1,63 @@ +from lxml import etree +import pytest + +from _utils import appendnode, D, xmlid + +from metadatavalidator.common import NAMESPACES +from metadatavalidator.checks.check_meta import ( + check_meta_description, +) +from metadatavalidator.exceptions import InvalidValueError + + + +def test_check_meta_description(tree): +# xmlcontent = """
+# +# Test +# The SEO description +# +# +#
""" + meta = D("meta", {"name": "description"}, "The SEO description") + appendnode(tree, meta) + assert check_meta_description(tree, {}) is None + + +def test_check_meta_description_wrong_length(tree): +# xmlcontent = """
+# +# Test +# The SEO description that is too long +# +# +#
""" + meta = D("meta", {"name": "description"}, "The SEO description that is too long") + appendnode(tree, meta) + + with pytest.raises(InvalidValueError, match=".*too long.*"): + check_meta_description(tree, dict(metadata=dict(meta_description_length=10))) + + +def test_check_required_meta_description(tree): +# xmlcontent = """
+# +# Test +# +# +#
""" + + with pytest.raises(InvalidValueError, match=".*required.*"): + check_meta_description(tree, dict(metadata=dict(meta_description_required=True))) + + +def test_check_optional_meta_description(tree): +# xmlcontent = """
+# +# Test +# +# +#
""" + + assert check_meta_description(tree, + dict(metadata=dict(meta_description_required=False))) is None diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_platform.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_platform.py new file mode 100644 index 000000000..bc7df50ec --- /dev/null +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_platform.py @@ -0,0 +1,56 @@ +from lxml import etree +import pytest + +from _utils import appendnode, D, xmlid + +from metadatavalidator.common import NAMESPACES +from metadatavalidator.checks.check_meta import ( + check_meta_platform, +) +from metadatavalidator.exceptions import InvalidValueError + + +def test_check_meta_platform(tree): +# xmlcontent = """
+# +# Test +# Foo +# +# +#
""" + meta = D("meta", {"name": "platform"}, "Foo") + appendnode(tree, meta) + + config = dict(metadata=dict(require_meta_platform=True)) + assert check_meta_platform(tree, config) is None + + +def test_check_missing_meta_platform(tree): +# xmlcontent = """
+# +# Test +# +# +#
""" +# tree = etree.ElementTree(etree.fromstring(xmlcontent, parser=xmlparser)) + + config = dict(metadata=dict(require_meta_platform=True)) + with pytest.raises(InvalidValueError, + match=r".*Couldn't find required meta.*"): + check_meta_platform(tree, config) + + +def test_check_empty_meta_platform(tree): +# xmlcontent = """
+# +# Test +# +# +# +#
""" + meta = D("meta", {"name": "platform"}) + appendnode(tree, meta) + + config = dict(metadata=dict(require_meta_platform=True)) + with pytest.raises(InvalidValueError, match=r".*Empty meta.*"): + check_meta_platform(tree, config) diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_series.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_series.py new file mode 100644 index 000000000..193c24850 --- /dev/null +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_series.py @@ -0,0 +1,77 @@ +from lxml import etree +import pytest + +from _utils import appendnode, D, xmlid + +from metadatavalidator.common import NAMESPACES +from metadatavalidator.checks.check_meta import ( + check_meta_series, +) +from metadatavalidator.exceptions import InvalidValueError + + +def test_check_meta_series(tree): +# xmlcontent = """
+# +# Test +# Products & Solutions +# +# +#
""" + meta = D("meta", {"name": "series"}, "Products & Solutions") + appendnode(tree, meta) + + config = dict(metadata=dict(require_meta_series=True, + valid_meta_series=["Products & Solutions", + "Best Practices", + "Technical References"])) + assert check_meta_series(tree, config) is None + + +def test_check_missing_optional_meta_series(tree): +# xmlcontent = """
+# +# Test +# +# +#
""" + + config = dict(metadata=dict(require_meta_series=False)) + assert check_meta_series(tree, config) is None + + +def test_check_wrong_meta_series(tree): +# xmlcontent = """
+# +# Test +# Foo +# +# +#
""" + meta = D("meta", {"name": "series"}, "Foo") + appendnode(tree, meta) + + config = dict(metadata=dict(require_meta_series=True, + valid_meta_series=["Best Practices", + "Technical References"])) + with pytest.raises(InvalidValueError, match="Meta series is invalid"): + check_meta_series(tree, config) + + +def test_check_require_meta_series(tree): +# xmlcontent = """
+# +# Test +# +# +#
""" + + config = dict( + metadata=dict( + require_meta_series=True, + valid_meta_series=["Best Practices", "Technical References"], + ) + ) + with pytest.raises(InvalidValueError, + match=r".*Couldn't find required meta.*"): + check_meta_series(tree, config) diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_task.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_task.py new file mode 100644 index 000000000..a395cbb96 --- /dev/null +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_task.py @@ -0,0 +1,69 @@ +from lxml import etree +import pytest + +from _utils import appendnode, D, xmlid + +from metadatavalidator.common import NAMESPACES +from metadatavalidator.checks.check_meta import ( + check_meta_category, + check_meta_task, +) +from metadatavalidator.exceptions import InvalidValueError + + +def test_meta_task(tree): +# xmlcontent = """
+# +# Test +# +# Configuration +# +# +#
""" + meta = D("meta", {"name": "task"}, + D("phrase", "Configuration")) + appendnode(tree, meta) + + config = dict(metadata=dict(require_meta_task=True, + valid_meta_task=["Configuration"])) + assert check_meta_task(tree, config) is None + + +def test_missing_child_meta_task(tree): +# xmlcontent = """
+# +# Test +# +# +#
""" + meta = D("meta", {"name": "task"}) + appendnode(tree, meta) + + config = dict(metadata=dict(require_meta_task=True), + valid_meta_task=["Configuration"]) + with pytest.raises(InvalidValueError, + match=r".*Couldn't find any child elements in meta.*"): + check_meta_task(tree, config) + + +def test_duplicate_child_meta_task(tree): +# xmlcontent = """
+# +# Test +# +# Configuration +# Configuration +# +# +#
""" + meta = D("meta", {"name": "task"}, + D("phrase", "Configuration"), + D("phrase", "Configuration"), + ) + appendnode(tree, meta) + + config = dict(metadata=dict(require_meta_task=True), + valid_meta_task=["Configuration"]) + with pytest.raises(InvalidValueError, + match=r".*Duplicate tasks found in meta.*"): + check_meta_task(tree, config) \ No newline at end of file diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_techpartner.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_techpartner.py new file mode 100644 index 000000000..4c517ddf5 --- /dev/null +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_techpartner.py @@ -0,0 +1,81 @@ +from lxml import etree +import pytest + +from _utils import appendnode, D, xmlid + +from metadatavalidator.common import NAMESPACES +from metadatavalidator.checks.check_meta import ( + check_meta_techpartner, +) +from metadatavalidator.exceptions import InvalidValueError + + +def test_check_meta_techpartner(tree): +# xmlcontent = """
+# +# Test +# +# Acme Inc. +# Foo Corp. +# +# +# +#
""" + meta = D("meta", {"name": "techpartner"}, + D("phrase", {}, "Acme Inc."), + D("phrase", {}, "Foo Corp."), + ) + appendnode(tree, meta) + + assert check_meta_techpartner(tree, {}) is None + + +def test_check_missing_meta_techpartner(tree): +# xmlcontent = """
+# +# Test +# +# +#
""" + + config = dict(metadata=dict(require_meta_techpartner=True)) + with pytest.raises(InvalidValueError, match=".*required.*"): + check_meta_techpartner(tree, config) + + +def test_check_missing_children_in_meta_techpartner(tree): +# xmlcontent = """
+# +# Test +# +# +# +#
""" + meta = D("meta", {"name": "techpartner"}) + appendnode(tree, meta) + + config = dict(metadata=dict(require_meta_techpartner=True)) + with pytest.raises(InvalidValueError, match=".*Couldn't find any tech partners.*"): + check_meta_techpartner(tree, config) + + +def test_check_meta_techpartner_with_nonunique_children(tree): +# xmlcontent = """
+# +# Test +# +# Acme Inc. +# Acme Inc. +# +# +# +#
""" + meta = D("meta", {"name": "techpartner"}, + D("phrase", "Acme Inc."), + D("phrase", "Acme Inc."), + ) + appendnode(tree, meta) + + config = dict(metadata=dict(require_meta_techpartner=True)) + with pytest.raises(InvalidValueError, match=".*Duplicate tech partners.*"): + check_meta_techpartner(tree, config) diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_title.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_title.py new file mode 100644 index 000000000..029bdc194 --- /dev/null +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_title.py @@ -0,0 +1,50 @@ +from lxml import etree +import pytest + +from _utils import appendnode, D, xmlid + +from metadatavalidator.common import NAMESPACES +from metadatavalidator.checks.check_meta import ( + check_meta_title, +) +from metadatavalidator.exceptions import InvalidValueError + + +def test_check_meta_title(tree): +# xmlcontent = """
+# +# Test +# The SEO title +# +# +#
""" + meta = D("meta", {"name": "title"}, "The SEO title") + appendnode(tree, meta) + + assert check_meta_title(tree, {}) is None + + +def test_check_meta_title_wrong_length(tree): +# xmlcontent = """
+# +# Test +# The SEO title that is too long +# +# +#
""" + meta = D("meta", {"name": "title"}, "The SEO title that is too long") + appendnode(tree, meta) + + with pytest.raises(InvalidValueError, match=".*too long.*"): + check_meta_title(tree, dict(metadata=dict(meta_title_length=10))) + + +def test_check_required_meta_title(tree): + with pytest.raises(InvalidValueError, match=".*required.*"): + check_meta_title(tree, dict(metadata=dict(meta_title_required=True))) + + +def test_check_optional_meta_title(tree): + config = dict(metadata=dict(meta_title_required=False)) + assert check_meta_title(tree, config) is None + From 6d8153e3d8e7ba3acd008a12410ad859d7d001fc Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Sat, 29 Jun 2024 21:50:09 +0200 Subject: [PATCH 094/107] Split test_check_info.py Move revhistory tests into test_check_revhistory.py --- .../tests/unit/checks/test_check_info.py | 325 +---------------- .../unit/checks/test_check_revhistory.py | 326 ++++++++++++++++++ 2 files changed, 328 insertions(+), 323 deletions(-) create mode 100644 python-scripts/metadatavalidator/tests/unit/checks/test_check_revhistory.py diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_info.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_info.py index 035147f3f..a14b3d723 100644 --- a/python-scripts/metadatavalidator/tests/unit/checks/test_check_info.py +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_info.py @@ -1,19 +1,13 @@ import pytest -from lxml import etree as ET -from _utils import appendnode, dbtag, D, xmlid +from _utils import dbtag, D from metadatavalidator.common import NAMESPACES from metadatavalidator.checks import ( check_info, - check_info_revhistory, - check_info_revhistory_revision, - check_info_revhistory_revision_date, - check_info_revhistory_revision_order, ) from metadatavalidator.util import getinfo, info_or_fail - -from metadatavalidator.exceptions import InvalidValueError, MissingAttributeWarning +from metadatavalidator.exceptions import 
InvalidValueError def test_getinfo_with_regular_tree(tree): @@ -61,318 +55,3 @@ def test_check_info_missing(): with pytest.raises(InvalidValueError, match=".*Couldn't find element."): check_info(tree, {}) - - -def test_check_info_revhistory_missing(tree): - with pytest.raises(InvalidValueError, - match="Couldn't find a revhistory element"): - check_info_revhistory(tree, {"metadata": {"require_revhistory": True}}) - - -def test_check_info_revhistory(tree): -# xmlcontent = """
-# -# Test -# -# -# -#
""" -# tree = ET.ElementTree( -# ET.fromstring(xmlcontent, parser=xmlparser) -# ) - revhistory = D("revhistory", {xmlid: "rh"}) - appendnode(tree, revhistory) - - assert check_info_revhistory(tree, {}) is None - - -def test_check_info_revhistory_without_info(tree): - info = tree.find("./d:info", namespaces=NAMESPACES) - info.getparent().remove(info) - with pytest.raises(InvalidValueError, - match="Couldn't find element."): - check_info_revhistory(tree, {}) - - -def test_check_info_revhistory_xmlid(tree): -# xmlcontent = """
-# -# Test -# -# -# -#
""" - revhistory = D("revhistory", {xmlid: "rh1"}) - appendnode(tree, revhistory) - - assert check_info_revhistory(tree, {}) is None - - -def test_info_revhistory_missing_xmlid(tree): -# xmlcontent = """
-# -# Test -# -# -# -#
""" - revhistory = D("revhistory") - appendnode(tree, revhistory) - - with pytest.raises(InvalidValueError, - match="Couldn't find xml:id attribute"): - check_info_revhistory(tree, {}) - - -def test_check_info_revhistory_xmlid_with_wrong_value(tree): -# xmlcontent = """
-# -# Test -# -# -# -#
""" - appendnode(tree, D("revhistory", {xmlid: "wrong_id"})) - - with pytest.raises(InvalidValueError, - match="should start with 'rh'"): - check_info_revhistory(tree, {}) - - -def test_check_info_revhistory_revision(tree): -# xmlcontent = """
-# -# Test -# -# -# 2021-01-01 -# -# -# -# -#
""" - revhistory = D("revhistory", - D("revision", {xmlid: "rh"}, - D("date", "2021") - ) - ) - appendnode(tree, revhistory) - - print(ET.tostring(tree.getroot(), pretty_print=True).decode("utf-8")) - - assert check_info_revhistory_revision(tree, {}) is None - - -def test_check_info_revhistory_revision_missing_xmlid(tree): -# xmlcontent = """
-# -# Test -# -# -# 2021-01-01 -# -# -# -# -#
""" - revhistory = D("revhistory", - D("revision", - D("date", "2021-01-01") - ) - ) - appendnode(tree, revhistory) - - with pytest.raises(MissingAttributeWarning, - match="Missing recommended attribute in"): - check_info_revhistory_revision( - tree, - {"metadata": {"require_xmlid_on_revision": True}}) - - -def test_check_info_revhistory_revision_missing(tree): -# xmlcontent = """
-# -# Test -# -# -# -#
""" - appendnode(tree, D("revhistory")) - - with pytest.raises(InvalidValueError, - match="Couldn't find a revision element"): - check_info_revhistory_revision( - tree, - {"metadata": {"require_xmlid_on_revision": True}} - ) - - -def test_check_info_revhistory_revision_date(tree): -# xmlcontent = """
-# -# Test -# -# -# 2021-01-01 -# -# -# -# -#
""" - revhistory = D("revhistory", - D("revision", - D("date", "2021-01-01") - ) - ) - appendnode(tree, revhistory) - - assert check_info_revhistory_revision_date(tree, {}) is None - - -def test_check_info_revhistory_revision_date_missing(tree): -# xmlcontent = """
-# -# Test -# -# -# -# -# -#
""" - revhistory = D("revhistory", D("revision")) - appendnode(tree, revhistory) - - with pytest.raises(InvalidValueError, - match="Couldn't find a date element"): - check_info_revhistory_revision_date(tree, {}) - - -def test_check_info_revhistory_revision_date_invalid_format(tree): -# xmlcontent = """
-# -# Test -# -# -# January 2024 -# -# -# -# -#
""" - revhistory = D("revhistory", D("revision", D("date", "January 2024"))) - appendnode(tree, revhistory) - - with pytest.raises(InvalidValueError, - match=".*ate is empty or has invalid format.*"): - check_info_revhistory_revision_date(tree, {}) - - -def test_check_info_revhistory_revision_date_invalid_value(tree): -# xmlcontent = """
-# -# Test -# -# -# 2024-13 -# -# -# -# -#
""" - revhistory = D("revhistory", {xmlid: "rh"}, - D("revision", D("date", "2024-13"))) - appendnode(tree, revhistory) - - with pytest.raises(InvalidValueError, - match="Invalid value in metadata" - ): - check_info_revhistory_revision_date(tree, {}) - - -def test_check_info_revhistory_revision_order(tree): -# xmlcontent = """
-# -# Test -# -# -# 2024-12 -# -# -# 2023-12-12 -# -# -# 2022-04 -# -# -# -# -#
""" - revhistory = D("revhistory", {xmlid: "rh"}, - D("revision", D("date", "2024-12")), - D("revision", D("date", "2023-12-12")), - D("revision", D("date", "2022-04"))) - appendnode(tree, revhistory) - - assert check_info_revhistory_revision_order(tree, {}) is None - - -def test_check_info_revhistory_revision_order_one_invalid_date(tree): -# xmlcontent = """
-# -# Test -# -# -# 2024-53 -# -# -# 2023-12-12 -# -# -# 2022-04 -# -# -# -# -#
""" - revhistory = D("revhistory", {xmlid: "rh"}, - D("revision", D("date", "2024-53")), - D("revision", D("date", "2023-12-12")), - D("revision", D("date", "2022-04")) - ) - appendnode(tree, revhistory) - - with pytest.raises(InvalidValueError, - match=".*Couldn't convert all dates.*see position dates=1.*" - ): - check_info_revhistory_revision_order(tree, {}) - - -def test_check_info_revhistory_revision_wrong_order(tree): -# xmlcontent = """
-# -# Test -# -# -# 2024-12 -# -# -# 2023-12-12 -# -# -# 2026-04 -# -# -# -# -#
""" - revhistory = D("revhistory", {xmlid: "rh"}, - D("revision", D("date", "2024-12")), - D("revision", D("date", "2023-12-12")), - D("revision", D("date", "2026-04")) - ) - appendnode(tree, revhistory) - - with pytest.raises(InvalidValueError, - match=".*Dates in revhistory/revision are not in descending order.*" - ): - check_info_revhistory_revision_order(tree, {}) \ No newline at end of file diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_revhistory.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_revhistory.py new file mode 100644 index 000000000..eb6b9dcd9 --- /dev/null +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_revhistory.py @@ -0,0 +1,326 @@ +import pytest + +from _utils import appendnode, D, xmlid + +from metadatavalidator.common import NAMESPACES +from metadatavalidator.checks import ( + check_info_revhistory, + check_info_revhistory_revision, + check_info_revhistory_revision_date, + check_info_revhistory_revision_order, +) + +from metadatavalidator.exceptions import InvalidValueError, MissingAttributeWarning + + +def test_check_info_revhistory_missing(tree): + with pytest.raises(InvalidValueError, + match="Couldn't find a revhistory element"): + check_info_revhistory(tree, {"metadata": {"require_revhistory": True}}) + + +def test_check_info_revhistory(tree): +# xmlcontent = """
+# +# Test +# +# +# +#
""" +# tree = ET.ElementTree( +# ET.fromstring(xmlcontent, parser=xmlparser) +# ) + revhistory = D("revhistory", {xmlid: "rh"}) + appendnode(tree, revhistory) + + assert check_info_revhistory(tree, {}) is None + + +def test_check_info_revhistory_without_info(tree): + info = tree.find("./d:info", namespaces=NAMESPACES) + info.getparent().remove(info) + with pytest.raises(InvalidValueError, + match="Couldn't find element."): + check_info_revhistory(tree, {}) + + +def test_check_info_revhistory_xmlid(tree): +# xmlcontent = """
+# +# Test +# +# +# +#
""" + revhistory = D("revhistory", {xmlid: "rh1"}) + appendnode(tree, revhistory) + + assert check_info_revhistory(tree, {}) is None + + +def test_info_revhistory_missing_xmlid(tree): +# xmlcontent = """
+# +# Test +# +# +# +#
""" + revhistory = D("revhistory") + appendnode(tree, revhistory) + + with pytest.raises(InvalidValueError, + match="Couldn't find xml:id attribute"): + check_info_revhistory(tree, {}) + + +def test_check_info_revhistory_xmlid_with_wrong_value(tree): +# xmlcontent = """
+# +# Test +# +# +# +#
""" + appendnode(tree, D("revhistory", {xmlid: "wrong_id"})) + + with pytest.raises(InvalidValueError, + match="should start with 'rh'"): + check_info_revhistory(tree, {}) + + +def test_check_info_revhistory_revision(tree): +# xmlcontent = """
+# +# Test +# +# +# 2021-01-01 +# +# +# +# +#
""" + revhistory = D("revhistory", + D("revision", {xmlid: "rh"}, + D("date", "2021") + ) + ) + appendnode(tree, revhistory) + + assert check_info_revhistory_revision(tree, {}) is None + + +def test_check_info_revhistory_revision_missing_xmlid(tree): +# xmlcontent = """
+# +# Test +# +# +# 2021-01-01 +# +# +# +# +#
""" + revhistory = D("revhistory", + D("revision", + D("date", "2021-01-01") + ) + ) + appendnode(tree, revhistory) + + with pytest.raises(MissingAttributeWarning, + match="Missing recommended attribute in"): + check_info_revhistory_revision( + tree, + {"metadata": {"require_xmlid_on_revision": True}}) + + +def test_check_info_revhistory_revision_missing(tree): +# xmlcontent = """
+# +# Test +# +# +# +#
""" + appendnode(tree, D("revhistory")) + + with pytest.raises(InvalidValueError, + match="Couldn't find a revision element"): + check_info_revhistory_revision( + tree, + {"metadata": {"require_xmlid_on_revision": True}} + ) + + +def test_check_info_revhistory_revision_date(tree): +# xmlcontent = """
+# +# Test +# +# +# 2021-01-01 +# +# +# +# +#
""" + revhistory = D("revhistory", + D("revision", + D("date", "2021-01-01") + ) + ) + appendnode(tree, revhistory) + + assert check_info_revhistory_revision_date(tree, {}) is None + + +def test_check_info_revhistory_revision_date_missing(tree): +# xmlcontent = """
+# +# Test +# +# +# +# +# +#
""" + revhistory = D("revhistory", D("revision")) + appendnode(tree, revhistory) + + with pytest.raises(InvalidValueError, + match="Couldn't find a date element"): + check_info_revhistory_revision_date(tree, {}) + + +def test_check_info_revhistory_revision_date_invalid_format(tree): +# xmlcontent = """
+# +# Test +# +# +# January 2024 +# +# +# +# +#
""" + revhistory = D("revhistory", D("revision", D("date", "January 2024"))) + appendnode(tree, revhistory) + + with pytest.raises(InvalidValueError, + match=".*ate is empty or has invalid format.*"): + check_info_revhistory_revision_date(tree, {}) + + +def test_check_info_revhistory_revision_date_invalid_value(tree): +# xmlcontent = """
+# +# Test +# +# +# 2024-13 +# +# +# +# +#
""" + revhistory = D("revhistory", {xmlid: "rh"}, + D("revision", D("date", "2024-13"))) + appendnode(tree, revhistory) + + with pytest.raises(InvalidValueError, + match="Invalid value in metadata" + ): + check_info_revhistory_revision_date(tree, {}) + + +def test_check_info_revhistory_revision_order(tree): +# xmlcontent = """
+# +# Test +# +# +# 2024-12 +# +# +# 2023-12-12 +# +# +# 2022-04 +# +# +# +# +#
""" + revhistory = D("revhistory", {xmlid: "rh"}, + D("revision", D("date", "2024-12")), + D("revision", D("date", "2023-12-12")), + D("revision", D("date", "2022-04"))) + appendnode(tree, revhistory) + + assert check_info_revhistory_revision_order(tree, {}) is None + + +def test_check_info_revhistory_revision_order_one_invalid_date(tree): +# xmlcontent = """
+# +# Test +# +# +# 2024-53 +# +# +# 2023-12-12 +# +# +# 2022-04 +# +# +# +# +#
""" + revhistory = D("revhistory", {xmlid: "rh"}, + D("revision", D("date", "2024-53")), + D("revision", D("date", "2023-12-12")), + D("revision", D("date", "2022-04")) + ) + appendnode(tree, revhistory) + + with pytest.raises(InvalidValueError, + match=".*Couldn't convert all dates.*see position dates=1.*" + ): + check_info_revhistory_revision_order(tree, {}) + + +def test_check_info_revhistory_revision_wrong_order(tree): +# xmlcontent = """
+# +# Test +# +# +# 2024-12 +# +# +# 2023-12-12 +# +# +# 2026-04 +# +# +# +# +#
""" + revhistory = D("revhistory", {xmlid: "rh"}, + D("revision", D("date", "2024-12")), + D("revision", D("date", "2023-12-12")), + D("revision", D("date", "2026-04")) + ) + appendnode(tree, revhistory) + + with pytest.raises(InvalidValueError, + match=".*Dates in revhistory/revision are not in descending order.*" + ): + check_info_revhistory_revision_order(tree, {}) From 265cd44d102dbeb2b87cb2d012a735961a84d4c7 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Sat, 29 Jun 2024 22:40:23 +0200 Subject: [PATCH 095/107] Add missing tests for --- .../src/metadatavalidator/config.py | 1 + .../tests/unit/checks/test_check_meta_task.py | 40 ++++++++++++++++++- 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/config.py b/python-scripts/metadatavalidator/src/metadatavalidator/config.py index ab990b3b4..067b0bffe 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/config.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/config.py @@ -174,6 +174,7 @@ def validate_valid_meta_category(config: dict) -> list[str]: if x ] + def validate_valid_meta_task(config: dict) -> list[str]: """Validate the meta task diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_task.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_task.py index a395cbb96..88fa232a6 100644 --- a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_task.py +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_task.py @@ -1,4 +1,3 @@ -from lxml import etree import pytest from _utils import appendnode, D, xmlid @@ -21,7 +20,8 @@ def test_meta_task(tree): #
# """ meta = D("meta", {"name": "task"}, - D("phrase", "Configuration")) + D("phrase", "Configuration") + ) appendnode(tree, meta) config = dict(metadata=dict(require_meta_task=True, @@ -29,6 +29,19 @@ def test_meta_task(tree): assert check_meta_task(tree, config) is None +def test_missing_required_meta_task(tree): +# xmlcontent = """
+# +# Test +# +#
""" + config = dict(metadata=dict(require_meta_task=True, + valid_meta_task=["Configuration"])) + with pytest.raises(InvalidValueError, + match=r".*Couldn't find required meta\[@name='task'\].*"): + check_meta_task(tree, config) + + def test_missing_child_meta_task(tree): # xmlcontent = """
# @@ -66,4 +79,27 @@ def test_duplicate_child_meta_task(tree): valid_meta_task=["Configuration"]) with pytest.raises(InvalidValueError, match=r".*Duplicate tasks found in meta.*"): + check_meta_task(tree, config) + + +def test_unknown_child_meta_task(tree): +# xmlcontent = """
+# +# Test +# +# Configuration +# Unknown +# +# +#
""" + meta = D("meta", {"name": "task"}, + D("phrase", "Configuration"), + D("phrase", "Unknown"), + ) + appendnode(tree, meta) + + config = dict(metadata=dict(require_meta_task=True, + valid_meta_task=["Configuration"])) + with pytest.raises(InvalidValueError, + match=r".*Unknown task\(s\) \{'Unknown'\}.*"): check_meta_task(tree, config) \ No newline at end of file From 4593b358e706714140b3b95773793e4dea1e6b64 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Sun, 30 Jun 2024 10:07:13 +0200 Subject: [PATCH 096/107] Correct and improve test/checks for revhistory --- .../src/metadatavalidator/checks/check_info.py | 9 ++++++--- .../tests/unit/checks/test_check_revhistory.py | 11 ++++++++++- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py index 7b8f722b2..bc72ac69d 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_info.py @@ -34,7 +34,7 @@ def check_info_revhistory(tree: etree._ElementTree, config: dict[t.Any, t.Any]): if revhistory is None: if required: raise InvalidValueError( - f"Couldn't find a revhistory element in {info.tag}" + f"Couldn't find a revhistory element in info" f" (line {info.sourceline})." ) return None @@ -66,7 +66,7 @@ def check_info_revhistory_revision(tree: etree._ElementTree, revision = revhistory.find("./d:revision", namespaces=NAMESPACES) if revision is None: raise InvalidValueError( - f"Couldn't find a revision element in {revhistory.tag}" + f"Couldn't find any revision element in revhistory" f" (line {revhistory.sourceline})." 
) xmlid = revision.attrib.get(f"{{{XML_NS}}}id") @@ -108,7 +108,10 @@ def check_info_revhistory_revision_order(tree: etree._ElementTree, namespaces=NAMESPACES) xpath = getfullxpath(revhistory) if not revisions: - return None + raise InvalidValueError( + f"Couldn't find any revision element in revhistory" + f" (line {revhistory.sourceline})." + ) date_elements = [rev.find("./d:date", namespaces=NAMESPACES) for rev in revisions] diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_revhistory.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_revhistory.py index eb6b9dcd9..c8b5dd0c8 100644 --- a/python-scripts/metadatavalidator/tests/unit/checks/test_check_revhistory.py +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_revhistory.py @@ -148,7 +148,7 @@ def test_check_info_revhistory_revision_missing(tree): appendnode(tree, D("revhistory")) with pytest.raises(InvalidValueError, - match="Couldn't find a revision element"): + match=".*Couldn't find any revision element.*"): check_info_revhistory_revision( tree, {"metadata": {"require_xmlid_on_revision": True}} @@ -264,6 +264,15 @@ def test_check_info_revhistory_revision_order(tree): assert check_info_revhistory_revision_order(tree, {}) is None +def test_check_info_revhistory_revision_order_without_revision(tree): + revhistory = D("revhistory", {xmlid: "rh"}) + appendnode(tree, revhistory) + + with pytest.raises(InvalidValueError, + match=".*Couldn't find any revision element.*"): + check_info_revhistory_revision_order(tree, {}) + + def test_check_info_revhistory_revision_order_one_invalid_date(tree): # xmlcontent = """
# From 473f218ef1b58dd7a1396a2b2e044a218af3e17e Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Mon, 1 Jul 2024 12:08:19 +0200 Subject: [PATCH 097/107] Update the README --- python-scripts/metadatavalidator/README.rst | 104 +++++++++++++++++--- 1 file changed, 93 insertions(+), 11 deletions(-) diff --git a/python-scripts/metadatavalidator/README.rst b/python-scripts/metadatavalidator/README.rst index 8dac076ff..f46c76db3 100644 --- a/python-scripts/metadatavalidator/README.rst +++ b/python-scripts/metadatavalidator/README.rst @@ -12,8 +12,51 @@ Requirements * Python >=3.11 (only due to for installing with :file:`pyproject.toml`.) -Installation ------------- +Preparing the environment +------------------------- + +It's recommended to create a Python virtual environment first before you +proceed further. The virtual environment is a self-contained Python environment +that separates the dependencies from the system Python installation. + +To create a virtual environment, execute the following steps: + +1. Create a virtual environment with Python 3.11: + + .. code-block:: bash + + $ python3.11 -m venv .venv311 + +1. Activate the virtual environment: + + .. code-block:: bash + + $ source .venv311/bin/activate + + Your prompt changes to show the active virtual environment: + + .. code-block:: bash + + (.venv311) $ + +1. Upgrade the package manager ``pip`` and ``setuptools`` to the latest version: + + .. code-block:: bash + + (.venv311) $ pip install --upgrade pip setuptools + +This makes your virtual environment ready for the next steps. + +If you don't need the virtual environment anymore, you can deactivate it: + +.. code-block:: bash + + (.venv311) $ deactivate + + + +Installing the script +--------------------- To install the script, run the following command: @@ -22,15 +65,37 @@ To install the script, run the following command: $ pip install . 
-For development, you can install the script in editable mode: +For development, install the script in "editable" mode: .. code-block:: bash $ pip install -e .[test] -Usage ------ +Setting the configuration +------------------------- + +Before you call the script, check the values in the configuration file. +The configuration file is an INI file and is searched in the following order (from highest to lowest): + +* Command line with :option:`--config`. This doesn't search for other configuration files. +* Environment variable :envvar:`METAVALIDATOR_CONFIG`. +* In the current directory: :file:`metadatavalidator.ini` +* In the user's home directory: :file:`~/.config/metadatavalidator/config.ini` +* In the system: :file:`/etc/metadatavalidator/config.ini` + +The configuration file is a standard INI file. +All boolean values are case-insensitive and can be ``true``/``yes``, ``on``/``off`` or ``0``/``1``. +Everything else is considered as ``false``. +List values are separated by commas. + +All config files are merged together. If a key is defined in multiple files, +the last one wins. This way you can have a global configuration in the +system directory and a local one in the current directory. + + +Calling the script +------------------ Call the script with the following command: @@ -38,7 +103,7 @@ Call the script with the following command: $ metadatavalidator PATH_TO_DOCBOOK_FILES -The script will show all problems with metadata +The script will show all problems with metadata: .. code-block:: @@ -50,12 +115,26 @@ The script will show all problems with metadata [2] b.xml: 2.1: check_meta_task: Invalid value in metadata Unknown task(s) {'Clusering'}. Allowed are ... +The output shows: + +* The filename. +* The name of the check that the script executed and failed. +* A description of the problem. +* In some cases a line number. + + If wanted, you can add your own configuration file with the option :option:`--config`: .. 
code-block:: bash $ metadatavalidator --config /path/to/config.ini PATH_TO_DOCBOOK_FILES +For machine-readable output of the result, use the option :option:`--format`: + +.. code-block:: bash + + $ metadatavalidator --format json PATH_TO_DOCBOOK_FILES + Configuration ------------- @@ -76,19 +155,18 @@ The configuration file is search in the following order (first is the highest): Configuration values -------------------- -The configuration file is a standard INI file. The following values are -recognized: +The following values are recognized: * :var:`validator`: Global options to configure the validator. * :var:`file_extension`: The file extension to search for. Default is ``.xml``. - * :var:`check_root_elements`: List of allowed root elements (space separated by local DocBook name). Default is ``article book topic``. + * :var:`check_root_elements`: List of allowed root elements (space separated by local DocBook name). Default is ``assembly article book topic``. * :var:`valid_languages`: List of valid languages (space separated by ISO 639-1 code). Default is ``ar-ar cs-cz de-de en-us es-es fr-fr hu-hu it-it ja-jp ko-kr nl-nl pl-pl pt-br ru-ru sv-se zh-cn zh-tw``. * :var:`metadata`: Options to change behaviour of specific `` tags. - * :var:`revhistory`: Requires a ``<revhistory>`` tag or not. + * :var:`require_revhistory`: Requires a ``<revhistory>`` tag or not. * :var:`require_xmlid_on_revision`: Requires a ``xml:id`` attribute on each ``<revision>`` tag or not. @@ -114,4 +192,8 @@ recognized: * :var:`require_meta_category`: Requires a ``<meta name="category">`` tag or not. - * :var:`valid_meta_category`: Lists the valid category names for ``<meta name="category">/<phrase>``. \ No newline at end of file + * :var:`valid_meta_category`: Lists the valid category names for ``<meta name="category">/<phrase>``. + + * :var:`require_meta_task`: Requires a ``<meta name="task">`` tag or not. + + * :var:`valid_meta_task`: Lists the valid task names for ``<meta name="task">/<phrase>``. 
\ No newline at end of file From bb14462983ce1c9a1e17480302bcf14cf757f15b Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Mon, 1 Jul 2024 12:46:04 +0200 Subject: [PATCH 098/107] Require some tags * require_revhistory * require_meta_title * require_meta_description * require_meta_series --- python-scripts/metadatavalidator/metadatavalidator.ini | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python-scripts/metadatavalidator/metadatavalidator.ini b/python-scripts/metadatavalidator/metadatavalidator.ini index bbef4f85f..6cce98790 100644 --- a/python-scripts/metadatavalidator/metadatavalidator.ini +++ b/python-scripts/metadatavalidator/metadatavalidator.ini @@ -9,15 +9,15 @@ require_revhistory = on require_xmlid_on_revision = 1 # -require_meta_title = off +require_meta_title = on meta_title_length = 55 # -require_meta_description = off +require_meta_description = on meta_description_length = 150 # -require_meta_series = off +require_meta_series = on valid_meta_series = Products & Solutions, Best Practices, Technical References # From 39539dc498d306bf526791e3ea86ac5b525a2df9 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Mon, 1 Jul 2024 13:18:46 +0200 Subject: [PATCH 099/107] Add .code-workspace for VSCode --- .../metadatavalidator/metadatavalidator.code-workspace | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 python-scripts/metadatavalidator/metadatavalidator.code-workspace diff --git a/python-scripts/metadatavalidator/metadatavalidator.code-workspace b/python-scripts/metadatavalidator/metadatavalidator.code-workspace new file mode 100644 index 000000000..876a1499c --- /dev/null +++ b/python-scripts/metadatavalidator/metadatavalidator.code-workspace @@ -0,0 +1,8 @@ +{ + "folders": [ + { + "path": "." 
+ } + ], + "settings": {} +} \ No newline at end of file From d6cbbe3568c81c323c81b8aa11065cbed5302011 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Mon, 1 Jul 2024 13:28:42 +0200 Subject: [PATCH 100/107] Use plural forms of valid_meta_* --- python-scripts/metadatavalidator/README.rst | 6 ++--- .../metadatavalidator/metadatavalidator.ini | 2 +- .../metadatavalidator/checks/check_meta.py | 6 ++--- .../src/metadatavalidator/config.py | 24 +++++++++---------- .../metadatavalidator/tests/conftest.py | 4 ++-- .../integration/badcases/config-test.ini | 4 ++-- .../integration/goodcases/config-test.ini | 4 ++-- .../tests/unit/checks/test_check_meta_arch.py | 6 ++--- .../unit/checks/test_check_meta_category.py | 4 ++-- .../tests/unit/checks/test_check_meta_task.py | 10 ++++---- .../tests/unit/data/metadatavalidator.ini | 4 ++-- .../tests/unit/test_script_config.py | 6 ++--- 12 files changed, 40 insertions(+), 40 deletions(-) diff --git a/python-scripts/metadatavalidator/README.rst b/python-scripts/metadatavalidator/README.rst index f46c76db3..1ba1a9549 100644 --- a/python-scripts/metadatavalidator/README.rst +++ b/python-scripts/metadatavalidator/README.rst @@ -188,12 +188,12 @@ The following values are recognized: * :var:`require_meta_architecture`: Requires a ```` tag or not. - * :var:`valid_meta_architecture`: Lists the valid architecture names for ``/``. + * :var:`valid_meta_architectures`: Lists the valid architecture names for ``/``. * :var:`require_meta_category`: Requires a ```` tag or not. - * :var:`valid_meta_category`: Lists the valid category names for ``/``. + * :var:`valid_meta_categories`: Lists the valid category names for ``/``. * :var:`require_meta_task`: Requires a ```` tag or not. - * :var:`valid_meta_task`: Lists the valid task names for ``/``. \ No newline at end of file + * :var:`valid_meta_tasks`: Lists the valid task names for ``/``. 
\ No newline at end of file diff --git a/python-scripts/metadatavalidator/metadatavalidator.ini b/python-scripts/metadatavalidator/metadatavalidator.ini index 6cce98790..e9b1f5ee3 100644 --- a/python-scripts/metadatavalidator/metadatavalidator.ini +++ b/python-scripts/metadatavalidator/metadatavalidator.ini @@ -28,7 +28,7 @@ require_meta_platform = off # require_meta_architecture = off -valid_meta_architecture = Arm, AMD64/Intel\u00a064, POWER, IBM LinuxONE +valid_meta_architectures = Arm, AMD64/Intel\u00a064, POWER, IBM LinuxONE # require_meta_task = off diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py index ec6ad47aa..495dfec7c 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py @@ -157,7 +157,7 @@ def check_meta_architecture(tree: etree._ElementTree, valid_archs = [ x.strip() for x in config.get("metadata", {} - ).get("valid_meta_architecture", []) + ).get("valid_meta_architectures", []) if x ] @@ -203,7 +203,7 @@ def check_meta_category(tree: etree._ElementTree, valid_cats = [ x.strip() for x in config.get("metadata", {} - ).get("valid_meta_category", []) + ).get("valid_meta_categories", []) if x ] @@ -249,7 +249,7 @@ def check_meta_task(tree: etree._ElementTree, valid_tasks = [ x.strip() for x in config.get("metadata", {} - ).get("valid_meta_task", []) + ).get("valid_meta_tasks", []) if x ] diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/config.py b/python-scripts/metadatavalidator/src/metadatavalidator/config.py index 067b0bffe..02646882d 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/config.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/config.py @@ -149,40 +149,40 @@ def validate_valid_meta_series(config: dict) -> list[str]: ] -def 
validate_valid_meta_architecture(config: dict) -> list[str]: +def validate_valid_meta_architectures(config: dict) -> list[str]: """Validate the meta architecture :param config: the configuration object :return: a list of valid meta architecture """ return [x.strip() for x in re.split(r"[;,]", - config.get("metadata", {}).get("valid_meta_architecture", "") + config.get("metadata", {}).get("valid_meta_architectures", "") ) if x ] -def validate_valid_meta_category(config: dict) -> list[str]: +def validate_valid_meta_categories(config: dict) -> list[str]: """Validate the meta category :param config: the configuration object :return: a list of valid meta category """ return [x.strip() for x in re.split(r"[;,]", - config.get("metadata", {}).get("valid_meta_category", "") + config.get("metadata", {}).get("valid_meta_categories", "") ) if x ] -def validate_valid_meta_task(config: dict) -> list[str]: +def validate_valid_meta_tasks(config: dict) -> list[str]: """Validate the meta task :param config: the configuration object :return: a list of valid meta task """ return [x.strip() for x in re.split(r"[;,]", - config.get("metadata", {}).get("valid_meta_task", "") + config.get("metadata", {}).get("valid_meta_tasks", "") ) if x ] @@ -242,8 +242,8 @@ def validate_and_convert_config(config: configparser.ConfigParser) -> dict[t.Any ] = truefalse(theconfig.get("metadata", {}).get("require_meta_architecture", False)) theconfig.setdefault("metadata", {})[ - "valid_meta_architecture" - ] = validate_valid_meta_architecture(theconfig) + "valid_meta_architectures" + ] = validate_valid_meta_architectures(theconfig) # require_meta_techpartner = truefalse( @@ -263,16 +263,16 @@ def validate_and_convert_config(config: configparser.ConfigParser) -> dict[t.Any "require_meta_category" ] = truefalse(theconfig.get("metadata", {}).get("require_meta_category", False)) theconfig.setdefault("metadata", {})[ - "valid_meta_category" - ] = validate_valid_meta_category(theconfig) + "valid_meta_categories" 
+ ] = validate_valid_meta_categories(theconfig) # theconfig.setdefault("metadata", {})[ "require_meta_task" ] = truefalse(theconfig.get("metadata", {}).get("require_meta_task", False)) theconfig.setdefault("metadata", {})[ - "valid_meta_task" - ] = validate_valid_meta_task(theconfig) + "valid_meta_tasks" + ] = validate_valid_meta_tasks(theconfig) # Store the configfiles theconfig["configfiles"] = getattr(config, "configfiles") diff --git a/python-scripts/metadatavalidator/tests/conftest.py b/python-scripts/metadatavalidator/tests/conftest.py index b1aa95a43..9c3ca3541 100644 --- a/python-scripts/metadatavalidator/tests/conftest.py +++ b/python-scripts/metadatavalidator/tests/conftest.py @@ -68,8 +68,8 @@ def config() -> ConfigParser: config.set("metadata", "meta_title_length", "50") config.set("metadata", "meta_description_length", "150") # - config.set("metadata", "valid_meta_architecture", "A, B, C") - config.set("metadata", "valid_meta_category", "D, E, F") + config.set("metadata", "valid_meta_architectures", "A, B, C") + config.set("metadata", "valid_meta_categories", "D, E, F") setattr(config, "configfiles", None) return config diff --git a/python-scripts/metadatavalidator/tests/integration/badcases/config-test.ini b/python-scripts/metadatavalidator/tests/integration/badcases/config-test.ini index f166f6384..b865deddc 100644 --- a/python-scripts/metadatavalidator/tests/integration/badcases/config-test.ini +++ b/python-scripts/metadatavalidator/tests/integration/badcases/config-test.ini @@ -16,8 +16,8 @@ require_meta_description = off meta_description_length = 150 require_meta_architecture = off -valid_meta_architecture = Arm, AMD64/Intel\u00a064, POWER, IBM LinuxONE +valid_meta_architectures = Arm, AMD64/Intel\u00a064, POWER, IBM LinuxONE # require_meta_category = off -valid_meta_category = 3rd Party, Cloud, Containerization, Developer Tools, High Availability, Tuning & Performance, SAP, Security, Storage, Systems Management, Virtualization \ No newline at end 
of file +valid_meta_categories = 3rd Party, Cloud, Containerization, Developer Tools, High Availability, Tuning & Performance, SAP, Security, Storage, Systems Management, Virtualization \ No newline at end of file diff --git a/python-scripts/metadatavalidator/tests/integration/goodcases/config-test.ini b/python-scripts/metadatavalidator/tests/integration/goodcases/config-test.ini index f166f6384..b865deddc 100644 --- a/python-scripts/metadatavalidator/tests/integration/goodcases/config-test.ini +++ b/python-scripts/metadatavalidator/tests/integration/goodcases/config-test.ini @@ -16,8 +16,8 @@ require_meta_description = off meta_description_length = 150 require_meta_architecture = off -valid_meta_architecture = Arm, AMD64/Intel\u00a064, POWER, IBM LinuxONE +valid_meta_architectures = Arm, AMD64/Intel\u00a064, POWER, IBM LinuxONE # require_meta_category = off -valid_meta_category = 3rd Party, Cloud, Containerization, Developer Tools, High Availability, Tuning & Performance, SAP, Security, Storage, Systems Management, Virtualization \ No newline at end of file +valid_meta_categories = 3rd Party, Cloud, Containerization, Developer Tools, High Availability, Tuning & Performance, SAP, Security, Storage, Systems Management, Virtualization \ No newline at end of file diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_arch.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_arch.py index 33ecb8278..b8352834c 100644 --- a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_arch.py +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_arch.py @@ -26,7 +26,7 @@ def test_check_meta_architecture(tree): appendnode(tree, meta) config = dict(metadata=dict(require_meta_architecture=True, - valid_meta_architecture=["x86_64", "POWER"])) + valid_meta_architectures=["x86_64", "POWER"])) assert check_meta_architecture(tree, config) is None @@ -82,7 +82,7 @@ def 
test_check_duplicate_child_meta_architecture(tree): config = dict(metadata=dict( require_meta_architecture=True, - valid_meta_architecture=["x86_64", "POWER"])) + valid_meta_architectures=["x86_64", "POWER"])) with pytest.raises( InvalidValueError, match=r".*Duplicate architectures found in meta.*" ): @@ -109,7 +109,7 @@ def test_check_unknown_child_meta_architecture(tree): config = dict( metadata=dict( require_meta_architecture=True, - valid_meta_architecture=["x86_64", "POWER"], + valid_meta_architectures=["x86_64", "POWER"], ) ) with pytest.raises(InvalidValueError, diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_category.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_category.py index 5b1803329..6b3dd0060 100644 --- a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_category.py +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_category.py @@ -25,7 +25,7 @@ def test_meta_category(tree): appendnode(tree, meta) config = dict(metadata=dict(require_meta_category=True, - valid_meta_category=["Systems Management"])) + valid_meta_categories=["Systems Management"])) assert check_meta_category(tree, config) is None @@ -101,7 +101,7 @@ def test_unknown_category_meta_category(tree): appendnode(tree, meta) config = dict(metadata=dict(require_meta_category=True, - valid_meta_category=["Systems Management"])) + valid_meta_categories=["Systems Management"])) with pytest.raises(InvalidValueError, match=r".*Unknown category.*"): check_meta_category(tree, config) diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_task.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_task.py index 88fa232a6..8b1b59a82 100644 --- a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_task.py +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_task.py @@ -25,7 +25,7 @@ def test_meta_task(tree): appendnode(tree, meta) 
config = dict(metadata=dict(require_meta_task=True, - valid_meta_task=["Configuration"])) + valid_meta_tasks=["Configuration"])) assert check_meta_task(tree, config) is None @@ -36,7 +36,7 @@ def test_missing_required_meta_task(tree): # #
""" config = dict(metadata=dict(require_meta_task=True, - valid_meta_task=["Configuration"])) + valid_meta_tasks=["Configuration"])) with pytest.raises(InvalidValueError, match=r".*Couldn't find required meta\[@name='task'\].*"): check_meta_task(tree, config) @@ -53,7 +53,7 @@ def test_missing_child_meta_task(tree): appendnode(tree, meta) config = dict(metadata=dict(require_meta_task=True), - valid_meta_task=["Configuration"]) + valid_meta_tasks=["Configuration"]) with pytest.raises(InvalidValueError, match=r".*Couldn't find any child elements in meta.*"): check_meta_task(tree, config) @@ -76,7 +76,7 @@ def test_duplicate_child_meta_task(tree): appendnode(tree, meta) config = dict(metadata=dict(require_meta_task=True), - valid_meta_task=["Configuration"]) + valid_meta_tasks=["Configuration"]) with pytest.raises(InvalidValueError, match=r".*Duplicate tasks found in meta.*"): check_meta_task(tree, config) @@ -99,7 +99,7 @@ def test_unknown_child_meta_task(tree): appendnode(tree, meta) config = dict(metadata=dict(require_meta_task=True, - valid_meta_task=["Configuration"])) + valid_meta_tasks=["Configuration"])) with pytest.raises(InvalidValueError, match=r".*Unknown task\(s\) \{'Unknown'\}.*"): check_meta_task(tree, config) \ No newline at end of file diff --git a/python-scripts/metadatavalidator/tests/unit/data/metadatavalidator.ini b/python-scripts/metadatavalidator/tests/unit/data/metadatavalidator.ini index e9cc4c1b8..9e0afb95c 100644 --- a/python-scripts/metadatavalidator/tests/unit/data/metadatavalidator.ini +++ b/python-scripts/metadatavalidator/tests/unit/data/metadatavalidator.ini @@ -26,7 +26,7 @@ require_meta_platform = off # require_meta_architecture = off -valid_meta_architecture = Arm, AMD64/Intel\u00a064, POWER, IBM LinuxONE +valid_meta_architectures = Arm, AMD64/Intel\u00a064, POWER, IBM LinuxONE require_meta_category = off -valid_meta_category = 3rd Party, Cloud, Containerization, Developer Tools, High Availability, Tuning & Performance, SAP, 
Security, Storage, Systems Management, Virtualization \ No newline at end of file +valid_meta_categories = 3rd Party, Cloud, Containerization, Developer Tools, High Availability, Tuning & Performance, SAP, Security, Storage, Systems Management, Virtualization \ No newline at end of file diff --git a/python-scripts/metadatavalidator/tests/unit/test_script_config.py b/python-scripts/metadatavalidator/tests/unit/test_script_config.py index 59d94701d..574b25af1 100644 --- a/python-scripts/metadatavalidator/tests/unit/test_script_config.py +++ b/python-scripts/metadatavalidator/tests/unit/test_script_config.py @@ -8,7 +8,7 @@ truefalse, validate_check_root_elements, validate_and_convert_config, - validate_valid_meta_architecture, + validate_valid_meta_architectures, ) from metadatavalidator.exceptions import ( MissingKeyError, @@ -120,5 +120,5 @@ def test_validate_check_root_elements_missing_key(dict_config): with pytest.raises(MissingKeyError, match=".*validator.check_root_elements.*"): validate_check_root_elements(dict_config) -def test_validate_valid_meta_architecture(dict_config): - assert validate_valid_meta_architecture(dict_config) == ["A", "B", "C"] \ No newline at end of file +def test_validate_valid_meta_architectures(dict_config): + assert validate_valid_meta_architectures(dict_config) == ["A", "B", "C"] \ No newline at end of file From 56c0cbeb1a94fecfe3b0d84a7aa043be0bbb469a Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Mon, 1 Jul 2024 13:40:06 +0200 Subject: [PATCH 101/107] Improve help output and use module docstring --- .../metadatavalidator/src/metadatavalidator/cli.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/cli.py b/python-scripts/metadatavalidator/src/metadatavalidator/cli.py index e5dfa8c39..2b74dbce9 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/cli.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/cli.py @@ -1,3 +1,10 
@@ +""" +Parses a XML file and checks for metadata. Can be applied for regular +DocBook files and assembly files. + +For "Suggested values and structure" see: +https://confluence.suse.com/x/aQDWNg +""" import argparse import asyncio import logging @@ -24,7 +31,8 @@ def parsecli(cliargs=None) -> argparse.Namespace: :return: parsed CLI result """ parser = argparse.ArgumentParser(description=__doc__, - epilog="Version %s written by %s " % (__version__, __author__) + epilog="Version %s written by %s " % (__version__, __author__), + formatter_class=argparse.RawTextHelpFormatter, ) parser.add_argument('-v', '--verbose', From b626c3c9539699926d853f8d3722220a4d289995 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Mon, 1 Jul 2024 13:40:38 +0200 Subject: [PATCH 102/107] Bump version to 0.3.0 --- python-scripts/metadatavalidator/src/metadatavalidator/_meta.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/_meta.py b/python-scripts/metadatavalidator/src/metadatavalidator/_meta.py index 608335db3..d44d4f8f8 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/_meta.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/_meta.py @@ -1,4 +1,4 @@ """Version and author""" -__version__ = "0.2.0" +__version__ = "0.3.0" __author__ = "Tom Schraitle " \ No newline at end of file From 93445ffa6f5b31e3fd5cedb8f0caca64a999f8c9 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Tue, 16 Jul 2024 09:38:09 +0200 Subject: [PATCH 103/107] Improve CLI epilog --- .../metadatavalidator/src/metadatavalidator/_meta.py | 2 +- .../metadatavalidator/src/metadatavalidator/cli.py | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/_meta.py b/python-scripts/metadatavalidator/src/metadatavalidator/_meta.py index d44d4f8f8..4fc2c6eea 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/_meta.py +++ 
b/python-scripts/metadatavalidator/src/metadatavalidator/_meta.py @@ -1,4 +1,4 @@ """Version and author""" -__version__ = "0.3.0" +__version__ = "0.3.1" __author__ = "Tom Schraitle " \ No newline at end of file diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/cli.py b/python-scripts/metadatavalidator/src/metadatavalidator/cli.py index 2b74dbce9..272eb7cc4 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/cli.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/cli.py @@ -23,15 +23,18 @@ #: in order for all messages to be delegated. logging.getLogger().setLevel(logging.NOTSET) - +PROGPATH = sys.argv[0] def parsecli(cliargs=None) -> argparse.Namespace: """Parse CLI with :class:`argparse.ArgumentParser` and return parsed result :param cliargs: Arguments to parse or None (=use sys.argv) :return: parsed CLI result """ + epilog = f""" +{__package__} {__version__} written by {__author__} from {PROGPATH} +""" parser = argparse.ArgumentParser(description=__doc__, - epilog="Version %s written by %s " % (__version__, __author__), + epilog=epilog, formatter_class=argparse.RawTextHelpFormatter, ) From 7f6707b6f53e840d9babd65ef3e6e2728b09ddbf Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Tue, 16 Jul 2024 09:47:37 +0200 Subject: [PATCH 104/107] Fix check_meta_task() Test if tag.text is not None --- .../metadatavalidator/src/metadatavalidator/_meta.py | 2 +- .../src/metadatavalidator/checks/check_meta.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/_meta.py b/python-scripts/metadatavalidator/src/metadatavalidator/_meta.py index 4fc2c6eea..727651e4f 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/_meta.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/_meta.py @@ -1,4 +1,4 @@ """Version and author""" -__version__ = "0.3.1" +__version__ = "0.3.2" __author__ = "Tom Schraitle " \ No newline at end of file diff --git 
a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py index 495dfec7c..bf654d431 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py @@ -254,7 +254,8 @@ def check_meta_task(tree: etree._ElementTree, ] # Do we have children? - tasks = [tag.text.strip() for tag in meta.iterchildren()] + tasks = [tag.text.strip() for tag in meta.iterchildren() + if tag.text is not None] if not tasks: raise InvalidValueError( f"Couldn't find any child elements in meta[@name='task']" From 38c481f41323c045642215776e05243c0f1b7fc9 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Tue, 16 Jul 2024 10:22:15 +0200 Subject: [PATCH 105/107] Fix empty text in meta[@name='title'] --- .../metadatavalidator/src/metadatavalidator/_meta.py | 2 +- .../src/metadatavalidator/checks/check_meta.py | 8 +++++++- .../tests/unit/checks/test_check_meta_title.py | 8 ++++++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/_meta.py b/python-scripts/metadatavalidator/src/metadatavalidator/_meta.py index 727651e4f..54ee23b4d 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/_meta.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/_meta.py @@ -1,4 +1,4 @@ """Version and author""" -__version__ = "0.3.2" +__version__ = "0.3.3" __author__ = "Tom Schraitle " \ No newline at end of file diff --git a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py index bf654d431..10ed04063 100644 --- a/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py +++ b/python-scripts/metadatavalidator/src/metadatavalidator/checks/check_meta.py @@ -29,8 +29,14 @@ def check_meta_title(tree: 
etree._ElementTree, ) return + # the meta[@name='title'] element exists, but must not be empty + if meta.text is None: + raise InvalidValueError( + f"Empty meta[@name='title'] element (line {meta.sourceline})." + ) + length = config.get("metadata", {}).get("meta_title_length", 55) - if len(meta.text) > length: + if meta.text is not None and len(meta.text) > length: raise InvalidValueError( f"Meta title is too long. Max length is {length} characters" f" (line {meta.sourceline})." diff --git a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_title.py b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_title.py index 029bdc194..6eeebc271 100644 --- a/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_title.py +++ b/python-scripts/metadatavalidator/tests/unit/checks/test_check_meta_title.py @@ -48,3 +48,11 @@ def test_check_optional_meta_title(tree): config = dict(metadata=dict(meta_title_required=False)) assert check_meta_title(tree, config) is None +def test_check_meta_title_empty(tree): + meta = D("meta", {"name": "title"}) + appendnode(tree, meta) + + with pytest.raises(InvalidValueError, + match=r".*Empty meta\[@name='title'\] element.*"): + check_meta_title(tree, dict(metadata=dict(meta_title_length=10, + meta_title_required=True))) \ No newline at end of file From 59208f423c6d7d01414d02394f219703b6c47fdf Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Thu, 25 Jul 2024 10:18:54 +0200 Subject: [PATCH 106/107] Add statement about current directory from to install --- python-scripts/metadatavalidator/README.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python-scripts/metadatavalidator/README.rst b/python-scripts/metadatavalidator/README.rst index 1ba1a9549..ff5491ce6 100644 --- a/python-scripts/metadatavalidator/README.rst +++ b/python-scripts/metadatavalidator/README.rst @@ -58,6 +58,8 @@ If you don't need the virtual environment anymore, you can deactivate it: Installing the script 
--------------------- +Before you install the script, your current directory must be +`python-scripts/metadatavalidator/`. To install the script, run the following command: .. code-block:: bash @@ -196,4 +198,4 @@ The following values are recognized: * :var:`require_meta_task`: Requires a ```` tag or not. - * :var:`valid_meta_tasks`: Lists the valid task names for ``/``. \ No newline at end of file + * :var:`valid_meta_tasks`: Lists the valid task names for ``/``. From 6f06cd385ab1c151e4fe3c0902b4554a82e18e30 Mon Sep 17 00:00:00 2001 From: Tom Schraitle Date: Tue, 10 Sep 2024 15:21:17 +0200 Subject: [PATCH 107/107] Use plural form in INI file Use valid_meta_categories and valid_meta_tasks --- python-scripts/metadatavalidator/metadatavalidator.ini | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python-scripts/metadatavalidator/metadatavalidator.ini b/python-scripts/metadatavalidator/metadatavalidator.ini index e9b1f5ee3..188884829 100644 --- a/python-scripts/metadatavalidator/metadatavalidator.ini +++ b/python-scripts/metadatavalidator/metadatavalidator.ini @@ -32,8 +32,8 @@ valid_meta_architectures = Arm, AMD64/Intel\u00a064, POWER, IBM LinuxONE # require_meta_task = off -valid_meta_task = Administration, Artificial Intelligence, Auditing, Authentication, Automation, Backup & Recovery, Certification, Cloud, Clustering, Compliance, Configuration, Containerization, Container Management, Data Intelligence, Deployment, Design, Encryption, High Availability, Image Building, Implementation, Installation, Integration, Maintenance, Migration, Monitoring, Packaging, Security, Storage, Subscription Management, Troubleshooting, Upgrade & Update, Virtualization, Vulnerability, Web +valid_meta_tasks = Administration, Artificial Intelligence, Auditing, Authentication, Automation, Backup & Recovery, Certification, Cloud, Clustering, Compliance, Configuration, Containerization, Container Management, Data Intelligence, Deployment, Design, Encryption, High 
Availability, Image Building, Implementation, Installation, Integration, Maintenance, Migration, Monitoring, Packaging, Security, Storage, Subscription Management, Troubleshooting, Upgrade & Update, Virtualization, Vulnerability, Web # require_meta_category = off -valid_meta_category = 3rd Party, Cloud, Containerization, Developer Tools, High Availability, Tuning & Performance, SAP, Security, Storage, Systems Management, Virtualization \ No newline at end of file +valid_meta_categories = 3rd Party, Cloud, Containerization, Developer Tools, High Availability, Tuning & Performance, SAP, Security, Storage, Systems Management, Virtualization