import json
import os.path as pth
import pkgutil
from functools import lru_cache
from importlib import import_module
from typing import Any, Callable, List, Optional, Type, Union
from urllib.parse import (ParseResult, urljoin as _urllib_urljoin,
urlparse as _urllib_urlparse)
import jsonschema # type: ignore
from jsonschema import RefResolver as _RefResolver # type: ignore
from jsonschema import ValidationError # type: ignore
try:
import yaml # type: ignore
except ImportError:
yaml = None
# Version string of this package.
__version__ = '0.1.1.dev0'
# Public names exported by a star-import of this module.
__all__ = ['URL_SCHEME_RESOLVERS', 'RefResolver', 'ValidationError', 'urljoin',
'urlparse', 'validate']
# Type alias used to annotate JSON document values (see `validate`).
_JSONTypes = Union[dict, list, str, int, float, bool, None]
# Type alias used to annotate schemas: either a dict or a boolean.
_SchemaTypes = Union[dict, bool]
def validate(document: _JSONTypes, schema: _SchemaTypes,
             cls: Optional[Type] = None, *args: Any, **kwargs: Any):
    """
    Drop-in replacement for `jsonschema.validate` which uses the customized
    `jsonschema_pyref.RefResolver` by default.

    Unless the caller passes its own ``resolver`` keyword argument, a
    `jsonschema_pyref.RefResolver` is created from ``schema`` and handed to
    `jsonschema.validate`.  All other arguments are forwarded untouched, and
    the return value is whatever `jsonschema.validate` returns.

    >>> from jsonschema_pyref import validate
    >>> schema = {
    ...     'properties': {
    ...         'a': {'$ref': 'py-obj:jsonschema_pyref.examples.schema1'}
    ...     }
    ... }
    ...
    >>> validate({'a': 'hello'}, schema) is None
    True
    """
    if 'resolver' not in kwargs:
        kwargs['resolver'] = RefResolver.from_schema(schema)

    # ``cls`` is forwarded positionally on purpose: passing it as a keyword
    # ahead of ``*args`` makes any extra positional argument collide with it
    # ("got multiple values for argument 'cls'").
    return jsonschema.validate(document, schema, cls, *args, **kwargs)
class RefResolver(_RefResolver):
    """
    A `jsonschema.RefResolver` subclass that also understands ``py-obj`` and
    ``py-pkgdata`` URLs.

    It takes the same constructor arguments as its base class.  The simplest
    way to obtain one is through the inherited
    `~jsonschema.RefResolver.from_schema` constructor; the result is then
    passed to `jsonschema.validate` through the ``resolver`` keyword:

    >>> import jsonschema
    >>> from jsonschema_pyref import RefResolver
    >>> schema = {
    ...     'properties': {
    ...         'a': {'$ref': 'py-obj:jsonschema_pyref.examples.schema1'}
    ...     }
    ... }
    ...
    >>> resolver = RefResolver.from_schema(schema)
    >>> jsonschema.validate({'a': 'hello'}, schema, resolver=resolver) is None
    True
    """

    def __init__(self, base_uri: str, referrer: _JSONTypes,
                 store: Union[dict, tuple] = (),
                 cache_remote: bool = True,
                 handlers: Union[dict, tuple] = (),
                 urljoin_cache: Optional[Callable[[str, str], str]] = None,
                 remote_cache: Optional[Callable[[str], dict]] = None) -> None:
        # Begin with this package's scheme handlers and layer the caller's
        # handlers on top, so that callers can still replace the py-obj and
        # py-pkgdata handlers (or register handlers for other schemes).
        merged_handlers = {**URL_SCHEME_RESOLVERS}
        merged_handlers.update(handlers)

        # Likewise, a caller-supplied join cache wins; otherwise fall back to
        # a cached version of this package's scheme-aware urljoin.
        if urljoin_cache is not None:
            join_cache = urljoin_cache
        else:
            join_cache = lru_cache(1024)(urljoin)

        super().__init__(
            base_uri,
            referrer,
            store=store,
            cache_remote=cache_remote,
            handlers=merged_handlers,
            urljoin_cache=join_cache,
            remote_cache=remote_cache,
        )
def urljoin(base: str, url: str, allow_fragments: bool = True) -> str:
    """
    Join ``url`` onto ``base``, with support for the custom URL schemes
    implemented by this package.

    If the scheme of ``base`` is not one of this package's schemes, the call
    is delegated to the stdlib's `urllib.parse.urljoin`.  Note that
    ``allow_fragments`` is only passed along on that stdlib path; the custom
    joiners are called with ``base`` and ``url`` only.

    ``py-obj`` and ``py-pkgdata`` URLs are joined differently from typical
    http-like URLs, and differently from each other.

    Rules for ``py-obj``:

    * A "relative" URL that starts with a Python identifier is appended to
      the object path of the base:

        >>> from jsonschema_pyref import urljoin
        >>> urljoin('py-obj:a.b.c', 'd.e')
        'py-obj:a.b.c.d.e'

    * A "relative" URL that starts with one or more ``.`` is resolved
      against the base path in a way similar to Python relative imports:
      one dot selects a different attribute of the same object, and so on:

        >>> urljoin('py-obj:a.b.c', '.d')
        'py-obj:a.b.d'
        >>> urljoin('py-obj:a.b.c', '..d')
        'py-obj:a.d'
        >>> urljoin('py-obj:a.b.c', '..d.e')
        'py-obj:a.d.e'

    * An absolute ``py-obj`` URL replaces the base:

        >>> urljoin('py-obj:a.b.c', 'py-obj:d.e.f')
        'py-obj:d.e.f'

    * So does an absolute URL with an entirely different scheme:

        >>> urljoin('py-obj:a.b.c', 'https://example.com')
        'https://example.com'

    * The fragment of the base URL is discarded and the fragment of the
      joined URL is kept:

        >>> urljoin('py-obj:a.b.c#replaced', '.d#fragment')
        'py-obj:a.b.d#fragment'
        >>> urljoin('py-obj:a.b.c#replaced', '#fragment')
        'py-obj:a.b.c#fragment'

    Rules for ``py-pkgdata``:

    * A relative URL names a different file relative to the same
      package/directory as the base URL, much like `urllib.parse.urljoin`
      treats relative paths joined to http(s) URLs:

        >>> urljoin('py-pkgdata:a.b.c/schema1.json', 'schema2.json')
        'py-pkgdata:a.b.c/schema2.json'
        >>> urljoin('py-pkgdata:a.b.c/schemas/schema1.json', 'schema2.json')
        'py-pkgdata:a.b.c/schemas/schema2.json'
        >>> urljoin('py-pkgdata:a.b.c/schemas/sub/schema1.json', 'schema2.json')
        'py-pkgdata:a.b.c/schemas/sub/schema2.json'
        >>> urljoin('py-pkgdata:a.b.c/schemas/schema1.json', '../more_schemas/schema2.json')
        'py-pkgdata:a.b.c/more_schemas/schema2.json'

      .. note::

          Relative URLs with a package-relative component are not
          supported, only different paths within the same package.  Support
          might be added in the future, but under the current format it is
          too ambiguous.

    * An absolute URL in the same package behaves like the relative case
      (effectively the joined URL replaces the base):

        >>> urljoin('py-pkgdata:a.b.c/schema1.json', 'py-pkgdata:a.b.c/schema2.json')
        'py-pkgdata:a.b.c/schema2.json'

    * The same holds when the packages differ:

        >>> urljoin('py-pkgdata:a.b.c/schema1.json', 'py-pkgdata:d.e.f/schema2.json')
        'py-pkgdata:d.e.f/schema2.json'

    * Joining a URL to itself returns that URL:

        >>> urljoin('py-pkgdata:a.b.c/sub/schema1.json', 'py-pkgdata:a.b.c/sub/schema1.json')
        'py-pkgdata:a.b.c/sub/schema1.json'

    * A URL with a different scheme altogether replaces the base:

        >>> urljoin('py-pkgdata:a.b.c/schema1.json', 'http://example.com/schema2.json')
        'http://example.com/schema2.json'

    * Fragments follow the same rules as for ``py-obj``:

        >>> urljoin('py-pkgdata:a.b.c/schema1.json#replaced', 'schema2.json#fragment')
        'py-pkgdata:a.b.c/schema2.json#fragment'
        >>> urljoin('py-pkgdata:a.b.c/schema1.json#replaced', '#fragment')
        'py-pkgdata:a.b.c/schema1.json#fragment'
    """
    base_scheme = urlparse(base).scheme
    custom_joiner = _URL_SCHEME_JOINERS.get(base_scheme)

    if custom_joiner is None:
        # Not one of our schemes: defer entirely to the standard library.
        return _urllib_urljoin(base, url, allow_fragments=allow_fragments)

    return custom_joiner(base, url)
def urlparse(url: str) -> ParseResult:
    """
    Parse ``url`` into a `urllib.parse.ParseResult`, with support for the
    custom URL schemes implemented by this package.

    When the text before the first ``:`` is a scheme registered in
    ``_URL_SCHEME_PARSERS``, that scheme's parser handles the URL; any other
    URL is handed to the stdlib's `urllib.parse.urlparse`.
    """
    scheme, colon, _remainder = url.partition(':')

    # ``colon`` is empty when the URL has no ':' at all, in which case there
    # is no scheme to look up.
    if colon:
        custom_parser = _URL_SCHEME_PARSERS.get(scheme)
        if custom_parser is not None:
            return custom_parser(url)

    return _urllib_urlparse(url)
def _is_dotted_identifier(s: str) -> bool:
    """
    Tell whether a string consists of one or more valid Python identifiers
    joined by ``.``.

    Leading and trailing whitespace is ignored; an empty (or all-whitespace)
    string is not a dotted identifier.
    """
    candidate = s.strip()
    return bool(candidate) and all(
        part.isidentifier() for part in candidate.split('.'))
def _resolve_url_py_obj(url: str) -> dict:
    """
    Resolve a ``py-obj:`` URL to the dict it names.

    The dotted path is split into a module part and an attribute part by
    trying to import the longest possible module name first, and moving the
    last component over to the attribute path each time the import fails.
    The attributes are then looked up one by one starting from the module.

    :param url: a ``py-obj:<identifier>[.<identifier>...][#<fragment>]`` URL.
    :returns: the object found at the dotted path.
    :raises ValueError: if the URL is not a valid ``py-obj`` URL, or if the
        resolved object is not a dict.
    :raises ImportError: if not even the first path component can be
        imported as a module.
    :raises AttributeError: if an attribute along the path cannot be
        retrieved.
    """
    parsed = _urlparse_py_obj(url)
    path = parsed.path
    modname = path
    objpath: List[str] = []
    module = None

    while module is None:
        try:
            module = import_module(modname)
        except ImportError:
            # Check the *remaining* module name rather than the full path:
            # once modname is down to a single component there is nothing
            # left to split off, so the import failure is final and the
            # original ImportError must propagate.
            if '.' not in modname:
                raise
            # At least part of the dotted name is the name of the object
            # in the module, and not the module itself.
            modname, objname = modname.rsplit('.', 1)
            objpath.insert(0, objname)

    obj = module
    for idx, attr in enumerate(objpath):
        try:
            obj = getattr(obj, attr)
        except Exception as exc:
            # Report the failure against the dotted path walked so far.
            raise AttributeError(
                f"no attribute '{attr}' on "
                f"{'.'.join([modname] + objpath[:idx])}") from exc

    if not isinstance(obj, dict):
        raise ValueError(
            f'path {path} does not resolve to a dict, which it must be in '
            f'order to be a JSON Schema')

    return obj
def _resolve_url_py_pkgdata(url: str) -> dict:
    """
    Resolve a ``py-pkgdata:`` URL by loading a data file from a package.

    The file contents are fetched with `pkgutil.get_data` and decoded
    according to the filename extension: ``.yaml``/``.yml`` with
    ``yaml.safe_load`` and ``.json`` with `json.loads`.

    :param url: a ``py-pkgdata:<module>[.<module>...]/<path>[#<fragment>]``
        URL.
    :returns: the decoded contents of the file.
    :raises ValueError: if the URL is not a valid ``py-pkgdata`` URL, or if
        the filename extension is not one of the supported ones.
    :raises RuntimeError: if `pkgutil.get_data` returns ``None`` for the
        resource, or if a YAML file is requested while PyYAML is not
        installed.
    """
    parsed = _urlparse_py_pkgdata(url)
    path = parsed.path
    pkgname, filename = path.split('/', 1)
    _, ext = pth.splitext(filename)

    # TODO: Support more file types and a pluggable registry of loaders for
    # different types--as long as it can be loaded into a dict it can be
    # supported
    yaml_exts = {'.yaml', '.yml'}
    json_exts = {'.json'}

    data = pkgutil.get_data(pkgname, filename)

    if data is None:
        raise RuntimeError(
            f'no resource named {filename} could be loaded from package '
            f'{pkgname}; see https://docs.python.org/3/library/pkgutil.html#pkgutil.get_data')

    if ext in yaml_exts:
        if yaml is None:
            raise RuntimeError(
                'reading schemas from YAML files requires PyYAML to be '
                'installed: https://pypi.org/project/PyYAML/')

        return yaml.safe_load(data)

    if ext in json_exts:
        return json.loads(data)

    # Only needed for the error message, so it is built here and nowhere else.
    supported_exts = yaml_exts | json_exts
    raise ValueError(
        f'schemas are currently only loadable from files with the '
        f'following filename extensions: {sorted(supported_exts)}')
def _urljoin_py_obj(base: str, url: str) -> str:
    """
    Join ``url`` onto a ``py-obj:`` base URL.

    * A ``url`` that has a scheme of its own is returned unchanged.
    * Otherwise each leading ``.`` of the path of ``url`` removes one
      trailing component from the base object path, and the remainder of the
      path is appended to what is left.
    * The fragment of ``url`` (if any) is kept; the fragment of ``base`` is
      dropped.

    :param base: a ``py-obj:`` URL.
    :param url: the URL to join onto ``base``.
    :returns: the joined URL.
    :raises ValueError: if ``base`` is not a valid ``py-obj`` URL, or if
        ``url`` has more leading dots than ``base`` has path components.
    """
    parsedb = _urlparse_py_obj(base)
    parsedu = urlparse(url)

    if parsedu.scheme:
        return url

    # The number of leading dots is the number of base components to drop.
    pathu = parsedu.path.lstrip('.')
    rel_depth = len(parsedu.path) - len(pathu)

    path_parts = parsedb.path.split('.')
    if rel_depth > len(path_parts):
        raise ValueError(
            f'relative object path {url} outside the original path {base}')

    if rel_depth > 0:
        path_parts = path_parts[:-rel_depth]

    if pathu:
        path_parts.append(pathu)

    # Joining the surviving parts (rather than always inserting a '.' before
    # pathu) avoids a malformed 'py-obj:.d' when the relative reference
    # consumed the entire base path, e.g. 'py-obj:a' joined with '.d'.
    new_url = 'py-obj:' + '.'.join(path_parts)

    if parsedu.fragment:
        new_url += '#' + parsedu.fragment

    return new_url
def _urljoin_py_pkgdata(base: str, url: str) -> str:
    """
    Join ``url`` onto a ``py-pkgdata:`` base URL.

    Three cases are distinguished by the scheme of ``url``:

    * a scheme other than ``py-pkgdata``: ``url`` is returned unchanged;
    * the ``py-pkgdata`` scheme: the path of ``url`` replaces the base path,
      whether or not the package names or filenames match;
    * no scheme: ``url`` is a relative reference, and its path is joined to
      the base path with the stdlib's `urllib.parse.urljoin` (the paths are
      passed without the scheme, which the stdlib does not recognize).

    The fragment of ``url``, if any, is appended to the result.
    """
    base_parts = _urlparse_py_pkgdata(base)
    url_parts = urlparse(url)
    url_scheme = url_parts.scheme

    if url_scheme and url_scheme != base_parts.scheme:
        # Some other scheme: the base is replaced entirely.
        return url

    if url_scheme:
        # Also py-pkgdata: in every combination of matching or differing
        # package and filename the result is simply the new URL's path.
        joined_path = url_parts.path
    else:
        # A relative URL, resolved against the base py-pkgdata path.
        joined_path = _urllib_urljoin(base_parts.path, url_parts.path)

    fragment_suffix = '#' + url_parts.fragment if url_parts.fragment else ''
    return 'py-pkgdata:' + joined_path + fragment_suffix
def _urlparse_py_obj(url: str) -> ParseResult:
    """
    Parse URLs with the ``py-obj:`` scheme.

    The expected format is
    ``py-obj:<identifier>[.<identifier>...][#<fragment>]``.

    :param url: the URL to parse.
    :returns: a `urllib.parse.ParseResult` whose ``scheme`` is ``py-obj``,
        whose ``path`` is the dotted object path and whose ``fragment`` is
        the text after the first ``#`` (empty if there is none); all other
        fields are empty strings.
    :raises ValueError: if ``url`` does not start with ``py-obj:`` or if its
        path is not a dotted sequence of Python identifiers.
    """
    if not url.startswith('py-obj:'):
        raise ValueError(f'not a py-obj URL: {url}')

    _, rest = url.split(':', 1)
    path, _, fragment = rest.partition('#')

    # The identifier check ignores surrounding whitespace, so strip it from
    # the returned path too; otherwise a path that passed validation could
    # not be imported.  The py-pkgdata parser strips its path the same way.
    path = path.strip()

    if not _is_dotted_identifier(path):
        fmt = 'py-obj:<identifier>[.<identifier>...][#<fragment>]'
        raise ValueError(
            f'py-obj URL path component {path} contains an invalid Python '
            f'identifier; the correct format for a py-obj URL is: {fmt}')

    return ParseResult(scheme='py-obj', netloc='', path=path, params='',
                       query='', fragment=fragment)
def _urlparse_py_pkgdata(url: str) -> ParseResult:
    """
    Parse URLs with the ``py-pkgdata:`` scheme.

    The expected format is
    ``py-pkgdata:<module>[.<module>...]/<path>[#<fragment>]``.

    :param url: the URL to parse.
    :returns: a `urllib.parse.ParseResult` whose ``scheme`` is
        ``py-pkgdata``, whose ``path`` is ``<module>/<path>`` and whose
        ``fragment`` is the text after the first ``#`` (empty if there is
        none); all other fields are empty strings.
    :raises ValueError: if ``url`` does not start with ``py-pkgdata:``, if
        it has no file path after the module name, or if the module name is
        not a dotted sequence of Python identifiers.
    """
    if not url.startswith('py-pkgdata:'):
        raise ValueError(f'not a py-pkgdata URL: {url}')

    _, rest = url.split(':', 1)
    path, _, fragment = rest.partition('#')
    path = path.strip()

    fmt = 'py-pkgdata:<module>[.<module>...]/<path>[#<fragment>]'

    # Everything before the first '/' is the package, the rest is the file.
    module, _, filename = path.partition('/')

    # The identifier check ignores surrounding whitespace, so normalize the
    # module name the same way the filename is normalized.
    module = module.strip()
    filename = filename.strip()

    if not filename:
        raise ValueError(
            f'py-pkgdata URL must contain a package-relative path to a file '
            f'contained in the package, starting with a "/"; the correct '
            f'format for a py-pkgdata URL is: {fmt}')

    if not _is_dotted_identifier(module):
        # This message previously named the py-obj scheme by mistake.
        raise ValueError(
            f'py-pkgdata URL module component {module} is not a valid Python '
            f'identifier; the correct format for a py-pkgdata URL is: {fmt}')

    return ParseResult(scheme='py-pkgdata', netloc='',
                       path=f'{module}/{filename}', params='', query='',
                       fragment=fragment)
# Maps each custom URL scheme to the function that resolves a URL of that
# scheme.  `RefResolver.__init__` uses a copy of this mapping as the base of
# its ``handlers``.
URL_SCHEME_RESOLVERS = {
'py-obj': _resolve_url_py_obj,
'py-pkgdata': _resolve_url_py_pkgdata
}
"""
Additional URL handlers for the custom URL schemes supported by this package.
This can be passed to the ``handlers`` argument of `jsonschema.RefResolver`,
but take note: It is also necessary to override the ``urljoin_cache`` argument
to use `jsonschema_pyref.urljoin` as follows::
>>> import jsonschema
>>> from jsonschema_pyref import URL_SCHEME_RESOLVERS, urljoin
>>> from functools import lru_cache
>>> resolver = jsonschema.RefResolver('', {}, handlers=URL_SCHEME_RESOLVERS,
... urljoin_cache=lru_cache(1024)(urljoin))
This boilerplate can be avoided entirely by using the supplied
`jsonschema_pyref.RefResolver` which has these enhancements by default (and
is otherwise identical to `jsonschema.RefResolver`).
"""
# Maps each custom URL scheme to its join function; `urljoin` looks up the
# scheme of the base URL here and calls the match with ``(base, url)``.
_URL_SCHEME_JOINERS = {
'py-obj': _urljoin_py_obj,
'py-pkgdata': _urljoin_py_pkgdata
}
# Maps each custom URL scheme to its parse function; `urlparse` looks up the
# text before the first ':' of a URL here and calls the match with the URL.
_URL_SCHEME_PARSERS = {
'py-obj': _urlparse_py_obj,
'py-pkgdata': _urlparse_py_pkgdata
}