Source code for owlapy.parser

from types import MappingProxyType
from typing import Final, List, Optional, Union
from parsimonious.grammar import Grammar
from parsimonious.grammar import NodeVisitor
from parsimonious.nodes import Node
from owlapy.io import OWLObjectParser
from owlapy.model import OWLObjectHasSelf, OWLObjectIntersectionOf, OWLObjectMinCardinality, OWLObjectOneOf, \
    OWLObjectProperty, OWLObjectPropertyExpression, OWLObjectSomeValuesFrom, OWLObjectUnionOf, OWLClass, IRI, \
    OWLClassExpression, OWLDataProperty, OWLNamedIndividual, OWLObjectComplementOf, OWLObjectExactCardinality, \
    OWLObjectHasValue, OWLQuantifiedDataRestriction, OWLQuantifiedObjectRestriction, StringOWLDatatype,  \
    DateOWLDatatype, DateTimeOWLDatatype, DoubleOWLDatatype, DurationOWLDatatype, IntegerOWLDatatype, \
    OWLDataSomeValuesFrom, OWLDatatypeRestriction, OWLFacetRestriction, OWLDataExactCardinality, \
    OWLDataMaxCardinality, OWLObjectMaxCardinality, OWLDataIntersectionOf, OWLDataMinCardinality, OWLDataHasValue, \
    OWLLiteral, OWLDataRange, OWLDataUnionOf, OWLDataOneOf, OWLDatatype, OWLObjectCardinalityRestriction, \
    OWLDataCardinalityRestriction, OWLObjectAllValuesFrom, OWLDataAllValuesFrom, OWLDataComplementOf, BooleanOWLDatatype
from owlapy.namespaces import Namespaces

from owlapy.render import _DL_SYNTAX, _MAN_SYNTAX
from owlapy.vocab import OWLFacet, OWLRDFVocabulary


MANCHESTER_GRAMMAR = Grammar(r"""
    union = intersection (must_ws "or" must_ws intersection)*
    intersection = primary (must_ws "and" must_ws primary)*

    # Main entry point + object properties
    primary = ("not" must_ws)? (data_some_only_res / some_only_res / data_cardinality_res / cardinality_res
                           / data_value_res / value_res / has_self / class_expression)
    some_only_res = object_property must_ws ("some"/"only") must_ws primary
    cardinality_res = object_property must_ws ("max"/"min"/"exactly") must_ws non_negative_integer must_ws primary
    value_res = object_property must_ws "value" must_ws individual_iri
    has_self = object_property must_ws "Self"
    object_property = ("inverse" must_ws)? object_property_iri
    class_expression = class_iri / individual_list / parentheses
    individual_list = "{" maybe_ws individual_iri (maybe_ws "," maybe_ws individual_iri)* maybe_ws "}"

    # Back to start symbol (first production rule)
    parentheses = "(" maybe_ws union maybe_ws ")"

    # Data properties
    data_some_only_res = data_property_iri must_ws ("some"/"only") must_ws data_primary
    data_cardinality_res = data_property_iri must_ws ("max"/"min"/"exactly")
                           must_ws non_negative_integer must_ws data_primary
    data_value_res = data_property_iri must_ws "value" must_ws literal
    data_primary = ("not" must_ws)? data_range
    data_range = datatype_restriction / datatype_iri / literal_list / data_parentheses
    literal_list = "{" maybe_ws literal (maybe_ws "," maybe_ws literal)* maybe_ws "}"
    data_parentheses = "(" maybe_ws data_union maybe_ws ")"
    data_union = data_intersection (must_ws "or" must_ws data_intersection)*
    data_intersection = data_primary (must_ws "and" must_ws data_primary)*
    datatype_restriction = datatype_iri "[" maybe_ws facet_restrictions maybe_ws "]"
    facet_restrictions = facet_restriction (maybe_ws "," maybe_ws facet_restriction)*
    facet_restriction = facet must_ws literal
    facet = "length" / "minLength" / "maxLength" / "pattern" / "langRange"
            / "totalDigits" / "fractionDigits" / "<=" / ">=" / "<" / ">"
    datatype_iri = ("<http://www.w3.org/2001/XMLSchema#" datatype ">") / ("xsd:"? datatype)
    datatype = "double" / "integer" / "boolean" / "string" / "dateTime" / "date" / "duration"

    # Literals
    literal = typed_literal / string_literal_language / string_literal_no_language / datetime_literal /
              duration_literal / date_literal / float_literal / decimal_literal / integer_literal /
              boolean_literal
    typed_literal = quoted_string "^^" datatype_iri
    string_literal_language = quoted_string language_tag
    string_literal_no_language = quoted_string / no_match
    quoted_string = ~"\"([^\"\\\\]|\\\\[\"\\\\])*\""
    language_tag = "@" ~"[a-zA-Z]+" ("-" ~"[a-zA-Z0-9]+")*
    float_literal = sign (float_with_integer_part / float_no_integer_part) ("f"/"F")
    float_with_integer_part = non_negative_integer ("." ~"[0-9]+")? exponent?
    float_no_integer_part = "." ~"[0-9]+" exponent?
    exponent = ("e"/"E") sign ~"[0-9]+"
    decimal_literal = non_negative_integer "." ~"[0-9]+"
    integer_literal = sign non_negative_integer
    boolean_literal = ~"[tT]rue" / ~"[fF]alse"
    date_literal = ~"[0-9]{4}-((0[1-9])|(1[0-2]))-(([0-2][0-9])|(3[01]))"
    datetime_literal = ~"[0-9]{4}-((0[1-9])|(1[0-2]))-(([0-2][0-9])|(3[01]))[T\u0020]"
                       ~"(([0-1][0-9])|(2[0-3])):[0-5][0-9]:[0-5][0-9](\\.[0-9]{6})?"
                       ~"(Z|([+-](([0-1][0-9])|(2[0-3])):[0-5][0-9](:[0-5][0-9](\\.[0-9]{6})?)?))?"
    duration_literal = ~"P([0-9]+W)?([0-9]+D)?(T([0-9]+H)?([0-9]+M)?([0-9]+(\\.[0-9]{6})?S)?)?"
    sign = ("+"/"-")?
    non_negative_integer = ~"0|([1-9][0-9]*)"

    # IRIs / Characters
    class_iri = iri / no_match
    individual_iri = iri / no_match
    object_property_iri = iri / no_match
    data_property_iri = iri / no_match
    iri = full_iri / abbreviated_iri / simple_iri
    full_iri = iri_ref / no_match
    abbreviated_iri = pname_ln / no_match
    simple_iri = pn_local / no_match

    iri_ref = "<" ~"[^<>\"{}|^`\\\\\u0000-\u0020]*" ">"
    pname_ln = pname_ns pn_local
    pname_ns = pn_prefix? ":"
    pn_prefix = pn_chars_base ("."* pn_chars)*
    pn_local = (pn_chars_u / ~"[0-9]") ("."* pn_chars)*
    pn_chars = pn_chars_u / "-" / ~"[0-9]" / ~"\u00B7" / ~"[\u0300-\u036F]" / ~"[\u203F-\u2040]"
    pn_chars_u = pn_chars_base / "_"
    pn_chars_base = ~"[a-zA-Z]" / ~"[\u00C0-\u00D6]" / ~"[\u00D8-\u00F6]" / ~"[\u00F8-\u02FF]" /
                    ~"[\u0370-\u037D]" / ~"[\u037F-\u1FFF]" / ~"[\u200C-\u200D]" / ~"[\u2070-\u218F]" /
                    ~"[\u2C00-\u2FEF]" / ~"[\u3001-\uD7FF]" / ~"[\uF900-\uFDCF]" / ~"[\uFDF0-\uFFFD]" /
                    ~"[\U00010000-\U000EFFFF]"

    must_ws = ~"[\u0020\u000D\u0009\u000A]+"
    maybe_ws = ~"[\u0020\u000D\u0009\u000A]*"

    # hacky workaround: can be added to a pass through production rule that is semantically important
    # so nodes are not combined which makes the parsing cleaner
    no_match = ~"(?!a)a"
    """)


def _transform_children(nary_visit_function):
    def transform(self, node, visited_children):
        if len(visited_children) > 2:
            *_, first_operand, operands, _, _ = visited_children
        else:
            first_operand, operands = visited_children
        children = first_operand if isinstance(operands, Node) else [first_operand] + [node[-1] for node in operands]
        return nary_visit_function(self, node, children)
    return transform


def _node_text(node) -> str:
    return node.text.strip()


_STRING_TO_DATATYPE: Final = MappingProxyType({
    "integer": IntegerOWLDatatype,
    "double": DoubleOWLDatatype,
    "boolean": BooleanOWLDatatype,
    "string": StringOWLDatatype,
    "date": DateOWLDatatype,
    "dateTime": DateTimeOWLDatatype,
    "duration": DurationOWLDatatype,
})


_DATATYPE_TO_FACETS: Final = MappingProxyType({
    IntegerOWLDatatype: {OWLFacet.MIN_INCLUSIVE, OWLFacet.MIN_EXCLUSIVE, OWLFacet.MAX_EXCLUSIVE,
                         OWLFacet.MAX_INCLUSIVE, OWLFacet.TOTAL_DIGITS},
    DoubleOWLDatatype: {OWLFacet.MIN_INCLUSIVE, OWLFacet.MIN_EXCLUSIVE, OWLFacet.MAX_EXCLUSIVE, OWLFacet.MAX_INCLUSIVE},
    DateOWLDatatype: {OWLFacet.MIN_INCLUSIVE, OWLFacet.MIN_EXCLUSIVE, OWLFacet.MAX_EXCLUSIVE, OWLFacet.MAX_INCLUSIVE},
    DateTimeOWLDatatype: {OWLFacet.MIN_INCLUSIVE, OWLFacet.MIN_EXCLUSIVE,
                          OWLFacet.MAX_EXCLUSIVE, OWLFacet.MAX_INCLUSIVE},
    DurationOWLDatatype: {OWLFacet.MIN_INCLUSIVE, OWLFacet.MIN_EXCLUSIVE,
                          OWLFacet.MAX_EXCLUSIVE, OWLFacet.MAX_INCLUSIVE},
    StringOWLDatatype: {OWLFacet.LENGTH, OWLFacet.MIN_LENGTH, OWLFacet.MAX_LENGTH, OWLFacet.PATTERN},
    BooleanOWLDatatype: {}
})


_FACET_TO_LITERAL_DATATYPE: Final = MappingProxyType({
    OWLFacet.MIN_EXCLUSIVE: {IntegerOWLDatatype, DoubleOWLDatatype, DateOWLDatatype,
                             DateTimeOWLDatatype, DurationOWLDatatype},
    OWLFacet.MAX_EXCLUSIVE: {IntegerOWLDatatype, DoubleOWLDatatype, DateOWLDatatype,
                             DateTimeOWLDatatype, DurationOWLDatatype},
    OWLFacet.MIN_INCLUSIVE: {IntegerOWLDatatype, DoubleOWLDatatype, DateOWLDatatype,
                             DateTimeOWLDatatype, DurationOWLDatatype},
    OWLFacet.MAX_INCLUSIVE: {IntegerOWLDatatype, DoubleOWLDatatype, DateOWLDatatype,
                             DateTimeOWLDatatype, DurationOWLDatatype},
    OWLFacet.PATTERN: {IntegerOWLDatatype, DoubleOWLDatatype, DateOWLDatatype, DateTimeOWLDatatype,
                       DurationOWLDatatype, StringOWLDatatype},
    OWLFacet.LENGTH: {IntegerOWLDatatype},
    OWLFacet.MIN_LENGTH: {IntegerOWLDatatype},
    OWLFacet.MAX_LENGTH: {IntegerOWLDatatype},
    OWLFacet.TOTAL_DIGITS: {IntegerOWLDatatype},
    OWLFacet.FRACTION_DIGITS: {IntegerOWLDatatype}
})


# workaround to support multiple inheritance with different metaclasses
class _ManchesterOWLSyntaxParserMeta(type(NodeVisitor), type(OWLObjectParser)):
    pass


[docs]class ManchesterOWLSyntaxParser(NodeVisitor, OWLObjectParser, metaclass=_ManchesterOWLSyntaxParserMeta): """Manchester Syntax parser to parse strings to OWLClassExpressions Following: https://www.w3.org/TR/owl2-manchester-syntax""" slots = 'ns', 'grammar' ns: Optional[Union[str, Namespaces]] def __init__(self, namespace: Optional[Union[str, Namespaces]] = None, grammar=None): """Create a new Manchester Syntax parser. Names (entities) can be given as full IRIs enclosed in < and > or as simple strings, in that case the namespace attribute of the parser has to be set to resolve them. See https://www.w3.org/TR/owl2-manchester-syntax/#IRIs.2C_Integers.2C_Literals.2C_and_Entities for more information. Prefixes are currently not supported, except for datatypes. Args: namespace: Namespace to resolve names that were given without one grammar: Grammar (defaults to MANCHESTERGRAMMAR) """ self.ns = namespace self.grammar = grammar if self.grammar is None: self.grammar = MANCHESTER_GRAMMAR
[docs] def parse_expression(self, expression_str: str) -> OWLClassExpression: tree = self.grammar.parse(expression_str.strip()) return self.visit(tree)
[docs] @_transform_children def visit_union(self, node, children) -> OWLClassExpression: return children if isinstance(children, OWLClassExpression) else OWLObjectUnionOf(children)
[docs] @_transform_children def visit_intersection(self, node, children) -> OWLClassExpression: return children if isinstance(children, OWLClassExpression) else OWLObjectIntersectionOf(children)
[docs] def visit_primary(self, node, children) -> OWLClassExpression: match_not, expr = children return OWLObjectComplementOf(expr[0]) if isinstance(match_not, list) else expr[0]
[docs] def visit_some_only_res(self, node, children) -> OWLQuantifiedObjectRestriction: property_, _, type_, _, filler = children type_ = _node_text(*type_) if type_ == _MAN_SYNTAX.EXISTS: return OWLObjectSomeValuesFrom(property_, filler) else: return OWLObjectAllValuesFrom(property_, filler)
[docs] def visit_cardinality_res(self, node, children) -> OWLObjectCardinalityRestriction: property_, _, type_, _, cardinality, _, filler = children type_ = _node_text(*type_) if type_ == _MAN_SYNTAX.MIN: return OWLObjectMinCardinality(cardinality, property_, filler) elif type_ == _MAN_SYNTAX.MAX: return OWLObjectMaxCardinality(cardinality, property_, filler) else: return OWLObjectExactCardinality(cardinality, property_, filler)
[docs] def visit_value_res(self, node, children) -> OWLObjectHasValue: property_, *_, individual = children return OWLObjectHasValue(property_, individual)
[docs] def visit_has_self(self, node, children) -> OWLObjectHasSelf: property_, *_ = children return OWLObjectHasSelf(property_)
[docs] def visit_object_property(self, node, children) -> OWLObjectPropertyExpression: inverse, property_ = children return property_.get_inverse_property() if isinstance(inverse, list) else property_
[docs] def visit_class_expression(self, node, children) -> OWLClassExpression: return children[0]
[docs] @_transform_children def visit_individual_list(self, node, children) -> OWLObjectOneOf: return OWLObjectOneOf(children)
[docs] def visit_data_primary(self, node, children) -> OWLDataRange: match_not, expr = children return OWLDataComplementOf(expr[0]) if isinstance(match_not, list) else expr[0]
[docs] def visit_data_some_only_res(self, node, children) -> OWLQuantifiedDataRestriction: property_, _, type_, _, filler = children type_ = _node_text(*type_) if type_ == _MAN_SYNTAX.EXISTS: return OWLDataSomeValuesFrom(property_, filler) else: return OWLDataAllValuesFrom(property_, filler)
[docs] def visit_data_cardinality_res(self, node, children) -> OWLDataCardinalityRestriction: property_, _, type_, _, cardinality, _, filler = children type_ = _node_text(*type_) if type_ == _MAN_SYNTAX.MIN: return OWLDataMinCardinality(cardinality, property_, filler) elif type_ == _MAN_SYNTAX.MAX: return OWLDataMaxCardinality(cardinality, property_, filler) else: return OWLDataExactCardinality(cardinality, property_, filler)
[docs] def visit_data_value_res(self, node, children) -> OWLDataHasValue: property_, *_, literal = children return OWLDataHasValue(property_, literal)
[docs] @_transform_children def visit_data_union(self, node, children) -> OWLDataRange: return children if isinstance(children, OWLDataRange) else OWLDataUnionOf(children)
[docs] @_transform_children def visit_data_intersection(self, node, children) -> OWLDataRange: return children if isinstance(children, OWLDataRange) else OWLDataIntersectionOf(children)
[docs] @_transform_children def visit_literal_list(self, node, children) -> OWLDataOneOf: return OWLDataOneOf(children)
[docs] def visit_data_parentheses(self, node, children) -> OWLDataRange: *_, expr, _, _ = children return expr
[docs] def visit_datatype_restriction(self, node, children) -> OWLDatatypeRestriction: datatype, *_, facet_restrictions, _, _ = children if isinstance(facet_restrictions, OWLFacetRestriction): facet_restrictions = facet_restrictions, not_valid_literals = [] if datatype != StringOWLDatatype: not_valid_literals = [res.get_facet_value() for res in facet_restrictions if res.get_facet_value().get_datatype() != datatype] not_valid_facets = [res.get_facet() for res in facet_restrictions if res.get_facet() not in _DATATYPE_TO_FACETS[datatype]] if not_valid_literals or not_valid_facets: raise ValueError(f"Literals: {not_valid_literals} and Facets: {not_valid_facets}" f" not valid for datatype: {datatype}") return OWLDatatypeRestriction(datatype, facet_restrictions)
[docs] @_transform_children def visit_facet_restrictions(self, node, children) -> List[OWLFacetRestriction]: return children
[docs] def visit_facet_restriction(self, node, children) -> OWLFacetRestriction: facet, _, literal = children if literal.get_datatype() not in _FACET_TO_LITERAL_DATATYPE[facet]: raise ValueError(f"Literal: {literal} not valid for facet: {facet}") return OWLFacetRestriction(facet, literal)
[docs] def visit_literal(self, node, children) -> OWLLiteral: return children[0]
[docs] def visit_typed_literal(self, node, children) -> OWLLiteral: value, _, datatype = children return OWLLiteral(value[1:-1], datatype)
[docs] def visit_string_literal_language(self, node, children): raise NotImplementedError(f"Language tags and plain literals not supported in owlapy yet: {_node_text(node)}")
[docs] def visit_string_literal_no_language(self, node, children) -> OWLLiteral: value = children[0] return OWLLiteral(value[1:-1], StringOWLDatatype)
[docs] def visit_quoted_string(self, node, children) -> str: return _node_text(node)
[docs] def visit_float_literal(self, node, children) -> OWLLiteral: return OWLLiteral(_node_text(node)[:-1], DoubleOWLDatatype)
[docs] def visit_decimal_literal(self, node, children) -> OWLLiteral: # TODO: Just use float for now, decimal not supported in owlapy yet # owlready2 also just parses decimals to floats return OWLLiteral(_node_text(node), DoubleOWLDatatype)
[docs] def visit_integer_literal(self, node, children) -> OWLLiteral: return OWLLiteral(_node_text(node), IntegerOWLDatatype)
[docs] def visit_boolean_literal(self, node, children) -> OWLLiteral: return OWLLiteral(_node_text(node), BooleanOWLDatatype)
[docs] def visit_datetime_literal(self, node, children) -> OWLLiteral: return OWLLiteral(_node_text(node), DateTimeOWLDatatype)
[docs] def visit_duration_literal(self, node, children) -> OWLLiteral: return OWLLiteral(_node_text(node), DurationOWLDatatype)
[docs] def visit_date_literal(self, node, children) -> OWLLiteral: return OWLLiteral(_node_text(node), DateOWLDatatype)
[docs] def visit_non_negative_integer(self, node, children) -> int: return int(_node_text(node))
[docs] def visit_datatype_iri(self, node, children) -> str: return children[0][1]
[docs] def visit_datatype(self, node, children) -> OWLDatatype: return _STRING_TO_DATATYPE[_node_text(node)]
[docs] def visit_facet(self, node, children) -> OWLFacet: return OWLFacet.from_str(_node_text(node))
[docs] def visit_class_iri(self, node, children) -> OWLClass: return OWLClass(children[0])
[docs] def visit_individual_iri(self, node, children) -> OWLNamedIndividual: return OWLNamedIndividual(children[0])
[docs] def visit_object_property_iri(self, node, children) -> OWLObjectProperty: return OWLObjectProperty(children[0])
[docs] def visit_data_property_iri(self, node, children) -> OWLDataProperty: return OWLDataProperty(children[0])
[docs] def visit_iri(self, node, children) -> IRI: return children[0]
[docs] def visit_full_iri(self, node, children) -> IRI: try: iri = _node_text(node)[1:-1] return IRI.create(iri) except IndexError: raise ValueError(f"{iri} is not a valid IRI.")
[docs] def visit_abbreviated_iri(self, node, children): # TODO: Add support for prefixes raise NotImplementedError(f"Parsing of prefixes is not supported yet: {_node_text(node)}")
[docs] def visit_simple_iri(self, node, children) -> IRI: simple_iri = _node_text(node) if simple_iri == "Thing": return OWLRDFVocabulary.OWL_THING.get_iri() elif simple_iri == "Nothing": return OWLRDFVocabulary.OWL_NOTHING.get_iri() elif self.ns is not None: return IRI(self.ns, simple_iri) else: raise ValueError(f"If entities are specified without a full iri ({simple_iri}), " "the namespace attribute of the parser has to be set.")
[docs] def visit_parentheses(self, node, children) -> OWLClassExpression: *_, expr, _, _ = children return expr
[docs] def generic_visit(self, node, children): return children or node
DL_GRAMMAR = Grammar(r""" union = intersection (must_ws "⊔" must_ws intersection)* intersection = primary (must_ws "⊓" must_ws primary)* # Main entry point + object properties primary = ("¬" maybe_ws)? (has_self / data_value_res / value_res / data_some_only_res / some_only_res / data_cardinality_res / cardinality_res / class_expression) some_only_res = ("∃"/"∀") maybe_ws object_property "." primary cardinality_res = ("≥"/"≤"/"=") must_ws non_negative_integer must_ws object_property "." primary value_res = "∃" maybe_ws object_property "." "{" individual_iri "}" has_self = "∃" maybe_ws object_property "." "Self" object_property = object_property_iri "⁻"? class_expression = class_iri / individual_list / parentheses individual_list = "{" maybe_ws individual_iri (maybe_ws "⊔" maybe_ws individual_iri)* maybe_ws "}" # Back to start symbol (first production rule) parentheses = "(" maybe_ws union maybe_ws ")" # Data properties data_some_only_res = ("∃"/"∀") maybe_ws data_property_iri "." data_primary data_cardinality_res = ("≥"/"≤"/"=") must_ws non_negative_integer must_ws data_property_iri "." data_primary data_value_res = "∃" maybe_ws data_property_iri "." "{" literal "}" data_primary = ("¬" maybe_ws)? data_range data_range = datatype_restriction / datatype_iri / literal_list / data_parentheses literal_list = "{" maybe_ws literal (maybe_ws "⊔" maybe_ws literal)* maybe_ws "}" data_parentheses = "(" maybe_ws data_union maybe_ws ")" data_union = data_intersection (must_ws "⊔" must_ws data_intersection)* data_intersection = data_primary (must_ws "⊓" must_ws data_primary)* datatype_restriction = datatype_iri "[" maybe_ws facet_restrictions maybe_ws "]" facet_restrictions = facet_restriction (maybe_ws "," maybe_ws facet_restriction)* facet_restriction = facet must_ws literal facet = "length" / "minLength" / "maxLength" / "pattern" / "langRange" / "totalDigits" / "fractionDigits" / "≥" / "≤" / "<" / ">" datatype_iri = ("<http://www.w3.org/2001/XMLSchema#" datatype ">") / ("xsd:"? datatype) datatype = "double" / "integer" / "boolean" / "string" / "dateTime" / "date" / "duration" # Literals literal = typed_literal / string_literal_language / string_literal_no_language / datetime_literal / duration_literal / date_literal / float_literal / decimal_literal / integer_literal / boolean_literal typed_literal = quoted_string "^^" datatype_iri string_literal_language = quoted_string language_tag string_literal_no_language = quoted_string / no_match quoted_string = ~"\"([^\"\\\\]|\\\\[\"\\\\])*\"" language_tag = "@" ~"[a-zA-Z]+" ("-" ~"[a-zA-Z0-9]+")* float_literal = sign (float_with_integer_part / float_no_integer_part) ("f"/"F") float_with_integer_part = non_negative_integer ("." ~"[0-9]+")? exponent? float_no_integer_part = "." ~"[0-9]+" exponent? exponent = ("e"/"E") sign ~"[0-9]+" decimal_literal = non_negative_integer "." ~"[0-9]+" integer_literal = sign non_negative_integer boolean_literal = ~"[tT]rue" / ~"[fF]alse" date_literal = ~"[0-9]{4}-((0[1-9])|(1[0-2]))-(([0-2][0-9])|(3[01]))" datetime_literal = ~"[0-9]{4}-((0[1-9])|(1[0-2]))-(([0-2][0-9])|(3[01]))[T\u0020]" ~"(([0-1][0-9])|(2[0-3])):[0-5][0-9]:[0-5][0-9](\\.[0-9]{6})?" ~"(Z|([+-](([0-1][0-9])|(2[0-3])):[0-5][0-9](:[0-5][0-9](\\.[0-9]{6})?)?))?" duration_literal = ~"P([0-9]+W)?([0-9]+D)?(T([0-9]+H)?([0-9]+M)?([0-9]+(\\.[0-9]{6})?S)?)?" sign = ("+"/"-")? non_negative_integer = ~"0|([1-9][0-9]*)" # IRIs / Characters class_iri = "⊤" / "⊥" / iri object_property_iri = iri / no_match data_property_iri = iri / no_match individual_iri = iri / no_match iri = full_iri / abbreviated_iri / simple_iri full_iri = iri_ref / no_match abbreviated_iri = pname_ln / no_match simple_iri = pn_local / no_match # Changes to ManchesterGrammar -- Don't allow: # . used as a separator # ⁻ used for inverse properties (\u207B) iri_ref = "<" ~"[^<>\"{}|^`\\\\\u0000-\u0020]*" ">" pname_ln = pname_ns pn_local pname_ns = pn_prefix? ":" pn_prefix = pn_chars_base pn_chars* pn_local = (pn_chars_u / ~"[0-9]") pn_chars* pn_chars = pn_chars_u / "-" / ~"[0-9]" / ~"\u00B7" / ~"[\u0300-\u036F]" / ~"[\u203F-\u2040]" pn_chars_u = pn_chars_base / "_" pn_chars_base = ~"[a-zA-Z]" / ~"[\u00C0-\u00D6]" / ~"[\u00D8-\u00F6]" / ~"[\u00F8-\u02FF]" / ~"[\u0370-\u037D]" / ~"[\u037F-\u1FFF]" / ~"[\u200C-\u200D]" / ~"[\u2070-\u207A]" / ~"[\u207C-\u218F]"/ ~"[\u2C00-\u2FEF]" / ~"[\u3001-\uD7FF]" / ~"[\uF900-\uFDCF]" / ~"[\uFDF0-\uFFFD]" / ~"[\U00010000-\U000EFFFF]" must_ws = ~"[\u0020\u000D\u0009\u000A]+" maybe_ws = ~"[\u0020\u000D\u0009\u000A]*" # hacky workaround: can be added to a pass through production rule that is semantically important # so nodes are not combined which makes the parsing cleaner no_match = ~"(?!a)a" """) # workaround to support multiple inheritance with different metaclasses class _DLSyntaxParserMeta(type(NodeVisitor), type(OWLObjectParser)): pass
[docs]class DLSyntaxParser(NodeVisitor, OWLObjectParser, metaclass=_DLSyntaxParserMeta): """Description Logic Syntax parser to parse strings to OWLClassExpressions""" slots = 'ns', 'grammar' ns: Optional[Union[str, Namespaces]] def __init__(self, namespace: Optional[Union[str, Namespaces]] = None, grammar=None): """Create a new Description Logic Syntax parser. Names (entities) can be given as full IRIs enclosed in < and > or as simple strings, in that case the namespace attribute of the parser has to be set to resolve them. Prefixes are currently not supported, except for datatypes. Args: namespace: Namespace to resolve names that were given without one grammar: Grammar (defaults to DL_GRAMMAR) """ self.ns = namespace self.grammar = grammar if self.grammar is None: self.grammar = DL_GRAMMAR
[docs] def parse_expression(self, expression_str: str) -> OWLClassExpression: tree = self.grammar.parse(expression_str.strip()) return self.visit(tree)
[docs] @_transform_children def visit_union(self, node, children) -> OWLClassExpression: return children if isinstance(children, OWLClassExpression) else OWLObjectUnionOf(children)
[docs] @_transform_children def visit_intersection(self, node, children) -> OWLClassExpression: return children if isinstance(children, OWLClassExpression) else OWLObjectIntersectionOf(children)
[docs] def visit_primary(self, node, children) -> OWLClassExpression: match_not, expr = children return OWLObjectComplementOf(expr[0]) if isinstance(match_not, list) else expr[0]
[docs] def visit_some_only_res(self, node, children) -> OWLQuantifiedObjectRestriction: type_, _, property_, _, filler = children type_ = _node_text(*type_) if type_ == _DL_SYNTAX.EXISTS: return OWLObjectSomeValuesFrom(property_, filler) else: return OWLObjectAllValuesFrom(property_, filler)
[docs] def visit_cardinality_res(self, node, children) -> OWLObjectCardinalityRestriction: type_, _, cardinality, _, property_, _, filler = children type_ = _node_text(*type_) if type_ == _DL_SYNTAX.MIN: return OWLObjectMinCardinality(cardinality, property_, filler) elif type_ == _DL_SYNTAX.MAX: return OWLObjectMaxCardinality(cardinality, property_, filler) else: return OWLObjectExactCardinality(cardinality, property_, filler)
[docs] def visit_value_res(self, node, children) -> OWLObjectHasValue: _, _, property_, _, _, individual, _ = children return OWLObjectHasValue(property_, individual)
[docs] def visit_has_self(self, node, children) -> OWLObjectHasSelf: _, _, property_, _, _ = children return OWLObjectHasSelf(property_)
[docs] def visit_object_property(self, node, children) -> OWLObjectPropertyExpression: property_, inverse = children return property_.get_inverse_property() if isinstance(inverse, list) else property_
[docs] def visit_class_expression(self, node, children) -> OWLClassExpression: return children[0]
[docs] @_transform_children def visit_individual_list(self, node, children) -> OWLObjectOneOf: return OWLObjectOneOf(children)
[docs] def visit_data_primary(self, node, children) -> OWLDataRange: match_not, expr = children return OWLDataComplementOf(expr[0]) if isinstance(match_not, list) else expr[0]
[docs] def visit_data_some_only_res(self, node, children) -> OWLQuantifiedDataRestriction: type_, _, property_, _, filler = children type_ = _node_text(*type_) if type_ == _DL_SYNTAX.EXISTS: return OWLDataSomeValuesFrom(property_, filler) else: return OWLDataAllValuesFrom(property_, filler)
[docs] def visit_data_cardinality_res(self, node, children) -> OWLDataCardinalityRestriction: type_, _, cardinality, _, property_, _, filler = children type_ = _node_text(*type_) if type_ == _DL_SYNTAX.MIN: return OWLDataMinCardinality(cardinality, property_, filler) elif type_ == _DL_SYNTAX.MAX: return OWLDataMaxCardinality(cardinality, property_, filler) else: return OWLDataExactCardinality(cardinality, property_, filler)
[docs] def visit_data_value_res(self, node, children) -> OWLDataHasValue: _, _, property_, _, _, literal, _ = children return OWLDataHasValue(property_, literal)
[docs] @_transform_children def visit_data_union(self, node, children) -> OWLDataRange: return children if isinstance(children, OWLDataRange) else OWLDataUnionOf(children)
[docs] @_transform_children def visit_data_intersection(self, node, children) -> OWLDataRange: return children if isinstance(children, OWLDataRange) else OWLDataIntersectionOf(children)
[docs] @_transform_children def visit_literal_list(self, node, children) -> OWLDataOneOf: return OWLDataOneOf(children)
[docs] def visit_data_parentheses(self, node, children) -> OWLDataRange: *_, expr, _, _ = children return expr
[docs] def visit_datatype_restriction(self, node, children) -> OWLDatatypeRestriction: datatype, *_, facet_restrictions, _, _ = children if isinstance(facet_restrictions, OWLFacetRestriction): facet_restrictions = facet_restrictions, not_valid_literals = [] if datatype != StringOWLDatatype: not_valid_literals = [res.get_facet_value() for res in facet_restrictions if res.get_facet_value().get_datatype() != datatype] not_valid_facets = [res.get_facet() for res in facet_restrictions if res.get_facet() not in _DATATYPE_TO_FACETS[datatype]] if not_valid_literals or not_valid_facets: raise ValueError(f"Literals: {not_valid_literals} and Facets: {not_valid_facets}" f" not valid for datatype: {datatype}") return OWLDatatypeRestriction(datatype, facet_restrictions)
[docs] @_transform_children def visit_facet_restrictions(self, node, children) -> List[OWLFacetRestriction]: return children
[docs] def visit_facet_restriction(self, node, children) -> OWLFacetRestriction: facet, _, literal = children if literal.get_datatype() not in _FACET_TO_LITERAL_DATATYPE[facet]: raise ValueError(f"Literal: {literal} not valid for facet: {facet}") return OWLFacetRestriction(facet, literal)
[docs] def visit_literal(self, node, children) -> OWLLiteral: return children[0]
[docs] def visit_typed_literal(self, node, children) -> OWLLiteral: value, _, datatype = children return OWLLiteral(value[1:-1], datatype)
[docs] def visit_string_literal_language(self, node, children): raise NotImplementedError(f"Language tags and plain literals not supported in owlapy yet: {_node_text(node)}")
[docs] def visit_string_literal_no_language(self, node, children) -> OWLLiteral: value = children[0] return OWLLiteral(value[1:-1], StringOWLDatatype)
[docs] def visit_quoted_string(self, node, children) -> str: return _node_text(node)
[docs] def visit_float_literal(self, node, children) -> OWLLiteral: return OWLLiteral(_node_text(node)[:-1], DoubleOWLDatatype)
[docs] def visit_decimal_literal(self, node, children) -> OWLLiteral: # TODO: Just use float for now, decimal not supported in owlapy yet # owlready2 also just parses decimals to floats return OWLLiteral(_node_text(node), DoubleOWLDatatype)
[docs] def visit_integer_literal(self, node, children) -> OWLLiteral: return OWLLiteral(_node_text(node), IntegerOWLDatatype)
[docs] def visit_boolean_literal(self, node, children) -> OWLLiteral: return OWLLiteral(_node_text(node), BooleanOWLDatatype)
[docs] def visit_datetime_literal(self, node, children) -> OWLLiteral: return OWLLiteral(_node_text(node), DateTimeOWLDatatype)
[docs] def visit_duration_literal(self, node, children) -> OWLLiteral: return OWLLiteral(_node_text(node), DurationOWLDatatype)
[docs] def visit_date_literal(self, node, children) -> OWLLiteral: return OWLLiteral(_node_text(node), DateOWLDatatype)
[docs] def visit_non_negative_integer(self, node, children) -> int: return int(_node_text(node))
[docs] def visit_datatype_iri(self, node, children) -> str: return children[0][1]
[docs] def visit_datatype(self, node, children) -> OWLDatatype: return _STRING_TO_DATATYPE[_node_text(node)]
[docs] def visit_facet(self, node, children) -> OWLFacet: symbolic_form = _node_text(node) if symbolic_form == _DL_SYNTAX.MIN: symbolic_form = '>=' elif symbolic_form == _DL_SYNTAX.MAX: symbolic_form = '<=' return OWLFacet.from_str(symbolic_form)
[docs] def visit_class_iri(self, node, children) -> OWLClass: top_bottom = _node_text(node) if top_bottom == _DL_SYNTAX.TOP: return OWLClass(OWLRDFVocabulary.OWL_THING.get_iri()) elif top_bottom == _DL_SYNTAX.BOTTOM: return OWLClass(OWLRDFVocabulary.OWL_NOTHING.get_iri()) else: return OWLClass(children[0])
[docs] def visit_individual_iri(self, node, children) -> OWLNamedIndividual: return OWLNamedIndividual(children[0])
[docs] def visit_object_property_iri(self, node, children) -> OWLObjectProperty: return OWLObjectProperty(children[0])
[docs] def visit_data_property_iri(self, node, children) -> OWLDataProperty: return OWLDataProperty(children[0])
[docs] def visit_iri(self, node, children) -> IRI: return children[0]
[docs] def visit_full_iri(self, node, children) -> IRI: try: iri = _node_text(node)[1:-1] return IRI.create(iri) except IndexError: raise ValueError(f"{iri} is not a valid IRI.")
[docs] def visit_abbreviated_iri(self, node, children): # TODO: Add support for prefixes raise NotImplementedError(f"Parsing of prefixes is not supported yet: {_node_text(node)}")
[docs] def visit_simple_iri(self, node, children) -> IRI: simple_iri = _node_text(node) if self.ns is not None: return IRI(self.ns, simple_iri) else: raise ValueError(f"If entities are specified without a full iri ({simple_iri}), " "the namespace attribute of the parser has to be set.")
[docs] def visit_parentheses(self, node, children) -> OWLClassExpression: *_, expr, _, _ = children return expr
[docs] def generic_visit(self, node, children): return children or node