# tractatus/.venv-docs/lib/python3.12/site-packages/docx/oxml/parser.py

# pyright: reportImportCycles=false

"""XML parser for python-docx."""

from __future__ import annotations

from typing import TYPE_CHECKING, Dict, Type, cast

from lxml import etree

from docx.oxml.ns import NamespacePrefixedTag, nsmap

if TYPE_CHECKING:
    from docx.oxml.xmlchemy import BaseOxmlElement


# -- configure XML parser --
# Namespace-keyed lookup table; `register_element_cls()` below fills it with the custom
# element class to use for each registered tag.
element_class_lookup = etree.ElementNamespaceClassLookup()
# `remove_blank_text=True` asks lxml to discard whitespace-only text between elements;
# `resolve_entities=False` asks it not to substitute entity references while parsing.
oxml_parser = etree.XMLParser(remove_blank_text=True, resolve_entities=False)
# Attach the lookup so elements produced by this parser are instantiated using the
# classes registered in `element_class_lookup`.
oxml_parser.set_element_class_lookup(element_class_lookup)


def parse_xml(xml: str | bytes) -> "BaseOxmlElement":
    """Parse `xml` and return the root element of the resulting tree.

    Parsing is done with the module-level `oxml_parser`, so any element in `xml` whose
    tag has a registered custom element class is created as an instance of that class.
    """
    root = etree.fromstring(xml, oxml_parser)
    # -- lxml's return type is the generic element; narrow it for type-checkers only --
    return cast("BaseOxmlElement", root)


def register_element_cls(tag: str, cls: Type["BaseOxmlElement"]):
    """Associate custom element class `cls` with the namespace-prefixed `tag`.

    `tag` has the form `nspfx:tagroot`, e.g. `'w:document'`. After registration, the
    oxml parser constructs an instance of `cls` whenever it encounters an element with
    that tag.
    """
    prefix, local_name = tag.split(":")
    # -- the lookup is keyed by namespace URI, so translate the prefix first --
    namespace_uri = nsmap[prefix]
    element_class_lookup.get_namespace(namespace_uri)[local_name] = cls


def OxmlElement(
    nsptag_str: str,
    attrs: Dict[str, str] | None = None,
    nsdecls: Dict[str, str] | None = None,
) -> BaseOxmlElement | etree._Element:  # pyright: ignore[reportPrivateUsage]
    """Create and return a 'loose' lxml element for the tag named by `nsptag_str`.

    `nsptag_str` must carry the standard namespace prefix, e.g. `a:tbl`. When a custom
    element class is defined for that tag name, the new element is an instance of it.

    `attrs`, when provided, is a dict of attribute values to set on the element.

    `nsdecls` maps prefix to namespace name; every entry is declared on the element.
    When `nsdecls` is omitted, a single namespace declaration is added instead, derived
    from the prefix of `nsptag_str`.
    """
    nsptag = NamespacePrefixedTag(nsptag_str)
    # -- fall back to the tag's own prefix-to-namespace mapping when none is given --
    declared_namespaces = nsptag.nsmap if nsdecls is None else nsdecls
    return oxml_parser.makeelement(
        nsptag.clark_name, attrib=attrs, nsmap=declared_namespaces
    )