Skip to content

Response Parsers API

This module provides classes for parsing and validating LLM responses into structured data formats, such as Pydantic models, JSON, or XML.

Base Class

maticlib.core.parsers.base.BaseResponseParser

Bases: ABC, Generic[T]

Base class for response parsers.

A response parser has two responsibilities: (1) providing structural instructions to the LLM, and (2) parsing the LLM's text output into a structured object.

Source code in maticlib/core/parsers/base.py
class BaseResponseParser(ABC, Generic[T]):
    """
    Abstract interface implemented by every response parser.

    A concrete parser supplies two operations:
    1. ``get_structure_instructions`` - text telling the LLM how to shape its output.
    2. ``parse`` - conversion of the LLM's raw text into a structured object of type ``T``.
    """

    @abstractmethod
    def parse(self, text: str) -> T:
        """
        Convert the raw text produced by an LLM into a structured object.

        Args:
            text (str): The raw text response from the model.

        Returns:
            T: The parsed structured object.
        """
        ...

    @abstractmethod
    def get_structure_instructions(self) -> str:
        """
        Return the formatting instructions to embed in the prompt.

        Returns:
            str: Formatting instructions for the prompt.
        """
        ...

get_structure_instructions abstractmethod

get_structure_instructions()

Provides instructions to the LLM on how to structure its output.

Returns:

Name Type Description
str str

Formatting instructions for the prompt.

Source code in maticlib/core/parsers/base.py
@abstractmethod
def get_structure_instructions(self) -> str:
    """
    Return the formatting instructions to embed in the prompt.

    Abstract: the body is intentionally empty and subclasses supply the text.

    Returns:
        str: Formatting instructions for the prompt.
    """
    ...

parse abstractmethod

parse(text)

Parses the raw text output from an LLM.

Parameters:

Name Type Description Default
text str

The raw text response from the model.

required

Returns:

Name Type Description
T T

The parsed structured object.

Source code in maticlib/core/parsers/base.py
@abstractmethod
def parse(self, text: str) -> T:
    """
    Convert the raw text produced by an LLM into a structured object.

    Abstract: the body is intentionally empty and subclasses supply the logic.

    Args:
        text (str): The raw text response from the model.

    Returns:
        T: The parsed structured object.
    """
    ...

Pydantic Parser

maticlib.core.parsers.pydantic.PydanticResponseParser

Bases: BaseResponseParser[T], Generic[T]

Parses LLM output into a specific Pydantic model.

Ensures that the response is not only valid JSON but also matches the expected schema and types defined by the Pydantic model.

Source code in maticlib/core/parsers/pydantic.py
class PydanticResponseParser(BaseResponseParser[T], Generic[T]):
    """
    Parses LLM output into a specific Pydantic model.

    Ensures that the response is not only valid JSON but also matches the
    expected schema and types defined by the Pydantic model.
    """

    def __init__(self, model: Type[T]):
        """
        Args:
            model (Type[T]): The Pydantic model class responses are validated against.
        """
        self.model = model

    def parse(self, text: str) -> T:
        """
        Parses text into a Pydantic model instance.

        Args:
            text (str): The raw text response from the model.

        Returns:
            T: The result of ``self.model.model_validate`` on the decoded JSON.

        Raises:
            ValueError: If the JSON cannot be decoded or validation fails.
                The underlying exception is attached as ``__cause__``.
        """
        # Use the reusable extraction logic from JSONResponseParser
        json_str = JSONResponseParser._extract_json_string(text)

        try:
            raw_dict = json.loads(json_str)
            return self.model.model_validate(raw_dict)
        except Exception as e:
            # ``Exception`` already covers json.JSONDecodeError; every failure is
            # surfaced as ValueError, chained so the root cause stays visible.
            raise ValueError(f"Could not parse or validate response as Pydantic model: {e}") from e

    def get_structure_instructions(self) -> str:
        """
        Generates instructions based on the Pydantic model schema.

        Returns:
            str: A prompt fragment embedding a trimmed JSON schema of the model.
        """
        schema = self.model.model_json_schema()
        # Simplify schema for the prompt. "$defs" must be kept: nested models are
        # emitted as "$ref" pointers into it, and dropping it would leave those
        # references dangling in the prompt.
        essential_schema = {
            k: v for k, v in schema.items()
            if k in ("$defs", "properties", "required", "type")
        }

        return (
            f"The output should be a valid JSON object matching this schema: {json.dumps(essential_schema)}\n"
            "Respond strictly with the JSON object and no other text."
        )

get_structure_instructions

get_structure_instructions()

Generates instructions based on the Pydantic model schema.

Source code in maticlib/core/parsers/pydantic.py
def get_structure_instructions(self) -> str:
    """
    Generates instructions based on the Pydantic model schema.

    Returns:
        str: A prompt fragment embedding a trimmed JSON schema of the model.
    """
    schema = self.model.model_json_schema()
    # Simplify schema for the prompt. "$defs" must be kept: nested models are
    # emitted as "$ref" pointers into it, and dropping it would leave those
    # references dangling in the prompt.
    essential_schema = {
        k: v for k, v in schema.items()
        if k in ("$defs", "properties", "required", "type")
    }

    return (
        f"The output should be a valid JSON object matching this schema: {json.dumps(essential_schema)}\n"
        "Respond strictly with the JSON object and no other text."
    )

parse

parse(text)

Parses text into a Pydantic model instance.

Source code in maticlib/core/parsers/pydantic.py
def parse(self, text: str) -> T:
    """
    Parses text into a Pydantic model instance.

    Args:
        text (str): The raw text response from the model.

    Returns:
        T: The result of ``self.model.model_validate`` on the decoded JSON.

    Raises:
        ValueError: If the JSON cannot be decoded or validation fails.
            The underlying exception is attached as ``__cause__``.
    """
    # Use the reusable extraction logic from JSONResponseParser
    json_str = JSONResponseParser._extract_json_string(text)

    try:
        raw_dict = json.loads(json_str)
        return self.model.model_validate(raw_dict)
    except Exception as e:
        # ``Exception`` already covers json.JSONDecodeError; every failure is
        # surfaced as ValueError, chained so the root cause stays visible.
        raise ValueError(f"Could not parse or validate response as Pydantic model: {e}") from e

JSON Parser

maticlib.core.parsers.json.JSONResponseParser

Bases: BaseResponseParser[Dict[str, Any]]

Parses LLM output into a JSON/dictionary format.

Includes robust extraction logic to find JSON blocks even if the model returns conversational text surrounding the JSON.

Source code in maticlib/core/parsers/json.py
class JSONResponseParser(BaseResponseParser[Dict[str, Any]]):
    """
    Parses LLM output into a JSON/dictionary format.

    Includes robust extraction logic to find JSON blocks even if the model
    returns conversational text surrounding the JSON.
    """

    @staticmethod
    def _extract_json_string(text: str) -> str:
        """
        Extracts a JSON-looking string from text, handling markdown blocks.

        Strategies, in order:
        1. A JSON object inside a markdown code fence.
        2. The complete JSON object that starts at the first ``{``.
        3. Everything from the first ``{`` to the last ``}``.
        4. The stripped input, unchanged.

        Args:
            text (str): The raw text response from the model.

        Returns:
            str: The best candidate substring; it is not guaranteed to be valid JSON.
        """
        # 1. Try to find regex extraction of JSON blocks (```json ... ```)
        json_match = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", text, re.DOTALL | re.IGNORECASE)
        if json_match:
            return json_match.group(1)

        # 2. Decode one complete object starting at the first brace. Unlike the
        #    greedy match below, this stops at the matching closing brace, so
        #    trailing prose that happens to contain "}" no longer corrupts the
        #    candidate. When first-to-last brace is itself valid JSON, both
        #    strategies yield the same span.
        start = text.find("{")
        if start != -1:
            try:
                _, end = json.JSONDecoder().raw_decode(text, start)
            except json.JSONDecodeError:
                pass
            else:
                return text[start:end]

        # 3. Fall back to anything between the first and last brace
        brace_match = re.search(r"(\{.*\})", text, re.DOTALL)
        if brace_match:
            return brace_match.group(1)

        return text.strip()

    def parse(self, text: str) -> Dict[str, Any]:
        """
        Parses text into a dictionary. Handles markdown formatting.

        Args:
            text (str): The raw text response from the model.

        Returns:
            Dict[str, Any]: The decoded JSON value.

        Raises:
            ValueError: If the extracted candidate is not valid JSON. The
                original ``json.JSONDecodeError`` is attached as ``__cause__``.
        """
        json_str = self._extract_json_string(text)
        try:
            return json.loads(json_str)
        except json.JSONDecodeError as exc:
            raise ValueError(f"Could not parse response as JSON: {text}") from exc

    def get_structure_instructions(self) -> str:
        """
        Instructions for JSON output.

        Returns:
            str: Formatting instructions for the prompt.
        """
        return (
            "The output should be a valid JSON object. "
            "Do not include any conversational text or explanations outside the JSON block."
        )

get_structure_instructions

get_structure_instructions()

Instructions for JSON output.

Source code in maticlib/core/parsers/json.py
def get_structure_instructions(self) -> str:
    """
    Return the fixed prompt text that asks the LLM for a bare JSON object.

    Returns:
        str: Formatting instructions for the prompt.
    """
    instructions = "The output should be a valid JSON object. "
    instructions += "Do not include any conversational text or explanations outside the JSON block."
    return instructions

parse

parse(text)

Parses text into a dictionary. Handles markdown formatting.

Source code in maticlib/core/parsers/json.py
def parse(self, text: str) -> Dict[str, Any]:
    """
    Parses text into a dictionary. Handles markdown formatting.

    Args:
        text (str): The raw text response from the model.

    Returns:
        Dict[str, Any]: The decoded JSON value.

    Raises:
        ValueError: If the extracted candidate is not valid JSON. The
            original ``json.JSONDecodeError`` is attached as ``__cause__``.
    """
    json_str = self._extract_json_string(text)
    try:
        return json.loads(json_str)
    except json.JSONDecodeError as exc:
        # Chain explicitly so the decoder's position info is kept for debugging.
        raise ValueError(f"Could not parse response as JSON: {text}") from exc

XML Parser

maticlib.core.parsers.xml.XMLResponseParser

Bases: BaseResponseParser[Dict[str, Any]]

Parses LLM output into a dictionary using XML tags as structure.

Source code in maticlib/core/parsers/xml.py
class XMLResponseParser(BaseResponseParser[Dict[str, Any]]):
    """
    Parses LLM output into a dictionary using XML tags as structure.
    """

    def parse(self, text: str) -> Dict[str, Any]:
        """
        Extracts XML blocks and converts them into a flat dictionary.

        The result maps each child tag to that child's text. Elements nested
        below that level are not expanded, and when a tag repeats the last
        occurrence wins.

        Args:
            text (str): The raw text response from the model.

        Returns:
            Dict[str, Any]: Mapping of tag name to element text.

        Raises:
            ValueError: If no XML tags are found or the markup is not well
                formed. A parse failure carries the ``ET.ParseError`` as
                ``__cause__``.
        """
        # 1. Look for XML blocks in markdown or the whole text
        xml_match = re.search(r"```(?:xml)?\s*(<.*?>)\s*```", text, re.DOTALL | re.IGNORECASE)
        if xml_match:
            content = xml_match.group(1)
        else:
            # Try to find the first tag to the last tag
            content_match = re.search(r"(<.*?>.*</.*?>)", text, re.DOTALL)
            if not content_match:
                raise ValueError(f"Could not find any XML tags in response: {text}")
            content = content_match.group(1)

        # An XML declaration is only legal at the very start of a document.
        # Once wrapped in <root> below it would make otherwise valid XML
        # unparseable, so drop it first.
        content = re.sub(r"<\?xml\s[^>]*\?>", "", content, flags=re.IGNORECASE)

        try:
            # Wrap in a root tag to ensure valid XML for parsing
            wrapped_content = f"<root>{content}</root>"
            root = ET.fromstring(wrapped_content)

            # If the provided XML already had a single root tag (e.g. <user>...),
            # and that tag has children, we want the children of THAT tag.
            if len(root) == 1 and list(root[0]):
                target = root[0]
            else:
                target = root

            return {child.tag: child.text for child in target}
        except ET.ParseError as e:
            raise ValueError(f"Could not parse response as XML: {e}\nContent: {content}") from e

    def get_structure_instructions(self) -> str:
        """
        Instructions for XML output.

        Returns:
            str: Formatting instructions for the prompt.
        """
        return (
            "The output should be formatted as XML. "
            "Use clear tags for each data field. "
            "Respond strictly with the XML and no other text."
        )

get_structure_instructions

get_structure_instructions()

Instructions for XML output.

Source code in maticlib/core/parsers/xml.py
def get_structure_instructions(self) -> str:
    """
    Return the fixed prompt text that asks the LLM for XML-only output.

    Returns:
        str: Formatting instructions for the prompt.
    """
    sentences = (
        "The output should be formatted as XML. ",
        "Use clear tags for each data field. ",
        "Respond strictly with the XML and no other text.",
    )
    return "".join(sentences)

parse

parse(text)

Extracts XML blocks and converts them into a flat dictionary.

Source code in maticlib/core/parsers/xml.py
def parse(self, text: str) -> Dict[str, Any]:
    """
    Extracts XML blocks and converts them into a flat dictionary.

    The result maps each child tag to that child's text. Elements nested
    below that level are not expanded, and when a tag repeats the last
    occurrence wins.

    Args:
        text (str): The raw text response from the model.

    Returns:
        Dict[str, Any]: Mapping of tag name to element text.

    Raises:
        ValueError: If no XML tags are found or the markup is not well
            formed. A parse failure carries the ``ET.ParseError`` as
            ``__cause__``.
    """
    # 1. Look for XML blocks in markdown or the whole text
    xml_match = re.search(r"```(?:xml)?\s*(<.*?>)\s*```", text, re.DOTALL | re.IGNORECASE)
    if xml_match:
        content = xml_match.group(1)
    else:
        # Try to find the first tag to the last tag
        content_match = re.search(r"(<.*?>.*</.*?>)", text, re.DOTALL)
        if not content_match:
            raise ValueError(f"Could not find any XML tags in response: {text}")
        content = content_match.group(1)

    # An XML declaration is only legal at the very start of a document.
    # Once wrapped in <root> below it would make otherwise valid XML
    # unparseable, so drop it first.
    content = re.sub(r"<\?xml\s[^>]*\?>", "", content, flags=re.IGNORECASE)

    try:
        # Wrap in a root tag to ensure valid XML for parsing
        wrapped_content = f"<root>{content}</root>"
        root = ET.fromstring(wrapped_content)

        # If the provided XML already had a single root tag (e.g. <user>...),
        # and that tag has children, we want the children of THAT tag.
        if len(root) == 1 and list(root[0]):
            target = root[0]
        else:
            target = root

        return {child.tag: child.text for child in target}
    except ET.ParseError as e:
        raise ValueError(f"Could not parse response as XML: {e}\nContent: {content}") from e