sybil-extras

Add-ons for Sybil.

Installation

Requires Python 3.11+.

$ pip install sybil-extras

Evaluators

MultiEvaluator

"""Use MultiEvaluator to run multiple evaluators on the same parser."""

from sybil import Example, Sybil
from sybil.evaluators.python import PythonEvaluator
from sybil.parsers.rest.codeblock import CodeBlockParser
from sybil.typing import Evaluator

from sybil_extras.evaluators.multi import MultiEvaluator


def _evaluator_1(example: Example) -> None:
    """Assert that the parsed example has a length of at least 50."""
    required_length = 50
    parsed_length = len(example.parsed)
    assert parsed_length >= required_length


# Pair the custom length check with Sybil's PythonEvaluator.
evaluators: list[Evaluator] = [_evaluator_1, PythonEvaluator()]
multi_evaluator = MultiEvaluator(evaluators=evaluators)
# Use the combined evaluator for ``python`` code blocks.
parser = CodeBlockParser(language="python", evaluator=multi_evaluator)
sybil = Sybil(parsers=[parser])

# Expose Sybil's pytest integration under the name ``pytest_collect_file``.
pytest_collect_file = sybil.pytest()

ShellCommandEvaluator

"""Use ShellCommandEvaluator to run shell commands against the code block."""

import sys
import uuid
from pathlib import Path

from sybil import Sybil
from sybil.example import Example
from sybil.parsers.rest.codeblock import CodeBlockParser

from sybil_extras.evaluators.shell_evaluator import ShellCommandEvaluator


def temp_file_path_maker(*, example: Example) -> Path:
    """Return a randomly named ``.py`` path beside the example's file.

    The file name is ``temp_`` followed by the first eight hex digits of
    a fresh UUID4; the directory is the parent of ``example.path``.
    """
    document_directory = Path(example.path).parent
    random_suffix = uuid.uuid4().hex[:8]
    return document_directory / f"temp_{random_suffix}.py"


# Run ``mypy`` on each code block using the current Python interpreter.
evaluator = ShellCommandEvaluator(
    args=[sys.executable, "-m", "mypy"],
    # The code block is written to a temporary file
    # created by the temp_file_path_maker callable.
    temp_file_path_maker=temp_file_path_maker,
    # Pad the temporary file with newlines so that the
    # line numbers in the error messages match the
    # line numbers in the source document.
    pad_file=True,
    # Don't write any changes back to the source document.
    # This option is useful when running a linter or formatter
    # which modifies the code.
    write_to_file=False,
    # Use a pseudo-terminal for running commands.
    # This can be useful e.g. to get color output, but can also break
    # in some environments.
    use_pty=True,
)
# Use the shell command evaluator for ``python`` code blocks.
parser = CodeBlockParser(language="python", evaluator=evaluator)
sybil = Sybil(parsers=[parser])

# Expose Sybil's pytest integration under the name ``pytest_collect_file``.
pytest_collect_file = sybil.pytest()

BlockAccumulatorEvaluator

The BlockAccumulatorEvaluator accumulates parsed code block content in a list within the document’s namespace. This is useful for testing parsers that group multiple code blocks together.

"""Use BlockAccumulatorEvaluator to accumulate code blocks."""

from pathlib import Path

from sybil import Sybil
from sybil.parsers.rest.codeblock import CodeBlockParser

from sybil_extras.evaluators.block_accumulator import BlockAccumulatorEvaluator

# Key in the document namespace under which the blocks are accumulated.
namespace_key = "blocks"
evaluator = BlockAccumulatorEvaluator(namespace_key=namespace_key)
parser = CodeBlockParser(language="python", evaluator=evaluator)
sybil = Sybil(parsers=[parser])
document = sybil.parse(path=Path("README.rst"))

# Evaluate every example so that the evaluator sees each code block.
for example in document.examples():
    example.evaluate()

# Read the accumulated blocks back from the document namespace.
blocks = document.namespace[namespace_key]
# At least one block must have been accumulated.
assert len(blocks)

NoOpEvaluator

The NoOpEvaluator is an evaluator which does nothing. It is useful for testing and debugging parsers.

"""Use NoOpEvaluator to do nothing."""

from sybil import Sybil
from sybil.parsers.rest.codeblock import CodeBlockParser

from sybil_extras.evaluators.no_op import NoOpEvaluator

# ``python`` code blocks are parsed but evaluated with a NoOpEvaluator.
parser = CodeBlockParser(language="python", evaluator=NoOpEvaluator())
sybil = Sybil(parsers=[parser])

# Expose Sybil's pytest integration under the name ``pytest_collect_file``.
pytest_collect_file = sybil.pytest()

CodeBlockWriterEvaluator

The CodeBlockWriterEvaluator wraps another evaluator and writes any modifications back to the source document. This is useful for building evaluators that transform code blocks, such as formatters or auto-fixers.

The wrapped evaluator should store the modified content in example.document.namespace[namespace_key] for it to be written back.

"""Use CodeBlockWriterEvaluator to write modifications back to code blocks."""

from sybil import Example, Sybil
from sybil.parsers.rest.codeblock import CodeBlockParser

from sybil_extras.evaluators.code_block_writer import CodeBlockWriterEvaluator


def formatting_evaluator(example: Example) -> None:
    """Upper-case the parsed code and store it in the document namespace.

    The result is saved under the ``"modified_content"`` key.
    """
    uppercased = example.parsed.upper()
    namespace = example.document.namespace
    namespace["modified_content"] = uppercased


# Wrap the formatting evaluator; the writer reads the content it stores
# under ``namespace_key``.
writer_evaluator = CodeBlockWriterEvaluator(
    evaluator=formatting_evaluator,
    # The key in example.document.namespace where modified content is stored.
    # Defaults to "modified_content".
    namespace_key="modified_content",
    # Optional encoding for writing files.
    encoding=None,
)
# Use the writer evaluator for ``python`` code blocks.
parser = CodeBlockParser(language="python", evaluator=writer_evaluator)
sybil = Sybil(parsers=[parser])

# Expose Sybil's pytest integration under the name ``pytest_collect_file``.
pytest_collect_file = sybil.pytest()

Parsers

CustomDirectiveSkipParser

"""Use CustomDirectiveSkipParser to skip code blocks with a custom marker."""

from sybil import Sybil
from sybil.parsers.rest.codeblock import PythonCodeBlockParser

# Similar parsers are available at
# sybil_extras.parsers.markdown.custom_directive_skip,
# sybil_extras.parsers.markdown_it.custom_directive_skip,
# sybil_extras.parsers.mdx.custom_directive_skip,
# sybil_extras.parsers.myst.custom_directive_skip,
# sybil_extras.parsers.myst_parser.custom_directive_skip.
from sybil_extras.parsers.rest.custom_directive_skip import (
    CustomDirectiveSkipParser,
)

# Skip directives are written as ``custom-marker-skip`` rather than ``skip``.
skip_parser = CustomDirectiveSkipParser(directive="custom-marker-skip")
code_block_parser = PythonCodeBlockParser()

# Register the skip parser alongside the code block parser.
sybil = Sybil(parsers=[skip_parser, code_block_parser])

# Expose Sybil's pytest integration under the name ``pytest_collect_file``.
pytest_collect_file = sybil.pytest()

This allows you to skip code blocks in the same way as described in the Sybil documentation for skipping examples in reStructuredText, Markdown, MDX, and MyST files, but with custom text, e.g. custom-marker-skip replacing the word skip.

GroupedSourceParser

"""Use GroupedSourceParser to group code blocks by a custom directive."""

import sys
from pathlib import Path

from sybil import Sybil
from sybil.example import Example
from sybil.parsers.rest.codeblock import PythonCodeBlockParser

# Similar parsers are available at
# sybil_extras.parsers.markdown.grouped_source,
# sybil_extras.parsers.markdown_it.grouped_source,
# sybil_extras.parsers.mdx.grouped_source,
# sybil_extras.parsers.myst.grouped_source,
# sybil_extras.parsers.myst_parser.grouped_source.
from sybil_extras.parsers.rest.grouped_source import GroupedSourceParser


def evaluator(example: Example) -> None:
    """Write the parsed example to standard output."""
    emit = sys.stdout.write
    emit(example.parsed)


# Code blocks between ``group: start`` and ``group: end`` comments are
# evaluated together with ``evaluator``.
group_parser = GroupedSourceParser(
    directive="group",
    evaluator=evaluator,
    # Pad the groups with newlines so that the
    # line number differences between blocks in the output match the
    # line number differences in the source document.
    # This is useful for error messages that reference line numbers.
    # However, this is detrimental to commands that expect the file
    # to not have a bunch of newlines in it, such as formatters.
    pad_groups=True,
)
code_block_parser = PythonCodeBlockParser()

# The group parser is listed after the code block parser.
sybil = Sybil(parsers=[code_block_parser, group_parser])

document = sybil.parse(path=Path("CHANGELOG.rst"))

for item in document.examples():
    # One evaluate call will evaluate a code block with the contents of all
    # code blocks in the group.
    item.evaluate()

This makes Sybil act as though all of the code blocks within a group are a single code block, to be evaluated with the evaluator given to GroupedSourceParser.

Only code blocks parsed by another parser in the same Sybil instance will be grouped.

The GroupedSourceParser must be registered after any code block parsers in the Sybil(parsers=[...]) list. At parse time, it counts code blocks by examining document.examples(), which only contains examples from parsers that have already run.

A group is defined by a pair of comments, group: start and group: end. The group: end example is expanded to include the contents of the code blocks in the group.

A reStructuredText example:

.. code-block:: python

   """Code block outside the group."""

   x = 1
   assert x == 1

.. group: start

.. code-block:: python

    """Define a function to use in the next code block."""

    import sys


    def hello() -> None:
        """Print a greeting."""
        sys.stdout.write("Hello, world!")


    hello()

.. code-block:: python

    """Run a function which is defined in the previous code block."""

    # We don't run ``hello()`` yet - ``doccmd`` does not support groups

.. group: end

GroupAllParser

"""Use GroupAllParser to group all code blocks in a document."""

import sys
from pathlib import Path

from sybil import Sybil
from sybil.example import Example
from sybil.parsers.rest.codeblock import PythonCodeBlockParser

# Similar parsers are available at
# sybil_extras.parsers.markdown.group_all,
# sybil_extras.parsers.markdown_it.group_all,
# sybil_extras.parsers.mdx.group_all,
# sybil_extras.parsers.myst.group_all,
# sybil_extras.parsers.myst_parser.group_all.
from sybil_extras.parsers.rest.group_all import GroupAllParser


def evaluator(example: Example) -> None:
    """Send the parsed example text to standard output as-is."""
    write_to_stdout = sys.stdout.write
    write_to_stdout(example.parsed)


# All code blocks in the document are evaluated together with ``evaluator``.
group_all_parser = GroupAllParser(
    evaluator=evaluator,
    # Pad the groups with newlines so that the
    # line number differences between blocks in the output match the
    # line number differences in the source document.
    # This is useful for error messages that reference line numbers.
    # However, this is detrimental to commands that expect the file
    # to not have a bunch of newlines in it, such as formatters.
    pad_groups=True,
)
code_block_parser = PythonCodeBlockParser()

# The group-all parser is listed after the code block parser.
sybil = Sybil(parsers=[code_block_parser, group_all_parser])

document = sybil.parse(path=Path("CHANGELOG.rst"))

for item in document.examples():
    # One evaluate call will evaluate a code block with the contents of all
    # code blocks in the document.
    item.evaluate()

This makes Sybil act as though all of the code blocks in a document are a single code block, to be evaluated with the evaluator given to GroupAllParser.

Unlike GroupedSourceParser, this parser does not require any special markup directives like group: start and group: end. All code blocks in the document are automatically grouped together.

Only code blocks parsed by another parser in the same Sybil instance will be grouped.

The GroupAllParser must be registered after any code block parsers in the Sybil(parsers=[...]) list. At parse time, it counts code blocks by examining document.examples(), which only contains examples from parsers that have already run.

AttributeGroupedSourceParser

The AttributeGroupedSourceParser groups MDX code blocks by their group attribute value, following Docusaurus conventions. This is useful for MDX documentation where code blocks with the same group attribute should be combined and evaluated together.

"""Use AttributeGroupedSourceParser to group MDX code blocks by attribute."""

import sys
from pathlib import Path

from sybil import Sybil
from sybil.example import Example

from sybil_extras.evaluators.no_op import NoOpEvaluator
from sybil_extras.parsers.mdx.attribute_grouped_source import (
    AttributeGroupedSourceParser,
)
from sybil_extras.parsers.mdx.codeblock import CodeBlockParser


def evaluator(example: Example) -> None:
    """Print the parsed example by writing it to ``sys.stdout``."""
    stdout_write = sys.stdout.write
    stdout_write(example.parsed)


# MDX code block parser that the group parser is given below.
code_block_parser = CodeBlockParser(language="python")
group_parser = AttributeGroupedSourceParser(
    code_block_parser=code_block_parser,
    evaluator=evaluator,
    # The attribute name to use for grouping (default: "group")
    attribute_name="group",
    # Pad the groups with newlines so that the
    # line number differences between blocks in the output match the
    # line number differences in the source document.
    # This is useful for error messages that reference line numbers.
    # However, this is detrimental to commands that expect the file
    # to not have a bunch of newlines in it, such as formatters.
    pad_groups=True,
    # The evaluator to use for code blocks that don't have the
    # grouping attribute.
    ungrouped_evaluator=NoOpEvaluator(),
)

# Only the group parser is registered; it was given the code block parser.
sybil = Sybil(parsers=[group_parser])

document = sybil.parse(path=Path("example.mdx"))

for item in document.examples():
    # One evaluate call will evaluate a code block with the contents of all
    # code blocks in the same group.
    item.evaluate()

This makes Sybil act as though all code blocks with the same group attribute value are a single code block, to be evaluated with the evaluator given to AttributeGroupedSourceParser.

An MDX example:

```python group="example1"
from pprint import pp
```

Some text in between.

```python group="example1"
pp({"hello": "world"})
```

```python group="example2"
x = 1
```

In this example, the first two code blocks will be combined and evaluated as one block, while the third block (with group="example2") will be evaluated separately.

Code blocks with the group attribute (or custom attribute name) will be grouped. Code blocks without the attribute are evaluated with the ungrouped_evaluator.

SphinxJinja2Parser

Use the SphinxJinja2Parser to parse sphinx-jinja2 templates in Sphinx documentation.

This extracts the source, arguments and options from .. jinja:: directive blocks in reStructuredText documents or ```{jinja} blocks in MyST documents.

"""Use SphinxJinja2Parser to extract Jinja templates."""

from pathlib import Path

from sybil import Sybil
from sybil.example import Example

# Similar parsers are available at
# sybil_extras.parsers.myst.sphinx_jinja2 and
# sybil_extras.parsers.myst_parser.sphinx_jinja2.
# There are no Markdown or MDX parsers as Sphinx is not used with them
# without MyST.
from sybil_extras.parsers.rest.sphinx_jinja2 import SphinxJinja2Parser


def _evaluator(example: Example) -> None:
    """Assert that the parsed example has a length of at least 50."""
    required_length = 50
    parsed_length = len(example.parsed)
    assert parsed_length >= required_length


# ``_evaluator`` is the evaluator given to the Jinja parser.
parser = SphinxJinja2Parser(evaluator=_evaluator)
sybil = Sybil(parsers=[parser])
document = sybil.parse(path=Path("CHANGELOG.rst"))
# Evaluate every example parsed from the document.
for item in document.examples():
    item.evaluate()

Djot code block parser

The djot CodeBlockParser correctly handles code blocks that are implicitly closed when their parent container ends, following the djot specification.

For example, a code block inside a blockquote without a closing fence:

> ```python
> code in a
> block quote

Paragraph.

"""Use the djot CodeBlockParser for djot documents."""

from sybil import Sybil

from sybil_extras.evaluators.no_op import NoOpEvaluator
from sybil_extras.parsers.djot.codeblock import CodeBlockParser

# Parse ``python`` code blocks with the djot parser; evaluation is a no-op.
parser = CodeBlockParser(language="python", evaluator=NoOpEvaluator())
sybil = Sybil(parsers=[parser])

# Expose Sybil's pytest integration under the name ``pytest_collect_file``.
pytest_collect_file = sybil.pytest()

Djot directive lexer

Use DirectiveInDjotCommentLexer to extract directive information from djot comments such as {% group: start %}. This pairs well with sybil.testing.check_lexer for concise lexer tests.

"""Lex djot directives and test them with check_lexer."""

from sybil.testing import check_lexer

from sybil_extras.parsers.djot.lexers import DirectiveInDjotCommentLexer

# Lex ``group`` directives; ``arguments`` is given as the pattern ``.+``.
lexer = DirectiveInDjotCommentLexer(directive="group", arguments=r".+")

# Check the lexer against a source text containing one djot comment:
# the expected text is the comment itself and the expected lexemes are
# the directive name and its arguments.
check_lexer(
    lexer=lexer,
    source_text="Before\n{% group: start %}\nAfter\n",
    expected_text="{% group: start %}",
    expected_lexemes={"directive": "group", "arguments": "start"},
)

MarkdownIt code block parser

The markdown_it module provides Markdown parsers that use the markdown-it-py library instead of regex. This provides more accurate parsing of Markdown documents, particularly for edge cases like fenced code blocks inside indented sections.

"""Use the markdown_it CodeBlockParser for Markdown documents."""

from sybil import Sybil

from sybil_extras.evaluators.no_op import NoOpEvaluator
from sybil_extras.parsers.markdown_it.codeblock import CodeBlockParser

# Parse ``python`` code blocks with the markdown_it parser; evaluation is
# a no-op.
parser = CodeBlockParser(language="python", evaluator=NoOpEvaluator())
sybil = Sybil(parsers=[parser])

# Expose Sybil's pytest integration under the name ``pytest_collect_file``.
pytest_collect_file = sybil.pytest()

The module also provides CustomDirectiveSkipParser, GroupedSourceParser, GroupAllParser, and DirectiveInHTMLCommentLexer that use the MarkdownIt library for parsing HTML comments containing directives.

MystParser code block parser

The myst_parser module provides MyST parsers that use the myst-parser library instead of regex. This provides more accurate parsing of MyST documents, including support for percent-style comment directives (% skip: next) in addition to HTML comment directives.

"""Use the myst_parser CodeBlockParser for MyST documents."""

from sybil import Sybil

from sybil_extras.evaluators.no_op import NoOpEvaluator
from sybil_extras.parsers.myst_parser.codeblock import CodeBlockParser

# Parse ``python`` code blocks with the myst_parser parser; evaluation is
# a no-op.
parser = CodeBlockParser(language="python", evaluator=NoOpEvaluator())
sybil = Sybil(parsers=[parser])

# Expose Sybil's pytest integration under the name ``pytest_collect_file``.
pytest_collect_file = sybil.pytest()

The module also provides CustomDirectiveSkipParser, GroupedSourceParser, GroupAllParser, SphinxJinja2Parser, DirectiveInHTMLCommentLexer, and DirectiveInPercentCommentLexer that use the myst-parser library.

Markup Languages

The languages module provides a MarkupLanguage dataclass and predefined instances for working with different markup formats. This is useful for building tools that need to work consistently across multiple markup languages.

"""Use MarkupLanguage to work with different markup formats."""

from pathlib import Path

from sybil import Sybil

from sybil_extras.evaluators.no_op import NoOpEvaluator
from sybil_extras.languages import (
    DJOT,
    MARKDOWN,
    MDX,
    MYST,
    MYST_PARSER,
    NORG,
    RESTRUCTUREDTEXT,
)

# Each predefined language instance has a ``name``.
assert MYST.name == "MyST"
assert MYST_PARSER.name == "MystParser"
assert MARKDOWN.name == "Markdown"
assert MDX.name == "MDX"
assert DJOT.name == "Djot"
assert NORG.name == "Norg"
assert RESTRUCTUREDTEXT.name == "reStructuredText"

# Build a code block parser from the class attached to the
# reStructuredText language instance.
code_parser = RESTRUCTUREDTEXT.code_block_parser_cls(
    language="python",
    evaluator=NoOpEvaluator(),
)

sybil = Sybil(parsers=[code_parser])
document = sybil.parse(path=Path("README.rst"))

# Evaluate every example parsed from the document.
for example in document.examples():
    example.evaluate()

Reference