From 86dc085476fb9fab56dbefcd7c229278b8149390 Mon Sep 17 00:00:00 2001
From: Chris Sewell <chrisj_sewell@hotmail.com>
Date: Thu, 27 Jul 2023 03:35:00 +0200
Subject: [PATCH] =?UTF-8?q?=F0=9F=91=8C=20Directive=20option=20parsing?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .gitignore                                    |   2 +
 .pre-commit-config.yaml                       |   3 +-
 docs/conf.py                                  |   1 +
 docs/syntax/roles-and-directives.md           |  51 +-
 myst_parser/mdit_to_docutils/base.py          |   8 +-
 myst_parser/mocking.py                        |   2 +-
 myst_parser/parsers/directives.py             | 150 ++--
 myst_parser/parsers/options.py                | 658 ++++++++++++++++++
 .../fixtures/directive_options.md             |   2 +-
 .../fixtures/directive_parsing.txt            |  34 +-
 .../fixtures/option_parsing.yaml              | 169 +++++
 .../fixtures/option_parsing_errors.yaml       |  42 ++
 .../fixtures/reporter_warnings.md             |   2 +-
 tests/test_renderers/test_parse_directives.py |  37 +-
 14 files changed, 1078 insertions(+), 83 deletions(-)
 create mode 100644 myst_parser/parsers/options.py
 create mode 100644 tests/test_renderers/fixtures/option_parsing.yaml
 create mode 100644 tests/test_renderers/fixtures/option_parsing_errors.yaml

diff --git a/.gitignore b/.gitignore
index dc4ce707..48cf54de 100644
--- a/.gitignore
+++ b/.gitignore
@@ -133,3 +133,5 @@ _archive/
 
 .vscode/
 .DS_Store
+
+docs/apidocs
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index d192d459..393dcf3c 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -6,7 +6,8 @@ exclude: >
       \.vscode/settings\.json|
       tests/test_commonmark/commonmark\.json|
       .*\.xml|
-      tests/.*/.*\.md
+      tests/.*/.*\.md|
+      tests/.*/.*\.yaml
     )$
 
 repos:
diff --git a/docs/conf.py b/docs/conf.py
index 43f9c970..58cceb0e 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -85,6 +85,7 @@
 nitpick_ignore_regex = [
     (r"py:.*", r"docutils\..*"),
     (r"py:.*", r"pygments\..*"),
+    (r"py:.*", r"typing\.Literal\[.*"),
 ]
 nitpick_ignore = [
     ("py:obj", "myst_parser._docs._ConfigBase"),
diff --git a/docs/syntax/roles-and-directives.md b/docs/syntax/roles-and-directives.md
index 7c0fea86..8e05b590 100644
--- a/docs/syntax/roles-and-directives.md
+++ b/docs/syntax/roles-and-directives.md
@@ -23,9 +23,8 @@ It is effectively a Markdown code fence with curly brackets around the language,
 Here is the basic structure:
 
 `````{list-table}
----
-header-rows: 1
----
+:header-rows: 1
+
 * - MyST
   - reStructuredText
 * - ````md
@@ -55,16 +54,13 @@ This is my note
 ```
 :::
 
-#### Parameterizing directives
+#### Parameterizing directives (options)
 
-For directives that take parameters as input, there are two ways to parameterize them.
-In each case, the options themselves are given as `key: value` pairs. An example of
-each is shown below:
+Many directives can take key/value pairs, in an optional *option block* at the start of the directive.
 
-**Short-hand options with `:` characters**. If you only need one or two options for your
-directive and wish to save lines, you may also specify directive options as a collection
-of lines just after the first line of the directive, each preceding with `:`. Then the
-leading `:` is removed from each line, and the rest is parsed as YAML.
+The option block starts on the first line of the directive body and is defined by a set of lines prefixed with `:`.
+
+The block then follows a YAML-like mapping syntax, where the key (string) and value (string) are separated by a colon (`:`):
 
 :::{myst-example}
 ```{code-block} python
@@ -77,10 +73,28 @@ print(f'my {a}nd line')
 ```
 :::
 
-**Using YAML frontmatter**. A block of YAML front-matter just after the
-first line of the directive will be parsed as options for the directive. This needs to be
-surrounded by `---` lines. Everything in between will be parsed by YAML and
-passed as keyword arguments to your directive. For example:
+Comments, starting `#`, are also allowed in between options or at the end of values, and are ignored.
+The values can be enclosed in quotes (`"` or `'`) and span multiple lines.
+Newline behaviour can be controlled by starting the value with `|` (preserve newlines) or `>` (collapse newlines):
+
+:::{myst-example}
+```{code-block} python
+:lineno-start: 10  # this is a comment
+: # this is also a comment
+:emphasize-lines: "1, 3"
+:caption: |
+:    This is my
+:    multi-line caption. It is *pretty nifty* ;-)
+
+a = 2
+print('my 1st line')
+print(f'my {a}nd line')
+```
+:::
+
+::::{dropdown} Old-style options block
+
+Option blocks can also be enclosed by `---`, with no `:` prefix, for example:
 
 :::{myst-example}
 ```{code-block} python
@@ -97,6 +111,8 @@ print(f'my {a}nd line')
 ```
 :::
 
+::::
+
 (syntax/directives/parsing)=
 
 #### How directives parse content
@@ -209,9 +225,8 @@ Roles are similar to directives - they allow you to define arbitrary new functio
 To define an in-line role, use the following form:
 
 ````{list-table}
----
-header-rows: 1
----
+:header-rows: 1
+
 * - MyST
   - reStructuredText
 * - ````md
diff --git a/myst_parser/mdit_to_docutils/base.py b/myst_parser/mdit_to_docutils/base.py
index faf35e44..93714f7d 100644
--- a/myst_parser/mdit_to_docutils/base.py
+++ b/myst_parser/mdit_to_docutils/base.py
@@ -1741,6 +1741,7 @@ def run_directive(
                 directive_class,
                 first_line,
                 content,
+                line=position,
                 additional_options=additional_options,
             )
         except MarkupError as error:
@@ -1750,12 +1751,11 @@ def run_directive(
             )
             return [error]
 
-        if parsed.warnings:
-            _errors = ",\n".join(parsed.warnings)
+        for warning_msg, warning_line in parsed.warnings:
             self.create_warning(
-                f"{name!r}: {_errors}",
+                f"{name!r}: {warning_msg}",
                 MystWarnings.DIRECTIVE_PARSING,
-                line=position,
+                line=warning_line if warning_line is not None else position,
                 append_to=self.current_node,
             )
 
diff --git a/myst_parser/mocking.py b/myst_parser/mocking.py
index 0bafa424..a6f64319 100644
--- a/myst_parser/mocking.py
+++ b/myst_parser/mocking.py
@@ -140,7 +140,7 @@ def parse_directive_block(
         # TODO should argument_str always be ""?
         parsed = parse_directive_text(directive, "", "\n".join(content))
         if parsed.warnings:
-            raise MarkupError(",".join(parsed.warnings))
+            raise MarkupError(",".join(w for w, _ in parsed.warnings))
         return (
             parsed.arguments,
             parsed.options,
diff --git a/myst_parser/parsers/directives.py b/myst_parser/parsers/directives.py
index 1b031044..e49ed429 100644
--- a/myst_parser/parsers/directives.py
+++ b/myst_parser/parsers/directives.py
@@ -35,7 +35,6 @@
 """
 from __future__ import annotations
 
-import datetime
 import re
 from dataclasses import dataclass
 from textwrap import dedent
@@ -43,9 +42,13 @@
 
 import yaml
 from docutils.parsers.rst import Directive
+from docutils.parsers.rst.directives import flag
 from docutils.parsers.rst.directives.misc import TestDirective
 from docutils.parsers.rst.states import MarkupError
 
+from .options import TokenizeError
+from .options import to_items as options_to_items
+
 
 @dataclass
 class DirectiveParsingResult:
@@ -57,8 +60,10 @@ class DirectiveParsingResult:
     """The lines of body content"""
     body_offset: int
     """The number of lines to the start of the body content."""
-    warnings: list[str]
-    """List of non-fatal errors encountered during parsing."""
+    warnings: list[tuple[str, int | None]]
+    """List of non-fatal errors encountered during parsing.
+    (message, line_number)
+    """
 
 
 def parse_directive_text(
@@ -66,6 +71,7 @@ def parse_directive_text(
     first_line: str,
     content: str,
     *,
+    line: int | None = None,
     validate_options: bool = True,
     additional_options: dict[str, str] | None = None,
 ) -> DirectiveParsingResult:
@@ -75,32 +81,54 @@ def parse_directive_text(
         May be an argument or body text, dependent on the directive
     :param content: All text after the first line. Can include options.
     :param validate_options: Whether to validate the values of options
+        This is actually only here to be used by myst-nb cells,
+        which converts options directly to JSON metadata, using the full YAML spec.
     :param additional_options: Additional options to add to the directive,
         above those parsed from the content (content options take priority).
 
     :raises MarkupError: if there is a fatal parsing/validation error
     """
-    parse_errors: list[str] = []
+    parse_errors: list[tuple[str, int | None]]
+    options: dict[str, Any]
+    body_lines: list[str]
+    content_offset: int
+    has_options_block: bool
+
     if directive_class.option_spec:
-        body, options, option_errors = parse_directive_options(
+        # only look for an option block if there are possible options
+        # body, options, option_errors = _parse_directive_options(
+        result = _parse_directive_options(
             content,
             directive_class,
-            validate=validate_options,
+            line=line,
+            as_yaml=not validate_options,
             additional_options=additional_options,
         )
-        parse_errors.extend(option_errors)
-        body_lines = body.splitlines()
+        parse_errors = result.errors
+        has_options_block = result.has_options
+        options = result.options
+        body_lines = result.content.splitlines()
         content_offset = len(content.splitlines()) - len(body_lines)
     else:
-        # If there are no possible options, we do not look for a YAML block
+        parse_errors = []
+        has_options_block = False
         options = {}
         body_lines = content.splitlines()
         content_offset = 0
 
     if not (directive_class.required_arguments or directive_class.optional_arguments):
-        # If there are no possible arguments, then the body starts on the argument line
-        if first_line:
+        # If there are no possible arguments, then the body can start on the argument line
+        if first_line.strip():
+            if has_options_block and any(body_lines):
+                parse_errors.append(
+                    (
+                        "Cannot split content across first line and body, "
+                        "when options block is present (move first line to body)",
+                        None,
+                    )
+                )
             body_lines.insert(0, first_line)
+            content_offset = 0
         arguments = []
     else:
         arguments = parse_directive_arguments(directive_class, first_line)
@@ -113,26 +141,35 @@ def parse_directive_text(
 
     # check for body content
     if body_lines and not directive_class.has_content:
-        parse_errors.append("Has content, but none permitted")
+        parse_errors.append(("Has content, but none permitted", None))
 
     return DirectiveParsingResult(
         arguments, options, body_lines, content_offset, parse_errors
     )
 
 
-def parse_directive_options(
+@dataclass
+class _DirectiveOptions:
+    content: str
+    options: dict[str, Any]
+    errors: list[tuple[str, int | None]]
+    has_options: bool
+
+
+def _parse_directive_options(
     content: str,
     directive_class: type[Directive],
-    validate: bool = True,
+    as_yaml: bool,
+    line: int | None,
     additional_options: dict[str, str] | None = None,
-) -> tuple[str, dict, list[str]]:
+) -> _DirectiveOptions:
     """Parse (and validate) the directive option section.
 
     :returns: (content, options, validation_errors)
     """
-    options: dict[str, Any] = {}
-    validation_errors: list[str] = []
+    yaml_block: None | str = None
     if content.startswith("---"):
+        line = None if line is None else line + 1
         content = "\n".join(content.splitlines()[1:])
         match = re.search(r"^-{3,}", content, re.MULTILINE)
         if match:
@@ -142,12 +179,11 @@ def parse_directive_options(
             yaml_block = content
             content = ""
         yaml_block = dedent(yaml_block)
-        try:
-            options = yaml.safe_load(yaml_block) or {}
-        except (yaml.parser.ParserError, yaml.scanner.ScannerError):
-            validation_errors.append("Invalid options format (bad YAML)")
     elif content.lstrip().startswith(":"):
-        content_lines = content.splitlines()  # type: list
+        # TODO deprecate allowing initial whitespace (by lstripping)
+        # or at least make it that all have the same indent
+        # also look at mystjs implementation
+        content_lines = content.splitlines()
         yaml_lines = []
         while content_lines:
             if not content_lines[0].lstrip().startswith(":"):
@@ -155,64 +191,80 @@ def parse_directive_options(
             yaml_lines.append(content_lines.pop(0).lstrip()[1:])
         yaml_block = "\n".join(yaml_lines)
         content = "\n".join(content_lines)
-        try:
-            options = yaml.safe_load(yaml_block) or {}
-        except (yaml.parser.ParserError, yaml.scanner.ScannerError):
-            validation_errors.append("Invalid options format (bad YAML)")
 
-    if not isinstance(options, dict):
-        options = {}
-        validation_errors.append("Invalid options format (not a dict)")
+    has_options_block = yaml_block is not None
 
-    if validation_errors:
-        return content, options, validation_errors
+    if as_yaml:
+        yaml_errors: list[tuple[str, int | None]] = []
+        try:
+            yaml_options = yaml.safe_load(yaml_block or "") or {}
+        except (yaml.parser.ParserError, yaml.scanner.ScannerError):
+            yaml_options = {}
+            yaml_errors.append(("Invalid options format (bad YAML)", line))
+        if not isinstance(yaml_options, dict):
+            yaml_options = {}
+            yaml_errors.append(("Invalid options format (not a dict)", line))
+        return _DirectiveOptions(content, yaml_options, yaml_errors, has_options_block)
+
+    options: dict[str, str] = {}
+    if yaml_block is not None:
+        try:
+            options = dict(options_to_items(yaml_block))
+        except TokenizeError as err:
+            return _DirectiveOptions(
+                content,
+                options,
+                [(f"Invalid options format: {err.problem}", line)],
+                has_options_block,
+            )
 
-    if (not validate) or issubclass(directive_class, TestDirective):
+    if issubclass(directive_class, TestDirective):
         # technically this directive spec only accepts one option ('option')
         # but since its for testing only we accept all options
-        return content, options, validation_errors
+        return _DirectiveOptions(content, options, [], has_options_block)
 
     if additional_options:
-        # The YAML block takes priority over additional options
+        # The options block takes priority over additional options
         options = {**additional_options, **options}
 
     # check options against spec
     options_spec: dict[str, Callable] = directive_class.option_spec
     unknown_options: list[str] = []
     new_options: dict[str, Any] = {}
+    validation_errors: list[tuple[str, int | None]] = []
+    value: str | None
     for name, value in options.items():
         try:
             convertor = options_spec[name]
         except KeyError:
             unknown_options.append(name)
             continue
-        if not isinstance(value, str):
-            if value is True or value is None:
-                value = None  # flag converter requires no argument
-            elif isinstance(value, (int, float, datetime.date, datetime.datetime)):
-                # convertor always requires string input
-                value = str(value)
-            else:
-                validation_errors.append(
-                    f'option "{name}" value not string (enclose with ""): {value}'
-                )
-                continue
+        if not value:
+            # reStructuredText parses empty option values as None
+            value = None
+        if convertor is flag:
+            # flag will error if value is not empty,
+            # but to be more permissive we allow any value
+            value = None
         try:
             converted_value = convertor(value)
         except (ValueError, TypeError) as error:
             validation_errors.append(
-                f"Invalid option value for {name!r}: {value}: {error}"
+                (f"Invalid option value for {name!r}: {value}: {error}", line)
             )
         else:
             new_options[name] = converted_value
 
     if unknown_options:
         validation_errors.append(
-            f"Unknown option keys: {sorted(unknown_options)} "
-            f"(allowed: {sorted(options_spec)})"
+            (
+                f"Unknown option keys: {sorted(unknown_options)} "
+                f"(allowed: {sorted(options_spec)})",
+                line,
+            )
         )
 
-    return content, new_options, validation_errors
+    return _DirectiveOptions(content, new_options, validation_errors, has_options_block)
 
 
 def parse_directive_arguments(
diff --git a/myst_parser/parsers/options.py b/myst_parser/parsers/options.py
new file mode 100644
index 00000000..15da7327
--- /dev/null
+++ b/myst_parser/parsers/options.py
@@ -0,0 +1,658 @@
+"""Parser for directive options.
+
+This is a highly restricted parser for YAML,
+which only allows a subset of YAML to be used for directive options:
+
+- Only block mappings are allowed at the top level
+- Mapping keys are parsed as strings (plain or quoted)
+- Mapping values are parsed as strings (plain, quoted, literal `|`, folded `>`)
+- `#` comments and blank lines are allowed
+
+Adapted from:
+https://github.com/yaml/pyyaml/commit/957ae4d495cf8fcb5475c6c2f1bce801096b68a5
+
+For a good description of multi-line YAML strings, see:
+https://stackoverflow.com/a/21699210/5033292
+"""
+from __future__ import annotations
+
+from dataclasses import dataclass, replace
+from typing import ClassVar, Final, Iterable, Literal, cast
+
+
+@dataclass
+class Position:
+    """Position of a character in a stream."""
+
+    index: int
+    line: int
+    column: int
+
+
+class StreamBuffer:
+    """A buffer for a stream of characters."""
+
+    def __init__(self, stream: str):
+        self._buffer = stream + _CHARS_END
+        self._index = 0
+        self._line = 0
+        self._column = 0
+
+    @property
+    def index(self) -> int:
+        return self._index
+
+    @property
+    def line(self) -> int:
+        return self._line
+
+    @property
+    def column(self) -> int:
+        return self._column
+
+    def peek(self, index: int = 0) -> str:
+        return self._buffer[self._index + index]
+
+    def prefix(self, length: int = 1) -> str:
+        return self._buffer[self._index : self._index + length]
+
+    def forward(self, length: int = 1) -> None:
+        while length:
+            ch = self._buffer[self._index]
+            self._index += 1
+            if ch in "\n\x85\u2028\u2029" or (
+                ch == "\r" and self._buffer[self._index] != "\n"
+            ):
+                self._line += 1
+                self._column = 0
+            elif ch != "\uFEFF":
+                self._column += 1
+            length -= 1
+
+    def get_position(self) -> Position:
+        return Position(self._index, self._line, self._column)
+
+
+@dataclass
+class Token:
+    """A parsed token from a directive option stream."""
+
+    id: ClassVar[str] = "<unknown>"
+    start: Position
+    end: Position
+
+
+@dataclass
+class KeyToken(Token):
+    id: ClassVar[str] = "<key>"
+    value: str
+    style: Literal[None, "'", '"'] = None
+    """The original style of the string."""
+
+
+@dataclass
+class ValueToken(Token):
+    id: ClassVar[str] = "<value>"
+    value: str
+    style: Literal[None, "'", '"', "|", ">"] = None
+    """The original style of the string."""
+
+
+@dataclass
+class ColonToken(Token):
+    id: ClassVar[str] = "<colon>"
+
+
+class TokenizeError(Exception):
+    def __init__(
+        self,
+        problem: str,
+        problem_mark: Position,
+        context: str | None = None,
+        context_mark: Position | None = None,
+    ):
+        """A YAML error with optional context.
+
+        :param problem: The problem encountered
+        :param problem_mark: The position of the problem
+        :param context: The context of the error, e.g. the parent being scanned
+        :param context_mark: The position of the context
+        """
+        self.context = context
+        self.context_mark = context_mark
+        self.problem = problem
+        self.problem_mark = problem_mark
+
+    def clone(self, line_offset: int, column_offset: int) -> TokenizeError:
+        """Clone the error with the given line and column offsets."""
+        return TokenizeError(
+            self.problem,
+            replace(
+                self.problem_mark,
+                line=self.problem_mark.line + line_offset,
+                column=self.problem_mark.column + column_offset,
+            ),
+            self.context,
+            None
+            if self.context_mark is None
+            else replace(
+                self.context_mark,
+                line=self.context_mark.line + line_offset,
+                column=self.context_mark.column + column_offset,
+            ),
+        )
+
+    def __str__(self) -> str:
+        lines = []
+        if self.context is not None:
+            lines.append(self.context)
+        if self.context_mark is not None and (
+            self.context_mark.line != self.problem_mark.line
+            or self.context_mark.column != self.problem_mark.column
+        ):
+            lines.append(
+                f"at line {self.context_mark.line}, column {self.context_mark.column}"
+            )
+        if self.problem is not None:
+            lines.append(self.problem)
+        if self.problem_mark is not None:
+            lines.append(
+                f"at line {self.problem_mark.line}, column {self.problem_mark.column}"
+            )
+        return "\n".join(lines)
+
+
+def to_items(
+    text: str, line_offset: int = 0, column_offset: int = 0
+) -> Iterable[tuple[str, str]]:
+    """Parse a directive option block into (key, value) tuples.
+
+    :param text: The directive option text.
+    :param line_offset: The line offset to apply to the error positions.
+    :param column_offset: The column offset to apply to the error positions.
+
+    :raises: `TokenizeError`
+    """
+    for key_token, value_token in to_tokens(text, line_offset, column_offset):
+        yield key_token.value, value_token.value if value_token is not None else ""
+
+
+def to_tokens(
+    text: str, line_offset: int = 0, column_offset: int = 0
+) -> Iterable[tuple[KeyToken, ValueToken | None]]:
+    """Parse a directive option block, and yield key/value token pairs.
+
+    :param text: The directive option text.
+    :param line_offset: The line offset to apply to the error positions.
+    :param column_offset: The column offset to apply to the error positions.
+
+    :raises: `TokenizeError`
+    """
+    key_token: KeyToken | None = None
+    try:
+        for token in tokenize(text):
+            if isinstance(token, KeyToken):
+                if key_token is not None:
+                    yield key_token, None
+                key_token = token
+            elif isinstance(token, ValueToken):
+                if key_token is None:
+                    raise TokenizeError("expected key before value", token.start)
+                yield key_token, token
+                key_token = None
+    except TokenizeError as exc:
+        if line_offset or column_offset:
+            raise exc.clone(line_offset, column_offset) from exc
+        raise
+
+
+def tokenize(text: str) -> Iterable[Token]:
+    """Yield tokens from a directive option stream."""
+    stream = StreamBuffer(text)
+
+    while True:
+        _scan_to_next_token(stream)
+
+        if stream.peek() == _CHARS_END:
+            break
+
+        if not stream.column == 0:
+            raise TokenizeError(
+                "expected key to start at column 0", stream.get_position()
+            )
+
+        # find key
+        ch = stream.peek()
+        if ch in ("'", '"'):
+            yield _scan_flow_scalar(stream, cast(Literal['"', "'"], ch), is_key=True)
+        else:
+            yield _scan_plain_scalar(stream, is_key=True)
+
+        _scan_to_next_token(stream)
+
+        # check next char is a colon (a following space is not required here)
+        if stream.peek() != ":":
+            raise TokenizeError("expected ':' after key", stream.get_position())
+
+        start_mark = stream.get_position()
+        stream.forward()
+        end_mark = stream.get_position()
+        yield ColonToken(start_mark, end_mark)
+
+        _scan_to_next_token(stream)
+
+        # now find value
+        ch = stream.peek()
+        if stream.column == 0:
+            pass
+        elif ch in ("|", ">"):
+            yield _scan_block_scalar(stream, cast(Literal["|", ">"], ch))
+        elif ch in ("'", '"'):
+            yield _scan_flow_scalar(stream, cast(Literal['"', "'"], ch), is_key=False)
+        else:
+            yield _scan_plain_scalar(stream, is_key=False)
+
+
+def _scan_to_next_token(stream: StreamBuffer) -> None:
+    """Skip spaces, line breaks and comments.
+
+    The byte order mark is also stripped,
+    if it's the first character in the stream.
+    """
+    if stream.index == 0 and stream.peek() == "\uFEFF":
+        stream.forward()
+    found = False
+    while not found:
+        while stream.peek() == " ":
+            stream.forward()
+        if stream.peek() == "#":
+            while stream.peek() not in _CHARS_END_NEWLINE:
+                stream.forward()
+        if not _scan_line_break(stream):
+            found = True
+
+
+def _scan_plain_scalar(
+    stream: StreamBuffer, is_key: bool = False
+) -> KeyToken | ValueToken:
+    chunks = []
+    start_mark = stream.get_position()
+    end_mark = start_mark
+    indent = 0 if is_key else 1
+    spaces: list[str] = []
+    while True:
+        length = 0
+        if stream.peek() == "#":
+            break
+        while True:
+            ch = stream.peek(length)
+            if ch in _CHARS_END_SPACE_TAB_NEWLINE or (
+                is_key
+                and ch == ":"
+                and stream.peek(length + 1) in _CHARS_END_SPACE_TAB_NEWLINE
+            ):
+                break
+            length += 1
+        if length == 0:
+            break
+        chunks.extend(spaces)
+        chunks.append(stream.prefix(length))
+        stream.forward(length)
+        end_mark = stream.get_position()
+        spaces = _scan_plain_spaces(stream, allow_newline=(not is_key))
+        if not spaces or stream.peek() == "#" or (stream.column < indent):
+            break
+
+    return (
+        KeyToken(start_mark, end_mark, "".join(chunks))
+        if is_key
+        else ValueToken(start_mark, end_mark, "".join(chunks))
+    )
+
+
+def _scan_plain_spaces(stream: StreamBuffer, allow_newline: bool = True) -> list[str]:
+    chunks = []
+    length = 0
+    while stream.peek(length) == " ":
+        length += 1
+    whitespaces = stream.prefix(length)
+    stream.forward(length)
+    ch = stream.peek()
+    if allow_newline and ch in _CHARS_NEWLINE:
+        line_break = _scan_line_break(stream)
+        breaks = []
+        while stream.peek() in _CHARS_SPACE_NEWLINE:
+            if stream.peek() == " ":
+                stream.forward()
+            else:
+                breaks.append(_scan_line_break(stream))
+        if line_break != "\n":
+            chunks.append(line_break)
+        elif not breaks:
+            chunks.append(" ")
+        chunks.extend(breaks)
+    elif whitespaces:
+        chunks.append(whitespaces)
+    return chunks
+
+
+def _scan_line_break(stream: StreamBuffer) -> str:
+    # Transforms:
+    #   '\r\n'      :   '\n'
+    #   '\r'        :   '\n'
+    #   '\n'        :   '\n'
+    #   '\x85'      :   '\n'
+    #   '\u2028'    :   '\u2028'
+    #   '\u2029'    :   '\u2029'
+    #   default     :   ''
+    ch = stream.peek()
+    if ch in "\r\n\x85":
+        if stream.prefix(2) == "\r\n":
+            stream.forward(2)
+        else:
+            stream.forward()
+        return "\n"
+    elif ch in "\u2028\u2029":
+        stream.forward()
+        return ch
+    return ""
+
+
+def _scan_flow_scalar(
+    stream: StreamBuffer, style: Literal["'", '"'], is_key: bool = False
+) -> KeyToken | ValueToken:
+    double = style == '"'
+    chunks = []
+    start_mark = stream.get_position()
+    quote = stream.peek()
+    stream.forward()
+    chunks.extend(_scan_flow_scalar_non_spaces(stream, double, start_mark))
+    while stream.peek() != quote:
+        chunks.extend(_scan_flow_scalar_spaces(stream, start_mark))
+        chunks.extend(_scan_flow_scalar_non_spaces(stream, double, start_mark))
+    stream.forward()
+    end_mark = stream.get_position()
+    return (
+        KeyToken(start_mark, end_mark, "".join(chunks), style)
+        if is_key
+        else ValueToken(start_mark, end_mark, "".join(chunks), style)
+    )
+
+
+def _scan_flow_scalar_non_spaces(
+    stream: StreamBuffer, double: bool, start_mark: Position
+) -> list[str]:
+    chunks = []
+    while True:
+        length = 0
+        while stream.peek(length) not in "'\"\\" + _CHARS_END_SPACE_TAB_NEWLINE:
+            length += 1
+        if length:
+            chunks.append(stream.prefix(length))
+            stream.forward(length)
+        ch = stream.peek()
+        if not double and ch == "'" and stream.peek(1) == "'":
+            chunks.append("'")
+            stream.forward(2)
+        elif (double and ch == "'") or (not double and ch in '"\\'):
+            chunks.append(ch)
+            stream.forward()
+        elif double and ch == "\\":
+            stream.forward()
+            ch = stream.peek()
+            if ch in _ESCAPE_REPLACEMENTS:
+                chunks.append(_ESCAPE_REPLACEMENTS[ch])
+                stream.forward()
+            elif ch in _ESCAPE_CODES:
+                length = _ESCAPE_CODES[ch]
+                stream.forward()
+                for k in range(length):
+                    if stream.peek(k) not in "0123456789ABCDEFabcdef":
+                        raise TokenizeError(
+                            "expected escape sequence of %d hexadecimal numbers, but found %r"
+                            % (length, stream.peek(k)),
+                            stream.get_position(),
+                            "while scanning a double-quoted scalar",
+                            start_mark,
+                        )
+                code = int(stream.prefix(length), 16)
+                chunks.append(chr(code))
+                stream.forward(length)
+            elif ch in _CHARS_NEWLINE:
+                _scan_line_break(stream)
+                chunks.extend(_scan_flow_scalar_breaks(stream))
+            else:
+                raise TokenizeError(
+                    "found unknown escape character %r" % ch,
+                    stream.get_position(),
+                    "while scanning a double-quoted scalar",
+                    start_mark,
+                )
+        else:
+            return chunks
+
+
+def _scan_flow_scalar_spaces(stream: StreamBuffer, start_mark: Position) -> list[str]:
+    chunks = []  # text this whitespace run contributes to the quoted scalar
+    length = 0
+    while stream.peek(length) in " \t":  # measure the run of spaces/tabs
+        length += 1
+    whitespaces = stream.prefix(length)
+    stream.forward(length)
+    ch = stream.peek()
+    if ch == _CHARS_END:  # input ended while still inside the quotes
+        raise TokenizeError(
+            "found unexpected end of stream",
+            stream.get_position(),
+            "while scanning a quoted scalar",
+            start_mark,
+        )
+    elif ch in _CHARS_NEWLINE:  # whitespace before a line break is dropped
+        line_break = _scan_line_break(stream)
+        breaks = _scan_flow_scalar_breaks(stream)
+        if line_break != "\n":  # a break other than "\n" is kept as returned
+            chunks.append(line_break)
+        elif not breaks:  # a single "\n" is folded into one space
+            chunks.append(" ")
+        chunks.extend(breaks)  # any further line breaks are kept
+    else:
+        chunks.append(whitespaces)  # whitespace within a line is kept as-is
+    return chunks
+
+
+def _scan_flow_scalar_breaks(stream: StreamBuffer) -> list[str]:
+    chunks = []  # one entry per line break consumed
+    while True:
+        while stream.peek() in " \t":  # skip spaces/tabs at the line start
+            stream.forward()
+        if stream.peek() in _CHARS_NEWLINE:
+            chunks.append(_scan_line_break(stream))
+        else:  # anything but a line break ends the run (left unconsumed)
+            return chunks
+
+
+def _scan_block_scalar(stream: StreamBuffer, style: Literal["|", ">"]) -> ValueToken:
+    indent = 0  # base indent; replaced below by the content indent
+    folded = style == ">"  # ">" folds single "\n" breaks, "|" keeps them
+    chunks = []
+    start_mark = stream.get_position()
+
+    # Scan the header: skip the style character, then read the indicators.
+    stream.forward()
+    chomping, increment = _scan_block_scalar_indicators(stream, start_mark)
+    _scan_block_scalar_ignored_line(stream, start_mark)
+
+    # Determine the indentation level and go to the first non-empty line.
+    min_indent = indent + 1
+    if min_indent < 1:  # cannot be true while indent starts at 0
+        min_indent = 1
+    if increment is None:  # no indentation indicator: detect it from content
+        breaks, max_indent, end_mark = _scan_block_scalar_indentation(stream)
+        indent = max(min_indent, max_indent)
+    else:
+        indent = min_indent + increment - 1
+        breaks, end_mark = _scan_block_scalar_breaks(stream, indent)
+    line_break = ""
+
+    # Scan the inner part of the block scalar, one content line per iteration.
+    while stream.column == indent and stream.peek() != _CHARS_END:
+        chunks.extend(breaks)
+        leading_non_space = stream.peek() not in " \t"
+        length = 0
+        while stream.peek(length) not in _CHARS_END_NEWLINE:
+            length += 1
+        chunks.append(stream.prefix(length))
+        stream.forward(length)
+        line_break = _scan_line_break(stream)
+        breaks, end_mark = _scan_block_scalar_breaks(stream, indent)
+        if stream.column == indent and stream.peek() != _CHARS_END:
+            if (
+                folded
+                and line_break == "\n"
+                and leading_non_space
+                and stream.peek() not in " \t"
+            ):
+                if not breaks:  # no blank line in between: join with a space
+                    chunks.append(" ")
+            else:
+                chunks.append(line_break)
+        else:
+            break
+
+    # Chomp the tail (chomping: None = no indicator, True = "+", False = "-").
+    if chomping is not False:  # all but "-" keep the last line break
+        chunks.append(line_break)
+    if chomping is True:  # "+" also keeps the trailing line breaks
+        chunks.extend(breaks)
+
+    # We are done: join the chunks into the token value.
+    return ValueToken(start_mark, end_mark, "".join(chunks), style)
+
+
+def _scan_block_scalar_indicators(
+    stream: StreamBuffer, start_mark: Position
+) -> tuple[bool | None, int | None]:
+    chomping = None  # True for "+", False for "-", None if not given
+    increment = None  # indentation indicator 1-9, None if not given
+    ch = stream.peek()
+    if ch in "+-":  # chomping indicator first, optionally followed by a digit
+        chomping = ch == "+"
+        stream.forward()
+        ch = stream.peek()
+        if ch in "0123456789":
+            increment = int(ch)
+            if increment == 0:
+                raise TokenizeError(
+                    "expected indentation indicator in the range 1-9, but found 0",
+                    stream.get_position(),
+                    "while scanning a block scalar",
+                    start_mark,
+                )
+            stream.forward()
+    elif ch in "0123456789":  # digit first, optionally followed by "+" or "-"
+        increment = int(ch)
+        if increment == 0:
+            raise TokenizeError(
+                "expected indentation indicator in the range 1-9, but found 0",
+                stream.get_position(),
+                "while scanning a block scalar",
+                start_mark,
+            )
+        stream.forward()
+        ch = stream.peek()
+        if ch in "+-":
+            chomping = ch == "+"
+            stream.forward()
+    ch = stream.peek()
+    if ch not in _CHARS_END_SPACE_NEWLINE:  # only space/break/end may follow
+        raise TokenizeError(
+            "expected chomping or indentation indicators, but found %r" % ch,
+            stream.get_position(),
+            "while scanning a block scalar",
+            start_mark,
+        )
+    return chomping, increment
+
+
+def _scan_block_scalar_ignored_line(stream: StreamBuffer, start_mark: Position) -> None:
+    while stream.peek() == " ":  # skip spaces after the header indicators
+        stream.forward()
+    if stream.peek() == "#":  # skip a comment up to the end of the line
+        while stream.peek() not in _CHARS_END_NEWLINE:
+            stream.forward()
+    ch = stream.peek()
+    if ch not in _CHARS_END_NEWLINE:  # any other content here is an error
+        raise TokenizeError(
+            "expected a comment or a line break, but found %r" % ch,
+            stream.get_position(),
+            "while scanning a block scalar",
+            start_mark,
+        )
+    _scan_line_break(stream)
+
+
+def _scan_block_scalar_indentation(
+    stream: StreamBuffer,
+) -> tuple[list[str], int, Position]:
+    chunks = []  # line breaks consumed before the first other character
+    max_indent = 0  # largest column reached while consuming spaces
+    end_mark = stream.get_position()
+    while stream.peek() in _CHARS_SPACE_NEWLINE:
+        if stream.peek() != " ":  # a line break: record it, update the mark
+            chunks.append(_scan_line_break(stream))
+            end_mark = stream.get_position()
+        else:  # a space: consume it and track how far the indent goes
+            stream.forward()
+            if stream.column > max_indent:
+                max_indent = stream.column
+    return chunks, max_indent, end_mark
+
+
+def _scan_block_scalar_breaks(
+    stream: StreamBuffer, indent: int
+) -> tuple[list[str], Position]:
+    chunks = []  # line breaks of the blank lines consumed
+    end_mark = stream.get_position()
+    while stream.column < indent and stream.peek() == " ":  # skip the indent
+        stream.forward()
+    while stream.peek() in _CHARS_NEWLINE:  # the line was blank: consume it
+        chunks.append(_scan_line_break(stream))
+        end_mark = stream.get_position()
+        while stream.column < indent and stream.peek() == " ":
+            stream.forward()
+    return chunks, end_mark
+
+
+_CHARS_END: Final[str] = "\0"  # marks the end of the input
+_CHARS_NEWLINE: Final[str] = "\r\n\x85\u2028\u2029"  # line-break characters
+_CHARS_END_NEWLINE: Final[str] = "\0\r\n\x85\u2028\u2029"  # end or line break
+_CHARS_SPACE_NEWLINE: Final[str] = " \r\n\x85\u2028\u2029"  # space or line break
+_CHARS_END_SPACE_NEWLINE: Final[str] = "\0 \r\n\x85\u2028\u2029"
+_CHARS_END_SPACE_TAB_NEWLINE: Final[str] = "\0 \t\r\n\x85\u2028\u2029"
+
+_ESCAPE_REPLACEMENTS: Final[dict[str, str]] = {  # char after "\" -> replacement
+    "0": "\0",
+    "a": "\x07",
+    "b": "\x08",
+    "t": "\x09",
+    "\t": "\x09",
+    "n": "\x0A",
+    "v": "\x0B",
+    "f": "\x0C",
+    "r": "\x0D",
+    "e": "\x1B",
+    " ": "\x20",
+    '"': '"',
+    "\\": "\\",
+    "/": "/",
+    "N": "\x85",
+    "_": "\xA0",
+    "L": "\u2028",
+    "P": "\u2029",
+}
+
+_ESCAPE_CODES: Final[dict[str, int]] = {  # char after "\" -> hex digits to read
+    "x": 2,
+    "u": 4,
+    "U": 8,
+}
diff --git a/tests/test_renderers/fixtures/directive_options.md b/tests/test_renderers/fixtures/directive_options.md
index 9b472587..779a7942 100644
--- a/tests/test_renderers/fixtures/directive_options.md
+++ b/tests/test_renderers/fixtures/directive_options.md
@@ -133,7 +133,7 @@ foo
 <document source="<src>/index.md">
     <system_message level="2" line="1" source="<src>/index.md" type="WARNING">
         <paragraph>
-            'restructuredtext-test-directive': Invalid options format (bad YAML) [myst.directive_parse]
+            'restructuredtext-test-directive': Invalid options format: expected ':' after key [myst.directive_parse]
     <system_message level="1" line="1" source="<src>/index.md" type="INFO">
         <paragraph>
             Directive processed. Type="restructuredtext-test-directive", arguments=[], options={}, content:
diff --git a/tests/test_renderers/fixtures/directive_parsing.txt b/tests/test_renderers/fixtures/directive_parsing.txt
index e1c76c0d..25b9b769 100644
--- a/tests/test_renderers/fixtures/directive_parsing.txt
+++ b/tests/test_renderers/fixtures/directive_parsing.txt
@@ -25,6 +25,21 @@ options: {}
 warnings: []
 .
 
+note: content in first line and body
+.
+```{note} a
+b
+```
+.
+arguments: []
+body:
+- a
+- b
+content_offset: 0
+options: {}
+warnings: []
+.
+
 note: content after option
 .
 ```{note}
@@ -92,11 +107,14 @@ body:
 - first line
 - ''
 - body line
-content_offset: 1
+content_offset: 0
 options:
   class:
   - tip
-warnings: []
+warnings:
+- - Cannot split content across first line and body, when options block is present
+    (move first line to body)
+  - null
 .
 
 admonition: no options, no new line
@@ -162,7 +180,8 @@ body: []
 content_offset: 3
 options: {}
 warnings:
-- Invalid options format (bad YAML)
+- - 'Unknown option keys: [''a''] (allowed: [''class'', ''name''])'
+  - 1
 .
 
 warning: yaml not a dict
@@ -178,7 +197,8 @@ body: []
 content_offset: 3
 options: {}
 warnings:
-- Invalid options format (not a dict)
+- - 'Invalid options format: expected '':'' after key'
+  - 1
 .
 
 warning: unknown option name
@@ -192,7 +212,8 @@ body: []
 content_offset: 1
 options: {}
 warnings:
-- 'Unknown option keys: [''unknown''] (allowed: [''class'', ''name''])'
+- - 'Unknown option keys: [''unknown''] (allowed: [''class'', ''name''])'
+  - 0
 .
 
 warning: invalid option value
@@ -206,7 +227,8 @@ body: []
 content_offset: 1
 options: {}
 warnings:
-- 'Invalid option value for ''class'': 1: cannot make "1" into a class name'
+- - 'Invalid option value for ''class'': 1: cannot make "1" into a class name'
+  - 0
 .
 
 error: missing argument
diff --git a/tests/test_renderers/fixtures/option_parsing.yaml b/tests/test_renderers/fixtures/option_parsing.yaml
new file mode 100644
index 00000000..f48b60ef
--- /dev/null
+++ b/tests/test_renderers/fixtures/option_parsing.yaml
@@ -0,0 +1,169 @@
+plain key/values:
+  content: |-
+    key1:
+    key2: val2
+    key3:
+        val3
+    key4: val4.1
+        val4.2
+  expected: |-
+    [
+      [
+        "key1",
+        ""
+      ],
+      [
+        "key2",
+        "val2"
+      ],
+      [
+        "key3",
+        "val3"
+      ],
+      [
+        "key4",
+        "val4.1 val4.2"
+      ]
+    ]
+
+plain key/values with comments:
+  content: |-
+    key1: # comment
+    key2: val2 # comment
+    # comment
+    key3:
+        val3 # comment
+
+    key4: val4.1
+        val4.2 # comment
+  expected: |-
+    [
+      [
+        "key1",
+        ""
+      ],
+      [
+        "key2",
+        "val2"
+      ],
+      [
+        "key3",
+        "val3"
+      ],
+      [
+        "key4",
+        "val4.1 val4.2"
+      ]
+    ]
+
+quoted key/values:
+  content: |-
+    "key1": "val1"
+    'key2': 'val2'
+    "key
+    3": "val
+    3"
+    escapes: "\"\e\x07"
+  expected: |-
+    [
+      [
+        "key1",
+        "val1"
+      ],
+      [
+        "key2",
+        "val2"
+      ],
+      [
+        "key 3",
+        "val 3"
+      ],
+      [
+        "escapes",
+        "\"\u001b\u0007"
+      ]
+    ]
+
+literal values:
+  content: |
+    key1: |
+      val1.1
+      val1.2
+
+      val1.3
+    key2: |2
+        val2.1
+      val2.2
+
+        val2.3
+    key3: |-
+      val3.1
+      val3.2
+
+      val3.3
+    key4: |+
+      val4.1
+      val4.2
+
+      val4.3
+  expected: |-
+    [
+      [
+        "key1",
+        "val1.1\nval1.2\n\nval1.3\n"
+      ],
+      [
+        "key2",
+        "  val2.1\nval2.2\n\n  val2.3\n"
+      ],
+      [
+        "key3",
+        "val3.1\nval3.2\n\nval3.3"
+      ],
+      [
+        "key4",
+        "val4.1\nval4.2\n\nval4.3\n"
+      ]
+    ]
+
+folded values:
+  content: |
+    key1: >
+      val1.1
+      val1.2
+
+      val1.3
+    key2: >2
+        val2.1
+      val2.2
+
+        val2.3
+    key3: >-
+      val3.1
+      val3.2
+
+      val3.3
+    key4: >+
+      val4.1
+      val4.2
+
+      val4.3
+  expected: |-
+    [
+      [
+        "key1",
+        "val1.1 val1.2\nval1.3\n"
+      ],
+      [
+        "key2",
+        "  val2.1\nval2.2\n\n  val2.3\n"
+      ],
+      [
+        "key3",
+        "val3.1 val3.2\nval3.3"
+      ],
+      [
+        "key4",
+        "val4.1 val4.2\nval4.3\n"
+      ]
+    ]
diff --git a/tests/test_renderers/fixtures/option_parsing_errors.yaml b/tests/test_renderers/fixtures/option_parsing_errors.yaml
new file mode 100644
index 00000000..b077bb94
--- /dev/null
+++ b/tests/test_renderers/fixtures/option_parsing_errors.yaml
@@ -0,0 +1,42 @@
+no `:`:
+  content: |
+    key1
+  expected: |-
+    expected ':' after key
+    at line 1, column 0
+
+Indented key:
+  content: |2
+     key1: value1
+  expected: |-
+    expected key to start at column 0
+    at line 0, column 1
+
+Quote not closed:
+  content: |
+    key1: "value1
+  expected: |-
+    while scanning a quoted scalar
+    at line 0, column 6
+    found unexpected end of stream
+    at line 1, column 0
+
+Content after literal:
+  content: |
+    key1: | value1
+      extra
+  expected: |-
+    while scanning a block scalar
+    at line 0, column 6
+    expected a comment or a line break, but found 'v'
+    at line 0, column 8
+
+Content after folded:
+  content: |
+    key1: > value1
+      extra
+  expected: |-
+    while scanning a block scalar
+    at line 0, column 6
+    expected a comment or a line break, but found 'v'
+    at line 0, column 8
diff --git a/tests/test_renderers/fixtures/reporter_warnings.md b/tests/test_renderers/fixtures/reporter_warnings.md
index 776d0e8c..dd8d70b9 100644
--- a/tests/test_renderers/fixtures/reporter_warnings.md
+++ b/tests/test_renderers/fixtures/reporter_warnings.md
@@ -149,7 +149,7 @@ bad-option-value
 :class: [1]
 ```
 .
-<string>:1: (WARNING/2) 'note': option "class" value not string (enclose with ""): [1] [myst.directive_parse]
+<string>:1: (WARNING/2) 'note': Invalid option value for 'class': [1]: cannot make "[1]" into a class name [myst.directive_parse]
 <string>:1: (ERROR/3) Content block expected for the "note" directive; none found.
 .
 
diff --git a/tests/test_renderers/test_parse_directives.py b/tests/test_renderers/test_parse_directives.py
index 4a25ed11..a8c8fa74 100644
--- a/tests/test_renderers/test_parse_directives.py
+++ b/tests/test_renderers/test_parse_directives.py
@@ -1,3 +1,4 @@
+import json
 from pathlib import Path
 
 import pytest
@@ -7,10 +8,33 @@
 from markdown_it import MarkdownIt
 
 from myst_parser.parsers.directives import MarkupError, parse_directive_text
+from myst_parser.parsers.options import TokenizeError
+from myst_parser.parsers.options import to_items as options_to_items
 
 FIXTURE_PATH = Path(__file__).parent.joinpath("fixtures")
 
 
+@pytest.mark.param_file(FIXTURE_PATH / "option_parsing.yaml", "yaml")
+def test_option_parsing(file_params):
+    """Test parsing of directive options into (key, value) items."""
+    result = list(options_to_items(file_params.content))
+    file_params.assert_expected(
+        json.dumps(result, ensure_ascii=False, indent=2), rstrip_lines=True
+    )
+
+
+@pytest.mark.param_file(FIXTURE_PATH / "option_parsing_errors.yaml", "yaml")
+def test_option_parsing_errors(file_params):
+    """Test the error messages raised for invalid directive options."""
+    try:
+        list(options_to_items(file_params.content))
+    except TokenizeError as err:
+        result = str(err)
+    else:
+        result = "No error"
+    file_params.assert_expected(result, rstrip_lines=True)
+
+
 @pytest.mark.param_file(FIXTURE_PATH / "directive_parsing.txt")
 def test_parsing(file_params):
     """Test parsing of directive text."""
@@ -25,7 +49,7 @@ def test_parsing(file_params):
         raise AssertionError(f"Unknown directive: {name}")
     try:
         result = parse_directive_text(
-            klass, first_line[0] if first_line else "", tokens[0].content
+            klass, first_line[0] if first_line else "", tokens[0].content, line=0
         )
     except MarkupError as err:
         outcome = f"error: {err}"
@@ -51,6 +75,15 @@ def test_parsing_errors(descript, klass, arguments, content):
         parse_directive_text(klass, arguments, content)
 
 
+def test_parsing_full_yaml():  # options in a `---` block keep YAML types
+    result = parse_directive_text(
+        Note, "", "---\na: [1]\n---\ncontent", validate_options=False
+    )
+    assert not result.warnings
+    assert result.options == {"a": [1]}
+    assert result.body == ["content"]
+
+
 def test_additional_options():
     """Allow additional options to be passed to a directive."""
     # this should be fine
@@ -79,4 +112,4 @@ def test_additional_options():
         Note, "", "content", additional_options={"foo": "bar"}
     )
     assert len(result.warnings) == 1
-    assert "Unknown option" in result.warnings[0]
+    assert "Unknown option" in result.warnings[0][0]