Skip to content

Stage 4: Pandoc Conversion

obsidian_export.pipeline.stage4_pandoc

Stage 4: Pandoc invocation for PDF and DOCX output.

PandocInvocation dataclass

Groups the parameters shared by PDF and DOCX pandoc conversions.

Source code in obsidian_export/pipeline/stage4_pandoc.py
@dataclass(frozen=True)
class PandocInvocation:
    """Groups the parameters shared by PDF and DOCX pandoc conversions.

    Instances are immutable (``frozen=True``). A single invocation is handed
    to ``convert_to_pdf`` or ``convert_to_docx``; each of those adds its own
    format-specific metadata, command-line arguments, and Lua filter names.
    """

    # Preprocessed markdown text to be converted.
    text: str
    # Document title; both converters pass it to pandoc as the ``title`` metadata.
    title: str
    # NOTE(review): not read directly by the converters in this module's
    # public functions -- presumably consumed by the shared pandoc runner; confirm.
    pandoc_config: PandocConfig
    # Style settings. Both converters read ``url_footnote_threshold``; the PDF
    # converter also reads ``table_fontsize``, ``geometry``, ``fontsize``,
    # ``linkcolor`` and ``urlcolor``.
    style_config: StyleConfig
    # Directory from which the Lua filters named by the converters are taken.
    filters_dir: Path
    # NOTE(review): presumably the file the converted document is written to -- confirm.
    output_path: Path
    # NOTE(review): presumably forwarded as pandoc's resource search path when
    # not None -- confirm against the shared pandoc runner.
    resource_path: Path | None

convert_to_pdf

convert_to_pdf(invocation: PandocInvocation, rendered_header: str) -> None

Convert preprocessed markdown text to PDF via pandoc + tectonic.

Source code in obsidian_export/pipeline/stage4_pandoc.py
def convert_to_pdf(
    invocation: PandocInvocation,
    rendered_header: str,
) -> None:
    """Convert preprocessed markdown text to PDF via pandoc + tectonic.

    *rendered_header* is written to a temporary ``.tex`` file that is handed
    to pandoc through ``--include-in-header``. The temporary file is removed
    when the function exits, whether the conversion succeeded, pandoc failed,
    or writing the header itself raised.

    Args:
        invocation: Shared conversion parameters (text, title, style, paths).
        rendered_header: LaTeX header content to include in the document.

    Raises:
        Whatever writing the header file or ``_run_pandoc`` raises; nothing
        is caught here.
    """
    # delete=False because the file must outlive the handle: pandoc opens it
    # by name after we have closed it. Cleanup is therefore done by hand in
    # the ``finally`` below.
    header_file = tempfile.NamedTemporaryFile(
        mode="w", suffix=".tex", delete=False, encoding="utf-8"
    )
    # Record the path immediately so the cleanup also covers a failed write.
    header_tmp_path = Path(header_file.name)

    try:
        # Close the handle before pandoc runs (and before unlinking), so the
        # content is flushed and the file is not held open by this process.
        with header_file:
            header_file.write(rendered_header)

        metadata = {
            "title": invocation.title,
            "table_fontsize": invocation.style_config.table_fontsize,
            "url_footnote_threshold": invocation.style_config.url_footnote_threshold,
        }
        extra_args = [
            "--to=pdf",
            "--pdf-engine=tectonic",
            f"--include-in-header={header_tmp_path}",
            f"--variable=geometry:{invocation.style_config.geometry}",
            f"--variable=fontsize:{invocation.style_config.fontsize}",
            f"--variable=linkcolor:{invocation.style_config.linkcolor}",
            f"--variable=urlcolor:{invocation.style_config.urlcolor}",
        ]
        # Filter order is preserved exactly as given to the pandoc runner.
        lua_filter_names = [
            "center_figures.lua",
            "fix_tables.lua",
            "escape_strings.lua",
            "callout_boxes.lua",
            "promote_footnotes.lua",
            "newpage_on_rule.lua",
        ]
        _run_pandoc(invocation, lua_filter_names, metadata, extra_args)
    finally:
        header_tmp_path.unlink(missing_ok=True)

convert_to_docx

convert_to_docx(invocation: PandocInvocation, reference_doc: Path | None) -> None

Convert preprocessed markdown text to DOCX via pandoc.

Applies DOCX-specific Lua filters (callout boxes, footnote promotion, page breaks) from the invocation's filters_dir. If reference_doc is provided, it is passed as --reference-doc to inject custom styles.

Source code in obsidian_export/pipeline/stage4_pandoc.py
def convert_to_docx(
    invocation: PandocInvocation,
    reference_doc: Path | None,
) -> None:
    """Render the invocation's preprocessed markdown as a DOCX file with pandoc.

    The DOCX-specific Lua filters (callout boxes, footnote promotion, page
    breaks) are requested by name. When *reference_doc* is not ``None`` it is
    supplied to pandoc as ``--reference-doc`` so its styles are used.
    """
    style = invocation.style_config
    docx_metadata = dict(
        title=invocation.title,
        url_footnote_threshold=style.url_footnote_threshold,
    )

    # The reference document is optional; only add the flag when one is given.
    pandoc_args = (
        ["--to=docx"]
        if reference_doc is None
        else ["--to=docx", f"--reference-doc={reference_doc}"]
    )

    _run_pandoc(
        invocation,
        [
            "callout_boxes_docx.lua",
            "promote_footnotes.lua",
            "newpage_on_rule_docx.lua",
        ],
        docx_metadata,
        pandoc_args,
    )