Skip to content

Public API

obsidian_import

obsidian-import: Extract files into Obsidian-flavored Markdown.

Public API

extract_file(path, config) -> ExtractedDocument
extract_text(path, config) -> str
discover_files(config) -> Iterator[DiscoveredFile]

extract_file

extract_file(path: Path, config: ImportConfig) -> ExtractedDocument

Extract a single file to Obsidian-flavored markdown.

Uses the configured backend for the file's extension. Returns an ExtractedDocument with the extracted markdown and metadata. For image files, the source image is listed in associated_files for copying.

Source code in obsidian_import/__init__.py
def extract_file(path: Path, config: ImportConfig) -> ExtractedDocument:
    """Convert one file into an Obsidian-flavored markdown document.

    The extraction itself is delegated to ``_call_backend``. Each media
    file reported by the backend is embedded as a
    ``![[<stem>/<filename>]]`` wikilink, appended to the markdown unless
    that exact link text already occurs in it.

    Args:
        path: The file to extract.
        config: Import configuration handed to the backend call.

    Returns:
        An ExtractedDocument carrying the markdown, the title (the file
        stem), the file type (lower-cased suffix without the dot), the
        estimated page count, the backend's media files, and, for image
        inputs, the source image listed in ``associated_files`` for
        copying.
    """
    suffix = path.suffix.lower()
    backend_result = _call_backend(path, config)
    pages = _estimate_page_count(backend_result.markdown, suffix)

    # Image inputs list the source file itself as an associated file.
    extras: tuple[Path, ...] = (path,) if is_image_extension(suffix) else ()

    stem = path.stem
    body = backend_result.markdown
    # ``or ()`` keeps an empty/None media list a no-op.
    for media in backend_result.media_files or ():
        embed = f"![[{stem}/{media.filename}]]"
        if embed in body:
            # Link text already present; avoid embedding it twice.
            continue
        body = f"{body}\n\n{embed}"

    return ExtractedDocument(
        source_path=path,
        markdown=body,
        title=stem,
        file_type=suffix.lstrip("."),
        page_count=pages,
        associated_files=extras,
        media_files=backend_result.media_files,
    )

extract_text

extract_text(path: Path, config: ImportConfig) -> str

Extract raw markdown text from a file. No frontmatter, no metadata wrapping.

Source code in obsidian_import/__init__.py
def extract_text(path: Path, config: ImportConfig) -> str:
    """Return only the markdown produced by the backend for ``path``.

    No ExtractedDocument is built: the result is the backend's
    ``markdown`` attribute, without frontmatter or metadata wrapping.
    """
    return _call_backend(path, config).markdown

discover_files

discover_files(config: ImportConfig) -> Iterator[DiscoveredFile]

Discover files matching the configured input directories and extensions.

Source code in obsidian_import/__init__.py
def discover_files(config: ImportConfig) -> Iterator[DiscoveredFile]:
    """Return an iterator over the files selected by ``config``.

    Public entry point that forwards to the internal ``_discover_files``
    helper, which matches the configured input directories and
    extensions.
    """
    discovered = _discover_files(config)
    return discovered

obsidian_import.config

Configuration dataclasses for obsidian-import.

config_for_backend

config_for_backend(backend: str, timeout_seconds: int, max_file_size_mb: int, xlsx_max_rows_per_sheet: int) -> ImportConfig

Create an ImportConfig with all backends set to a single backend name.

Useful for consumers that just need quick extraction without managing the full config surface. Media extraction is disabled by default.

Source code in obsidian_import/config.py
def config_for_backend(
    backend: str,
    timeout_seconds: int,
    max_file_size_mb: int,
    xlsx_max_rows_per_sheet: int,
) -> ImportConfig:
    """Build an ImportConfig in which every backend slot uses ``backend``.

    Starts from the bundled default YAML, points each entry under
    ``backends`` at the given backend name, overrides the three
    extraction limits, and sets ``media.extract_images`` to False.
    Intended for callers who want quick extraction without managing the
    full config surface.

    Args:
        backend: Backend name assigned to every key under ``backends``.
        timeout_seconds: Value stored as ``extraction.timeout_seconds``.
        max_file_size_mb: Value stored as ``extraction.max_file_size_mb``.
        xlsx_max_rows_per_sheet: Value stored as
            ``extraction.xlsx_max_rows_per_sheet``.

    Returns:
        The ImportConfig built from the adjusted defaults, with no config
        directory (``config_dir=None``).
    """
    defaults = _load_default_yaml()

    # Overwrite every existing backend entry with the single chosen backend.
    backend_map = defaults["backends"]
    backend_map.update(dict.fromkeys(backend_map, backend))

    defaults["extraction"].update(
        {
            "timeout_seconds": timeout_seconds,
            "max_file_size_mb": max_file_size_mb,
            "xlsx_max_rows_per_sheet": xlsx_max_rows_per_sheet,
        }
    )
    defaults["media"].update({"extract_images": False})

    return _build_config(defaults, config_dir=None)

load_config

load_config(path: Path) -> ImportConfig

Load config from YAML file, merging on top of bundled defaults.

Users can write minimal YAML with only overrides. Relative paths in config are resolved relative to the config file's directory.

Source code in obsidian_import/config.py
def load_config(path: Path) -> ImportConfig:
    """Read a user YAML config and layer it over the bundled defaults.

    The user file may contain only the settings to override; an empty
    file (or one that parses to a falsy value) is treated as having no
    overrides. The config file's parent directory is passed on as
    ``config_dir`` so relative paths in the config can be resolved
    against it.

    Args:
        path: Location of the user's YAML config file (read as UTF-8).

    Returns:
        The ImportConfig built from the defaults merged with the user's
        overrides.
    """
    yaml_text = path.read_text(encoding="utf-8")
    # safe_load yields None for an empty document; normalise to a mapping.
    overrides = yaml.safe_load(yaml_text) or {}
    combined = _deep_merge(_load_default_yaml(), overrides)
    config_dir = path.parent
    return _build_config(combined, config_dir=config_dir)