# Source code for docfly.autotoctree

# -*- coding: utf-8 -*-

"""
Automatic Table of Contents (TOC) generator for Sphinx documentation.

This module provides tools to automatically generate Sphinx
`toctree <https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-toctree>`_
directives based on your documentation's folder structure. Instead of manually maintaining
TOC entries when adding or removing documentation pages, this functionality
examines your directory structure and builds appropriate ``toctree`` directives
with correct titles extracted from each document.

The main workflow involves:

1. Finding directories containing index files (rst, md, or ipynb)
2. Extracting the title from each index file
3. Generating a properly formatted toctree directive linking to all child pages

This approach ensures your documentation navigation stays organized and up-to-date
with minimal manual intervention.
"""

import typing as T
import json
import dataclasses
from pathlib import Path
from functools import cached_property

from .template import TocTemplateParams, render_toc

# Tag identifying which kind of index file a page folder resolved to:
# "rst" for ``<index>.rst``, "nb" for ``<index>.ipynb`` (Jupyter Notebook)
# and "md" for ``<index>.md``.
T_INDEX_FILE_TYPE = T.Literal["rst", "md", "nb"]


class IndexFileNotFoundError(FileNotFoundError):
    """
    Raised when a directory holds none of the supported index files.

    Subclasses :class:`FileNotFoundError`, so callers that already handle a
    missing file keep working without knowing about this specific type.
    """
# Characters accepted as a section title underline / overline.
_HEADER_BAR_CHARS = "=-~+*#^"

# ``raw_mimetype`` values that mark a raw notebook cell as reStructuredText.
_RST_MIMETYPES = ("text/restructuredtext", "text/x-rst")


def _is_header_bar(line: str) -> bool:
    """Tell whether ``line`` is made of one repeated header bar character."""
    return (
        bool(line)
        and line[0] in _HEADER_BAR_CHARS
        and line == line[0] * len(line)
    )


def _first_rst_title(lines: T.Iterable[str]) -> T.Optional[str]:
    """Return the text line directly above the first header bar, if any."""
    previous = ""
    for raw_line in lines:
        line = raw_line.strip()
        # A bar only counts as a title adornment when text sits right above it.
        if previous and _is_header_bar(line):
            return previous
        previous = line
    return None


def _atx_heading_text(line: str) -> T.Optional[str]:
    """Return the text of a Markdown ``# Heading`` line, else ``None``."""
    stripped = line.strip()
    text = stripped.lstrip("#")
    level = len(stripped) - len(text)
    # The marker must open the line and be followed by whitespace, so that
    # "#hashtag" or "issue # 5" are not mistaken for headings.
    if not 1 <= level <= 6 or not text[:1].isspace():
        return None
    return text.strip() or None


def _cell_lines(cell: dict) -> T.List[str]:
    """Return the lines of a notebook cell (source may be a str or a list)."""
    source = cell.get("source", "")
    text = source if isinstance(source, str) else "".join(source)
    return text.splitlines()


def _strip_front_matter(lines: T.List[str]) -> T.List[str]:
    """Drop a leading ``---`` delimited front matter block from Markdown lines."""
    if not lines or lines[0].strip() != "---":
        return lines
    for index, line in enumerate(lines[1:], start=1):
        if line.strip() in ("---", "..."):
            return lines[index + 1:]
    # No closing delimiter: this was not front matter after all.
    return lines


@dataclasses.dataclass
class PageFolder:
    """
    Represents a folder containing an index document with a title.

    A PageFolder typically maps to a documentation section with an index file
    (index.rst, index.md, or index.ipynb) that contains a title. This class
    provides methods to extract the title, find child page folders, and
    generate a toctree directive linking to those children.

    The index file is searched in this order:

    1. .rst (reStructuredText)
    2. .ipynb (Jupyter Notebook)
    3. .md (Markdown)

    :param dir: Path to the directory containing the index file
    :param index_filename: Base name of the index file without extension
        (default: "index")

    Example folder structure::

        docs/sources/
        docs/sources/index.rst
        docs/sources/document-1/index.rst
        docs/sources/document-2/index.ipynb
        docs/sources/document-3/index.md
        docs/sources/document-3/...

    Usage:

    .. code-block:: python

        # Create a PageFolder for the main docs directory
        main_folder = PageFolder.new(dir=Path("docs/sources"))

        # Generate toctree directive
        toc_content = main_folder.toc_directive()

        # Result will be:
        # .. toctree::
        #     :maxdepth: 1
        #
        #     Document 1 <document-1/index>
        #     Document 2 <document-2/index>
        #     Document 3 <document-3/index>
    """

    dir: Path = dataclasses.field()
    index_filename: str = dataclasses.field()
    # The two fields below are filled in by :meth:`new`, not by ``__init__``.
    path_index_file: Path = dataclasses.field(init=False)
    # Quoted so the alias is only needed by type checkers, not at class creation.
    index_file_type: "T_INDEX_FILE_TYPE" = dataclasses.field(init=False)

    @classmethod
    def new(
        cls,
        dir: Path,
        index_filename: str = "index",
    ) -> "PageFolder":
        """
        Create a new PageFolder instance with resolved index file.

        This factory method creates a PageFolder instance and resolves which
        type of index file exists (.rst, .ipynb, or .md).

        TODO: the parent node that contains ``.. autotoctree:`` currently has
            to be an RST file; the directive cannot live inside a Notebook.

        TODO: the ``index_filename`` of the parent node should be kept
            separate from the index file name used for sub directories during
            discovery; this will be implemented later.

        :param dir: Directory expected to contain the index file
        :param index_filename: Index file name; anything after the first dot
            is ignored

        :return: The PageFolder with ``path_index_file`` and
            ``index_file_type`` set
        :raises IndexFileNotFoundError: If no supported index file exists
        """
        index_filename = index_filename.split(".")[0]
        page_folder = cls(dir=dir, index_filename=index_filename)
        # Notebook is checked before markdown, because sometimes people keep
        # a markdown file converted from the notebook at the same location.
        candidates = (
            (page_folder.path_index_rst, "rst"),
            (page_folder.path_index_ipynb, "nb"),
            (page_folder.path_index_md, "md"),
        )
        for path, file_type in candidates:
            if path.exists():
                page_folder.path_index_file = path
                page_folder.index_file_type = file_type
                return page_folder
        raise IndexFileNotFoundError(
            f"Cannot find index file in {page_folder.dir}"
        )

    @property
    def path_index_rst(self) -> Path:
        """
        Get the path to the potential reStructuredText index file.
        """
        return self.dir.joinpath(f"{self.index_filename}.rst")

    @property
    def path_index_ipynb(self) -> Path:
        """
        Get the path to the potential Jupyter Notebook index file.
        """
        return self.dir.joinpath(f"{self.index_filename}.ipynb")

    @property
    def path_index_md(self) -> Path:
        """
        Get the path to the potential Markdown index file.
        """
        return self.dir.joinpath(f"{self.index_filename}.md")

    @property
    def path_str(self) -> str:
        """
        Get the relative path string used in toctree entries.
        """
        return f"{self.dir.name}/{self.index_filename}"

    def get_title_from_rst(self) -> T.Optional[str]:
        """
        Extract title from a reStructuredText file.

        Finds the first section title by looking for underline patterns
        (====, ----, etc.) and returns the text line above it. Also handles
        ``.. include::`` directives by replacing them with the content of the
        included file (one level deep, directive options are ignored).

        :return: Extracted title or None if no title found
        """
        directive = ".. include::"
        lines: T.List[str] = []
        with self.path_index_file.open("r", encoding="utf-8") as f:
            for raw_line in f:
                line = raw_line.strip()
                if line.startswith(directive):
                    relpath_parts = line[len(directive):].strip().split("/")
                    path_included = self.path_index_file.parent.joinpath(
                        *relpath_parts
                    )
                    # ``is_file`` rather than ``exists``: an empty directive
                    # argument resolves to the parent directory, which
                    # cannot be read as text.
                    if path_included.is_file():
                        included = path_included.read_text(encoding="utf-8")
                        lines.extend(included.splitlines())
                        continue
                lines.append(line)
        return _first_rst_title(lines)

    def get_title_from_md(self) -> T.Optional[str]:
        """
        Extract title from a Markdown file.

        The title is the first heading found outside of front matter and
        fenced code blocks, written either as ``# Title`` or as a text line
        underlined with ``===`` / ``---``.

        :return: Extracted title or None if no title found
        """
        content = self.path_index_md.read_text(encoding="utf-8")
        in_fence = False
        previous = ""
        for raw_line in _strip_front_matter(content.splitlines()):
            line = raw_line.strip()
            if line.startswith(("```", "~~~")):
                # Code fences may contain "# comment" lines; skip their body.
                in_fence = not in_fence
                previous = ""
                continue
            if in_fence:
                continue
            title = _atx_heading_text(line)
            if title:
                return title
            is_underline = line in ("=" * len(line), "-" * len(line))
            if previous and line and is_underline:
                return previous
            previous = line
        return None

    def get_title_from_ipynb(self) -> T.Optional[str]:
        """
        Extract title from a Jupyter Notebook file.

        Cells are visited in order and the first title found wins:

        1. A markdown cell whose first line is a heading (``# Title``)
        2. A raw reStructuredText cell with a title and underline

        :return: Extracted title or None if no title found
        """
        data = json.loads(self.path_index_ipynb.read_text(encoding="utf-8"))
        for cell in data.get("cells", []):
            lines = _cell_lines(cell)
            if not lines:
                continue
            cell_type = cell.get("cell_type", "unknown")
            raw_mimetype = cell.get("metadata", {}).get(
                "raw_mimetype", "unknown"
            )
            if cell_type == "markdown":
                title = _atx_heading_text(lines[0])
            elif cell_type == "raw" and raw_mimetype in _RST_MIMETYPES:
                title = _first_rst_title(lines)
            else:
                title = None
            if title:
                return title
        return None

    @cached_property
    def title(self) -> T.Optional[str]:
        """
        Title for the first header in the index file
        """
        extractors = {
            "rst": self.get_title_from_rst,
            "nb": self.get_title_from_ipynb,
            "md": self.get_title_from_md,
        }
        extractor = extractors.get(self.index_file_type)
        if extractor is None:  # pragma: no cover
            print("never gonna reach here")
            return None
        return extractor()

    @cached_property
    def child_page_folders(self) -> T.List["PageFolder"]:
        """
        Find all valid child page folders.

        Searches for directories containing index files with valid titles
        and returns them as :class:`PageFolder` instances, sorted by path.
        Folders without an index file are skipped silently; folders whose
        title cannot be extracted are skipped with a printed warning.
        """
        child_page_folders = []
        sub_dirs = sorted(path for path in self.dir.iterdir() if path.is_dir())
        for sub_dir in sub_dirs:
            try:
                child_page_folder = self.__class__.new(
                    dir=sub_dir, index_filename=self.index_filename
                )
            # skip folders that cannot find index file
            except IndexFileNotFoundError:
                continue

            try:
                title = child_page_folder.title
            # Best effort: one unreadable page must not break the whole
            # toctree, but the failure should be visible to the author.
            except Exception as error:  # pragma: no cover
                print(
                    f"Warning: failed to extract title from "
                    f"{child_page_folder.path_index_file} file: {error!r}"
                )
                continue

            if title is None:  # pragma: no cover
                print(
                    f"Warning: cannot detect title in "
                    f"{child_page_folder.path_index_file} file"
                )
                continue
            child_page_folders.append(child_page_folder)
        return child_page_folders

    def toc_directive(self, maxdepth: int = 1) -> str:
        """
        Generate a ``toctree`` directive for the child page folders.

        Creates a properly formatted reStructuredText ``toctree`` directive
        that includes all child pages with their titles.

        :param maxdepth: Maximum depth for the toctree directive

        :return: Complete toctree directive as a string
        """
        params = TocTemplateParams(
            page_folders=self.child_page_folders,
            maxdepth=maxdepth,
        )
        return render_toc(params)