# -*- coding: utf-8 -*-

"""
Automatic Table of Contents (TOC) generator for Sphinx documentation.

This module provides tools to automatically generate Sphinx
`toctree <https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html#directive-toctree>`_
directives based on your documentation's folder structure. Instead of manually
maintaining TOC entries when adding or removing documentation pages, this
functionality examines your directory structure and builds appropriate
``toctree`` directives with correct titles extracted from each document.

The main workflow involves:

1. Finding directories containing index files (rst, md, or ipynb)
2. Extracting the title from each index file
3. Generating a properly formatted toctree directive linking to all child pages

This approach ensures your documentation navigation stays organized and
up-to-date with minimal manual intervention.
"""

import dataclasses
import json
import typing as T
from functools import cached_property
from pathlib import Path

from .template import TocTemplateParams, render_toc

# Closed set of tags describing which kind of index file a folder uses:
# "rst" (reStructuredText), "md" (Markdown) or "nb" (Jupyter Notebook).
T_INDEX_FILE_TYPE = T.Literal["rst", "md", "nb"]
@dataclasses.dataclass
class PageFolder:
    """
    Represents a folder containing an index document with a title.

    A PageFolder typically maps to a documentation section with an index file
    (index.rst, index.md, or index.ipynb) that contains a title. This class
    provides methods to extract the title, find child page folders, and
    generate a toctree directive linking to those children.

    The index file is searched in this order:

    1. .rst (reStructuredText)
    2. .ipynb (Jupyter Notebook)
    3. .md (Markdown)

    .. note::

        Title extraction for Markdown is not implemented
        (:meth:`get_title_from_md` raises ``NotImplementedError``), so a child
        folder whose only index file is ``index.md`` is skipped by
        :attr:`child_page_folders`.

    :param dir: Path to the directory containing the index file
    :param index_filename: Base name of the index file without extension
        (default: "index" when created through :meth:`new`)

    Example folder structure::

        docs/sources/
        docs/sources/index.rst
        docs/sources/document-1/index.rst
        docs/sources/document-2/index.ipynb
        docs/sources/document-3/index.md
        docs/sources/document-3/...

    Usage:

    .. code-block:: python

        # Create a PageFolder for the main docs directory
        main_folder = PageFolder.new(dir=Path("docs/sources"))

        # Generate toctree directive
        toc_content = main_folder.toc_directive()

        # Result will be (document-3 is skipped, see the note above):
        # .. toctree::
        #     :maxdepth: 1
        #
        #     Document 1 <document-1/index>
        #     Document 2 <document-2/index>
    """

    dir: Path = dataclasses.field()
    index_filename: str = dataclasses.field()
    # The two fields below are not constructor arguments; they are assigned
    # by :meth:`new` once it knows which index file exists on disk.
    path_index_file: Path = dataclasses.field(init=False)
    # Quoted so the module-level alias is only resolved by type checkers.
    index_file_type: "T_INDEX_FILE_TYPE" = dataclasses.field(init=False)

    # Characters accepted as a section-title adornment (underline) line.
    _HEADER_BAR_CHARS = "=-~+*#^"
    # ``raw_mimetype`` values that mark a raw notebook cell as reStructuredText.
    _RST_MIMETYPES = ("text/restructuredtext", "text/x-rst")

    @classmethod
    def new(
        cls,
        dir: Path,
        index_filename: str = "index",
    ) -> "PageFolder":
        """
        Create a new PageFolder instance with resolved index file.

        This factory method creates a PageFolder instance and resolves which
        type of index file exists (.rst, .ipynb, or .md).

        TODO: Currently the parent node that contains the ``.. autotoctree:``
        directive has to be an RST file; it cannot live inside a Notebook.

        TODO: The parent node's ``index_filename`` should be distinguished
        from the index file of the sub-directories found during discovery;
        this will be implemented later.

        :param dir: Directory expected to contain the index file
        :param index_filename: Index file name; anything after the first
            ``.`` is dropped, so ``"index.rst"`` is treated as ``"index"``
        :return: A PageFolder with ``path_index_file`` and
            ``index_file_type`` populated
        :raises IndexFileNotFoundError: If none of the candidate index files
            exists in ``dir``
        """
        index_filename = index_filename.split(".")[0]
        page_folder = cls(dir=dir, index_filename=index_filename)
        # Order matters. Notebook is checked before markdown because people
        # sometimes keep a markdown file converted from the notebook at the
        # same location.
        candidates = (
            (page_folder.path_index_rst, "rst"),
            (page_folder.path_index_ipynb, "nb"),
            (page_folder.path_index_md, "md"),
        )
        for path_candidate, file_type in candidates:
            if path_candidate.exists():
                page_folder.path_index_file = path_candidate
                page_folder.index_file_type = file_type
                return page_folder
        raise IndexFileNotFoundError(
            f"Cannot find index file in {page_folder.dir}"
        )

    @property
    def path_index_rst(self) -> Path:
        """
        Get the path to the potential reStructuredText index file.
        """
        return self.dir.joinpath(f"{self.index_filename}.rst")

    @property
    def path_index_ipynb(self) -> Path:
        """
        Get the path to the potential Jupyter Notebook index file.
        """
        return self.dir.joinpath(f"{self.index_filename}.ipynb")

    @property
    def path_index_md(self) -> Path:
        """
        Get the path to the potential Markdown index file.
        """
        return self.dir.joinpath(f"{self.index_filename}.md")

    @property
    def path_str(self) -> str:
        """
        Get the relative path string used in toctree entries,
        in the form ``<folder name>/<index filename>``.
        """
        return f"{self.dir.name}/{self.index_filename}"

    @classmethod
    def _find_rst_title(cls, lines: T.Iterable[str]) -> T.Optional[str]:
        """
        Return the text line sitting directly above the first adornment line.

        An adornment line is a non-empty line made of one single repeated
        character taken from ``_HEADER_BAR_CHARS``. It only counts when the
        line above it is not empty. Every line is stripped before it is
        examined.

        :param lines: Lines of reStructuredText, with or without line endings
        :return: The stripped title text, or None if no title was found
        """
        previous_line = ""
        for raw_line in lines:
            line = raw_line.strip()
            if (
                line
                and previous_line
                and line[0] in cls._HEADER_BAR_CHARS
                and line == line[0] * len(line)
            ):
                return previous_line
            previous_line = line
        return None

    def get_title_from_rst(self) -> T.Optional[str]:
        """
        Extract title from a reStructuredText file.

        Finds the first section title by looking for underline patterns
        (====, ----, etc.) and returns the text line above it. Also handles
        ``.. include::`` directives by replacing them with the content of
        the included file.

        :return: Extracted title or None if no title found
        """
        include_prefix = ".. include::"
        lines: T.List[str] = []
        with self.path_index_file.open("r", encoding="utf-8") as f:
            for raw_line in f:
                line = raw_line.strip()
                if line.startswith(include_prefix):
                    # The include target is resolved relative to the folder
                    # of the index file.
                    relpath_parts = line[len(include_prefix):].strip().split("/")
                    path_included = self.path_index_file.parent.joinpath(
                        *relpath_parts
                    )
                    # ``is_file`` rather than ``exists``: an empty or
                    # directory target must not be read as text.
                    if path_included.is_file():
                        included = path_included.read_text(encoding="utf-8")
                        lines.extend(included.split("\n"))
                        continue
                # Unresolvable include directives are kept as ordinary lines.
                lines.append(line)
        return self._find_rst_title(lines)

    def get_title_from_md(self) -> T.Optional[str]:
        """
        Extract title from a Markdown file.

        :return: Extracted title or None if no title found
        :raises NotImplementedError: This method is not implemented yet
        """
        raise NotImplementedError

    def get_title_from_ipynb(self) -> T.Optional[str]:
        """
        Extract title from a Jupyter Notebook file.

        Cells are visited in order and the first title found wins. A title
        is looked for in:

        1. A markdown cell whose first line is a level 1 heading (# Title)
        2. A raw reStructuredText cell with a title and underline

        :return: Extracted title or None if no title found
        """
        notebook = json.loads(self.path_index_ipynb.read_text(encoding="utf-8"))
        for cell in notebook.get("cells", []):
            source = cell.get("source")
            if not source:
                continue
            # A cell source may be one string or a list of strings;
            # normalise both forms to a list of lines.
            text = source if isinstance(source, str) else "".join(source)
            lines = text.split("\n")

            cell_type = cell.get("cell_type", "unknown")
            raw_mimetype = cell.get("metadata", {}).get("raw_mimetype", "unknown")
            if cell_type == "markdown":
                first_line = lines[0].strip()
                # Must *start* with "# ": a plain substring test would also
                # match "## Sub" or prose such as "about C# code".
                if first_line.startswith("# "):
                    return first_line[2:].strip()
            elif cell_type == "raw" and raw_mimetype in self._RST_MIMETYPES:
                title = self._find_rst_title(lines)
                if title is not None:
                    return title
        return None

    @cached_property
    def title(self) -> T.Optional[str]:
        """
        Title for the first header in the index file.

        The value is computed on first access and cached on the instance.
        """
        if self.index_file_type == "rst":
            return self.get_title_from_rst()
        elif self.index_file_type == "nb":
            return self.get_title_from_ipynb()
        elif self.index_file_type == "md":
            return self.get_title_from_md()
        else:  # pragma: no cover
            print("never gonna reach here")
            return None

    @cached_property
    def child_page_folders(self) -> T.List["PageFolder"]:
        """
        Find all valid child page folders.

        Searches the direct sub-directories (sorted by path) for index files
        with valid titles and returns them as :class:`PageFolder` instances.
        Sub-directories without an index file are skipped silently; those
        whose title is missing or cannot be extracted are skipped with a
        printed warning.
        """
        child_page_folders = []
        sub_dirs = sorted(path for path in self.dir.iterdir() if path.is_dir())
        for sub_dir in sub_dirs:
            try:
                child_page_folder = self.__class__.new(
                    dir=sub_dir,
                    index_filename=self.index_filename,
                )
            # skip folders that have no index file
            except IndexFileNotFoundError:
                continue

            try:
                title = child_page_folder.title
            # Best effort: one unreadable page must not break the whole TOC.
            # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt
            # and SystemExit propagate.
            except Exception as exc:  # pragma: no cover
                print(
                    f"Warning: failed to extract title from "
                    f"{child_page_folder.path_index_file} file: {exc!r}"
                )
                continue

            if title is not None:
                child_page_folders.append(child_page_folder)
            else:  # pragma: no cover
                print(
                    f"Warning: cannot detect title in "
                    f"{child_page_folder.path_index_file} file"
                )
        return child_page_folders

    def toc_directive(self, maxdepth: int = 1):
        """
        Generate a ``toctree`` directive for the child page folders.

        Builds a ``TocTemplateParams`` from :attr:`child_page_folders` and
        ``maxdepth`` and returns whatever ``render_toc`` produces for it
        (presumably the rendered directive text; confirm against the
        template module).

        :param maxdepth: Maximum depth for the toctree directive
        :return: Complete toctree directive
        """
        params = TocTemplateParams(
            page_folders=self.child_page_folders,
            maxdepth=maxdepth,
        )
        return render_toc(params)