From 490bb79a50800d09a5f560cd379fee5d0f3649aa Mon Sep 17 00:00:00 2001 From: Mathias Millet Date: Sat, 19 Oct 2024 12:07:28 +0200 Subject: [PATCH 1/3] feat: add NumberedHeadingsPreprocessor --- docs/source/api/preprocessors.rst | 2 + nbconvert/preprocessors/__init__.py | 2 + nbconvert/preprocessors/numbered_headings.py | 51 +++++++++++ tests/preprocessors/test_numbered_headings.py | 86 +++++++++++++++++++ 4 files changed, 141 insertions(+) create mode 100644 nbconvert/preprocessors/numbered_headings.py create mode 100644 tests/preprocessors/test_numbered_headings.py diff --git a/docs/source/api/preprocessors.rst b/docs/source/api/preprocessors.rst index 6276007a6..b5f74a9bc 100644 --- a/docs/source/api/preprocessors.rst +++ b/docs/source/api/preprocessors.rst @@ -36,6 +36,8 @@ Converting text .. autoclass:: HighlightMagicsPreprocessor +.. autoclass:: NumberedHeadingsPreprocessor + Metadata and header control ~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/nbconvert/preprocessors/__init__.py b/nbconvert/preprocessors/__init__.py index d752edf90..de527d0ec 100644 --- a/nbconvert/preprocessors/__init__.py +++ b/nbconvert/preprocessors/__init__.py @@ -13,6 +13,7 @@ from .extractoutput import ExtractOutputPreprocessor from .highlightmagics import HighlightMagicsPreprocessor from .latex import LatexPreprocessor +from .numbered_headings import NumberedHeadingsPreprocessor from .regexremove import RegexRemovePreprocessor from .svg2pdf import SVG2PDFPreprocessor from .tagremove import TagRemovePreprocessor @@ -30,6 +31,7 @@ "ExtractOutputPreprocessor", "HighlightMagicsPreprocessor", "LatexPreprocessor", + "NumberedHeadingsPreprocessor", "RegexRemovePreprocessor", "SVG2PDFPreprocessor", "TagRemovePreprocessor", diff --git a/nbconvert/preprocessors/numbered_headings.py b/nbconvert/preprocessors/numbered_headings.py new file mode 100644 index 000000000..fb3e4adc5 --- /dev/null +++ b/nbconvert/preprocessors/numbered_headings.py @@ -0,0 +1,51 @@ +""" +Preprocessor that transforms 
markdown cells: Insert numbering in from of heading +""" + + import re + +from nbconvert.preprocessors.base import Preprocessor + + +class NumberedHeadingsPreprocessor(Preprocessor): + """Pre-processor that will rewrite markdown headings to include numberings.""" + + def __init__(self, *args, **kwargs): + """Init""" + super().__init__(*args, **kwargs) + self.current_numbering = [0] + + def format_numbering(self): + """Return a string representation of the current numbering""" + return ".".join(str(n) for n in self.current_numbering) + + def _inc_current_numbering(self, level): + """Increase internal counter keeping track of numberings""" + if level > len(self.current_numbering): + self.current_numbering = self.current_numbering + [0] * ( + level - len(self.current_numbering) + ) + elif level < len(self.current_numbering): + self.current_numbering = self.current_numbering[:level] + self.current_numbering[level - 1] += 1 + + def _transform_markdown_line(self, line, resources): + """Rewrites one markdown line, if needed""" + if m := re.match(r"^(?P<level>#+) (?P<heading>.*)", line): + level = len(m.group("level")) + self._inc_current_numbering(level) + old_heading = m.group("heading").strip() + new_heading = self.format_numbering() + " " + old_heading + return "#" * level + " " + new_heading + + return line + + def preprocess_cell(self, cell, resources, index): + """Rewrites all the headings in the cell if it is markdown""" + if cell["cell_type"] == "markdown": + cell["source"] = "\n".join( + self._transform_markdown_line(line, resources) + for line in cell["source"].splitlines() + ) + + return cell, resources diff --git a/tests/preprocessors/test_numbered_headings.py b/tests/preprocessors/test_numbered_headings.py new file mode 100644 index 000000000..d7740f779 --- /dev/null +++ b/tests/preprocessors/test_numbered_headings.py @@ -0,0 +1,86 @@ +""" +Module with tests for the Numbered Headings preprocessor. 
+""" + +from nbformat import v4 as nbformat + +from nbconvert.preprocessors.numbered_headings import NumberedHeadingsPreprocessor + +from .base import PreprocessorTestsBase + +MARKDOWN_1 = """ +# Heading 1 + +## Sub-heading + +some content +""" + +MARKDOWN_1_POST = """ +# 1 Heading 1 + +## 1.1 Sub-heading + +some content +""" + + +MARKDOWN_2 = """ + +## Second sub-heading + +# Another main heading + +## Sub-heading + + +some more content + +### Third heading +""" + +MARKDOWN_2_POST = """ + +## 1.2 Second sub-heading + +# 2 Another main heading + +## 2.1 Sub-heading + + +some more content + +### 2.1.1 Third heading +""" + + +class TestNumberedHeadings(PreprocessorTestsBase): + def build_notebook(self): + cells = [ + nbformat.new_code_cell(source="$ e $", execution_count=1), + nbformat.new_markdown_cell(source=MARKDOWN_1), + nbformat.new_code_cell(source="$ e $", execution_count=1), + nbformat.new_markdown_cell(source=MARKDOWN_2), + ] + + return nbformat.new_notebook(cells=cells) + + def build_preprocessor(self): + """Make an instance of a preprocessor""" + preprocessor = NumberedHeadingsPreprocessor() + preprocessor.enabled = True + return preprocessor + + def test_constructor(self): + """Can a ClearOutputPreprocessor be constructed?""" + self.build_preprocessor() + + def test_output(self): + """Test the output of the NumberedHeadingsPreprocessor""" + nb = self.build_notebook() + res = self.build_resources() + preprocessor = self.build_preprocessor() + nb, res = preprocessor(nb, res) + print(nb.cells[1].source) + assert nb.cells[1].source.strip() == MARKDOWN_1_POST.strip() + assert nb.cells[3].source.strip() == MARKDOWN_2_POST.strip() From 21418703d29fd603d536a9c9b3062f38e4cd89e7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 30 Jan 2025 12:14:56 +0000 Subject: [PATCH 2/3] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- CHANGELOG.md | 1 + 1 
file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bb4c31f6e..808fb0bba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1634,6 +1634,7 @@ raw template {%- endblock in_prompt -%} """ + exporter_attr = AttrExporter() output_attr, _ = exporter_attr.from_notebook_node(nb) assert "raw template" in output_attr From 232d78cd3cea2f0fb6fcc45dd6333001e7d526a8 Mon Sep 17 00:00:00 2001 From: Mathias Millet Date: Thu, 6 Feb 2025 23:28:00 +0100 Subject: [PATCH 3/3] use mistune for numbered headings --- nbconvert/exporters/exporter.py | 1 + nbconvert/preprocessors/numbered_headings.py | 54 ++++++++++++------- tests/preprocessors/test_numbered_headings.py | 26 +++++++-- 3 files changed, 58 insertions(+), 23 deletions(-) diff --git a/nbconvert/exporters/exporter.py b/nbconvert/exporters/exporter.py index ca6739491..b446cfc71 100644 --- a/nbconvert/exporters/exporter.py +++ b/nbconvert/exporters/exporter.py @@ -97,6 +97,7 @@ class Exporter(LoggingConfigurable): "nbconvert.preprocessors.ExtractOutputPreprocessor", "nbconvert.preprocessors.ExtractAttachmentsPreprocessor", "nbconvert.preprocessors.ClearMetadataPreprocessor", + "nbconvert.preprocessors.NumberedHeadingsPreprocessor", ], help="""List of preprocessors available by default, by name, namespace, instance, or type.""", diff --git a/nbconvert/preprocessors/numbered_headings.py b/nbconvert/preprocessors/numbered_headings.py index fb3e4adc5..5a31bb34a 100644 --- a/nbconvert/preprocessors/numbered_headings.py +++ b/nbconvert/preprocessors/numbered_headings.py @@ -2,10 +2,19 @@ Preprocessor that transforms markdown cells: Insert numbering in from of heading """ -import re - from nbconvert.preprocessors.base import Preprocessor +try: # for Mistune >= 3.0 + import mistune + from mistune.core import BlockState + from mistune.renderers.markdown import MarkdownRenderer + + MISTUNE_V3 = True +except ImportError: # for Mistune >= 2.0 + MISTUNE_V3 = False + +WRONG_MISTUNE_VERSION_ERROR = "Error: 
NumberedHeadingsPreprocessor requires mistune >= 3" + class NumberedHeadingsPreprocessor(Preprocessor): """Pre-processor that will rewrite markdown headings to include numberings.""" @@ -13,6 +22,10 @@ class NumberedHeadingsPreprocessor(Preprocessor): def __init__(self, *args, **kwargs): """Init""" super().__init__(*args, **kwargs) + if not MISTUNE_V3: + raise Exception(WRONG_MISTUNE_VERSION_ERROR) + self.md_parser = mistune.create_markdown(renderer=None) + self.md_renderer = MarkdownRenderer() self.current_numbering = [0] def format_numbering(self): @@ -29,23 +42,24 @@ def _inc_current_numbering(self, level): self.current_numbering = self.current_numbering[:level] self.current_numbering[level - 1] += 1 - def _transform_markdown_line(self, line, resources): - """Rewrites one markdown line, if needed""" - if m := re.match(r"^(?P<level>#+) (?P<heading>.*)", line): - level = len(m.group("level")) - self._inc_current_numbering(level) - old_heading = m.group("heading").strip() - new_heading = self.format_numbering() + " " + old_heading - return "#" * level + " " + new_heading - - return line - def preprocess_cell(self, cell, resources, index): """Rewrites all the headings in the cell if it is markdown""" - if cell["cell_type"] == "markdown": - cell["source"] = "\n".join( - self._transform_markdown_line(line, resources) - for line in cell["source"].splitlines() - ) - - return cell, resources + if cell["cell_type"] != "markdown": + return cell, resources + try: + md_ast = self.md_parser(cell["source"]) + assert not isinstance(md_ast, str) # type guard ; str is not returned by ast parser + for element in md_ast: + if element["type"] == "heading": + level = element["attrs"]["level"] + self._inc_current_numbering(level) + if len(element["children"]) > 0: + child = element["children"][0] + if child["type"] == "text": + child["raw"] = self.format_numbering() + " " + child["raw"] + new_source = self.md_renderer(md_ast, BlockState()) + cell["source"] = new_source + return cell, resources 
except Exception: + self.log.warning("Failed processing cell headings", exc_info=True) + return cell, resources diff --git a/tests/preprocessors/test_numbered_headings.py b/tests/preprocessors/test_numbered_headings.py index d7740f779..abe93e4ac 100644 --- a/tests/preprocessors/test_numbered_headings.py +++ b/tests/preprocessors/test_numbered_headings.py @@ -47,12 +47,31 @@ ## 2.1 Sub-heading - some more content ### 2.1.1 Third heading """ +MARKDOWN_3 = """ +# HEADING + +``` +# this is not a heading + +## this neither +``` +""" + +MARKDOWN_3_POST = """ +# 3 HEADING + +``` +# this is not a heading + +## this neither +``` +""" + class TestNumberedHeadings(PreprocessorTestsBase): def build_notebook(self): @@ -61,6 +80,7 @@ def build_notebook(self): nbformat.new_markdown_cell(source=MARKDOWN_1), nbformat.new_code_cell(source="$ e $", execution_count=1), nbformat.new_markdown_cell(source=MARKDOWN_2), + nbformat.new_markdown_cell(source=MARKDOWN_3), ] return nbformat.new_notebook(cells=cells) @@ -72,7 +92,7 @@ def build_preprocessor(self): return preprocessor def test_constructor(self): - """Can a ClearOutputPreprocessor be constructed?""" + """Can a NumberedHeadingsPreprocessor be constructed?""" self.build_preprocessor() def test_output(self): @@ -81,6 +101,6 @@ def test_output(self): res = self.build_resources() preprocessor = self.build_preprocessor() nb, res = preprocessor(nb, res) - print(nb.cells[1].source) assert nb.cells[1].source.strip() == MARKDOWN_1_POST.strip() assert nb.cells[3].source.strip() == MARKDOWN_2_POST.strip() + assert nb.cells[4].source.strip() == MARKDOWN_3_POST.strip()