diff --git a/pyinstrument/profiler.py b/pyinstrument/profiler.py index b7b0504e..161c2bb4 100644 --- a/pyinstrument/profiler.py +++ b/pyinstrument/profiler.py @@ -364,31 +364,44 @@ def output_text( def output_html( self, + resample_interval: float | None = None, ) -> str: """ Return the profile output as HTML, as rendered by :class:`HTMLRenderer` + + See :class:`renderers.HTMLRenderer` for parameter description. """ - return self.output(renderer=renderers.HTMLRenderer()) + return self.output(renderer=renderers.HTMLRenderer(resample_interval=resample_interval)) def write_html( - self, path: str | os.PathLike[str], timeline: bool = False, show_all: bool = False + self, + path: str | os.PathLike[str], + timeline: bool = False, + show_all: bool = False, + resample_interval: float | None = None, ): """ Writes the profile output as HTML to a file, as rendered by :class:`HTMLRenderer` """ file = Path(path) file.write_text( - self.output(renderer=renderers.HTMLRenderer(timeline=timeline, show_all=show_all)), + self.output( + renderer=renderers.HTMLRenderer( + timeline=timeline, show_all=show_all, resample_interval=resample_interval + ) + ), encoding="utf-8", ) - def open_in_browser(self, timeline: bool = False): + def open_in_browser(self, timeline: bool = False, resample_interval: float | None = None): """ Opens the last profile session in your web browser. 
""" session = self._get_last_session_or_fail() - return renderers.HTMLRenderer(timeline=timeline).open_in_browser(session) + return renderers.HTMLRenderer( + timeline=timeline, resample_interval=resample_interval + ).open_in_browser(session) def output(self, renderer: renderers.Renderer) -> str: """ diff --git a/pyinstrument/renderers/html.py b/pyinstrument/renderers/html.py index 82cdd799..b37906dd 100644 --- a/pyinstrument/renderers/html.py +++ b/pyinstrument/renderers/html.py @@ -2,6 +2,7 @@ import codecs import json +import sys import tempfile import urllib.parse import warnings @@ -38,9 +39,14 @@ class HTMLRenderer(Renderer): def __init__( self, + *, + resample_interval: float | None = None, show_all: bool = False, timeline: bool = False, ): + """ + :param resample_interval: Controls how the renderer deals with very large sessions. The HTML renderer typically struggles with sessions of more than 100,000 samples. If the session has more samples than this number, it will be automatically resampled to a coarser interval. You can control this interval with this parameter. If None (the default), the interval will be chosen automatically. Setting this to 0 disables resampling. + """ super().__init__() if show_all: warnings.warn( @@ -55,6 +61,8 @@ def __init__( stacklevel=3, ) + self.resample_interval = resample_interval + # These settings are passed down to JSONForHTMLRenderer, and can be # used to modify its output. E.g. 
they can be used to lower the size # of the output file, by excluding function calls which take a small @@ -63,6 +71,24 @@ def __init__( self.preprocessor_options = {} def render(self, session: Session): + if len(session.frame_records) > 100_000: + original_session = session + resample_interval = self.resample_interval + if resample_interval is None: + # auto mode: choose an interval that gives us 0.01% resolution + resample_interval = session.duration / 10000 + + if resample_interval > 0: + session = original_session.resample(interval=resample_interval) + + while len(session.frame_records) > 100_000: + resample_interval *= 2 + session = original_session.resample(interval=resample_interval) + print( + f"pyinstrument: session has {len(original_session.frame_records)} samples, which is too many for the HTML renderer to handle. Resampled to {len(session.frame_records)} samples with interval {resample_interval:.6f} seconds. Set the renderer option resample_interval to control this behaviour.", + file=sys.stderr, + ) + json_renderer = JSONForHTMLRenderer() json_renderer.processors = self.preprocessors json_renderer.processor_options = self.preprocessor_options diff --git a/pyinstrument/session.py b/pyinstrument/session.py index 255ca9de..3916cf4a 100644 --- a/pyinstrument/session.py +++ b/pyinstrument/session.py @@ -4,7 +4,7 @@ import os import sys from collections import deque -from typing import Any +from typing import Any, Sequence from pyinstrument.frame import Frame from pyinstrument.frame_info import frame_info_get_identifier @@ -213,3 +213,56 @@ def shorten_path(self, path: str) -> str: self._short_file_path_cache[path] = result return result + + @staticmethod + def _resample_frame_records( + frame_records: Sequence[FrameRecordType], interval: float + ) -> list[FrameRecordType]: + """ + Resample frame records to a given interval. Discards samples as needed. + + The time of every discarded sample is carried into the next emitted sample, so + each unit of time is counted exactly once. Time accumulated after the last emitted sample is dropped. + + :param interval: The desired sampling interval in seconds; must be greater than zero. + :raises ValueError: If ``interval`` is not greater than zero. 
+ """ + if interval <= 0: + raise ValueError("interval must be greater than zero") + + result: list[FrameRecordType] = [] + accumulated_time = 0.0 + + for frame_info_stack, time in frame_records: + accumulated_time += time + + if accumulated_time >= interval: + result.append((frame_info_stack, accumulated_time)) + # The emitted sample already carries all of the accumulated time, so + # restart from zero; keeping a remainder would count that time twice. + accumulated_time = 0.0 + + return result + + def resample(self, interval: float) -> Session: + """ + Returns a new Session object with frame records resampled to the given interval. + + :param interval: The desired sampling interval in seconds. + :rtype: Session + """ + new_frame_records = self._resample_frame_records(self.frame_records, interval) + + return Session( + frame_records=new_frame_records, + start_time=self.start_time, + duration=self.duration, + min_interval=interval, + max_interval=interval, + sample_count=len(new_frame_records), + start_call_stack=self.start_call_stack, + target_description=self.target_description, + cpu_time=self.cpu_time, + sys_path=self.sys_path, + sys_prefixes=self.sys_prefixes, + ) diff --git a/test/test_renderers.py b/test/test_renderers.py index e308f568..87ba4456 100644 --- a/test/test_renderers.py +++ b/test/test_renderers.py @@ -2,12 +2,15 @@ from __future__ import annotations +import sys import time +from unittest.mock import patch import pytest from pyinstrument import renderers from pyinstrument.profiler import Profiler +from pyinstrument.session import Session from .fake_time_util import fake_time @@ -90,3 +93,34 @@ def test_show_all_doesnt_crash( def test_console_renderer_flat_doesnt_crash(profiler_session, flat_time): renderer = renderers.ConsoleRenderer(flat=True, flat_time=flat_time) renderer.render(profiler_session) + + +def test_html_renderer_resampling(capsys): + # create a session with more than 100,000 samples + frame_records = [] + # first 100,000 frames have almost no time in them + frame_records += [("\x00somemodule/__init__.py\x0012", 1e-9)] * 100_000 + # last frame has some time in it + frame_records += [("a\x00b\x001", 1)] + + session = Session( + 
duration=1.0001, + start_time=0, + frame_records=frame_records, + sample_count=len(frame_records), + min_interval=1e-9, + max_interval=1e-9, + start_call_stack=["\x00somemodule/__init__.py\x0012"], + target_description="test", + cpu_time=1.0001, + sys_path=sys.path, + sys_prefixes=[], + ) + + renderer = renderers.HTMLRenderer() + with patch("pyinstrument.session.Session._resample_frame_records") as mock_resample: + renderer.render(session) + + captured = capsys.readouterr() + assert "Resampled to" in captured.err + assert mock_resample.called