Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
93 commits
Select commit Hold shift + click to select a range
f714c7f
wip
lucia-sb Jun 18, 2025
391a360
Merge branch 'master' into lucia/improve-package-size-analyzer-accuracy
lucia-sb Jun 24, 2025
71f8c84
Merge branch 'master' into lucia/improve-package-size-analyzer-accuracy
lucia-sb Jul 1, 2025
38bc7c0
remove changes
lucia-sb Jul 1, 2025
5e442f0
remove tests
lucia-sb Jul 1, 2025
5b4db44
remove funcions
lucia-sb Jul 2, 2025
fdb7e1e
patch
lucia-sb Jul 2, 2025
0672b2e
remove imports for patch
lucia-sb Jul 2, 2025
2c63f2c
patch
lucia-sb Jul 2, 2025
9cd4abe
print in patch
lucia-sb Jul 3, 2025
fbad066
fix typo
lucia-sb Jul 3, 2025
e11957c
repack wheels
lucia-sb Jul 3, 2025
cd33605
fix
lucia-sb Jul 3, 2025
3681ff6
add wheel library
lucia-sb Jul 3, 2025
ecc2fd9
comment patch
lucia-sb Jul 3, 2025
34136f9
after repair
lucia-sb Jul 3, 2025
b6e8713
patch
lucia-sb Jul 4, 2025
40dfd38
uncomment patch call
lucia-sb Jul 4, 2025
989dcd2
skip unchanged
lucia-sb Jul 8, 2025
8508bb2
skip unchanged
lucia-sb Jul 8, 2025
c8cdc85
skip unchanged
lucia-sb Jul 8, 2025
623c0b4
debug
lucia-sb Jul 8, 2025
343a13a
debug
lucia-sb Jul 9, 2025
925ed4b
debug
lucia-sb Jul 9, 2025
a711b96
after repair
lucia-sb Jul 9, 2025
e8891c4
after repair
lucia-sb Jul 9, 2025
4e9bdc8
fix
lucia-sb Jul 9, 2025
fd2b507
classify wheels
lucia-sb Jul 11, 2025
7b9041f
fix
lucia-sb Jul 11, 2025
3ae7429
Merge branch 'master' into lucia/improve-package-size-analyzer-accuracy
lucia-sb Jul 14, 2025
242dc24
classify wheels
lucia-sb Jul 14, 2025
495b81c
Merge remote-tracking branch 'refs/remotes/origin/lucia/improve-packa…
lucia-sb Jul 14, 2025
a395b57
remove import
lucia-sb Jul 14, 2025
3d268c2
patch
lucia-sb Jul 14, 2025
2649bfa
patch fix
lucia-sb Jul 14, 2025
bb898e0
fix
lucia-sb Jul 14, 2025
2cd2e51
Merge branch 'master' into lucia/improve-package-size-analyzer-accuracy
lucia-sb Jul 15, 2025
f53a55c
remove quotes
lucia-sb Jul 18, 2025
ec9ca1b
Merge branch 'lucia/improve-package-size-analyzer-accuracy' of github…
lucia-sb Jul 18, 2025
7e1fb3b
replace quote
lucia-sb Jul 18, 2025
fb3cc71
fix quotes in patch
lucia-sb Jul 23, 2025
e626c0c
remove patch
lucia-sb Jul 23, 2025
9461354
remove patch
lucia-sb Jul 23, 2025
a7303bd
simplify utils and .toml
lucia-sb Jul 23, 2025
667e53c
toml
lucia-sb Jul 23, 2025
46c9bfb
toml
lucia-sb Jul 23, 2025
32abda2
toml
lucia-sb Jul 23, 2025
57dc620
typo
lucia-sb Aug 19, 2025
ef51ad4
change toml format to gitignore patterns
lucia-sb Aug 21, 2025
5aa5eb4
publish wheels for testing
lucia-sb Aug 28, 2025
51f0a9b
Merge branch 'master' into lucia/improve-package-size-analyzer-accuracy
lucia-sb Aug 28, 2025
df287e2
test
lucia-sb Aug 28, 2025
cb03b6a
rename wheels
lucia-sb Aug 29, 2025
76db4bf
uncomment
lucia-sb Sep 3, 2025
7efbd39
Merge branch 'master' into lucia/improve-package-size-analyzer-accuracy
lucia-sb Sep 10, 2025
221352f
remove built extra index
lucia-sb Sep 10, 2025
9579f04
remove built flag
lucia-sb Sep 10, 2025
bbdfb50
Merge branch 'master' into lucia/improve-package-size-analyzer-accuracy
lucia-sb Sep 29, 2025
bd853e1
upload wheels
lucia-sb Sep 29, 2025
7f212fe
Uncomment condition to publish wheels
lucia-sb Oct 31, 2025
301658e
Merge branch 'master' into lucia/improve-package-size-analyzer-accuracy
lucia-sb Oct 31, 2025
ca20180
Use PEP 517
lucia-sb Oct 31, 2025
ba09017
Add debugging logs
lucia-sb Oct 31, 2025
b74f756
Add debugging logs
lucia-sb Oct 31, 2025
804bc7b
Change parameter format
lucia-sb Oct 31, 2025
661846c
Add pyproject
lucia-sb Oct 31, 2025
e3ff87a
remove flag
lucia-sb Oct 31, 2025
fd23319
Change flag
lucia-sb Oct 31, 2025
5bf7360
Fix typo
lucia-sb Oct 31, 2025
6c49f37
Add debug logs
lucia-sb Oct 31, 2025
f2f00af
remove pyproject
lucia-sb Oct 31, 2025
b3d36c5
Change flag
lucia-sb Oct 31, 2025
a8f32c2
Remove flag
lucia-sb Oct 31, 2025
2fda249
Add verbosity to pip
lucia-sb Nov 13, 2025
abf5b99
Use pyproject
lucia-sb Nov 13, 2025
9a37f0d
Use hook
lucia-sb Nov 14, 2025
bc66fb3
Increase verbosity
lucia-sb Nov 14, 2025
0b09273
add dependencies to pyproject
lucia-sb Nov 14, 2025
147c03a
Add full path to backend-path
lucia-sb Nov 17, 2025
57923e8
try exclude
lucia-sb Nov 17, 2025
024c31c
Try specific excluded
lucia-sb Nov 17, 2025
45c8c37
Add ** to the beginning of the path
lucia-sb Nov 17, 2025
e597da1
Try build backend
lucia-sb Nov 18, 2025
6078f30
check initial remove
lucia-sb Nov 18, 2025
4bd4a75
Fix typo
lucia-sb Nov 18, 2025
5b26ad3
Retry build_backend
lucia-sb Nov 18, 2025
8bee0f7
Add flags
lucia-sb Nov 18, 2025
018ed15
Disable isolation
lucia-sb Nov 18, 2025
b8aa789
Add flags and isolation
lucia-sb Nov 18, 2025
f9979e1
Try new flag
lucia-sb Nov 18, 2025
e94b5fe
Change requires
lucia-sb Nov 18, 2025
603e2c2
Change flags
lucia-sb Nov 18, 2025
9a52cbf
Change flags
lucia-sb Nov 18, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .builders/images/runner_dependencies.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@ urllib3==2.2.0
auditwheel==6.0.0; sys_platform == 'linux'
delvewheel==1.5.2; sys_platform == 'win32'
delocate==0.13.0; sys_platform == 'darwin'
wheel==0.45.1
pathspec==0.12.1
129 changes: 129 additions & 0 deletions .builders/scripts/build_backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
# PEP 517 build backend: delegates the build to hatchling, then strips the paths listed in files_to_remove.toml.
from __future__ import annotations

import os
import shutil
from functools import lru_cache
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Optional


@lru_cache(maxsize=1)
def _load_excluded_spec():
    """Return the compiled exclusion spec read from ``files_to_remove.toml``.

    The TOML file is looked up next to this module. Its ``excluded_paths``
    list (empty when the key is absent) is compiled as gitignore-style
    patterns. The result is memoized, so the file is parsed at most once per
    process.
    """
    try:
        import tomllib as toml_reader
    except ImportError:
        # ``tomllib`` is unavailable on this interpreter; use the ``tomli`` package.
        import tomli as toml_reader

    import pathspec

    config_file = Path(__file__).parent / "files_to_remove.toml"
    with config_file.open("rb") as stream:
        excluded = toml_reader.load(stream).get("excluded_paths", [])

    return pathspec.PathSpec.from_lines("gitignore", excluded)


# PEP 517 functions
def get_requires_for_build_wheel(config_settings=None):
    """PEP 517 hook: list the packages this backend needs to build a wheel.

    Args:
        config_settings: Frontend-supplied settings; not used.

    Returns:
        PEP 508 requirement strings: hatchling (the backend that performs the
        actual build), wheel (unpack/repack), pathspec (pattern matching) and,
        on interpreters without ``tomllib``, the ``tomli`` fallback.
    """
    return [
        "hatchling",
        "wheel",
        "pathspec",
        # ``tomllib`` joined the standard library in Python 3.11; older
        # interpreters need the ``tomli`` package this backend falls back to.
        "tomli; python_version < '3.11'",
    ]


def prepare_metadata_for_build_wheel(metadata_directory, config_settings=None):
    """PEP 517 hook: produce the wheel metadata by delegating to hatchling.

    Args:
        metadata_directory: Directory passed through to hatchling's hook.
        config_settings: Frontend-supplied settings, forwarded unchanged.

    Returns:
        Whatever hatchling's ``prepare_metadata_for_build_wheel`` returns.

    Raises:
        ImportError: If hatchling is not installed in the build environment.
    """
    # Imported inside the hook, so importing this module does not itself
    # require hatchling; a missing hatchling surfaces here as ImportError.
    from hatchling.build import prepare_metadata_for_build_wheel as hatch_prepare_metadata

    return hatch_prepare_metadata(metadata_directory, config_settings=config_settings)


def build_wheel(
    wheel_directory: str, config_settings: Optional[dict] = None, metadata_directory: Optional[str] = None
) -> str:
    """PEP 517 hook: build the wheel with hatchling, then strip excluded paths from it.

    hatchling builds into a scratch directory. When no archive member matches
    the patterns from ``files_to_remove.toml`` the wheel is moved to
    ``wheel_directory`` untouched; otherwise it is unpacked, the matching
    files and directories are deleted, and the tree is repacked.

    Args:
        wheel_directory: Directory that receives the final wheel.
        config_settings: Frontend-supplied settings, forwarded to hatchling.
        metadata_directory: Forwarded to hatchling.

    Returns:
        The basename of the wheel placed in ``wheel_directory``.

    Raises:
        RuntimeError: If hatchling produces no wheel, or repacking does not
            yield exactly one wheel.
    """
    from zipfile import ZipFile

    from hatchling.build import build_wheel as hatch_build_wheel
    from wheel.cli.pack import pack
    from wheel.cli.unpack import unpack

    spec = _load_excluded_spec()

    def _is_excluded(member: str) -> bool:
        rel = Path(member).as_posix()
        # The trailing-slash form lets directory-only patterns ("pkg/tests/")
        # match when ``member`` is the directory itself.
        return spec.match_file(rel) or spec.match_file(rel + "/")

    destination = Path(wheel_directory)

    # One scratch tree for every intermediate artifact; the context manager
    # removes it on success and on every error path.
    with TemporaryDirectory() as scratch:
        build_dir = Path(scratch, "build")
        unpack_dir = Path(scratch, "unpacked")
        repack_dir = Path(scratch, "repacked")
        for directory in (build_dir, unpack_dir, repack_dir):
            directory.mkdir()

        # 1) Build with hatchling; the hook returns the wheel's basename.
        built_name = hatch_build_wheel(
            str(build_dir), config_settings=config_settings, metadata_directory=metadata_directory
        )
        wheel_path = build_dir / built_name
        if not wheel_path.is_file():
            raise RuntimeError("hatchling did not produce a wheel")

        # 2) Inspect the archive. The handle is closed before the wheel is
        #    moved, because moving an open file fails on Windows.
        with ZipFile(wheel_path) as archive:
            needs_stripping = any(_is_excluded(name) for name in archive.namelist())

        # 3) Unpack, prune, and repack only when something has to go.
        if needs_stripping:
            unpack(str(wheel_path), dest=str(unpack_dir))
            unpacked_root = next(unpack_dir.iterdir())
            _remove_excluded_paths(unpacked_root, _is_excluded)

            # Repack into a private directory so the result is unambiguous
            # even if ``wheel_directory`` already holds other wheels.
            # ``None`` keeps the wheel's existing build tag.
            pack(str(unpacked_root), str(repack_dir), None)
            repacked = list(repack_dir.glob("*.whl"))
            if len(repacked) != 1:
                raise RuntimeError("Failed to repack wheel")
            wheel_path = repacked[0]

        shutil.move(str(wheel_path), str(destination / wheel_path.name))
        return wheel_path.name


def _remove_excluded_paths(root: Path, is_excluded) -> None:
    """Delete every file and directory under ``root`` for which ``is_excluded`` is true.

    ``is_excluded`` receives POSIX-style paths relative to ``root``.
    """
    for current, dirs, files in os.walk(root):
        current_path = Path(current)

        kept_dirs = []
        for name in dirs:
            path = current_path / name
            if is_excluded(path.relative_to(root).as_posix()):
                shutil.rmtree(path)
            else:
                kept_dirs.append(name)
        # The walk is top-down, so trimming ``dirs`` in place keeps os.walk
        # from descending into directories that were just removed.
        dirs[:] = kept_dirs

        for name in files:
            path = current_path / name
            if is_excluded(path.relative_to(root).as_posix()):
                path.unlink()
131 changes: 119 additions & 12 deletions .builders/scripts/build_wheels.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,38 @@
from __future__ import annotations

import argparse
import email
import json
import os
import re
import shutil
import subprocess
import sys
import time
import tomllib
from functools import cache
from hashlib import sha256
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import TypedDict
from zipfile import ZipFile

import pathspec
import urllib3
from dotenv import dotenv_values
from utils import extract_metadata, normalize_project_name
from utils import iter_wheels

# Base URL of the package index; the two custom index URLs below are built from it.
INDEX_BASE_URL = 'https://agent-int-packages.datadoghq.com'
CUSTOM_EXTERNAL_INDEX = f'{INDEX_BASE_URL}/external'
CUSTOM_BUILT_INDEX = f'{INDEX_BASE_URL}/built'
# Matches runs of `-`, `_` and `.`; `normalize_project_name` replaces each run with a single `-`.
UNNORMALIZED_PROJECT_NAME_CHARS = re.compile(r'[-_.]+')


class WheelSizes(TypedDict):
    """Size figures recorded for one wheel file."""

    # Size of the wheel archive as stored on disk.
    compressed: int
    # Size of the wheel's contents once extracted.
    # NOTE(review): presumably summed over the archive members - confirm against calculate_wheel_sizes.
    uncompressed: int


if sys.platform == 'win32':
PY3_PATH = Path('C:\\py3\\Scripts\\python.exe')
PY2_PATH = Path('C:\\py2\\Scripts\\python.exe')
Expand Down Expand Up @@ -62,6 +74,83 @@ def check_process(*args, **kwargs) -> subprocess.CompletedProcess:
return process


def extract_metadata(wheel: Path) -> email.message.Message:
    """Parse and return the ``METADATA`` file stored inside a wheel.

    Args:
        wheel: Path to the ``.whl`` archive.

    Returns:
        The parsed metadata; fields are read by header name, e.g.
        ``metadata['Name']`` or ``metadata['Version']``.

    Raises:
        RuntimeError: If the archive has no top-level ``*.dist-info`` entry,
            or that directory contains no ``METADATA`` file.
    """
    with ZipFile(str(wheel)) as zip_archive:
        # Only the first path component matters: the `.dist-info` directory
        # sits at the root of the archive.
        top_level_names = (path.split('/', 1)[0] for path in zip_archive.namelist())
        dist_info_dir = next((name for name in top_level_names if name.endswith('.dist-info')), None)
        if dist_info_dir is None:
            message = f'Could not find the `.dist-info` directory in wheel: {wheel.name}'
            raise RuntimeError(message)

        try:
            with zip_archive.open(f'{dist_info_dir}/METADATA') as zip_file:
                metadata_file_contents = zip_file.read().decode('utf-8')
        except KeyError:
            # ZipFile.open raises KeyError for a missing member; hide that
            # implementation detail from the caller.
            message = f'Could not find a `METADATA` file in the `{dist_info_dir}` directory'
            raise RuntimeError(message) from None

    return email.message_from_string(metadata_file_contents)


def normalize_project_name(name: str) -> str:
    """Return ``name`` in normalized form.

    Every run of ``-``, ``_`` and ``.`` is replaced by a single ``-`` and the
    result is lowercased. See https://peps.python.org/pep-0503/#normalized-names
    """
    collapsed = UNNORMALIZED_PROJECT_NAME_CHARS.sub('-', name)
    return collapsed.lower()


@cache
def get_wheel_hashes(project) -> dict[str, str]:
    """Map each wheel filename PyPI publishes for ``project`` to its SHA-256 digest.

    The project's page is requested from the PyPI simple index in its JSON
    form. Transport errors and unexpected status codes are retried without
    limit, doubling the wait (starting at 2 seconds) after each failure.
    Results are cached per project name.

    Args:
        project: Project name as it appears in the index URL.

    Returns:
        ``{filename: sha256}`` for every listed ``.whl`` file that carries a
        SHA-256 hash. Empty when the project does not exist on PyPI.
    """
    retry_wait = 2
    while True:
        try:
            response = urllib3.request(
                'GET',
                f'https://pypi.org/simple/{project}',
                headers={"Accept": "application/vnd.pypi.simple.v1+json"},
            )
        except urllib3.exceptions.HTTPError as e:
            err_msg = f'Failed to fetch hashes for `{project}`: {e}'
        else:
            if response.status == 200:
                break

            if response.status == 404:
                # The project is not published on PyPI, so retrying can never
                # succeed; report "no known wheels" instead of looping forever.
                return {}

            err_msg = f'Failed to fetch hashes for `{project}`, status code: {response.status}'

        print(err_msg)
        print(f'Retrying in {retry_wait} seconds')
        time.sleep(retry_wait)
        retry_wait *= 2

    data = response.json()
    return {
        file['filename']: file['hashes']['sha256']
        for file in data['files']
        if file['filename'].endswith('.whl') and 'sha256' in file['hashes']
    }


def wheel_was_built(wheel: Path) -> bool:
    """Tell whether ``wheel`` differs from what PyPI publishes under the same filename.

    Returns:
        True when PyPI lists no wheel with this filename for the project, or
        when the listed SHA-256 differs from the local file's digest; False
        when the digests are identical.
    """
    metadata = extract_metadata(wheel)
    published_hashes = get_wheel_hashes(normalize_project_name(metadata['Name']))

    expected_digest = published_hashes.get(wheel.name)
    if expected_digest is None:
        return True

    actual_digest = sha256(wheel.read_bytes()).hexdigest()
    return actual_digest != expected_digest


def add_dependency(dependencies: dict[str, str], sizes: dict[str, WheelSizes], wheel: Path) -> None:
    """Record the version and sizes of ``wheel`` in the two given mappings.

    Both mappings are mutated in place, keyed by the normalized project name:
    ``dependencies`` receives the version string and ``sizes`` receives a dict
    holding the version plus the values from ``calculate_wheel_sizes``.
    """
    metadata = extract_metadata(wheel)
    name = normalize_project_name(metadata['Name'])
    version = metadata['Version']

    dependencies[name] = version

    size_entry = {'version': version}
    size_entry.update(calculate_wheel_sizes(wheel))
    sizes[name] = size_entry


def calculate_wheel_sizes(wheel_path: Path) -> WheelSizes:
compressed_size = wheel_path.stat(follow_symlinks=True).st_size
with ZipFile(wheel_path) as zf:
Expand Down Expand Up @@ -92,6 +181,13 @@ def main():

with TemporaryDirectory() as d:
staged_wheel_dir = Path(d).resolve()
staged_built_wheels_dir = staged_wheel_dir / 'built'
staged_external_wheels_dir = staged_wheel_dir / 'external'

# Create the directories
staged_built_wheels_dir.mkdir(parents=True, exist_ok=True)
staged_external_wheels_dir.mkdir(parents=True, exist_ok=True)

env_vars = dict(os.environ)
env_vars['PATH'] = f'{python_path.parent}{os.pathsep}{env_vars["PATH"]}'
env_vars['PIP_WHEEL_DIR'] = str(staged_wheel_dir)
Expand All @@ -100,7 +196,7 @@ def main():
env_vars['DD_ENV_FILE'] = str(ENV_FILE)

# Off is on, see: https://github.com/pypa/pip/issues/5735
env_vars['PIP_NO_BUILD_ISOLATION'] = '0'
# env_vars['PIP_NO_BUILD_ISOLATION'] = '0'

# Spaces are used to separate multiple values which means paths themselves cannot contain spaces, see:
# https://github.com/pypa/pip/issues/10114#issuecomment-1880125475
Expand All @@ -121,35 +217,47 @@ def main():
if constraints_file := env_vars.get('PIP_CONSTRAINT'):
env_vars['PIP_CONSTRAINT'] = path_to_uri(constraints_file)

print("--------------------------------")
print("Building wheels")
print("--------------------------------")
# Fetch or build wheels
command_args = [
str(python_path),
'-m',
'pip',
'-vvv',
'wheel',
'--config-settings=build-backend=scripts.build_backend',
f'--config-settings=backend-path={MOUNT_DIR / ".builders"}',
'-r',
str(MOUNT_DIR / 'requirements.in'),
'--wheel-dir',
str(staged_wheel_dir),
# Temporarily removing extra index urls. See below.
# '--extra-index-url', CUSTOM_EXTERNAL_INDEX,
# '--extra-index-url',
# CUSTOM_EXTERNAL_INDEX,
]
# Temporarily disable extra index urls. There are broken wheels in the gcloud bucket
# while working on removing tests from them. Adding extra indices causes undefined behavior
# and can pull a broken image, preventing the building from running.
# if args.use_built_index:
# command_args.extend(['--extra-index-url', CUSTOM_BUILT_INDEX])

check_process(command_args, env=env_vars)
print("--------------------------------")
print("Finished building wheels")
print("--------------------------------")
# Classify wheels
for wheel in iter_wheels(staged_wheel_dir):
if wheel_was_built(wheel):
shutil.move(wheel, staged_built_wheels_dir)
else:
shutil.move(wheel, staged_external_wheels_dir)

# Repair wheels
check_process(
[
sys.executable,
'-u',
str(MOUNT_DIR / 'scripts' / 'repair_wheels.py'),
'--source-dir',
str(staged_wheel_dir),
'--source-built-dir',
str(staged_built_wheels_dir),
'--source-external-dir',
str(staged_external_wheels_dir),
'--built-dir',
str(built_wheels_dir),
'--external-dir',
Expand All @@ -167,7 +275,6 @@ def main():
project_version = project_metadata['Version']
dependencies[project_name] = project_version


sizes[project_name] = {'version': project_version, **calculate_wheel_sizes(wheel)}

output_path = MOUNT_DIR / 'sizes.json'
Expand Down
45 changes: 45 additions & 0 deletions .builders/scripts/files_to_remove.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Paths stripped from wheels by build_backend.py after the build.
# Each entry is a gitignore-style pattern matched against paths relative to
# the root of the wheel; a trailing "/" selects a directory and everything in it.
excluded_paths = [
# --- Type annotations ---
"krb5/**/*.pyi",
"krb5/**/py.typed",

"Cryptodome/**/*.pyi",
"Cryptodome/**/py.typed",

"ddtrace/**/*.pyi",
"ddtrace/**/py.typed",

"pyVmomi/**/*.pyi",
"pyVmomi/**/py.typed",

"gssapi/**/*.pyi",
"gssapi/**/py.typed",

# --- Tests ---

"idlelib/idle_test/",
"bs4/tests/",
"Cryptodome/SelfTest/",
"gssapi/tests/",
"keystoneauth1/tests/",
"lazy_loader/tests/",
"openstack/tests/",
"os_service_types/tests/",
"pbr/tests/",
"pkg_resources/tests/",
"pip/_vendor/colorama/tests/",
"psutil/tests/",
"requests_unixsocket/tests/",
"securesystemslib/_vendor/ed25519/test_data/",
"setuptools/_distutils/compilers/C/tests/",
"setuptools/_vendor/packaging/tests/",
"setuptools/_distutils/tests/",
"setuptools/tests/",
"simplejson/tests/",
"stevedore/tests/",
"supervisor/tests/",
# The leading "/" anchors this pattern to the wheel root: only a top-level "test/" directory matches.
"/test/",
"vertica_python/tests/",
"websocket/tests/",
"win32com/test/",
]
Loading
Loading