Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions qa/L0_cuda_shared_memory/cuda_shared_memory_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python3

# Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -723,7 +723,11 @@ def test_exceeds_cshm_handle_size_limit(self):
try:
error_message = response.json().get("error", "")
self.assertIn(
"'raw_handle' exceeds the maximum allowed data size limit INT_MAX",
"Request JSON size",
error_message,
)
self.assertIn(
"exceeds the maximum allowed value",
error_message,
)
except ValueError:
Expand Down
75 changes: 65 additions & 10 deletions qa/L0_http/http_input_size_limit_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

sys.path.append("../common")

import json
import unittest

import numpy as np
Expand All @@ -39,6 +40,7 @@
# Each FP32 value is 4 bytes, so we need to divide target byte sizes by 4 to get element counts
BYTES_PER_FP32 = 4
MB = 2**20 # 1 MB = 1,048,576 bytes
GB = 2**30 # 1 GB = 1,073,741,824 bytes
DEFAULT_LIMIT_BYTES = 64 * MB # 64MB default limit
INCREASED_LIMIT_BYTES = 128 * MB # 128MB increased limit

Expand Down Expand Up @@ -167,8 +169,11 @@ def test_default_limit_rejection_json(self):
)

# Test case 2: Input just under the 64MB limit (should succeed)
# (2^24 - 32) elements * 4 bytes = 64MB - 128 bytes = 67,108,736 bytes
shape_size = DEFAULT_LIMIT_ELEMENTS - OFFSET_ELEMENTS
# The test creates a JSON payload with data, which adds overhead compared
# to raw binary format. We adjust the shape size to ensure the final
# JSON payload is under the size limit. An element is roughly 5
# bytes in JSON, compared to 4 bytes as a raw FP32.
shape_size = (DEFAULT_LIMIT_ELEMENTS - OFFSET_ELEMENTS) * 4 // 5

payload = {
"inputs": [
Expand All @@ -180,9 +185,8 @@ def test_default_limit_rejection_json(self):
}
]
}
assert (
shape_size * BYTES_PER_FP32 < 64 * MB
) # Verify we're actually under the 64MB limit
# Verify we're actually under the 64MB limit
self.assertLess(len(json.dumps(payload).encode("utf-8")), DEFAULT_LIMIT_BYTES)

response = requests.post(
self._get_infer_url(model), headers=headers, json=payload
Expand Down Expand Up @@ -320,8 +324,11 @@ def test_large_input_json(self):
)

# Test case 2: Input just under the 128MB configured limit (should succeed)
# (2^25 - 32) elements * 4 bytes = 128MB - 128 bytes = 134,217,600 bytes
shape_size = INCREASED_LIMIT_ELEMENTS - OFFSET_ELEMENTS
# The test creates a JSON payload with data, which adds overhead compared
# to raw binary format. We adjust the shape size to ensure the final
# JSON payload is under the size limit. An element is roughly 5
# bytes in JSON, compared to 4 bytes as a raw FP32.
shape_size = (INCREASED_LIMIT_ELEMENTS - OFFSET_ELEMENTS) * 4 // 5

payload = {
"inputs": [
Expand All @@ -333,9 +340,8 @@ def test_large_input_json(self):
}
]
}
assert (
shape_size * BYTES_PER_FP32 < 128 * MB
) # Verify we're actually under the 128MB limit
# Verify we're actually under the 128MB limit
self.assertLess(len(json.dumps(payload).encode("utf-8")), INCREASED_LIMIT_BYTES)

response = requests.post(
self._get_infer_url(model), headers=headers, json=payload
Expand All @@ -360,6 +366,55 @@ def test_large_input_json(self):
f"Expected shape {[1, shape_size]}, got {result['outputs'][0]['shape']}",
)

def test_large_string_in_json(self):
    """Verify the server rejects a JSON inference request whose payload
    exceeds the HTTP input size limit.

    Sends a single BYTES element of roughly 2GB (far above the 64MB
    default limit) and expects an HTTP 400 response whose error text
    mentions the JSON size limit and the flag used to raise it.
    """
    model_name = "simple_identity"

    # Build one string element of 2GB + 64 bytes — comfortably above the
    # 64MB default limit, so the request must be refused up front.
    oversized_len = 2 * GB + 64
    oversized_value = "A" * oversized_len

    request_body = {
        "inputs": [
            {
                "name": "INPUT0",
                "datatype": "BYTES",
                "shape": [1, 1],
                "data": [oversized_value],
            }
        ]
    }

    resp = requests.post(
        self._get_infer_url(model_name),
        headers={"Content-Type": "application/json"},
        json=request_body,
    )

    # The server must answer 400 (bad request), not accept or crash.
    self.assertEqual(
        400,
        resp.status_code,
        "Expected error code for oversized JSON request, got: {}".format(
            resp.status_code
        ),
    )

    # The error body should identify the size problem and the remedy.
    body_text = resp.content.decode()
    for expected_fragment in (
        "Request JSON size",
        "exceeds the maximum allowed value",
        "Use --http-max-input-size to increase the limit",
    ):
        self.assertIn(
            expected_fragment,
            body_text,
        )


# Entry point: run all tests in this module when executed directly.
if __name__ == "__main__":
    unittest.main()
7 changes: 5 additions & 2 deletions qa/L0_http/http_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,8 +364,11 @@ def test_loading_large_invalid_model(self):
try:
error_message = response.json().get("error", "")
self.assertIn(
"'file:1/model.onnx' exceeds the maximum allowed data size limit "
"INT_MAX",
"Request JSON size",
error_message,
)
self.assertIn(
"exceeds the maximum allowed value",
error_message,
)
except ValueError:
Expand Down
8 changes: 8 additions & 0 deletions qa/L0_http/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -760,6 +760,7 @@ MODELDIR=http_input_size_limit_test_models
mkdir -p $MODELDIR
rm -rf ${MODELDIR}/*
cp -r $DATADIR/qa_identity_model_repository/onnx_zero_1_float32 ${MODELDIR}/.
cp -r ./models/simple_identity ${MODELDIR}/.

# First run with default size limit - large inputs should fail
SERVER_ARGS="--model-repository=${MODELDIR}"
Expand Down Expand Up @@ -787,6 +788,13 @@ if [ $? -ne 0 ]; then
echo -e "\n***\n*** Default Input Size Limit Test Failed for JSON input\n***"
RET=1
fi

python http_input_size_limit_test.py InferSizeLimitTest.test_large_string_in_json >> $CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
cat $CLIENT_LOG
echo -e "\n***\n*** Default Input Size Limit Test Failed for large string in JSON\n***"
RET=1
fi
set -e

kill $SERVER_PID
Expand Down
10 changes: 10 additions & 0 deletions src/http_server.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3060,6 +3060,16 @@ HTTPAPIServer::EVBufferToJson(
triton::common::TritonJson::Value* document, evbuffer_iovec* v, int* v_idx,
const size_t length, int n)
{
if (length > max_input_size_) {
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INVALID_ARG,
("Request JSON size of " + std::to_string(length) +
" bytes exceeds the maximum allowed value of " +
std::to_string(max_input_size_) +
" bytes. Use --http-max-input-size to increase the limit.")
.c_str());
}

size_t offset = 0, remaining_length = length;
char* json_base;
std::vector<char> json_buffer;
Expand Down
Loading