Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions qa/L0_cuda_shared_memory/cuda_shared_memory_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python3

# Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -723,7 +723,11 @@ def test_exceeds_cshm_handle_size_limit(self):
try:
error_message = response.json().get("error", "")
self.assertIn(
"'raw_handle' exceeds the maximum allowed data size limit INT_MAX",
"Request JSON size",
error_message,
)
self.assertIn(
"exceeds the maximum allowed value",
error_message,
)
except ValueError:
Expand Down
90 changes: 80 additions & 10 deletions qa/L0_http/http_input_size_limit_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

sys.path.append("../common")

import json
import unittest

import numpy as np
Expand Down Expand Up @@ -167,8 +168,11 @@
)

# Test case 2: Input just under the 64MB limit (should succeed)
# (2^24 - 32) elements * 4 bytes = 64MB - 128 bytes = 67,108,736 bytes
shape_size = DEFAULT_LIMIT_ELEMENTS - OFFSET_ELEMENTS
# The test creates a JSON payload with data, which adds overhead compared
# to raw binary format. We adjust the shape size to ensure the final
# JSON payload is under the size limit. An element of '1.0' is roughly 5
# bytes in JSON, compared to 4 bytes as a raw FP32.
shape_size = (DEFAULT_LIMIT_ELEMENTS - OFFSET_ELEMENTS) * 4 // 5

payload = {
"inputs": [
Expand All @@ -180,15 +184,23 @@
}
]
}
assert (
shape_size * BYTES_PER_FP32 < 64 * MB
) # Verify we're actually under the 64MB limit
# Verify we're actually under the 64MB limit
self.assertLess(len(json.dumps(payload)), DEFAULT_LIMIT_BYTES)

headers = {"Content-Type": "application/json"}
response = requests.post(
self._get_infer_url(model), headers=headers, json=payload
)

# Should succeed with 200 OK
if response.status_code != 200:
print(f"\n[DEBUG] test_default_limit_rejection_json - FAILED SUCCESS CASE")
print(f"[DEBUG] Expected status code: 200")
print(f"[DEBUG] Actual status code: {response.status_code}")
try:
print(f"[DEBUG] Error response: {response.json()}")
except ValueError:
print(f"[DEBUG] Error response (not JSON): {response.content.decode()}")
self.assertEqual(
200,
response.status_code,
Expand Down Expand Up @@ -320,8 +332,11 @@
)

# Test case 2: Input just under the 128MB configured limit (should succeed)
# (2^25 - 32) elements * 4 bytes = 128MB - 128 bytes = 134,217,600 bytes
shape_size = INCREASED_LIMIT_ELEMENTS - OFFSET_ELEMENTS
# The test creates a JSON payload with data, which adds overhead compared
# to raw binary format. We adjust the shape size to ensure the final
# JSON payload is under the size limit. An element of '1.0' is roughly 5
# bytes in JSON, compared to 4 bytes as a raw FP32.
shape_size = (INCREASED_LIMIT_ELEMENTS - OFFSET_ELEMENTS) * 4 // 5

payload = {
"inputs": [
Expand All @@ -333,15 +348,22 @@
}
]
}
assert (
shape_size * BYTES_PER_FP32 < 128 * MB
) # Verify we're actually under the 128MB limit
# Verify we're actually under the 128MB limit
self.assertLess(len(json.dumps(payload)), INCREASED_LIMIT_BYTES)

response = requests.post(
self._get_infer_url(model), headers=headers, json=payload
)

# Should succeed with 200 OK
if response.status_code != 200:
print(f"\n[DEBUG] test_large_input_json - FAILED SUCCESS CASE")
print(f"[DEBUG] Expected status code: 200")
print(f"[DEBUG] Actual status code: {response.status_code}")
try:
print(f"[DEBUG] Error response: {response.json()}")
except:
print(f"[DEBUG] Error response (not JSON): {response.content.decode()}")
self.assertEqual(
200,
response.status_code,
Expand All @@ -360,6 +382,54 @@
f"Expected shape {[1, shape_size]}, got {result['outputs'][0]['shape']}",
)

def test_large_string_in_json(self):
    """Verify that a JSON request carrying an oversized string is rejected.

    A single BYTES element holding a ~2 GB string (far above the 64MB
    default limit) is posted as JSON. The request must come back as
    HTTP 400 with a body that reports the JSON size, states that it
    exceeds the maximum allowed value, and points at the
    --http-max-input-size option.
    """
    model = "simple_identity"

    # 2222 * 1024222 = 2,275,821,284 characters, i.e. roughly 2 GB of "A"s,
    # which is well past the 64MB default limit.
    oversized_text = "A" * (2222 * 1024222)

    string_input = {
        "name": "INPUT0",
        "datatype": "BYTES",
        "shape": [1, 1],
        "data": [oversized_text],
    }
    response = requests.post(
        self._get_infer_url(model),
        headers={"Content-Type": "application/json"},
        json={"inputs": [string_input]},
    )

    # The request is expected to be refused with 400 Bad Request.
    self.assertEqual(
        400,
        response.status_code,
        "Expected error code for oversized JSON request, got: {}".format(
            response.status_code
        ),
    )

    # Every fragment of the expected error text must appear in the raw body;
    # the fragments are checked in the same order as before.
    error_msg = response.content.decode()
    for expected_fragment in (
        "Request JSON size",
        "exceeds the maximum allowed value",
        "Use --http-max-input-size to increase the limit",
    ):
        self.assertIn(expected_fragment, error_msg)


if __name__ == "__main__":
unittest.main()
7 changes: 5 additions & 2 deletions qa/L0_http/http_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,8 +364,11 @@ def test_loading_large_invalid_model(self):
try:
error_message = response.json().get("error", "")
self.assertIn(
"'file:1/model.onnx' exceeds the maximum allowed data size limit "
"INT_MAX",
"Request JSON size",
error_message,
)
self.assertIn(
"exceeds the maximum allowed value",
error_message,
)
except ValueError:
Expand Down
8 changes: 8 additions & 0 deletions qa/L0_http/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -760,6 +760,7 @@ MODELDIR=http_input_size_limit_test_models
mkdir -p $MODELDIR
rm -rf ${MODELDIR}/*
cp -r $DATADIR/qa_identity_model_repository/onnx_zero_1_float32 ${MODELDIR}/.
cp -r ./models/simple_identity ${MODELDIR}/.

# First run with default size limit - large inputs should fail
SERVER_ARGS="--model-repository=${MODELDIR}"
Expand Down Expand Up @@ -787,6 +788,13 @@ if [ $? -ne 0 ]; then
echo -e "\n***\n*** Default Input Size Limit Test Failed for JSON input\n***"
RET=1
fi

python http_input_size_limit_test.py InferSizeLimitTest.test_large_string_in_json >> $CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
cat $CLIENT_LOG
echo -e "\n***\n*** Default Input Size Limit Test Failed for large string in JSON\n***"
RET=1
fi
set -e

kill $SERVER_PID
Expand Down
10 changes: 10 additions & 0 deletions src/http_server.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3060,6 +3060,16 @@ HTTPAPIServer::EVBufferToJson(
triton::common::TritonJson::Value* document, evbuffer_iovec* v, int* v_idx,
const size_t length, int n)
{
if (length > max_input_size_) {
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INVALID_ARG,
("Request JSON size of " + std::to_string(length) +
" bytes exceeds the maximum allowed value of " +
std::to_string(max_input_size_) +
" bytes. Use --http-max-input-size to increase the limit.")
.c_str());
}

size_t offset = 0, remaining_length = length;
char* json_base;
std::vector<char> json_buffer;
Expand Down
Loading