Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions qa/L0_cuda_shared_memory/cuda_shared_memory_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#!/usr/bin/env python3

# Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -723,7 +723,11 @@ def test_exceeds_cshm_handle_size_limit(self):
try:
error_message = response.json().get("error", "")
self.assertIn(
"'raw_handle' exceeds the maximum allowed data size limit INT_MAX",
"Request JSON size",
error_message,
)
self.assertIn(
"exceeds the maximum allowed value",
error_message,
)
except ValueError:
Expand Down
75 changes: 65 additions & 10 deletions qa/L0_http/http_input_size_limit_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

sys.path.append("../common")

import json
import unittest

import numpy as np
Expand All @@ -39,6 +40,7 @@
# Each FP32 value is 4 bytes, so we need to divide target byte sizes by 4 to get element counts
BYTES_PER_FP32 = 4
MB = 2**20 # 1 MB = 1,048,576 bytes
GB = 2**30 # 1 GB = 1,073,741,824 bytes
DEFAULT_LIMIT_BYTES = 64 * MB # 64MB default limit
INCREASED_LIMIT_BYTES = 128 * MB # 128MB increased limit

Expand Down Expand Up @@ -167,8 +169,11 @@ def test_default_limit_rejection_json(self):
)

# Test case 2: Input just under the 64MB limit (should succeed)
# (2^24 - 32) elements * 4 bytes = 64MB - 128 bytes = 67,108,736 bytes
shape_size = DEFAULT_LIMIT_ELEMENTS - OFFSET_ELEMENTS
# The test creates a JSON payload with data, which adds overhead compared
# to raw binary format. We adjust the shape size to ensure the final
# JSON payload is under the size limit. An element is roughly 5
# bytes in JSON, compared to 4 bytes as a raw FP32.
shape_size = (DEFAULT_LIMIT_ELEMENTS - OFFSET_ELEMENTS) * 4 // 5

payload = {
"inputs": [
Expand All @@ -180,9 +185,8 @@ def test_default_limit_rejection_json(self):
}
]
}
assert (
shape_size * BYTES_PER_FP32 < 64 * MB
) # Verify we're actually under the 64MB limit
# Verify we're actually under the 64MB limit
self.assertLess(len(json.dumps(payload).encode("utf-8")), DEFAULT_LIMIT_BYTES)

response = requests.post(
self._get_infer_url(model), headers=headers, json=payload
Expand Down Expand Up @@ -320,8 +324,11 @@ def test_large_input_json(self):
)

# Test case 2: Input just under the 128MB configured limit (should succeed)
# (2^25 - 32) elements * 4 bytes = 128MB - 128 bytes = 134,217,600 bytes
shape_size = INCREASED_LIMIT_ELEMENTS - OFFSET_ELEMENTS
# The test creates a JSON payload with data, which adds overhead compared
# to raw binary format. We adjust the shape size to ensure the final
# JSON payload is under the size limit. An element is roughly 5
# bytes in JSON, compared to 4 bytes as a raw FP32.
shape_size = (INCREASED_LIMIT_ELEMENTS - OFFSET_ELEMENTS) * 4 // 5

payload = {
"inputs": [
Expand All @@ -333,9 +340,8 @@ def test_large_input_json(self):
}
]
}
assert (
shape_size * BYTES_PER_FP32 < 128 * MB
) # Verify we're actually under the 128MB limit
# Verify we're actually under the 128MB limit
self.assertLess(len(json.dumps(payload).encode("utf-8")), INCREASED_LIMIT_BYTES)

response = requests.post(
self._get_infer_url(model), headers=headers, json=payload
Expand All @@ -360,6 +366,55 @@ def test_large_input_json(self):
f"Expected shape {[1, shape_size]}, got {result['outputs'][0]['shape']}",
)

def test_large_string_in_json(self):
    """Verify the server rejects a JSON inference request whose payload
    exceeds the HTTP input size limit.

    Sends a single BYTES element of roughly 2GB (far above the 64MB
    default limit) and expects an HTTP 400 response whose error text
    mentions the JSON size limit and the flag used to raise it.
    """
    model_name = "simple_identity"

    # Build one string element of 2GB + 64 bytes — comfortably above the
    # 64MB default limit, so the request must be refused up front.
    oversized_len = 2 * GB + 64
    oversized_value = "A" * oversized_len

    request_body = {
        "inputs": [
            {
                "name": "INPUT0",
                "datatype": "BYTES",
                "shape": [1, 1],
                "data": [oversized_value],
            }
        ]
    }

    resp = requests.post(
        self._get_infer_url(model_name),
        headers={"Content-Type": "application/json"},
        json=request_body,
    )

    # The server must answer 400 (bad request), not accept or crash.
    self.assertEqual(
        400,
        resp.status_code,
        "Expected error code for oversized JSON request, got: {}".format(
            resp.status_code
        ),
    )

    # The error body should identify the size problem and the remedy.
    body_text = resp.content.decode()
    for expected_fragment in (
        "Request JSON size",
        "exceeds the maximum allowed value",
        "Use --http-max-input-size to increase the limit",
    ):
        self.assertIn(
            expected_fragment,
            body_text,
        )


# Entry point: run all tests in this module when executed directly.
if __name__ == "__main__":
    unittest.main()
7 changes: 5 additions & 2 deletions qa/L0_http/http_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,8 +364,11 @@ def test_loading_large_invalid_model(self):
try:
error_message = response.json().get("error", "")
self.assertIn(
"'file:1/model.onnx' exceeds the maximum allowed data size limit "
"INT_MAX",
"Request JSON size",
error_message,
)
self.assertIn(
"exceeds the maximum allowed value",
error_message,
)
except ValueError:
Expand Down
8 changes: 8 additions & 0 deletions qa/L0_http/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -760,6 +760,7 @@ MODELDIR=http_input_size_limit_test_models
mkdir -p $MODELDIR
rm -rf ${MODELDIR}/*
cp -r $DATADIR/qa_identity_model_repository/onnx_zero_1_float32 ${MODELDIR}/.
cp -r ./models/simple_identity ${MODELDIR}/.

# First run with default size limit - large inputs should fail
SERVER_ARGS="--model-repository=${MODELDIR}"
Expand Down Expand Up @@ -787,6 +788,13 @@ if [ $? -ne 0 ]; then
echo -e "\n***\n*** Default Input Size Limit Test Failed for JSON input\n***"
RET=1
fi

python http_input_size_limit_test.py InferSizeLimitTest.test_large_string_in_json >> $CLIENT_LOG 2>&1
if [ $? -ne 0 ]; then
cat $CLIENT_LOG
echo -e "\n***\n*** Default Input Size Limit Test Failed for large string in JSON\n***"
RET=1
fi
set -e

kill $SERVER_PID
Expand Down
10 changes: 10 additions & 0 deletions src/http_server.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3060,6 +3060,16 @@ HTTPAPIServer::EVBufferToJson(
triton::common::TritonJson::Value* document, evbuffer_iovec* v, int* v_idx,
const size_t length, int n)
{
if (length > max_input_size_) {
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INVALID_ARG,
("Request JSON size of " + std::to_string(length) +
" bytes exceeds the maximum allowed value of " +
std::to_string(max_input_size_) +
" bytes. Use --http-max-input-size to increase the limit.")
.c_str());
}

size_t offset = 0, remaining_length = length;
char* json_base;
std::vector<char> json_buffer;
Expand Down
Loading