triton-inference-server · pskiran1 · Oct 10, 2025 · Oct 1, 2025 · Oct 1, 2025 · Oct 1, 2025
diff --git a/qa/L0_cuda_shared_memory/cuda_shared_memory_test.py b/qa/L0_cuda_shared_memory/cuda_shared_memory_test.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-# Copyright 2019-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -723,7 +723,11 @@ def test_exceeds_cshm_handle_size_limit(self):
         try:
             error_message = response.json().get("error", "")
             self.assertIn(
-                "'raw_handle' exceeds the maximum allowed data size limit INT_MAX",
+                "Request JSON size",
+                error_message,
+            )
+            self.assertIn(
+                "exceeds the maximum allowed value",
                 error_message,
             )
         except ValueError:

diff --git a/qa/L0_http/http_input_size_limit_test.py b/qa/L0_http/http_input_size_limit_test.py
@@ -29,6 +29,7 @@
 
 sys.path.append("../common")
 
+import json
 import unittest
 
 import numpy as np
@@ -167,8 +168,11 @@
         )
 
         # Test case 2: Input just under the 64MB limit (should succeed)
-        # (2^24 - 32) elements * 4 bytes = 64MB - 128 bytes = 67,108,736 bytes
-        shape_size = DEFAULT_LIMIT_ELEMENTS - OFFSET_ELEMENTS
+        # The test creates a JSON payload with data, which adds overhead compared
+        # to raw binary format. We adjust the shape size to ensure the final
+        # JSON payload is under the size limit. An element of '1.0' is roughly 5
+        # bytes in JSON, compared to 4 bytes as a raw FP32.
+        shape_size = (DEFAULT_LIMIT_ELEMENTS - OFFSET_ELEMENTS) * 4 // 5
 
         payload = {
             "inputs": [
@@ -180,15 +184,23 @@
                 }
             ]
         }
-        assert (
-            shape_size * BYTES_PER_FP32 < 64 * MB
-        )  # Verify we're actually under the 64MB limit
+        # Verify we're actually under the 64MB limit
+        self.assertLess(len(json.dumps(payload)), DEFAULT_LIMIT_BYTES)
 
+        headers = {"Content-Type": "application/json"}
         response = requests.post(
             self._get_infer_url(model), headers=headers, json=payload
         )
 
         # Should succeed with 200 OK
+        if response.status_code != 200:  # dump diagnostics before the assert fails
+            print("\n[DEBUG] test_default_limit_rejection_json - FAILED SUCCESS CASE")
+            print("[DEBUG] Expected status code: 200")
+            print(f"[DEBUG] Actual status code: {response.status_code}")
+            try:
+                print(f"[DEBUG] Error response: {response.json()}")
+            except ValueError:  # body is not valid JSON; show the raw text instead
+                print(f"[DEBUG] Error response (not JSON): {response.content.decode()}")
         self.assertEqual(
             200,
             response.status_code,
@@ -320,8 +332,11 @@
         )
 
         # Test case 2: Input just under the 128MB configured limit (should succeed)
-        # (2^25 - 32) elements * 4 bytes = 128MB - 128 bytes = 134,217,600 bytes
-        shape_size = INCREASED_LIMIT_ELEMENTS - OFFSET_ELEMENTS
+        # The test creates a JSON payload with data, which adds overhead compared
+        # to raw binary format. We adjust the shape size to ensure the final
+        # JSON payload is under the size limit. An element of '1.0' is roughly 5
+        # bytes in JSON, compared to 4 bytes as a raw FP32.
+        shape_size = (INCREASED_LIMIT_ELEMENTS - OFFSET_ELEMENTS) * 4 // 5
 
         payload = {
             "inputs": [
@@ -333,15 +348,22 @@
                 }
             ]
         }
-        assert (
-            shape_size * BYTES_PER_FP32 < 128 * MB
-        )  # Verify we're actually under the 128MB limit
+        # Verify we're actually under the 128MB limit
+        self.assertLess(len(json.dumps(payload)), INCREASED_LIMIT_BYTES)
 
         response = requests.post(
             self._get_infer_url(model), headers=headers, json=payload
         )
 
         # Should succeed with 200 OK
+        if response.status_code != 200:  # dump diagnostics before the assert fails
+            print("\n[DEBUG] test_large_input_json - FAILED SUCCESS CASE")
+            print("[DEBUG] Expected status code: 200")
+            print(f"[DEBUG] Actual status code: {response.status_code}")
+            try:
+                print(f"[DEBUG] Error response: {response.json()}")
+            except ValueError:  # body is not valid JSON; show the raw text instead
+                print(f"[DEBUG] Error response (not JSON): {response.content.decode()}")
         self.assertEqual(
             200,
             response.status_code,
@@ -360,6 +382,54 @@
             f"Expected shape {[1, shape_size]}, got {result['outputs'][0]['shape']}",
         )
 
+    def test_large_string_in_json(self):
+        """Test JSON request with large string input"""
+        model = "simple_identity"
+
+        # Build a 2,275,821,284-character string (~2.1 GiB, above INT_MAX), far beyond the default 64MB limit
+        large_string_size = 2222 * 1024222
+        large_string = "A" * large_string_size
+
+        payload = {
+            "inputs": [
+                {
+                    "name": "INPUT0",
+                    "datatype": "BYTES",
+                    "shape": [1, 1],
+                    "data": [large_string],
+                }
+            ]
+        }
+
+        headers = {"Content-Type": "application/json"}
+        response = requests.post(
+            self._get_infer_url(model), headers=headers, json=payload
+        )
+
+        # The server is expected to reject the request with HTTP 400 (Bad Request)
+        self.assertEqual(
+            400,
+            response.status_code,
+            "Expected error code for oversized JSON request, got: {}".format(
+                response.status_code
+            ),
+        )
+
+        # The error text must report the size violation and name the flag that raises the limit
+        error_msg = response.content.decode()
+        self.assertIn(
+            "Request JSON size",
+            error_msg,
+        )
+        self.assertIn(
+            "exceeds the maximum allowed value",
+            error_msg,
+        )
+        self.assertIn(
+            "Use --http-max-input-size to increase the limit",
+            error_msg,
+        )
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/qa/L0_http/http_test.py b/qa/L0_http/http_test.py
@@ -364,8 +364,11 @@ def test_loading_large_invalid_model(self):
         try:
             error_message = response.json().get("error", "")
             self.assertIn(
-                "'file:1/model.onnx' exceeds the maximum allowed data size limit "
-                "INT_MAX",
+                "Request JSON size",
+                error_message,
+            )
+            self.assertIn(
+                "exceeds the maximum allowed value",
                 error_message,
             )
         except ValueError:

diff --git a/qa/L0_http/test.sh b/qa/L0_http/test.sh
@@ -760,6 +760,7 @@ MODELDIR=http_input_size_limit_test_models
 mkdir -p $MODELDIR
 rm -rf ${MODELDIR}/*
 cp -r $DATADIR/qa_identity_model_repository/onnx_zero_1_float32 ${MODELDIR}/.
+cp -r ./models/simple_identity ${MODELDIR}/.
 
 # First run with default size limit - large inputs should fail
 SERVER_ARGS="--model-repository=${MODELDIR}"
@@ -787,6 +788,13 @@ if [ $? -ne 0 ]; then
     echo -e "\n***\n*** Default Input Size Limit Test Failed for JSON input\n***"
     RET=1
 fi
+
+python http_input_size_limit_test.py InferSizeLimitTest.test_large_string_in_json >> $CLIENT_LOG 2>&1
+if [ $? -ne 0 ]; then
+    cat $CLIENT_LOG
+    echo -e "\n***\n*** Default Input Size Limit Test Failed for large string in JSON\n***"
+    RET=1
+fi
 set -e
 
 kill $SERVER_PID

diff --git a/src/http_server.cc b/src/http_server.cc
@@ -3060,6 +3060,16 @@ HTTPAPIServer::EVBufferToJson(
     triton::common::TritonJson::Value* document, evbuffer_iovec* v, int* v_idx,
     const size_t length, int n)
 {
+  if (length > max_input_size_) {
+    return TRITONSERVER_ErrorNew(
+        TRITONSERVER_ERROR_INVALID_ARG,
+        ("Request JSON size of " + std::to_string(length) +
+         " bytes exceeds the maximum allowed value of " +
+         std::to_string(max_input_size_) +
+         " bytes. Use --http-max-input-size to increase the limit.")
+            .c_str());
+  }
+
   size_t offset = 0, remaining_length = length;
   char* json_base;
   std::vector<char> json_buffer;