vllm-project · Sophie8 · Oct 25, 2025 · Oct 31, 2025
@@ -162,6 +162,44 @@ type BatchSimilarityRequest struct {
 	LatencyPriority float32  `json:"latency_priority,omitempty"` // 0.0-1.0, only for "auto" model
 }
 
+// KeywordSimilarityMatch carries the similarity threshold configured for one keyword of a BatchEmbeddingSimilarityMatchRequest. Note that Keyword is serialized under the JSON key "text".
+type KeywordSimilarityMatch struct {
+	Index               int     `json:"index"`                // Index of the candidate in the input array
+	SimilarityThreshold float32 `json:"similarity_threshold"` // Minimum cosine similarity between query and keyword for the keyword to count as matched
+	Keyword             string  `json:"text"`                 // The keyword to calculate similarity with
+}
+
+// KeywordSimilarityMatchResponse reports, for a single keyword, the calculated similarity to the query and whether it reached the configured threshold
+type KeywordSimilarityMatchResponse struct {
+	Index                int     `json:"index"`                 // Index of the candidate in the input array
+	SimilarityThreshold  float32 `json:"similarity_threshold"`  // Threshold the calculated score was compared against for this keyword
+	SimilarityCalculated float32 `json:"similarity_calculated"` // Cosine similarity score calculated between the query and the keyword
+	Keyword              string  `json:"text"`                  // The keyword the similarity was calculated with
+	Matched              bool    `json:"matched"`               // True when SimilarityCalculated >= SimilarityThreshold
+	ModelUsed            string  `json:"model_used"`            // "qwen3", "gemma", or "unknown"
+	ProcessingTimeMs     float32 `json:"processing_time_ms"`    // Processing time in milliseconds of the whole batch call (same value on every entry)
+}
+
+// BatchEmbeddingSimilarityMatchRequest represents a request to find the similarity between a query and configurable keywords
+type BatchEmbeddingSimilarityMatchRequest struct {
+	Query                string                   `json:"query"`                      // Query text (required, must be non-empty)
+	Keywords             []string                 `json:"keywords"`                   // Array of keyword texts (required, must be non-empty)
+	Model                string                   `json:"model,omitempty"`            // "auto" (default), "qwen3", "gemma"
+	Dimension            int                      `json:"dimension,omitempty"`        // Target dimension: 768 (default), 1024, 512, 256, 128
+	SimilarityThresholds []KeywordSimilarityMatch `json:"similarity_thresholds"`      // Configurable thresholds per keyword (e.g. keyword A: 80%, keyword B: 60%)
+	AggregationMethod    string                   `json:"aggregation_method"`         // Aggregation method to pick the best matched category; only "max" is supported, empty defaults to "max". Placeholder for further extension
+	QualityPriority      float32                  `json:"quality_priority,omitempty"` // 0.0-1.0, only for "auto" model; defaults to 0.5 when both priorities are 0
+	LatencyPriority      float32                  `json:"latency_priority,omitempty"` // 0.0-1.0, only for "auto" model; defaults to 0.5 when both priorities are 0
+}
+
+// BatchEmbeddingSimilarityMatchResponse represents the response of a similarity match between a query and configurable keywords
+type BatchEmbeddingSimilarityMatchResponse struct {
+	Query               string                           `json:"query"`                 // Query text, echoed from the request
+	KeywordMatches      []KeywordSimilarityMatchResponse `json:"keyword_matches"`       // One KeywordSimilarityMatchResponse per match returned by the similarity calculation
+	AggregationMethod   string                           `json:"aggregation_method"`    // Aggregation method used to pick the best matched category, support max now. Placeholder for further extension
+	BestMatchedCategory string                           `json:"best_matched_category"` // Keyword picked by the aggregation method above; empty when none was picked
+}
+
 // BatchSimilarityMatch represents a single match in batch similarity matching
 type BatchSimilarityMatch struct {
 	Index      int     `json:"index"`      // Index of the candidate in the input array
@@ -691,6 +729,12 @@ func (s *ClassificationAPIServer) handleCombinedClassification(w http.ResponseWr
 	s.writeErrorResponse(w, http.StatusNotImplemented, "NOT_IMPLEMENTED", "Combined classification not implemented yet")
 }
 
+// Placeholder function to fuse all the designed internal signal providers: keyword matcher, regex scanner, embedding similarity, BERT classifier
+// func (s *ClassificationAPIServer) handleAllInTreeSignalProviders(w http.ResponseWriter, _ *http.Request) {
+//	response, err := s.classificationSvc.handleBatchClassification(..)
+
+// }
+
 func (s *ClassificationAPIServer) handleBatchClassification(w http.ResponseWriter, r *http.Request) {
 	// Record batch classification request
 	metrics.RecordBatchClassificationRequest("unified")
@@ -1602,3 +1646,117 @@ func (s *ClassificationAPIServer) handleBatchSimilarity(w http.ResponseWriter, r
 
 	s.writeJSONResponse(w, http.StatusOK, response)
 }
+
+// handleBatchInTreeSimilarityMatching handles batch embedding based similarity matching requests.
+//
+// It scores req.Query against every entry of req.Keywords with
+// candle_binding.CalculateSimilarityBatch, marks each keyword as matched when
+// its score reaches the threshold configured for it, and reports the keyword
+// selected by the aggregation method as the best matched category.
+//
+// Thresholds are positional: similarity_thresholds[k] applies to keywords[k].
+//
+// Responses:
+//   - 400 INVALID_INPUT: unparsable body, empty query, empty keywords, fewer
+//     thresholds than keywords, or an aggregation method other than "max".
+//   - 400 INVALID_DIMENSION: dimension outside 128/256/512/768/1024.
+//   - 500 BATCH_SIMILARITY_FAILED: the similarity backend returned an error or
+//     an out-of-range candidate index.
+//   - 200: a BatchEmbeddingSimilarityMatchResponse.
+func (s *ClassificationAPIServer) handleBatchInTreeSimilarityMatching(w http.ResponseWriter, r *http.Request) {
+	// Parse request
+	var req BatchEmbeddingSimilarityMatchRequest
+	if err := s.parseJSONRequest(r, &req); err != nil {
+		s.writeErrorResponse(w, http.StatusBadRequest, "INVALID_INPUT", err.Error())
+		return
+	}
+
+	// Validate everything up front so a bad request never reaches the embedding model.
+	if req.Query == "" {
+		s.writeErrorResponse(w, http.StatusBadRequest, "INVALID_INPUT", "query must be provided")
+		return
+	}
+	if len(req.Keywords) == 0 {
+		s.writeErrorResponse(w, http.StatusBadRequest, "INVALID_INPUT", "keyword array cannot be empty")
+		return
+	}
+	// Every keyword needs a threshold; without this check the lookup below would
+	// index past the end of the slice and panic.
+	if len(req.SimilarityThresholds) < len(req.Keywords) {
+		s.writeErrorResponse(w, http.StatusBadRequest, "INVALID_INPUT",
+			fmt.Sprintf("similarity_thresholds must provide a threshold for each keyword (got %d thresholds for %d keywords)",
+				len(req.SimilarityThresholds), len(req.Keywords)))
+		return
+	}
+	if req.AggregationMethod != "" && req.AggregationMethod != "max" {
+		s.writeErrorResponse(w, http.StatusBadRequest, "INVALID_INPUT", "Aggregation method only supports max now")
+		return
+	}
+
+	// Set defaults
+	if req.Model == "" {
+		req.Model = "auto"
+	}
+	if req.Dimension == 0 {
+		req.Dimension = 768 // Default to full dimension
+	}
+	if req.Model == "auto" && req.QualityPriority == 0 && req.LatencyPriority == 0 {
+		req.QualityPriority = 0.5
+		req.LatencyPriority = 0.5
+	}
+	// Start from the requested method so an explicit "max" is honored, and fall
+	// back to "max" only when the field was omitted.
+	aggregationMethod := req.AggregationMethod
+	if aggregationMethod == "" {
+		aggregationMethod = "max"
+	}
+
+	// Validate dimension
+	validDimensions := map[int]bool{128: true, 256: true, 512: true, 768: true, 1024: true}
+	if !validDimensions[req.Dimension] {
+		s.writeErrorResponse(w, http.StatusBadRequest, "INVALID_DIMENSION",
+			fmt.Sprintf("dimension must be one of: 128, 256, 512, 768, 1024 (got %d)", req.Dimension))
+		return
+	}
+
+	// Calculate batch similarity
+	result, err := candle_binding.CalculateSimilarityBatch(
+		req.Query,
+		req.Keywords,
+		0, // return scores for all the keywords
+		req.Model,
+		req.Dimension,
+	)
+	if err != nil {
+		s.writeErrorResponse(w, http.StatusInternalServerError, "BATCH_SIMILARITY_FAILED",
+			fmt.Sprintf("failed to calculate batch similarity: %v", err))
+		return
+	}
+
+	// Build embedding based similarity response.
+	// Keyword and threshold are both resolved through match.Index rather than
+	// the loop position: the two differ whenever the backend returns matches in
+	// a different order than the input (NOTE(review): presumably sorted by
+	// score - confirm against candle_binding).
+	matches := make([]KeywordSimilarityMatchResponse, 0, len(result.Matches))
+	for _, match := range result.Matches {
+		if match.Index < 0 || match.Index >= len(req.Keywords) {
+			s.writeErrorResponse(w, http.StatusInternalServerError, "BATCH_SIMILARITY_FAILED",
+				fmt.Sprintf("similarity backend returned out-of-range candidate index %d", match.Index))
+			return
+		}
+		threshold := req.SimilarityThresholds[match.Index].SimilarityThreshold
+		matches = append(matches, KeywordSimilarityMatchResponse{
+			Index:                match.Index,
+			SimilarityThreshold:  threshold,
+			SimilarityCalculated: match.Similarity,
+			Keyword:              req.Keywords[match.Index],
+			Matched:              match.Similarity >= threshold,
+			ModelUsed:            result.ModelType,
+			ProcessingTimeMs:     result.ProcessingTimeMs,
+		})
+	}
+
+	// Support mean/max/any aggregation methods to find the best match
+	var bestMatchedCategory string
+	var bestScore float32
+	if aggregationMethod == "max" {
+		// Pick the keyword with the highest cosine similarity score. bestScore
+		// starts at zero, so a keyword is only selected when its score is positive.
+		for _, match := range matches {
+			if match.SimilarityCalculated > bestScore {
+				bestScore = match.SimilarityCalculated
+				bestMatchedCategory = match.Keyword
+			}
+		}
+	}
+
+	// Make Response
+	response := BatchEmbeddingSimilarityMatchResponse{
+		Query:               req.Query,
+		KeywordMatches:      matches,
+		AggregationMethod:   aggregationMethod,
+		BestMatchedCategory: bestMatchedCategory,
+	}
+
+	observability.Infof("Calculated batch embedding similarity: query='%s', %d keywords, (model: %s, took: %.2fms)",
+		req.Query, len(req.Keywords), result.ModelType, result.ProcessingTimeMs)
+
+	s.writeJSONResponse(w, http.StatusOK, response)
+}
@@ -0,0 +1,58 @@
+import React, { useEffect, useState } from 'react'
+import styles from './styles.module.css'
+
+/**
+ * Floating "scroll to top" button.
+ *
+ * Renders an empty fragment until the window has been scrolled more than 300px
+ * down; past that point it renders a button that smoothly scrolls the window
+ * back to the top when clicked.
+ */
+export default function ScrollToTop(): React.ReactElement {
+  const [isVisible, setIsVisible] = useState(false)
+
+  useEffect(() => {
+    // window.scrollY replaces the deprecated window.pageYOffset alias.
+    const toggleVisibility = () => {
+      setIsVisible(window.scrollY > 300)
+    }
+
+    // Sync once on mount: the page may already be scrolled (anchor link,
+    // restored scroll position) before any scroll event fires.
+    toggleVisibility()
+
+    // Passive because the handler never calls preventDefault, so the browser
+    // does not have to wait for it before scrolling.
+    window.addEventListener('scroll', toggleVisibility, { passive: true })
+
+    return () => {
+      window.removeEventListener('scroll', toggleVisibility)
+    }
+  }, [])
+
+  const scrollToTop = () => {
+    window.scrollTo({
+      top: 0,
+      behavior: 'smooth',
+    })
+  }
+
+  // type="button" keeps the control from acting as a submit button if it is ever
+  // rendered inside a form; the icon is decorative because the button already
+  // carries an aria-label.
+  return (
+    <>
+      {isVisible && (
+        <button
+          type="button"
+          onClick={scrollToTop}
+          className={styles.scrollToTop}
+          aria-label="Scroll to top"
+        >
+          <svg
+            width="24"
+            height="24"
+            viewBox="0 0 24 24"
+            fill="none"
+            xmlns="http://www.w3.org/2000/svg"
+            aria-hidden="true"
+          >
+            <path
+              d="M12 19V5M12 5L5 12M12 5L19 12"
+              stroke="currentColor"
+              strokeWidth="2"
+              strokeLinecap="round"
+              strokeLinejoin="round"
+            />
+          </svg>
+        </button>
+      )}
+    </>
+  )
+}
@@ -0,0 +1,93 @@
+/* Floating circular button pinned to the bottom-right corner of the viewport. */
+.scrollToTop {
+  position: fixed;
+  bottom: 2rem;
+  right: 2rem;
+  width: 50px;
+  height: 50px;
+  border-radius: 50%;
+  background: linear-gradient(45deg, var(--tech-primary-blue), var(--tech-accent-purple));
+  color: white;
+  border: none;
+  cursor: pointer;
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  box-shadow: 0 4px 12px rgba(9, 105, 218, 0.3);
+  transition: all 0.3s ease;
+  z-index: 999;
+  /* Entrance animation only. The resting opacity/transform are left at their
+     defaults and the keyframes define just the start state. An animation that
+     holds its end state (fill-mode: forwards) outranks normal declarations in
+     the cascade, so it would permanently mask the :hover/:active transforms. */
+  animation: fadeInUp 0.3s ease;
+}
+
+/* Fade in while sliding up 20px into the resting position. */
+@keyframes fadeInUp {
+  from {
+    opacity: 0;
+    transform: translateY(20px);
+  }
+}
+
+/* Skip the entrance motion for users who asked for reduced motion. */
+@media (prefers-reduced-motion: reduce) {
+  .scrollToTop {
+    animation: none;
+  }
+}
+
+/* Lift and slightly enlarge the button while hovered. */
+.scrollToTop:hover {
+  transform: translateY(-4px) scale(1.1);
+  box-shadow: 0 8px 20px rgba(9, 105, 218, 0.4);
+}
+
+/* Pressed state: a smaller lift than hover. */
+.scrollToTop:active {
+  transform: translateY(-2px) scale(1.05);
+}
+
+/* Dark theme: different gradient end color and a lighter blue glow. */
+[data-theme='dark'] .scrollToTop {
+  background: linear-gradient(45deg, var(--tech-primary-blue), var(--tech-accent-orange));
+  box-shadow: 0 4px 12px rgba(88, 166, 255, 0.4);
+}
+
+[data-theme='dark'] .scrollToTop:hover {
+  box-shadow: 0 8px 20px rgba(88, 166, 255, 0.5);
+}
+
+/* Narrow viewports: smaller button, closer to the corner. */
+@media (max-width: 768px) {
+  .scrollToTop {
+    bottom: 1.5rem;
+    right: 1.5rem;
+    width: 45px;
+    height: 45px;
+  }
+}
+
+/* Pulse ring: a copy of the button background that expands and fades out. */
+.scrollToTop::before {
+  content: '';
+  position: absolute;
+  top: 0;
+  left: 0;
+  right: 0;
+  bottom: 0;
+  border-radius: 50%;
+  background: inherit;
+  opacity: 0;
+  animation: pulse 2s ease-out infinite;
+}
+
+@keyframes pulse {
+  0% {
+    transform: scale(1);
+    opacity: 0.5;
+  }
+  50% {
+    transform: scale(1.1);
+    opacity: 0.3;
+  }
+  100% {
+    transform: scale(1.3);
+    opacity: 0;
+  }
+}
+
+/* Keep the arrow icon above the pulse ring. */
+.scrollToTop svg {
+  position: relative;
+  z-index: 1;
+  transition: transform 0.3s ease;
+}
+
+.scrollToTop:hover svg {
+  transform: translateY(-2px);
+}
+
+/* The pulse loops forever, so switch it off for users who asked for reduced
+   motion. The ring then stays at its resting opacity of 0, i.e. invisible. */
+@media (prefers-reduced-motion: reduce) {
+  .scrollToTop::before {
+    animation: none;
+  }
+}
@@ -0,0 +1,12 @@
+import React from 'react'
+import Root from '@theme-original/Root'
+import ScrollToTop from '../components/ScrollToTop'
+
+/**
+ * Wraps the original theme Root (imported from '@theme-original/Root'),
+ * forwarding every prop to it unchanged and rendering the ScrollToTop
+ * button as its sibling.
+ */
+export default function RootWrapper(props: any): React.ReactElement {
+  const originalRoot = <Root {...props} />
+
+  return (
+    <React.Fragment>
+      {originalRoot}
+      <ScrollToTop />
+    </React.Fragment>
+  )
+}