Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
158 changes: 158 additions & 0 deletions src/semantic-router/pkg/api/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,44 @@ type BatchSimilarityRequest struct {
LatencyPriority float32 `json:"latency_priority,omitempty"` // 0.0-1.0, only for "auto" model
}

// KeywordSimilarityMatch represents a single match in batch similarity matching
type KeywordSimilarityMatch struct {
Index int `json:"index"` // Index of the candidate in the input array
SimilarityThreshold float32 `json:"similarity_threshold"` // threshold as matched for the cosine similarity score between query and the keyword
Keyword string `json:"text"` // The keyword to calculate similarity with
}

// KeywordSimilarityMatchrResponse represents a single match in batch similarity matching
type KeywordSimilarityMatchResponse struct {
Index int `json:"index"` // Index of the candidate in the input array
SimilarityThreshold float32 `json:"similarity_threshold"` // threshold as matched for the cosine similarity score between query and the keyword
SimilarityCalculated float32 `json:"similarity_calculated"` // threshold as matched for the cosine similarity score between query and the keyword
Keyword string `json:"text"` // The keyword to calculate similarity with
Matched bool `json:"matched"` // The query matched the keyword or not
ModelUsed string `json:"model_used"` // "qwen3", "gemma", or "unknown"
ProcessingTimeMs float32 `json:"processing_time_ms"` // Processing time in milliseconds
}

// BatchEmbeddingSimilarityMatchRequest represents a request to find the similarity between a query and configurable keywords
type BatchEmbeddingSimilarityMatchRequest struct {
Query string `json:"query"` // Query text
Keywords []string `json:"keywords"` // Array of keyword texts
Model string `json:"model,omitempty"` // "auto" (default), "qwen3", "gemma"
Dimension int `json:"dimension,omitempty"` // Target dimension: 768 (default), 512, 256, 128
SimilarityThresholds []KeywordSimilarityMatch `json:"similarity_thresholds"` // Configurable thresholds per keyword (e.g. keyword A: 80%, keyword B: 60%)
AggregationMethod string `json:"aggregation_method"` // Aggregation method to pick the best matched category, support max now. Placeholder for further extension
QualityPriority float32 `json:"quality_priority,omitempty"` // 0.0-1.0, only for "auto" model
LatencyPriority float32 `json:"latency_priority,omitempty"` // 0.0-1.0, only for "auto" model
}

// BatchEmbeddingSimilarityMatchResponse represents a response to find the similarity between a query and configurable keywords
type BatchEmbeddingSimilarityMatchResponse struct {
Query string `json:"query"` // Query text
KeywordMatches []KeywordSimilarityMatchResponse `json:"keyword_matches"` // Array of KeywordSimilarityMatchResponse
AggregationMethod string `json:"aggregation_method"` // Aggregation method to pick the best matched category, support max now. Placeholder for further extension
BestMatchedCategory string `json:"best_matched_category"` // The best matched category based on the aggregation method above
}

// BatchSimilarityMatch represents a single match in batch similarity matching
type BatchSimilarityMatch struct {
Index int `json:"index"` // Index of the candidate in the input array
Expand Down Expand Up @@ -691,6 +729,12 @@ func (s *ClassificationAPIServer) handleCombinedClassification(w http.ResponseWr
s.writeErrorResponse(w, http.StatusNotImplemented, "NOT_IMPLEMENTED", "Combined classification not implemented yet")
}

// Placeholder funtion to fusion all the designed internal signal providers: Keyword matcher, reges scanner, embedding similarity, BERT classifier
// func (s *ClassificationAPIServer) handleAllInTreeSinganlProviders(w http.ResponseWriter, _ *http.Request) {
// response, err := s.classificationSvc.handleBatchClassification(..)

// }

func (s *ClassificationAPIServer) handleBatchClassification(w http.ResponseWriter, r *http.Request) {
// Record batch classification request
metrics.RecordBatchClassificationRequest("unified")
Expand Down Expand Up @@ -1602,3 +1646,117 @@ func (s *ClassificationAPIServer) handleBatchSimilarity(w http.ResponseWriter, r

s.writeJSONResponse(w, http.StatusOK, response)
}

// handleBatchInTreeSimilarityMatching handles batch embedding based similarity matching requests
func (s *ClassificationAPIServer) handleBatchInTreeSimilarityMatching(w http.ResponseWriter, r *http.Request) {
// Parse request
var req BatchEmbeddingSimilarityMatchRequest
if err := s.parseJSONRequest(r, &req); err != nil {
s.writeErrorResponse(w, http.StatusBadRequest, "INVALID_INPUT", err.Error())
return
}

// Validate input
if req.Query == "" {
s.writeErrorResponse(w, http.StatusBadRequest, "INVALID_INPUT", "query must be provided")
return
}
if len(req.Keywords) == 0 {
s.writeErrorResponse(w, http.StatusBadRequest, "INVALID_INPUT", "keyword array cannot be empty")
return
}

// Set defaults
if req.Model == "" {
req.Model = "auto"
}
if req.Dimension == 0 {
req.Dimension = 768 // Default to full dimension
}
if req.Model == "auto" && req.QualityPriority == 0 && req.LatencyPriority == 0 {
req.QualityPriority = 0.5
req.LatencyPriority = 0.5
}

// Validate dimension
validDimensions := map[int]bool{128: true, 256: true, 512: true, 768: true, 1024: true}
if !validDimensions[req.Dimension] {
s.writeErrorResponse(w, http.StatusBadRequest, "INVALID_DIMENSION",
fmt.Sprintf("dimension must be one of: 128, 256, 512, 768, 1024 (got %d)", req.Dimension))
return
}

// Calculate batch similarity
result, err := candle_binding.CalculateSimilarityBatch(
req.Query,
req.Keywords,
0, // return scores for all the keywords
req.Model,
req.Dimension,
)
if err != nil {
s.writeErrorResponse(w, http.StatusInternalServerError, "BATCH_SIMILARITY_FAILED",
fmt.Sprintf("failed to calculate batch similarity: %v", err))
return
}

// Build embedding based similarity response
matches := make([]KeywordSimilarityMatchResponse, len(result.Matches))
for i, match := range result.Matches {
if match.Similarity >= req.SimilarityThresholds[i].SimilarityThreshold {
matches[i] = KeywordSimilarityMatchResponse{
Index: match.Index,
SimilarityThreshold: req.SimilarityThresholds[i].SimilarityThreshold,
SimilarityCalculated: match.Similarity,
Keyword: req.Keywords[match.Index],
Matched: true,
ModelUsed: result.ModelType,
ProcessingTimeMs: result.ProcessingTimeMs,
}
} else {
matches[i] = KeywordSimilarityMatchResponse{
Index: match.Index,
SimilarityThreshold: req.SimilarityThresholds[i].SimilarityThreshold,
SimilarityCalculated: match.Similarity,
Keyword: req.Keywords[match.Index],
Matched: false,
ModelUsed: result.ModelType,
ProcessingTimeMs: result.ProcessingTimeMs,
}
}
}
// Validate input
if req.AggregationMethod != "" && req.AggregationMethod != "max" {
s.writeErrorResponse(w, http.StatusBadRequest, "INVALID_INPUT", "Aggregation method only supports max now")
return
}
var aggregationMethod string
// Set default value
if req.AggregationMethod == "" {
aggregationMethod = "max"
}
// Support mean/max/any aggregation methods to find the best match
var bestMatchedCategory string
var bestScore float32
if aggregationMethod == "max" {
// pick the most matched category based on max of all cosine similarity scores
for _, match := range matches {
if match.SimilarityCalculated > bestScore {
bestScore = match.SimilarityCalculated
bestMatchedCategory = match.Keyword
}
}
}
// Make Response
response := BatchEmbeddingSimilarityMatchResponse{
Query: req.Query,
KeywordMatches: matches,
AggregationMethod: aggregationMethod,
BestMatchedCategory: bestMatchedCategory,
}

observability.Infof("Calculated batch embedding similarity: query='%s', %d keywords, (model: %s, took: %.2fms)",
req.Query, len(req.Keywords), result.ModelType, result.ProcessingTimeMs)

s.writeJSONResponse(w, http.StatusOK, response)
}
58 changes: 58 additions & 0 deletions website/src/components/ScrollToTop/index.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import React, { useEffect, useState } from 'react'
import styles from './styles.module.css'

export default function ScrollToTop(): React.ReactElement {
const [isVisible, setIsVisible] = useState(false)

useEffect(() => {
const toggleVisibility = () => {
if (window.pageYOffset > 300) {
setIsVisible(true)
}
else {
setIsVisible(false)
}
}

window.addEventListener('scroll', toggleVisibility)

return () => {
window.removeEventListener('scroll', toggleVisibility)
}
}, [])

const scrollToTop = () => {
window.scrollTo({
top: 0,
behavior: 'smooth',
})
}

return (
<>
{isVisible && (
<button
onClick={scrollToTop}
className={styles.scrollToTop}
aria-label="Scroll to top"
>
<svg
width="24"
height="24"
viewBox="0 0 24 24"
fill="none"
xmlns="http://www.w3.org/2000/svg"
>
<path
d="M12 19V5M12 5L5 12M12 5L19 12"
stroke="currentColor"
strokeWidth="2"
strokeLinecap="round"
strokeLinejoin="round"
/>
</svg>
</button>
)}
</>
)
}
93 changes: 93 additions & 0 deletions website/src/components/ScrollToTop/styles.module.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
.scrollToTop {
position: fixed;
bottom: 2rem;
right: 2rem;
width: 50px;
height: 50px;
border-radius: 50%;
background: linear-gradient(45deg, var(--tech-primary-blue), var(--tech-accent-purple));
color: white;
border: none;
cursor: pointer;
display: flex;
align-items: center;
justify-content: center;
box-shadow: 0 4px 12px rgba(9, 105, 218, 0.3);
transition: all 0.3s ease;
z-index: 999;
opacity: 0;
transform: translateY(20px);
animation: fadeInUp 0.3s ease forwards;
}

@keyframes fadeInUp {
to {
opacity: 1;
transform: translateY(0);
}
}

.scrollToTop:hover {
transform: translateY(-4px) scale(1.1);
box-shadow: 0 8px 20px rgba(9, 105, 218, 0.4);
}

.scrollToTop:active {
transform: translateY(-2px) scale(1.05);
}

[data-theme='dark'] .scrollToTop {
background: linear-gradient(45deg, var(--tech-primary-blue), var(--tech-accent-orange));
box-shadow: 0 4px 12px rgba(88, 166, 255, 0.4);
}

[data-theme='dark'] .scrollToTop:hover {
box-shadow: 0 8px 20px rgba(88, 166, 255, 0.5);
}

@media (max-width: 768px) {
.scrollToTop {
bottom: 1.5rem;
right: 1.5rem;
width: 45px;
height: 45px;
}
}

.scrollToTop::before {
content: '';
position: absolute;
top: 0;
left: 0;
right: 0;
bottom: 0;
border-radius: 50%;
background: inherit;
opacity: 0;
animation: pulse 2s ease-out infinite;
}

@keyframes pulse {
0% {
transform: scale(1);
opacity: 0.5;
}
50% {
transform: scale(1.1);
opacity: 0.3;
}
100% {
transform: scale(1.3);
opacity: 0;
}
}

.scrollToTop svg {
position: relative;
z-index: 1;
transition: transform 0.3s ease;
}

.scrollToTop:hover svg {
transform: translateY(-2px);
}
12 changes: 12 additions & 0 deletions website/src/theme/Root.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import React from 'react'
import Root from '@theme-original/Root'
import ScrollToTop from '../components/ScrollToTop'

export default function RootWrapper(props: any): React.ReactElement {
return (
<>
<Root {...props} />
<ScrollToTop />
</>
)
}
Loading