@@ -884,81 +884,122 @@ func (am *AlertManager) GetAlertHistory() []Alert {
884884
885885// CheckAlertRules checks all alert rules and triggers alerts if needed
886886func (am * AlertManager ) CheckAlertRules (metrics * AdvancedMetrics ) {
887- am .mu .Lock ()
888- defer am .mu .Unlock ()
887+ // Get a snapshot of alert rules to minimize lock time
888+ alertRules := am .getEnabledAlertRules ()
889+
890+ // Process each rule with minimal locking
891+ for _ , rule := range alertRules {
892+ am .processAlertRule (rule , metrics )
893+ }
894+
895+ // Clean up history
896+ am .cleanupAlertHistory ()
897+ }
889898
899+ // getEnabledAlertRules returns a snapshot of enabled alert rules
900+ func (am * AlertManager ) getEnabledAlertRules () []* AlertRule {
901+ am .mu .RLock ()
902+ defer am .mu .RUnlock ()
903+
904+ alertRules := make ([]* AlertRule , 0 , len (am .alertRules ))
890905 for _ , rule := range am .alertRules {
891- if ! rule .Enabled {
892- continue
906+ if rule .Enabled {
907+ alertRules = append ( alertRules , rule )
893908 }
909+ }
910+ return alertRules
911+ }
894912
895- // Get current metric value
896- metric := metrics .GetMetric (rule .Metric , nil )
897- if metric == nil {
898- continue
899- }
913+ // processAlertRule processes a single alert rule
914+ func (am * AlertManager ) processAlertRule (rule * AlertRule , metrics * AdvancedMetrics ) {
915+ // Get current metric value (metrics have their own synchronization)
916+ metric := metrics .GetMetric (rule .Metric , nil )
917+ if metric == nil {
918+ return
919+ }
920+
921+ // Check condition
922+ shouldAlert := am .evaluateAlertCondition (metric .Value , rule .Condition , rule .Threshold )
923+ alertID := fmt .Sprintf ("%s-%s" , rule .ID , rule .Metric )
924+
925+ am .mu .Lock ()
926+ defer am .mu .Unlock ()
927+
928+ if shouldAlert {
929+ am .createOrUpdateAlert (alertID , rule , metric )
930+ } else {
931+ am .resolveAlert (alertID )
932+ }
933+ }
934+
935+ // evaluateAlertCondition evaluates if an alert should be triggered
936+ func (am * AlertManager ) evaluateAlertCondition (value float64 , condition string , threshold float64 ) bool {
937+ switch condition {
938+ case ">" :
939+ return value > threshold
940+ case ">=" :
941+ return value >= threshold
942+ case "<" :
943+ return value < threshold
944+ case "<=" :
945+ return value <= threshold
946+ case "==" :
947+ return value == threshold
948+ case "!=" :
949+ return value != threshold
950+ default :
951+ return false
952+ }
953+ }
900954
901- // Check condition
902- shouldAlert := false
903- switch rule .Condition {
904- case ">" :
905- shouldAlert = metric .Value > rule .Threshold
906- case ">=" :
907- shouldAlert = metric .Value >= rule .Threshold
908- case "<" :
909- shouldAlert = metric .Value < rule .Threshold
910- case "<=" :
911- shouldAlert = metric .Value <= rule .Threshold
912- case "==" :
913- shouldAlert = metric .Value == rule .Threshold
914- case "!=" :
915- shouldAlert = metric .Value != rule .Threshold
955+ // createOrUpdateAlert creates or updates an alert
956+ func (am * AlertManager ) createOrUpdateAlert (alertID string , rule * AlertRule , metric * Metric ) {
957+ if existing , exists := am .activeAlerts [alertID ]; exists {
958+ // Update existing alert
959+ existing .CurrentValue = metric .Value
960+ existing .Timestamp = time .Now ()
961+ } else {
962+ // Create new alert
963+ alert := & Alert {
964+ ID : alertID ,
965+ Name : rule .Name ,
966+ Description : rule .Description ,
967+ Level : rule .Level ,
968+ Metric : rule .Metric ,
969+ CurrentValue : metric .Value ,
970+ Threshold : rule .Threshold ,
971+ Condition : rule .Condition ,
972+ Timestamp : time .Now (),
973+ Resolved : false ,
916974 }
975+ am .activeAlerts [alertID ] = alert
917976
918- // Create or update alert
919- alertID := fmt .Sprintf ("%s-%s" , rule .ID , rule .Metric )
920- if shouldAlert {
921- if existing , exists := am .activeAlerts [alertID ]; exists {
922- // Update existing alert
923- existing .CurrentValue = metric .Value
924- existing .Timestamp = time .Now ()
925- } else {
926- // Create new alert
927- alert := & Alert {
928- ID : alertID ,
929- Name : rule .Name ,
930- Description : rule .Description ,
931- Level : rule .Level ,
932- Metric : rule .Metric ,
933- CurrentValue : metric .Value ,
934- Threshold : rule .Threshold ,
935- Condition : rule .Condition ,
936- Timestamp : time .Now (),
937- Resolved : false ,
938- }
939- am .activeAlerts [alertID ] = alert
940-
941- // Send notification
942- select {
943- case am .alertChan <- alert :
944- default :
945- // Channel full, drop the notification
946- }
947- }
948- } else {
949- // Resolve alert if it exists
950- if existing , exists := am .activeAlerts [alertID ]; exists {
951- existing .Resolved = true
952- existing .ResolvedAt = time .Now ()
953-
954- // Move to history
955- am .alertHistory = append (am .alertHistory , * existing )
956- delete (am .activeAlerts , alertID )
957- }
977+ // Send notification
978+ select {
979+ case am .alertChan <- alert :
980+ default :
981+ // Channel full, drop the notification
958982 }
959983 }
984+ }
985+
986+ // resolveAlert resolves an alert if it exists
987+ func (am * AlertManager ) resolveAlert (alertID string ) {
988+ if existing , exists := am .activeAlerts [alertID ]; exists {
989+ existing .Resolved = true
990+ existing .ResolvedAt = time .Now ()
991+
992+ // Move to history
993+ am .alertHistory = append (am .alertHistory , * existing )
994+ delete (am .activeAlerts , alertID )
995+ }
996+ }
997+
998+ // cleanupAlertHistory keeps the history size manageable
999+ func (am * AlertManager ) cleanupAlertHistory () {
1000+ am .mu .Lock ()
1001+ defer am .mu .Unlock ()
9601002
961- // Keep history size manageable
9621003 if len (am .alertHistory ) > defaultAlertHistoryMaxSize {
9631004 am .alertHistory = am .alertHistory [1 :]
9641005 }
0 commit comments