1+ import warnings
2+
13import numpy as np
24from KDEpy import FFTKDE
35from scipy import stats
@@ -31,26 +33,29 @@ def count_modes(x, y, min_prom_frac=0.02, max_prom_frac=0.2):
3133 min_prom_frac: relative minimum of the prominence as a fraction of the maximum of y (smoothed)
3234 max_prom_frac: relative maximum of the prominence as a fraction of the maximum of y (smoothed)
3335 """
34- # Smooth outliers out
35- y_smooth = gaussian_filter1d (y , sigma = 2 )
36-
37- # Compute derivative, keep high percentile, to find "real" peaks
38- dy = np .gradient (y_smooth , x )
39- noise_est = np .percentile (np .abs (dy ), 90 )
40-
41- # Set prominence proportional to that noise, capped to fraction of max
42- max_y = np .max (y_smooth )
43- prom_est = np .clip (noise_est , min_prom_frac * max_y , max_prom_frac * max_y )
44-
45- # grid has been fitted by KDEpy already. Consider that peaks should be at
46- # least 5% of the data range appart. This helps keeping out "horns" at the
47- # top of a mode.
48- dx = x [1 ] - x [0 ]
49- x_range = x [- 1 ] - x [0 ]
50- min_distance = max (1 , int ((x_range * 0.05 ) / dx ))
51-
52- peaks , _ = find_peaks (y_smooth , prominence = prom_est , distance = min_distance )
53- return len (peaks ), x [peaks ], prom_est
36+ try :
37+ # Smooth outliers out
38+ y_smooth = gaussian_filter1d (y , sigma = 2 )
39+
40+ # Compute derivative, keep high percentile, to find "real" peaks
41+ dy = np .gradient (y_smooth , x )
42+ noise_est = np .percentile (np .abs (dy ), 90 )
43+
44+ # Set prominence proportional to that noise, capped to fraction of max
45+ max_y = np .max (y_smooth )
46+ prom_est = np .clip (noise_est , min_prom_frac * max_y , max_prom_frac * max_y )
47+
48+ # grid has been fitted by KDEpy already. Consider that peaks should be at
49+ # least 5% of the data range appart. This helps keeping out "horns" at the
50+ # top of a mode.
51+ dx = x [1 ] - x [0 ]
52+ x_range = x [- 1 ] - x [0 ]
53+ min_distance = max (1 , int ((x_range * 0.05 ) / dx ))
54+
55+ peaks , _ = find_peaks (y_smooth , prominence = prom_est , distance = min_distance )
56+ return len (peaks ), x [peaks ], prom_est
57+ except Exception :
58+ return 0 , None , None
5459
5560
5661# Return a list of interval correspondings to the modes in the data series,
@@ -109,18 +114,22 @@ def split_per_mode(data, intervals):
109114# the data is unimodal (or even normally distributed). Symmetry of the
110115# distribution isn't required
111116def bootstrap_median_diff_ci (a , b , n_iter = 1000 , alpha = 0.05 ):
112- data = (a , b )
113- res = bootstrap (
114- data ,
115- statistic = lambda * args : np .median (args [1 ]) - np .median (args [0 ]),
116- n_resamples = n_iter ,
117- confidence_level = 1 - alpha ,
118- method = "percentile" , # percentile doesn't require symmetry
119- vectorized = False ,
120- paired = False ,
121- random_state = None ,
122- )
123- return np .median (b ) - np .median (a ), res .confidence_interval
117+ try :
118+ data = (a , b )
119+
120+ res = bootstrap (
121+ data ,
122+ statistic = lambda * args : np .median (args [1 ]) - np .median (args [0 ]),
123+ n_resamples = n_iter ,
124+ confidence_level = 1 - alpha ,
125+ method = "percentile" , # percentile doesn't require symmetry
126+ vectorized = False ,
127+ paired = False ,
128+ random_state = None ,
129+ )
130+ return np .median (b ) - np .median (a ), res .confidence_interval
131+ except Exception :
132+ return None , (None , None )
124133
125134
126135# Normality test. using Shapiro-Wilk based on this:
@@ -380,15 +389,40 @@ def interpret_cles(
380389
381390def interpret_silverman_kde (base_data , new_data , lower_is_better ):
382391 try :
392+ warning_msgs = []
393+ base_mode_count , base_peak_locs , base_prom = 0 , None , None
394+ new_mode_count , new_peak_locs , new_prom = 0 , None , None
395+ x_base , y_base = [], []
396+ x_new , y_new = [], []
383397 # Kernel Density Estimator (KDE) - estimate the probability density function (PDF) of a continuous random variable in a smooth, non-parametric way, based on a finite sample of data.
384- x_base , y_base = FFTKDE (kernel = "gaussian" , bw = "silverman" ).fit (base_data ).evaluate ()
385- x_new , y_new = FFTKDE (kernel = "gaussian" , bw = "silverman" ).fit (new_data ).evaluate ()
398+ # 1 datapoint will result in a Bandwidth = 0 → divide-by-zero warning
399+ if len (base_data ) > 0 :
400+ try :
401+ with warnings .catch_warnings ():
402+ warnings .simplefilter ("ignore" , category = UserWarning )
403+ x_base , y_base = (
404+ FFTKDE (kernel = "gaussian" , bw = "silverman" ).fit (base_data ).evaluate ()
405+ )
406+ base_mode_count , base_peak_locs , base_prom = count_modes (x_base , y_base )
407+ except Exception :
408+ warning_msgs .append ("Cannot Silverman KDE for base. Likely not enough data." )
409+ else :
410+ warning_msgs .append ("Base revision has less than 2 data points to run Silverman KDE." )
411+ if len (new_data ) > 0 :
412+ try :
413+ with warnings .catch_warnings ():
414+ warnings .simplefilter ("ignore" , category = UserWarning )
415+ x_new , y_new = (
416+ FFTKDE (kernel = "gaussian" , bw = "silverman" ).fit (new_data ).evaluate ()
417+ )
418+ new_mode_count , new_peak_locs , new_prom = count_modes (x_new , y_new )
419+ except Exception :
420+ warning_msgs .append ("Cannot Silverman KDE for new. Likely not enough data." )
421+ else :
422+ warning_msgs .append ("New revision has less than 2 data points to run Silverman KDE." )
386423
387424 # Estimate modes, and warn if multimodal. We estimate the modes w/ a bandwidth computed via silverman's method because it smoothes a bit more
388- base_mode_count , base_peak_locs , base_prom = count_modes (x_base , y_base )
389- new_mode_count , new_peak_locs , new_prom = count_modes (x_new , y_new )
390425
391- warning_msgs = []
392426 if base_mode_count > 1 :
393427 warning_msgs .append ("Base revision distribution appears multimodal." )
394428 if new_mode_count > 1 :
@@ -506,8 +540,8 @@ def plot_kde_with_isj_bandwidth(base, new):
506540 padding = 0.05 * (x_max - x_min ) if x_max > x_min else 1
507541 x_min , x_max = x_min - padding , x_max + padding
508542
509- # Generate grid points defaults to 512 as commonly used
510- x_grid = np .linspace (x_min , x_max , 512 )
543+ # Generate grid points
544+ x_grid = np .linspace (x_min , x_max , 256 )
511545
512546 kde_x_base = []
513547 kde_y_base = []
@@ -530,29 +564,62 @@ def plot_kde_with_isj_bandwidth(base, new):
530564
531565 try :
532566 # min 2 data point. FFTKDE(bw="ISJ").fit measure the data's spread or variance. At less than 2, variance is undefined, cannot compute a bandwidth.
533- if len (base ) > 1 :
534- kde_base = FFTKDE (bw = "ISJ" ).fit (base )
535- y_base = kde_base .evaluate (x_grid )
536- kde_x_base = x_grid .tolist ()
537- kde_y_base = y_base .tolist ()
538- kde_plot_base ["kde_x" ] = kde_x_base
539- kde_plot_base ["kde_y" ] = kde_y_base
567+ if len (base ) > 1 and np .std (base ) != 0 :
568+ try :
569+ with warnings .catch_warnings ():
570+ warnings .simplefilter ("ignore" )
571+ kde_base = FFTKDE (bw = "ISJ" ).fit (base )
572+ y_base = kde_base .evaluate (x_grid )
573+ kde_x_base = x_grid .tolist ()
574+ kde_y_base = y_base .tolist ()
575+ kde_plot_base ["kde_x" ] = kde_x_base
576+ kde_plot_base ["kde_y" ] = kde_y_base
577+ except Exception :
578+ kde_warnings .append (
579+ "KDE with ISJ bandwidth fitting to Base data failed. Likely not enough data or no standard deviation in data."
580+ )
581+ kde_plot_base ["kde_x" ] = base
540582 else :
541583 kde_warnings .append (
542- "Less than 2 datapoints, cannot fit Kernel Density Estimator (KDE) with an ISJ to Base."
584+ "Less than 2 datapoints or no standard variance for a meaningful fit Kernel Density Estimator (KDE) with an ISJ bandwidth to Base."
543585 )
544-
545- if len (new ) > 1 :
546- kde_new = FFTKDE (bw = "ISJ" ).fit (new )
547- y_new = kde_new .evaluate (x_grid )
548- kde_x_new = x_grid .tolist ()
549- kde_y_new = y_new .tolist ()
550- kde_plot_new ["kde_x" ] = kde_x_new
551- kde_plot_new ["kde_y" ] = kde_y_new
586+ # produce flat line for 0 data points
587+ if len (base ) == 0 :
588+ x = np .linspace (- 1 , 1 , 100 )
589+ y = np .zeros_like (x )
590+ kde_plot_base ["kde_x" ] = x .tolist ()
591+ kde_plot_base ["kde_y" ] = y .tolist ()
592+ else :
593+ kde_plot_base ["kde_x" ] = base
594+ kde_plot_base ["kde_y" ] = []
595+
596+ if len (new ) > 1 and np .std (new ) != 0 :
597+ try :
598+ with warnings .catch_warnings ():
599+ warnings .simplefilter ("ignore" )
600+ kde_new = FFTKDE (bw = "ISJ" ).fit (new )
601+ y_new = kde_new .evaluate (x_grid )
602+ kde_x_new = x_grid .tolist ()
603+ kde_y_new = y_new .tolist ()
604+ kde_plot_new ["kde_x" ] = kde_x_new
605+ kde_plot_new ["kde_y" ] = kde_y_new
606+ except Exception :
607+ kde_warnings .append (
608+ "KDE with ISJ bandwidth fitting to New data failed. Likely not enough data or no standard deviation in data."
609+ )
610+ kde_plot_new ["kde_x" ] = new
552611 else :
553612 kde_warnings .append (
554- "Less than 2 datapoints, cannot fit Kernel Density Estimator (KDE) with an ISJ to New."
613+ "Less than 2 datapoints or no standard variance for a meaningful fit Kernel Density Estimator (KDE) with an ISJ bandwidth to New."
555614 )
615+ # produce flat line for 0 data points
616+ if len (new ) == 0 :
617+ x = np .linspace (- 1 , 1 , 100 )
618+ y = np .zeros_like (x )
619+ kde_plot_new ["kde_x" ] = x .tolist ()
620+ kde_plot_new ["kde_y" ] = y .tolist ()
621+ else :
622+ kde_plot_new ["kde_x" ] = new
556623
557624 except Exception :
558625 # KDE failed, charts will show just medians
0 commit comments