First-class support of SKCE for probability vectors and boolean targets (#85)

devmotion · devmotion · commit a140a02f8287 · 2021-04-28T17:38:36.000Z
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "CalibrationErrors"
 uuid = "33913031-fe46-5864-950f-100836f47845"
 authors = ["David Widmann <david.widmann@it.uu.se>"]
-version = "0.5.16"
+version = "0.5.17"
 
 [deps]
 DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
diff --git a/src/skce/generic.jl b/src/skce/generic.jl
@@ -88,6 +88,20 @@ function unsafe_skce_eval(
     return result
 end
 
+# for binary classification with probabilities (corresponding to parameters of Bernoulli
+# distributions) and boolean targets the expression `h` (for kernel `k`) simplifies to
+# ```math
+# h((p, y), (p̃, ỹ)) = (y(1-p) + (1-y)p)(ỹ(1-p̃) + (1-ỹ)p̃)(k((p, y), (p̃, ỹ)) - k((p, 1-y), (p̃, ỹ)) - k((p, y), (p̃, 1-ỹ)) + k((p, 1-y), (p̃, 1-ỹ)))
+# ```
+function unsafe_skce_eval(kernel::Kernel, p::Real, y::Bool, p̃::Real, ỹ::Bool)
+    # pair each probability with its observed target and with the negated target
+    x, xflip = (p, y), (p, !y)
+    x̃, x̃flip = (p̃, ỹ), (p̃, !ỹ)
+    z = kernel(x, x̃) - kernel(xflip, x̃) - kernel(x, x̃flip) + kernel(xflip, x̃flip)
+    weight = (y ? 1 - p : p) * (ỹ ? 1 - p̃ : p̃)
+    return weight * z
+end
+
 # evaluation for tensor product kernels
 function unsafe_skce_eval(kernel::KernelTensorProduct, p, y, p̃, ỹ)
     κpredictions, κtargets = kernel.kernels
@@ -105,6 +119,10 @@ function unsafe_skce_eval(
     κpredictions, κtargets = kernel.kernels
     return κpredictions(p, p̃) * unsafe_skce_eval_targets(κtargets, p, y, p̃, ỹ)
 end
+function unsafe_skce_eval(kernel::KernelTensorProduct, p::Real, y::Bool, p̃::Real, ỹ::Bool)
+    κp, κy = kernel.kernels  # first factor acts on predictions, second on targets
+    return κp(p, p̃) * unsafe_skce_eval_targets(κy, p, y, p̃, ỹ)
+end
 
 function unsafe_skce_eval_targets(
     κtargets::Kernel,
@@ -258,3 +276,13 @@ function unsafe_skce_eval_targets(
     @inbounds res = (y == ỹ) - p[ỹ] - p̃[y] + dot(p, p̃)
     return res
 end
+
+function unsafe_skce_eval_targets(κtargets::Kernel, p::Real, y::Bool, p̃::Real, ỹ::Bool)
+    # signed sum of the target kernel over the observed and the negated targets
+    z = κtargets(y, ỹ) - κtargets(!y, ỹ) - κtargets(y, !ỹ) + κtargets(!y, !ỹ)
+    weight = (y ? 1 - p : p) * (ỹ ? 1 - p̃ : p̃)
+    return weight * z
+end
+function unsafe_skce_eval_targets(::WhiteKernel, p::Real, y::Bool, p̃::Real, ỹ::Bool)
+    return 2 * (y - p) * (ỹ - p̃)  # closed form of the generic method for an equality kernel
+end
diff --git a/test/kernels.jl b/test/kernels.jl
@@ -1,51 +1,15 @@
 @testset "kernels.jl" begin
-    # alternative implementation of white kernel
-    struct WhiteKernel2 <: Kernel end
-    (::WhiteKernel2)(x, y) = x == y
+    kernel = TVExponentialKernel()
 
-    # alternative implementation TensorProductKernel
-    struct TensorProduct2{K1<:Kernel,K2<:Kernel} <: Kernel
-        kernel1::K1
-        kernel2::K2
-    end
-    function (kernel::TensorProduct2)((x1, x2), (y1, y2))
-        return kernel.kernel1(x1, y1) * kernel.kernel2(x2, y2)
-    end
+    # traits
+    @test KernelFunctions.metric(kernel) === TotalVariation()
 
-    @testset "TVExponentialKernel" begin
-        kernel = TVExponentialKernel()
+    # simple evaluation
+    x, y = rand(10), rand(10)
+    @test kernel(x, y) == exp(-totalvariation(x, y))
 
-        # traits
-        @test KernelFunctions.metric(kernel) === TotalVariation()
-
-        # simple evaluation
-        x, y = rand(10), rand(10)
-        @test kernel(x, y) == exp(-totalvariation(x, y))
-
-        # transformations
-        @test (kernel ∘ ScaleTransform(0.1))(x, y) == exp(-0.1 * totalvariation(x, y))
-        ard = rand(10)
-        @test (kernel ∘ ARDTransform(ard))(x, y) == exp(-totalvariation(ard .* x, ard .* y))
-    end
-
-    @testset "unsafe_skce_eval" begin
-        kernel = SqExponentialKernel()
-        kernel1 = kernel ⊗ WhiteKernel()
-        kernel2 = kernel ⊗ WhiteKernel2()
-        kernel3 = TensorProduct2(kernel, WhiteKernel())
-
-        x1, x2 = rand(10), rand(1:10)
-
-        @test CalibrationErrors.unsafe_skce_eval(kernel1, x1, x2, x1, x2) ≈
-              CalibrationErrors.unsafe_skce_eval(kernel2, x1, x2, x1, x2)
-        @test CalibrationErrors.unsafe_skce_eval(kernel1, x1, x2, x1, x2) ≈
-              CalibrationErrors.unsafe_skce_eval(kernel3, x1, x2, x1, x2)
-
-        y1, y2 = rand(10), rand(1:10)
-
-        @test CalibrationErrors.unsafe_skce_eval(kernel1, x1, x2, y1, y2) ≈
-              CalibrationErrors.unsafe_skce_eval(kernel2, x1, x2, y1, y2)
-        @test CalibrationErrors.unsafe_skce_eval(kernel1, x1, x2, y1, y2) ≈
-              CalibrationErrors.unsafe_skce_eval(kernel3, x1, x2, y1, y2)
-    end
+    # transformations
+    @test (kernel ∘ ScaleTransform(0.1))(x, y) == exp(-0.1 * totalvariation(x, y))
+    ard = rand(10)
+    @test (kernel ∘ ARDTransform(ard))(x, y) == exp(-totalvariation(ard .* x, ard .* y))
 end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -8,6 +8,8 @@ using Random
 using Statistics
 using Test
 
+using CalibrationErrors: unsafe_skce_eval
+
 Random.seed!(1234)
 
 @testset "CalibrationErrors" begin
@@ -32,6 +34,9 @@ Random.seed!(1234)
     end
 
     @testset "SKCE" begin
+        @testset "generic" begin
+            include("skce/generic.jl")
+        end
         @testset "biased" begin
             include("skce/biased.jl")
         end
diff --git a/test/skce/biased.jl b/test/skce/biased.jl
@@ -1,18 +1,25 @@
 @testset "biased.jl" begin
     @testset "Two-dimensional example" begin
+        # categorical distributions
         skce = BiasedSKCE(SqExponentialKernel() ⊗ WhiteKernel())
-
-        # only two predictions, i.e., three unique terms in the estimator
         @test iszero(@inferred(skce([[1, 0], [0, 1]], [1, 2])))
         @test @inferred(skce([[1, 0], [0, 1]], [1, 1])) ≈ 0.5
         @test @inferred(skce([[1, 0], [0, 1]], [2, 1])) ≈ 1 - exp(-1)
         @test @inferred(skce([[1, 0], [0, 1]], [2, 2])) ≈ 0.5
+
+        # probabilities
+        skce = BiasedSKCE((SqExponentialKernel() ∘ ScaleTransform(sqrt(2))) ⊗ WhiteKernel())
+        @test iszero(@inferred(skce([1, 0], [true, false])))
+        @test @inferred(skce([1, 0], [true, true])) ≈ 0.5
+        @test @inferred(skce([1, 0], [false, true])) ≈ 1 - exp(-1)
+        @test @inferred(skce([1, 0], [false, false])) ≈ 0.5
     end
 
     @testset "Basic properties" begin
         skce = BiasedSKCE((ExponentialKernel() ∘ ScaleTransform(0.1)) ⊗ WhiteKernel())
         estimates = Vector{Float64}(undef, 1_000)
 
+        # categorical distributions
         for nclasses in (2, 10, 100)
             dist = Dirichlet(nclasses, 1.0)
 
@@ -27,5 +34,17 @@
 
             @test all(x -> x > zero(x), estimates)
         end
+
+        # probabilities
+        predictions = Vector{Float64}(undef, 20)
+        targets = Vector{Bool}(undef, 20)
+        for i in 1:length(estimates)
+            rand!(predictions)
+            map!(targets, predictions) do p
+                rand() < p
+            end
+            estimates[i] = skce(predictions, targets)
+        end
+        @test all(x -> x > zero(x), estimates)
     end
 end
diff --git a/test/skce/generic.jl b/test/skce/generic.jl
@@ -0,0 +1,60 @@
+@testset "generic.jl" begin
+    # white kernel as a plain `Kernel` subtype, so no `WhiteKernel` specialization applies
+    struct WhiteKernel2 <: Kernel end
+    (::WhiteKernel2)(x, y) = x == y
+
+    # tensor product kernel on (prediction, target) pairs as a plain `Kernel` subtype
+    struct TensorProduct2{K1<:Kernel,K2<:Kernel} <: Kernel
+        kernel1::K1
+        kernel2::K2
+    end
+    function (kernel::TensorProduct2)((x1, x2), (y1, y2))
+        return kernel.kernel1(x1, y1) * kernel.kernel2(x2, y2)
+    end
+
+    @testset "binary classification" begin
+        # probabilities and boolean targets: all four kernel variants must agree
+        p, p̃ = rand(2)
+        y, ỹ = rand(Bool, 2)
+        scale = rand()
+        kernel = SqExponentialKernel() ∘ ScaleTransform(scale)
+        val = unsafe_skce_eval(kernel ⊗ WhiteKernel(), p, y, p̃, ỹ)
+        @test unsafe_skce_eval(kernel ⊗ WhiteKernel2(), p, y, p̃, ỹ) ≈ val
+        @test unsafe_skce_eval(TensorProduct2(kernel, WhiteKernel()), p, y, p̃, ỹ) ≈ val
+        @test unsafe_skce_eval(TensorProduct2(kernel, WhiteKernel2()), p, y, p̃, ỹ) ≈ val
+
+        # same sample as two-class categorical; ‖pfull - p̃full‖² = 2(p - p̃)², so scale / √2
+        pfull = [p, 1 - p]
+        yint = y ? 1 : 2
+        p̃full = [p̃, 1 - p̃]
+        ỹint = ỹ ? 1 : 2
+        kernelfull = SqExponentialKernel() ∘ ScaleTransform(scale / sqrt(2))
+
+        @test unsafe_skce_eval(kernelfull ⊗ WhiteKernel(), pfull, yint, p̃full, ỹint) ≈ val
+        @test unsafe_skce_eval(kernelfull ⊗ WhiteKernel2(), pfull, yint, p̃full, ỹint) ≈
+              val
+        @test unsafe_skce_eval(
+            TensorProduct2(kernelfull, WhiteKernel()), pfull, yint, p̃full, ỹint
+        ) ≈ val
+        @test unsafe_skce_eval(
+            TensorProduct2(kernelfull, WhiteKernel2()), pfull, yint, p̃full, ỹint
+        ) ≈ val
+    end
+
+    @testset "multi-class classification" begin
+        n = 10
+        p = rand(n)
+        p ./= sum(p)
+        y = rand(1:n)
+        p̃ = rand(n)
+        p̃ ./= sum(p̃)
+        ỹ = rand(1:n)
+
+        kernel = SqExponentialKernel() ∘ ScaleTransform(rand())
+        val = unsafe_skce_eval(kernel ⊗ WhiteKernel(), p, y, p̃, ỹ)
+
+        @test unsafe_skce_eval(kernel ⊗ WhiteKernel2(), p, y, p̃, ỹ) ≈ val
+        @test unsafe_skce_eval(TensorProduct2(kernel, WhiteKernel()), p, y, p̃, ỹ) ≈ val
+        @test unsafe_skce_eval(TensorProduct2(kernel, WhiteKernel2()), p, y, p̃, ỹ) ≈ val
+    end
+end
diff --git a/test/skce/unbiased.jl b/test/skce/unbiased.jl
@@ -1,18 +1,27 @@
 @testset "unbiased.jl" begin
     @testset "Unbiased: Two-dimensional example" begin
+        # categorical distributions
         skce = UnbiasedSKCE(SqExponentialKernel() ⊗ WhiteKernel())
-
-        # only two predictions, i.e., one term in the estimator
         @test iszero(@inferred(skce([[1, 0], [0, 1]], [1, 2])))
         @test iszero(@inferred(skce([[1, 0], [0, 1]], [1, 1])))
         @test @inferred(skce([[1, 0], [0, 1]], [2, 1])) ≈ -2 * exp(-1)
         @test iszero(@inferred(skce([[1, 0], [0, 1]], [2, 2])))
+
+        # probabilities
+        skce = UnbiasedSKCE(
+            (SqExponentialKernel() ∘ ScaleTransform(sqrt(2))) ⊗ WhiteKernel()
+        )
+        @test iszero(@inferred(skce([1, 0], [true, false])))
+        @test iszero(@inferred(skce([1, 0], [true, true])))
+        @test @inferred(skce([1, 0], [false, true])) ≈ -2 * exp(-1)
+        @test iszero(@inferred(skce([1, 0], [false, false])))
     end
 
     @testset "Unbiased: Basic properties" begin
         skce = UnbiasedSKCE((ExponentialKernel() ∘ ScaleTransform(0.1)) ⊗ WhiteKernel())
         estimates = Vector{Float64}(undef, 1_000)
 
+        # categorical distributions
         for nclasses in (2, 10, 100)
             dist = Dirichlet(nclasses, 1.0)
 
@@ -30,13 +39,26 @@
             @test any(x -> x < zero(x), estimates)
             @test mean(estimates) ≈ 0 atol = 1e-3
         end
+
+        # probabilities
+        predictions = Vector{Float64}(undef, 20)
+        targets = Vector{Bool}(undef, 20)
+        for i in 1:length(estimates)
+            rand!(predictions)
+            map!(targets, predictions) do p
+                rand() < p
+            end
+            estimates[i] = skce(predictions, targets)
+        end
+
+        @test any(x -> x > zero(x), estimates)
+        @test any(x -> x < zero(x), estimates)
+        @test mean(estimates) ≈ 0 atol = 1e-3
     end
 
     @testset "Block: Two-dimensional example" begin
-        # Blocks of two samples
+        # categorical distributions
         skce = BlockUnbiasedSKCE(SqExponentialKernel() ⊗ WhiteKernel())
-
-        # only two predictions, i.e., one term in the estimator
         @test iszero(@inferred(skce([[1, 0], [0, 1]], [1, 2])))
         @test iszero(@inferred(skce([[1, 0], [0, 1]], [1, 1])))
         @test @inferred(skce([[1, 0], [0, 1]], [2, 1])) ≈ -2 * exp(-1)
@@ -48,6 +70,21 @@
         @test @inferred(skce(repeat([[1, 0], [0, 1]], 10), repeat([2, 1], 10))) ≈
               -2 * exp(-1)
         @test iszero(@inferred(skce(repeat([[1, 0], [0, 1]], 10), repeat([2, 2], 10))))
+
+        # probabilities
+        skce = BlockUnbiasedSKCE(
+            (SqExponentialKernel() ∘ ScaleTransform(sqrt(2))) ⊗ WhiteKernel()
+        )
+        @test iszero(@inferred(skce([1, 0], [true, false])))
+        @test iszero(@inferred(skce([1, 0], [true, true])))
+        @test @inferred(skce([1, 0], [false, true])) ≈ -2 * exp(-1)
+        @test iszero(@inferred(skce([1, 0], [false, false])))
+
+        # two predictions, ten times replicated
+        @test iszero(@inferred(skce(repeat([1, 0], 10), repeat([true, false], 10))))
+        @test iszero(@inferred(skce(repeat([1, 0], 10), repeat([true, true], 10))))
+        @test @inferred(skce(repeat([1, 0], 10), repeat([false, true], 10))) ≈ -2 * exp(-1)
+        @test iszero(@inferred(skce(repeat([1, 0], 10), repeat([false, false], 10))))
     end
 
     @testset "Block: Basic properties" begin
@@ -58,6 +95,7 @@
         blockskce_all = BlockUnbiasedSKCE(kernel, nsamples)
         estimates = Vector{Float64}(undef, 1_000)
 
+        # categorical distributions
         for nclasses in (2, 10, 100)
             dist = Dirichlet(nclasses, 1.0)
 
@@ -82,5 +120,28 @@
             @test any(x -> x < zero(x), estimates)
             @test mean(estimates) ≈ 0 atol = 5e-3
         end
+
+        # probabilities
+        predictions = Vector{Float64}(undef, nsamples)
+        targets = Vector{Bool}(undef, nsamples)
+
+        for i in 1:length(estimates)
+            rand!(predictions)
+            map!(targets, predictions) do p
+                return rand() < p
+            end
+            estimates[i] = blockskce(predictions, targets)
+
+            # consistency checks
+            @test estimates[i] ≈ mean(
+                skce(predictions[(2 * i - 1):(2 * i)], targets[(2 * i - 1):(2 * i)]) for
+                i in 1:(nsamples ÷ 2)
+            )
+            @test skce(predictions, targets) == blockskce_all(predictions, targets)
+        end
+
+        @test any(x -> x > zero(x), estimates)
+        @test any(x -> x < zero(x), estimates)
+        @test mean(estimates) ≈ 0 atol = 5e-3
     end
 end