Skip to content

Commit 465fabc

Browse files
authored
Merge pull request #4 from devmotion/updates
Update CI and docs
2 parents 1f5e497 + 1fb52cd commit 465fabc

File tree

7 files changed

+141
-45
lines changed

7 files changed

+141
-45
lines changed

.appveyor.yml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
environment:
33
matrix:
44
- julia_version: 1.1
5+
- julia_version: 1.2
56
- julia_version: nightly
67
platform:
78
- x86
@@ -26,6 +27,3 @@ build_script:
2627
test_script:
2728
- echo "%JL_TEST_SCRIPT%"
2829
- C:\julia\bin\julia -e "%JL_TEST_SCRIPT%"
29-
on_success:
30-
- echo "%JL_CODECOV_SCRIPT%"
31-
- C:\julia\bin\julia -e "%JL_CODECOV_SCRIPT%"

.travis.yml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ os:
55
- osx
66
julia:
77
- 1.1
8+
- 1.2
89
- nightly
910
matrix:
1011
allow_failures:
@@ -13,18 +14,18 @@ matrix:
1314
notifications:
1415
email: false
1516
after_success:
16-
- if [[ $TRAVIS_JULIA_VERSION = 1.1 ]] && [[ $TRAVIS_OS_NAME = linux ]]; then
17+
- if [[ $TRAVIS_JULIA_VERSION = 1.2 ]] && [[ $TRAVIS_OS_NAME = linux ]]; then
1718
julia -e 'using Pkg; Pkg.add("Coverage"); using Coverage; Codecov.submit(process_folder())';
1819
julia -e 'using Pkg; Pkg.add("Coverage"); using Coverage; Coveralls.submit(process_folder())';
1920
fi
2021
jobs:
2122
include:
2223
- stage: Documentation
23-
julia: 1.1
24+
julia: 1.2
2425
os: linux
2526
env:
2627
- GKSwstype=nul
2728
script:
2829
- julia --project=docs -e 'using Pkg; Pkg.instantiate()'
2930
- julia --project=docs --color=yes docs/make.jl
30-
after_success: skip
31+
after_success: skip

Project.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
1212
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
1313

1414
[compat]
15+
DataStructures = "0.17"
16+
Distances = "0.8.2"
17+
Parameters = "0.12"
18+
StatsBase = "0.32"
1519
julia = "1.1"
1620

1721
[extras]

docs/Manifest.toml

Lines changed: 24 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,10 @@ uuid = "3da002f7-5984-5a60-b8a6-cbb66c0b333f"
3434
version = "0.8.0"
3535

3636
[[Colors]]
37-
deps = ["ColorTypes", "FixedPointNumbers", "InteractiveUtils", "Printf", "Reexport", "Test"]
38-
git-tree-sha1 = "9f0a0210450acb91c730b730a994f8eef1d3d543"
37+
deps = ["ColorTypes", "FixedPointNumbers", "InteractiveUtils", "Printf", "Reexport"]
38+
git-tree-sha1 = "c9c1845d6bf22e34738bee65c357a69f416ed5d1"
3939
uuid = "5ae59095-9a9b-59fe-a467-6f913c188581"
40-
version = "0.9.5"
40+
version = "0.9.6"
4141

4242
[[Compat]]
4343
deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"]
@@ -72,9 +72,9 @@ uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab"
7272

7373
[[Distances]]
7474
deps = ["LinearAlgebra", "Statistics"]
75-
git-tree-sha1 = "44bd29b50552dfd0a0b674b925de2719f3b9bb0b"
75+
git-tree-sha1 = "23717536c81b63e250f682b0e0933769eecd1411"
7676
uuid = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
77-
version = "0.8.1"
77+
version = "0.8.2"
7878

7979
[[Distributed]]
8080
deps = ["Random", "Serialization", "Sockets"]
@@ -94,15 +94,15 @@ version = "0.8.0"
9494

9595
[[Documenter]]
9696
deps = ["Base64", "DocStringExtensions", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "REPL", "Test", "Unicode"]
97-
git-tree-sha1 = "c61d6eedbc3c4323c08b64af12d29c8ee0fcbb5f"
97+
git-tree-sha1 = "1b6ae3796f60311e39cd1770566140d2c056e87f"
9898
uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
99-
version = "0.23.2"
99+
version = "0.23.3"
100100

101101
[[FFMPEG]]
102102
deps = ["BinaryProvider", "Libdl"]
103-
git-tree-sha1 = "1dd2128ff10894081f30931b355dc892d1352de9"
103+
git-tree-sha1 = "f65cf703281fb7917beca5ead1c67e6d60ef9597"
104104
uuid = "c87230d0-a227-11e9-1b43-d7ebe4e7570a"
105-
version = "0.2.2"
105+
version = "0.2.3"
106106

107107
[[FixedPointNumbers]]
108108
git-tree-sha1 = "d14a6fa5890ea3a7e5dcab6811114f132fec2b4b"
@@ -117,9 +117,9 @@ version = "0.41.0"
117117

118118
[[GeometryTypes]]
119119
deps = ["ColorTypes", "FixedPointNumbers", "IterTools", "LinearAlgebra", "StaticArrays"]
120-
git-tree-sha1 = "2b0bfb379a54bdfcd2942f388f7d045f8952373d"
120+
git-tree-sha1 = "4bf5706f3b9a2c5adbbc473c8c91582c1fa816a3"
121121
uuid = "4d00f742-c7ba-57c2-abde-4428a4b178cb"
122-
version = "0.7.5"
122+
version = "0.7.6"
123123

124124
[[InteractiveUtils]]
125125
deps = ["Markdown"]
@@ -148,9 +148,9 @@ uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
148148

149149
[[Literate]]
150150
deps = ["Base64", "JSON", "REPL"]
151-
git-tree-sha1 = "04913ce466978fad4eb666c9f5fafc718fcc4366"
151+
git-tree-sha1 = "707c58359f2de555ace074313baea957c3187f2b"
152152
uuid = "98b081ad-f1c9-55d3-8b20-4c87d4299306"
153-
version = "2.0.3"
153+
version = "2.0.4"
154154

155155
[[Logging]]
156156
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
@@ -166,10 +166,9 @@ uuid = "442fdcdd-2543-5da2-b0f3-8c86c306513e"
166166
version = "0.3.0"
167167

168168
[[Missings]]
169-
deps = ["SparseArrays", "Test"]
170-
git-tree-sha1 = "f0719736664b4358aa9ec173077d4285775f8007"
169+
git-tree-sha1 = "29858ce6c8ae629cf2d733bffa329619a1c843d0"
171170
uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28"
172-
version = "0.4.1"
171+
version = "0.4.2"
173172

174173
[[Mmap]]
175174
uuid = "a63ad114-7e13-5084-954f-fe012c677804"
@@ -188,21 +187,21 @@ version = "1.1.0"
188187

189188
[[PDMats]]
190189
deps = ["Arpack", "LinearAlgebra", "SparseArrays", "SuiteSparse", "Test"]
191-
git-tree-sha1 = "9d6a9b3e19634612fb1edcafc4b1d75242b24bde"
190+
git-tree-sha1 = "035f8d60ba2a22cb1d2580b1e0e5ce0cb05e4563"
192191
uuid = "90014a1f-27ba-587c-ab20-58faa44d9150"
193-
version = "0.9.9"
192+
version = "0.9.10"
194193

195194
[[Parameters]]
196195
deps = ["OrderedCollections"]
197-
git-tree-sha1 = "1dfd7cd50a8eb06ef693a4c2bbe945943cd000c5"
196+
git-tree-sha1 = "b62b2558efb1eef1fa44e4be5ff58a515c287e38"
198197
uuid = "d96e819e-fc66-5662-9728-84c9c7592b0a"
199-
version = "0.11.0"
198+
version = "0.12.0"
200199

201200
[[Parsers]]
202201
deps = ["Dates", "Test"]
203-
git-tree-sha1 = "db2b35dedab3c0e46dc15996d170af07a5ab91c9"
202+
git-tree-sha1 = "ef0af6c8601db18c282d092ccbd2f01f3f0cd70b"
204203
uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
205-
version = "0.3.6"
204+
version = "0.3.7"
206205

207206
[[Pkg]]
208207
deps = ["Dates", "LibGit2", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"]
@@ -222,9 +221,9 @@ version = "0.5.8"
222221

223222
[[Plots]]
224223
deps = ["Base64", "Contour", "Dates", "FFMPEG", "FixedPointNumbers", "GR", "GeometryTypes", "JSON", "LinearAlgebra", "Measures", "NaNMath", "Pkg", "PlotThemes", "PlotUtils", "Printf", "REPL", "Random", "RecipesBase", "Reexport", "Requires", "Showoff", "SparseArrays", "Statistics", "StatsBase", "UUIDs"]
225-
git-tree-sha1 = "f2aa8a7b5bc0ccec57a1237a97b6f59fc8d9ef57"
224+
git-tree-sha1 = "59bcea95a16912abb229209c9f6e9e218df44b7c"
226225
uuid = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
227-
version = "0.26.2"
226+
version = "0.26.3"
228227

229228
[[Printf]]
230229
deps = ["Unicode"]
@@ -325,7 +324,7 @@ uuid = "4c63d2b9-4356-54db-8cca-17b64c39e42c"
325324
version = "0.8.0"
326325

327326
[[SuiteSparse]]
328-
deps = ["Libdl", "LinearAlgebra", "SparseArrays"]
327+
deps = ["Libdl", "LinearAlgebra", "Serialization", "SparseArrays"]
329328
uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9"
330329

331330
[[Test]]

docs/src/background.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
A probabilistic model predicts a probability distribution of possible outputs
66
for a given input.
77

8-
A very simple probabilistic model is a model that predicts a uniform
8+
A very simple probabilistic model is a model that predicts a uniform
99
distribution for a dice roll; there is no input and the possible outputs are
1010
the numbers $1,2,3,4,5,6$. An arguably more complicated probabilistic model
1111
would be a model that predicts the distribution of stock price changes from
@@ -34,5 +34,5 @@ and [integral probability metrics](https://arxiv.org/pdf/0901.2698.pdf).
3434
Here we restrict ourselves to classification models, i.e., models for which output
3535
$Y$ takes only values from a finite set.
3636

37-
The dice roll model above is a classification model, whereas the model that predicts
38-
stock price changes is not.
37+
The dice roll model above is a classification model, whereas the model that
38+
predicts stock price changes is not.

docs/src/calibration.md

Lines changed: 98 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,9 @@ is $x$, the long-run relative frequency of rain is also $x$".
3434

3535
Commonly (see, e.g.,
3636
[Guo et al. (2017)](http://proceedings.mlr.press/v70/guo17a/guo17a.pdf)), only
37-
calibration of the largest predictions $\max_y g_y(x)$ of a model $g$ is considered.
38-
According to this common notion a model is calibrated if almost always
37+
calibration of the most-confident predictions $\max_y g_y(x)$ of a model $g$ is
38+
considered. According to this common notion a model is calibrated if almost
39+
always
3940
```math
4041
\mathbb{P}[Y = \textrm{arg} \, \max_y g_y(X) \,|\, \max_y g_y(X)] = \max_y g_y(X).
4142
```
@@ -44,9 +45,102 @@ According to this common notion a model is calibrated if almost always
4445

4546
According to the more general definition by
4647
[Bröcker (2009)](https://rmets.onlinelibrary.wiley.com/doi/pdf/10.1002/qj.456)
47-
and [Vaicenavicius et al.](http://proceedings.mlr.press/v89/vaicenavicius19a/vaicenavicius19a.pdf),
48+
and [Vaicenavicius et al. (2019)](http://proceedings.mlr.press/v89/vaicenavicius19a/vaicenavicius19a.pdf),
4849
a probabilistic model $g$ is calibrated if almost always
4950
```math
5051
\mathbb{P}[Y = y \,|\, g(X)] = g_y(X)
5152
```
52-
for all classes $y$.
53+
for all classes $y$.
54+
55+
For classification problems with more than two classes, this definition of
56+
calibration is stronger than the more common one above. By reducing the model
57+
and applying the strong notion to the simplified model, however, this definition
58+
still allows one to investigate the calibration of the model with respect to only
59+
certain aspects of interest such as the calibration of the most-confident
60+
predictions.
61+
62+
Thus in this Julia package and its documentation, we always refer to the strong
63+
notion of calibration.
64+
65+
Let $y_1, \ldots, y_m$ be the possible outputs. Then we can also define
66+
calibration in a vectorized form. Equivalently to the definition above, a model
67+
$g$ is calibrated if and only if
68+
```math
69+
r(g(X)) - g(X) = 0
70+
```
71+
holds almost always, where
72+
```math
73+
r(\xi) := (\mathbb{P}[Y = y_1 \,|\, g(X) = \xi], \ldots, \mathbb{P}[Y = y_m \,|\, g(X) = \xi])
74+
```
75+
denotes the so-called calibration function.
76+
77+
## Measures
78+
79+
Calibration measures allow a more fine-tuned analysis of calibration and enable
80+
comparisons of calibration of different models. Intuitively, calibration
81+
measures quantify the deviation between the left- and right-hand sides in the
82+
definitions above.
83+
84+
### Expected calibration error (ECE)
85+
86+
The most common calibration measure is the so-called expected calibration error
87+
(ECE) (see, e.g.,
88+
[Guo et al. (2017)](http://proceedings.mlr.press/v70/guo17a/guo17a.pdf)).
89+
Informally, it is defined as the average distance between the left and right
90+
hand side of the definition above with respect to some metric. Mathematically,
91+
the expected calibration error of model $g$ with respect to distance measure $d$ is
92+
defined as
93+
```math
94+
\mathrm{ECE}[d, g] := \mathbb{E}[d(r(g(X)), g(X))].
95+
```
96+
Here $d$ could be, e.g., the cityblock distance, the total variation distance,
97+
or the squared Euclidean distance.
98+
99+
If $d(p, q) = 0$ if and only if $p = q$, then the ECE of model $g$ with respect
100+
to distance measure $d$ is zero if and only if $g$ is calibrated.
101+
102+
### Calibration error (CE)
103+
104+
More generally, Widmann et al. (2019) define the calibration error (CE) of
105+
a model $g$ with respect to a function class $\mathcal{F} \subset \{f \colon
106+
\Delta^m \to \mathbb{R}^m\}$ as
107+
```math
108+
\mathrm{CE}[\mathcal{F}, g] := \sup_{f \in \mathcal{F}} \mathbb{E}[(r(g(X)) - g(X))^\intercal f(g(X))].
109+
```
110+
111+
If model $g$ is calibrated, then the CE is zero, regardless of the choice of
112+
$\mathcal{F}$. However, for some function spaces (e.g., for
113+
$\mathcal{F} = \{0\}$) the CE is zero even if $g$ is not calibrated.
114+
115+
Interestingly, the ECE with respect to the cityblock distance, the total
116+
variation distance, and the squared Euclidean distance are all special cases
117+
of the CE (Widmann et al. (2019)).
118+
119+
### Kernel calibration error (KCE)
120+
121+
The kernel calibration error (KCE) is another special case of the CE, in which
122+
the unit ball of a reproducing kernel Hilbert space (RKHS) of vector-valued
123+
functions is chosen as function space $\mathcal{F}$.
124+
125+
An RKHS of vector-valued functions $f \colon \Delta^m \to \mathbb{R}^m$ can be
126+
identified with a unique matrix-valued kernel $k \colon \Delta^m \times
127+
\Delta^m \to \mathbb{R}^{m \times m}$. Then the KCE of a model $g$ with respect
128+
to kernel $k$ is defined as
129+
```math
130+
\mathrm{KCE}[k, g] := \mathrm{CE}[\mathcal{F}, g],
131+
```
132+
where $\mathcal{F}$ is the unit ball of the RKHS corresponding to kernel $k$.
133+
134+
As Widmann et al. (2019) show, for a large class of kernels (so-called universal
135+
kernels) the KCE is zero if and only if the model $g$ is calibrated. Moreover,
136+
the KCE can be formulated in terms of the kernel $k$ as
137+
```math
138+
\mathrm{KCE}[k, g] = {\left(\mathbb{E}[(e_Y - g(X))^{\intercal} k(g(X), g(X')) (e_{Y'} - g(X'))]\right)}^{1/2},
139+
```
140+
where $(X',Y')$ is an independent copy of $(X,Y)$ and $e_i$ denotes the $i$th
141+
unit vector.
142+
143+
The so-called maximum mean calibration error (MMCE), proposed by
144+
[Kumar et al. (2018)](http://proceedings.mlr.press/v80/kumar18a/kumar18a.pdf),
145+
can be viewed as a special case of the KCE, in which only the most-confident
146+
predictions are considered (Widmann et al. (2019)).

examples/distribution.jl

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,10 @@
55

66
# ## Introduction
77
#
8-
# This example is taken from the forthcoming publication
9-
# "Calibration tests in multi-class classification: A unifying framework" by Widmann, Lindsten,
10-
# and Zachariah.
11-
#
8+
# This example is taken from the publication
9+
# "Calibration tests in multi-class classification: A unifying framework" by Widmann,
10+
# Lindsten, and Zachariah.
11+
#
1212
# We estimate calibration errors of the model
1313
# ```math
1414
# \begin{aligned}
@@ -27,7 +27,7 @@
2727
# probability simplex, and $\beta = (1, 0, \ldots, 0)$.
2828
#
2929
# In our experiments we sample 250 predictions from the Dirichlet distribution
30-
# $\textrm{Dir}(\alpha)$, and then we generate corresponding labels according to
30+
# $\textrm{Dir}(\alpha)$, and then we generate corresponding labels according to
3131
# the model stated above, for different choices of $\pi$ and number of classes $m$.
3232
#
3333
# We evaluate the standard estimators of expected calibration error (ECE) based on a
@@ -60,7 +60,7 @@ gr(fmt = :png, dpi = 600)
6060

6161
# ## Estimates
6262
#
63-
#
63+
#
6464

6565
function estimates(rng::AbstractRNG, estimator, π::Real, m::Int)
6666
## check arguments
@@ -243,4 +243,4 @@ plot(data)
243243

244244
Random.seed!(1234)
245245
data = estimates(x -> LinearUnbiasedSKCE(kernel(x)))
246-
plot(data)
246+
plot(data)

0 commit comments

Comments
 (0)