Skip to content

Commit 384a108

Browse files
authored
stats: fix estimation in between row count (#5682)
1 parent 45d916b commit 384a108

File tree

2 files changed

+6
-1
lines changed

2 files changed

+6
-1
lines changed

statistics/histogram.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ package statistics
1616
import (
1717
"bytes"
1818
"fmt"
19+
"math"
1920
"strings"
2021
"time"
2122

@@ -344,8 +345,10 @@ func (hg *Histogram) lessAndEqRowCount(value types.Datum) float64 {
344345
func (hg *Histogram) betweenRowCount(a, b types.Datum) float64 {
345346
lessCountA := hg.lessRowCount(a)
346347
lessCountB := hg.lessRowCount(b)
348+
// If lessCountA is not less than lessCountB, it may be that they fall into the same bucket and we cannot estimate
349+
// the fraction, so we use `totalCount / NDV` to estimate the row count, but the result should not be greater than lessCountB.
347350
if lessCountA >= lessCountB {
348-
return hg.totalRowCount() / float64(hg.NDV)
351+
return math.Min(lessCountB, hg.totalRowCount()/float64(hg.NDV))
349352
}
350353
return lessCountB - lessCountA
351354
}

statistics/statistics_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,8 @@ func (s *testStatisticsSuite) TestBuild(c *C) {
271271
c.Check(int(count), Equals, 19999)
272272
count = col.betweenRowCount(encodeKey(types.NewIntDatum(30000)), encodeKey(types.NewIntDatum(35000)))
273273
c.Check(int(count), Equals, 4999)
274+
count = col.betweenRowCount(encodeKey(types.MinNotNullDatum()), encodeKey(types.NewIntDatum(0)))
275+
c.Check(int(count), Equals, 0)
274276
count = col.lessRowCount(encodeKey(types.NewIntDatum(0)))
275277
c.Check(int(count), Equals, 0)
276278

0 commit comments

Comments
 (0)