diff --git a/schema/collection.go b/schema/collection.go index 8c87f04d..5bec6a80 100644 --- a/schema/collection.go +++ b/schema/collection.go @@ -12,443 +12,464 @@ // See the License for the specific language governing permissions and // limitations under the License. -package schema +package filter import ( - "bytes" - "encoding/base64" - "encoding/json" - "fmt" - "math" - "strconv" - - jsoniter "github.com/json-iterator/go" - "github.com/santhosh-tekuri/jsonschema/v5" - "github.com/tigrisdata/tigris/errors" - tsApi "github.com/tigrisdata/typesense-go/typesense/api" + "bytes" + "fmt" + "regexp" + "strings" + + "github.com/tigrisdata/tigris/errors" + "github.com/tigrisdata/tigris/schema" + "github.com/tigrisdata/tigris/value" ) const ( - ObjFlattenDelimiter = "." + EQ = "$eq" + GT = "$gt" + LT = "$lt" + GTE = "$gte" + LTE = "$lte" + NOT = "$not" + REGEX = "$regex" + CONTAINS = "$contains" ) -// DefaultCollection is used to represent a collection. The tenant in the metadata package is responsible for creating -// the collection. -type DefaultCollection struct { - // Id is the dictionary encoded value for this collection. - Id uint32 - // SchVer returns the schema version - SchVer uint32 - // Name is the name of the collection. - Name string - // EncodedName is the encoded name of the collection. - EncodedName []byte - // EncodedTableIndexName is the encoded name of the collection's Secondary Index. - EncodedTableIndexName []byte - // Fields are derived from the user schema. - Fields []*Field - // Indexes is a wrapper on the indexes part of this collection. - // Primary Key contains the fields used to make up the primary key - PrimaryKey *Index - // Secondary SecondaryIndexes for this collection - SecondaryIndexes *Indexes - // Validator is used to validate the JSON document. As it is expensive to create this, it is only created once - // during constructor of the collection. - Validator *jsonschema.Schema - // JSON schema - Schema jsoniter.RawMessage - // SchemaDeltas contains incompatible schema changes from version to version - SchemaDeltas []VersionDelta - // FieldVersions contains the list of schema versions at which the field had incompatible change - FieldVersions map[string]*FieldVersions - // ImplicitSearchIndex is created by the Tigris to use a search index for in-memory indexes. This is needed till we move - // to secondary indexes which will be stored in FDB. - ImplicitSearchIndex *ImplicitSearchIndex - // search indexes are indexes that are explicitly created by the user and tagged Tigris as source. Collection will be - // responsible for ensuring these indexes are in sync when any mutation happens to this collection. - SearchIndexes map[string]*SearchIndex - // QueryableFields are similar to Fields but these are flattened forms of fields. For instance, a simple field - // will be one to one mapped to queryable field but complex fields like object type field there may be more than - // one queryableFields. As queryableFields represent a flattened state these can be used as-is to index in memory. - QueryableFields []*QueryableField - // CollectionType is the type of the collection. Only two types of collections are supported "messages" and "documents" - CollectionType CollectionType - // Track all the int64 paths in the collection. For example, if top level object has an int64 field then key would be - // obj.fieldName so that caller can easily navigate to this field. - int64FieldsPath *int64PathBuilder - // This is the existing fields in search - FieldsInSearch []tsApi.Field - - fieldsWithInsertDefaults map[string]struct{} - fieldsWithUpdateDefaults map[string]struct{} -} - -type CollectionType string +type Matcher interface { + // Type returns the type of the value matcher, syntactic sugar for logging, etc + Type() string +} -const ( - DocumentsType CollectionType = "documents" -) +type LikeMatcher interface { + Matcher -func (d *DefaultCollection) GetPrimaryKey() *Index { - return d.PrimaryKey -} - -func disableAdditionalPropertiesAndAllowNullable(required []string, properties map[string]*jsonschema.Schema) { - for name, p := range properties { - isRequired := false - for _, r := range required { - if r == name { - isRequired = true - break - } - } - if isRequired { - continue - } - - // add additional null types so that validation can succeed if fields are explicitly set as null - if len(p.Types) == 1 { - switch p.Types[0] { - case "string", "number", "object", "integer", "boolean": - p.Types = append(p.Types, "null") - case "array": - p.Types = append(p.Types, "null") - if items, ok := p.Items.(*jsonschema.Schema); ok { - if len(items.Properties) == 0 { - items.Types = append(items.Types, "null") - } else { - for _, itemsP := range items.Properties { - switch itemsP.Types[0] { - case "string", "number", "object", "integer", "boolean": - itemsP.Types = append(itemsP.Types, "null") - case "array": - if itemsA, ok := itemsP.Items.(*jsonschema.Schema); ok { - if len(itemsA.Properties) == 0 { - items.Types = append(items.Types, "null") - } else { - if itemsA.AdditionalProperties == nil { - itemsA.AdditionalProperties = false - } - disableAdditionalPropertiesAndAllowNullable(itemsA.Required, itemsA.Properties) - } - } - } - if len(itemsP.Properties) > 0 { - if itemsP.AdditionalProperties == nil { - itemsP.AdditionalProperties = false - } - disableAdditionalPropertiesAndAllowNullable(itemsP.Required, itemsP.Properties) - } - } - } - } - } - } - - // Enforce object schema if properties are specified and no additionalProperties explicitly set - if len(p.Properties) > 0 { - if p.AdditionalProperties == nil { - p.AdditionalProperties = false - } - disableAdditionalPropertiesAndAllowNullable(p.Required, p.Properties) - } - } -} - -func NewDefaultCollection(id uint32, schVer uint32, factory *Factory, schemas Versions, - implicitSearchIndex *ImplicitSearchIndex, -) (*DefaultCollection, error) { - url := factory.Name + ".json" - compiler := jsonschema.NewCompiler() - compiler.Draft = jsonschema.Draft7 // Format is only working for draft7 - if err := compiler.AddResource(url, bytes.NewReader(factory.Schema)); err != nil { - return nil, err - } - - validator := compiler.MustCompile(url) - - // Tigris doesn't allow additional fields as part of the write requests. Setting it to false ensures strict - // schema validation. - if validator.AdditionalProperties == nil { - validator.AdditionalProperties = false - } - disableAdditionalPropertiesAndAllowNullable(validator.Required, validator.Properties) - - var prevVersionInSearch []tsApi.Field - if implicitSearchIndex != nil { - prevVersionInSearch = implicitSearchIndex.prevVersionInSearch - } - queryableFields := NewQueryableFieldsBuilder().BuildQueryableFields(factory.Fields, prevVersionInSearch, factory.Indexes.IndexMetadata) - - schemaDeltas, err := buildSchemaDeltas(schemas) - if err != nil { - return nil, err - } - - fieldVersions := buildFieldVersions(schemaDeltas) - - d := &DefaultCollection{ - Id: id, - SchVer: schVer, - Name: factory.Name, - Fields: factory.Fields, - PrimaryKey: factory.PrimaryKey, - SecondaryIndexes: factory.Indexes, - Validator: validator, - Schema: factory.Schema, - QueryableFields: queryableFields, - CollectionType: factory.CollectionType, - ImplicitSearchIndex: implicitSearchIndex, - fieldsWithInsertDefaults: make(map[string]struct{}), - fieldsWithUpdateDefaults: make(map[string]struct{}), - SearchIndexes: make(map[string]*SearchIndex), - SchemaDeltas: schemaDeltas, - FieldVersions: fieldVersions, - int64FieldsPath: buildInt64Path(factory.Fields), - } - - // set fieldDefaulter for default fields - d.setFieldsForDefaults("", d.Fields) - - return d, nil -} - -func (d *DefaultCollection) AddSearchIndex(index *SearchIndex) { - d.SearchIndexes[index.Name] = index -} - -func (d *DefaultCollection) GetName() string { - return d.Name -} - -// SecondaryIndexKeyword is the subspace within a collection where the secondary index information -// is stored. -func (*DefaultCollection) SecondaryIndexKeyword() string { - return "skey" -} - -func (d *DefaultCollection) SecondaryIndexMetadata() bool { - return d.SecondaryIndexes.IndexMetadata -} - -func (d *DefaultCollection) GetVersion() uint32 { - return d.SchVer -} + // Matches checks if the value matches the condition + Matches(value any) bool +} -func (d *DefaultCollection) Type() CollectionType { - return d.CollectionType +type ArrayMatcher interface { + // ArrMatches checks if any element in the array matches the condition + ArrMatches(value []any) bool } -func (d *DefaultCollection) GetFields() []*Field { - return d.Fields +// ValueMatcher is an interface that has methods like Matches. +type ValueMatcher interface { + Matcher + ArrayMatcher + + // Matches returns true if the receiver has the value object that has the same value as input + Matches(input value.Value) bool + // GetValue returns the value on which the Matcher is operating + GetValue() value.Value } -func (d *DefaultCollection) GetIndexes() *Indexes { - return d.SecondaryIndexes +// NewMatcher returns ValueMatcher that is derived from the key. +func NewMatcher(key string, v value.Value) (ValueMatcher, error) { + switch key { + case EQ: + return &EqualityMatcher{Value: v}, nil + case GT: + return &GreaterThanMatcher{Value: v}, nil + case GTE: + return &GreaterThanEqMatcher{Value: v}, nil + case LT: + return &LessThanMatcher{Value: v}, nil + case LTE: + return &LessThanEqMatcher{Value: v}, nil + default: + return nil, errors.InvalidArgument("unsupported operand '%s'", key) + } } -func (d *DefaultCollection) GetQueryableFields() []*QueryableField { - return d.QueryableFields +// NewLikeMatcher returns LikeMatcher that is derived from the key. +func NewLikeMatcher(key string, input string, collation *value.Collation) (LikeMatcher, error) { + if collation == nil { + collation = value.EmptyCollation + } + + switch key { + case REGEX: + return NewRegexMatcher(input, collation) + case CONTAINS: + return NewContainsMatcher(input, collation) + case NOT: + return NewNotMatcher(input, collation) + default: + return nil, errors.InvalidArgument("unsupported operand '%s'", key) + } } -// GetActiveIndexedFields returns indexes that can be used for queries. -func (d *DefaultCollection) GetActiveIndexedFields() []*QueryableField { - var indexed []*QueryableField - for _, q := range d.QueryableFields { - if q.Indexed && d.SecondaryIndexes.IsActiveIndex(q.FieldName) { - indexed = append(indexed, q) - } - } - return indexed +// EqualityMatcher implements "$eq" operand. +type EqualityMatcher struct { + Value value.Value } -func (d *DefaultCollection) GetWriteModeIndexes() []*QueryableField { - var indexed []*QueryableField - for _, q := range d.QueryableFields { - if q.Indexed && !d.SecondaryIndexes.IsActiveIndex(q.FieldName) { - indexed = append(indexed, q) - } - } - return indexed +// NewEqualityMatcher returns EqualityMatcher object. +func NewEqualityMatcher(v value.Value) *EqualityMatcher { + return &EqualityMatcher{Value: v} } -func (d *DefaultCollection) GetIndexedFields() []*QueryableField { - var indexed []*QueryableField - for _, q := range d.QueryableFields { - if q.Indexed { - indexed = append(indexed, q) - } - } - return indexed +// GetValue returns the value on which the Matcher is operating +func (e *EqualityMatcher) GetValue() value.Value { + return e.Value } -func (d *DefaultCollection) GetPrimaryIndexedFields() []*QueryableField { - var indexed []*QueryableField - for _, q := range d.QueryableFields { - if q.PrimaryIndexed { - indexed = append(indexed, q) - } - } - return indexed +// Matches returns true if the input value is equal to the matcher's value +func (e *EqualityMatcher) Matches(input value.Value) bool { + res, _ := input.CompareTo(e.Value) + return res == 0 } -func (d *DefaultCollection) GetQueryableField(name string) (*QueryableField, error) { - for _, qf := range d.QueryableFields { - if qf.Name() == name { - return qf, nil - } - } - return nil, errors.InvalidArgument("Field `%s` is not present in collection", name) +// ArrMatches returns true if any element in the array is equal to the matcher's value +func (e *EqualityMatcher) ArrMatches(arr []any) bool { + for _, element := range arr { + if nestedArr, ok := element.([]any); ok { + // array of array + for _, ne := range nestedArr { + if value.AnyCompare(ne, e.Value) == 0 { + return true + } + } + } else if value.AnyCompare(element, e.Value) == 0 { + return true + } + } + return false } -func (d *DefaultCollection) GetField(name string) *Field { - for _, r := range d.Fields { - if r.FieldName == name { - return r - } - } +// Type returns the type of the matcher +func (*EqualityMatcher) Type() string { + return "$eq" +} + +// String returns the string representation of the matcher +func (e *EqualityMatcher) String() string { + return fmt.Sprintf("{$eq:%v}", e.Value) +} + +// GreaterThanMatcher implements "$gt" operand. +type GreaterThanMatcher struct { + Value value.Value +} - return nil +// GetValue returns the value on which the Matcher is operating +func (g *GreaterThanMatcher) GetValue() value.Value { + return g.Value } -func (d *DefaultCollection) GetSearchState() SearchIndexState { - return d.ImplicitSearchIndex.GetState() +// Matches returns true if the input value is greater than the matcher's value +func (g *GreaterThanMatcher) Matches(input value.Value) bool { + res, _ := input.CompareTo(g.Value) + return res > 0 } -// Validate expects an unmarshalled document which it will validate again the schema of this collection. -func (d *DefaultCollection) Validate(document any) error { - err := d.Validator.Validate(document) - if err == nil { - return nil - } +// ArrMatches returns true if any element in the array is greater than the matcher's value +func (g *GreaterThanMatcher) ArrMatches(arr []any) bool { + for _, element := range arr { + if nestedArr, ok := element.([]any); ok { + // array of array + for _, ne := range nestedArr { + if value.AnyCompare(ne, g.Value) > 0 { + return true + } + } + } else if value.AnyCompare(element, g.Value) > 0 { + return true + } + } + return false +} - if v, ok := err.(*jsonschema.ValidationError); ok { - if len(v.Causes) == 1 { - field := v.Causes[0].InstanceLocation - if len(field) > 0 && field[0] == '/' { - field = field[1:] - } - return errors.InvalidArgument("json schema validation failed for field '%s' reason '%s'", field, v.Causes[0].Message) - } - } +// Type returns the type of the matcher +func (*GreaterThanMatcher) Type() string { + return "$gt" +} - return errors.InvalidArgument(err.Error()) +// String returns the string representation of the matcher +func (g *GreaterThanMatcher) String() string { + return fmt.Sprintf("{$gt:%v}", g.Value) } -func (d *DefaultCollection) GetImplicitSearchIndex() *ImplicitSearchIndex { - return d.ImplicitSearchIndex +// GreaterThanEqMatcher implements "$gte" operand. +type GreaterThanEqMatcher struct { + Value value.Value } -func (d *DefaultCollection) GetInt64FieldsPath() map[string]struct{} { - return d.int64FieldsPath.get() +// GetValue returns the value on which the Matcher is operating +func (g *GreaterThanEqMatcher) GetValue() value.Value { + return g.Value } -func (d *DefaultCollection) TaggedDefaultsForInsert() map[string]struct{} { - return d.fieldsWithInsertDefaults +// Matches returns true if the input value is greater than or equal to the matcher's value +func (g *GreaterThanEqMatcher) Matches(input value.Value) bool { + res, _ := input.CompareTo(g.Value) + return res >= 0 } -func (d *DefaultCollection) TaggedDefaultsForUpdate() map[string]struct{} { - return d.fieldsWithUpdateDefaults +// ArrMatches returns true if any element in the array is greater than or equal to the matcher's value +func (g *GreaterThanEqMatcher) ArrMatches(arr []any) bool { + for _, element := range arr { + if nestedArr, ok := element.([]any); ok { + // array of array + for _, ne := range nestedArr { + if value.AnyCompare(ne, g.Value) >= 0 { + return true + } + } + } else if value.AnyCompare(element, g.Value) >= 0 { + return true + } + } + return false } -func (d *DefaultCollection) setFieldsForDefaults(parent string, fields []*Field) { - for _, f := range fields { - if len(f.Fields) > 0 { - d.setFieldsForDefaults(buildPath(parent, f.FieldName), f.Fields) - } +// Type returns the type of the matcher +func (*GreaterThanEqMatcher) Type() string { + return "$gte" +} - if f.Defaulter != nil { - if f.Defaulter.TaggedWithUpdatedAt() { - d.fieldsWithUpdateDefaults[buildPath(parent, f.FieldName)] = struct{}{} - } else { - d.fieldsWithInsertDefaults[buildPath(parent, f.FieldName)] = struct{}{} - } - } - } +// String returns the string representation of the matcher +func (g *GreaterThanEqMatcher) String() string { + return fmt.Sprintf("{$gte:%v}", g.Value) } -func buildPath(parent string, field string) string { - if len(parent) > 0 { - if len(field) > 0 { - parent = parent + "." + field - } - return parent - } +// LessThanMatcher implements "$lt" operand. +type LessThanMatcher struct { + Value value.Value +} - return field +// GetValue returns the value on which the Matcher is operating +func (l *LessThanMatcher) GetValue() value.Value { + return l.Value } -func init() { - jsonschema.Formats[FieldNames[ByteType]] = func(i any) bool { - if i == nil { - return true - } +// Matches returns true if the input value is less than the matcher's value +func (l *LessThanMatcher) Matches(input value.Value) bool { + res, _ := input.CompareTo(l.Value) + return res < 0 +} - if v, ok := i.(string); ok { - _, err := base64.StdEncoding.DecodeString(v) - return err == nil - } - return false - } - jsonschema.Formats[FieldNames[Int32Type]] = func(i any) bool { - if i == nil { - return true - } +// ArrMatches returns true if any element in the array is less than the matcher's value +func (l *LessThanMatcher) ArrMatches(arr []any) bool { + for _, element := range arr { + if nestedArr, ok := element.([]any); ok { + // array of array + for _, ne := range nestedArr { + if value.AnyCompare(ne, l.Value) < 0 { + return true + } + } + } else if value.AnyCompare(element, l.Value) < 0 { + return true + } + } + return false +} - val, err := parseInt(i) - if err != nil { - return false - } +// Type returns the type of the matcher +func (*LessThanMatcher) Type() string { + return "$lt" +} - return !(val < math.MinInt32 || val > math.MaxInt32) - } - jsonschema.Formats[FieldNames[Int64Type]] = func(i any) bool { - if i == nil { - return true - } +// String returns the string representation of the matcher +func (l *LessThanMatcher) String() string { + return fmt.Sprintf("{$lt:%v}", l.Value) +} - _, err := parseInt(i) - return err == nil - } +// LessThanEqMatcher implements "$lte" operand. +type LessThanEqMatcher struct { + Value value.Value } -func parseInt(i any) (int64, error) { - switch i.(type) { - case json.Number, float64, int, int32, int64: - n, err := strconv.ParseInt(fmt.Sprint(i), 10, 64) - if err != nil { - return 0, err - } - return n, nil - } - return 0, errors.InvalidArgument("expected integer but found %T", i) +// GetValue returns the value on which the Matcher is operating +func (l *LessThanEqMatcher) GetValue() value.Value { + return l.Value } -type int64PathBuilder struct { - int64FieldsPath map[string]struct{} +// Matches returns true if the input value is less than or equal to the matcher's value +func (l *LessThanEqMatcher) Matches(input value.Value) bool { + res, _ := input.CompareTo(l.Value) + return res <= 0 } -func buildInt64Path(fields []*Field) *int64PathBuilder { - i := &int64PathBuilder{ - int64FieldsPath: make(map[string]struct{}), - } - i.buildInt64PathLow("", fields) +// ArrMatches returns true if any element in the array is less than or equal to the matcher's value +func (l *LessThanEqMatcher) ArrMatches(arr []any) bool { + for _, element := range arr { + if nestedArr, ok := element.([]any); ok { + // array of array + for _, ne := range nestedArr { + if value.AnyCompare(ne, l.Value) <= 0 { + return true + } + } + } else if value.AnyCompare(element, l.Value) <= 0 { + return true + } + } + return false +} + +// Type returns the type of the matcher +func (*LessThanEqMatcher) Type() string { + return "$lte" +} - return i +// String returns the string representation of the matcher +func (l *LessThanEqMatcher) String() string { + return fmt.Sprintf("{$lte:%v}", l.Value) } -func (builder *int64PathBuilder) buildInt64PathLow(parent string, fields []*Field) { - for _, f := range fields { - if len(f.Fields) > 0 { - builder.buildInt64PathLow(buildPath(parent, f.FieldName), f.Fields) - } +// RegexMatcher implements "$regex" operand. +// When matching against text, the regexp returns a match that +// begins as early as possible in the input (leftmost), and among those +// it chooses the one that a backtracking search would have found first. +// This so-called leftmost-first matching is the same semantics +// that Perl, Python, and other implementations use. +type RegexMatcher struct { + regex *regexp.Regexp + collation *value.Collation +} - if f.DataType == Int64Type { - builder.int64FieldsPath[buildPath(parent, f.FieldName)] = struct{}{} - } - } -} - -func (builder *int64PathBuilder) get() map[string]struct{} { - return builder.int64FieldsPath +// NewRegexMatcher returns a new RegexMatcher object. +func NewRegexMatcher(value string, collation *value.Collation) (LikeMatcher, error) { + regexp, err := regexp.Compile(value) + if err != nil { + return nil, err + } + + return &RegexMatcher{ + regex: regexp, + collation: collation, + }, nil +} + +// Matches returns true if the input value matches the regex pattern +func (c *RegexMatcher) Matches(docValue any) bool { + switch dv := docValue.(type) { + case string: + return c.regex.MatchString(dv) + case []string: + for _, e := range dv { + if c.regex.MatchString(e) { + return true + } + } + case []byte: + return c.regex.Match(dv) + } + return false +} + +// Type returns the type of the matcher +func (*RegexMatcher) Type() string { + return "$regex" +} + +// String returns the string representation of the matcher +func (c *RegexMatcher) String() string { + return fmt.Sprintf("{regex:%v}", c.regex.String()) +} + +// ContainsMatcher implements "$contains" operand. +type ContainsMatcher struct { + value string + collation *value.Collation +} + +// NewContainsMatcher returns a new ContainsMatcher object. +func NewContainsMatcher(value string, collation *value.Collation) (LikeMatcher, error) { + return &ContainsMatcher{ + value: value, + collation: collation, + }, nil +} + +// Matches returns true if the input value contains the matcher's value +func (c *ContainsMatcher) Matches(docValue any) bool { + switch dv := docValue.(type) { + case string: + return StringContains(dv, c.value, c.collation) + case []string: + for _, e := range dv { + if StringContains(e, c.value, c.collation) { + return true + } + } + case []byte: + if c.collation.IsCaseInsensitive() { + return bytes.Contains(bytes.ToLower(dv), bytes.ToLower([]byte(c.value))) + } + return bytes.Contains(dv, []byte(c.value)) + } + return false +} + +// Type returns the type of the matcher +func (*ContainsMatcher) Type() string { + return "$contains" +} + +// String returns the string representation of the matcher +func (c *ContainsMatcher) String() string { + return fmt.Sprintf("{$contains:%v}", c.value) +} + +// NotMatcher implements "$not" operand. +type NotMatcher struct { + value string + collation *value.Collation +} + +// NewNotMatcher returns a new NotMatcher object. +func NewNotMatcher(value string, collation *value.Collation) (LikeMatcher, error) { + return &NotMatcher{ + value: value, + collation: collation, + }, nil +} + +// Matches returns true if the input value does not contain the matcher's value +func (n *NotMatcher) Matches(docValue any) bool { + switch dv := docValue.(type) { + case string: + return !StringContains(dv, n.value, n.collation) + case []string: + for _, e := range dv { + if StringContains(e, n.value, n.collation) { + return false + } + } + return true + case []byte: + if n.collation.IsCaseInsensitive() { + return !bytes.Contains(bytes.ToLower(dv), bytes.ToLower([]byte(n.value))) + } + return !bytes.Contains(dv, []byte(n.value)) + } + return false +} + +// Type returns the type of the matcher +func (*NotMatcher) Type() string { + return "$not" +} + +// String returns the string representation of the matcher +func (n *NotMatcher) String() string { + return fmt.Sprintf("{$not:%v}", n.value) } + +// StringContains checks if the substring is contained within the string, considering collation. +// It performs a case-insensitive check if the collation is case-insensitive. +func StringContains(s string, substr string, collation *value.Collation) bool { + if collation.IsCaseInsensitive() { + return strings.Contains(strings.ToLower(s), strings.ToLower(substr)) + } + return strings.Contains(s, substr) +} + +// MatcherForArray checks if the matcher operates on an array type +func MatcherForArray(matcher ValueMatcher) bool { + return matcher.GetValue().DataType() == schema.ArrayType +} \ No newline at end of file