Skip to content
Open
Show file tree
Hide file tree
Changes from 35 commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
2f56826
Initial not indexed value support (MissingValue, EmptyValue)
andbag May 5, 2019
29b0bf3
Reorganize NotIndexedValue classes
andbag May 6, 2019
ce61b42
Fix BTree TypeError
andbag May 6, 2019
05c8079
Add not indexed value tests
andbag May 6, 2019
2de46a3
Fix get_object_datum for not indexed value support
andbag May 7, 2019
c61afae
Add additional tests for not indexed value support
andbag May 7, 2019
a47ebc7
Fix EmptyValue support
andbag May 7, 2019
d140ffb
Disable Missing/EmptyValue interface of CompositeIndex
andbag May 8, 2019
7f246be
Fix flake8
andbag May 8, 2019
a0c8546
Merge branch 'master' into notindexed_value_support
May 9, 2019
0f6a2ce
Merge branch 'master' into notindexed_value_support
May 10, 2019
7c9cf99
Merge branch 'master' into notindexed_value_support
May 14, 2019
a2a1ee7
New naming of methods and variables
andbag May 10, 2019
6ceb6dc
Store special values in `_unindex`
andbag May 10, 2019
7b9313c
Fix KeywordIndex tests
andbag May 10, 2019
0911a0c
Fix unindex and refactor KeywordIndex
andbag May 13, 2019
47d5b95
Return False for SpecialValues on truth value testing
andbag May 14, 2019
e305aca
Fix flake8
andbag May 14, 2019
8e47c81
Merge branch 'master' into notindexed_value_support
May 17, 2019
b529bd2
Raise Error if multiple indexed attributes are set
andbag May 22, 2019
6b7b84d
Prepare index_object for multiple indexed attributes
andbag May 23, 2019
52114da
Merge branch 'fix_indexed_attr' into notindexed_value_support
andbag May 24, 2019
b494b75
Add test for multiple indexed attributes
andbag May 24, 2019
e319a05
Replace __nonzero__ with __bool__ for py3
andbag May 24, 2019
bcbd74d
Store keywords always as OOset
andbag May 24, 2019
5f709ac
Fix tests for OOset
andbag May 24, 2019
5f6c287
Create debug entry if datum for attribute cannot be determined
andbag May 24, 2019
0c5493a
Fix consistent check for missing `_unindex` entry
andbag May 24, 2019
090bc9e
Remove obsolete code
andbag May 24, 2019
e7c5e07
flake8
andbag May 24, 2019
cf950e6
Fix clearing of special values of KeywordIndex
andbag May 26, 2019
78efffb
Continue to fix clearing of special values
andbag May 27, 2019
7dd55b7
Add definition map for special values
andbag May 27, 2019
0d58199
Refinement of special value support
andbag May 28, 2019
6d5fa3e
Fix test for empty value
andbag May 29, 2019
4110f65
Log then ignore keys not indexable
andbag Jun 4, 2019
2815927
Consolidate code
andbag Jun 4, 2019
2a45691
Avoid obsolete type casting
andbag Jun 4, 2019
6c5e52c
Consolidate special value handling step one
andbag Jun 4, 2019
fd0a9d2
Consolidate special value handling step two
andbag Jun 6, 2019
225df14
Consolidate code of CompositeIndex
andbag Jun 6, 2019
9bbfdfb
Completion of interfaces
andbag Jun 6, 2019
906a858
Code further generalized
andbag Jun 6, 2019
ab99560
flake8
andbag Jun 6, 2019
e595088
Continue cleaning
andbag Jun 6, 2019
d059521
Code further generalized II
andbag Jun 11, 2019
9883352
Reorganize code and complete tests
andbag Jun 11, 2019
62321b9
Fix for py2 backward compatibility
andbag Jun 11, 2019
3913a78
Merge branch 'master' into notindexed_value_support
Jun 26, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 23 additions & 4 deletions src/Products/PluginIndexes/CompositeIndex/CompositeIndex.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,23 @@
from BTrees.OOBTree import difference
from BTrees.OOBTree import OOSet
from Persistence import PersistentMapping
from zope.interface import implementer

from Products.PluginIndexes.interfaces import ITransposeQuery
from zope.interface import implementer_only

from Products.PluginIndexes.interfaces import (
ILimitedResultIndex,
IQueryIndex,
ISortIndex,
IUniqueValueIndex,
IRequestCacheIndex,
ITransposeQuery,
missing,
empty,
)
from Products.PluginIndexes.KeywordIndex.KeywordIndex import KeywordIndex
from Products.PluginIndexes.unindex import _marker
from Products.ZCatalog.query import IndexQuery


LOG = logging.getLogger('CompositeIndex')

QUERY_OPTIONS = {
Expand Down Expand Up @@ -172,7 +182,8 @@ def __repr__(self):
'attributes: {0.attributes}>').format(self)


@implementer(ITransposeQuery)
@implementer_only(ILimitedResultIndex, IQueryIndex, IUniqueValueIndex,
ISortIndex, IRequestCacheIndex, ITransposeQuery)
class CompositeIndex(KeywordIndex):

"""Index for composition of simple fields.
Expand Down Expand Up @@ -380,6 +391,14 @@ def make_query(self, query):
if c.meta_type == 'BooleanIndex':
rec.keys = [int(bool(v)) for v in rec.keys[:]]

# cannot currently support KeywordIndex's
# missing/empty feature
if c.meta_type == 'KeywordIndex':
if missing in rec.keys:
continue
if empty in rec.keys:
continue

# rec with 'not' parameter
not_parm = rec.get('not', None)
if not_parm:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,32 +21,29 @@
types = ['Document', 'News', 'File', 'Image']
default_pages = [True, False, False, False, False, False]
subjects = list(map(lambda x: 'subject_{0}'.format(x), range(6)))
keywords = list(map(lambda x: 'keyword_{0}'.format(x), range(6)))


class TestObject(object):

def __init__(self, id, portal_type, review_state,
is_default_page=False, subject=(), keyword=()):
is_default_page=False, subject=()):
self.id = id
self.portal_type = portal_type
self.review_state = review_state
self.is_default_page = is_default_page
self.subject = subject
self.keyword = keyword

def getPhysicalPath(self):
return ['', self.id, ]

def __repr__(self):
return ('< {id}, {portal_type}, {review_state},\
{is_default_page}, {subject} , {keyword}>'.format(
{is_default_page}, {subject}>'.format(
id=self.id,
portal_type=self.portal_type,
review_state=self.review_state,
is_default_page=self.is_default_page,
subject=self.subject,
keyword=self.keyword))
subject=self.subject))


class RandomTestObject(TestObject):
Expand All @@ -63,11 +60,10 @@ def __init__(self, id):
is_default_page = default_pages[i]

subject = random.sample(subjects, random.randint(1, len(subjects)))
keyword = random.sample(keywords, random.randint(1, len(keywords)))

super(RandomTestObject, self).__init__(id, portal_type,
review_state, is_default_page,
subject, keyword)
subject)


# Pseudo ContentLayer class to support quick
Expand All @@ -92,7 +88,7 @@ def setUp(self):
KeywordIndex('subject',
extra={
'indexed_attrs':
'keyword,subject'}
'subject'}
),
CompositeIndex('comp01',
extra=[{'id': 'portal_type',
Expand All @@ -107,7 +103,7 @@ def setUp(self):
{'id': 'subject',
'meta_type': 'KeywordIndex',
'attributes':
'keyword,subject'}
'subject'}
])
]

Expand Down Expand Up @@ -206,9 +202,6 @@ def testPerformance(self):
('query02_default_two_indexes',
{'portal_type': {'query': 'Document'},
'subject': {'query': 'subject_2'}}),
('query02_default_two_indexes_zero_hits',
{'portal_type': {'query': 'Document'},
'subject': {'query': ['keyword_1', 'keyword_2']}}),
('query03_default_two_indexes',
{'portal_type': {'query': 'Document'},
'subject': {'query': ['subject_1', 'subject_3']}}),
Expand Down Expand Up @@ -340,8 +333,7 @@ def testSearch(self):
subject=('subject_1', 'subject_2'))
self.populateIndexes(3, obj)
obj = TestObject('obj_4', 'Event', 'private',
subject=('subject_1', 'subject_2'),
keyword=('keyword_1', ))
subject=('subject_1', 'subject_2'))
self.populateIndexes(4, obj)

queries = [
Expand Down Expand Up @@ -379,12 +371,6 @@ def testSearch(self):
'is_default_page': {'query': False},
'subject': {'query': ('subject_1', 'subject_2'),
'operator': 'and'}},
# query on five attributes with
{'review_state': {'not': ('pending', 'visible')},
'portal_type': {'query': ('News', 'Document')},
'is_default_page': {'query': False},
'subject': {'query': ('subject_1', )},
'keyword': {'query': ('keyword_1',)}},
]

for query in queries:
Expand Down
153 changes: 121 additions & 32 deletions src/Products/PluginIndexes/KeywordIndex/KeywordIndex.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,24 @@
#
##############################################################################

import sys
from logging import getLogger

from BTrees.OOBTree import difference
from BTrees.OOBTree import OOSet
from App.special_dtml import DTMLFile
from zope.interface import implementer

from Products.PluginIndexes.unindex import UnIndex
from Products.PluginIndexes.util import safe_callable

from Products.PluginIndexes.interfaces import (
IIndexingMissingValue,
missing,
IIndexingEmptyValue,
empty,
)

_marker = []
LOG = getLogger('Zope.KeywordIndex')

try:
Expand All @@ -29,6 +38,7 @@
basestring = (bytes, str)


@implementer(IIndexingMissingValue, IIndexingEmptyValue)
class KeywordIndex(UnIndex):
"""Like an UnIndex only it indexes sequences of items.

Expand All @@ -38,6 +48,10 @@ class KeywordIndex(UnIndex):
"""
meta_type = 'KeywordIndex'
query_options = ('query', 'range', 'not', 'operator')
special_values = {TypeError: missing,
AttributeError: missing,
None: missing,
(): empty}

manage_options = (
{'label': 'Settings', 'action': 'manage_main'},
Expand All @@ -58,59 +72,120 @@ def _index_object(self, documentId, obj, threshold=None, attr=''):
# we'll do so.

newKeywords = self._get_object_keywords(obj, attr)
oldKeywords = self._unindex.get(documentId, _marker)

oldKeywords = self._unindex.get(documentId, None)

if oldKeywords is None:
if oldKeywords is _marker:
# we've got a new document, let's not futz around.
try:
if newKeywords in (missing, empty):
self.insertSpecialIndexEntry(newKeywords, documentId)
else:
keys = list()
for kw in newKeywords:
self.insertForwardIndexEntry(kw, documentId)
if newKeywords:
self._unindex[documentId] = list(newKeywords)
except TypeError:
return 0
try:
self.insertForwardIndexEntry(kw, documentId)
keys.append(kw)
except TypeError:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I doubt that this exception handling is right: it does not index the object if one key cannot be indexed - and the problem is only reported via a log entry.
In my opinion, other alternatives would be better:

  • log then ignore keys not indexable
  • do not catch the exception (and let the whole operation fail)
  • handle this TypeError in the same way as if it had occurred during the object value determination (e.g. map to missing).

In any case, the logic is at the wrong place. One would need similar logic for "update existing index info" and it should not be duplicated.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the original version there was a bug that could lead to inconsistencies in the index. Also, the problem was not logged. In order not to have to abolish the old behavior completely, I would prefer the first variant. Consequently, _unindex is only allowed to store indexable keywords.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correction: Since the type OOSet is forced for keywords in the meantime, a TypeError can also be raised under python3 e.g. in the method map_value. For consistency reasons, TypeError is now always handled in the same way when determining the attribute value.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@d-maurer I'm beginning to wonder if it wouldn't be more sensible to escalate TypeError when a value in the keyword list is incompatible with the already indexed values. Otherwise the new values would have to be pre-validated before being indexed.

# key is not valid for this Btree so we have to
# roll back insertForwardIndexEntry
LOG.error('%(context)s: Unable to insert forward '
'index entry for document with id '
'%(doc_id)s and keyword %(kw)r '
'for index %{index}r.', dict(
context=self.__class__.__name__,
kw=kw,
doc_id=documentId,
index=self.id))

self.unindex_objectKeywords(documentId, keys)
return 0

newKeywords = OOSet(newKeywords)

self._unindex[documentId] = newKeywords
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The _unindex update could be done together with the "update existing index info" case.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay


else:
# we have an existing entry for this document, and we need
# to figure out if any of the keywords have actually changed
if type(oldKeywords) is not OOSet:
oldKeywords = OOSet(oldKeywords)
newKeywords = OOSet(newKeywords)
fdiff = difference(oldKeywords, newKeywords)
rdiff = difference(newKeywords, oldKeywords)
if oldKeywords in (missing, empty):
self.removeSpecialIndexEntry(oldKeywords, documentId)
oldSet = OOSet()
else:
if not isinstance(oldKeywords, OOSet):
oldKeywords = OOSet(oldKeywords)
oldSet = oldKeywords

if newKeywords in (missing, empty):
self.insertSpecialIndexEntry(newKeywords, documentId)
newSet = OOSet()
else:
newSet = newKeywords = OOSet(newKeywords)

fdiff = difference(oldSet, newSet)
rdiff = difference(newSet, oldSet)
if fdiff or rdiff:
# if we've got forward or reverse changes
if newKeywords:
self._unindex[documentId] = list(newKeywords)
else:
del self._unindex[documentId]
if fdiff:
self.unindex_objectKeywords(documentId, fdiff)
if rdiff:
for kw in rdiff:
self.insertForwardIndexEntry(kw, documentId)

self._unindex[documentId] = newKeywords

return 1

def _get_object_keywords(self, obj, attr):
newKeywords = getattr(obj, attr, ())
newKeywords = getattr(obj, attr, None)

def _getSpecialValueFor(datum):
try:
special_value = self.special_values[datum]
except TypeError:
raise KeyError(datum)

if self.providesSpecialIndex(special_value):
return special_value
raise KeyError(datum)

if safe_callable(newKeywords):
try:
newKeywords = newKeywords()
except (AttributeError, TypeError):
return ()
if not newKeywords:
return ()
elif isinstance(newKeywords, basestring):
return (newKeywords,)
LOG.debug('%(context)s: Cannot determine datum for attribute '
'%(attr)s of object %(obj)r', dict(
context=self.__class__.__name__,
attr=attr,
obj=obj),
exc_info=True)

newKeywords = sys.exc_info()[0]
try:
return _getSpecialValueFor(newKeywords)
except KeyError:
return _marker

try:
return _getSpecialValueFor(newKeywords)
except KeyError:
pass

# normalize datum
if isinstance(newKeywords, basestring):
newKeywords = (newKeywords,)
else:
try:
# unique
newKeywords = set(newKeywords)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At another place, the keywords are collected in an OOSet. Using different set types increases the constraints placed on the usable types for keywords: OOSet requires orderability (as the BTrees package as a whole); set requires hashability. I recommend to use OOSet uniformly (and avoid the tuple recasting).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay

except TypeError:
# Not a sequence
return (newKeywords,)
newKeywords = (newKeywords,)
else:
return tuple(newKeywords)
newKeywords = tuple(newKeywords)

try:
return _getSpecialValueFor(newKeywords)
except KeyError:
return newKeywords

def unindex_objectKeywords(self, documentId, keywords):
""" carefully unindex the object with integer id 'documentId'"""
Expand All @@ -122,13 +197,27 @@ def unindex_objectKeywords(self, documentId, keywords):
def unindex_object(self, documentId):
""" carefully unindex the object with integer id 'documentId'"""

keywords = self._unindex.get(documentId, None)
keywords = self._unindex.get(documentId, _marker)

# Couldn't we return 'None' immediately
# if keywords is 'None' (or _marker)???
if keywords is _marker:
return

if keywords is not None:
self._increment_counter()
self._increment_counter()

if keywords in (missing, empty):
try:
if not self.removeSpecialIndexEntry(keywords, documentId):
raise KeyError
del self._unindex[documentId]

except KeyError:
LOG.debug('%(context)s: Attempt to unindex nonexistent '
'document with id %(doc_id)s', dict(
context=self.__class__.__name__,
doc_id=documentId),
exc_info=True)

return None

self.unindex_objectKeywords(documentId, keywords)
try:
Expand Down
Loading