-
Couldn't load subscription status.
- Fork 21
Not indexed value support (MissingValue, EmptyValue) #74
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 35 commits
2f56826
29b0bf3
ce61b42
05c8079
2de46a3
c61afae
a47ebc7
d140ffb
7f246be
a0c8546
0f6a2ce
7c9cf99
a2a1ee7
6ceb6dc
7b9313c
0911a0c
47d5b95
e305aca
8e47c81
b529bd2
6b7b84d
52114da
b494b75
e319a05
bcbd74d
5f709ac
5f6c287
0c5493a
090bc9e
e7c5e07
cf950e6
78efffb
7dd55b7
0d58199
6d5fa3e
4110f65
2815927
2a45691
6c5e52c
fd0a9d2
225df14
9bbfdfb
906a858
ab99560
e595088
d059521
9883352
62321b9
3913a78
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -11,15 +11,24 @@ | |
| # | ||
| ############################################################################## | ||
|
|
||
| import sys | ||
| from logging import getLogger | ||
|
|
||
| from BTrees.OOBTree import difference | ||
| from BTrees.OOBTree import OOSet | ||
| from App.special_dtml import DTMLFile | ||
| from zope.interface import implementer | ||
|
|
||
| from Products.PluginIndexes.unindex import UnIndex | ||
| from Products.PluginIndexes.util import safe_callable | ||
|
|
||
| from Products.PluginIndexes.interfaces import ( | ||
| IIndexingMissingValue, | ||
| missing, | ||
| IIndexingEmptyValue, | ||
| empty, | ||
| ) | ||
|
|
||
| _marker = [] | ||
| LOG = getLogger('Zope.KeywordIndex') | ||
|
|
||
| try: | ||
|
|
@@ -29,6 +38,7 @@ | |
| basestring = (bytes, str) | ||
|
|
||
|
|
||
| @implementer(IIndexingMissingValue, IIndexingEmptyValue) | ||
| class KeywordIndex(UnIndex): | ||
| """Like an UnIndex only it indexes sequences of items. | ||
|
|
||
|
|
@@ -38,6 +48,10 @@ class KeywordIndex(UnIndex): | |
| """ | ||
| meta_type = 'KeywordIndex' | ||
| query_options = ('query', 'range', 'not', 'operator') | ||
| special_values = {TypeError: missing, | ||
| AttributeError: missing, | ||
| None: missing, | ||
| (): empty} | ||
|
|
||
| manage_options = ( | ||
| {'label': 'Settings', 'action': 'manage_main'}, | ||
|
|
@@ -58,59 +72,120 @@ def _index_object(self, documentId, obj, threshold=None, attr=''): | |
| # we'll do so. | ||
|
|
||
| newKeywords = self._get_object_keywords(obj, attr) | ||
| oldKeywords = self._unindex.get(documentId, _marker) | ||
|
|
||
| oldKeywords = self._unindex.get(documentId, None) | ||
|
|
||
| if oldKeywords is None: | ||
| if oldKeywords is _marker: | ||
| # we've got a new document, let's not futz around. | ||
| try: | ||
| if newKeywords in (missing, empty): | ||
| self.insertSpecialIndexEntry(newKeywords, documentId) | ||
| else: | ||
| keys = list() | ||
| for kw in newKeywords: | ||
| self.insertForwardIndexEntry(kw, documentId) | ||
| if newKeywords: | ||
| self._unindex[documentId] = list(newKeywords) | ||
| except TypeError: | ||
| return 0 | ||
| try: | ||
| self.insertForwardIndexEntry(kw, documentId) | ||
| keys.append(kw) | ||
| except TypeError: | ||
| # key is not valid for this Btree so we have to | ||
| # roll back insertForwardIndexEntry | ||
| LOG.error('%(context)s: Unable to insert forward ' | ||
| 'index entry for document with id ' | ||
| '%(doc_id)s and keyword %(kw)r ' | ||
| 'for index %{index}r.', dict( | ||
| context=self.__class__.__name__, | ||
| kw=kw, | ||
| doc_id=documentId, | ||
| index=self.id)) | ||
|
|
||
| self.unindex_objectKeywords(documentId, keys) | ||
| return 0 | ||
|
|
||
| newKeywords = OOSet(newKeywords) | ||
|
|
||
| self._unindex[documentId] = newKeywords | ||
|
||
|
|
||
| else: | ||
| # we have an existing entry for this document, and we need | ||
| # to figure out if any of the keywords have actually changed | ||
| if type(oldKeywords) is not OOSet: | ||
| oldKeywords = OOSet(oldKeywords) | ||
| newKeywords = OOSet(newKeywords) | ||
| fdiff = difference(oldKeywords, newKeywords) | ||
| rdiff = difference(newKeywords, oldKeywords) | ||
| if oldKeywords in (missing, empty): | ||
| self.removeSpecialIndexEntry(oldKeywords, documentId) | ||
| oldSet = OOSet() | ||
| else: | ||
| if not isinstance(oldKeywords, OOSet): | ||
| oldKeywords = OOSet(oldKeywords) | ||
| oldSet = oldKeywords | ||
|
|
||
| if newKeywords in (missing, empty): | ||
| self.insertSpecialIndexEntry(newKeywords, documentId) | ||
| newSet = OOSet() | ||
| else: | ||
| newSet = newKeywords = OOSet(newKeywords) | ||
|
|
||
| fdiff = difference(oldSet, newSet) | ||
| rdiff = difference(newSet, oldSet) | ||
| if fdiff or rdiff: | ||
| # if we've got forward or reverse changes | ||
| if newKeywords: | ||
| self._unindex[documentId] = list(newKeywords) | ||
| else: | ||
| del self._unindex[documentId] | ||
| if fdiff: | ||
| self.unindex_objectKeywords(documentId, fdiff) | ||
| if rdiff: | ||
| for kw in rdiff: | ||
| self.insertForwardIndexEntry(kw, documentId) | ||
|
|
||
| self._unindex[documentId] = newKeywords | ||
|
|
||
| return 1 | ||
|
|
||
| def _get_object_keywords(self, obj, attr): | ||
| newKeywords = getattr(obj, attr, ()) | ||
| newKeywords = getattr(obj, attr, None) | ||
|
|
||
| def _getSpecialValueFor(datum): | ||
| try: | ||
| special_value = self.special_values[datum] | ||
| except TypeError: | ||
| raise KeyError(datum) | ||
|
|
||
| if self.providesSpecialIndex(special_value): | ||
| return special_value | ||
| raise KeyError(datum) | ||
|
|
||
| if safe_callable(newKeywords): | ||
| try: | ||
| newKeywords = newKeywords() | ||
| except (AttributeError, TypeError): | ||
| return () | ||
| if not newKeywords: | ||
| return () | ||
| elif isinstance(newKeywords, basestring): | ||
| return (newKeywords,) | ||
| LOG.debug('%(context)s: Cannot determine datum for attribute ' | ||
| '%(attr)s of object %(obj)r', dict( | ||
| context=self.__class__.__name__, | ||
| attr=attr, | ||
| obj=obj), | ||
| exc_info=True) | ||
|
|
||
| newKeywords = sys.exc_info()[0] | ||
| try: | ||
| return _getSpecialValueFor(newKeywords) | ||
| except KeyError: | ||
| return _marker | ||
|
|
||
| try: | ||
| return _getSpecialValueFor(newKeywords) | ||
| except KeyError: | ||
| pass | ||
|
|
||
| # normalize datum | ||
| if isinstance(newKeywords, basestring): | ||
| newKeywords = (newKeywords,) | ||
| else: | ||
| try: | ||
| # unique | ||
| newKeywords = set(newKeywords) | ||
|
||
| except TypeError: | ||
| # Not a sequence | ||
| return (newKeywords,) | ||
| newKeywords = (newKeywords,) | ||
| else: | ||
| return tuple(newKeywords) | ||
| newKeywords = tuple(newKeywords) | ||
|
|
||
| try: | ||
| return _getSpecialValueFor(newKeywords) | ||
| except KeyError: | ||
| return newKeywords | ||
|
|
||
| def unindex_objectKeywords(self, documentId, keywords): | ||
| """ carefully unindex the object with integer id 'documentId'""" | ||
|
|
@@ -122,13 +197,27 @@ def unindex_objectKeywords(self, documentId, keywords): | |
| def unindex_object(self, documentId): | ||
| """ carefully unindex the object with integer id 'documentId'""" | ||
|
|
||
| keywords = self._unindex.get(documentId, None) | ||
| keywords = self._unindex.get(documentId, _marker) | ||
|
|
||
| # Couldn't we return 'None' immediately | ||
| # if keywords is 'None' (or _marker)??? | ||
| if keywords is _marker: | ||
| return | ||
|
|
||
| if keywords is not None: | ||
| self._increment_counter() | ||
| self._increment_counter() | ||
|
|
||
| if keywords in (missing, empty): | ||
| try: | ||
| if not self.removeSpecialIndexEntry(keywords, documentId): | ||
| raise KeyError | ||
| del self._unindex[documentId] | ||
|
|
||
| except KeyError: | ||
| LOG.debug('%(context)s: Attempt to unindex nonexistent ' | ||
| 'document with id %(doc_id)s', dict( | ||
| context=self.__class__.__name__, | ||
| doc_id=documentId), | ||
| exc_info=True) | ||
|
|
||
| return None | ||
|
|
||
| self.unindex_objectKeywords(documentId, keywords) | ||
| try: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I doubt that this exception handling is right: it does not index the object if one key cannot be indexed - and the problem is only reported via a log entry.
In my opinion, other alternatives would be better:
`TypeError` in the same way as if it had occurred during the object value determination (e.g. map to `missing`). In any case, the logic is at the wrong place. One would need similar logic for "update existing index info" and it should not be duplicated.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the original version there was a bug that could lead to inconsistencies in the index. Also, the problem was not logged. In order not to have to abolish the old behavior completely, I would prefer the first variant. Consequently,
`_unindex` is only allowed to store indexable keywords.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Correction: Since the type OOSet is forced for keywords in the meantime, a TypeError can also be raised under python3 e.g. in the method
map_value. For consistency reasons, TypeError is now always handled in the same way when determining the attribute value.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@d-maurer I'm beginning to wonder if it wouldn't be more sensible to escalate TypeError when a value in the keyword list is incompatible with the already indexed values. Otherwise the new values would have to be pre-validated before being indexed.