@@ -47,20 +47,9 @@ public class NodePattern extends SemgrexPattern {
4747 private final String name ;
4848 private String descString ;
4949 SemgrexPattern child ;
50- // specifies the groups in a regex that are captured as
51- // matcher-global string variables
52- private List <Pair <Integer , String >> variableGroups ;
5350
5451 public NodePattern (GraphRelation r , boolean negDesc ,
5552 NodeAttributes attrs , boolean isLink , String name ) {
56- this (r , negDesc , attrs , isLink , name ,
57- new ArrayList <>(0 ));
58- }
59-
60- // TODO: there is no capacity for named variable groups in the parser right now
61- public NodePattern (GraphRelation r , boolean negDesc ,
62- NodeAttributes attrs , boolean isLink , String name ,
63- List <Pair <Integer , String >> variableGroups ) {
6453 this .reln = r ;
6554 this .negDesc = negDesc ;
6655 this .isLink = isLink ;
@@ -72,20 +61,21 @@ public NodePattern(GraphRelation r, boolean negDesc,
7261 this .regexPartialAttributes = new ArrayList <>();
7362
7463 descString = "{" ;
75- for (Triple <String , String , Boolean > entry : attrs .attributes ()) {
64+ for (Quadruple <String , String , Boolean , List < Pair < Integer , String >> > entry : attrs .attributes ()) {
7665 if (!descString .equals ("{" ))
7766 descString += ";" ;
7867 String key = entry .first ();
7968 String value = entry .second ();
8069 boolean negated = entry .third ();
70+ List <Pair <Integer , String >> varGroups = entry .fourth ();
8171
8272 // Add the attributes for this key
8373 if (value .equals ("__" )) {
84- attributes .add (new Attribute (key , true , true , negated ));
74+ attributes .add (new Attribute (key , true , true , negated , varGroups ));
8575 } else if (value .matches ("/.*/" )) {
86- attributes .add (buildRegexAttribute (key , value , negated ));
76+ attributes .add (buildRegexAttribute (key , value , negated , varGroups ));
8777 } else { // raw description
88- attributes .add (new Attribute (key , value , value , negated ));
78+ attributes .add (new Attribute (key , value , value , negated , varGroups ));
8979 }
9080
9181 if (negated ) {
@@ -100,6 +90,8 @@ public NodePattern(GraphRelation r, boolean negDesc,
10090 String key = entry .second ();
10191 String value = entry .third ();
10292 boolean negated = entry .fourth ();
93+ // TODO: can add varGroups, especially for the regex matches
94+ List <Pair <Integer , String >> varGroups = Collections .emptyList ();
10395
10496 Class <?> clazz = AnnotationLookup .getValueType (AnnotationLookup .toCoreKey (annotation ));
10597 boolean isMap = clazz != null && Map .class .isAssignableFrom (clazz );
@@ -115,11 +107,11 @@ public NodePattern(GraphRelation r, boolean negDesc,
115107 } else {
116108 // Add the attributes for this key
117109 if (value .equals ("__" )) {
118- attr = new Attribute (key , true , true , negated );
110+ attr = new Attribute (key , true , true , negated , varGroups );
119111 } else if (value .matches ("/.*/" )) {
120- attr = buildRegexAttribute (key , value , negated );
112+ attr = buildRegexAttribute (key , value , negated , varGroups );
121113 } else { // raw description
122- attr = new Attribute (key , value , value , negated );
114+ attr = new Attribute (key , value , value , negated , varGroups );
123115 }
124116 partialAttributes .add (new Pair <>(annotation , attr ));
125117 }
@@ -148,15 +140,13 @@ public NodePattern(GraphRelation r, boolean negDesc,
148140 this .child = null ;
149141 this .isRoot = attrs .root ();
150142 this .isEmpty = attrs .empty ();
151-
152- this .variableGroups = Collections .unmodifiableList (variableGroups );
153143 }
154144
155145 /**
156146 * Tests the value to see if it's really a regex, or just a string wrapped in regex.
157147 * Return an Attribute which matches this expression
158148 */
159- private Attribute buildRegexAttribute (String key , String value , boolean negated ) {
149+ private Attribute buildRegexAttribute (String key , String value , boolean negated , List < Pair < Integer , String >> varGroups ) {
160150 boolean isRegexp = false ;
161151 for (int i = 1 ; i < value .length () - 1 ; ++i ) {
162152 char chr = value .charAt (i );
@@ -170,13 +160,29 @@ private Attribute buildRegexAttribute(String key, String value, boolean negated)
170160 return new Attribute (key ,
171161 Pattern .compile (patternContent ),
172162 Pattern .compile (patternContent , Pattern .CASE_INSENSITIVE |Pattern .UNICODE_CASE ),
173- negated );
163+ negated , varGroups );
174164 } else {
175- return new Attribute (key , patternContent , patternContent , negated );
165+ return new Attribute (key , patternContent , patternContent , negated , varGroups );
166+ }
167+ }
168+
169+ private static boolean checkVarMatch (String key , String matchedString ,
170+ VariableStrings variableStrings , VariableStrings tempVariableStrings ) {
171+ String existingString = variableStrings .getString (key );
172+ if (existingString == null ) {
173+ existingString = tempVariableStrings .getString (key );
174+ }
175+ if (existingString != null && !existingString .equals (matchedString )) {
176+ return false ;
177+ }
178+ if (matchedString != null ) {
179+ tempVariableStrings .setVar (key , matchedString );
176180 }
181+ return true ;
177182 }
178183
179- private boolean checkMatch (Attribute attr , boolean ignoreCase , String nodeValue ) {
184+ private boolean checkMatch (Attribute attr , boolean ignoreCase , String nodeValue ,
185+ VariableStrings variableStrings , VariableStrings tempVariableStrings ) {
180186 if (nodeValue == null ) {
181187 // treat non-existent attributes as having matched a negated expression
182188 // so for example, `cpos!:NUM` matches not having a cpos at all
@@ -188,14 +194,51 @@ private boolean checkMatch(Attribute attr, boolean ignoreCase, String nodeValue)
188194 boolean matches ;
189195 if (toMatch instanceof Boolean ) {
190196 matches = ((Boolean ) toMatch );
197+
198+ if (matches ) {
199+ for (Pair <Integer , String > varGroup : attr .variableGroups ) {
200+ // TODO possibly a bug here - it is not honoring ignoreCase
201+ String matchedString = nodeValue ;
202+ String key = varGroup .second ();
203+ if (!checkVarMatch (key , matchedString , variableStrings , tempVariableStrings )) {
204+ matches = false ;
205+ break ;
206+ }
207+ }
208+ }
191209 } else if (toMatch instanceof String ) {
192210 if (ignoreCase ) {
193211 matches = nodeValue .equalsIgnoreCase (toMatch .toString ());
194212 } else {
195213 matches = nodeValue .equals (toMatch .toString ());
196214 }
215+
216+ if (matches ) {
217+ for (Pair <Integer , String > varGroup : attr .variableGroups ) {
218+ // TODO possibly a bug here - it is not honoring ignoreCase
219+ String matchedString = nodeValue ;
220+ String key = varGroup .second ();
221+ if (!checkVarMatch (key , matchedString , variableStrings , tempVariableStrings )) {
222+ matches = false ;
223+ break ;
224+ }
225+ }
226+ }
197227 } else if (toMatch instanceof Pattern ) {
198- matches = ((Pattern ) toMatch ).matcher (nodeValue ).matches ();
228+ Matcher matcher = ((Pattern ) toMatch ).matcher (nodeValue );
229+ if (matcher .matches ()) {
230+ matches = true ;
231+ for (Pair <Integer , String > varGroup : attr .variableGroups ) {
232+ String matchedString = matcher .group (varGroup .first ());
233+ String key = varGroup .second ();
234+ if (!checkVarMatch (key , matchedString , variableStrings , tempVariableStrings )) {
235+ matches = false ;
236+ break ;
237+ }
238+ }
239+ } else {
240+ matches = false ;
241+ }
199242 } else {
200243 throw new IllegalStateException ("Unknown matcher type: " + toMatch + " (of class + " + toMatch .getClass () + ")" );
201244 }
@@ -206,7 +249,8 @@ private boolean checkMatch(Attribute attr, boolean ignoreCase, String nodeValue)
206249 }
207250
208251 @ SuppressWarnings ("unchecked" )
209- public boolean nodeAttrMatch (IndexedWord node , final SemanticGraph sg , boolean ignoreCase ) {
252+ public boolean nodeAttrMatch (IndexedWord node , final SemanticGraph sg , boolean ignoreCase ,
253+ VariableStrings variableStrings , VariableStrings tempVariableStrings ) {
210254 // System.out.println(node.word());
211255 if (isRoot ) {
212256 // System.out.println("checking root");
@@ -240,7 +284,8 @@ public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean i
240284 // }
241285 // System.out.println(nodeValue);
242286
243- boolean matches = checkMatch (attr , ignoreCase , nodeValue );
287+ boolean matches = checkMatch (attr , ignoreCase , nodeValue , variableStrings , tempVariableStrings );
288+
244289 if (!matches ) {
245290 // System.out.println("doesn't match");
246291 // System.out.println("");
@@ -266,7 +311,8 @@ public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean i
266311 nodeValue = (value == null ) ? null : value .toString ();
267312 }
268313
269- boolean matches = checkMatch (attr , ignoreCase , nodeValue );
314+ // TODO: not connected to varGroups yet
315+ boolean matches = checkMatch (attr , ignoreCase , nodeValue , variableStrings , tempVariableStrings );
270316 if (!matches ) {
271317 return negDesc ;
272318 }
@@ -282,6 +328,7 @@ public boolean nodeAttrMatch(IndexedWord node, final SemanticGraph sg, boolean i
282328 throw new RuntimeException ("Can only use partial attributes with Maps... this should have been checked at creation time!" );
283329 map = (Map ) rawmap ;
284330 }
331+ // TODO: check varGroups here
285332 boolean matches = partialAttribute .checkMatches (map , ignoreCase );
286333 if (!matches ) {
287334 return negDesc ;
@@ -411,6 +458,7 @@ private static class NodeMatcher extends SemgrexMatcher {
411458 private SemgrexMatcher childMatcher ;
412459 private boolean matchedOnce = false ;
413460 private boolean committedVariables = false ;
461+ private VariableStrings localVariableStrings = null ;
414462
415463 private String nextMatchReln = null ;
416464 private SemanticGraphEdge nextMatchEdge = null ;
@@ -420,7 +468,7 @@ private static class NodeMatcher extends SemgrexMatcher {
420468 private boolean relnNamedFirst = false ;
421469 private boolean edgeNamedFirst = false ;
422470
423- private boolean ignoreCase = false ;
471+ private final boolean ignoreCase ;
424472
425473 // universal: childMatcher is null if and only if
426474 // myNode.child == null OR resetChild has never been called
@@ -477,7 +525,8 @@ private void goToNextNodeMatch() {
477525 decommitNamedNodes ();
478526 decommitNamedRelations ();
479527 finished = true ;
480- Matcher m = null ;
528+ VariableStrings tempVariableStrings = new VariableStrings ();
529+
481530 while (nodeMatchCandidateIterator .hasNext ()) {
482531 if (myNode .reln .getName () != null ) {
483532 String foundReln = namesToRelations .get (myNode .reln .getName ());
@@ -517,21 +566,8 @@ private void goToNextNodeMatch() {
517566 } else {
518567 boolean found = myNode .nodeAttrMatch (nextMatch ,
519568 hyp ? sg : sg_aligned ,
520- ignoreCase );
569+ ignoreCase , variableStrings , tempVariableStrings );
521570 if (found ) {
522- for (Pair <Integer , String > varGroup : myNode .variableGroups ) {
523- // if variables have been captured from a regex, they
524- // must match any previous matchings
525- String thisVariable = varGroup .second ();
526- String thisVarString = variableStrings .getString (thisVariable );
527- if (thisVarString != null &&
528- !thisVarString .equals (m .group (varGroup .first ()))) {
529- // failed to match a variable
530- found = false ;
531- break ;
532- }
533- }
534-
535571 // nodeAttrMatch already checks negDesc, so no need to
536572 // check for that here
537573 finished = false ;
@@ -541,21 +577,8 @@ private void goToNextNodeMatch() {
541577 } else { // try to match the description pattern.
542578 boolean found = myNode .nodeAttrMatch (nextMatch ,
543579 hyp ? sg : sg_aligned ,
544- ignoreCase );
580+ ignoreCase , variableStrings , tempVariableStrings );
545581 if (found ) {
546- for (Pair <Integer , String > varGroup : myNode .variableGroups ) {
547- // if variables have been captured from a regex, they
548- // must match any previous matchings
549- String thisVariable = varGroup .second ();
550- String thisVarString = variableStrings .getString (thisVariable );
551- if (thisVarString != null &&
552- !thisVarString .equals (m .group (varGroup .first ()))) {
553- // failed to match a variable
554- found = false ;
555- break ;
556- }
557- }
558-
559582 // nodeAttrMatch already checks negDesc, so no need to
560583 // check for that here
561584 finished = false ;
@@ -586,26 +609,23 @@ private void goToNextNodeMatch() {
586609 edgeNamedFirst = true ;
587610 namesToEdges .put (myNode .reln .getEdgeName (), nextMatchEdge );
588611 }
589- commitVariableGroups (m ); // commit my variable groups.
612+ commitVariableGroups (tempVariableStrings ); // commit my variable groups.
590613 }
591614 // finished is false exiting this if and only if nextChild exists
592615 // and has a label or backreference that matches
593616 // (also it will just have been reset)
594617 }
595618
/**
 * Marks this matcher's variable groups as committed.
 * In the added (+) lines, the supplied tempVariableStrings is remembered in
 * localVariableStrings and handed to variableStrings.setVars.
 * The removed (-) lines instead read each group in myNode.variableGroups
 * from a regex Matcher and stored it with variableStrings.setVar.
 */
596- private void commitVariableGroups (Matcher m ) {
619+ private void commitVariableGroups (VariableStrings tempVariableStrings ) {
597620 committedVariables = true ; // commit all my variable groups.
598- for (Pair <Integer , String > varGroup : myNode .variableGroups ) {
599- String thisVarString = m .group (varGroup .first ());
600- variableStrings .setVar (varGroup .second (), thisVarString );
601- }
// Keep a reference to what was committed; decommitVariableGroups passes it to unsetVars.
621+ localVariableStrings = tempVariableStrings ;
622+ variableStrings .setVars (tempVariableStrings );
602623 }
603624
/**
 * Undoes a previous commit of variable groups, if one happened.
 * When committedVariables is set, the added (+) lines call
 * variableStrings.unsetVars(localVariableStrings) and then null out
 * localVariableStrings. The removed (-) lines unset each entry of
 * myNode.variableGroups individually.
 * committedVariables is reset to false in every case.
 */
604625 private void decommitVariableGroups () {
605626 if (committedVariables ) {
606- for (Pair <Integer , String > varGroup : myNode .variableGroups ) {
607- variableStrings .unsetVar (varGroup .second ());
608- }
627+ variableStrings .unsetVars (localVariableStrings );
628+ localVariableStrings = null ;
609629 }
610630 committedVariables = false ;
611631 }
0 commit comments