View Javadoc

1   package org.apache.helix.alerts;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.util.ArrayList;
23  import java.util.HashMap;
24  import java.util.HashSet;
25  import java.util.Map;
26  import java.util.Set;
27  import java.util.StringTokenizer;
28  import java.util.regex.Matcher;
29  import java.util.regex.Pattern;
30  
31  import org.apache.helix.HelixException;
32  import org.apache.log4j.Logger;
33  
34  
35  public class ExpressionParser
36  {
37    private static Logger logger = Logger.getLogger(ExpressionParser.class);
38  
39    final static String opDelim = "|";
40    final static String opDelimForSplit = "\\|";
41    final static String argDelim = ",";
42    final public static String statFieldDelim = ".";
43    final static String wildcardChar = "*";
44  
45    // static Map<String, ExpressionOperatorType> operatorMap = new
46    // HashMap<String, ExpressionOperatorType>();
47  
48    static Map<String, Operator> operatorMap = new HashMap<String, Operator>();
49    static Map<String, Aggregator> aggregatorMap = new HashMap<String, Aggregator>();
50  
51    static
52    {
53  
54      addOperatorEntry("EXPAND", new ExpandOperator());
55      addOperatorEntry("DIVIDE", new DivideOperator());
56      addOperatorEntry("SUM", new SumOperator());
57      addOperatorEntry("SUMEACH", new SumEachOperator());
58  
59      addAggregatorEntry("ACCUMULATE", new AccumulateAggregator());
60      addAggregatorEntry("DECAY", new DecayAggregator());
61      addAggregatorEntry("WINDOW", new WindowAggregator());
62      /*
63       * addEntry("EACH", ExpressionOperatorType.EACH); addEntry("SUM",
64       * ExpressionOperatorType.SUM); addEntry("DIVIDE",
65       * ExpressionOperatorType.DIVIDE); addEntry("ACCUMULATE",
66       * ExpressionOperatorType.ACCUMULATE);
67       */
68    }
69  
70    // static Pattern pattern = Pattern.compile("(\\{.+?\\})");
71  
72    private static void addOperatorEntry(String label, Operator op)
73    {
74      if (!operatorMap.containsKey(label))
75      {
76        operatorMap.put(label, op);
77      }
78      logger.info("Adding operator: " + op);
79    }
80  
81    private static void addAggregatorEntry(String label, Aggregator agg)
82    {
83      if (!aggregatorMap.containsKey(label.toUpperCase()))
84      {
85        aggregatorMap.put(label.toUpperCase(), agg);
86      }
87      logger.info("Adding aggregator: " + agg);
88    }
89  
90    /*
91     * private static void addEntry(String label, ExpressionOperatorType type) {
92     * if (!operatorMap.containsKey(label)) { operatorMap.put(label, type); }
93     * logger.info("Adding operator type: "+type); }
94     */
95  
96    public static boolean isExpressionNested(String expression)
97    {
98      return expression.contains("(");
99    }
100 
101   /*
102    * public static Operator getOperatorType(String expression) throws Exception
103    * { String op = expression.substring(0,expression.indexOf("(")); if
104    * (!operatorMap.containsKey(op)) { throw new
105    * Exception(op+" is not a valid op type"); } return operatorMap.get(op); }
106    */
107 
108   public static String getInnerExpression(String expression)
109   {
110     return expression.substring(expression.indexOf("(") + 1,
111         expression.lastIndexOf(")"));
112   }
113 
114   /*
115    * public static String[] getBaseStats(ExpressionOperatorType type, String
116    * expression) throws Exception { String[] items = null; if
117    * (isExpressionNested(expression)) { ExpressionOperatorType nextType =
118    * getOperatorType(expression); String innerExp =
119    * getInnerExpression(expression); items = getBaseStats(nextType, innerExp); }
120    * else { //base class, no nesting items = expression.split(","); }
121    * 
122    * if (type != null && type.isBaseOp()) { //surround items with type. for (int
123    * i=0; i<items.length; i++) { items[i] = type + "(" + items[i] + ")"; //!!!!
124    * NEED type to behave like string here
125    * logger.debug("Forming item "+items[i]); } } return items; }
126    * 
127    * public static String[] getBaseStats(String expression) throws Exception {
128    * expression = expression.replaceAll("\\s+", ""); return getBaseStats(null,
129    * expression); }
130    */
131 
132   /*
133    * Validate 2 sets of parenthesis exist, all before first opDelim
134    * 
135    * extract agg type and validate it exists. validate number of args passed in
136    */
137   public static void validateAggregatorFormat(String expression)
138       throws HelixException
139   {
140     logger.debug("validating aggregator for expression: " + expression);
141     // have 0 or more args, 1 or more stats...e.g. ()(x) or (2)(x,y)
142     Pattern pattern = Pattern.compile("\\(.*?\\)");
143     Matcher matcher = pattern.matcher(expression);
144     String aggComponent = null;
145     String statComponent = null;
146     int lastMatchEnd = -1;
147     if (matcher.find())
148     {
149       aggComponent = matcher.group();
150       aggComponent = aggComponent.substring(1, aggComponent.length() - 1);
151       if (aggComponent.contains(")") || aggComponent.contains("("))
152       {
153         throw new HelixException(expression
154             + " has invalid aggregate component");
155       }
156     }
157     else
158     {
159       throw new HelixException(expression + " has invalid aggregate component");
160     }
161     if (matcher.find())
162     {
163       statComponent = matcher.group();
164       statComponent = statComponent.substring(1, statComponent.length() - 1);
165       // statComponent must have at least 1 arg between paren
166       if (statComponent.contains(")") || statComponent.contains("(")
167           || statComponent.length() == 0)
168       {
169         throw new HelixException(expression + " has invalid stat component");
170       }
171       lastMatchEnd = matcher.end();
172     }
173     else
174     {
175       throw new HelixException(expression + " has invalid stat component");
176     }
177     if (matcher.find())
178     {
179       throw new HelixException(expression
180           + " has too many parenthesis components");
181     }
182 
183     if (expression.length() >= lastMatchEnd + 1)
184     { // lastMatchEnd is pos 1 past the pattern. check if there are paren there
185       if (expression.substring(lastMatchEnd).contains("(")
186           || expression.substring(lastMatchEnd).contains(")"))
187       {
188         throw new HelixException(expression + " has extra parenthesis");
189       }
190     }
191 
192     // check wildcard locations. each part can have at most 1 wildcard, and must
193     // be at end
194     // String expStatNamePart = expression.substring(expression.)
195     StringTokenizer fieldTok = new StringTokenizer(statComponent,
196         statFieldDelim);
197     while (fieldTok.hasMoreTokens())
198     {
199       String currTok = fieldTok.nextToken();
200       if (currTok.contains(wildcardChar))
201       {
202         if (currTok.indexOf(wildcardChar) != currTok.length() - 1
203             || currTok.lastIndexOf(wildcardChar) != currTok.length() - 1)
204         {
205           throw new HelixException(currTok
206               + " is illegal stat name.  Single wildcard must appear at end.");
207         }
208       }
209     }
210   }
211 
212   public static boolean statContainsWildcards(String stat)
213   {
214     return stat.contains(wildcardChar);
215   }
216 
217   /*
218    * Return true if stat name matches exactly...incomingStat has no agg type
219    * currentStat can have any
220    * 
221    * Function can match for 2 cases extractStatFromAgg=false. Match
222    * accumulate()(dbFoo.partition10.latency) with
223    * accumulate()(dbFoo.partition10.latency)...trival extractStatFromAgg=true.
224    * Match accumulate()(dbFoo.partition10.latency) with
225    * dbFoo.partition10.latency
226    */
227   public static boolean isExactMatch(String currentStat, String incomingStat,
228       boolean extractStatFromAgg)
229   {
230     String currentStatName = currentStat;
231     if (extractStatFromAgg)
232     {
233       currentStatName = getSingleAggregatorStat(currentStat);
234     }
235     return (incomingStat.equals(currentStatName));
236   }
237 
238   /*
239    * Return true if incomingStat matches wildcardStat except currentStat has 1+
240    * fields with "*" a*.c* matches a5.c7 a*.c* does not match a5.b6.c7
241    * 
242    * Function can match for 2 cases extractStatFromAgg=false. Match
243    * accumulate()(dbFoo.partition*.latency) with
244    * accumulate()(dbFoo.partition10.latency) extractStatFromAgg=true. Match
245    * accumulate()(dbFoo.partition*.latency) with dbFoo.partition10.latency
246    */
247   public static boolean isWildcardMatch(String currentStat,
248       String incomingStat, boolean statCompareOnly, ArrayList<String> bindings)
249   {
250     if (!statCompareOnly)
251     { // need to check for match on agg type and stat
252       String currentStatAggType = (currentStat.split("\\)"))[0];
253       String incomingStatAggType = (incomingStat.split("\\)"))[0];
254       if (!currentStatAggType.equals(incomingStatAggType))
255       {
256         return false;
257       }
258     }
259     // now just get the stats
260     String currentStatName = getSingleAggregatorStat(currentStat);
261     String incomingStatName = getSingleAggregatorStat(incomingStat);
262 
263     if (!currentStatName.contains(wildcardChar))
264     { // no wildcards in stat name
265       return false;
266     }
267     
268     String currentStatNamePattern = currentStatName.replace(".", "\\.");
269     currentStatNamePattern = currentStatNamePattern.replace("*", ".*");
270     boolean result =  Pattern.matches(currentStatNamePattern, incomingStatName);
271     if(result && bindings != null)
272     {
273       bindings.add(incomingStatName);
274     }
275     return result;
276     /*
277     StringTokenizer currentStatTok = new StringTokenizer(currentStatName,
278         statFieldDelim);
279     StringTokenizer incomingStatTok = new StringTokenizer(incomingStatName,
280         statFieldDelim);
281     if (currentStatTok.countTokens() != incomingStatTok.countTokens())
282     { // stat names different numbers of fields
283       return false;
284     }
285     // for each token, if not wildcarded, must be an exact match
286     while (currentStatTok.hasMoreTokens())
287     {
288       String currTok = currentStatTok.nextToken();
289       String incomingTok = incomingStatTok.nextToken();
290       logger.debug("curTok: " + currTok);
291       logger.debug("incomingTok: " + incomingTok);
292       if (!currTok.contains(wildcardChar))
293       { // no wildcard, but have exact match
294         if (!currTok.equals(incomingTok))
295         { // not exact match
296           return false;
297         }
298       }
299       else
300       { // currTok has a wildcard
301         if (currTok.indexOf(wildcardChar) != currTok.length() - 1
302             || currTok.lastIndexOf(wildcardChar) != currTok.length() - 1)
303         {
304           throw new HelixException(currTok
305               + " is illegal stat name.  Single wildcard must appear at end.");
306         }
307         // for wildcard matching, need to escape parentheses on currTok, so
308         // regex works
309         // currTok = currTok.replace("(", "\\(");
310         // currTok = currTok.replace(")", "\\)");
311         // incomingTok = incomingTok.replace("(", "\\(");
312         // incomingTok = incomingTok.replace(")", "\\)");
313         String currTokPreWildcard = currTok.substring(0, currTok.length() - 1);
314         // TODO: if current token has a "(" in it, pattern compiling throws
315         // error
316         // Pattern pattern = Pattern.compile(currTokPreWildcard+".+"); //form
317         // pattern...wildcard part can be anything
318         // Matcher matcher = pattern.matcher(incomingTok); //see if incomingTok
319         // matches
320         if (incomingTok.indexOf(currTokPreWildcard) != 0)
321         {
322           // if (!matcher.find()) { //no match on one tok, return false
323           return false;
324         }
325         // get the binding
326 
327         if (bindings != null)
328         {
329           // TODO: debug me!
330           String wildcardBinding = incomingTok.substring(incomingTok
331               .indexOf(currTokPreWildcard) + currTokPreWildcard.length());
332           bindings.add(wildcardBinding);
333         }
334       }
335     }
336     // all fields match or wildcard match...return true!
337     return true;*/
338   }
339 
340   /*
341    * For checking if an incoming stat (no agg type defined) matches a persisted
342    * stat (with agg type defined)
343    */
344   public static boolean isIncomingStatExactMatch(String currentStat,
345       String incomingStat)
346   {
347     return isExactMatch(currentStat, incomingStat, true);
348   }
349 
350   /*
351    * For checking if an incoming stat (no agg type defined) wildcard matches a
352    * persisted stat (with agg type defined) The persisted stat may have
353    * wildcards
354    */
355   public static boolean isIncomingStatWildcardMatch(String currentStat,
356       String incomingStat)
357   {
358     return isWildcardMatch(currentStat, incomingStat, true, null);
359   }
360 
361   /*
362    * For checking if a persisted stat matches a stat defined in an alert
363    */
364   public static boolean isAlertStatExactMatch(String alertStat,
365       String currentStat)
366   {
367     return isExactMatch(alertStat, currentStat, false);
368   }
369 
370   /*
371    * For checking if a maintained stat wildcard matches a stat defined in an
372    * alert. The alert may have wildcards
373    */
374   public static boolean isAlertStatWildcardMatch(String alertStat,
375       String currentStat, ArrayList<String> wildcardBindings)
376   {
377     return isWildcardMatch(alertStat, currentStat, false, wildcardBindings);
378   }
379 
380   public static Aggregator getAggregator(String aggStr) throws HelixException
381   {
382     aggStr = aggStr.toUpperCase();
383     Aggregator agg = aggregatorMap.get(aggStr);
384     if (agg == null)
385     {
386       throw new HelixException("Unknown aggregator type " + aggStr);
387     }
388     return agg;
389   }
390 
391   public static String getAggregatorStr(String expression)
392       throws HelixException
393   {
394     if (!expression.contains("("))
395     {
396       throw new HelixException(expression
397           + " does not contain a valid aggregator.  No parentheses found");
398     }
399     String aggName = expression.substring(0, expression.indexOf("("));
400     if (!aggregatorMap.containsKey(aggName.toUpperCase()))
401     {
402       throw new HelixException("aggregator <" + aggName + "> is unknown type");
403     }
404     return aggName;
405   }
406 
407   public static String[] getAggregatorArgs(String expression)
408       throws HelixException
409   {
410     String aggregator = getAggregatorStr(expression);
411     String argsStr = getAggregatorArgsStr(expression);
412     String[] args = argsStr.split(argDelim);
413     logger.debug("args size: " + args.length);
414     int numArgs = (argsStr.length() == 0) ? 0 : args.length;
415     // String[] argList = (expression.substring(expression.indexOf("(")+1,
416     // expression.indexOf(")"))).split(argDelim);
417     // verify correct number of args
418     int requiredNumArgs = aggregatorMap.get(aggregator.toUpperCase())
419         .getRequiredNumArgs();
420     if (numArgs != requiredNumArgs)
421     {
422       throw new HelixException(expression + " contains " + args.length
423           + " arguments, but requires " + requiredNumArgs);
424     }
425     return args;
426   }
427 
428   /*
429    * public static String[] getAggregatorArgsList(String expression) { String
430    * argsStr = getAggregatorArgsStr(expression); String[] args =
431    * argsStr.split(argDelim); return args; }
432    */
433 
434   public static String getAggregatorArgsStr(String expression)
435   {
436     return expression.substring(expression.indexOf("(") + 1,
437         expression.indexOf(")"));
438   }
439 
440   public static String[] getAggregatorStats(String expression)
441       throws HelixException
442   {
443     String justStats = expression;
444     if (expression.contains("(") && expression.contains(")"))
445     {
446       justStats = (expression.substring(expression.lastIndexOf("(") + 1,
447           expression.lastIndexOf(")")));
448     }
449     String[] statList = justStats.split(argDelim);
450     if (statList.length < 1)
451     {
452       throw new HelixException(expression
453           + " does not contain any aggregator stats");
454     }
455     return statList;
456   }
457 
458   public static String getSingleAggregatorStat(String expression)
459       throws HelixException
460   {
461     String[] stats = getAggregatorStats(expression);
462     if (stats.length > 1)
463     {
464       throw new HelixException(expression + " contains more than 1 stat");
465     }
466     return stats[0];
467   }
468 
469   public static String getWildcardStatSubstitution(String wildcardStat,
470       String fixedStat)
471   {
472     int lastOpenParenLoc = wildcardStat.lastIndexOf("(");
473     int lastCloseParenLoc = wildcardStat.lastIndexOf(")");
474     StringBuilder builder = new StringBuilder();
475     builder.append(wildcardStat.substring(0, lastOpenParenLoc + 1));
476     builder.append(fixedStat);
477     builder.append(")");
478     logger.debug("wildcardStat: " + wildcardStat);
479     logger.debug("fixedStat: " + fixedStat);
480     logger.debug("subbedStat: " + builder.toString());
481     return builder.toString();
482   }
483 
484   // XXX: each op type should have number of inputs, number of outputs. do
485   // validation.
486   // (dbFoo.partition*.latency, dbFoo.partition*.count)|EACH|ACCUMULATE|DIVIDE
487   public static String[] getBaseStats(String expression) throws HelixException
488   {
489     expression = expression.replaceAll("\\s+", "");
490     validateAggregatorFormat(expression);
491 
492     String aggName = getAggregatorStr(expression);
493     String[] aggArgs = getAggregatorArgs(expression);
494     String[] aggStats = getAggregatorStats(expression);
495 
496     // form aggArgs
497     String aggArgList = getAggregatorArgsStr(expression);
498 
499     String[] baseStats = new String[aggStats.length];
500     for (int i = 0; i < aggStats.length; i++)
501     {
502       StringBuilder stat = new StringBuilder();
503       stat.append(aggName);
504       stat.append("(");
505       stat.append(aggArgList);
506       stat.append(")");
507       stat.append("(");
508       stat.append(aggStats[i]);
509       stat.append(")");
510       baseStats[i] = stat.toString();
511     }
512     return baseStats;
513   }
514 
515   public static String[] getOperators(String expression) throws HelixException
516   {
517     String[] ops = null;
518     int numAggStats = (getAggregatorStats(expression)).length;
519     int opDelimLoc = expression.indexOf(opDelim);
520     if (opDelimLoc < 0)
521     {
522       return null;
523     }
524     logger.debug("ops str: " + expression.substring(opDelimLoc + 1));
525     ops = expression.substring(opDelimLoc + 1).split(opDelimForSplit);
526 
527     // validate this string of ops
528     // verify each op exists
529     // take num input tuples sets and verify ops will output exactly 1 tuple
530     // sets
531     int currNumTuples = numAggStats;
532     for (String op : ops)
533     {
534       logger.debug("op: " + op);
535       if (!operatorMap.containsKey(op.toUpperCase()))
536       {
537         throw new HelixException("<" + op + "> is not a valid operator type");
538       }
539       Operator currOpType = operatorMap.get(op.toUpperCase());
540       if (currNumTuples < currOpType.minInputTupleLists
541           || currNumTuples > currOpType.maxInputTupleLists)
542       {
543         throw new HelixException("<" + op + "> cannot process " + currNumTuples
544             + " input tuples");
545       }
546       // reset num tuples to this op's output size
547       if (!currOpType.inputOutputTupleListsCountsEqual)
548       { // if equal, this number does not change
549         currNumTuples = currOpType.numOutputTupleLists;
550       }
551     }
552     if (currNumTuples != 1)
553     {
554       throw new HelixException(expression
555           + " does not terminate in a single tuple set");
556     }
557     return ops;
558   }
559 
560   public static void validateOperators(String expression) throws HelixException
561   {
562     getOperators(expression);
563   }
564 
565   public static Operator getOperator(String opName) throws HelixException
566   {
567     if (!operatorMap.containsKey(opName))
568     {
569       throw new HelixException(opName + " is unknown op type");
570     }
571     return operatorMap.get(opName);
572   }
573 
574   public static void validateExpression(String expression)
575       throws HelixException
576   {
577     // 1. extract stats part and validate
578     validateAggregatorFormat(expression);
579     // 2. extract ops part and validate the ops exist and the inputs/outputs are
580     // correct
581     validateOperators(expression);
582   }
583 }