com.ebay.erl.mobius.core.criterion
Class TupleRestrictions

java.lang.Object
  extended by com.ebay.erl.mobius.core.criterion.TupleRestrictions

public class TupleRestrictions
extends java.lang.Object

Factory class that provides methods to define TupleCriterion for filtering Tuples in a Dataset.

This product is licensed under the Apache License, Version 2.0, available at http://www.apache.org/licenses/LICENSE-2.0. This product contains portions derived from Apache hadoop which is licensed under the Apache License, Version 2.0, available at http://hadoop.apache.org. © 2007 – 2012 eBay Inc., Evan Chiu, Woody Zhou, Neel Sundaresan


Field Summary
protected static org.apache.hadoop.conf.Configuration conf
          Hadoop configuration
 
Constructor Summary
TupleRestrictions()
           
 
Method Summary
static void configure(org.apache.hadoop.conf.Configuration conf)
          Setup Hadoop configuration.
static TupleCriterion eq(java.lang.String column, boolean trueFalse)
          Specify the given column's value equals to trueFalse
static TupleCriterion eq(java.lang.String column, java.util.Date date)
          Return a TupleCriterion that only accepts tuples with the value of column is equal to the specified date.
static TupleCriterion eq(java.lang.String column, java.lang.Number value)
          Specify the given column's value equals to value
static TupleCriterion eq(java.lang.String column, java.lang.String value)
          Specify the given column's value equals to value
static TupleCriterion eq(java.lang.String column, java.lang.String columnDateFormat, java.util.Date date)
          Return a TupleCriterion that parses the value of column with the given columnDateFormat into milliseconds (A), compares it with the milliseconds of the date (B), and only accepts tuples when A equals B.
static TupleCriterion eqColumns(java.lang.String column1, java.lang.String column2)
          Create a TupleCriterion that only accepts tuples in which the two columns' values are equal.
static TupleCriterion ge(java.lang.String columnName, java.util.Calendar date)
           
static TupleCriterion ge(java.lang.String columnName, java.util.Date date)
           
static TupleCriterion ge(java.lang.String columnName, java.lang.Number value)
           
static TupleCriterion ge(java.lang.String columnName, java.lang.String value)
          greater than or equal
static TupleCriterion ge(java.lang.String columnName, java.lang.String columnFormat, java.util.Calendar date)
           
static TupleCriterion ge(java.lang.String columnName, java.lang.String columnFormat, java.util.Date date)
           
static TupleCriterion geColumns(java.lang.String column1, java.lang.String column2)
          compare if column1's value is greater than or equal to column2's value
static TupleCriterion gt(java.lang.String columnName, java.util.Calendar date)
           
static TupleCriterion gt(java.lang.String columnName, java.util.Date date)
           
static TupleCriterion gt(java.lang.String columnName, java.lang.Number value)
           
static TupleCriterion gt(java.lang.String columnName, java.lang.String value)
          greater than
static TupleCriterion gt(java.lang.String columnName, java.lang.String columnFormat, java.util.Calendar date)
           
static TupleCriterion gt(java.lang.String columnName, java.lang.String columnFormat, java.util.Date date)
           
static TupleCriterion gtColumns(java.lang.String column1, java.lang.String column2)
          compare if column1's value greater than column2's value
static TupleCriterion le(java.lang.String columnName, java.util.Calendar date)
           
static TupleCriterion le(java.lang.String columnName, java.util.Date date)
           
static TupleCriterion le(java.lang.String columnName, java.lang.Number value)
           
static TupleCriterion le(java.lang.String columnName, java.lang.String value)
          less than or equal
static TupleCriterion le(java.lang.String columnName, java.lang.String columnFormat, java.util.Calendar date)
           
static TupleCriterion le(java.lang.String columnName, java.lang.String columnFormat, java.util.Date date)
           
static TupleCriterion leColumns(java.lang.String column1, java.lang.String column2)
          compare if column1's value is less than or equal to column2's value
static TupleCriterion lt(java.lang.String columnName, java.util.Calendar date)
           
static TupleCriterion lt(java.lang.String columnName, java.util.Date date)
           
static TupleCriterion lt(java.lang.String columnName, java.lang.Number value)
           
static TupleCriterion lt(java.lang.String columnName, java.lang.String value)
          less than
static TupleCriterion lt(java.lang.String columnName, java.lang.String columnFormat, java.util.Calendar date)
           
static TupleCriterion lt(java.lang.String columnName, java.lang.String columnFormat, java.util.Date date)
           
static TupleCriterion ltColumns(java.lang.String column1, java.lang.String column2)
          compare if column1's value is less than column2's value
static TupleCriterion ne(java.lang.String column, boolean trueFalse)
           
static TupleCriterion ne(java.lang.String columnName, java.util.Calendar date)
           
static TupleCriterion ne(java.lang.String columnName, java.util.Date date)
           
static TupleCriterion ne(java.lang.String columnName, java.lang.Number value)
           
static TupleCriterion ne(java.lang.String columnName, java.lang.String value)
          not equals
static TupleCriterion ne(java.lang.String columnName, java.lang.String columnFormat, java.util.Calendar date)
           
static TupleCriterion ne(java.lang.String columnName, java.lang.String columnFormat, java.util.Date date)
           
static TupleCriterion neColumns(java.lang.String column1, java.lang.String column2)
          compare if two columns' values are not equal.
static TupleCriterion not_within(java.lang.String column, java.io.File file)
          Create a tuple criterion that only accepts tuples whose value of column is NOT present in the given file. The file is assumed to be a single-column text file with one or more lines.
static TupleCriterion notNull(java.lang.String column)
          Create a TupleCriterion that only accepts tuples with the value of the given column is not null nor empty string.
static TupleCriterion notWithinNumber(java.lang.String column, java.util.ArrayList<java.lang.Double> values)
          Create a TupleCriterion that only accepts tuples whose value of the specified column is not within the provided list.
static TupleCriterion notWithinString(java.lang.String column, java.util.ArrayList<java.lang.String> values)
          Create a TupleCriterion that only accepts tuples whose value of the specified column is not within the provided list.
static TupleCriterion regex(java.lang.String column, java.lang.String regex)
          Define a TupleCriterion that only extracts records when the value of the column meets the regex.
static TupleCriterion within(java.lang.String column, java.io.File file)
          Create a tuple criterion that only accepts tuples when the value of the column are presented in the given file
static TupleCriterion withinNumber(java.lang.String column, java.util.ArrayList<java.lang.Double> list)
          Create a TupleCriterion that only accepts tuples whose value of the specified column is within the provided list.
static TupleCriterion withinString(java.lang.String column, java.util.ArrayList<java.lang.String> list)
          Create a TupleCriterion that only accepts tuples whose value of the specified column is within the provided list.
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

conf

protected static org.apache.hadoop.conf.Configuration conf
Hadoop configuration

Constructor Detail

TupleRestrictions

public TupleRestrictions()
Method Detail

configure

public static final void configure(org.apache.hadoop.conf.Configuration conf)
Setup Hadoop configuration.


withinString

public static TupleCriterion withinString(java.lang.String column,
                                          java.util.ArrayList<java.lang.String> list)
Create a TupleCriterion that only accepts tuples whose value of the specified column is within the provided list.

The value of the column will be converted into string, if it's not string, to compare.


withinNumber

public static TupleCriterion withinNumber(java.lang.String column,
                                          java.util.ArrayList<java.lang.Double> list)
Create a TupleCriterion that only accepts tuples whose value of the specified column is within the provided list.

The value of the column will be converted into double, if it's not number, to compare.


within

public static TupleCriterion within(java.lang.String column,
                                    java.io.File file)
                             throws java.io.FileNotFoundException
Create a tuple criterion that only accepts tuples when the value of the column are presented in the given file

The file is assumed to be a single-column text file with one or more lines. Each line is read into a case-insensitive set, and the set is used to check whether the value of the column is within it.

Parameters:
column - the name of a column to be tested that whether its value is in the given file or not
file - a single column and multiple lines of file that contains strings/numbers, each line is treated as a single unit.
Returns:
an instance of TupleCriterion that extracts only the records when the value of its column are presented in the given file.
Throws:
java.io.FileNotFoundException - if the given file cannot be found.

not_within

public static TupleCriterion not_within(java.lang.String column,
                                        java.io.File file)
                                 throws java.io.FileNotFoundException
Create a tuple criterion that only accepts tuples whose value of column is NOT present in the given file. The file is assumed to be a single-column text file with one or more lines. Each line is read into a case-insensitive set, and the set is used to check whether the value of the column is within it.

Parameters:
column - the name of a column to be tested that whether its value is in the given file or not
file - a single column and multiple lines of file that contains strings/numbers, each line is treated as a single unit.
Returns:
an instance of TupleCriterion that extracts only the records whose value of column is NOT present in the given file.
Throws:
java.io.FileNotFoundException - if the given file cannot be found.

notWithinNumber

public static TupleCriterion notWithinNumber(java.lang.String column,
                                             java.util.ArrayList<java.lang.Double> values)
Create a TupleCriterion that only accepts tuples whose value of the specified column is not within the provided list.

The value of the column will be converted into double to compare, if it's not double.


notWithinString

public static TupleCriterion notWithinString(java.lang.String column,
                                             java.util.ArrayList<java.lang.String> values)
Create a TupleCriterion that only accepts tuples whose value of the specified column is not within the provided list.

The value of the column will be converted into string to compare, if it's not string.


regex

public static TupleCriterion regex(java.lang.String column,
                                   java.lang.String regex)
Define a TupleCriterion that only extracts records when the value of the column meets the regex.

Parameters:
column - the name of a column to be tested on its value whether it meets the specified regex or not.
regex - a regular expression to test.
Returns:
a TupleCriterion that accepts tuples whose value of the column matches the given regex.

notNull

public static TupleCriterion notNull(java.lang.String column)
Create a TupleCriterion that only accepts tuples with the value of the given column is not null nor empty string.


eq

public static TupleCriterion eq(java.lang.String column,
                                java.lang.String value)
Specify the given column's value equals to value


eq

public static TupleCriterion eq(java.lang.String column,
                                java.lang.Number value)
Specify the given column's value equals to value


eq

public static TupleCriterion eq(java.lang.String column,
                                boolean trueFalse)
Specify the given column's value equals to trueFalse


eq

public static TupleCriterion eq(java.lang.String column,
                                java.lang.String columnDateFormat,
                                java.util.Date date)
Return a TupleCriterion that parses the value of column with the given columnDateFormat into milliseconds (A), compares it with the milliseconds of the date (B), and only accepts tuples when A equals B.

Parameters:
column - name of a column to be tested in a dataset.
columnDateFormat - the date format of the specified column in the dataset. The columnDateFormat pattern syntax is the same as that of SimpleDateFormat.
date - a date constraint to be tested.

eq

public static TupleCriterion eq(java.lang.String column,
                                java.util.Date date)
Return a TupleCriterion that only accepts tuples with the value of column is equal to the specified date.

If the value of the column is an instance of Date, then the comparison is done by calling Date.getTime() on the value and comparing the result with date.getTime().

If the type of the value is not an instance of Date, then it will be parsed into date format using either the format of yyyy-MM-dd or yyyy-MM-dd HH:mm:ss.


eqColumns

public static TupleCriterion eqColumns(java.lang.String column1,
                                       java.lang.String column2)
Create a TupleCriterion that only accepts tuples in which the two columns' values are equal.


ne

public static TupleCriterion ne(java.lang.String columnName,
                                java.lang.String value)
not equals


ne

public static TupleCriterion ne(java.lang.String columnName,
                                java.lang.Number value)

ne

public static TupleCriterion ne(java.lang.String columnName,
                                java.lang.String columnFormat,
                                java.util.Date date)

ne

public static TupleCriterion ne(java.lang.String columnName,
                                java.util.Date date)

ne

public static TupleCriterion ne(java.lang.String columnName,
                                java.lang.String columnFormat,
                                java.util.Calendar date)

ne

public static TupleCriterion ne(java.lang.String columnName,
                                java.util.Calendar date)

ne

public static TupleCriterion ne(java.lang.String column,
                                boolean trueFalse)

neColumns

public static TupleCriterion neColumns(java.lang.String column1,
                                       java.lang.String column2)
compare if two columns' values are not equal.


gt

public static TupleCriterion gt(java.lang.String columnName,
                                java.lang.String value)
greater than


gt

public static TupleCriterion gt(java.lang.String columnName,
                                java.lang.Number value)

gt

public static TupleCriterion gt(java.lang.String columnName,
                                java.lang.String columnFormat,
                                java.util.Date date)

gt

public static TupleCriterion gt(java.lang.String columnName,
                                java.util.Date date)

gt

public static TupleCriterion gt(java.lang.String columnName,
                                java.lang.String columnFormat,
                                java.util.Calendar date)

gt

public static TupleCriterion gt(java.lang.String columnName,
                                java.util.Calendar date)

gtColumns

public static TupleCriterion gtColumns(java.lang.String column1,
                                       java.lang.String column2)
compare if column1's value greater than column2's value


ge

public static TupleCriterion ge(java.lang.String columnName,
                                java.lang.String value)
greater than or equal


ge

public static TupleCriterion ge(java.lang.String columnName,
                                java.lang.Number value)

ge

public static TupleCriterion ge(java.lang.String columnName,
                                java.lang.String columnFormat,
                                java.util.Date date)

ge

public static TupleCriterion ge(java.lang.String columnName,
                                java.util.Date date)

ge

public static TupleCriterion ge(java.lang.String columnName,
                                java.lang.String columnFormat,
                                java.util.Calendar date)

ge

public static TupleCriterion ge(java.lang.String columnName,
                                java.util.Calendar date)

geColumns

public static TupleCriterion geColumns(java.lang.String column1,
                                       java.lang.String column2)
compare if column1's value is greater than or equal to column2's value


le

public static TupleCriterion le(java.lang.String columnName,
                                java.lang.String value)
less than or equal


le

public static TupleCriterion le(java.lang.String columnName,
                                java.lang.Number value)

le

public static TupleCriterion le(java.lang.String columnName,
                                java.lang.String columnFormat,
                                java.util.Date date)

le

public static TupleCriterion le(java.lang.String columnName,
                                java.util.Date date)

le

public static TupleCriterion le(java.lang.String columnName,
                                java.lang.String columnFormat,
                                java.util.Calendar date)

le

public static TupleCriterion le(java.lang.String columnName,
                                java.util.Calendar date)

leColumns

public static TupleCriterion leColumns(java.lang.String column1,
                                       java.lang.String column2)
compare if column1's value is less than or equal to column2's value


lt

public static TupleCriterion lt(java.lang.String columnName,
                                java.lang.String value)
less than


lt

public static TupleCriterion lt(java.lang.String columnName,
                                java.lang.Number value)

lt

public static TupleCriterion lt(java.lang.String columnName,
                                java.lang.String columnFormat,
                                java.util.Date date)

lt

public static TupleCriterion lt(java.lang.String columnName,
                                java.util.Date date)

lt

public static TupleCriterion lt(java.lang.String columnName,
                                java.lang.String columnFormat,
                                java.util.Calendar date)

lt

public static TupleCriterion lt(java.lang.String columnName,
                                java.util.Calendar date)

ltColumns

public static TupleCriterion ltColumns(java.lang.String column1,
                                       java.lang.String column2)
compare if column1's value is less than column2's value