Example usage for org.apache.hadoop.io Text Text

Introduction

This page collects usage examples of the org.apache.hadoop.io.Text no-argument constructor, Text().

Prototype

public Text() 
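
The no-argument constructor creates an empty, mutable Text whose contents are filled in later with set(). A minimal sketch of the construct-then-reuse pattern:

import org.apache.hadoop.io.Text;

Text text = new Text();   // empty Text backed by a zero-length byte buffer
text.set("hello");        // fill it in later
text.set("world");        // overwrite in place; the buffer is reused

As the examples below show, this is the standard Hadoop idiom: record readers and mappers allocate one Text up front and overwrite it for every record.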

Usage

From source file: co.cask.cdap.data.stream.TextStreamInputFormat.java

License: Apache License

@Override
protected StreamEventDecoder<LongWritable, Text> createStreamEventDecoder() {
    return new StreamEventDecoder<LongWritable, Text>() {
        private final LongWritable key = new LongWritable();
        private final Text value = new Text();

        @Override
        public DecodeResult<LongWritable, Text> decode(StreamEvent event,
                DecodeResult<LongWritable, Text> result) {
            key.set(event.getTimestamp());
            value.set(Charsets.UTF_8.decode(event.getBody()).toString());
            return result.setKey(key).setValue(value);
        }
    };
}
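
The decoder above allocates its LongWritable key and Text value once and overwrites them for each event, avoiding per-record allocation. A minimal sketch of the same reuse pattern with plain writables:

LongWritable key = new LongWritable();
Text value = new Text();
// every record overwrites the same two instances
key.set(1000L);
value.set("first body");
key.set(2000L);
value.set("second body");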

From source file: co.nubetech.hiho.dedup.DelimitedLineRecordReader.java

License: Apache License

/**
 * Initializes the reader for the given split. The field delimiter and the
 * index of the key column are read from the job configuration.
 */

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.delimiter = job.get(DelimitedTextInputFormat.DELIMITER_CONF);
    this.column = job.getInt(DelimitedTextInputFormat.COLUMN_CONF, 0);
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new LineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        in = new LineReader(fileIn, job);
    }
    if (skipFirstLine) { // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}
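
The new Text() passed to readLine() above is a scratch sink: when a split does not begin at byte 0, the reader backs up one byte and consumes the partial first line, which belongs to the previous split's reader, keeping only the returned byte count. A sketch of the idiom in isolation (restating the last lines above):

// Discard the partial first line; only the consumed byte count matters.
Text scratch = new Text();
int consumed = in.readLine(scratch, 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
start += consumed;   // start now points at the first line this split owns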

From source file: co.nubetech.hiho.dedup.DelimitedLineRecordReader.java

License: Apache License

public boolean nextKeyValue() throws IOException {

    if (value == null) {
        value = new Text();
    }
    int newSize = 0;
    while (pos < end) {
        newSize = in.readLine(value, maxLineLength,
                Math.max((int) Math.min(Integer.MAX_VALUE, end - pos), maxLineLength));
        if (newSize == 0) {
            break;
        }
        pos += newSize;
        if (newSize < maxLineLength) {
            break;
        }

        // line too long. try again
        logger.info("Skipped line of size " + newSize + " at pos " + (pos - newSize));
    }
    if (newSize == 0) {
        key = null;
        value = null;
        return false;
    } else {
        // we calculate the key from the value here
        if (value != null) {
            logger.debug("Value is: " + value);
            logger.debug("Column is: " + column);
            logger.debug("Delimiter is: " + delimiter);

            key = getColumn(value, column, delimiter);

            logger.debug("Value after generating keyColumn: " + value);
            logger.debug("Key is: " + key);

        }
        return true;
    }
}
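
Assuming DelimitedLineRecordReader follows the standard RecordReader contract (the getCurrentKey()/getCurrentValue() accessors are not shown in this snippet), a hypothetical consumption loop would be:

// Hypothetical driver; accessor names assumed from the RecordReader contract.
while (reader.nextKeyValue()) {
    Text key = reader.getCurrentKey();      // the extracted key column
    Text line = reader.getCurrentValue();   // the whole delimited line
    // process key and line here
}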

From source file: co.nubetech.hiho.dedup.DelimitedLineRecordReader.java

License: Apache License

public Text getColumn(Text val, int column, String delimiter) throws IOException {
    if (delimiter == null || delimiter.equals("")) {
        throw new IOException("Value of delimiter is empty");
    }
    int lastOccurance = 0;
    int occurance = 0;
    for (int i = 0; i < column; i++) {
        occurance = val.find(delimiter, lastOccurance) - lastOccurance;
        lastOccurance = lastOccurance + occurance + delimiter.length();
    }

    logger.debug("text value is: " + val);
    int delimiterLength = delimiter.length();
    int startPosition = lastOccurance - (occurance + delimiterLength);
    Text keyColumn = new Text();
    keyColumn.set(val.getBytes(), startPosition, occurance);
    return keyColumn;
}
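
A worked trace makes the offset arithmetic concrete (input chosen for illustration): getColumn(new Text("a,b,c"), 2, ",") proceeds as follows:

// i=0: find(",", 0) = 1 -> occurance = 1, lastOccurance = 0 + 1 + 1 = 2
// i=1: find(",", 2) = 3 -> occurance = 1, lastOccurance = 2 + 1 + 1 = 4
// startPosition = 4 - (1 + 1) = 2, length = occurance = 1 -> returns "b"

So column is 1-based: column = 2 selects the second field. Note that Text.find() returns -1 when the delimiter is absent, so the arithmetic assumes every requested column is followed by a delimiter; asking for the last (or a non-existent) column would produce a negative length.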

From source file: co.nubetech.hiho.dedup.HihoTuple.java

License: Apache License

@Override
public void readFields(DataInput in) throws IOException {
    logger.debug("Reading fields");
    hash = new MD5Hash();
    hash.readFields(in);
    keyClass = new Text();
    keyClass.readFields(in);
    try {
        logger.debug("Key class in readField() of HihoTuple class is :" + keyClass);
        key = (K) Class.forName(keyClass.toString()).newInstance();

    } catch (Exception e) {
        e.printStackTrace();
        throw new IOException("Error deserializing the HihoTuple", e);
    }
    key.readFields(in);
}
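
readFields() expects the stream to hold an MD5Hash, then the key's class name as a Text, then the key itself. A hypothetical mirror-image write() under that assumed layout (the method is not shown in this snippet):

@Override
public void write(DataOutput out) throws IOException {
    hash.write(out);                                  // the MD5 digest
    new Text(key.getClass().getName()).write(out);    // the key's class name
    key.write(out);                                   // the key's own fields
}

Also note that Class.forName(...).newInstance() is deprecated on modern JDKs; Class.forName(...).getDeclaredConstructor().newInstance() is the usual replacement.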

From source file: co.nubetech.hiho.mapred.input.FileStreamRecordReader.java

License: Apache License

@Override
public Text createKey() {
    logger.debug("Creating key");
    return new Text();
}

From source file: co.nubetech.hiho.mapreduce.DBInputDelimMapper.java

License: Apache License

public DBInputDelimMapper() {
    outkey = new Text();
    outval = new Text();
}

From source file: co.nubetech.hiho.mapreduce.TestDBInputDelimMapper.java

License: Apache License

@Test
public final void testMapperValidValues() throws IOException, InterruptedException {
    Mapper.Context context = mock(Mapper.Context.class);
    Configuration conf = new Configuration();
    conf.set(HIHOConf.INPUT_OUTPUT_DELIMITER, ",");
    when(context.getConfiguration()).thenReturn(conf);

    DBInputDelimMapper mapper = new DBInputDelimMapper();

    ColumnInfo intColumn = new ColumnInfo(0, Types.INTEGER, "intColumn");
    ColumnInfo stringColumn = new ColumnInfo(1, Types.VARCHAR, "stringColumn");
    ColumnInfo dateColumn = new ColumnInfo(1, Types.DATE, "dateColumn");
    ColumnInfo longColumn = new ColumnInfo(1, Types.BIGINT, "longColumn");
    ColumnInfo booleanColumn = new ColumnInfo(1, Types.BOOLEAN, "booleanColumn");
    ColumnInfo doubleColumn = new ColumnInfo(1, Types.DOUBLE, "doubleColumn");
    ColumnInfo charColumn = new ColumnInfo(1, Types.CHAR, "charColumn");
    ColumnInfo timeColumn = new ColumnInfo(1, Types.TIME, "timeColumn");
    ColumnInfo timeStampColumn = new ColumnInfo(1, Types.TIMESTAMP, "timeStampColumn");
    ColumnInfo floatColumn = new ColumnInfo(1, Types.FLOAT, "floatColumn");

    ArrayList<ColumnInfo> columns = new ArrayList<ColumnInfo>();
    columns.add(intColumn);
    columns.add(stringColumn);
    columns.add(dateColumn);
    columns.add(longColumn);
    columns.add(booleanColumn);
    columns.add(doubleColumn);
    columns.add(charColumn);
    columns.add(timeColumn);
    columns.add(timeStampColumn);
    columns.add(floatColumn);

    ArrayList<Comparable> values = new ArrayList<Comparable>();
    values.add(new Integer(12));
    values.add(new String("sam"));
    values.add(new Date());
    values.add(new Long(26564l));
    values.add(true);
    values.add(1.235);
    values.add('a');
    values.add(new Time(new Date().getTime()));
    values.add(new Time(new Date().getTime()));
    values.add(new Float(1.0f));

    GenericDBWritable val = new GenericDBWritable(columns, values);
    LongWritable key = new LongWritable(1);
    mapper.map(key, val, context);

    Text outkey = new Text();
    Text outval = new Text();
    StringBuilder builder = new StringBuilder();
    builder.append(new Integer(12) + "," + new String("sam") + "," + new Date() + "," + new Long(26564l) + ","
            + true + "," + 1.235 + "," + 'a' + "," + new Time(new Date().getTime()) + ","
            + new Time(new Date().getTime()) + "," + new Float(1.0f));

    outval.set(builder.toString());
    verify(context).write(outkey, outval);
}
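
The verify() call matches arguments with equals(), and Text equality is content-based (byte-for-byte), which is why the freshly constructed outkey and outval can match the instances the mapper reused internally:

// Text equality compares content, not object identity:
boolean emptyMatch = new Text().equals(new Text());           // true
boolean bytesMatch = new Text("sam").equals(new Text("sam")); // true

One caveat in the test as quoted: the expected string re-evaluates new Date() at assertion time, so the comparison is timing-sensitive around second boundaries.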

From source file: co.nubetech.hiho.mapreduce.TestDBInputDelimMapper.java

License: Apache License

@Test
public final void testMapperValidValuesDelimiter() throws IOException, InterruptedException {
    Mapper.Context context = mock(Mapper.Context.class);
    Configuration conf = new Configuration();
    String delimiter = "DELIM";
    conf.set(HIHOConf.INPUT_OUTPUT_DELIMITER, delimiter);
    when(context.getConfiguration()).thenReturn(conf);

    DBInputDelimMapper mapper = new DBInputDelimMapper();

    ColumnInfo intColumn = new ColumnInfo(0, Types.INTEGER, "intColumn");
    ColumnInfo stringColumn = new ColumnInfo(1, Types.VARCHAR, "stringColumn");
    ColumnInfo dateColumn = new ColumnInfo(1, Types.DATE, "dateColumn");
    ColumnInfo longColumn = new ColumnInfo(1, Types.BIGINT, "longColumn");
    ColumnInfo booleanColumn = new ColumnInfo(1, Types.BOOLEAN, "booleanColumn");
    ColumnInfo doubleColumn = new ColumnInfo(1, Types.DOUBLE, "doubleColumn");
    ColumnInfo charColumn = new ColumnInfo(1, Types.CHAR, "charColumn");
    ColumnInfo timeColumn = new ColumnInfo(1, Types.TIME, "timeColumn");
    ColumnInfo timeStampColumn = new ColumnInfo(1, Types.TIMESTAMP, "timeStampColumn");
    ColumnInfo floatColumn = new ColumnInfo(1, Types.FLOAT, "floatColumn");

    ArrayList<ColumnInfo> columns = new ArrayList<ColumnInfo>();
    columns.add(intColumn);
    columns.add(stringColumn);
    columns.add(dateColumn);
    columns.add(longColumn);
    columns.add(booleanColumn);
    columns.add(doubleColumn);
    columns.add(charColumn);
    columns.add(timeColumn);
    columns.add(timeStampColumn);
    columns.add(floatColumn);

    ArrayList<Comparable> values = new ArrayList<Comparable>();
    values.add(new Integer(12));
    values.add(new String("sam"));
    values.add(new Date());
    values.add(new Long(26564l));
    values.add(true);
    values.add(1.235);
    values.add('a');
    values.add(new Time(new Date().getTime()));
    values.add(new Time(new Date().getTime()));
    values.add(new Float(1.0f));

    GenericDBWritable val = new GenericDBWritable(columns, values);
    LongWritable key = new LongWritable(1);
    mapper.map(key, val, context);

    Text outkey = new Text();
    Text outval = new Text();
    StringBuilder builder = new StringBuilder();
    builder.append(new Integer(12) + delimiter + new String("sam") + delimiter + new Date() + delimiter
            + new Long(26564l) + delimiter + true + delimiter + 1.235 + delimiter + 'a' + delimiter
            + new Time(new Date().getTime()) + delimiter + new Time(new Date().getTime()) + delimiter
            + new Float(1.0f));

    outval.set(builder.toString());
    verify(context).write(outkey, outval);
}

From source file: co.nubetech.hiho.mapreduce.TestDBInputDelimMapper.java

License: Apache License

@Test
public final void testMapperNullValues() throws IOException, InterruptedException {
    Mapper.Context context = mock(Mapper.Context.class);
    Configuration conf = new Configuration();
    conf.set(HIHOConf.INPUT_OUTPUT_DELIMITER, ",");
    when(context.getConfiguration()).thenReturn(conf);

    DBInputDelimMapper mapper = new DBInputDelimMapper();
    ArrayList<ColumnInfo> columns = new ArrayList<ColumnInfo>();
    ArrayList<Comparable> values = new ArrayList<Comparable>();
    GenericDBWritable val = new GenericDBWritable(columns, values);
    LongWritable key = new LongWritable(1);
    mapper.map(key, val, context);

    Text outkey = new Text();
    Text outval = new Text();
    verify(context).write(outkey, outval);
}