Example usage for org.apache.hadoop.io Text toString

List of usage examples for org.apache.hadoop.io Text toString

Introduction

On this page you can find example usages of org.apache.hadoop.io Text toString.

Prototype

@Override
public String toString() 

Document

Convert text back to string
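
A minimal standalone sketch of the round trip (my own illustration, not taken from the projects below; it assumes only hadoop-common on the classpath, and the class name TextToStringExample is made up):

import org.apache.hadoop.io.Text;

public class TextToStringExample {
    public static void main(String[] args) {
        // Wrap a Java String in a Hadoop Text, which stores it as UTF-8 bytes.
        Text text = new Text("hello, world");

        // toString() decodes the UTF-8 bytes back into a Java String.
        String decoded = text.toString();

        System.out.println(decoded); // prints: hello, world
    }
}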

Usage

From source file:com.moz.fiji.hive.HiveTableDescription.java

License:Apache License

/**
 * Creates the in-memory row object that contains the column data in the Hive table.
 *
 * <p>The returned object will be given to the object inspector for
 * extracting column data. Since our object inspector is the
 * standard java inspector, the structure of the object returned
 * should match the data types specified in the Hive table schema.</p>
 *
 * @param columnData The HBase data from the row.
 * @param objectInspector The object inspector defining the format of the columnData.
 * @return An object representing the row.
 * @throws IOException If there is an IO error.
 */
public FijiRowDataWritable createWritableObject(Object columnData, ObjectInspector objectInspector)
        throws IOException {

    Preconditions.checkArgument(objectInspector instanceof StandardStructObjectInspector);
    StandardStructObjectInspector structObjectInspector = (StandardStructObjectInspector) objectInspector;

    // Hive passes us a struct that should have all columns that are specified in the Hive table
    // description.
    Preconditions.checkState(mExpressions.size() == structObjectInspector.getAllStructFieldRefs().size(),
            "Table has {} columns, but query has {} columns", mExpressions.size(),
            structObjectInspector.getAllStructFieldRefs().size());

    List<Object> structColumnData = structObjectInspector.getStructFieldsDataAsList(columnData);
    Object entityIdShellStringObject = structColumnData.get(mEntityIdShellStringIndex);
    Text entityIdShellString = new Text((String) entityIdShellStringObject);
    EntityIdWritable entityIdWritable = new EntityIdWritable(entityIdShellString.toString());

    // TODO(FIJIHIVE-30) Process EntityId component columns here.

    Map<FijiColumnName, NavigableMap<Long, FijiCellWritable>> writableData = Maps.newHashMap();
    for (int c = 0; c < mExpressions.size(); c++) {
        if (mExpressions.get(c).isCellData()) {
            ObjectInspector colObjectInspector = structObjectInspector.getAllStructFieldRefs().get(c)
                    .getFieldObjectInspector();
            Map<FijiColumnName, NavigableMap<Long, FijiCellWritable>> writableTimeseriesData = mExpressions
                    .get(c).convertToTimeSeries(colObjectInspector, structColumnData.get(c));
            for (FijiColumnName fijiColumnName : writableTimeseriesData.keySet()) {
                NavigableMap<Long, FijiCellWritable> columnTimeseries = writableTimeseriesData
                        .get(fijiColumnName);

                if (writableData.containsKey(fijiColumnName)) {
                    // Merge these timeseries together.
                    writableData.get(fijiColumnName).putAll(columnTimeseries);
                } else {
                    writableData.put(fijiColumnName, columnTimeseries);
                }
            }
        }
    }

    FijiRowDataWritable fijiRowData = new FijiRowDataWritable(entityIdWritable, writableData);
    return fijiRowData;
}
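
In this example, toString() converts the Text holding the entity id shell string back to a plain String before it is handed to the EntityIdWritable constructor.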

From source file:com.moz.fiji.mapreduce.input.impl.TestXMLInputFormat.java

License:Apache License

@Test
public void testNoRecord() throws IOException {
    XMLRecordReader reader = new XMLRecordReader();
    BufferedReader bReader = new BufferedReader(new StringReader("There's no record in here."));
    LongWritable key = new LongWritable();
    StringBuilder sb = new StringBuilder();
    Text record = new Text();
    bReader.mark(1000);
    assertFalse(reader.findRecordStart("<user".toCharArray(), 0L, // Start offset
            100L, // End offset
            bReader, key, sb));
    assertEquals(0, key.get());
    assertEquals("", sb.toString());
    bReader.reset();
    assertFalse(reader.findRecordEnd("</user>".toCharArray(), bReader, 100L, // End offset
            100L, // Overrun allowance
            sb, record));
    assertEquals("There's no record in here.", sb.toString());
    assertEquals("", record.toString());
}
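
In these XMLRecordReader tests, record.toString() is the value under assertion: after findRecordEnd it holds the captured XML record, or the empty string when no complete record was found.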

From source file:com.moz.fiji.mapreduce.input.impl.TestXMLInputFormat.java

License:Apache License

@Test
public void testRegularRecord() throws IOException {
    XMLRecordReader reader = new XMLRecordReader();
    BufferedReader bReader = new BufferedReader(new StringReader("<user></user>"));
    LongWritable key = new LongWritable();
    StringBuilder sb = new StringBuilder();
    Text record = new Text();
    assertTrue(reader.findRecordStart("<user".toCharArray(), 0L, // Start offset
            100L, // End offset
            bReader, key, sb));
    assertEquals(0, key.get());
    assertEquals("<user>", sb.toString());
    assertTrue(reader.findRecordEnd("</user>".toCharArray(), bReader, 100L, // End offset
            100L, // Overrun allowance
            sb, record));
    assertEquals("<user></user>", record.toString());
}

From source file:com.moz.fiji.mapreduce.input.impl.TestXMLInputFormat.java

License:Apache License

@Test
public void testTooLongRecord() throws IOException {
    XMLRecordReader reader = new XMLRecordReader();
    BufferedReader bReader = new BufferedReader(new StringReader("<user></user>"));
    StringBuilder sb = new StringBuilder();
    Text record = new Text();
    bReader.mark(1000);
    // Small overrun allowance will break before finding the end of the record.
    assertFalse(reader.findRecordEnd("</user>".toCharArray(), bReader, 8L, // End offset
            1L, // Overrun allowance
            sb, record));
    assertEquals("<user></us", sb.toString());
    assertEquals("", record.toString());

    reader = new XMLRecordReader();
    sb = new StringBuilder();
    bReader.reset();
    // Large overrun allowance will find the end of the record.
    assertTrue(reader.findRecordEnd("</user>".toCharArray(), bReader, 8L, // End offset
            10L, // Overrun allowance
            sb, record));
    assertEquals("<user></user>", sb.toString());
    assertEquals("<user></user>", record.toString());
}

From source file:com.moz.fiji.mapreduce.input.impl.TestXMLInputFormat.java

License:Apache License

@Test
public void testCloseMatches() throws IOException {
    XMLRecordReader reader = new XMLRecordReader();
    BufferedReader bReader = new BufferedReader(new StringReader("<use></use> <users></users>"));
    LongWritable key = new LongWritable();
    StringBuilder sb = new StringBuilder();
    Text record = new Text();
    bReader.mark(1000);
    assertFalse(reader.findRecordStart("<user".toCharArray(), 0L, // Start offset
            100L, // End offset
            bReader, key, sb));
    assertEquals(0, key.get());
    assertEquals("", sb.toString());
    bReader.reset();
    assertFalse(reader.findRecordEnd("</user>".toCharArray(), bReader, 100L, // End offset
            100L, // Overrun allowance
            sb, record));
    assertEquals("<use></use> <users></users>", sb.toString());
    assertEquals("", record.toString());
}

From source file:com.moz.fiji.mapreduce.input.impl.TestXMLInputFormat.java

License:Apache License

@Test
public void testCompleteRecord() throws IOException {
    XMLRecordReader reader = new XMLRecordReader();
    BufferedReader bReader = new BufferedReader(new StringReader("1<user><name>Bob</name></user>"));
    LongWritable key = new LongWritable();
    StringBuilder sb = new StringBuilder();
    Text record = new Text();

    assertTrue(reader.findRecordStart("<user".toCharArray(), 0L, // Start offset
            100L, // End offset
            bReader, key, sb));

    assertTrue(reader.findRecordEnd("</user>".toCharArray(), bReader, 100L, // End offset
            100L, // Overrun allowance
            sb, record));
    assertEquals(1, key.get());
    assertEquals("<user><name>Bob</name></user>", record.toString());
}

From source file:com.moz.fiji.mapreduce.input.impl.TestXMLInputFormat.java

License:Apache License

@Test
public void testTwoRecords() throws IOException {
    XMLRecordReader reader = new XMLRecordReader();
    BufferedReader bReader = new BufferedReader(new StringReader("<user>1</user><user>2</user>"));
    LongWritable key = new LongWritable();
    StringBuilder sb = new StringBuilder();
    Text record = new Text();

    // Find the first record.
    assertTrue(reader.findRecordStart("<user".toCharArray(), 0L, // Start offset
            100L, // End offset
            bReader, key, sb));
    assertEquals(0, key.get());
    assertEquals("<user>", sb.toString());
    assertTrue(reader.findRecordEnd("</user>".toCharArray(), bReader, 100L, // End offset
            100L, // Overrun allowance
            sb, record));
    assertEquals("<user>1</user>", record.toString());

    // Find the second record.
    sb = new StringBuilder();
    record = new Text();
    assertTrue(reader.findRecordStart("<user".toCharArray(), 0L, // Start offset
            100L, // End offset
            bReader, key, sb));
    assertEquals("<user>", sb.toString());
    assertTrue(reader.findRecordEnd("</user>".toCharArray(), bReader, 100L, // End offset
            100L, // Overrun allowance
            sb, record));
    assertEquals("<user>2</user>", record.toString());
}

From source file:com.moz.fiji.mapreduce.lib.bulkimport.CommonLogBulkImporter.java

License:Apache License

/** {@inheritDoc} */
@Override
public void produce(Text value, FijiTableContext context) throws IOException {
    Map<Field, String> fieldMap;
    try {
        fieldMap = CommonLogParser.get().parseCommonLog(value.toString());
    } catch (ParseException pe) {
        reject(value, context, "Unable to parse row: " + value.toString());
        return;
    }

    Field entityIdSource = Field.valueOf(getEntityIdSource());
    EntityId eid = context.getEntityId(fieldMap.get(entityIdSource));

    for (FijiColumnName fijiColumnName : getDestinationColumns()) {
        Field source = Field.valueOf(getSource(fijiColumnName));
        String fieldValue = fieldMap.get(source);
        if (fieldValue != null) {
            // TODO(FIJIMRLIB-12) Add some ability to use timestamps derived from the log file.
            context.put(eid, fijiColumnName.getFamily(), fijiColumnName.getQualifier(), fieldValue);
        } else {
            reject(value, context, "Log file missing field: " + source);
        }
    }
}
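
Here value.toString() turns the incoming Text line into a String for CommonLogParser, and again for the rejection message when parsing fails.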

From source file:com.moz.fiji.mapreduce.lib.bulkimport.CSVBulkImporter.java

License:Apache License

/** {@inheritDoc} */
@Override
public void produce(Text value, FijiTableContext context) throws IOException {
    // This is the header line, since the field map hasn't been populated yet
    if (mFieldMap == null) {
        List<String> fields = null;
        try {
            fields = split(value.toString());
        } catch (ParseException pe) {
            LOG.error("Unable to parse header row: {} with exception {}", value.toString(), pe.getMessage());
            throw new IOException("Unable to parse header row: " + value.toString());
        }
        initializeHeader(fields);
        // Don't actually import this line
        return;
    }

    List<String> fields = null;
    try {
        fields = split(value.toString());
    } catch (ParseException pe) {
        reject(value, context, pe.toString());
        return;
    }

    List<String> emptyFields = Lists.newArrayList();
    for (FijiColumnName fijiColumnName : getDestinationColumns()) {
        final EntityId eid = getEntityId(fields, context);
        String source = getSource(fijiColumnName);

        if (mFieldMap.get(source) < fields.size()) {
            String fieldValue = fields.get(mFieldMap.get(source));
            if (!fieldValue.isEmpty()) {
                String family = fijiColumnName.getFamily();
                String qualifier = fijiColumnName.getQualifier();
                if (isOverrideTimestamp()) {
                    // Override the timestamp from the imported source
                    Long timestamp = getTimestamp(fields);
                    context.put(eid, family, qualifier, timestamp, convert(fijiColumnName, fieldValue));
                } else {
                    // Use the system time as the timestamp
                    context.put(eid, family, qualifier, convert(fijiColumnName, fieldValue));
                }
            } else {
                emptyFields.add(source);
            }
        }
    }
    if (!emptyFields.isEmpty()) {
        incomplete(value, context, "Record is missing fields: " + StringUtils.join(emptyFields, ","));
    }

}
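
Here value.toString() supplies the raw line to split(), both for the header row and for each data row, and appears in the error messages when parsing fails.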

From source file:com.moz.fiji.mapreduce.lib.bulkimport.DescribedInputTextBulkImporter.java

License:Apache License

/**
 * Post-processes incomplete lines (logging, keeping count, etc.).
 *
 * @param line the line that was marked incomplete by the producer.
 * @param context the context in which the incompletion occurred.
 * @param reason the reason why this line was incomplete.
 */
public void incomplete(Text line, FijiTableContext context, String reason) {
    if (mIncompleteLineCounter % mLogRate == 0L) {
        LOG.error("Incomplete line: {} with reason: {}", line.toString(), reason);
    }
    mIncompleteLineCounter++;

    //TODO(FIJIMRLIB-9) Abort this bulk importer job early if incomplete records exceed a threshold
    context.incrementCounter(JobHistoryCounters.BULKIMPORTER_RECORDS_INCOMPLETE);

    //TODO(FIJIMRLIB-4) Add a strict mode where we reject incomplete lines
}
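
Here line.toString() renders the offending input line for the rate-limited error log.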