Usage examples for org.apache.hadoop.io.Text#toString()
@Override
public String toString()
From source file:com.moz.fiji.hive.HiveTableDescription.java
License:Apache License
/** * Creates the in-memory row object that contains the column data in the Hive table. * * <p>The returned object will be given to the object inspector for * extracting column data. Since our object inspector is the * standard java inspector, the structure of the object returned * should match the data types specified in the Hive table schema.</p> * * @param columnData The HBase data from the row. * @param objectInspector The object inspector defining the format of the columnData. * @return An object representing the row. * @throws IOException If there is an IO error. *//*from ww w.j av a 2 s .com*/ public FijiRowDataWritable createWritableObject(Object columnData, ObjectInspector objectInspector) throws IOException { Preconditions.checkArgument(objectInspector instanceof StandardStructObjectInspector); StandardStructObjectInspector structObjectInspector = (StandardStructObjectInspector) objectInspector; // Hive passes us a struct that should have all columns that are specified in the Hive table // description. Preconditions.checkState(mExpressions.size() == structObjectInspector.getAllStructFieldRefs().size(), "Table has {} columns, but query has {} columns", mExpressions.size(), structObjectInspector.getAllStructFieldRefs().size()); List<Object> structColumnData = structObjectInspector.getStructFieldsDataAsList(columnData); Object entityIdShellStringObject = structColumnData.get(mEntityIdShellStringIndex); Text entityIdShellString = new Text((String) entityIdShellStringObject); EntityIdWritable entityIdWritable = new EntityIdWritable(entityIdShellString.toString()); // TODO(FIJIHIVE-30) Process EntityId component columns here. 
Map<FijiColumnName, NavigableMap<Long, FijiCellWritable>> writableData = Maps.newHashMap(); for (int c = 0; c < mExpressions.size(); c++) { if (mExpressions.get(c).isCellData()) { ObjectInspector colObjectInspector = structObjectInspector.getAllStructFieldRefs().get(c) .getFieldObjectInspector(); Map<FijiColumnName, NavigableMap<Long, FijiCellWritable>> writableTimeseriesData = mExpressions .get(c).convertToTimeSeries(colObjectInspector, structColumnData.get(c)); for (FijiColumnName fijiColumnName : writableTimeseriesData.keySet()) { NavigableMap<Long, FijiCellWritable> columnTimeseries = writableTimeseriesData .get(fijiColumnName); if (writableData.containsKey(fijiColumnName)) { // Merge these timeseries together. writableData.get(fijiColumnName).putAll(columnTimeseries); } else { writableData.put(fijiColumnName, columnTimeseries); } } } } FijiRowDataWritable fijiRowData = new FijiRowDataWritable(entityIdWritable, writableData); return fijiRowData; }
From source file:com.moz.fiji.mapreduce.input.impl.TestXMLInputFormat.java
License:Apache License
@Test public void testNoRecord() throws IOException { XMLRecordReader reader = new XMLRecordReader(); BufferedReader bReader = new BufferedReader(new StringReader("There's no record in here.")); LongWritable key = new LongWritable(); StringBuilder sb = new StringBuilder(); Text record = new Text(); bReader.mark(1000);/*from w ww . j av a 2 s. c om*/ assertFalse(reader.findRecordStart("<user".toCharArray(), 0L, // Start offset 100L, // End offset bReader, key, sb)); assertEquals(0, key.get()); assertEquals("", sb.toString()); bReader.reset(); assertFalse(reader.findRecordEnd("</user>".toCharArray(), bReader, 100L, // End offset 100L, // Overrun allowance sb, record)); assertEquals("There's no record in here.", sb.toString()); assertEquals("", record.toString()); }
From source file:com.moz.fiji.mapreduce.input.impl.TestXMLInputFormat.java
License:Apache License
@Test public void testRegularRecord() throws IOException { XMLRecordReader reader = new XMLRecordReader(); BufferedReader bReader = new BufferedReader(new StringReader("<user></user>")); LongWritable key = new LongWritable(); StringBuilder sb = new StringBuilder(); Text record = new Text(); assertTrue(reader.findRecordStart("<user".toCharArray(), 0L, // Start offset 100L, // End offset bReader, key, sb));/*from w w w . j av a2 s . co m*/ assertEquals(0, key.get()); assertEquals("<user>", sb.toString()); assertTrue(reader.findRecordEnd("</user>".toCharArray(), bReader, 100L, // End offset 100L, // Overrun allowance sb, record)); assertEquals("<user></user>", record.toString()); }
From source file:com.moz.fiji.mapreduce.input.impl.TestXMLInputFormat.java
License:Apache License
@Test public void testTooLongRecord() throws IOException { XMLRecordReader reader = new XMLRecordReader(); BufferedReader bReader = new BufferedReader(new StringReader("<user></user>")); StringBuilder sb = new StringBuilder(); Text record = new Text(); bReader.mark(1000);//from www. j a va 2 s . co m // Small overrun allowance will break before finding the end of the record. assertFalse(reader.findRecordEnd("</user>".toCharArray(), bReader, 8L, // End offset 1L, // Overrun allowance sb, record)); assertEquals("<user></us", sb.toString()); assertEquals("", record.toString()); reader = new XMLRecordReader(); sb = new StringBuilder(); bReader.reset(); // Large overrun allowance will find the end of the record. assertTrue(reader.findRecordEnd("</user>".toCharArray(), bReader, 8L, // End offset 10L, // Overrun allowance sb, record)); assertEquals("<user></user>", sb.toString()); assertEquals("<user></user>", record.toString()); }
From source file:com.moz.fiji.mapreduce.input.impl.TestXMLInputFormat.java
License:Apache License
@Test public void testCloseMatches() throws IOException { XMLRecordReader reader = new XMLRecordReader(); BufferedReader bReader = new BufferedReader(new StringReader("<use></use> <users></users>")); LongWritable key = new LongWritable(); StringBuilder sb = new StringBuilder(); Text record = new Text(); bReader.mark(1000);// w w w . j av a2s . c o m assertFalse(reader.findRecordStart("<user".toCharArray(), 0L, // Start offset 100L, // End offset bReader, key, sb)); assertEquals(0, key.get()); assertEquals("", sb.toString()); bReader.reset(); assertFalse(reader.findRecordEnd("</user>".toCharArray(), bReader, 100L, // End offset 100L, // Overrun allowance sb, record)); assertEquals("<use></use> <users></users>", sb.toString()); assertEquals("", record.toString()); }
From source file:com.moz.fiji.mapreduce.input.impl.TestXMLInputFormat.java
License:Apache License
@Test public void testCompleteRecord() throws IOException { XMLRecordReader reader = new XMLRecordReader(); BufferedReader bReader = new BufferedReader(new StringReader("1<user><name>Bob</name></user>")); LongWritable key = new LongWritable(); StringBuilder sb = new StringBuilder(); Text record = new Text(); assertTrue(reader.findRecordStart("<user".toCharArray(), 0L, // Start offset 100L, // End offset bReader, key, sb));/*from w w w . j a va2 s . c om*/ assertTrue(reader.findRecordEnd("</user>".toCharArray(), bReader, 100L, // End offset 100L, // Overrun allowance sb, record)); assertEquals(1, key.get()); assertEquals("<user><name>Bob</name></user>", record.toString()); }
From source file:com.moz.fiji.mapreduce.input.impl.TestXMLInputFormat.java
License:Apache License
@Test public void testTwoRecords() throws IOException { XMLRecordReader reader = new XMLRecordReader(); BufferedReader bReader = new BufferedReader(new StringReader("<user>1</user><user>2</user>")); LongWritable key = new LongWritable(); StringBuilder sb = new StringBuilder(); Text record = new Text(); // Find the first record. assertTrue(reader.findRecordStart("<user".toCharArray(), 0L, // Start offset 100L, // End offset bReader, key, sb));//from w w w.ja v a 2 s. c o m assertEquals(0, key.get()); assertEquals("<user>", sb.toString()); assertTrue(reader.findRecordEnd("</user>".toCharArray(), bReader, 100L, // End offset 100L, // Overrun allowance sb, record)); assertEquals("<user>1</user>", record.toString()); // Find the second record. sb = new StringBuilder(); record = new Text(); assertTrue(reader.findRecordStart("<user".toCharArray(), 0L, // Start offset 100L, // End offset bReader, key, sb)); assertEquals("<user>", sb.toString()); assertTrue(reader.findRecordEnd("</user>".toCharArray(), bReader, 100L, // End offset 100L, // Overrun allowance sb, record)); assertEquals("<user>2</user>", record.toString()); }
From source file:com.moz.fiji.mapreduce.lib.bulkimport.CommonLogBulkImporter.java
License:Apache License
/** {@inheritDoc} */ @Override//from w w w. ja v a 2s . c o m public void produce(Text value, FijiTableContext context) throws IOException { Map<Field, String> fieldMap; try { fieldMap = CommonLogParser.get().parseCommonLog(value.toString()); } catch (ParseException pe) { reject(value, context, "Unable to parse row: " + value.toString()); return; } Field entityIdSource = Field.valueOf(getEntityIdSource()); EntityId eid = context.getEntityId(fieldMap.get(entityIdSource)); for (FijiColumnName fijiColumnName : getDestinationColumns()) { Field source = Field.valueOf(getSource(fijiColumnName)); String fieldValue = fieldMap.get(source); if (fieldValue != null) { // TODO(FIJIMRLIB-12) Add some ability to use timestamps derived from the log file. context.put(eid, fijiColumnName.getFamily(), fijiColumnName.getQualifier(), fieldValue); } else { reject(value, context, "Log file missing field: " + source); } } }
From source file:com.moz.fiji.mapreduce.lib.bulkimport.CSVBulkImporter.java
License:Apache License
/** {@inheritDoc} */ @Override//from w ww . j a va2s . c o m public void produce(Text value, FijiTableContext context) throws IOException { // This is the header line since fieldList isn't populated if (mFieldMap == null) { List<String> fields = null; try { fields = split(value.toString()); } catch (ParseException pe) { LOG.error("Unable to parse header row: {} with exception {}", value.toString(), pe.getMessage()); throw new IOException("Unable to parse header row: " + value.toString()); } initializeHeader(fields); // Don't actually import this line return; } List<String> fields = null; try { fields = split(value.toString()); } catch (ParseException pe) { reject(value, context, pe.toString()); return; } List<String> emptyFields = Lists.newArrayList(); for (FijiColumnName fijiColumnName : getDestinationColumns()) { final EntityId eid = getEntityId(fields, context); String source = getSource(fijiColumnName); if (mFieldMap.get(source) < fields.size()) { String fieldValue = fields.get(mFieldMap.get(source)); if (!fieldValue.isEmpty()) { String family = fijiColumnName.getFamily(); String qualifier = fijiColumnName.getQualifier(); if (isOverrideTimestamp()) { // Override the timestamp from the imported source Long timestamp = getTimestamp(fields); context.put(eid, family, qualifier, timestamp, convert(fijiColumnName, fieldValue)); } else { // Use the system time as the timestamp context.put(eid, family, qualifier, convert(fijiColumnName, fieldValue)); } } else { emptyFields.add(source); } } } if (!emptyFields.isEmpty()) { incomplete(value, context, "Record is missing fields: " + StringUtils.join(emptyFields, ",")); } }
From source file:com.moz.fiji.mapreduce.lib.bulkimport.DescribedInputTextBulkImporter.java
License:Apache License
/** * Post-processes incomplete lines(Logging, keeping count, etc). * * @param line the line that was marked incomplete incomplete by the producer. * @param context the context in which the incompletion occured. * @param reason the reason why this line was incomplete. *///from w ww .j a va 2s.c om public void incomplete(Text line, FijiTableContext context, String reason) { if (mIncompleteLineCounter % mLogRate == 0L) { LOG.error("Incomplete line: {} with reason: {}", line.toString(), reason); } mIncompleteLineCounter++; //TODO(FIJIMRLIB-9) Abort this bulk importer job early if incomplete records exceed a threshold context.incrementCounter(JobHistoryCounters.BULKIMPORTER_RECORDS_INCOMPLETE); //TODO(FIJIMRLIB-4) Add a strict mode where we reject incomplete lines }