List of usage examples for org.apache.hadoop.io Text find
public int find(String what, int start)
Finds any occurrence of `what` in the backing buffer, starting at position `start`.
From source file: co.nubetech.hiho.dedup.DelimitedLineRecordReader.java
License:Apache License
/**
 * Extracts a single delimiter-separated column from {@code val}.
 *
 * <p>Columns are numbered starting at 1. All offsets are byte offsets into the
 * UTF-8 buffer backing {@code val}, because that is what {@code Text.find}
 * reports. The last column of a value does not need a trailing delimiter: when
 * no further delimiter is found, the column runs to the end of the value.
 *
 * @param val       the text to split
 * @param column    1-based index of the column to extract
 * @param delimiter the separator between columns; must be non-empty
 * @return a new {@code Text} holding the bytes of the requested column
 * @throws IOException if {@code delimiter} is null or empty, if {@code column}
 *                     is less than 1, or if {@code val} has fewer than
 *                     {@code column} columns
 */
public Text getColumn(Text val, int column, String delimiter) throws IOException {
    if (delimiter == null || delimiter.equals("")) {
        throw new IOException("Value of delimiter is empty");
    }
    if (column < 1) {
        throw new IOException("Column index must be 1 or greater, but was " + column);
    }
    logger.debug("text value is: " + val);

    // Text.find returns byte offsets, so step over the delimiter by its encoded
    // byte length rather than its char count (they differ for non-ASCII delimiters).
    final int delimiterLength = delimiter.getBytes(java.nio.charset.StandardCharsets.UTF_8).length;
    final int valueLength = val.getLength();

    int nextStart = 0;  // where the next field begins
    int fieldStart = 0; // start of the field examined most recently
    int fieldEnd = 0;   // exclusive end of that field
    for (int current = 1; current <= column; current++) {
        fieldStart = nextStart;
        int delimiterPos = val.find(delimiter, fieldStart);
        if (delimiterPos >= 0) {
            fieldEnd = delimiterPos;
        } else if (current == column) {
            // No trailing delimiter: the requested column is the last field.
            fieldEnd = valueLength;
        } else {
            throw new IOException("Column " + column + " requested but the value has only "
                    + current + " column(s)");
        }
        nextStart = fieldEnd + delimiterLength;
    }

    Text keyColumn = new Text();
    keyColumn.set(val.getBytes(), fieldStart, fieldEnd - fieldStart);
    return keyColumn;
}
From source file:de.tudarmstadt.ukp.dkpro.c4corpus.hadoop.full.Phase3Step2DistinctDataJobTest.java
License:Apache License
@Test public void testSplit() throws Exception { Text key = new Text("123_456789"); // hard-split using array copy int i = key.find("_", 0); Text outputKey = new Text(""); byte[] bytes = key.getBytes(); outputKey.append(bytes, i + 1, bytes.length - i - 2); String fileName = new String(bytes, 0, i); assertEquals("123", fileName); assertEquals("456789", outputKey.toString()); }
From source file:it.crs4.seal.common.CutText.java
License:Open Source License
public void loadRecord(Text record) throws FormatException { int pos = 0; // the byte position within the record int fieldno = 0; // the field index within the record int colno = 0; // the index within the list of requested fields (columns) try {//from www . j a v a 2s . c o m while (pos < record.getLength() && colno < columns.size()) // iterate over each field { int endpos = record.find(delim, pos); // the field's end position if (endpos < 0) endpos = record.getLength(); if (columns.get(colno) == fieldno) // if we're at a requested field { extractedFields[colno] = Text.decode(record.getBytes(), pos, endpos - pos); extractedFieldPositions[colno] = pos; colno += 1; // advance column } pos = endpos + 1; // the next starting position is the current end + 1 fieldno += 1; } } catch (java.nio.charset.CharacterCodingException e) { throw new FormatException("character coding exception. Message: " + e.getMessage(), record); } if (colno < columns.size()) throw new FormatException("Missing field(s) in record. Field " + colno + " (zero-based) not found.", record); }
From source file:it.crs4.seal.prq.PairReadsQSeqReducer.java
License:Open Source License
private int[] findFields(Text read) { int[] fieldsPos = new int[3]; fieldsPos[0] = 0;/*from w w w. j av a2 s. c o m*/ for (int i = 1; i <= 2; ++i) { fieldsPos[i] = read.find(delim, fieldsPos[i - 1]) + 1; // +1 since we get the position of the delimiter if (fieldsPos[i] <= 0) throw new RuntimeException("invalid read/quality format: " + read.toString()); } int seqLength = fieldsPos[1] - 1; int qualLength = fieldsPos[2] - fieldsPos[1] - 1; if (seqLength != qualLength) throw new RuntimeException( "sequence and quality lengths don't match! (got " + seqLength + " and " + qualLength + ")"); return fieldsPos; }