Usage examples for the getLength() method of org.apache.hadoop.io.Text
@Override public int getLength()
From source file:cascading.scheme.hadoop.TextLine.java
License:Open Source License
/**
 * Decodes the Text stored in the context into a String.
 *
 * <p>Only the first {@code getLength()} bytes of the array returned by
 * {@code getBytes()} are decoded.
 *
 * @param context call context; slot 1 is cast to {@code Text} and slot 2 to {@code Charset}
 * @return the decoded string
 */
protected String makeEncodedString(Object[] context) {
    Text source = (Text) context[1];
    byte[] raw = source.getBytes();
    int validLength = source.getLength();
    Charset charset = (Charset) context[2];
    return new String(raw, 0, validLength, charset);
}
From source file:cn.com.diditaxi.hive.cf.GenericUDFInstr.java
License:Apache License
@Override public Object evaluate(DeferredObject[] arguments) throws HiveException { if (arguments[0].get() == null || arguments[1].get() == null) { return null; }//from ww w . ja va 2 s. c o m Text text = (Text) converters[0].convert(arguments[0].get()); Text subtext = (Text) converters[1].convert(arguments[1].get()); int startIndex = (arguments.length >= 3) ? ((IntWritable) converters[2].convert(arguments[2].get())).get() : DEFAULT_START_INDEX; int nth = (arguments.length == 4) ? ((IntWritable) converters[3].convert(arguments[3].get())).get() : DEFAULT_NTH; // argument checking if (startIndex < 0) { // if startIndex is negative, // the function counts back startIndex number of characters from the end of text and then searches // towards the beginning of text. startIndex = text.getLength() + startIndex; } if (startIndex <= 0 || startIndex > text.getLength()) { intWritable.set(0); return intWritable; } int index = 0; int currentIndex = startIndex; for (int i = 0; i < nth; i++) { index = GenericUDFUtils.findText(text, subtext, currentIndex - 1) + 1; if (index == 0) {// not found intWritable.set(0); return intWritable; } currentIndex = index + 1; } intWritable.set(index); return intWritable; }
From source file:com.ailk.oci.ocnosql.tools.load.mutiple.MutipleColumnImporterMapper.java
License:Apache License
/** * Convert a line of TSV text into an HBase table row. *///from w w w . j a v a 2s . co m @Override public void map(LongWritable offset, Text value, Context context) throws IOException { byte[] lineBytes = value.getBytes(); ts = System.currentTimeMillis(); try { MutipleColumnImportTsv.TsvParser.ParsedLine parsed = parser.parse(lineBytes, value.getLength()); String newRowKey = rowkeyGenerator.generateByGenRKStep(value.toString(), false);//???rowkey Put put = new Put(newRowKey.getBytes()); for (int i = 0; i < parsed.getColumnCount(); i++) { String columnQualifierStr = new String(parser.getQualifier(i)); String rowStr = newRowKey + new String(parser.getFamily(i) + columnQualifierStr); if (notNeedLoadColumnQulifiers.contains(columnQualifierStr)) { continue; } KeyValue kv = new KeyValue(rowStr.getBytes(), 0, newRowKey.getBytes().length, //roffset,rofflength parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0, parser.getQualifier(i).length, ts, KeyValue.Type.Put, lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i)); KeyValue newKv = new KeyValue(newRowKey.getBytes(), kv.getFamily(), kv.getQualifier(), ts, kv.getValue()); kv = null; put.add(newKv); } context.write(new ImmutableBytesWritable(newRowKey.getBytes()), put); } catch (MutipleColumnImportTsv.TsvParser.BadTsvLineException badLine) { if (skipBadLines) { System.err.println("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage()); incrementBadLineCount(1); return; } else { throw new IOException(badLine); } } catch (IllegalArgumentException e) { if (skipBadLines) { System.err.println("Bad line at offset: " + offset.get() + ":\n" + e.getMessage()); incrementBadLineCount(1); return; } else { throw new IOException(e); } } catch (InterruptedException e) { e.printStackTrace(); } catch (RowKeyGeneratorException e) { System.err.println("gen rowkey error, please check config in the ocnosqlTab.xml." 
+ e.getMessage()); throw new IOException(e); } finally { totalLineCount.increment(1); } }
From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImporterMapper.java
License:Apache License
/** * Convert a line of TSV text into an HBase table row. * /* ww w .j a v a 2 s. c o m*/ */ @Override public void map(LongWritable offset, Text value, Context context) throws IOException { byte[] lineBytes = value.getBytes(); try { TsvParser.ParsedLine parsed = parser.parse(lineBytes, value.getLength()); // Text[] texts = new Text[parsed.getColumnCount()]; int index = 0; for (int i = 0; i < parsed.getColumnCount(); i++) { // if (i == parser.getRowKeyColumnIndex()){ // continue; // } text = new Text(); //? text.append(lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i)); texts[index] = text; index++; } writer.set(texts); /* //rowkey String oriRowKey = new String(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength()); // hash rowkey String newRowKey = oriRowKey; if(rowkeyGenerator != null){ newRowKey = (String)rowkeyGenerator.generate(oriRowKey); } */ String newRowKey = rowkeyGenerator.generateByGenRKStep(value.toString(), false);//???rowkey //LOG.info("single column newRowKey = " + newRowKey); context.write(new ImmutableBytesWritable(newRowKey.getBytes()), writer); } catch (BadTsvLineException badLine) { if (skipBadLines) { LOG.error("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage()); badLineCount.increment(1); return; } else { throw new IOException(badLine); } } catch (InterruptedException e) { e.printStackTrace(); } }
From source file:com.asakusafw.dag.runtime.io.ValueOptionSerDe.java
License:Apache License
/** * Serializes {@link StringOption} object. * @param option the target object/*from w w w . ja v a 2s . c o m*/ * @param output the target output * @throws IOException if I/O error was occurred while serializing the object */ public static void serialize(StringOption option, DataOutput output) throws IOException { if (option.isNull()) { writeCompactInt(UNSIGNED_NULL, output); } else { Text entity = option.get(); int length = entity.getLength(); writeCompactInt(length, output); output.write(entity.getBytes(), 0, length); } }
From source file:com.asakusafw.dag.runtime.io.ValueOptionSerDe.java
License:Apache License
/** * Deserializes {@link StringOption} object. * @param option the target object//from w w w.j a v a 2s . c om * @param input the source input * @throws IOException if I/O error was occurred while deserializing the object */ public static void deserialize(StringOption option, DataInput input) throws IOException { int length = readCompactInt(input); if (length == UNSIGNED_NULL) { option.setNull(); } else { if (option.isNull() == false) { Text entity = option.get(); if (length == entity.getLength()) { // optimize for same-length text properties input.readFully(entity.getBytes(), 0, length); return; } } else { // set as non-null option.reset(); } byte[] buffer = getLocalBuffer(length, Integer.MAX_VALUE); input.readFully(buffer, 0, length); option.modify(buffer, 0, length); } }
From source file:com.asakusafw.runtime.io.line.BasicLineOutput.java
License:Apache License
private void write(Text entity) throws IOException { if (entity.getLength() == 0) { return;// ww w. j ava 2 s . c o m } ByteBuffer buffer = wrapperCache; byte[] b = entity.getBytes(); if (buffer == null || buffer.array() != b) { buffer = ByteBuffer.wrap(b); wrapperCache = buffer; } buffer.position(0); buffer.limit(entity.getLength()); boolean flushing = false; CharBuffer cs = charBuffer; while (true) { cs.clear(); CoderResult result; if (flushing) { result = decoder.flush(cs); } else { result = decoder.decode(buffer, cs, true); } if (result.isError() == false) { cs.flip(); if (cs.hasRemaining()) { writer.append(cs); } if (result.isUnderflow()) { if (flushing) { flushing = true; } else { break; } } } else { assert result.isError(); try { result.throwException(); } catch (CharacterCodingException e) { throw new IOException(MessageFormat.format("exception occurred while decoding text: {0}", path), e); } } } }
From source file:com.asakusafw.runtime.io.line.Utf8LineOutput.java
License:Apache License
@Override public void write(StringOption model) throws IOException { if (model == null) { throw new IllegalArgumentException("model must not be null"); //$NON-NLS-1$ }//from w w w . ja va 2 s .c o m if (model.isNull()) { return; } Text entity = model.get(); output.write(entity.getBytes(), 0, entity.getLength()); output.write(LINE_BREAK); }
From source file:com.asakusafw.runtime.io.TsvEmitter.java
License:Apache License
@Override public void emit(StringOption option) throws IOException { startCell();// ww w . j av a 2s . com if (emitNull(option)) { return; } Text text = option.get(); if (text.getLength() == 0) { return; } byte[] bytes = text.getBytes(); ByteBuffer source = ByteBuffer.wrap(bytes, 0, text.getLength()); decoder.reset(); decodeBuffer.clear(); while (true) { CoderResult result = decoder.decode(source, decodeBuffer, true); if (result.isError()) { throw new RecordFormatException( MessageFormat.format("Cannot process a character string (\"{0}\")", result)); } if (result.isUnderflow()) { consumeDecoded(); break; } if (result.isOverflow()) { consumeDecoded(); } } while (true) { CoderResult result = decoder.flush(decodeBuffer); if (result.isError()) { throw new RecordFormatException( MessageFormat.format("Cannot process a character string (\"{0}\")", result)); } if (result.isUnderflow()) { consumeDecoded(); break; } if (result.isOverflow()) { consumeDecoded(); } } }
From source file:com.asakusafw.runtime.stage.directio.StringTemplate.java
License:Apache License
/** * Returns a generated name./*from w w w .j a v a2 s . co m*/ * @return the generated name */ public final String apply() { nameBuffer.clear(); for (int i = 0; i < formatters.length; i++) { Text text = formatters[i].representation; nameBuffer.append(text.getBytes(), 0, text.getLength()); } return nameBuffer.toString(); }