Example usage for org.apache.hadoop.io Text getLength

List of usage examples for org.apache.hadoop.io Text getLength

Introduction

On this page you can find example usages of org.apache.hadoop.io.Text.getLength().

Prototype

@Override
public int getLength() 

Source Link

Document

Returns the number of valid bytes in the byte array returned by getBytes(); the backing array may be longer than this length.

Usage

From source file:cascading.scheme.hadoop.TextLine.java

License:Open Source License

/**
 * Decodes the valid portion of a {@link Text} into a {@link String}.
 *
 * @param context scratch array; element 1 is cast to {@link Text} and element 2 to {@link Charset}
 * @return the string decoded from the first {@code getLength()} bytes of the text
 */
protected String makeEncodedString(Object[] context) {
    final Text line = (Text) context[1];
    final byte[] raw = line.getBytes();
    // Only the first getLength() bytes are decoded, not the whole backing array.
    final int validLength = line.getLength();
    final Charset charset = (Charset) context[2];
    return new String(raw, 0, validLength, charset);
}

From source file:cn.com.diditaxi.hive.cf.GenericUDFInstr.java

License:Apache License

/**
 * Looks up the position of the nth occurrence of a sub-text inside a text.
 *
 * <p>Arguments: text, sub-text, optional start index (defaults to
 * {@code DEFAULT_START_INDEX}) and, only when exactly four arguments are given,
 * the occurrence number (defaults to {@code DEFAULT_NTH}).
 *
 * @param arguments the deferred UDF arguments
 * @return {@code null} if the text or the sub-text is {@code null}; otherwise the shared
 *         {@code intWritable}, set to the position computed as
 *         {@code GenericUDFUtils.findText(...) + 1}, or to 0 when the start index is out of
 *         range or an occurrence is not found
 * @throws HiveException if evaluating a deferred argument fails
 */
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
    if (arguments[0].get() == null || arguments[1].get() == null) {
        return null;
    }

    Text text = (Text) converters[0].convert(arguments[0].get());
    Text subtext = (Text) converters[1].convert(arguments[1].get());

    int startIndex = DEFAULT_START_INDEX;
    if (arguments.length >= 3) {
        startIndex = ((IntWritable) converters[2].convert(arguments[2].get())).get();
    }
    int nth = DEFAULT_NTH;
    if (arguments.length == 4) {
        nth = ((IntWritable) converters[3].convert(arguments[3].get())).get();
    }

    // A negative start index is taken relative to the end of the text
    // (measured with Text.getLength()).
    final int textLength = text.getLength();
    if (startIndex < 0) {
        startIndex += textLength;
    }
    if (startIndex <= 0 || startIndex > textLength) {
        intWritable.set(0);
        return intWritable;
    }

    int position = 0;
    int searchFrom = startIndex;
    for (int remaining = nth; remaining > 0; remaining--) {
        position = GenericUDFUtils.findText(text, subtext, searchFrom - 1) + 1;
        if (position == 0) {
            // Not found: position is already the 0 that must be reported.
            break;
        }
        searchFrom = position + 1;
    }
    intWritable.set(position);
    return intWritable;
}

From source file:com.ailk.oci.ocnosql.tools.load.mutiple.MutipleColumnImporterMapper.java

License:Apache License

/**
 * Convert a line of TSV text into an HBase table row.
 *
 * <p>The line is parsed with {@code parser}, a row key is generated from the whole line by
 * {@code rowkeyGenerator}, and one {@link KeyValue} per parsed column is added to a
 * {@link Put}, skipping columns whose qualifier is listed in
 * {@code notNeedLoadColumnQulifiers}. The put is then written to {@code context}.
 * {@code totalLineCount} is incremented for every line, whether or not it succeeds.
 *
 * @param offset  offset of the line, used only in bad-line messages
 * @param value   the raw line; only the first {@code getLength()} bytes are parsed
 * @param context the context the row is written to
 * @throws IOException if the line is bad and {@code skipBadLines} is false, or if the
 *                     row key cannot be generated
 */
@Override
public void map(LongWritable offset, Text value, Context context) throws IOException {
    byte[] lineBytes = value.getBytes();
    ts = System.currentTimeMillis();

    try {
        MutipleColumnImportTsv.TsvParser.ParsedLine parsed = parser.parse(lineBytes, value.getLength());
        // Generate the row key from the full line.
        String newRowKey = rowkeyGenerator.generateByGenRKStep(value.toString(), false);

        Put put = new Put(newRowKey.getBytes());
        for (int i = 0; i < parsed.getColumnCount(); i++) {
            String columnQualifierStr = new String(parser.getQualifier(i));
            // Skip excluded columns before doing any further per-column work.
            if (notNeedLoadColumnQulifiers.contains(columnQualifierStr)) {
                continue;
            }
            String rowStr = newRowKey + new String(parser.getFamily(i) + columnQualifierStr);
            // Only the leading row-key bytes of rowStr are used as the row (offset 0, row-key length).
            KeyValue kv = new KeyValue(rowStr.getBytes(), 0, newRowKey.getBytes().length,
                    parser.getFamily(i), 0, parser.getFamily(i).length, parser.getQualifier(i), 0,
                    parser.getQualifier(i).length, ts, KeyValue.Type.Put, lineBytes, parsed.getColumnOffset(i),
                    parsed.getColumnLength(i));

            KeyValue newKv = new KeyValue(newRowKey.getBytes(), kv.getFamily(), kv.getQualifier(), ts,
                    kv.getValue());
            put.add(newKv);
        }
        context.write(new ImmutableBytesWritable(newRowKey.getBytes()), put);
    } catch (MutipleColumnImportTsv.TsvParser.BadTsvLineException badLine) {
        if (skipBadLines) {
            System.err.println("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage());
            incrementBadLineCount(1);
            return;
        } else {
            throw new IOException(badLine);
        }
    } catch (IllegalArgumentException e) {
        if (skipBadLines) {
            System.err.println("Bad line at offset: " + offset.get() + ":\n" + e.getMessage());
            incrementBadLineCount(1);
            return;
        } else {
            throw new IOException(e);
        }
    } catch (InterruptedException e) {
        // Restore the interrupt flag so the surrounding task can observe the interruption
        // instead of having it silently swallowed here.
        Thread.currentThread().interrupt();
        e.printStackTrace();
    } catch (RowKeyGeneratorException e) {
        System.err.println("gen rowkey error, please check config in the ocnosqlTab.xml." + e.getMessage());
        throw new IOException(e);
    } finally {
        totalLineCount.increment(1);
    }
}

From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImporterMapper.java

License:Apache License

/**
 * Convert a line of TSV text into an HBase table row.
 * /* ww w .j  a v a  2 s. c  o  m*/
 */
@Override
public void map(LongWritable offset, Text value, Context context) throws IOException {
    byte[] lineBytes = value.getBytes();

    try {
        TsvParser.ParsedLine parsed = parser.parse(lineBytes, value.getLength());
        //
        Text[] texts = new Text[parsed.getColumnCount()];
        int index = 0;
        for (int i = 0; i < parsed.getColumnCount(); i++) {
            //            if (i == parser.getRowKeyColumnIndex()){
            //               continue;
            //            }
            text = new Text();
            //?
            text.append(lineBytes, parsed.getColumnOffset(i), parsed.getColumnLength(i));
            texts[index] = text;
            index++;
        }
        writer.set(texts);
        /*
        //rowkey
        String oriRowKey = new String(lineBytes, parsed.getRowKeyOffset(), parsed.getRowKeyLength());
                
        // hash rowkey
        String newRowKey = oriRowKey;
        if(rowkeyGenerator != null){
           newRowKey = (String)rowkeyGenerator.generate(oriRowKey);
        }
        */
        String newRowKey = rowkeyGenerator.generateByGenRKStep(value.toString(), false);//???rowkey
        //LOG.info("single column newRowKey = " + newRowKey);
        context.write(new ImmutableBytesWritable(newRowKey.getBytes()), writer);
    } catch (BadTsvLineException badLine) {
        if (skipBadLines) {
            LOG.error("Bad line at offset: " + offset.get() + ":\n" + badLine.getMessage());
            badLineCount.increment(1);
            return;
        } else {
            throw new IOException(badLine);
        }
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}

From source file:com.asakusafw.dag.runtime.io.ValueOptionSerDe.java

License:Apache License

/**
 * Serializes {@link StringOption} object.
 * @param option the target object/*from w w w . ja  v  a 2s .  c  o  m*/
 * @param output the target output
 * @throws IOException if I/O error was occurred while serializing the object
 */
public static void serialize(StringOption option, DataOutput output) throws IOException {
    if (option.isNull()) {
        writeCompactInt(UNSIGNED_NULL, output);
    } else {
        Text entity = option.get();
        int length = entity.getLength();
        writeCompactInt(length, output);
        output.write(entity.getBytes(), 0, length);
    }
}

From source file:com.asakusafw.dag.runtime.io.ValueOptionSerDe.java

License:Apache License

/**
 * Deserializes {@link StringOption} object.
 *
 * <p>Reads a compact int first: the {@code UNSIGNED_NULL} marker sets the option to null,
 * any other value is the number of bytes that follow.
 *
 * @param option the target object
 * @param input the source input
 * @throws IOException if I/O error was occurred while deserializing the object
 */
public static void deserialize(StringOption option, DataInput input) throws IOException {
    final int length = readCompactInt(input);
    if (length == UNSIGNED_NULL) {
        option.setNull();
        return;
    }
    if (option.isNull()) {
        // set as non-null
        option.reset();
    } else {
        Text current = option.get();
        if (current.getLength() == length) {
            // Same length as the current text: read straight into its bytes.
            input.readFully(current.getBytes(), 0, length);
            return;
        }
    }
    final byte[] scratch = getLocalBuffer(length, Integer.MAX_VALUE);
    input.readFully(scratch, 0, length);
    option.modify(scratch, 0, length);
}

From source file:com.asakusafw.runtime.io.line.BasicLineOutput.java

License:Apache License

/**
 * Decodes the valid bytes of {@code entity} with {@code decoder} and appends the
 * resulting characters to {@code writer}.
 *
 * <p>The entity is treated as one complete input: the decoder is reset, fed all bytes
 * with end-of-input set, and then flushed so that any characters the decoder still
 * holds are written as well.
 *
 * @param entity the text to write; nothing is written when its length is 0
 * @throws IOException if the bytes cannot be decoded or the writer fails
 */
private void write(Text entity) throws IOException {
    int length = entity.getLength();
    if (length == 0) {
        return;
    }
    // Reuse the cached wrapper as long as the Text still exposes the same backing array.
    ByteBuffer buffer = wrapperCache;
    byte[] b = entity.getBytes();
    if (buffer == null || buffer.array() != b) {
        buffer = ByteBuffer.wrap(b);
        wrapperCache = buffer;
    }
    buffer.position(0);
    buffer.limit(length);

    // Each entity is an independent decoding operation; reset() is required before
    // decode() can be called again once the previous operation has been flushed.
    decoder.reset();

    boolean flushing = false;
    CharBuffer cs = charBuffer;
    while (true) {
        cs.clear();
        CoderResult result;
        if (flushing) {
            result = decoder.flush(cs);
        } else {
            result = decoder.decode(buffer, cs, true);
        }
        if (result.isError() == false) {
            cs.flip();
            if (cs.hasRemaining()) {
                writer.append(cs);
            }
            if (result.isUnderflow()) {
                if (flushing) {
                    // Flush completed: everything has been written.
                    break;
                }
                // All input consumed: switch to flushing the decoder's internal state.
                flushing = true;
            }
            // On overflow, loop again with a cleared char buffer.
        } else {
            try {
                result.throwException();
            } catch (CharacterCodingException e) {
                throw new IOException(MessageFormat.format("exception occurred while decoding text: {0}", path),
                        e);
            }
        }
    }
}

From source file:com.asakusafw.runtime.io.line.Utf8LineOutput.java

License:Apache License

/**
 * Writes the bytes of the given option followed by {@code LINE_BREAK}.
 * A null-valued option writes nothing at all.
 *
 * @param model the option to write
 * @throws IOException if writing to the output fails
 * @throws IllegalArgumentException if {@code model} itself is {@code null}
 */
@Override
public void write(StringOption model) throws IOException {
    if (model == null) {
        throw new IllegalArgumentException("model must not be null"); //$NON-NLS-1$
    }
    if (!model.isNull()) {
        final Text entity = model.get();
        final byte[] raw = entity.getBytes();
        // Only the first getLength() bytes are written.
        final int validLength = entity.getLength();
        output.write(raw, 0, validLength);
        output.write(LINE_BREAK);
    }
}

From source file:com.asakusafw.runtime.io.TsvEmitter.java

License:Apache License

/**
 * Emits a string cell.
 *
 * <p>Starts a cell, then stops if {@code emitNull} reports the option as handled or if
 * the text is empty. Otherwise the valid bytes of the text are decoded through
 * {@code decoder} into {@code decodeBuffer}, which is drained with
 * {@code consumeDecoded()} after each decode and flush step.
 *
 * @param option the value to emit
 * @throws IOException declared by the overridden method
 * @throws RecordFormatException if the bytes cannot be decoded
 */
@Override
public void emit(StringOption option) throws IOException {
    startCell();
    if (emitNull(option)) {
        return;
    }
    final Text text = option.get();
    final int length = text.getLength();
    if (length == 0) {
        return;
    }

    final ByteBuffer source = ByteBuffer.wrap(text.getBytes(), 0, length);
    decoder.reset();
    decodeBuffer.clear();

    // Decode phase: drain the buffer after every step, stop once the input is exhausted.
    CoderResult result;
    do {
        result = decoder.decode(source, decodeBuffer, true);
        if (result.isError()) {
            throw new RecordFormatException(
                    MessageFormat.format("Cannot process a character string (\"{0}\")", result));
        }
        consumeDecoded();
    } while (!result.isUnderflow());

    // Flush phase: same draining pattern until the decoder reports underflow.
    do {
        result = decoder.flush(decodeBuffer);
        if (result.isError()) {
            throw new RecordFormatException(
                    MessageFormat.format("Cannot process a character string (\"{0}\")", result));
        }
        consumeDecoded();
    } while (!result.isUnderflow());
}

From source file:com.asakusafw.runtime.stage.directio.StringTemplate.java

License:Apache License

/**
 * Returns a generated name./*from   w  w w  .j  a v  a2  s . co m*/
 * @return the generated name
 */
public final String apply() {
    nameBuffer.clear();
    for (int i = 0; i < formatters.length; i++) {
        Text text = formatters[i].representation;
        nameBuffer.append(text.getBytes(), 0, text.getLength());
    }
    return nameBuffer.toString();
}