Example usage for org.apache.hadoop.io Text getLength

List of usage examples for org.apache.hadoop.io Text getLength

Introduction

On this page you can find example usages of org.apache.hadoop.io.Text.getLength().

Prototype

@Override
public int getLength() 

Source Link

Document

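Returns the number of valid bytes in the byte array returned by getBytes(). The backing array may be longer than this length, which is why the examples below always pass getLength() alongside getBytes().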

Usage

From source file:com.asakusafw.runtime.value.StringOption.java

License:Apache License

/**
 * Compares the valid byte regions of two {@link Text} values for equality.
 * The comparison itself is delegated to {@code ByteArrayUtil.equals}; each region
 * starts at offset 0 and spans {@code getLength()} bytes of the backing array.
 * @param a the first text
 * @param b the second text
 * @return the result of {@code ByteArrayUtil.equals} over both regions
 */
private static boolean equalsTexts(Text a, Text b) {
    final byte[] leftBytes = a.getBytes();
    final int leftLength = a.getLength();
    final byte[] rightBytes = b.getBytes();
    final int rightLength = b.getLength();
    return ByteArrayUtil.equals(leftBytes, 0, leftLength, rightBytes, 0, rightLength);
}

From source file:com.asakusafw.runtime.value.StringOption.java

License:Apache License

/**
 * Orders two {@link Text} values by their valid byte regions.
 * The ordering itself is delegated to {@code ByteArrayUtil.compare}; each region
 * starts at offset 0 and spans {@code getLength()} bytes of the backing array.
 * @param a the first text
 * @param b the second text
 * @return the result of {@code ByteArrayUtil.compare} over both regions
 */
private static int compareTexts(Text a, Text b) {
    final byte[] leftBytes = a.getBytes();
    final int leftLength = a.getLength();
    final byte[] rightBytes = b.getBytes();
    final int rightLength = b.getLength();
    return ByteArrayUtil.compare(leftBytes, 0, leftLength, rightBytes, 0, rightLength);
}

From source file:com.asakusafw.runtime.value.StringOptionUtil.java

License:Apache License

/**
 * Returns a {@link Reader} to read the text contents in the {@link StringOption}.
 * @param option the target {@link StringOption}
 * @return the created reader/*from   ww  w.jav a2  s. co  m*/
 * @throws NullPointerException if the {@link StringOption} is/represents {@code null}
 */
public static Reader asReader(StringOption option) {
    Text text = option.get();
    InputBuffer buffer = new InputBuffer();
    buffer.reset(text.getBytes(), 0, text.getLength());
    return new InputStreamReader(buffer, ENCODING);
}

From source file:com.asakusafw.runtime.value.StringOptionUtil.java

License:Apache License

/**
 * Strips leading and trailing classical whitespace bytes from the {@link StringOption}, in place.
 * Which bytes count as whitespace is decided by {@code isTrimTarget}; the intended set is:
 * <ul>
 * <li> {@code "\t" (HT:U+0009)} </li>
 * <li> {@code "\n" (LF:U+000a)} </li>
 * <li> {@code "\r" (CR:U+000d)} </li>
 * <li> {@code " " (SP:U+0020)} </li>
 * </ul>
 * The target {@link StringOption} is modified directly; nothing is changed when
 * there is nothing to strip.
 * @param option the target {@link StringOption}
 * @throws NullPointerException if the {@link StringOption} is/represents {@code null}
 */
public static void trim(StringOption option) {
    Text text = option.get();
    byte[] bytes = text.getBytes();
    int length = text.getLength();

    // Advance past leading trim targets (stops at length when every byte is one).
    int head = 0;
    while (head < length && isTrimTarget(bytes[head])) {
        head++;
    }

    // Retreat past trailing trim targets, never crossing the head position.
    int tail = length - 1;
    while (tail >= head && isTrimTarget(bytes[tail])) {
        tail--;
    }

    boolean untouched = head == 0 && tail == length - 1;
    if (untouched) {
        return;
    }
    text.set(bytes, head, tail - head + 1);
}

From source file:com.asakusafw.runtime.value.StringOptionUtil.java

License:Apache License

/**
 * Appends the valid bytes of {@code text} to the {@link Text} held by {@code target}.
 * @param target the option whose text receives the bytes
 * @param text the text whose first {@code getLength()} bytes are appended
 */
private static void append(StringOption target, Text text) {
    Text destination = target.get();
    byte[] source = text.getBytes();
    int sourceLength = text.getLength();
    destination.append(source, 0, sourceLength);
}

From source file:com.ashishpaliwal.hadoop.utils.inputformat.CsvLineReader.java

License:Apache License

/**
 * Read from the InputStream into the given Text.
 *
 * <p>A line ends at a {@code \n}, or after a {@code \r} (optionally followed by
 * {@code \n}), but only when the terminator occurs outside a double-quoted
 * section. A double quote toggles the quoted state unless the byte immediately
 * before it was an unpaired backslash.
 *
 * @param txt               the object to store the given line
 * @param maxLineLength     the maximum number of bytes to store into txt.
 * @param maxBytesToConsume the maximum number of bytes to consume in this
 *                          call.
 * @return the number of bytes read including the newline
 * @throws IOException if the underlying stream throws
 */
public int readLine(Text txt, int maxLineLength, int maxBytesToConsume) throws IOException {
    txt.clear();
    // Terminator bookkeeping: which line-ending bytes have been seen so far.
    boolean hadFinalNewline = false;
    boolean hadFinalReturn = false;
    boolean hitEndOfFile = false;
    // Start of the not-yet-appended region inside the current buffer contents.
    int startPosn = bufferPosn;
    long bytesConsumed = 0;
    // Quote/escape state carried across buffer refills.
    boolean inQuote = false;
    boolean isLastCharEscapeChar = false;

    outerLoop: while (true) {
        // Buffer exhausted: refill, or stop when backfill() reports no more data.
        if (bufferPosn >= bufferLength) {
            if (!backfill()) {
                hitEndOfFile = true;
                break;
            }
        }

        startPosn = bufferPosn;

        for (; bufferPosn < bufferLength; ++bufferPosn) {

            switch (buffer[bufferPosn]) {

            case '\\':
                // Consecutive backslashes cancel each other out.
                isLastCharEscapeChar = !isLastCharEscapeChar;
                break;

            case '"':
                // A quote right after a bare \r (outside quotes) starts the next line.
                if (!inQuote && hadFinalReturn) {
                    break outerLoop;
                }

                // An escaped quote does not change the quoted state.
                if (!isLastCharEscapeChar) {
                    inQuote = !inQuote;
                }
                isLastCharEscapeChar = false;
                break;

            case '\n':
                isLastCharEscapeChar = false;
                if (!inQuote) {
                    // Unquoted \n terminates the line; step past it so it is consumed.
                    hadFinalNewline = true;
                    bufferPosn += 1;
                    break outerLoop;
                }
                break;

            case '\r':
                isLastCharEscapeChar = false;
                if (!inQuote) {
                    if (hadFinalReturn) {
                        // leave this \r in the stream, so we'll get it next time
                        break outerLoop;
                    }
                    hadFinalReturn = true;
                }
                break;

            default:
                isLastCharEscapeChar = false;
                // Any other byte after a bare \r (outside quotes) belongs to the next line.
                if (!inQuote && hadFinalReturn) {
                    break outerLoop;
                }
            }
        }

        // Reached only when the inner loop ran off the end of the buffer without
        // finding the end of the line: account for and store this chunk, then refill.
        bytesConsumed += bufferPosn - startPosn;
        // Exclude a \r already seen from the stored content.
        int length = bufferPosn - startPosn - (hadFinalReturn ? 1 : 0);
        // Cap at the remaining room in txt; this can go negative, hence the guard below.
        length = Math.min(length, maxLineLength - txt.getLength());

        if (length >= 0)
            txt.append(buffer, startPosn, length);

        if (bytesConsumed >= maxBytesToConsume)
            return (int) Math.min(bytesConsumed, (long) Integer.MAX_VALUE);
    }

    int newlineLength = (hadFinalNewline ? 1 : 0) + (hadFinalReturn ? 1 : 0);

    // The line ended inside the current buffer: store the final chunk without
    // its terminator bytes. At end of file every chunk was already stored above.
    if (!hitEndOfFile) {
        bytesConsumed += bufferPosn - startPosn;
        int length = bufferPosn - startPosn - newlineLength;
        length = Math.min(length, maxLineLength - txt.getLength());

        if (length > 0)
            txt.append(buffer, startPosn, length);
    }
    // Clamp the long counter into the int return type.
    return (int) Math.min(bytesConsumed, (long) Integer.MAX_VALUE);
}

From source file:com.asp.tranlog.TsvImporterMapper.java

License:Apache License

/**
 * Convert a line of TSV text into an HBase table row.
 *
 * <p>The line is handed to the parser as raw bytes. Columns typed
 * {@code ImportTsv.COL_TYPE_STRING} are stored as slices of the line; all other
 * columns go through {@code convertColBytes}. The resulting {@link Put} is
 * written with the WAL disabled.
 *
 * <p>A line that fails with {@code BadTsvLineException} or
 * {@link IllegalArgumentException} is reported and counted when
 * {@code skipBadLines} is set; otherwise the failure is rethrown wrapped in an
 * {@link IOException}.
 *
 * @param offset  the key supplied by the input format (not used)
 * @param value   the line to convert
 * @param context the context the row is written to
 * @throws IOException if the line is bad and bad lines are not skipped, or if
 *                     writing the row fails
 */
@Override
public void map(LongWritable offset, Text value, Context context) throws IOException {

    // getBytes() exposes the backing array; only the first getLength() bytes
    // belong to this record, so the length is carried alongside the array.
    byte[] lineBytes = value.getBytes();
    int lineLength = value.getLength();

    // Declared outside the try block so the handlers can report the failing column.
    int i = 0;
    try {
        ImportTsv.TsvParser.ParsedLine parsed = parser.parse(lineBytes, lineLength);

        byte[] rowKeyBytes = createRowkeyByteArray(lineBytes, parsed);
        ImmutableBytesWritable rowKey = new ImmutableBytesWritable(rowKeyBytes);

        Put put = new Put(rowKeyBytes);
        put.setWriteToWAL(false);

        for (i = 0; i < parsed.getColumnCount(); i++) {

            KeyValue kv;
            if (columnTypes[i] == ImportTsv.COL_TYPE_STRING) {
                kv = new KeyValue(rowKeyBytes, parser.getFamily(i), parser.getQualifier(i), 0,
                        parser.getQualifier(i).length, ts, KeyValue.Type.Put, lineBytes,
                        parsed.getColumnOffset(i), parsed.getColumnLength(i));
            } else {
                byte[] colBytes = convertColBytes(lineBytes, parsed, i);
                if (colBytes == null) {
                    throw new ImportTsv.TsvParser.BadTsvLineException("Failed to get bytes for column " + i);
                }
                kv = new KeyValue(rowKeyBytes, parser.getFamily(i), parser.getQualifier(i), ts, colBytes);
            }
            put.add(kv);
        }
        context.write(rowKey, put);
    } catch (ImportTsv.TsvParser.BadTsvLineException badLine) {
        if (!skipBadLines) {
            throw new IOException(badLine);
        }
        reportBadLine(lineBytes, lineLength, i);
    } catch (IllegalArgumentException e) {
        if (!skipBadLines) {
            throw new IOException(e);
        }
        reportBadLine(lineBytes, lineLength, i);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so the caller can observe the interruption
        // instead of losing it here.
        Thread.currentThread().interrupt();
        LOG.error("Interrupted while writing row", e);
    }
}

/**
 * Reports a skipped bad line on stderr and in the log, then bumps the bad-line counter.
 * Only the valid prefix of the backing array is decoded, so stale bytes left over
 * from a previous, longer record never show up in the diagnostics.
 *
 * @param lineBytes  the backing array of the offending line
 * @param lineLength the number of valid bytes in {@code lineBytes}
 * @param column     the column index being processed when the failure occurred
 * @throws IOException if the diagnostic charset is not supported
 */
private void reportBadLine(byte[] lineBytes, int lineLength, int column) throws IOException {
    String line = new String(lineBytes, 0, lineLength, "gb18030");
    System.err.println("Bad line: " + line + ":" + column + "\n");
    LOG.error("Bad line: " + line + "," + column);
    incrementBadLineCount(1);
}

From source file:com.chinamobile.bcbsp.partition.HashWithBalancerWritePartition.java

License:Apache License

/**
 * This method is used to partition graph vertices, writing each vertex to the
 * corresponding partition.
 *
 * <p>Visible phases:
 * <ol>
 * <li>Every record is read, its partition id computed from the vertex id, and
 * its serialized key/value bytes spooled to a local data file while the
 * matching (size, partition id) pair is spooled to a second file.</li>
 * <li>The per-partition-id counts are handed to
 * {@code loadDataInBalancerBarrier}, which returns the mapping from hash
 * bucket to final partition.</li>
 * <li>The spooled records are read back and batched per final partition.
 * Batches for the local partition are parsed into vertices and added to the
 * graph data; batches for other partitions are handed to a sender thread.</li>
 * <li>The temporary files and directories are deleted.</li>
 * </ol>
 *
 * @param recordReader The recordreader of the split.
 * @throws IOException The io exception
 * @throws InterruptedException The Interrupted Exception
 */
@Override
public void write(RecordReader recordReader) throws IOException, InterruptedException {
    // Statistics reported in the log at the end of the method.
    int headNodeNum = 0;
    int local = 0;
    int send = 0;
    int lost = 0;
    ThreadPool tpool = new ThreadPool(this.sendThreadNum);
    int staffNum = this.staff.getStaffNum();
    // Scratch holders for the serialized key / value of the current record.
    BytesWritable kbytes = new BytesWritable();
    int ksize = 0;
    BytesWritable vbytes = new BytesWritable();
    int vsize = 0;
    // Shared serialization target: all three serializers below are opened on it.
    DataOutputBuffer bb = new DataOutputBuffer();
    int bufferSize = (int) ((this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER) * PART);
    int dataBufferSize = (this.TotalCacheSize * CONTAINERNUMBER * CONTAINERNUMBER)
            / (this.staff.getStaffNum() + this.sendThreadNum);
    // Staging buffer for key/value bytes before they are flushed to the data file.
    byte[] buffer = new byte[bufferSize];
    int bufindex = 0;
    SerializationFactory sFactory = new SerializationFactory(new Configuration());
    Serializer<IntWritable> psserializer = sFactory.getSerializer(IntWritable.class);
    // Staging buffer for (size, partition id) pairs before they are flushed.
    byte[] pidandsize = new byte[TIME * CONTAINERNUMBER * CONTAINERNUMBER];
    int psindex = 0;
    BytesWritable pidbytes = new BytesWritable();
    int psize = 0;
    BytesWritable sizebytes = new BytesWritable();
    int ssize = 0;
    // NOTE(review): this try/catch only rethrows and could be dropped.
    try {
        this.keyserializer.open(bb);
        this.valueserializer.open(bb);
        psserializer.open(bb);
    } catch (IOException e) {
        throw e;
    }
    // Per-job / per-staff scratch directory for the two spool files.
    // NOTE(review): mkdir() results are ignored; a failure surfaces later when
    // the spool files are opened.
    String path = "/tmp/bcbsp/" + this.staff.getJobID() + "/" + this.staff.getStaffID();
    File dir = new File("/tmp/bcbsp/" + this.staff.getJobID());
    dir.mkdir();
    dir = new File("/tmp/bcbsp/" + this.staff.getJobID() + "/" + this.staff.getStaffID());
    dir.mkdir();
    ArrayList<File> files = new ArrayList<File>();
    try {
        // Each spool file gets an appending writer and an independent reader.
        // NOTE(review): none of these four streams is closed if an exception is
        // thrown before the explicit close() calls further down.
        File file = new File(path + "/" + "data" + ".txt");
        files.add(file);
        DataOutputStream dataWriter = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(path + "/" + "data" + ".txt", true)));
        DataInputStream dataReader = new DataInputStream(
                new BufferedInputStream(new FileInputStream(path + "/" + "data" + ".txt")));
        File filet = new File(path + "/" + "pidandsize" + ".txt");
        files.add(filet);
        DataOutputStream psWriter = new DataOutputStream(
                new BufferedOutputStream(new FileOutputStream(path + "/" + "pidandsize" + ".txt", true)));
        DataInputStream psReader = new DataInputStream(
                new BufferedInputStream(new FileInputStream(path + "/" + "pidandsize" + ".txt")));
        // Phase 1: spool every record locally and count records per partition id.
        while (recordReader != null && recordReader.nextKeyValue()) {
            headNodeNum++;
            Text key = new Text(recordReader.getCurrentKey().toString());
            Text value = new Text(recordReader.getCurrentValue().toString());
            int pid = -1;
            Text vertexID = this.recordParse.getVertexID(key);
            if (vertexID != null) {
                pid = this.partitioner.getPartitionID(vertexID);
            } else {
                // No vertex id could be extracted: count the record as lost and skip it.
                lost++;
                continue;
            }
            if (this.counter.containsKey(pid)) {
                this.counter.put(pid, (this.counter.get(pid) + 1));
            } else {
                this.counter.put(pid, 1);
            }
            // Serialize key, value, combined size and partition id one after another
            // through the shared buffer, copying each result out before the next reset.
            bb.reset();
            this.keyserializer.serialize(key);
            kbytes.set(bb.getData(), 0, bb.getLength());
            ksize = kbytes.getLength();
            bb.reset();
            this.valueserializer.serialize(value);
            vbytes.set(bb.getData(), 0, bb.getLength());
            vsize = vbytes.getLength();
            bb.reset();
            psserializer.serialize(new IntWritable(ksize + vsize));
            sizebytes.set(bb.getData(), 0, bb.getLength());
            ssize = sizebytes.getLength();
            bb.reset();
            psserializer.serialize(new IntWritable(pid));
            pidbytes.set(bb.getData(), 0, bb.getLength());
            psize = pidbytes.getLength();
            // Stage the (size, pid) pair; flush the staging buffer first when it is full.
            if ((pidandsize.length - psindex) > (ssize + psize)) {
                System.arraycopy(sizebytes.getBytes(), 0, pidandsize, psindex, ssize);
                psindex += ssize;
                System.arraycopy(pidbytes.getBytes(), 0, pidandsize, psindex, psize);
                psindex += psize;
            } else {
                psWriter.write(pidandsize, 0, psindex);
                psindex = 0;
                System.arraycopy(sizebytes.getBytes(), 0, pidandsize, psindex, ssize);
                psindex += ssize;
                System.arraycopy(pidbytes.getBytes(), 0, pidandsize, psindex, psize);
                psindex += psize;
            }
            // Stage the key/value bytes. Three cases: the record fits in the remaining
            // space; the record is larger than the whole buffer and is written straight
            // through; or the buffer is flushed and the record staged afresh.
            if ((buffer.length - bufindex) > (ksize + vsize)) {
                System.arraycopy(kbytes.getBytes(), 0, buffer, bufindex, ksize);
                bufindex += ksize;
                System.arraycopy(vbytes.getBytes(), 0, buffer, bufindex, vsize);
                bufindex += vsize;
            } else if (buffer.length < (ksize + vsize)) {
                dataWriter.write(buffer, 0, bufindex);
                bufindex = 0;
                LOG.info("This is a super record");
                dataWriter.write(kbytes.getBytes(), 0, ksize);
                dataWriter.write(vbytes.getBytes(), 0, vsize);
            } else {
                dataWriter.write(buffer, 0, bufindex);
                bufindex = 0;
                System.arraycopy(kbytes.getBytes(), 0, buffer, bufindex, ksize);
                bufindex += ksize;
                System.arraycopy(vbytes.getBytes(), 0, buffer, bufindex, vsize);
                bufindex += vsize;
            }
        }
        // Flush whatever is still staged, then close the writers so the readers
        // opened above see the complete files.
        if (psindex != 0) {
            psWriter.write(pidandsize, 0, psindex);
        }
        if (bufindex != 0) {
            dataWriter.write(buffer, 0, bufindex);
            bufindex = 0;
        }
        dataWriter.close();
        dataWriter = null;
        psWriter.close();
        psWriter = null;
        buffer = null;
        pidandsize = null;
        // Phase 2: publish the per-partition-id counts and obtain the mapping from
        // hash bucket to final partition.
        this.ssrc.setDirFlag(new String[] { "3" });
        this.ssrc.setCounter(this.counter);
        HashMap<Integer, Integer> hashBucketToPartition = this.sssc.loadDataInBalancerBarrier(ssrc,
                Constants.PARTITION_TYPE.HASH);
        this.staff.setHashBucketToPartition(hashBucketToPartition);
        // Phase 3: replay the spooled records into one batch buffer per partition.
        byte[][] databuf = new byte[staffNum][dataBufferSize];
        int[] databufindex = new int[staffNum];
        try {
            IntWritable pid = new IntWritable();
            IntWritable size = new IntWritable();
            int belongPid = 0;
            // Runs until readFields() hits the end of the pidandsize file, which
            // surfaces as the EOFException caught below.
            while (true) {
                size.readFields(psReader);
                pid.readFields(psReader);
                belongPid = hashBucketToPartition.get(pid.get());
                if (belongPid != this.staff.getPartition()) {
                    send++;
                } else {
                    local++;
                }
                // NOTE(review): the return value of every dataReader.read(...) call in
                // this loop is ignored; read() may deliver fewer bytes than requested,
                // whereas readFully() would guarantee the full record - confirm.
                if ((databuf[belongPid].length - databufindex[belongPid]) > size.get()) {
                    // Record fits into the partition's batch buffer.
                    dataReader.read(databuf[belongPid], databufindex[belongPid], size.get());
                    databufindex[belongPid] += size.get();
                } else if (databuf[belongPid].length < size.get()) {
                    // Record is larger than a whole batch buffer: handle it on its own.
                    LOG.info("This is a super record");
                    byte[] tmp = new byte[size.get()];
                    dataReader.read(tmp, 0, size.get());
                    if (belongPid == this.staff.getPartition()) {
                        DataInputStream reader = new DataInputStream(
                                new BufferedInputStream(new ByteArrayInputStream(tmp)));
                        try {
                            // Despite its name, 'stop' is true while the loop should keep
                            // going. The loop ends on an empty key/value or on the
                            // IOException raised when the byte stream is exhausted.
                            boolean stop = true;
                            while (stop) {
                                Text key = new Text();
                                key.readFields(reader);
                                Text value = new Text();
                                value.readFields(reader);
                                if (key.getLength() > 0 && value.getLength() > 0) {
                                    Vertex vertex = this.recordParse.recordParse(key.toString(),
                                            value.toString());
                                    if (vertex == null) {
                                        lost++;
                                        continue;
                                    }
                                    this.staff.getGraphData().addForAll(vertex);
                                } else {
                                    stop = false;
                                }
                            }
                        } catch (IOException e) {
                            // NOTE(review): concatenating getStackTrace() logs the array's
                            // identity string, not the stack trace itself.
                            LOG.info("IO exception: " + e.getStackTrace());
                        }
                    } else {
                        // NOTE(review): busy-waits until the pool hands out a thread.
                        ThreadSignle t = tpool.getThread();
                        while (t == null) {
                            t = tpool.getThread();
                        }
                        t.setWorker(
                                this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), belongPid));
                        t.setJobId(staff.getJobID());
                        t.setTaskId(staff.getStaffID());
                        t.setBelongPartition(belongPid);
                        BytesWritable data = new BytesWritable();
                        data.set(tmp, 0, size.get());
                        t.setData(data);
                        LOG.info("Using Thread is: " + t.getThreadNumber());
                        t.setStatus(true);
                    }
                    tmp = null;
                } else {
                    // Batch buffer is full: drain it (locally or via a sender thread),
                    // then start a new batch with the current record.
                    if (belongPid == this.staff.getPartition()) {
                        DataInputStream reader = new DataInputStream(new BufferedInputStream(
                                new ByteArrayInputStream(databuf[belongPid], 0, databufindex[belongPid])));
                        try {
                            // Same parse loop as above; 'stop' means "keep going".
                            boolean stop = true;
                            while (stop) {
                                Text key = new Text();
                                key.readFields(reader);
                                Text value = new Text();
                                value.readFields(reader);
                                if (key.getLength() > 0 && value.getLength() > 0) {
                                    Vertex vertex = this.recordParse.recordParse(key.toString(),
                                            value.toString());
                                    if (vertex == null) {
                                        lost++;
                                        continue;
                                    }
                                    this.staff.getGraphData().addForAll(vertex);
                                } else {
                                    stop = false;
                                }
                            }
                        } catch (IOException e) {
                            // NOTE(review): logs the array identity, not the trace (see above).
                            LOG.info("IO exception: " + e.getStackTrace());
                        }
                    } else {
                        // NOTE(review): busy-waits until the pool hands out a thread.
                        ThreadSignle t = tpool.getThread();
                        while (t == null) {
                            t = tpool.getThread();
                        }
                        t.setWorker(
                                this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), belongPid));
                        t.setJobId(staff.getJobID());
                        t.setTaskId(staff.getStaffID());
                        t.setBelongPartition(belongPid);
                        BytesWritable data = new BytesWritable();
                        data.set(databuf[belongPid], 0, databufindex[belongPid]);
                        t.setData(data);
                        LOG.info("Using Thread is: " + t.getThreadNumber());
                        t.setStatus(true);
                    }
                    databufindex[belongPid] = 0;
                    dataReader.read(databuf[belongPid], databufindex[belongPid], size.get());
                    databufindex[belongPid] += size.get();
                }
            }
        } catch (EOFException ex) {
            // End of the pidandsize file is the normal way out of the loop above.
            // NOTE(review): it is nevertheless logged at error level.
            LOG.error("[write]", ex);
        }
        // Phase 4: drain the partially filled batch buffers left after the replay.
        for (int i = 0; i < staffNum; i++) {
            if (databufindex[i] != 0) {
                if (i == this.staff.getPartition()) {
                    DataInputStream reader = new DataInputStream(
                            new BufferedInputStream(new ByteArrayInputStream(databuf[i], 0, databufindex[i])));
                    try {
                        // Same parse loop as above; 'stop' means "keep going".
                        boolean stop = true;
                        while (stop) {
                            Text key = new Text();
                            key.readFields(reader);
                            Text value = new Text();
                            value.readFields(reader);
                            if (key.getLength() > 0 && value.getLength() > 0) {
                                Vertex vertex = this.recordParse.recordParse(key.toString(), value.toString());
                                if (vertex == null) {
                                    lost++;
                                    continue;
                                }
                                this.staff.getGraphData().addForAll(vertex);
                            } else {
                                stop = false;
                            }
                        }
                    } catch (IOException e) {
                        // NOTE(review): logs the array identity, not the trace (see above).
                        LOG.info("IO exception: " + e.getStackTrace());
                    }
                } else {
                    // NOTE(review): busy-waits until the pool hands out a thread.
                    ThreadSignle t = tpool.getThread();
                    while (t == null) {
                        t = tpool.getThread();
                    }
                    t.setWorker(this.workerAgent.getWorker(staff.getJobID(), staff.getStaffID(), i));
                    t.setJobId(staff.getJobID());
                    t.setTaskId(staff.getStaffID());
                    t.setBelongPartition(i);
                    BytesWritable data = new BytesWritable();
                    data.set(databuf[i], 0, databufindex[i]);
                    t.setData(data);
                    LOG.info("Using Thread is: " + t.getThreadNumber());
                    t.setStatus(true);
                }
            }
        }
        // Phase 5: close the readers, remove the spool files and directories, and
        // release the large buffers.
        // NOTE(review): the same file/directory deletion is repeated in the finally
        // block below.
        dataReader.close();
        dataReader = null;
        psReader.close();
        psReader = null;
        for (File f : files) {
            f.delete();
        }
        dir.delete();
        dir = new File(path.substring(0, path.lastIndexOf('/')));
        dir.delete();
        tpool.cleanup();
        tpool = null;
        databuf = null;
        databufindex = null;
        this.counter = null;
        LOG.info("The number of vertices that were read from the input file: " + headNodeNum);
        LOG.info("The number of vertices that were put into the partition: " + local);
        LOG.info("The number of vertices that were sent to other partitions: " + send);
        LOG.info("The number of verteices in the partition that cound not be " + "parsed:" + lost);
    } catch (IOException e) {
        throw e;
    } catch (InterruptedException e) {
        throw e;
    } finally {
        // Best-effort removal of the spool files and both scratch directories,
        // also reached on the failure paths.
        for (File f : files) {
            f.delete();
        }
        dir.delete();
        dir = new File(path.substring(0, path.lastIndexOf('/')));
        dir.delete();
    }
}

From source file:com.cloudera.castagna.logparser.pig.LogLoader.java

License:Apache License

/**
 * Reads the next record from the underlying reader and wraps its bytes in a
 * single-field tuple.
 *
 * @return a tuple holding the record's bytes, or {@code null} when the reader
 *         has no further records
 * @throws IOException if reading fails or the thread is interrupted while
 *                     reading; an interruption is attached as the cause
 */
@Override
public Tuple getNext() throws IOException {
    try {
        boolean notDone = in.nextKeyValue();
        if (!notDone) {
            return null;
        }
        Text value = (Text) in.getCurrentValue();
        byte[] ba = value.getBytes();
        // make a copy of the bytes representing the input since
        // TextInputFormat will reuse the byte array; only the first
        // getLength() bytes of the backing array belong to this record
        return mTupleFactory.newTuple(new DataByteArray(ba, 0, value.getLength()));
    } catch (InterruptedException e) {
        // Restore the interrupt flag and keep the original exception as the
        // cause so the interruption is not lost to callers.
        Thread.currentThread().interrupt();
        throw new IOException("Error getting input", e);
    }
}

From source file:com.cloudera.recordservice.examples.terasort.TeraValidate.java

License:Apache License

/**
 * Renders the valid bytes of a {@link Text} using {@code BytesWritable.toString()}.
 * @param t the text whose first {@code getLength()} bytes are rendered
 * @return the string form produced by {@link BytesWritable}
 */
private static String textifyBytes(Text t) {
    BytesWritable wrapper = new BytesWritable();
    byte[] raw = t.getBytes();
    int validLength = t.getLength();
    wrapper.set(raw, 0, validLength);
    return wrapper.toString();
}