Example usage for org.apache.hadoop.io Text set

Introduction

In this page you can find the example usage for org.apache.hadoop.io Text set.

Prototype

public void set(byte[] utf8, int start, int len)

Source Link

Document

Set the Text to range of bytes

Usage

From source file:com.huayu.metis.flume.sink.hdfs.HDFSTextSerializer.java

License:Apache License

private Text makeText(Event e) {
    Text textObject = new Text();
    textObject.set(e.getBody(), 0, e.getBody().length);
    return textObject;
}

From source file:com.ibm.jaql.lang.expr.del.JsonToDel.java

License:Apache License

/**
 * Converts the given JSON value for a del line into a Text.
 * //from   www .  ja  va 2  s.  c o m
 * @param src JSON value
 * @return text text
 * @throws IOException
 * @throws IllegalArgumentException If no field names are provided for JSON
 *           record.
 */
public void convert(JsonValue src, Text text) throws IOException {
    /*
     * 1. If quoted is false and src can be extract as a JSON string, its   
     *    internal UTF-8 byte array is set to text.
     * 2. Extract JSON values into an array list. Looping the JSON values in the
     *    array list:
     * 2.1 quotes is false: If the value is a JSON string, its internal
     *     UTF-8 byte array is written to out. Otherwise, it is serialized
     *     to out.
     * 2.2 quoted is true: If the value does not contain characters which 
     *     need double-quote escaping or backslash escaping, it is 
     *     serialized to out. For JSON string, its internal UTF-8 byte array is 
     *     written to out byte-by-byte after escaping. For the value of other
     *     types, it is serialized to a field buffer. Then the array backing the
     *     field buffer is written to out byte-by-bye after escaping.
     * 2.3 Set the content of the buffer backing out to text.
     */
    try {
        if (!quoted) {
            if (src instanceof JsonRecord && fieldNames.length == 1) {
                JsonRecord rec = (JsonRecord) src;
                JsonValue only = rec.get(fieldNames[0]);
                if (only instanceof JsonString) {
                    JsonString js = (JsonString) only;
                    text.set(js.getInternalBytes(), js.bytesOffset(), js.bytesLength());
                    return;
                }
            } else if (src instanceof JsonArray) {
                JsonArray arr = (JsonArray) src;
                if (arr.count() == 1) {
                    JsonValue only = arr.get(0);
                    if (only instanceof JsonString) {
                        JsonString js = (JsonString) only;
                        text.set(js.getInternalBytes(), js.bytesOffset(), js.bytesLength());
                        return;
                    }
                }
            } else if (src instanceof JsonString) {
                JsonString js = (JsonString) src;
                text.set(js.getInternalBytes(), js.bytesOffset(), js.bytesLength());
                return;
            }
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    values.clear();
    if (src instanceof JsonRecord) {
        JsonRecord rec = (JsonRecord) src;
        if (fieldNames.length < 1)
            throw new IllegalArgumentException("fields are required to convert A JSON record into a del line.");
        for (JsonString n : fieldNames)
            values.add(rec.get(n));
    } else if (src instanceof JsonArray) {
        JsonArray arr = (JsonArray) src;
        for (JsonValue value : arr)
            values.add(value);
    } else {
        // If the value is not JsonRecord or JsonArray, then write it directly.
        values.add(src);
    }

    out.reset();
    for (int i = 0; i < values.size(); i++) {
        JsonValue value = values.get(i);
        if (i != 0)
            out.write(delimiter);
        if (value == null)
            continue;
        if (quoted)
            printFieldQuoted(out, value, escape);
        else
            printFieldUnquoted(out, value);
    }
    out.flush();
    text.set(out.toString());
}

From source file:com.kylinolap.job.hadoop.cube.CubeReducerTest.java

License:Apache License

private Text newValueText(MeasureCodec codec, String sum, String min, String max, int count) {
    Object[] values = new Object[] { new BigDecimal(sum), new BigDecimal(min), new BigDecimal(max),
            new LongWritable(count) };

    buf.clear();/*from  ww  w  .  j av a2 s .  c  om*/
    codec.encode(values, buf);

    Text t = new Text();
    t.set(buf.array(), 0, buf.position());
    return t;
}

From source file:com.ricemap.spateDB.core.RTree.java

License:Apache License

/**
 * Builds the RTree given a serialized list of elements. It uses the given
 * stockObject to deserialize these elements and build the tree. Also writes
 * the created tree to the disk directly.
 * //from w w  w .j av a2s .co  m
 * @param elements
 *            - serialization of elements to be written
 * @param offset
 *            - index of the first element to use in the elements array
 * @param len
 *            - number of bytes to user from the elements array
 * @param bytesAvailable
 *            - size available (in bytes) to store the tree structures
 * @param dataOut
 *            - an output to use for writing the tree to
 * @param fast_sort
 *            - setting this to <code>true</code> allows the method to run
 *            faster by materializing the offset of each element in the list
 *            which speeds up the comparison. However, this requires an
 *            additional 16 bytes per element. So, for each 1M elements, the
 *            method will require an additional 16 M bytes (approximately).
 */
public void bulkLoadWrite(final byte[] element_bytes, final int offset, final int len, final int degree,
        DataOutput dataOut, final boolean fast_sort, final boolean columnarStorage) {
    try {
        columnar = columnarStorage;
        //TODO: the order of fields should be stable under Oracle JVM, but not guaranteed
        Field[] fields = stockObject.getClass().getDeclaredFields();

        // Count number of elements in the given text
        int i_start = offset;
        final Text line = new Text();
        while (i_start < offset + len) {
            int i_end = skipToEOL(element_bytes, i_start);
            // Extract the line without end of line character
            line.set(element_bytes, i_start, i_end - i_start - 1);
            stockObject.fromText(line);

            elementCount++;
            i_start = i_end;
        }
        LOG.info("Bulk loading an RTree with " + elementCount + " elements");

        // It turns out the findBestDegree returns the best degree when the
        // whole
        // tree is loaded to memory when processed. However, as current
        // algorithms
        // process the tree while it's on disk, a higher degree should be
        // selected
        // such that a node fits one file block (assumed to be 4K).
        // final int degree = findBestDegree(bytesAvailable, elementCount);
        LOG.info("Writing an RTree with degree " + degree);

        int height = Math.max(1, (int) Math.ceil(Math.log(elementCount) / Math.log(degree)));
        int leafNodeCount = (int) Math.pow(degree, height - 1);
        if (elementCount < 2 * leafNodeCount && height > 1) {
            height--;
            leafNodeCount = (int) Math.pow(degree, height - 1);
        }
        int nodeCount = (int) ((Math.pow(degree, height) - 1) / (degree - 1));
        int nonLeafNodeCount = nodeCount - leafNodeCount;

        // Keep track of the offset of each element in the text
        final int[] offsets = new int[elementCount];
        final int[] ids = new int[elementCount];
        final double[] ts = fast_sort ? new double[elementCount] : null;
        final double[] xs = fast_sort ? new double[elementCount] : null;
        final double[] ys = fast_sort ? new double[elementCount] : null;

        //initialize columnar data output
        ByteArrayOutputStream index_bos = new ByteArrayOutputStream();
        DataOutputStream index_dos = new DataOutputStream(index_bos);
        ByteArrayOutputStream[] bos = new ByteArrayOutputStream[fields.length];
        DataOutputStream[] dos = new DataOutputStream[fields.length];
        for (int i = 0; i < bos.length; i++) {
            bos[i] = new ByteArrayOutputStream();
            dos[i] = new DataOutputStream(bos[i]);
        }

        i_start = offset;
        line.clear();
        for (int i = 0; i < elementCount; i++) {
            offsets[i] = i_start;
            ids[i] = i;
            int i_end = skipToEOL(element_bytes, i_start);
            if (xs != null) {
                // Extract the line with end of line character
                line.set(element_bytes, i_start, i_end - i_start - 1);
                stockObject.fromText(line);
                // Sample center of the shape
                ts[i] = (stockObject.getMBR().t1 + stockObject.getMBR().t2) / 2;
                xs[i] = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2;
                ys[i] = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2;

                //build columnar storage
                if (stockObject instanceof Point3d) {
                    index_dos.writeDouble(ts[i]);
                    index_dos.writeDouble(xs[i]);
                    index_dos.writeDouble(ys[i]);
                } else {
                    throw new RuntimeException("Indexing non-point shape with RTREE is not supported yet");
                }

                for (int j = 0; j < fields.length; j++) {
                    if (fields[j].getType().equals(Integer.TYPE)) {
                        dos[j].writeInt(fields[j].getInt(stockObject));
                    } else if (fields[j].getType().equals(Double.TYPE)) {
                        dos[j].writeDouble(fields[j].getDouble(stockObject));
                    } else if (fields[j].getType().equals(Long.TYPE)) {
                        dos[j].writeLong(fields[j].getLong(stockObject));
                    } else {
                        continue;
                        //throw new RuntimeException("Field type is not supported yet");
                    }
                }
            }
            i_start = i_end;
        }
        index_dos.close();
        for (int i = 0; i < dos.length; i++) {
            dos[i].close();
        }

        /** A struct to store information about a split */
        class SplitStruct extends Prism {
            /** Start and end index for this split */
            int index1, index2;
            /** Direction of this split */
            byte direction;
            /** Index of first element on disk */
            int offsetOfFirstElement;

            static final byte DIRECTION_T = 0;
            static final byte DIRECTION_X = 1;
            static final byte DIRECTION_Y = 2;

            SplitStruct(int index1, int index2, byte direction) {
                this.index1 = index1;
                this.index2 = index2;
                this.direction = direction;
            }

            @Override
            public void write(DataOutput out) throws IOException {
                //
                if (columnarStorage)
                    out.writeInt(index1);
                else
                    out.writeInt(offsetOfFirstElement);
                super.write(out);
            }

            void partition(Queue<SplitStruct> toBePartitioned) {
                IndexedSortable sortableT;
                IndexedSortable sortableX;
                IndexedSortable sortableY;

                if (fast_sort) {
                    // Use materialized xs[] and ys[] to do the comparisons
                    sortableT = new IndexedSortable() {
                        @Override
                        public void swap(int i, int j) {
                            // Swap ts
                            double tempt = ts[i];
                            ts[i] = ts[j];
                            ts[j] = tempt;
                            // Swap xs
                            double tempx = xs[i];
                            xs[i] = xs[j];
                            xs[j] = tempx;
                            // Swap ys
                            double tempY = ys[i];
                            ys[i] = ys[j];
                            ys[j] = tempY;
                            // Swap id
                            int tempid = offsets[i];
                            offsets[i] = offsets[j];
                            offsets[j] = tempid;

                            tempid = ids[i];
                            ids[i] = ids[j];
                            ids[j] = tempid;
                        }

                        @Override
                        public int compare(int i, int j) {
                            if (ts[i] < ts[j])
                                return -1;
                            if (ts[i] > ts[j])
                                return 1;
                            return 0;
                        }
                    };
                    sortableX = new IndexedSortable() {
                        @Override
                        public void swap(int i, int j) {
                            // Swap ts
                            double tempt = ts[i];
                            ts[i] = ts[j];
                            ts[j] = tempt;
                            // Swap xs
                            double tempx = xs[i];
                            xs[i] = xs[j];
                            xs[j] = tempx;
                            // Swap ys
                            double tempY = ys[i];
                            ys[i] = ys[j];
                            ys[j] = tempY;
                            // Swap id
                            int tempid = offsets[i];
                            offsets[i] = offsets[j];
                            offsets[j] = tempid;
                            tempid = ids[i];
                            ids[i] = ids[j];
                            ids[j] = tempid;
                        }

                        @Override
                        public int compare(int i, int j) {
                            if (ts[i] < ts[j])
                                return -1;
                            if (xs[i] < xs[j])
                                return -1;
                            if (xs[i] > xs[j])
                                return 1;
                            return 0;
                        }
                    };

                    sortableY = new IndexedSortable() {
                        @Override
                        public void swap(int i, int j) {
                            // Swap ts
                            double tempt = ts[i];
                            ts[i] = ts[j];
                            ts[j] = tempt;
                            // Swap xs
                            double tempx = xs[i];
                            xs[i] = xs[j];
                            xs[j] = tempx;
                            // Swap ys
                            double tempY = ys[i];
                            ys[i] = ys[j];
                            ys[j] = tempY;
                            // Swap id
                            int tempid = offsets[i];
                            offsets[i] = offsets[j];
                            offsets[j] = tempid;

                            tempid = ids[i];
                            ids[i] = ids[j];
                            ids[j] = tempid;
                        }

                        @Override
                        public int compare(int i, int j) {
                            if (ys[i] < ys[j])
                                return -1;
                            if (ys[i] > ys[j])
                                return 1;
                            return 0;
                        }
                    };
                } else {
                    // No materialized xs and ys. Always deserialize objects
                    // to compare
                    sortableT = new IndexedSortable() {
                        @Override
                        public void swap(int i, int j) {
                            // Swap id
                            int tempid = offsets[i];
                            offsets[i] = offsets[j];
                            offsets[j] = tempid;

                            tempid = ids[i];
                            ids[i] = ids[j];
                            ids[j] = tempid;
                        }

                        @Override
                        public int compare(int i, int j) {
                            // Get end of line
                            int eol = skipToEOL(element_bytes, offsets[i]);
                            line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
                            stockObject.fromText(line);
                            double ti = (stockObject.getMBR().t1 + stockObject.getMBR().t2) / 2;

                            eol = skipToEOL(element_bytes, offsets[j]);
                            line.set(element_bytes, offsets[j], eol - offsets[j] - 1);
                            stockObject.fromText(line);
                            double tj = (stockObject.getMBR().t1 + stockObject.getMBR().t2) / 2;
                            if (ti < tj)
                                return -1;
                            if (ti > tj)
                                return 1;
                            return 0;
                        }
                    };
                    sortableX = new IndexedSortable() {
                        @Override
                        public void swap(int i, int j) {
                            // Swap id
                            int tempid = offsets[i];
                            offsets[i] = offsets[j];
                            offsets[j] = tempid;

                            tempid = ids[i];
                            ids[i] = ids[j];
                            ids[j] = tempid;
                        }

                        @Override
                        public int compare(int i, int j) {
                            // Get end of line
                            int eol = skipToEOL(element_bytes, offsets[i]);
                            line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
                            stockObject.fromText(line);
                            double xi = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2;

                            eol = skipToEOL(element_bytes, offsets[j]);
                            line.set(element_bytes, offsets[j], eol - offsets[j] - 1);
                            stockObject.fromText(line);
                            double xj = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2;
                            if (xi < xj)
                                return -1;
                            if (xi > xj)
                                return 1;
                            return 0;
                        }
                    };

                    sortableY = new IndexedSortable() {
                        @Override
                        public void swap(int i, int j) {
                            // Swap id
                            int tempid = offsets[i];
                            offsets[i] = offsets[j];
                            offsets[j] = tempid;

                            tempid = ids[i];
                            ids[i] = ids[j];
                            ids[j] = tempid;
                        }

                        @Override
                        public int compare(int i, int j) {
                            int eol = skipToEOL(element_bytes, offsets[i]);
                            line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
                            stockObject.fromText(line);
                            double yi = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2;

                            eol = skipToEOL(element_bytes, offsets[j]);
                            line.set(element_bytes, offsets[j], eol - offsets[j] - 1);
                            stockObject.fromText(line);
                            double yj = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2;
                            if (yi < yj)
                                return -1;
                            if (yi > yj)
                                return 1;
                            return 0;
                        }
                    };
                }

                final IndexedSorter sorter = new QuickSort();

                final IndexedSortable[] sortables = new IndexedSortable[3];
                sortables[SplitStruct.DIRECTION_T] = sortableT;
                sortables[SplitStruct.DIRECTION_X] = sortableX;
                sortables[SplitStruct.DIRECTION_Y] = sortableY;

                sorter.sort(sortables[direction], index1, index2);

                // Partition into maxEntries partitions (equally) and
                // create a SplitStruct for each partition
                int i1 = index1;
                for (int iSplit = 0; iSplit < degree; iSplit++) {
                    int i2 = index1 + (index2 - index1) * (iSplit + 1) / degree;
                    SplitStruct newSplit;
                    if (direction == 0) {
                        newSplit = new SplitStruct(i1, i2, (byte) 1);
                    } else if (direction == 1) {
                        newSplit = new SplitStruct(i1, i2, (byte) 2);
                    } else {
                        newSplit = new SplitStruct(i1, i2, (byte) 0);
                    }
                    toBePartitioned.add(newSplit);
                    i1 = i2;
                }
            }
        }

        // All nodes stored in level-order traversal
        Vector<SplitStruct> nodes = new Vector<SplitStruct>();
        final Queue<SplitStruct> toBePartitioned = new LinkedList<SplitStruct>();
        toBePartitioned.add(new SplitStruct(0, elementCount, SplitStruct.DIRECTION_X));

        while (!toBePartitioned.isEmpty()) {
            SplitStruct split = toBePartitioned.poll();
            if (nodes.size() < nonLeafNodeCount) {
                // This is a non-leaf
                split.partition(toBePartitioned);
            }
            nodes.add(split);
        }

        if (nodes.size() != nodeCount) {
            throw new RuntimeException(
                    "Expected node count: " + nodeCount + ". Real node count: " + nodes.size());
        }

        // Now we have our data sorted in the required order. Start building
        // the tree.
        // Store the offset of each leaf node in the tree
        FSDataOutputStream fakeOut = new FSDataOutputStream(new java.io.OutputStream() {
            // Null output stream
            @Override
            public void write(int b) throws IOException {
                // Do nothing
            }

            @Override
            public void write(byte[] b, int off, int len) throws IOException {
                // Do nothing
            }

            @Override
            public void write(byte[] b) throws IOException {
                // Do nothing
            }
        }, null, TreeHeaderSize + nodes.size() * NodeSize);
        for (int i_leaf = nonLeafNodeCount, i = 0; i_leaf < nodes.size(); i_leaf++) {
            nodes.elementAt(i_leaf).offsetOfFirstElement = (int) fakeOut.getPos();
            if (i != nodes.elementAt(i_leaf).index1)
                throw new RuntimeException();
            double t1, x1, y1, t2, x2, y2;

            // Initialize MBR to first object
            int eol = skipToEOL(element_bytes, offsets[i]);
            fakeOut.write(element_bytes, offsets[i], eol - offsets[i]);
            line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
            stockObject.fromText(line);
            Prism mbr = stockObject.getMBR();
            t1 = mbr.t1;
            x1 = mbr.x1;
            y1 = mbr.y1;
            t2 = mbr.t2;
            x2 = mbr.x2;
            y2 = mbr.y2;
            i++;

            while (i < nodes.elementAt(i_leaf).index2) {
                eol = skipToEOL(element_bytes, offsets[i]);
                fakeOut.write(element_bytes, offsets[i], eol - offsets[i]);
                line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
                stockObject.fromText(line);
                mbr = stockObject.getMBR();
                if (mbr.t1 < t1)
                    t1 = mbr.t1;
                if (mbr.x1 < x1)
                    x1 = mbr.x1;
                if (mbr.y1 < y1)
                    y1 = mbr.y1;
                if (mbr.t2 > t2)
                    t2 = mbr.t2;
                if (mbr.x2 > x2)
                    x2 = mbr.x2;
                if (mbr.y2 > y2)
                    y2 = mbr.y2;
                i++;
            }
            nodes.elementAt(i_leaf).set(t1, x1, y1, t2, x2, y2);
        }
        fakeOut.close();
        fakeOut = null;

        // Calculate MBR and offsetOfFirstElement for non-leaves
        for (int i_node = nonLeafNodeCount - 1; i_node >= 0; i_node--) {
            int i_first_child = i_node * degree + 1;
            nodes.elementAt(i_node).offsetOfFirstElement = nodes.elementAt(i_first_child).offsetOfFirstElement;
            int i_child = 0;
            Prism mbr;
            mbr = nodes.elementAt(i_first_child + i_child);
            double t1 = mbr.t1;
            double x1 = mbr.x1;
            double y1 = mbr.y1;
            double t2 = mbr.t2;
            double x2 = mbr.x2;
            double y2 = mbr.y2;
            i_child++;

            while (i_child < degree) {
                mbr = nodes.elementAt(i_first_child + i_child);
                if (mbr.t1 < t1)
                    t1 = mbr.t1;
                if (mbr.x1 < x1)
                    x1 = mbr.x1;
                if (mbr.y1 < y1)
                    y1 = mbr.y1;
                if (mbr.t2 > t2)
                    t2 = mbr.t2;
                if (mbr.x2 > x2)
                    x2 = mbr.x2;
                if (mbr.y2 > y2)
                    y2 = mbr.y2;
                i_child++;
            }
            nodes.elementAt(i_node).set(t1, x1, y1, t2, x2, y2);
        }

        // Start writing the tree
        // write tree header (including size)
        // Total tree size. (== Total bytes written - 8 bytes for the size
        // itself)
        dataOut.writeInt(TreeHeaderSize + NodeSize * nodeCount + len);
        // Tree height
        dataOut.writeInt(height);
        // Degree
        dataOut.writeInt(degree);
        dataOut.writeInt(elementCount);

        //isColumnar
        dataOut.writeInt(columnarStorage ? 1 : 0);

        // write nodes
        for (SplitStruct node : nodes) {
            node.write(dataOut);
        }
        // write elements
        if (columnarStorage) {
            byte[] index_bs = index_bos.toByteArray();
            byte[][] bss = new byte[bos.length][];
            for (int i = 0; i < bss.length; i++) {
                bss[i] = bos[i].toByteArray();
            }
            for (int element_i = 0; element_i < elementCount; element_i++) {
                //int eol = skipToEOL(element_bytes, offsets[element_i]);
                //dataOut.write(element_bytes, offsets[element_i], eol - offsets[element_i]);
                dataOut.write(index_bs, ids[element_i] * IndexUnitSize, IndexUnitSize);
            }

            for (int i = 0; i < fields.length; i++) {
                int fieldSize = 0;
                if (fields[i].getType().equals(Integer.TYPE)) {
                    fieldSize = 4;
                } else if (fields[i].getType().equals(Long.TYPE)) {
                    fieldSize = 8;
                } else if (fields[i].getType().equals(Double.TYPE)) {
                    fieldSize = 8;
                } else {
                    //throw new RuntimeException("Unsupported field type: " + fields[i].getType().getName());
                    continue;
                }
                for (int element_i = 0; element_i < elementCount; element_i++) {
                    //int eol = skipToEOL(element_bytes, offsets[element_i]);
                    //dataOut.write(element_bytes, offsets[element_i], eol - offsets[element_i]);
                    dataOut.write(bss[i], ids[element_i] * fieldSize, fieldSize);
                }
            }
        } else {
            for (int element_i = 0; element_i < elementCount; element_i++) {
                int eol = skipToEOL(element_bytes, offsets[element_i]);
                dataOut.write(element_bytes, offsets[element_i], eol - offsets[element_i]);
            }
        }

    } catch (IOException e) {
        e.printStackTrace();
    } catch (IllegalArgumentException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } catch (IllegalAccessException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}

From source file:com.ricemap.spateDB.io.TextSerializerHelper.java

License:Apache License

/**
 * Deserializes and consumes a long from the given text. Consuming means all
 * characters read for deserialization are removed from the given text.
 * If separator is non-zero, a long is read and consumed up to the first
 * occurrence of this separator. The separator is also consumed.
 * @param text/*from  w ww.  j a v a 2s .  c o m*/
 * @param separator
 * @return
 */
public static long consumeHexLong(Text text, char separator) {
    int i = 0;
    byte[] bytes = text.getBytes();
    // Skip until the separator or end of text
    while (i < text.getLength() && HexadecimalChars[bytes[i]])
        i++;
    long l = deserializeHexLong(bytes, 0, i);
    // If the first char after the long is the separator, skip it
    if (i < text.getLength() && bytes[i] == separator)
        i++;
    // Shift bytes after the long
    System.arraycopy(bytes, i, bytes, 0, text.getLength() - i);
    text.set(bytes, 0, text.getLength() - i);
    return l;
}

From source file:com.ricemap.spateDB.io.TextSerializerHelper.java

License:Apache License

/**
 * Deserializes and consumes a double from the given text. Consuming means all
 * characters read for deserialization are removed from the given text.
 * If separator is non-zero, a double is read and consumed up to the first
 * occurrence of this separator. The separator is also consumed.
 * @param text/*from  w  ww  .j a va2 s  .  c  om*/
 * @param separator
 * @return
 */
public static double consumeDouble(Text text, char separator) {
    int i = 0;
    byte[] bytes = text.getBytes();
    // Skip until the separator or end of text
    while (i < text.getLength() && ((bytes[i] >= '0' && bytes[i] <= '9') || bytes[i] == 'e' || bytes[i] == 'E'
            || bytes[i] == '-' || bytes[i] == '+' || bytes[i] == '.'))
        i++;
    double d = deserializeDouble(bytes, 0, i);
    if (i < text.getLength() && bytes[i] == separator)
        i++;
    System.arraycopy(bytes, i, bytes, 0, text.getLength() - i);
    text.set(bytes, 0, text.getLength() - i);
    return d;
}

From source file:com.ricemap.spateDB.io.TextSerializerHelper.java

License:Apache License

public static long consumeLong(Text text, char separator) {
    int i = 0;//from ww w  .j a v  a2  s .c om
    byte[] bytes = text.getBytes();
    // Skip until the separator or end of text
    while (i < text.getLength() && DecimalChars[bytes[i]])
        i++;
    long l = deserializeLong(bytes, 0, i);
    // If the first char after the long is the separator, skip it
    if (i < text.getLength() && bytes[i] == separator)
        i++;
    // Shift bytes after the long
    System.arraycopy(bytes, i, bytes, 0, text.getLength() - i);
    text.set(bytes, 0, text.getLength() - i);
    return l;
}

From source file:com.ricemap.spateDB.io.TextSerializerHelper.java

License:Apache License

public static int consumeInt(Text text, char separator) {
    int i = 0;//from   w  w w.j a  v a  2s .c om
    byte[] bytes = text.getBytes();
    // Skip until the separator or end of text
    while (i < text.getLength() && DecimalChars[bytes[i]])
        i++;
    int l = deserializeInt(bytes, 0, i);
    // If the first char after the long is the separator, skip it
    if (i < text.getLength() && bytes[i] == separator)
        i++;
    // Shift bytes after the long
    System.arraycopy(bytes, i, bytes, 0, text.getLength() - i);
    text.set(bytes, 0, text.getLength() - i);
    return l;
}

From source file:com.ricemap.spateDB.io.TextSerializerHelper.java

License:Apache License

public static void consumeMap(Text text, Map<String, String> tags) {
    tags.clear();//  w  ww  .jav a2s . c  o m
    if (text.getLength() > 0) {
        byte[] tagsBytes = text.getBytes();
        if (tagsBytes[0] != Separators[MapStart])
            return;
        int i1 = 1;
        while (i1 < text.getLength() && tagsBytes[i1] != Separators[MapEnd]) {
            int i2 = i1 + 1;
            while (i2 < text.getLength() && tagsBytes[i2] != Separators[KeyValueSeparator])
                i2++;
            String key = new String(tagsBytes, i1, i2 - i1);
            i1 = i2 + 1;

            i2 = i1 + 1;
            while (i2 < text.getLength() && tagsBytes[i2] != Separators[FieldSeparator]
                    && tagsBytes[i2] != Separators[MapEnd])
                i2++;
            String value = new String(tagsBytes, i1, i2 - i1);
            tags.put(key, value);
            i1 = i2;
            if (i1 < text.getLength() && tagsBytes[i1] == Separators[FieldSeparator])
                i1++;
        }
        text.set(tagsBytes, i1, text.getLength() - i1);
    }
}

From source file:de.l3s.streamcorpus.terrier.ThriftFileCollectionRecordReader.java

License:Apache License

/** 
 * Reading a bunch of lines of file paths in a list.
 * The code in this method is redistributed from Hadoop LineRecordReader
 * // www  .ja va  2  s. c  o  m
 * @throws IOException 
 */
private void loadPathsFromInputSplit(InputSplit split, Configuration conf) throws IOException {
    FileSplit fileSplit = (FileSplit) split;
    Path path = fileSplit.getPath();

    long begin = fileSplit.getStart();
    long end = begin + fileSplit.getLength();

    LOG.info("Reading paths in file " + path.getName());

    // First check the compression codec
    CompressionCodecFactory compressionCodec = new CompressionCodecFactory(conf);
    CompressionCodec codec = compressionCodec.getCodec(path);
    FSDataInputStream fis = fs.open(path);
    SplitLineReader in;

    Seekable filePosition;

    boolean compressed = false;
    Decompressor decompressor = null;
    if (null != codec) {
        compressed = true;
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(fis,
                    decompressor, begin, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new CompressedSplitLineReader(cIn, conf, (byte[]) null);
            begin = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            in = new SplitLineReader(codec.createInputStream(fis, decompressor), conf, null);
            filePosition = fis;
        }
    } else {
        fis.seek(begin);
        in = new SplitLineReader(fis, conf, (byte[]) null);
        filePosition = fis;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (begin != 0) {
        begin += in.readLine(new Text(), 0, maxBytesToConsume(compressed, begin, end));
    }
    long pos = begin;

    int newSize = 0;
    final Text nextLine = new Text();
    paths = new ArrayList<>();
    while (getFilePosition(compressed, filePosition, pos) <= end || in.needAdditionalRecordAfterSplit()) {

        if (pos == 0) {
            // Strip BOM(Byte Order Mark)
            // Text only support UTF-8, we only need to check UTF-8 BOM
            // (0xEF,0xBB,0xBF) at the start of the text stream.
            newSize = in.readLine(nextLine, Integer.MAX_VALUE, Integer.MAX_VALUE);
            pos += newSize;
            int textLength = nextLine.getLength();
            byte[] textBytes = nextLine.getBytes();
            if ((textLength >= 3) && (textBytes[0] == (byte) 0xEF) && (textBytes[1] == (byte) 0xBB)
                    && (textBytes[2] == (byte) 0xBF)) {
                // find UTF-8 BOM, strip it.
                LOG.info("Found UTF-8 BOM and skipped it");
                textLength -= 3;
                newSize -= 3;
                if (textLength > 0) {
                    // It may work to use the same buffer and 
                    // not do the copyBytes
                    textBytes = nextLine.copyBytes();
                    nextLine.set(textBytes, 3, textLength);
                } else {
                    nextLine.clear();
                }
            }
        } else {
            newSize = in.readLine(nextLine, Integer.MAX_VALUE, maxBytesToConsume(compressed, pos, end));
            pos += newSize;
        }

        paths.add(nextLine.toString());
        // line too long. try again
        LOG.info("Skipped line of size " + newSize + " at pos " + (pos - newSize));
    }

    try {
        if (in != null) {
            in.close();
        }
        if (fis != null) {
            fis.close();
        }
    } finally {
        if (decompressor != null) {
            CodecPool.returnDecompressor(decompressor);
        }
    }
}