Example usage for org.apache.hadoop.io Text set

Introduction

In this page you can find the example usage for org.apache.hadoop.io Text set.

Prototype

public void set(byte[] utf8, int start, int len)

Source Link

Document

Set the Text to range of bytes

Usage

From source file:edu.cmu.cs.in.hadoop.HoopWholeFileRecordReader.java

License:Open Source License

/**
 * /*  w w w.ja v a2 s  . c o  m*/
 */
@Override
public boolean next(LongWritable key, Text value) throws IOException {
    debug("next ()");

    byte[] buffer = new byte[(int) fileSize];

    if (inStream == null) {
        debug("Internal error: no input stream available!");
        return (false);
    }

    len = inStream.read(buffer, 0, (int) fileSize);

    //debug ("Actually read: " +len);

    if (len != -1) {
        value.set(buffer, 0, len);
    } else {
        return (false);
    }

    debug("Read the file");

    return true;
}

From source file:edu.cshl.schatz.jnomics.util.TextCutter.java

License:Open Source License

/**
 * Sets the contents of <code>text</code> to the value of the requested cut.
 * //w ww. j  ava 2  s  . c om
 * @param text The {@link Text} instance to reset.
 * @param cutIndex The 0-based index of the desired cut.
 * @return The passed {@link Text} instance text (not a copy).
 * @throws ArrayIndexOutOfBoundsException if cutIndex is greater than or
 *             equal to the number of cuts.
 */
public Text getCut(Text text, int cutIndex) {
    if (modFlag) {
        reinitialize();
    }

    if (cutIndex < 0) {
        cutIndex = cutCount + cutIndex;
    }

    if (cutIndex >= cutCount) {
        throw new ArrayIndexOutOfBoundsException(
                "Cut " + cutIndex + " does not exist (cutCount=" + cutCount + ")");
    }

    int position = cutIndices[cutIndex][0];
    int length = cutIndices[cutIndex][1];

    text.set(sourceText.getBytes(), position, length);

    return text;
}

From source file:edu.cshl.schatz.jnomics.util.TextCutter.java

License:Open Source License

/**
 * Sets the contents of <code>text</code> to the value of the requested cuts
 * (including any intermediate delimiters).
 * <p>//from   w ww . j  a v  a 2s  .c o  m
 * Negative cut indices are interpreted as
 * <code>{@link #getCutCount()} + cutIndex</code>. For example, a
 * <code>cutIndex</code> of -1 would return the last cut.
 * <p>
 * If <code>lastIndex < firstIndex</code> (after converting negative indices
 * to their positive equivalents), then the resulting cut order is reversed.
 * For example, given the input "0 1 2 3 4", <code>getCutRange(4,2)</code>
 * and <code>getCutRange(-1,-3)</code> would both return "4 3 2".
 * 
 * @param text The {@link Text} instance to reset.
 * @param firstIndex The 0-based index of the first desired cut in the range
 *            (inclusive).
 * @param lastIndex The 0-based index of the last desired cut in the range
 *            (inclusive).
 * @return The passed {@link Text} instance text (not a copy).
 * @throws ArrayIndexOutOfBoundsException if cutIndex is greater than or
 *             equal to the number of cuts.
 */
public Text getCutRange(Text text, int firstIndex, int lastIndex) {
    if (modFlag) {
        reinitialize();
    }

    if (firstIndex < 0) {
        firstIndex = cutCount + firstIndex;
    }

    if (lastIndex < 0) {
        lastIndex = cutCount + lastIndex;
    }

    if (lastIndex >= cutCount) {
        throw new ArrayIndexOutOfBoundsException("Requested cut does not exist (cutCount=" + cutCount + ")");
    }

    int position, length;

    if (firstIndex <= lastIndex) {
        position = cutIndices[firstIndex][0];
        length = lastIndex - firstIndex;

        for (int i = firstIndex; i <= lastIndex; i++) {
            length += cutIndices[i][1];
        }

        text.set(sourceText.getBytes(), position, length);
    } else {
        final byte[] delimBytes = new byte[] { (byte) delimiterChar };

        position = cutIndices[firstIndex][0];
        length = cutIndices[firstIndex][1];
        text.set(sourceText.getBytes(), position, length);

        for (int i = firstIndex - 1; i >= lastIndex; i--) {
            position = cutIndices[i][0];
            length = cutIndices[i][1];

            text.append(delimBytes, 0, 1);
            text.append(sourceText.getBytes(), position, length);
        }
    }

    return text;
}

From source file:edu.stolaf.cs.wmrserver.streaming.PipeMapRed.java

License:Apache License

/**
 * Split a line into key and value.//  w w w.  j  ava 2 s  . co m
 * @param line: a byte array of line containing UTF-8 bytes
 * @param key: key of a record
 * @param val: value of a record
 * @throws IOException
 */
void splitKeyVal(byte[] line, int length, Text key, Text val) throws IOException {
    int numKeyFields = getNumOfKeyFields();
    byte[] separator = getFieldSeparator();

    // Need to find numKeyFields separators
    int pos = UTF8ByteArrayUtils.findBytes(line, 0, length, separator);
    for (int k = 1; k < numKeyFields && pos != -1; k++) {
        pos = UTF8ByteArrayUtils.findBytes(line, pos + separator.length, length, separator);
    }
    try {
        if (pos == -1) {
            key.set(line, 0, length);
            val.set("");
        } else {
            StreamKeyValUtil.splitKeyVal(line, 0, length, key, val, pos, separator.length);
        }
    } catch (CharacterCodingException e) {
        LOG.warn(StringUtils.stringifyException(e));
    }
}

From source file:edu.umn.cs.spatialHadoop.core.CSVOGC.java

License:Open Source License

@Override
public void fromText(Text text) {
    byte[] bytes = text.getBytes();
    int separatorsEncountered = 0;
    int i1 = 0;/*  w w  w  .  jav a 2  s . co m*/
    // Locate the required column
    while (separatorsEncountered < column && i1 < text.getLength()) {
        if (bytes[i1++] == separator)
            separatorsEncountered++;
    }
    if (i1 == text.getLength()) {
        this.prefix = new byte[i1];
        System.arraycopy(bytes, 0, prefix, 0, i1);
        super.geom = null;
        this.suffix = null;
        return;
    }
    int i2 = i1 + 1;
    while (i2 < text.getLength() && bytes[i2] != separator)
        i2++;
    // Copy prefix and suffix
    if (i1 == 0) {
        prefix = null;
    } else {
        prefix = new byte[i1];
        System.arraycopy(bytes, 0, prefix, 0, i1);
    }
    if (i2 == text.getLength()) {
        suffix = null;
    } else {
        suffix = new byte[text.getLength() - i2];
        System.arraycopy(bytes, i2, suffix, 0, text.getLength() - i2);
    }

    // Chop prefix and suffix and leave only the selected column
    text.set(bytes, i1, i2 - i1);
    super.fromText(text);
}

From source file:edu.umn.cs.spatialHadoop.core.GridInfo.java

License:Open Source License

@Override
public void fromText(Text text) {
    super.fromText(text);
    if (text.getLength() > 0) {
        // Remove the first comma
        text.set(text.getBytes(), 1, text.getLength() - 1);
        columns = (int) TextSerializerHelper.consumeInt(text, ',');
        rows = (int) TextSerializerHelper.consumeInt(text, '\0');
    }//w ww .j  a  v a2s . co m
}

From source file:edu.umn.cs.spatialHadoop.core.Partition.java

License:Open Source License

@Override
public void fromText(Text text) {
    super.fromText(text);
    text.set(text.getBytes(), 1, text.getLength() - 1); // Skip comma
    this.recordCount = TextSerializerHelper.consumeLong(text, ',');
    this.size = TextSerializerHelper.consumeLong(text, ',');
    filename = text.toString();/*w ww .j av  a2s  .  c  o m*/
}

From source file:edu.umn.cs.spatialHadoop.core.RTree.java

License:Open Source License

/**
 * Builds the RTree given a serialized list of elements. It uses the given
 * stockObject to deserialize these elements using
 * {@link TextSerializable#fromText(Text)} and build the tree. Also writes the
 * created tree to the disk directly./*from w  ww  .  j  a v a 2 s.  com*/
 * 
 * @param element_bytes
 *          - serialization of all elements separated by new lines
 * @param offset
 *          - offset of the first byte to use in elements_bytes
 * @param len
 *          - number of bytes to use in elements_bytes
 * @param degree
 *          - Degree of the R-tree to build in terms of number of children per
 *          node
 * @param dataOut
 *          - output stream to write the result to.
 * @param fast_sort
 *          - setting this to <code>true</code> allows the method to run
 *          faster by materializing the offset of each element in the list
 *          which speeds up the comparison. However, this requires an
 *          additional 16 bytes per element. So, for each 1M elements, the
 *          method will require an additional 16 M bytes (approximately).
 */
public void bulkLoadWrite(final byte[] element_bytes, final int offset, final int len, final int degree,
        DataOutput dataOut, final boolean fast_sort) {
    try {

        // Count number of elements in the given text
        int i_start = offset;
        final Text line = new Text();
        while (i_start < offset + len) {
            int i_end = skipToEOL(element_bytes, i_start);
            // Extract the line without end of line character
            line.set(element_bytes, i_start, i_end - i_start - 1);
            stockObject.fromText(line);
            elementCount++;
            i_start = i_end;
        }
        LOG.info("Bulk loading an RTree with " + elementCount + " elements");

        // It turns out the findBestDegree returns the best degree when the whole
        // tree is loaded to memory when processed. However, as current algorithms
        // process the tree while it's on disk, a higher degree should be selected
        // such that a node fits one file block (assumed to be 4K).
        //final int degree = findBestDegree(bytesAvailable, elementCount);
        LOG.info("Writing an RTree with degree " + degree);

        int height = Math.max(1, (int) Math.ceil(Math.log(elementCount) / Math.log(degree)));
        int leafNodeCount = (int) Math.pow(degree, height - 1);
        if (elementCount < 2 * leafNodeCount && height > 1) {
            height--;
            leafNodeCount = (int) Math.pow(degree, height - 1);
        }
        int nodeCount = (int) ((Math.pow(degree, height) - 1) / (degree - 1));
        int nonLeafNodeCount = nodeCount - leafNodeCount;

        // Keep track of the offset of each element in the text
        final int[] offsets = new int[elementCount];
        final double[] xs = fast_sort ? new double[elementCount] : null;
        final double[] ys = fast_sort ? new double[elementCount] : null;

        i_start = offset;
        line.clear();
        for (int i = 0; i < elementCount; i++) {
            offsets[i] = i_start;
            int i_end = skipToEOL(element_bytes, i_start);
            if (xs != null) {
                // Extract the line with end of line character
                line.set(element_bytes, i_start, i_end - i_start - 1);
                stockObject.fromText(line);
                // Sample center of the shape
                xs[i] = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2;
                ys[i] = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2;
            }
            i_start = i_end;
        }

        /**A struct to store information about a split*/
        class SplitStruct extends Rectangle {
            /**Start and end index for this split*/
            int index1, index2;
            /**Direction of this split*/
            byte direction;
            /**Index of first element on disk*/
            int offsetOfFirstElement;

            static final byte DIRECTION_X = 0;
            static final byte DIRECTION_Y = 1;

            SplitStruct(int index1, int index2, byte direction) {
                this.index1 = index1;
                this.index2 = index2;
                this.direction = direction;
            }

            @Override
            public void write(DataOutput out) throws IOException {
                out.writeInt(offsetOfFirstElement);
                super.write(out);
            }

            void partition(Queue<SplitStruct> toBePartitioned) {
                IndexedSortable sortableX;
                IndexedSortable sortableY;

                if (fast_sort) {
                    // Use materialized xs[] and ys[] to do the comparisons
                    sortableX = new IndexedSortable() {
                        @Override
                        public void swap(int i, int j) {
                            // Swap xs
                            double tempx = xs[i];
                            xs[i] = xs[j];
                            xs[j] = tempx;
                            // Swap ys
                            double tempY = ys[i];
                            ys[i] = ys[j];
                            ys[j] = tempY;
                            // Swap id
                            int tempid = offsets[i];
                            offsets[i] = offsets[j];
                            offsets[j] = tempid;
                        }

                        @Override
                        public int compare(int i, int j) {
                            if (xs[i] < xs[j])
                                return -1;
                            if (xs[i] > xs[j])
                                return 1;
                            return 0;
                        }
                    };

                    sortableY = new IndexedSortable() {
                        @Override
                        public void swap(int i, int j) {
                            // Swap xs
                            double tempx = xs[i];
                            xs[i] = xs[j];
                            xs[j] = tempx;
                            // Swap ys
                            double tempY = ys[i];
                            ys[i] = ys[j];
                            ys[j] = tempY;
                            // Swap id
                            int tempid = offsets[i];
                            offsets[i] = offsets[j];
                            offsets[j] = tempid;
                        }

                        @Override
                        public int compare(int i, int j) {
                            if (ys[i] < ys[j])
                                return -1;
                            if (ys[i] > ys[j])
                                return 1;
                            return 0;
                        }
                    };
                } else {
                    // No materialized xs and ys. Always deserialize objects to compare
                    sortableX = new IndexedSortable() {
                        @Override
                        public void swap(int i, int j) {
                            // Swap id
                            int tempid = offsets[i];
                            offsets[i] = offsets[j];
                            offsets[j] = tempid;
                        }

                        @Override
                        public int compare(int i, int j) {
                            // Get end of line
                            int eol = skipToEOL(element_bytes, offsets[i]);
                            line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
                            stockObject.fromText(line);
                            double xi = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2;

                            eol = skipToEOL(element_bytes, offsets[j]);
                            line.set(element_bytes, offsets[j], eol - offsets[j] - 1);
                            stockObject.fromText(line);
                            double xj = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2;
                            if (xi < xj)
                                return -1;
                            if (xi > xj)
                                return 1;
                            return 0;
                        }
                    };

                    sortableY = new IndexedSortable() {
                        @Override
                        public void swap(int i, int j) {
                            // Swap id
                            int tempid = offsets[i];
                            offsets[i] = offsets[j];
                            offsets[j] = tempid;
                        }

                        @Override
                        public int compare(int i, int j) {
                            int eol = skipToEOL(element_bytes, offsets[i]);
                            line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
                            stockObject.fromText(line);
                            double yi = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2;

                            eol = skipToEOL(element_bytes, offsets[j]);
                            line.set(element_bytes, offsets[j], eol - offsets[j] - 1);
                            stockObject.fromText(line);
                            double yj = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2;
                            if (yi < yj)
                                return -1;
                            if (yi > yj)
                                return 1;
                            return 0;
                        }
                    };
                }

                final IndexedSorter sorter = new QuickSort();

                final IndexedSortable[] sortables = new IndexedSortable[2];
                sortables[SplitStruct.DIRECTION_X] = sortableX;
                sortables[SplitStruct.DIRECTION_Y] = sortableY;

                sorter.sort(sortables[direction], index1, index2);

                // Partition into maxEntries partitions (equally) and
                // create a SplitStruct for each partition
                int i1 = index1;
                for (int iSplit = 0; iSplit < degree; iSplit++) {
                    int i2 = index1 + (index2 - index1) * (iSplit + 1) / degree;
                    SplitStruct newSplit = new SplitStruct(i1, i2, (byte) (1 - direction));
                    toBePartitioned.add(newSplit);
                    i1 = i2;
                }
            }
        }

        // All nodes stored in level-order traversal
        Vector<SplitStruct> nodes = new Vector<SplitStruct>();
        final Queue<SplitStruct> toBePartitioned = new LinkedList<SplitStruct>();
        toBePartitioned.add(new SplitStruct(0, elementCount, SplitStruct.DIRECTION_X));

        while (!toBePartitioned.isEmpty()) {
            SplitStruct split = toBePartitioned.poll();
            if (nodes.size() < nonLeafNodeCount) {
                // This is a non-leaf
                split.partition(toBePartitioned);
            }
            nodes.add(split);
        }

        if (nodes.size() != nodeCount) {
            throw new RuntimeException(
                    "Expected node count: " + nodeCount + ". Real node count: " + nodes.size());
        }

        // Now we have our data sorted in the required order. Start building
        // the tree.
        // Store the offset of each leaf node in the tree
        FSDataOutputStream fakeOut = null;
        try {
            fakeOut = new FSDataOutputStream(new java.io.OutputStream() {
                // Null output stream
                @Override
                public void write(int b) throws IOException {
                    // Do nothing
                }

                @Override
                public void write(byte[] b, int off, int len) throws IOException {
                    // Do nothing
                }

                @Override
                public void write(byte[] b) throws IOException {
                    // Do nothing
                }
            }, null, TreeHeaderSize + nodes.size() * NodeSize);
            for (int i_leaf = nonLeafNodeCount, i = 0; i_leaf < nodes.size(); i_leaf++) {
                nodes.elementAt(i_leaf).offsetOfFirstElement = (int) fakeOut.getPos();
                if (i != nodes.elementAt(i_leaf).index1)
                    throw new RuntimeException();
                double x1, y1, x2, y2;

                // Initialize MBR to first object
                int eol = skipToEOL(element_bytes, offsets[i]);
                fakeOut.write(element_bytes, offsets[i], eol - offsets[i]);
                line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
                stockObject.fromText(line);
                Rectangle mbr = stockObject.getMBR();
                x1 = mbr.x1;
                y1 = mbr.y1;
                x2 = mbr.x2;
                y2 = mbr.y2;
                i++;

                while (i < nodes.elementAt(i_leaf).index2) {
                    eol = skipToEOL(element_bytes, offsets[i]);
                    fakeOut.write(element_bytes, offsets[i], eol - offsets[i]);
                    line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
                    stockObject.fromText(line);
                    mbr = stockObject.getMBR();
                    if (mbr.x1 < x1)
                        x1 = mbr.x1;
                    if (mbr.y1 < y1)
                        y1 = mbr.y1;
                    if (mbr.x2 > x2)
                        x2 = mbr.x2;
                    if (mbr.y2 > y2)
                        y2 = mbr.y2;
                    i++;
                }
                nodes.elementAt(i_leaf).set(x1, y1, x2, y2);
            }

        } finally {
            if (fakeOut != null)
                fakeOut.close();
        }

        // Calculate MBR and offsetOfFirstElement for non-leaves
        for (int i_node = nonLeafNodeCount - 1; i_node >= 0; i_node--) {
            int i_first_child = i_node * degree + 1;
            nodes.elementAt(i_node).offsetOfFirstElement = nodes.elementAt(i_first_child).offsetOfFirstElement;
            int i_child = 0;
            Rectangle mbr;
            mbr = nodes.elementAt(i_first_child + i_child);
            double x1 = mbr.x1;
            double y1 = mbr.y1;
            double x2 = mbr.x2;
            double y2 = mbr.y2;
            i_child++;

            while (i_child < degree) {
                mbr = nodes.elementAt(i_first_child + i_child);
                if (mbr.x1 < x1)
                    x1 = mbr.x1;
                if (mbr.y1 < y1)
                    y1 = mbr.y1;
                if (mbr.x2 > x2)
                    x2 = mbr.x2;
                if (mbr.y2 > y2)
                    y2 = mbr.y2;
                i_child++;
            }
            nodes.elementAt(i_node).set(x1, y1, x2, y2);
        }

        // Start writing the tree
        // write tree header (including size)
        // Total tree size. (== Total bytes written - 8 bytes for the size itself)
        dataOut.writeInt(TreeHeaderSize + NodeSize * nodeCount + len);
        // Tree height
        dataOut.writeInt(height);
        // Degree
        dataOut.writeInt(degree);
        dataOut.writeInt(elementCount);

        // write nodes
        for (SplitStruct node : nodes) {
            node.write(dataOut);
        }
        // write elements
        for (int element_i = 0; element_i < elementCount; element_i++) {
            int eol = skipToEOL(element_bytes, offsets[element_i]);
            dataOut.write(element_bytes, offsets[element_i], eol - offsets[element_i]);
        }

    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:edu.umn.cs.spatialHadoop.indexing.RTree.java

License:Open Source License

/**
 * Builds the RTree given a serialized list of elements. It uses the given
 * stockObject to deserialize these elements using
 * {@link TextSerializable#fromText(Text)} and build the tree. Also writes the
 * created tree to the disk directly./*  w ww .  j ava2  s.  c o m*/
 * 
 * @param element_bytes
 *          - serialization of all elements separated by new lines
 * @param offset
 *          - offset of the first byte to use in elements_bytes
 * @param len
 *          - number of bytes to use in elements_bytes
 * @param degree
 *          - Degree of the R-tree to build in terms of number of children per
 *          node
 * @param dataOut
 *          - output stream to write the result to.
 * @param fast_sort
 *          - setting this to <code>true</code> allows the method to run
 *          faster by materializing the offset of each element in the list
 *          which speeds up the comparison. However, this requires an
 *          additional 16 bytes per element. So, for each 1M elements, the
 *          method will require an additional 16 M bytes (approximately).
 */
public static void bulkLoadWrite(final byte[] element_bytes, final int offset, final int len, final int degree,
        DataOutput dataOut, final Shape stockObject, final boolean fast_sort) {
    try {

        int elementCount = 0;
        // Count number of elements in the given text
        int i_start = offset;
        final Text line = new Text();
        while (i_start < offset + len) {
            int i_end = skipToEOL(element_bytes, i_start);
            // Extract the line without end of line character
            line.set(element_bytes, i_start, i_end - i_start - 1);
            stockObject.fromText(line);
            elementCount++;
            i_start = i_end;
        }
        LOG.info("Bulk loading an RTree with " + elementCount + " elements");

        // It turns out the findBestDegree returns the best degree when the whole
        // tree is loaded to memory when processed. However, as current algorithms
        // process the tree while it's on disk, a higher degree should be selected
        // such that a node fits one file block (assumed to be 4K).
        //final int degree = findBestDegree(bytesAvailable, elementCount);

        int height = Math.max(1, (int) Math.ceil(Math.log(elementCount) / Math.log(degree)));
        int leafNodeCount = (int) Math.pow(degree, height - 1);
        if (elementCount < 2 * leafNodeCount && height > 1) {
            height--;
            leafNodeCount = (int) Math.pow(degree, height - 1);
        }
        int nodeCount = (int) ((Math.pow(degree, height) - 1) / (degree - 1));
        int nonLeafNodeCount = nodeCount - leafNodeCount;

        // Keep track of the offset of each element in the text
        final int[] offsets = new int[elementCount];
        final double[] xs = fast_sort ? new double[elementCount] : null;
        final double[] ys = fast_sort ? new double[elementCount] : null;

        i_start = offset;
        line.clear();
        for (int i = 0; i < elementCount; i++) {
            offsets[i] = i_start;
            int i_end = skipToEOL(element_bytes, i_start);
            if (xs != null) {
                // Extract the line with end of line character
                line.set(element_bytes, i_start, i_end - i_start - 1);
                stockObject.fromText(line);
                // Sample center of the shape
                xs[i] = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2;
                ys[i] = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2;
            }
            i_start = i_end;
        }

        /**A struct to store information about a split*/
        class SplitStruct extends Rectangle {
            /**Start and end index for this split*/
            int index1, index2;
            /**Direction of this split*/
            byte direction;
            /**Index of first element on disk*/
            int offsetOfFirstElement;

            static final byte DIRECTION_X = 0;
            static final byte DIRECTION_Y = 1;

            SplitStruct(int index1, int index2, byte direction) {
                this.index1 = index1;
                this.index2 = index2;
                this.direction = direction;
            }

            @Override
            public void write(DataOutput out) throws IOException {
                out.writeInt(offsetOfFirstElement);
                super.write(out);
            }

            void partition(Queue<SplitStruct> toBePartitioned) {
                IndexedSortable sortableX;
                IndexedSortable sortableY;

                if (fast_sort) {
                    // Use materialized xs[] and ys[] to do the comparisons
                    sortableX = new IndexedSortable() {
                        @Override
                        public void swap(int i, int j) {
                            // Swap xs
                            double tempx = xs[i];
                            xs[i] = xs[j];
                            xs[j] = tempx;
                            // Swap ys
                            double tempY = ys[i];
                            ys[i] = ys[j];
                            ys[j] = tempY;
                            // Swap id
                            int tempid = offsets[i];
                            offsets[i] = offsets[j];
                            offsets[j] = tempid;
                        }

                        @Override
                        public int compare(int i, int j) {
                            if (xs[i] < xs[j])
                                return -1;
                            if (xs[i] > xs[j])
                                return 1;
                            return 0;
                        }
                    };

                    sortableY = new IndexedSortable() {
                        @Override
                        public void swap(int i, int j) {
                            // Swap xs
                            double tempx = xs[i];
                            xs[i] = xs[j];
                            xs[j] = tempx;
                            // Swap ys
                            double tempY = ys[i];
                            ys[i] = ys[j];
                            ys[j] = tempY;
                            // Swap id
                            int tempid = offsets[i];
                            offsets[i] = offsets[j];
                            offsets[j] = tempid;
                        }

                        @Override
                        public int compare(int i, int j) {
                            if (ys[i] < ys[j])
                                return -1;
                            if (ys[i] > ys[j])
                                return 1;
                            return 0;
                        }
                    };
                } else {
                    // No materialized xs and ys. Always deserialize objects to compare
                    sortableX = new IndexedSortable() {
                        @Override
                        public void swap(int i, int j) {
                            // Swap id
                            int tempid = offsets[i];
                            offsets[i] = offsets[j];
                            offsets[j] = tempid;
                        }

                        @Override
                        public int compare(int i, int j) {
                            // Get end of line
                            int eol = skipToEOL(element_bytes, offsets[i]);
                            line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
                            stockObject.fromText(line);
                            double xi = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2;

                            eol = skipToEOL(element_bytes, offsets[j]);
                            line.set(element_bytes, offsets[j], eol - offsets[j] - 1);
                            stockObject.fromText(line);
                            double xj = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2;
                            if (xi < xj)
                                return -1;
                            if (xi > xj)
                                return 1;
                            return 0;
                        }
                    };

                    sortableY = new IndexedSortable() {
                        @Override
                        public void swap(int i, int j) {
                            // Swap id
                            int tempid = offsets[i];
                            offsets[i] = offsets[j];
                            offsets[j] = tempid;
                        }

                        @Override
                        public int compare(int i, int j) {
                            int eol = skipToEOL(element_bytes, offsets[i]);
                            line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
                            stockObject.fromText(line);
                            double yi = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2;

                            eol = skipToEOL(element_bytes, offsets[j]);
                            line.set(element_bytes, offsets[j], eol - offsets[j] - 1);
                            stockObject.fromText(line);
                            double yj = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2;
                            if (yi < yj)
                                return -1;
                            if (yi > yj)
                                return 1;
                            return 0;
                        }
                    };
                }

                final IndexedSorter sorter = new QuickSort();

                final IndexedSortable[] sortables = new IndexedSortable[2];
                sortables[SplitStruct.DIRECTION_X] = sortableX;
                sortables[SplitStruct.DIRECTION_Y] = sortableY;

                sorter.sort(sortables[direction], index1, index2);

                // Partition into maxEntries partitions (equally) and
                // create a SplitStruct for each partition
                int i1 = index1;
                for (int iSplit = 0; iSplit < degree; iSplit++) {
                    int i2 = index1 + (index2 - index1) * (iSplit + 1) / degree;
                    SplitStruct newSplit = new SplitStruct(i1, i2, (byte) (1 - direction));
                    toBePartitioned.add(newSplit);
                    i1 = i2;
                }
            }
        }

        // All nodes stored in level-order traversal
        Vector<SplitStruct> nodes = new Vector<SplitStruct>();
        final Queue<SplitStruct> toBePartitioned = new LinkedList<SplitStruct>();
        toBePartitioned.add(new SplitStruct(0, elementCount, SplitStruct.DIRECTION_X));

        while (!toBePartitioned.isEmpty()) {
            SplitStruct split = toBePartitioned.poll();
            if (nodes.size() < nonLeafNodeCount) {
                // This is a non-leaf
                split.partition(toBePartitioned);
            }
            nodes.add(split);
        }

        if (nodes.size() != nodeCount) {
            throw new RuntimeException(
                    "Expected node count: " + nodeCount + ". Real node count: " + nodes.size());
        }

        // Now we have our data sorted in the required order. Start building
        // the tree.
        // Store the offset of each leaf node in the tree
        FSDataOutputStream fakeOut = null;
        try {
            fakeOut = new FSDataOutputStream(new java.io.OutputStream() {
                // Null output stream
                @Override
                public void write(int b) throws IOException {
                    // Do nothing
                }

                @Override
                public void write(byte[] b, int off, int len) throws IOException {
                    // Do nothing
                }

                @Override
                public void write(byte[] b) throws IOException {
                    // Do nothing
                }
            }, null, TreeHeaderSize + nodes.size() * NodeSize);
            for (int i_leaf = nonLeafNodeCount, i = 0; i_leaf < nodes.size(); i_leaf++) {
                nodes.elementAt(i_leaf).offsetOfFirstElement = (int) fakeOut.getPos();
                if (i != nodes.elementAt(i_leaf).index1)
                    throw new RuntimeException();
                double x1, y1, x2, y2;

                // Initialize MBR to first object
                int eol = skipToEOL(element_bytes, offsets[i]);
                fakeOut.write(element_bytes, offsets[i], eol - offsets[i]);
                line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
                stockObject.fromText(line);
                Rectangle mbr = stockObject.getMBR();
                x1 = mbr.x1;
                y1 = mbr.y1;
                x2 = mbr.x2;
                y2 = mbr.y2;
                i++;

                while (i < nodes.elementAt(i_leaf).index2) {
                    eol = skipToEOL(element_bytes, offsets[i]);
                    fakeOut.write(element_bytes, offsets[i], eol - offsets[i]);
                    line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
                    stockObject.fromText(line);
                    mbr = stockObject.getMBR();
                    if (mbr.x1 < x1)
                        x1 = mbr.x1;
                    if (mbr.y1 < y1)
                        y1 = mbr.y1;
                    if (mbr.x2 > x2)
                        x2 = mbr.x2;
                    if (mbr.y2 > y2)
                        y2 = mbr.y2;
                    i++;
                }
                nodes.elementAt(i_leaf).set(x1, y1, x2, y2);
            }

        } finally {
            if (fakeOut != null)
                fakeOut.close();
        }

        // Calculate MBR and offsetOfFirstElement for non-leaves
        for (int i_node = nonLeafNodeCount - 1; i_node >= 0; i_node--) {
            int i_first_child = i_node * degree + 1;
            nodes.elementAt(i_node).offsetOfFirstElement = nodes.elementAt(i_first_child).offsetOfFirstElement;
            int i_child = 0;
            Rectangle mbr;
            mbr = nodes.elementAt(i_first_child + i_child);
            double x1 = mbr.x1;
            double y1 = mbr.y1;
            double x2 = mbr.x2;
            double y2 = mbr.y2;
            i_child++;

            while (i_child < degree) {
                mbr = nodes.elementAt(i_first_child + i_child);
                if (mbr.x1 < x1)
                    x1 = mbr.x1;
                if (mbr.y1 < y1)
                    y1 = mbr.y1;
                if (mbr.x2 > x2)
                    x2 = mbr.x2;
                if (mbr.y2 > y2)
                    y2 = mbr.y2;
                i_child++;
            }
            nodes.elementAt(i_node).set(x1, y1, x2, y2);
        }

        // Start writing the tree
        // write tree header (including size)
        // Total tree size. (== Total bytes written - 8 bytes for the size itself)
        dataOut.writeInt(TreeHeaderSize + NodeSize * nodeCount + len);
        // Tree height
        dataOut.writeInt(height);
        // Degree
        dataOut.writeInt(degree);
        dataOut.writeInt(elementCount);

        // write nodes
        for (SplitStruct node : nodes) {
            node.write(dataOut);
        }
        // write elements
        for (int element_i = 0; element_i < elementCount; element_i++) {
            int eol = skipToEOL(element_bytes, offsets[element_i]);
            dataOut.write(element_bytes, offsets[element_i], eol - offsets[element_i]);
        }

    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:edu.umn.cs.spatialHadoop.io.TextSerializerHelper.java

License:Open Source License

/**
 * Deserializes and consumes a double from the given text. Consuming means all
 * characters read for deserialization are removed from the given text.
 * If separator is non-zero, a double is read and consumed up to the first
 * occurrence of this separator. The separator is also consumed.
 * @param text/* www. jav a2s . co  m*/
 * @param separator
 * @return
 */
public static double consumeDouble(Text text, char separator) {
    int i = 0;
    byte[] bytes = text.getBytes();
    // Skip until the separator or end of text
    while (i < text.getLength() && ((bytes[i] >= '0' && bytes[i] <= '9') || bytes[i] == 'e' || bytes[i] == 'E'
            || bytes[i] == '-' || bytes[i] == '+' || bytes[i] == '.'))
        i++;
    double d = Double.parseDouble(new String(bytes, 0, i));
    if (i < text.getLength() && bytes[i] == separator)
        i++;
    System.arraycopy(bytes, i, bytes, 0, text.getLength() - i);
    text.set(bytes, 0, text.getLength() - i);
    return d;
}