Example usage for org.apache.hadoop.io Text clear

List of usage examples for org.apache.hadoop.io Text clear

Introduction

In this page you can find the example usage for org.apache.hadoop.io Text clear.

Prototype

public void clear() 

Source Link

Document

Clear the string to empty.

Usage

From source file:com.blm.orc.DynamicByteArray.java

License:Apache License

/**
 * Set a text value from the bytes in this dynamic array.
 * @param result the value to set//from  w  w  w . j  av a 2 s . com
 * @param offset the start of the bytes to copy
 * @param length the number of bytes to copy
 */
public void setText(Text result, int offset, int length) {
    result.clear();
    int currentChunk = offset / chunkSize;
    int currentOffset = offset % chunkSize;
    int currentLength = Math.min(length, chunkSize - currentOffset);
    while (length > 0) {
        result.append(data[currentChunk], currentOffset, currentLength);
        length -= currentLength;
        currentChunk += 1;
        currentOffset = 0;
        currentLength = Math.min(length, chunkSize - currentOffset);
    }
}

From source file:com.dinglicom.clouder.mapreduce.input.LineReader.java

License:Apache License

/**
 * Read a line terminated by one of CR, LF, or CRLF.
 *///  w w w .j a v  a2 s .  co m
private int readDefaultLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException {
    /* We're reading data from in, but the head of the stream may be
     * already buffered in buffer, so we have several cases:
     * 1. No newline characters are in the buffer, so we need to copy
     *    everything and read another buffer from the stream.
     * 2. An unambiguously terminated line is in buffer, so we just
     *    copy to str.
     * 3. Ambiguously terminated line is in buffer, i.e. buffer ends
     *    in CR.  In this case we copy everything up to CR to str, but
     *    we also need to see what follows CR: if it's LF, then we
     *    need consume LF as well, so next call to readLine will read
     *    from after that.
     * We use a flag prevCharCR to signal if previous character was CR
     * and, if it happens to be at the end of the buffer, delay
     * consuming it until we have a chance to look at the char that
     * follows.
     */
    str.clear();
    int txtLength = 0; //tracks str.getLength(), as an optimization
    int newlineLength = 0; //length of terminating newline
    boolean prevCharCR = false; //true of prev char was CR
    long bytesConsumed = 0;
    do {
        int startPosn = bufferPosn; //starting from where we left off the last time
        if (bufferPosn >= bufferLength) {
            startPosn = bufferPosn = 0;
            if (prevCharCR)
                ++bytesConsumed; //account for CR from previous read
            bufferLength = in.read(buffer);
            if (bufferLength <= 0)
                break; // EOF
        }
        for (; bufferPosn < bufferLength; ++bufferPosn) { //search for newline
            if (buffer[bufferPosn] == LF) {
                newlineLength = (prevCharCR) ? 2 : 1;
                ++bufferPosn; // at next invocation proceed from following byte
                break;
            }
            if (prevCharCR) { //CR + notLF, we are at notLF
                newlineLength = 1;
                break;
            }
            prevCharCR = (buffer[bufferPosn] == CR);
        }
        int readLength = bufferPosn - startPosn;
        if (prevCharCR && newlineLength == 0)
            --readLength; //CR at the end of the buffer
        bytesConsumed += readLength;
        int appendLength = readLength - newlineLength;
        if (appendLength > maxLineLength - txtLength) {
            appendLength = maxLineLength - txtLength;
        }
        if (appendLength > 0) {
            str.append(buffer, startPosn, appendLength);
            txtLength += appendLength;
        }
    } while (newlineLength == 0 && bytesConsumed < maxBytesToConsume);

    if (bytesConsumed > (long) Integer.MAX_VALUE)
        throw new IOException("Too many bytes before newline: " + bytesConsumed);
    return (int) bytesConsumed;
}

From source file:com.dinglicom.clouder.mapreduce.input.LineReader.java

License:Apache License

/**
 * Read a line terminated by a custom delimiter.
 *//*from w  ww.  j a v  a2  s  .  com*/
private int readCustomLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException {
    str.clear();
    int txtLength = 0; // tracks str.getLength(), as an optimization
    long bytesConsumed = 0;
    int delPosn = 0;
    do {
        int startPosn = bufferPosn; // starting from where we left off the last
        // time
        if (bufferPosn >= bufferLength) {
            startPosn = bufferPosn = 0;
            bufferLength = in.read(buffer);
            if (bufferLength <= 0)
                break; // EOF
        }
        for (; bufferPosn < bufferLength; ++bufferPosn) {
            if (buffer[bufferPosn] == recordDelimiterBytes[delPosn]) {
                delPosn++;
                if (delPosn >= recordDelimiterBytes.length) {
                    bufferPosn++;
                    break;
                }
            } else {
                delPosn = 0;
            }
        }
        int readLength = bufferPosn - startPosn;
        bytesConsumed += readLength;
        int appendLength = readLength - delPosn;
        if (appendLength > maxLineLength - txtLength) {
            appendLength = maxLineLength - txtLength;
        }
        if (appendLength > 0) {
            str.append(buffer, startPosn, appendLength);
            txtLength += appendLength;
        }
    } while (delPosn < recordDelimiterBytes.length && bytesConsumed < maxBytesToConsume);
    if (bytesConsumed > (long) Integer.MAX_VALUE)
        throw new IOException("Too many bytes before delimiter: " + bytesConsumed);
    return (int) bytesConsumed;
}

From source file:com.ery.hadoop.mrddx.file.LineReaders.java

License:Apache License

/**
 * Read a line terminated by one of CR, LF, or CRLF.
 *///from   w  w  w .ja v a2 s.  co m
private int readDefaultLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException {
    /*
     * We're reading data from in, but the head of the stream may be already
     * buffered in buffer, so we have several cases: 1. No newline
     * characters are in the buffer, so we need to copy everything and read
     * another buffer from the stream. 2. An unambiguously terminated line
     * is in buffer, so we just copy to str. 3. Ambiguously terminated line
     * is in buffer, i.e. buffer ends in CR. In this case we copy everything
     * up to CR to str, but we also need to see what follows CR: if it's LF,
     * then we need consume LF as well, so next call to readLine will read
     * from after that. We use a flag prevCharCR to signal if previous
     * character was CR and, if it happens to be at the end of the buffer,
     * delay consuming it until we have a chance to look at the char that
     * follows.
     */
    str.clear();
    int txtLength = 0; // tracks str.getLength(), as an optimization
    int newlineLength = 0; // length of terminating newline
    boolean prevCharCR = false; // true of prev char was CR
    long bytesConsumed = 0;
    do {
        int startPosn = bufferPosn; // starting from where we left off the
        // last time
        if (bufferPosn >= bufferLength) {
            startPosn = bufferPosn = 0;
            if (prevCharCR)
                ++bytesConsumed; // account for CR from previous read
            bufferLength = in.read(buffer);
            if (bufferLength <= 0)
                break; // EOF
        }
        for (; bufferPosn < bufferLength; ++bufferPosn) { // search for
            // newline
            if (buffer[bufferPosn] == LF) {
                newlineLength = (prevCharCR) ? 2 : 1;
                ++bufferPosn; // at next invocation proceed from following
                              // byte
                break;
            }
            if (prevCharCR) { // CR + notLF, we are at notLF
                newlineLength = 1;
                break;
            }
            prevCharCR = (buffer[bufferPosn] == CR);
        }
        int readLength = bufferPosn - startPosn;
        if (prevCharCR && newlineLength == 0)
            --readLength; // CR at the end of the buffer
        bytesConsumed += readLength;
        int appendLength = readLength - newlineLength;
        if (appendLength > maxLineLength - txtLength) {
            appendLength = maxLineLength - txtLength;
        }
        if (appendLength > 0) {
            str.append(buffer, startPosn, appendLength);
            txtLength += appendLength;
        }
    } while (newlineLength == 0 && bytesConsumed < maxBytesToConsume);

    if (bytesConsumed > (long) Integer.MAX_VALUE)
        throw new IOException("Too many bytes before newline: " + bytesConsumed);
    return (int) bytesConsumed;
}

From source file:com.ery.hadoop.mrddx.file.LineReaders.java

License:Apache License

/**
 * Read a line terminated by a custom delimiter.
 *//*from   w w  w .jav  a2 s  .c o  m*/
private int readCustomLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException {
    str.clear();
    int txtLength = 0; // tracks str.getLength(), as an optimization
    long bytesConsumed = 0;
    int delPosn = 0;
    do {
        int startPosn = bufferPosn; // starting from where we left off the
        // last
        // time
        if (bufferPosn >= bufferLength) {
            startPosn = bufferPosn = 0;
            bufferLength = in.read(buffer);
            if (bufferLength <= 0)
                break; // EOF
        }
        for (; bufferPosn < bufferLength; ++bufferPosn) {
            if (buffer[bufferPosn] == recordDelimiterBytes[delPosn]) {
                delPosn++;
                if (delPosn >= recordDelimiterBytes.length) {
                    bufferPosn++;
                    break;
                }
            } else {
                delPosn = 0;
            }
        }
        int readLength = bufferPosn - startPosn;
        bytesConsumed += readLength;
        int appendLength = readLength - delPosn;
        if (appendLength > maxLineLength - txtLength) {
            appendLength = maxLineLength - txtLength;
        }
        if (appendLength > 0) {
            str.append(buffer, startPosn, appendLength);
            txtLength += appendLength;
        }
    } while (delPosn < recordDelimiterBytes.length && bytesConsumed < maxBytesToConsume);
    if (bytesConsumed > (long) Integer.MAX_VALUE)
        throw new IOException("Too many bytes before delimiter: " + bytesConsumed);
    return (int) bytesConsumed;
}

From source file:com.facebook.hive.orc.DynamicByteArray.java

License:Open Source License

/**
 * Set a text value from the bytes in this dynamic array.
 * @param result the value to set//from  w w  w.  ja  v a 2s .  c  o m
 * @param offset the start of the bytes to copy
 * @param length the number of bytes to copy
 */
public void setText(Text result, int offset, int length) {
    result.clear();
    result.set(data.getBytes(), offset, length);
}

From source file:com.facebook.hive.orc.lazy.LazyStringDictionaryTreeReader.java

License:Open Source License

private void nextFromDictionary(Text result) throws IOException {
    int entry = (int) reader.next();
    int offset = dictionaryOffsets[entry];
    int length = dictionaryOffsets[entry + 1] - dictionaryOffsets[entry];

    // If the column is just empty strings, the size will be zero, so the buffer will be null,
    // in that case just return result as it will default to empty
    if (dictionaryBuffer != null) {
        dictionaryBuffer.setText(result, offset, length);
    } else {//from  w  w w . j  ava 2s.  co m
        result.clear();
    }
}

From source file:com.facebook.hive.orc.lazy.LazyStringDictionaryTreeReader.java

License:Open Source License

private void nextFromStrideDictionary(Text result) throws IOException {
    int indexEntry = computeRowIndexEntry(previousRow);
    if (indexEntry != currentUnitDictionary) {
        loadStrideDictionary(indexEntry);
    }/*from   ww w .j  a  v  a2s .  co  m*/
    int entry = (int) reader.next();
    int offset = strideDictionaryOffsets[entry];
    int length;
    // if it isn't the last entry, subtract the offsets otherwise use
    // the buffer length.
    if (entry < strideDictionaryOffsets.length - 1) {
        length = strideDictionaryOffsets[entry + 1] - offset;
    } else {
        length = strideDictionaryBuffer.size() - offset;
    }
    // If the column is just empty strings, the size will be zero, so the buffer will be null,
    // in that case just return result as it will default to empty
    if (strideDictionaryBuffer != null) {
        strideDictionaryBuffer.setText(result, offset, length);
    } else {
        result.clear();
    }
}

From source file:com.ricemap.spateDB.core.RTree.java

License:Apache License

/**
 * Builds the RTree given a serialized list of elements. It uses the given
 * stockObject to deserialize these elements and build the tree. Also writes
 * the created tree to the disk directly.
 * //ww w . j  a v a 2  s.co  m
 * @param elements
 *            - serialization of elements to be written
 * @param offset
 *            - index of the first element to use in the elements array
 * @param len
 *            - number of bytes to user from the elements array
 * @param bytesAvailable
 *            - size available (in bytes) to store the tree structures
 * @param dataOut
 *            - an output to use for writing the tree to
 * @param fast_sort
 *            - setting this to <code>true</code> allows the method to run
 *            faster by materializing the offset of each element in the list
 *            which speeds up the comparison. However, this requires an
 *            additional 16 bytes per element. So, for each 1M elements, the
 *            method will require an additional 16 M bytes (approximately).
 */
public void bulkLoadWrite(final byte[] element_bytes, final int offset, final int len, final int degree,
        DataOutput dataOut, final boolean fast_sort, final boolean columnarStorage) {
    try {
        columnar = columnarStorage;
        //TODO: the order of fields should be stable under Oracle JVM, but not guaranteed
        Field[] fields = stockObject.getClass().getDeclaredFields();

        // Count number of elements in the given text
        int i_start = offset;
        final Text line = new Text();
        while (i_start < offset + len) {
            int i_end = skipToEOL(element_bytes, i_start);
            // Extract the line without end of line character
            line.set(element_bytes, i_start, i_end - i_start - 1);
            stockObject.fromText(line);

            elementCount++;
            i_start = i_end;
        }
        LOG.info("Bulk loading an RTree with " + elementCount + " elements");

        // It turns out the findBestDegree returns the best degree when the
        // whole
        // tree is loaded to memory when processed. However, as current
        // algorithms
        // process the tree while it's on disk, a higher degree should be
        // selected
        // such that a node fits one file block (assumed to be 4K).
        // final int degree = findBestDegree(bytesAvailable, elementCount);
        LOG.info("Writing an RTree with degree " + degree);

        int height = Math.max(1, (int) Math.ceil(Math.log(elementCount) / Math.log(degree)));
        int leafNodeCount = (int) Math.pow(degree, height - 1);
        if (elementCount < 2 * leafNodeCount && height > 1) {
            height--;
            leafNodeCount = (int) Math.pow(degree, height - 1);
        }
        int nodeCount = (int) ((Math.pow(degree, height) - 1) / (degree - 1));
        int nonLeafNodeCount = nodeCount - leafNodeCount;

        // Keep track of the offset of each element in the text
        final int[] offsets = new int[elementCount];
        final int[] ids = new int[elementCount];
        final double[] ts = fast_sort ? new double[elementCount] : null;
        final double[] xs = fast_sort ? new double[elementCount] : null;
        final double[] ys = fast_sort ? new double[elementCount] : null;

        //initialize columnar data output
        ByteArrayOutputStream index_bos = new ByteArrayOutputStream();
        DataOutputStream index_dos = new DataOutputStream(index_bos);
        ByteArrayOutputStream[] bos = new ByteArrayOutputStream[fields.length];
        DataOutputStream[] dos = new DataOutputStream[fields.length];
        for (int i = 0; i < bos.length; i++) {
            bos[i] = new ByteArrayOutputStream();
            dos[i] = new DataOutputStream(bos[i]);
        }

        i_start = offset;
        line.clear();
        for (int i = 0; i < elementCount; i++) {
            offsets[i] = i_start;
            ids[i] = i;
            int i_end = skipToEOL(element_bytes, i_start);
            if (xs != null) {
                // Extract the line with end of line character
                line.set(element_bytes, i_start, i_end - i_start - 1);
                stockObject.fromText(line);
                // Sample center of the shape
                ts[i] = (stockObject.getMBR().t1 + stockObject.getMBR().t2) / 2;
                xs[i] = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2;
                ys[i] = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2;

                //build columnar storage
                if (stockObject instanceof Point3d) {
                    index_dos.writeDouble(ts[i]);
                    index_dos.writeDouble(xs[i]);
                    index_dos.writeDouble(ys[i]);
                } else {
                    throw new RuntimeException("Indexing non-point shape with RTREE is not supported yet");
                }

                for (int j = 0; j < fields.length; j++) {
                    if (fields[j].getType().equals(Integer.TYPE)) {
                        dos[j].writeInt(fields[j].getInt(stockObject));
                    } else if (fields[j].getType().equals(Double.TYPE)) {
                        dos[j].writeDouble(fields[j].getDouble(stockObject));
                    } else if (fields[j].getType().equals(Long.TYPE)) {
                        dos[j].writeLong(fields[j].getLong(stockObject));
                    } else {
                        continue;
                        //throw new RuntimeException("Field type is not supported yet");
                    }
                }
            }
            i_start = i_end;
        }
        index_dos.close();
        for (int i = 0; i < dos.length; i++) {
            dos[i].close();
        }

        /** A struct to store information about a split */
        class SplitStruct extends Prism {
            /** Start and end index for this split */
            int index1, index2;
            /** Direction of this split */
            byte direction;
            /** Index of first element on disk */
            int offsetOfFirstElement;

            static final byte DIRECTION_T = 0;
            static final byte DIRECTION_X = 1;
            static final byte DIRECTION_Y = 2;

            SplitStruct(int index1, int index2, byte direction) {
                this.index1 = index1;
                this.index2 = index2;
                this.direction = direction;
            }

            @Override
            public void write(DataOutput out) throws IOException {
                //
                if (columnarStorage)
                    out.writeInt(index1);
                else
                    out.writeInt(offsetOfFirstElement);
                super.write(out);
            }

            void partition(Queue<SplitStruct> toBePartitioned) {
                IndexedSortable sortableT;
                IndexedSortable sortableX;
                IndexedSortable sortableY;

                if (fast_sort) {
                    // Use materialized xs[] and ys[] to do the comparisons
                    sortableT = new IndexedSortable() {
                        @Override
                        public void swap(int i, int j) {
                            // Swap ts
                            double tempt = ts[i];
                            ts[i] = ts[j];
                            ts[j] = tempt;
                            // Swap xs
                            double tempx = xs[i];
                            xs[i] = xs[j];
                            xs[j] = tempx;
                            // Swap ys
                            double tempY = ys[i];
                            ys[i] = ys[j];
                            ys[j] = tempY;
                            // Swap id
                            int tempid = offsets[i];
                            offsets[i] = offsets[j];
                            offsets[j] = tempid;

                            tempid = ids[i];
                            ids[i] = ids[j];
                            ids[j] = tempid;
                        }

                        @Override
                        public int compare(int i, int j) {
                            if (ts[i] < ts[j])
                                return -1;
                            if (ts[i] > ts[j])
                                return 1;
                            return 0;
                        }
                    };
                    sortableX = new IndexedSortable() {
                        @Override
                        public void swap(int i, int j) {
                            // Swap ts
                            double tempt = ts[i];
                            ts[i] = ts[j];
                            ts[j] = tempt;
                            // Swap xs
                            double tempx = xs[i];
                            xs[i] = xs[j];
                            xs[j] = tempx;
                            // Swap ys
                            double tempY = ys[i];
                            ys[i] = ys[j];
                            ys[j] = tempY;
                            // Swap id
                            int tempid = offsets[i];
                            offsets[i] = offsets[j];
                            offsets[j] = tempid;
                            tempid = ids[i];
                            ids[i] = ids[j];
                            ids[j] = tempid;
                        }

                        @Override
                        public int compare(int i, int j) {
                            if (ts[i] < ts[j])
                                return -1;
                            if (xs[i] < xs[j])
                                return -1;
                            if (xs[i] > xs[j])
                                return 1;
                            return 0;
                        }
                    };

                    sortableY = new IndexedSortable() {
                        @Override
                        public void swap(int i, int j) {
                            // Swap ts
                            double tempt = ts[i];
                            ts[i] = ts[j];
                            ts[j] = tempt;
                            // Swap xs
                            double tempx = xs[i];
                            xs[i] = xs[j];
                            xs[j] = tempx;
                            // Swap ys
                            double tempY = ys[i];
                            ys[i] = ys[j];
                            ys[j] = tempY;
                            // Swap id
                            int tempid = offsets[i];
                            offsets[i] = offsets[j];
                            offsets[j] = tempid;

                            tempid = ids[i];
                            ids[i] = ids[j];
                            ids[j] = tempid;
                        }

                        @Override
                        public int compare(int i, int j) {
                            if (ys[i] < ys[j])
                                return -1;
                            if (ys[i] > ys[j])
                                return 1;
                            return 0;
                        }
                    };
                } else {
                    // No materialized xs and ys. Always deserialize objects
                    // to compare
                    sortableT = new IndexedSortable() {
                        @Override
                        public void swap(int i, int j) {
                            // Swap id
                            int tempid = offsets[i];
                            offsets[i] = offsets[j];
                            offsets[j] = tempid;

                            tempid = ids[i];
                            ids[i] = ids[j];
                            ids[j] = tempid;
                        }

                        @Override
                        public int compare(int i, int j) {
                            // Get end of line
                            int eol = skipToEOL(element_bytes, offsets[i]);
                            line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
                            stockObject.fromText(line);
                            double ti = (stockObject.getMBR().t1 + stockObject.getMBR().t2) / 2;

                            eol = skipToEOL(element_bytes, offsets[j]);
                            line.set(element_bytes, offsets[j], eol - offsets[j] - 1);
                            stockObject.fromText(line);
                            double tj = (stockObject.getMBR().t1 + stockObject.getMBR().t2) / 2;
                            if (ti < tj)
                                return -1;
                            if (ti > tj)
                                return 1;
                            return 0;
                        }
                    };
                    sortableX = new IndexedSortable() {
                        @Override
                        public void swap(int i, int j) {
                            // Swap id
                            int tempid = offsets[i];
                            offsets[i] = offsets[j];
                            offsets[j] = tempid;

                            tempid = ids[i];
                            ids[i] = ids[j];
                            ids[j] = tempid;
                        }

                        @Override
                        public int compare(int i, int j) {
                            // Get end of line
                            int eol = skipToEOL(element_bytes, offsets[i]);
                            line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
                            stockObject.fromText(line);
                            double xi = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2;

                            eol = skipToEOL(element_bytes, offsets[j]);
                            line.set(element_bytes, offsets[j], eol - offsets[j] - 1);
                            stockObject.fromText(line);
                            double xj = (stockObject.getMBR().x1 + stockObject.getMBR().x2) / 2;
                            if (xi < xj)
                                return -1;
                            if (xi > xj)
                                return 1;
                            return 0;
                        }
                    };

                    sortableY = new IndexedSortable() {
                        @Override
                        public void swap(int i, int j) {
                            // Swap id
                            int tempid = offsets[i];
                            offsets[i] = offsets[j];
                            offsets[j] = tempid;

                            tempid = ids[i];
                            ids[i] = ids[j];
                            ids[j] = tempid;
                        }

                        @Override
                        public int compare(int i, int j) {
                            int eol = skipToEOL(element_bytes, offsets[i]);
                            line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
                            stockObject.fromText(line);
                            double yi = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2;

                            eol = skipToEOL(element_bytes, offsets[j]);
                            line.set(element_bytes, offsets[j], eol - offsets[j] - 1);
                            stockObject.fromText(line);
                            double yj = (stockObject.getMBR().y1 + stockObject.getMBR().y2) / 2;
                            if (yi < yj)
                                return -1;
                            if (yi > yj)
                                return 1;
                            return 0;
                        }
                    };
                }

                final IndexedSorter sorter = new QuickSort();

                final IndexedSortable[] sortables = new IndexedSortable[3];
                sortables[SplitStruct.DIRECTION_T] = sortableT;
                sortables[SplitStruct.DIRECTION_X] = sortableX;
                sortables[SplitStruct.DIRECTION_Y] = sortableY;

                sorter.sort(sortables[direction], index1, index2);

                // Partition into maxEntries partitions (equally) and
                // create a SplitStruct for each partition
                int i1 = index1;
                for (int iSplit = 0; iSplit < degree; iSplit++) {
                    int i2 = index1 + (index2 - index1) * (iSplit + 1) / degree;
                    SplitStruct newSplit;
                    if (direction == 0) {
                        newSplit = new SplitStruct(i1, i2, (byte) 1);
                    } else if (direction == 1) {
                        newSplit = new SplitStruct(i1, i2, (byte) 2);
                    } else {
                        newSplit = new SplitStruct(i1, i2, (byte) 0);
                    }
                    toBePartitioned.add(newSplit);
                    i1 = i2;
                }
            }
        }

        // All nodes stored in level-order traversal
        Vector<SplitStruct> nodes = new Vector<SplitStruct>();
        final Queue<SplitStruct> toBePartitioned = new LinkedList<SplitStruct>();
        toBePartitioned.add(new SplitStruct(0, elementCount, SplitStruct.DIRECTION_X));

        while (!toBePartitioned.isEmpty()) {
            SplitStruct split = toBePartitioned.poll();
            if (nodes.size() < nonLeafNodeCount) {
                // This is a non-leaf
                split.partition(toBePartitioned);
            }
            nodes.add(split);
        }

        if (nodes.size() != nodeCount) {
            throw new RuntimeException(
                    "Expected node count: " + nodeCount + ". Real node count: " + nodes.size());
        }

        // Now we have our data sorted in the required order. Start building
        // the tree.
        // Store the offset of each leaf node in the tree
        FSDataOutputStream fakeOut = new FSDataOutputStream(new java.io.OutputStream() {
            // Null output stream
            @Override
            public void write(int b) throws IOException {
                // Do nothing
            }

            @Override
            public void write(byte[] b, int off, int len) throws IOException {
                // Do nothing
            }

            @Override
            public void write(byte[] b) throws IOException {
                // Do nothing
            }
        }, null, TreeHeaderSize + nodes.size() * NodeSize);
        for (int i_leaf = nonLeafNodeCount, i = 0; i_leaf < nodes.size(); i_leaf++) {
            nodes.elementAt(i_leaf).offsetOfFirstElement = (int) fakeOut.getPos();
            if (i != nodes.elementAt(i_leaf).index1)
                throw new RuntimeException();
            double t1, x1, y1, t2, x2, y2;

            // Initialize MBR to first object
            int eol = skipToEOL(element_bytes, offsets[i]);
            fakeOut.write(element_bytes, offsets[i], eol - offsets[i]);
            line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
            stockObject.fromText(line);
            Prism mbr = stockObject.getMBR();
            t1 = mbr.t1;
            x1 = mbr.x1;
            y1 = mbr.y1;
            t2 = mbr.t2;
            x2 = mbr.x2;
            y2 = mbr.y2;
            i++;

            while (i < nodes.elementAt(i_leaf).index2) {
                eol = skipToEOL(element_bytes, offsets[i]);
                fakeOut.write(element_bytes, offsets[i], eol - offsets[i]);
                line.set(element_bytes, offsets[i], eol - offsets[i] - 1);
                stockObject.fromText(line);
                mbr = stockObject.getMBR();
                if (mbr.t1 < t1)
                    t1 = mbr.t1;
                if (mbr.x1 < x1)
                    x1 = mbr.x1;
                if (mbr.y1 < y1)
                    y1 = mbr.y1;
                if (mbr.t2 > t2)
                    t2 = mbr.t2;
                if (mbr.x2 > x2)
                    x2 = mbr.x2;
                if (mbr.y2 > y2)
                    y2 = mbr.y2;
                i++;
            }
            nodes.elementAt(i_leaf).set(t1, x1, y1, t2, x2, y2);
        }
        fakeOut.close();
        fakeOut = null;

        // Calculate MBR and offsetOfFirstElement for non-leaves
        for (int i_node = nonLeafNodeCount - 1; i_node >= 0; i_node--) {
            int i_first_child = i_node * degree + 1;
            nodes.elementAt(i_node).offsetOfFirstElement = nodes.elementAt(i_first_child).offsetOfFirstElement;
            int i_child = 0;
            Prism mbr;
            mbr = nodes.elementAt(i_first_child + i_child);
            double t1 = mbr.t1;
            double x1 = mbr.x1;
            double y1 = mbr.y1;
            double t2 = mbr.t2;
            double x2 = mbr.x2;
            double y2 = mbr.y2;
            i_child++;

            while (i_child < degree) {
                mbr = nodes.elementAt(i_first_child + i_child);
                if (mbr.t1 < t1)
                    t1 = mbr.t1;
                if (mbr.x1 < x1)
                    x1 = mbr.x1;
                if (mbr.y1 < y1)
                    y1 = mbr.y1;
                if (mbr.t2 > t2)
                    t2 = mbr.t2;
                if (mbr.x2 > x2)
                    x2 = mbr.x2;
                if (mbr.y2 > y2)
                    y2 = mbr.y2;
                i_child++;
            }
            nodes.elementAt(i_node).set(t1, x1, y1, t2, x2, y2);
        }

        // Start writing the tree
        // write tree header (including size)
        // Total tree size. (== Total bytes written - 8 bytes for the size
        // itself)
        dataOut.writeInt(TreeHeaderSize + NodeSize * nodeCount + len);
        // Tree height
        dataOut.writeInt(height);
        // Degree
        dataOut.writeInt(degree);
        dataOut.writeInt(elementCount);

        //isColumnar
        dataOut.writeInt(columnarStorage ? 1 : 0);

        // write nodes
        for (SplitStruct node : nodes) {
            node.write(dataOut);
        }
        // write elements
        if (columnarStorage) {
            byte[] index_bs = index_bos.toByteArray();
            byte[][] bss = new byte[bos.length][];
            for (int i = 0; i < bss.length; i++) {
                bss[i] = bos[i].toByteArray();
            }
            for (int element_i = 0; element_i < elementCount; element_i++) {
                //int eol = skipToEOL(element_bytes, offsets[element_i]);
                //dataOut.write(element_bytes, offsets[element_i], eol - offsets[element_i]);
                dataOut.write(index_bs, ids[element_i] * IndexUnitSize, IndexUnitSize);
            }

            for (int i = 0; i < fields.length; i++) {
                int fieldSize = 0;
                if (fields[i].getType().equals(Integer.TYPE)) {
                    fieldSize = 4;
                } else if (fields[i].getType().equals(Long.TYPE)) {
                    fieldSize = 8;
                } else if (fields[i].getType().equals(Double.TYPE)) {
                    fieldSize = 8;
                } else {
                    //throw new RuntimeException("Unsupported field type: " + fields[i].getType().getName());
                    continue;
                }
                for (int element_i = 0; element_i < elementCount; element_i++) {
                    //int eol = skipToEOL(element_bytes, offsets[element_i]);
                    //dataOut.write(element_bytes, offsets[element_i], eol - offsets[element_i]);
                    dataOut.write(bss[i], ids[element_i] * fieldSize, fieldSize);
                }
            }
        } else {
            for (int element_i = 0; element_i < elementCount; element_i++) {
                int eol = skipToEOL(element_bytes, offsets[element_i]);
                dataOut.write(element_bytes, offsets[element_i], eol - offsets[element_i]);
            }
        }

    } catch (IOException e) {
        e.printStackTrace();
    } catch (IllegalArgumentException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } catch (IllegalAccessException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}

From source file:com.ricemap.spateDB.mapred.SpatialRecordReader.java

License:Apache License

/**
 * Reads the next line from input and return true if a line was read.
 * If no more lines are available in this split, a false is returned.
 * @param value//  w  w w . j  a v  a2 s.co  m
 * @return
 * @throws IOException
 */
protected boolean nextLine(Text value) throws IOException {
    if (blockType == BlockType.RTREE && pos == 8) {
        // File is positioned at the RTree header
        // Skip the header and go to first data object in file
        pos += RTree.skipHeader(in);
        LOG.info("Skipped R-tree to position: " + pos);
        // Reinitialize record reader at the new position
        lineReader = new LineReader(in);
    }
    while (getFilePosition() <= end) {
        value.clear();
        int b = 0;
        if (buffer != null) {
            // Read the first line encountered in buffer
            int eol = RTree.skipToEOL(buffer, 0);
            b += eol;
            value.append(buffer, 0, eol);
            if (eol < buffer.length) {
                // There are still some bytes remaining in buffer
                byte[] tmp = new byte[buffer.length - eol];
                System.arraycopy(buffer, eol, tmp, 0, tmp.length);
            } else {
                buffer = null;
            }
            // Check if a complete line has been read from the buffer
            byte last_byte = value.getBytes()[value.getLength() - 1];
            if (last_byte == '\n' || last_byte == '\r')
                return true;
        }

        // Read the first line from stream
        Text temp = new Text();
        b += lineReader.readLine(temp);
        if (b == 0) {
            // Indicates an end of stream
            return false;
        }
        pos += b;

        // Append the part read from stream to the part extracted from buffer
        value.append(temp.getBytes(), 0, temp.getLength());

        if (value.getLength() > 1) {
            // Read a non-empty line. Note that end-of-line character is included
            return true;
        }
    }
    // Reached end of file
    return false;
}