Example usage for java.nio ByteBuffer getFloat

List of usage examples for java.nio ByteBuffer getFloat

Introduction

In this page you can find the example usage for java.nio ByteBuffer getFloat.

Prototype

public abstract float getFloat();

Source Link

Document

Returns the float at the current position and increases the position by 4.

Usage

From source file:hivemall.GeneralLearnerBaseUDTF.java

protected final void runIterativeTraining(@Nonnegative final int iterations) throws HiveException {
    final ByteBuffer buf = this.inputBuf;
    final NioStatefulSegment dst = this.fileIO;
    assert (buf != null);
    assert (dst != null);
    final long numTrainingExamples = count;

    final Reporter reporter = getReporter();
    final Counters.Counter iterCounter = (reporter == null) ? null
            : reporter.getCounter("hivemall.GeneralLearnerBase$Counter", "iteration");

    try {/*from   ww  w  . j  a va  2  s. c  o m*/
        if (dst.getPosition() == 0L) {// run iterations w/o temporary file
            if (buf.position() == 0) {
                return; // no training example
            }
            buf.flip();

            for (int iter = 2; iter <= iterations; iter++) {
                cvState.next();
                reportProgress(reporter);
                setCounterValue(iterCounter, iter);

                while (buf.remaining() > 0) {
                    int recordBytes = buf.getInt();
                    assert (recordBytes > 0) : recordBytes;
                    int featureVectorLength = buf.getInt();
                    final FeatureValue[] featureVector = new FeatureValue[featureVectorLength];
                    for (int j = 0; j < featureVectorLength; j++) {
                        featureVector[j] = readFeatureValue(buf, featureType);
                    }
                    float target = buf.getFloat();
                    train(featureVector, target);
                }
                buf.rewind();

                if (is_mini_batch) { // Update model with accumulated delta
                    batchUpdate();
                }

                if (cvState.isConverged(numTrainingExamples)) {
                    break;
                }
            }
            logger.info("Performed " + cvState.getCurrentIteration() + " iterations of "
                    + NumberUtils.formatNumber(numTrainingExamples) + " training examples on memory (thus "
                    + NumberUtils.formatNumber(numTrainingExamples * cvState.getCurrentIteration())
                    + " training updates in total) ");
        } else {// read training examples in the temporary file and invoke train for each example
            // write training examples in buffer to a temporary file
            if (buf.remaining() > 0) {
                writeBuffer(buf, dst);
            }
            try {
                dst.flush();
            } catch (IOException e) {
                throw new HiveException("Failed to flush a file: " + dst.getFile().getAbsolutePath(), e);
            }
            if (logger.isInfoEnabled()) {
                File tmpFile = dst.getFile();
                logger.info(
                        "Wrote " + numTrainingExamples + " records to a temporary file for iterative training: "
                                + tmpFile.getAbsolutePath() + " (" + FileUtils.prettyFileSize(tmpFile) + ")");
            }

            // run iterations
            for (int iter = 2; iter <= iterations; iter++) {
                cvState.next();
                setCounterValue(iterCounter, iter);

                buf.clear();
                dst.resetPosition();
                while (true) {
                    reportProgress(reporter);
                    // TODO prefetch
                    // writes training examples to a buffer in the temporary file
                    final int bytesRead;
                    try {
                        bytesRead = dst.read(buf);
                    } catch (IOException e) {
                        throw new HiveException("Failed to read a file: " + dst.getFile().getAbsolutePath(), e);
                    }
                    if (bytesRead == 0) { // reached file EOF
                        break;
                    }
                    assert (bytesRead > 0) : bytesRead;

                    // reads training examples from a buffer
                    buf.flip();
                    int remain = buf.remaining();
                    if (remain < SizeOf.INT) {
                        throw new HiveException("Illegal file format was detected");
                    }
                    while (remain >= SizeOf.INT) {
                        int pos = buf.position();
                        int recordBytes = buf.getInt();
                        remain -= SizeOf.INT;

                        if (remain < recordBytes) {
                            buf.position(pos);
                            break;
                        }

                        int featureVectorLength = buf.getInt();
                        final FeatureValue[] featureVector = new FeatureValue[featureVectorLength];
                        for (int j = 0; j < featureVectorLength; j++) {
                            featureVector[j] = readFeatureValue(buf, featureType);
                        }
                        float target = buf.getFloat();
                        train(featureVector, target);

                        remain -= recordBytes;
                    }
                    buf.compact();
                }

                if (is_mini_batch) { // Update model with accumulated delta
                    batchUpdate();
                }

                if (cvState.isConverged(numTrainingExamples)) {
                    break;
                }
            }
            logger.info("Performed " + cvState.getCurrentIteration() + " iterations of "
                    + NumberUtils.formatNumber(numTrainingExamples)
                    + " training examples on a secondary storage (thus "
                    + NumberUtils.formatNumber(numTrainingExamples * cvState.getCurrentIteration())
                    + " training updates in total)");
        }
    } catch (Throwable e) {
        throw new HiveException("Exception caused in the iterative training", e);
    } finally {
        // delete the temporary file and release resources
        try {
            dst.close(true);
        } catch (IOException e) {
            throw new HiveException("Failed to close a file: " + dst.getFile().getAbsolutePath(), e);
        }
        this.inputBuf = null;
        this.fileIO = null;
    }
}

From source file:com.healthmarketscience.jackcess.impl.ColumnImpl.java

/**
 * Deserialize a raw byte value for this column into an Object
 * @param data The raw byte value// ww w  . ja v a2s .  co  m
 * @param order Byte order in which the raw value is stored
 * @return The deserialized Object
 * @usage _advanced_method_
 */
public Object read(byte[] data, ByteOrder order) throws IOException {
    ByteBuffer buffer = ByteBuffer.wrap(data).order(order);

    switch (getType()) {
    case BOOLEAN:
        throw new IOException("Tried to read a boolean from data instead of null mask.");
    case BYTE:
        return Byte.valueOf(buffer.get());
    case INT:
        return Short.valueOf(buffer.getShort());
    case LONG:
        return Integer.valueOf(buffer.getInt());
    case DOUBLE:
        return Double.valueOf(buffer.getDouble());
    case FLOAT:
        return Float.valueOf(buffer.getFloat());
    case SHORT_DATE_TIME:
        return readDateValue(buffer);
    case BINARY:
        return data;
    case TEXT:
        return decodeTextValue(data);
    case MONEY:
        return readCurrencyValue(buffer);
    case NUMERIC:
        return readNumericValue(buffer);
    case GUID:
        return readGUIDValue(buffer, order);
    case UNKNOWN_0D:
    case UNKNOWN_11:
        // treat like "binary" data
        return data;
    case COMPLEX_TYPE:
        return new ComplexValueForeignKeyImpl(this, buffer.getInt());
    default:
        throw new IOException("Unrecognized data type: " + _type);
    }
}

From source file:au.org.ala.layers.intersect.Grid.java

/**
 * @return calculated min and max values of a grid file as float [] where [0] is min and [1] is max.
 *//* www  .ja  va  2s  . c  o  m*/
public float[] calculatetMinMax() {

    float[] ret = new float[2];
    ret[0] = Float.MAX_VALUE;
    ret[1] = Float.MAX_VALUE * -1;

    long i;
    int size;
    byte[] b;
    RandomAccessFile afile = null;

    try { //read of random access file can throw an exception
        File f2 = new File(filename + ".GRI");
        if (!f2.exists()) {
            afile = new RandomAccessFile(filename + ".gri", "r");
        } else {
            afile = new RandomAccessFile(filename + ".GRI", "r");
        }

        long length = ((long) nrows) * ((long) ncols);
        float f;

        if (datatype.equalsIgnoreCase("BYTE")) {
            size = 1;
            b = new byte[size];
            for (i = 0; i < length; i++) {
                f = afile.readByte();
                if (f != (float) nodatavalue) {
                    ret[0] = Math.min(f * rescale, ret[0]);
                    ret[1] = Math.max(f * rescale, ret[1]);
                }
            }
        } else if (datatype.equalsIgnoreCase("UBYTE")) {
            size = 1;
            b = new byte[size];
            for (i = 0; i < length; i++) {
                f = afile.readByte();
                if (f < 0) {
                    f += 256;
                }
                if (f != (float) nodatavalue) {
                    ret[0] = Math.min(f * rescale, ret[0]);
                    ret[1] = Math.max(f * rescale, ret[1]);
                }
            }
        } else if (datatype.equalsIgnoreCase("SHORT")) {
            size = 2;
            b = new byte[size];
            for (i = 0; i < length; i++) {
                afile.read(b);
                if (byteorderLSB) {
                    f = (short) (((0xFF & b[1]) << 8) | (b[0] & 0xFF));
                } else {
                    f = (short) (((0xFF & b[0]) << 8) | (b[1] & 0xFF));
                }
                if (f != (float) nodatavalue) {
                    ret[0] = Math.min(f * rescale, ret[0]);
                    ret[1] = Math.max(f * rescale, ret[1]);
                }
            }
        } else if (datatype.equalsIgnoreCase("INT")) {
            size = 4;
            b = new byte[size];
            for (i = 0; i < length; i++) {
                afile.read(b);
                if (byteorderLSB) {
                    f = ((0xFF & b[3]) << 24) | ((0xFF & b[2]) << 16) + ((0xFF & b[1]) << 8) + (b[0] & 0xFF);
                } else {
                    f = ((0xFF & b[0]) << 24)
                            | ((0xFF & b[1]) << 16) + ((0xFF & b[2]) << 8) + ((0xFF & b[3]) & 0xFF);
                }
                if (f != (float) nodatavalue) {
                    ret[0] = Math.min(f * rescale, ret[0]);
                    ret[1] = Math.max(f * rescale, ret[1]);
                }
            }
        } else if (datatype.equalsIgnoreCase("LONG")) {
            size = 8;
            b = new byte[size];
            for (i = 0; i < length; i++) {
                afile.read(b);
                if (byteorderLSB) {
                    f = ((long) (0xFF & b[7]) << 56) + ((long) (0xFF & b[6]) << 48)
                            + ((long) (0xFF & b[5]) << 40) + ((long) (0xFF & b[4]) << 32)
                            + ((long) (0xFF & b[3]) << 24) + ((long) (0xFF & b[2]) << 16)
                            + ((long) (0xFF & b[1]) << 8) + (0xFF & b[0]);
                } else {
                    f = ((long) (0xFF & b[0]) << 56) + ((long) (0xFF & b[1]) << 48)
                            + ((long) (0xFF & b[2]) << 40) + ((long) (0xFF & b[3]) << 32)
                            + ((long) (0xFF & b[4]) << 24) + ((long) (0xFF & b[5]) << 16)
                            + ((long) (0xFF & b[6]) << 8) + (0xFF & b[7]);
                }
                if (f != (float) nodatavalue) {
                    ret[0] = Math.min(f * rescale, ret[0]);
                    ret[1] = Math.max(f * rescale, ret[1]);
                }
            }
        } else if (datatype.equalsIgnoreCase("FLOAT")) {
            size = 4;
            b = new byte[size];
            for (i = 0; i < length; i++) {
                afile.read(b);
                ByteBuffer bb = ByteBuffer.wrap(b);
                if (byteorderLSB) {
                    bb.order(ByteOrder.LITTLE_ENDIAN);
                }
                f = bb.getFloat();
                if (f != (float) nodatavalue) {
                    ret[0] = Math.min(f * rescale, ret[0]);
                    ret[1] = Math.max(f * rescale, ret[1]);
                }
            }
        } else if (datatype.equalsIgnoreCase("DOUBLE")) {
            size = 8;
            b = new byte[8];
            for (i = 0; i < length; i++) {
                afile.read(b);
                ByteBuffer bb = ByteBuffer.wrap(b);
                if (byteorderLSB) {
                    bb.order(ByteOrder.LITTLE_ENDIAN);
                }
                f = (float) bb.getDouble();
                if (f != (float) nodatavalue) {
                    ret[0] = Math.min(f * rescale, ret[0]);
                    ret[1] = Math.max(f * rescale, ret[1]);
                }
            }
        } else {
            logger.error("datatype not supported in Grid.getValues: " + datatype);
        }
    } catch (Exception e) {
        logger.error("error calculating min/max of a grid file", e);
    } finally {
        if (afile != null) {
            try {
                afile.close();
            } catch (Exception e) {
                logger.error(e.getMessage(), e);
            }
        }
    }
    return ret;
}

From source file:org.shaman.terrain.polygonal.GraphToHeightmap.java

/**
 * Renders the given scene in a top-down manner in the given matrix
 *
 * @param matrix/*from  w  ww .j  a va 2s.c  o  m*/
 * @param scene
 */
private void render(float[][] matrix, final Spatial scene, final ColorRGBA background, float min, float max) {
    final ByteBuffer data = BufferUtils.createByteBuffer(size * size * 4 * 4);
    try {
        app.enqueue(new Callable<Object>() {
            @Override
            public Object call() throws Exception {
                //init
                Camera cam = new Camera(size, size);
                cam.setParallelProjection(true);
                final ViewPort view = new ViewPort("Off", cam);
                view.setBackgroundColor(background);
                view.setClearFlags(true, true, true);
                final FrameBuffer buffer = new FrameBuffer(size, size, 1);
                buffer.setDepthBuffer(Image.Format.Depth);
                buffer.setColorBuffer(Image.Format.RGBA32F);
                view.setOutputFrameBuffer(buffer);
                view.attachScene(scene);
                //render
                scene.setCullHint(Spatial.CullHint.Never);
                scene.updateGeometricState();
                view.setEnabled(true);
                app.getRenderManager().renderViewPort(view, 0);
                app.getRenderer().readFrameBufferWithFormat(buffer, data, Image.Format.RGBA32F);
                return new Object();
            }
        }).get();
    } catch (InterruptedException | ExecutionException ex) {
        Logger.getLogger(GraphToHeightmap.class.getName()).log(Level.SEVERE, "unable to render", ex);
        return;
    }
    data.rewind();
    for (int y = 0; y < size; ++y) {
        for (int x = 0; x < size; ++x) {
            float v = data.getFloat();
            v *= (max - min);
            v += min;
            matrix[x][y] = v;
            data.getFloat();
            data.getFloat();
            data.getFloat();
        }
    }
}

From source file:org.shaman.terrain.polygonal.GraphToHeightmap.java

/**
 * Renders the given scene in a top-down manner in the given matrix
 *
 * @param matrix//from ww w.  ja v a  2 s . c  o  m
 * @param scene
 */
private void renderColor(float[][][] matrix, final Spatial scene, final ColorRGBA background, float min,
        float max) {
    final ByteBuffer data = BufferUtils.createByteBuffer(size * size * 4 * 4);
    try {
        app.enqueue(new Callable<Object>() {
            @Override
            public Object call() throws Exception {
                //init
                Camera cam = new Camera(size, size);
                cam.setParallelProjection(true);
                final ViewPort view = new ViewPort("Off", cam);
                view.setBackgroundColor(background);
                view.setClearFlags(true, true, true);
                final FrameBuffer buffer = new FrameBuffer(size, size, 1);
                buffer.setDepthBuffer(Image.Format.Depth);
                buffer.setColorBuffer(Image.Format.RGBA32F);
                view.setOutputFrameBuffer(buffer);
                view.attachScene(scene);
                //render
                scene.setCullHint(Spatial.CullHint.Never);
                scene.updateGeometricState();
                view.setEnabled(true);
                app.getRenderManager().renderViewPort(view, 0);
                app.getRenderer().readFrameBufferWithFormat(buffer, data, Image.Format.RGBA32F);
                return new Object();
            }
        }).get();
    } catch (InterruptedException | ExecutionException ex) {
        Logger.getLogger(GraphToHeightmap.class.getName()).log(Level.SEVERE, "unable to render", ex);
        return;
    }
    data.rewind();
    for (int y = 0; y < size; ++y) {
        for (int x = 0; x < size; ++x) {
            float v;
            v = data.getFloat();
            v *= (max - min);
            v += min;
            matrix[x][y][0] = v;

            v = data.getFloat();
            v *= (max - min);
            v += min;
            matrix[x][y][1] = v;

            v = data.getFloat();
            v *= (max - min);
            v += min;
            matrix[x][y][2] = v;

            v = data.getFloat();
            v *= (max - min);
            v += min;
            matrix[x][y][3] = v;
        }
    }
}

From source file:au.org.ala.layers.intersect.Grid.java

/**
 * @param points input array for longitude and latitude
 *               double[number_of_points][2]
 * @return array of .gri file values corresponding to the
 * points provided/*from   ww w .j  ava  2s .c o m*/
 */
public float[] getValues(double[][] points) {

    //confirm inputs since they come from somewhere else
    if (points == null || points.length == 0) {
        return null;
    }

    //use preloaded grid data if available
    Grid g = Grid.getLoadedGrid(filename);
    if (g != null) {
        return g.getValues2(points);
    }

    if (subgrids != null) {
        return getValues3(points, Math.min(1024 * 1024, 64 * points.length));
    }

    float[] ret = new float[points.length];

    int length = points.length;
    long size;
    int i, pos;
    byte[] b;
    RandomAccessFile afile = null;
    File f2 = new File(filename + ".GRI");

    try { //read of random access file can throw an exception
        if (!f2.exists()) {
            afile = new RandomAccessFile(filename + ".gri", "r");
        } else {
            afile = new RandomAccessFile(filename + ".GRI", "r");
        }

        if (datatype.equalsIgnoreCase("BYTE")) {
            size = 1;
            b = new byte[(int) size];
            for (i = 0; i < length; i++) {
                pos = (int) getcellnumber(points[i][0], points[i][1]);
                if (pos >= 0) {
                    afile.seek(pos * size);
                    ret[i] = afile.readByte();
                } else {
                    ret[i] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("UBYTE")) {
            size = 1;
            b = new byte[(int) size];
            for (i = 0; i < length; i++) {
                pos = (int) getcellnumber(points[i][0], points[i][1]);
                if (pos >= 0) {
                    afile.seek(pos * size);
                    ret[i] = afile.readByte();
                    if (ret[i] < 0) {
                        ret[i] += 256;
                    }
                } else {
                    ret[i] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("SHORT")) {
            size = 2;
            b = new byte[(int) size];
            for (i = 0; i < length; i++) {
                pos = (int) getcellnumber(points[i][0], points[i][1]);
                if (pos >= 0) {
                    afile.seek(pos * size);
                    afile.read(b);
                    if (byteorderLSB) {
                        ret[i] = (short) (((0xFF & b[1]) << 8) | (b[0] & 0xFF));
                    } else {
                        ret[i] = (short) (((0xFF & b[0]) << 8) | (b[1] & 0xFF));
                    }
                    //ret[i] = afile.readShort();
                } else {
                    ret[i] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("INT")) {
            size = 4;
            b = new byte[(int) size];
            for (i = 0; i < length; i++) {
                pos = (int) getcellnumber(points[i][0], points[i][1]);
                if (pos >= 0) {
                    afile.seek(pos * size);
                    afile.read(b);
                    if (byteorderLSB) {
                        ret[i] = ((0xFF & b[3]) << 24)
                                | ((0xFF & b[2]) << 16) + ((0xFF & b[1]) << 8) + (b[0] & 0xFF);
                    } else {
                        ret[i] = ((0xFF & b[0]) << 24)
                                | ((0xFF & b[1]) << 16) + ((0xFF & b[2]) << 8) + ((0xFF & b[3]) & 0xFF);
                    }
                    //ret[i] = afile.readInt();
                } else {
                    ret[i] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("LONG")) {
            size = 8;
            b = new byte[(int) size];
            for (i = 0; i < length; i++) {
                pos = (int) getcellnumber(points[i][0], points[i][1]);
                if (pos >= 0) {
                    afile.seek(pos * size);
                    afile.read(b);
                    if (byteorderLSB) {
                        ret[i] = ((long) (0xFF & b[7]) << 56) + ((long) (0xFF & b[6]) << 48)
                                + ((long) (0xFF & b[5]) << 40) + ((long) (0xFF & b[4]) << 32)
                                + ((long) (0xFF & b[3]) << 24) + ((long) (0xFF & b[2]) << 16)
                                + ((long) (0xFF & b[1]) << 8) + (0xFF & b[0]);
                    } else {
                        ret[i] = ((long) (0xFF & b[0]) << 56) + ((long) (0xFF & b[1]) << 48)
                                + ((long) (0xFF & b[2]) << 40) + ((long) (0xFF & b[3]) << 32)
                                + ((long) (0xFF & b[4]) << 24) + ((long) (0xFF & b[5]) << 16)
                                + ((long) (0xFF & b[6]) << 8) + (0xFF & b[7]);
                    }
                    //ret[i] = afile.readLong();
                } else {
                    ret[i] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("FLOAT")) {
            size = 4;
            b = new byte[(int) size];
            for (i = 0; i < length; i++) {
                pos = (int) getcellnumber(points[i][0], points[i][1]);
                if (pos >= 0) {
                    afile.seek(pos * size);
                    afile.read(b);
                    ByteBuffer bb = ByteBuffer.wrap(b);
                    if (byteorderLSB) {
                        bb.order(ByteOrder.LITTLE_ENDIAN);
                    }
                    ret[i] = bb.getFloat();
                } else {
                    ret[i] = Float.NaN;
                }

            }
        } else if (datatype.equalsIgnoreCase("DOUBLE")) {
            size = 8;
            b = new byte[8];
            for (i = 0; i < length; i++) {
                pos = (int) getcellnumber(points[i][0], points[i][1]);
                if (pos >= 0) {
                    afile.seek(pos * size);
                    afile.read(b);
                    ByteBuffer bb = ByteBuffer.wrap(b);
                    if (byteorderLSB) {
                        bb.order(ByteOrder.LITTLE_ENDIAN);
                    }
                    ret[i] = (float) bb.getDouble();

                    //ret[i] = afile.readFloat();
                } else {
                    ret[i] = Float.NaN;
                }
            }
        } else {
            logger.error("datatype not supported in Grid.getValues: " + datatype);
            // / should not happen; catch anyway...
            for (i = 0; i < length; i++) {
                ret[i] = Float.NaN;
            }
        }
        //replace not a number
        for (i = 0; i < length; i++) {
            if ((float) ret[i] == (float) nodatavalue) {
                ret[i] = Float.NaN;
            } else {
                ret[i] *= rescale;
            }
        }
    } catch (Exception e) {
        logger.error("error getting grid file values", e);
    } finally {
        if (afile != null) {
            try {
                afile.close();
            } catch (Exception e) {
                logger.error(e.getMessage(), e);
            }
        }
    }
    return ret;
}

From source file:au.org.ala.layers.intersect.Grid.java

/**
 * @param points input array for longitude and latitude
 *               double[number_of_points][2] and sorted latitude then longitude
 * @return array of .gri file values corresponding to the
 * points provided/*  w  w  w.  j a v  a2s  . co  m*/
 */
public float[] getValues3(double[][] points, int bufferSize) {
    //confirm inputs since they come from somewhere else
    if (points == null || points.length == 0) {
        return null;
    }

    if (subgrids != null) {
        return getValuesSubgrids(points, bufferSize);
    }

    //use preloaded grid data if available
    Grid g = Grid.getLoadedGrid(filename);
    if (g != null && g.grid_data != null) {
        return g.getValues2(points);
    }

    int length = points.length;
    int size, i;
    byte[] b;

    RandomAccessFile afile = null;

    File f2 = new File(filename + ".GRI");

    try { //read of random access file can throw an exception
        if (!f2.exists()) {
            afile = new RandomAccessFile(filename + ".gri", "r");
        } else {
            afile = new RandomAccessFile(filename + ".GRI", "r");
        }

        //do not cache subgrids (using getValues2)
        if (!subgrid && afile.length() < 80 * 1024 * 1024) {
            try {
                afile.close();
                afile = null;
            } catch (Exception e) {
            }
            return getValues2(points);
        }

        byte[] buffer = new byte[bufferSize]; //must be multiple of 64
        Long bufferOffset = afile.length();

        float[] ret = new float[points.length];

        //get cell numbers
        long[][] cells = new long[points.length][2];
        for (int j = 0; j < points.length; j++) {
            if (Double.isNaN(points[j][0]) || Double.isNaN(points[j][1])) {
                cells[j][0] = -1;
                cells[j][1] = j;
            } else {
                cells[j][0] = getcellnumber(points[j][0], points[j][1]);
                cells[j][1] = j;
            }
        }
        java.util.Arrays.sort(cells, new Comparator<long[]>() {

            @Override
            public int compare(long[] o1, long[] o2) {
                if (o1[0] == o2[0]) {
                    return o1[1] > o2[1] ? 1 : -1;
                } else {
                    return o1[0] > o2[0] ? 1 : -1;
                }
            }
        });

        if (datatype.equalsIgnoreCase("BYTE")) {
            size = 1;
            for (i = 0; i < length; i++) {
                if (i > 0 && cells[i - 1][0] == cells[i][0]) {
                    ret[(int) cells[i][1]] = ret[(int) cells[i - 1][1]];
                    continue;
                }
                if (cells[i][0] >= 0) {
                    ret[(int) cells[i][1]] = getByte(afile, buffer, bufferOffset, cells[i][0] * size);
                } else {
                    ret[(int) cells[i][1]] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("UBYTE")) {
            size = 1;
            for (i = 0; i < length; i++) {
                if (i > 0 && cells[i - 1][0] == cells[i][0]) {
                    ret[(int) cells[i][1]] = ret[(int) cells[i - 1][1]];
                    continue;
                }
                if (cells[i][0] >= 0) {
                    ret[(int) cells[i][1]] = getByte(afile, buffer, bufferOffset, cells[i][0] * size);
                    if (ret[(int) cells[i][1]] < 0) {
                        ret[(int) cells[i][1]] += 256;
                    }
                } else {
                    ret[(int) cells[i][1]] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("SHORT")) {
            size = 2;
            b = new byte[size];
            for (i = 0; i < length; i++) {
                if (i > 0 && cells[i - 1][0] == cells[i][0]) {
                    ret[(int) cells[i][1]] = ret[(int) cells[i - 1][1]];
                    continue;
                }
                if (cells[i][0] >= 0) {
                    bufferOffset = getBytes(afile, buffer, bufferOffset, cells[i][0] * (long) size, b);
                    if (byteorderLSB) {
                        ret[(int) cells[i][1]] = (short) (((0xFF & b[1]) << 8) | (b[0] & 0xFF));
                    } else {
                        ret[(int) cells[i][1]] = (short) (((0xFF & b[0]) << 8) | (b[1] & 0xFF));
                    }
                } else {
                    ret[(int) cells[i][1]] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("INT")) {
            size = 4;
            b = new byte[size];
            for (i = 0; i < length; i++) {
                if (i > 0 && cells[i - 1][0] == cells[i][0]) {
                    ret[(int) cells[i][1]] = ret[(int) cells[i - 1][1]];
                    continue;
                }
                if (cells[i][0] >= 0) {
                    bufferOffset = getBytes(afile, buffer, bufferOffset, cells[i][0] * (long) size, b);
                    if (byteorderLSB) {
                        ret[(int) cells[i][1]] = ((0xFF & b[3]) << 24)
                                | ((0xFF & b[2]) << 16) + ((0xFF & b[1]) << 8) + (b[0] & 0xFF);
                    } else {
                        ret[(int) cells[i][1]] = ((0xFF & b[0]) << 24)
                                | ((0xFF & b[1]) << 16) + ((0xFF & b[2]) << 8) + ((0xFF & b[3]) & 0xFF);
                    }
                } else {
                    ret[(int) cells[i][1]] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("LONG")) {
            size = 8;
            b = new byte[size];
            for (i = 0; i < length; i++) {
                if (i > 0 && cells[i - 1][0] == cells[i][0]) {
                    ret[(int) cells[i][1]] = ret[(int) cells[i - 1][1]];
                    continue;
                }
                if (cells[i][0] >= 0) {
                    bufferOffset = getBytes(afile, buffer, bufferOffset, cells[i][0] * (long) size, b);
                    if (byteorderLSB) {
                        ret[(int) cells[i][1]] = ((long) (0xFF & b[7]) << 56) + ((long) (0xFF & b[6]) << 48)
                                + ((long) (0xFF & b[5]) << 40) + ((long) (0xFF & b[4]) << 32)
                                + ((long) (0xFF & b[3]) << 24) + ((long) (0xFF & b[2]) << 16)
                                + ((long) (0xFF & b[1]) << 8) + (0xFF & b[0]);
                    } else {
                        ret[(int) cells[i][1]] = ((long) (0xFF & b[0]) << 56) + ((long) (0xFF & b[1]) << 48)
                                + ((long) (0xFF & b[2]) << 40) + ((long) (0xFF & b[3]) << 32)
                                + ((long) (0xFF & b[4]) << 24) + ((long) (0xFF & b[5]) << 16)
                                + ((long) (0xFF & b[6]) << 8) + (0xFF & b[7]);
                    }
                } else {
                    ret[(int) cells[i][1]] = Float.NaN;
                }
            }
        } else if (datatype.equalsIgnoreCase("FLOAT")) {
            size = 4;
            b = new byte[size];
            for (i = 0; i < length; i++) {
                if (i > 0 && cells[i - 1][0] == cells[i][0]) {
                    ret[(int) cells[i][1]] = ret[(int) cells[i - 1][1]];
                    continue;
                }
                if (cells[i][0] >= 0) {
                    bufferOffset = getBytes(afile, buffer, bufferOffset, cells[i][0] * (long) size, b);
                    ByteBuffer bb = ByteBuffer.wrap(b);
                    if (byteorderLSB) {
                        bb.order(ByteOrder.LITTLE_ENDIAN);
                    }
                    ret[(int) cells[i][1]] = bb.getFloat();
                } else {
                    ret[(int) cells[i][1]] = Float.NaN;
                }

            }
        } else if (datatype.equalsIgnoreCase("DOUBLE")) {
            size = 8;
            b = new byte[8];
            for (i = 0; i < length; i++) {
                if (i > 0 && cells[i - 1][0] == cells[i][0]) {
                    ret[(int) cells[i][1]] = ret[(int) cells[i - 1][1]];
                    continue;
                }
                if (cells[i][0] >= 0) {
                    getBytes(afile, buffer, bufferOffset, cells[i][0] * (long) size, b);
                    ByteBuffer bb = ByteBuffer.wrap(b);
                    if (byteorderLSB) {
                        bb.order(ByteOrder.LITTLE_ENDIAN);
                    }
                    ret[(int) cells[i][1]] = (float) bb.getDouble();

                } else {
                    ret[(int) cells[i][1]] = Float.NaN;
                }
            }
        } else {
            logger.error("datatype not supported in Grid.getValues: " + datatype);
            // / should not happen; catch anyway...
            for (i = 0; i < length; i++) {
                ret[i] = Float.NaN;
            }
        }
        //replace not a number
        for (i = 0; i < length; i++) {
            if ((float) ret[i] == (float) nodatavalue) {
                ret[i] = Float.NaN;
            } else {
                ret[i] *= rescale;
            }
        }
        return ret;
    } catch (Exception e) {
        logger.error("error getting grid file values", e);
    } finally {
        if (afile != null) {
            try {
                afile.close();
            } catch (Exception e) {
                logger.error(e.getMessage(), e);
            }
        }
    }
    return null;
}

From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.dta.DTAFileReader.java

private void decodeData(BufferedInputStream stream) throws IOException {

    dbgLog.fine("\n***** decodeData(): start *****");

    if (stream == null) {
        throw new IllegalArgumentException("stream == null!");
    }//from   w  w  w . j a  va2 s .co  m

    int nvar = (Integer) smd.getFileInformation().get("varQnty");
    int nobs = (Integer) smd.getFileInformation().get("caseQnty");
    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("data diminsion[rxc]=(" + nobs + "," + nvar + ")");
    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("bytes per row=" + bytes_per_row + " bytes");

    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("variableTypelList=" + Arrays.deepToString(variableTypelList));
    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("StringVariableTable=" + StringVariableTable);

    FileOutputStream fileOutTab = null;
    PrintWriter pwout = null;

    // create a File object to save the tab-delimited data file
    File tabDelimitedDataFile = File.createTempFile("tempTabfile.", ".tab");

    String tabDelimitedDataFileName = tabDelimitedDataFile.getAbsolutePath();

    // save the temp file name in the metadata object
    smd.getFileInformation().put("tabDelimitedDataFileLocation", tabDelimitedDataFileName);

    fileOutTab = new FileOutputStream(tabDelimitedDataFile);

    pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, "utf8"), true);

    // data storage
    // Object[][] dataTable = new Object[nobs][nvar];
    // for later variable-wise calculations of statistics
    // dataTable2 sotres cut-out data columnwise
    Object[][] dataTable2 = new Object[nvar][nobs];
    String[][] dateFormat = new String[nvar][nobs];

    for (int i = 0; i < nobs; i++) {
        byte[] dataRowBytes = new byte[bytes_per_row];
        Object[] dataRow = new Object[nvar];

        int nbytes = stream.read(dataRowBytes, 0, bytes_per_row);

        if (nbytes == 0) {
            String errorMessage = "reading data: no data were read at(" + i + "th row)";
            throw new IOException(errorMessage);
        }
        // decoding each row
        int byte_offset = 0;
        for (int columnCounter = 0; columnCounter < variableTypelList.length; columnCounter++) {

            Integer varType = variableTypeMap.get(variableTypelList[columnCounter]);

            String variableFormat = variableFormats[columnCounter];

            boolean isDateTimeDatum = isDateTimeDatumList[columnCounter];

            switch (varType != null ? varType : 256) {
            case -5:
                // Byte case
                // note: 1 byte signed
                byte byte_datum = dataRowBytes[byte_offset];

                if (dbgLog.isLoggable(Level.FINER))
                    dbgLog.finer(i + "-th row " + columnCounter + "=th column byte =" + byte_datum);
                if (byte_datum >= BYTE_MISSING_VALUE) {
                    if (dbgLog.isLoggable(Level.FINER))
                        dbgLog.finer(i + "-th row " + columnCounter + "=th column byte MV=" + byte_datum);
                    dataRow[columnCounter] = MissingValueForTextDataFileNumeric;
                    dataTable2[columnCounter][i] = null; //use null reference to indicate missing value in data that is passed to UNF
                } else {
                    dataRow[columnCounter] = byte_datum;
                    dataTable2[columnCounter][i] = byte_datum;
                }

                byte_offset++;
                break;
            case -4:
                // Stata-int (=java's short: 2byte) case
                // note: 2-byte signed int, not java's int
                ByteBuffer int_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 2);
                if (isLittleEndian) {
                    int_buffer.order(ByteOrder.LITTLE_ENDIAN);

                }
                short short_datum = int_buffer.getShort();

                if (dbgLog.isLoggable(Level.FINER))
                    dbgLog.finer(i + "-th row " + columnCounter + "=th column stata int =" + short_datum);
                if (short_datum >= INT_MISSIG_VALUE) {
                    if (dbgLog.isLoggable(Level.FINER))
                        dbgLog.finer(i + "-th row " + columnCounter + "=th column stata long missing value="
                                + short_datum);
                    dataTable2[columnCounter][i] = null; //use null reference to indicate missing value in data that is passed to UNF
                    if (isDateTimeDatum) {
                        dataRow[columnCounter] = MissingValueForTextDataFileString;
                    } else {
                        dataRow[columnCounter] = MissingValueForTextDataFileNumeric;
                    }
                } else {

                    if (isDateTimeDatum) {

                        DecodedDateTime ddt = decodeDateTimeData("short", variableFormat,
                                Short.toString(short_datum));
                        if (dbgLog.isLoggable(Level.FINER))
                            dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format="
                                    + ddt.format);
                        dataRow[columnCounter] = ddt.decodedDateTime;
                        dateFormat[columnCounter][i] = ddt.format;
                        dataTable2[columnCounter][i] = dataRow[columnCounter];

                    } else {
                        dataTable2[columnCounter][i] = short_datum;
                        dataRow[columnCounter] = short_datum;
                    }
                }
                byte_offset += 2;
                break;
            case -3:
                // stata-Long (= java's int: 4 byte) case
                // note: 4-byte singed, not java's long
                dbgLog.fine("DATreader: stata long");

                ByteBuffer long_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 4);
                if (isLittleEndian) {
                    long_buffer.order(ByteOrder.LITTLE_ENDIAN);

                }
                int int_datum = long_buffer.getInt();

                if (dbgLog.isLoggable(Level.FINE))
                    dbgLog.fine(i + "-th row " + columnCounter + "=th column stata long =" + int_datum);
                if (int_datum >= LONG_MISSING_VALUE) {
                    if (dbgLog.isLoggable(Level.FINE))
                        dbgLog.fine(i + "-th row " + columnCounter + "=th column stata long missing value="
                                + int_datum);
                    dataTable2[columnCounter][i] = null; //use null reference to indicate missing value in data that is passed to UNF
                    if (isDateTimeDatum) {
                        dataRow[columnCounter] = MissingValueForTextDataFileString;
                    } else {
                        dataRow[columnCounter] = MissingValueForTextDataFileNumeric;
                    }
                } else {
                    if (isDateTimeDatum) {
                        DecodedDateTime ddt = decodeDateTimeData("int", variableFormat,
                                Integer.toString(int_datum));
                        if (dbgLog.isLoggable(Level.FINER))
                            dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format="
                                    + ddt.format);
                        dataRow[columnCounter] = ddt.decodedDateTime;
                        dateFormat[columnCounter][i] = ddt.format;
                        dataTable2[columnCounter][i] = dataRow[columnCounter];

                    } else {
                        dataTable2[columnCounter][i] = int_datum;
                        dataRow[columnCounter] = int_datum;
                    }

                }
                byte_offset += 4;
                break;
            case -2:
                // float case
                // note: 4-byte
                ByteBuffer float_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 4);
                if (isLittleEndian) {
                    float_buffer.order(ByteOrder.LITTLE_ENDIAN);
                }
                float float_datum = float_buffer.getFloat();

                if (dbgLog.isLoggable(Level.FINER))
                    dbgLog.finer(i + "-th row " + columnCounter + "=th column float =" + float_datum);
                if (FLOAT_MISSING_VALUE_SET.contains(float_datum)) {
                    if (dbgLog.isLoggable(Level.FINER))
                        dbgLog.finer(i + "-th row " + columnCounter + "=th column float missing value="
                                + float_datum);
                    dataTable2[columnCounter][i] = null; //use null reference to indicate missing value in data that is passed to UNF
                    if (isDateTimeDatum) {
                        dataRow[columnCounter] = MissingValueForTextDataFileString;
                    } else {
                        dataRow[columnCounter] = MissingValueForTextDataFileNumeric;
                    }

                } else {

                    if (isDateTimeDatum) {
                        DecodedDateTime ddt = decodeDateTimeData("float", variableFormat,
                                doubleNumberFormatter.format(float_datum));
                        if (dbgLog.isLoggable(Level.FINER))
                            dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format="
                                    + ddt.format);
                        dataRow[columnCounter] = ddt.decodedDateTime;
                        dateFormat[columnCounter][i] = ddt.format;
                        dataTable2[columnCounter][i] = dataRow[columnCounter];
                    } else {
                        dataTable2[columnCounter][i] = float_datum;
                        dataRow[columnCounter] = float_datum;
                    }

                }
                byte_offset += 4;
                break;
            case -1:
                // double case
                // note: 8-byte
                ByteBuffer double_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 8);
                if (isLittleEndian) {
                    double_buffer.order(ByteOrder.LITTLE_ENDIAN);
                }
                double double_datum = double_buffer.getDouble();

                if (DOUBLE_MISSING_VALUE_SET.contains(double_datum)) {
                    dataTable2[columnCounter][i] = null; //use null reference to indicate missing value in data that is passed to UNF
                    if (dbgLog.isLoggable(Level.FINER))
                        dbgLog.finer(i + "-th row " + columnCounter + "=th column double missing value="
                                + double_datum);
                    if (isDateTimeDatum) {
                        dataRow[columnCounter] = MissingValueForTextDataFileString;
                    } else {
                        dataRow[columnCounter] = MissingValueForTextDataFileNumeric;
                    }
                } else {

                    if (isDateTimeDatum) {
                        DecodedDateTime ddt = decodeDateTimeData("double", variableFormat,
                                doubleNumberFormatter.format(double_datum));
                        if (dbgLog.isLoggable(Level.FINER))
                            dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format="
                                    + ddt.format);
                        dataRow[columnCounter] = ddt.decodedDateTime;
                        dateFormat[columnCounter][i] = ddt.format;
                        dataTable2[columnCounter][i] = dataRow[columnCounter];
                    } else {
                        dataTable2[columnCounter][i] = double_datum;
                        dataRow[columnCounter] = doubleNumberFormatter.format(double_datum);
                    }

                }
                byte_offset += 8;
                break;
            case 0:
                // String case
                int strVarLength = StringVariableTable.get(columnCounter);
                String raw_datum = new String(
                        Arrays.copyOfRange(dataRowBytes, byte_offset, (byte_offset + strVarLength)),
                        "ISO-8859-1");
                String string_datum = getNullStrippedString(raw_datum);
                if (dbgLog.isLoggable(Level.FINER))
                    dbgLog.finer(i + "-th row " + columnCounter + "=th column string =" + string_datum);
                if (string_datum.equals("")) {
                    if (dbgLog.isLoggable(Level.FINER))
                        dbgLog.finer(i + "-th row " + columnCounter + "=th column string missing value="
                                + string_datum);
                    dataRow[columnCounter] = MissingValueForTextDataFileString;
                    dataTable2[columnCounter][i] = null; //use null reference to indicate missing value in data that is passed to UNF
                } else {
                    String escapedString = string_datum.replaceAll("\"", Matcher.quoteReplacement("\\\""));
                    /*
                     * Fixing the bug we've had in the Stata reader for 
                     * a longest time: new lines and tabs need to 
                     * be escaped too - otherwise it breaks our 
                     * TAB file structure! -- L.A. 
                     */
                    escapedString = escapedString.replaceAll("\t", Matcher.quoteReplacement("\\t"));
                    escapedString = escapedString.replaceAll("\n", Matcher.quoteReplacement("\\n"));
                    escapedString = escapedString.replaceAll("\r", Matcher.quoteReplacement("\\r"));
                    // the escaped version of the string will be 
                    // stored in the tab file: 
                    dataRow[columnCounter] = "\"" + escapedString + "\"";
                    // but note that the "raw" version of it is 
                    // used for the UNF:
                    dataTable2[columnCounter][i] = string_datum;
                }
                byte_offset += strVarLength;
                break;
            default:
                dbgLog.fine("unknown variable type found");
                String errorMessage = "unknow variable Type found at data section";
                throw new InvalidObjectException(errorMessage);
            } // switch
        } // for-columnCounter

        // dump the row of data to the external file
        pwout.println(StringUtils.join(dataRow, "\t"));

        if (dbgLog.isLoggable(Level.FINE))
            dbgLog.fine(i + "-th row's data={" + StringUtils.join(dataRow, ",") + "};");

    } // for- i (row)

    pwout.close();
    if (dbgLog.isLoggable(Level.FINER)) {
        dbgLog.finer("\ndataTable2(variable-wise):\n");
        dbgLog.finer(Arrays.deepToString(dataTable2));
        dbgLog.finer("\ndateFormat(variable-wise):\n");
        dbgLog.finer(Arrays.deepToString(dateFormat));
    }
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("variableTypelList:\n" + Arrays.deepToString(variableTypelList));
        dbgLog.fine("variableTypelListFinal:\n" + Arrays.deepToString(variableTypelListFinal));
    }
    String[] unfValues = new String[nvar];

    for (int j = 0; j < nvar; j++) {
        String variableType_j = variableTypelListFinal[j];

        unfValues[j] = getUNF(dataTable2[j], dateFormat[j], variableType_j, unfVersionNumber, j);
        if (dbgLog.isLoggable(Level.FINE))
            dbgLog.fine(j + "th unf value" + unfValues[j]);

    }

    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("unf set:\n" + Arrays.deepToString(unfValues));

    fileUnfValue = UNF5Util.calculateUNF(unfValues);

    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("file-unf=" + fileUnfValue);

    stataDataSection.setUnf(unfValues);

    stataDataSection.setFileUnf(fileUnfValue);

    smd.setVariableUNF(unfValues);

    smd.getFileInformation().put("fileUNF", fileUnfValue);
    if (dbgLog.isLoggable(Level.FINE))
        dbgLog.fine("unf values:\n" + unfValues);

    stataDataSection.setData(dataTable2);

    // close the stream

    dbgLog.fine("***** decodeData(): end *****\n\n");

}

From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.dta.DTAFileReader.java

private void decodeData(BufferedInputStream stream) throws IOException {

    dbgLog.fine("\n***** decodeData(): start *****");

    if (stream == null) {
        throw new IllegalArgumentException("stream == null!");
    }/*from  ww  w .java  2  s.com*/

    //int nvar = (Integer)smd.getFileInformation().get("varQnty");
    int nvar = dataTable.getVarQuantity().intValue();
    //int nobs = (Integer)smd.getFileInformation().get("caseQnty");
    int nobs = dataTable.getCaseQuantity().intValue();

    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("data dimensions[observations x variables] = (" + nobs + "x" + nvar + ")");
    }
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("bytes per row=" + bytes_per_row + " bytes");
    }

    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("variableTypes=" + Arrays.deepToString(variableTypes));
    }
    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("StringLengthTable=" + StringLengthTable);
    }

    // create a File object to save the tab-delimited data file
    FileOutputStream fileOutTab = null;
    PrintWriter pwout = null;
    File tabDelimitedDataFile = File.createTempFile("tempTabfile.", ".tab");

    // save the temp tab-delimited file in the return ingest object:        
    ingesteddata.setTabDelimitedFile(tabDelimitedDataFile);

    fileOutTab = new FileOutputStream(tabDelimitedDataFile);
    pwout = new PrintWriter(new OutputStreamWriter(fileOutTab, "utf8"), true);

    /* Should we lose this dateFormat thing in 4.0? 
     * the UNF should be calculatable on the app side solely from the data
     * stored in the tab file and the type information stored the dataVariable
     * object. 
     * furthermore, the very idea of storing a format entry not just for 
     * every variable, but for every value/observation is a bit strange. 
     * TODO: review and confirm that, in the 3.* implementation, every
     * entry in dateFormat[nvar][*] is indeed the same - except for the 
     * missing value entries. -- L.A. 4.0
      (OK, I got rid of the dateFormat; instead I kinda sorta assume
      that the format is the same for every value in a column, save for 
      the missing values... like this: 
      dataTable.getDataVariables().get(columnCounter).setFormatSchemaName(ddt.format);
      BUT, this needs to be reviewed/confirmed etc! 
     */
    //String[][] dateFormat = new String[nvar][nobs];

    for (int i = 0; i < nobs; i++) {
        byte[] dataRowBytes = new byte[bytes_per_row];
        Object[] dataRow = new Object[nvar];

        int nbytes = stream.read(dataRowBytes, 0, bytes_per_row);

        if (nbytes == 0) {
            String errorMessage = "reading data: no data were read at(" + i + "th row)";
            throw new IOException(errorMessage);
        }
        // decoding each row
        int byte_offset = 0;
        for (int columnCounter = 0; columnCounter < variableTypes.length; columnCounter++) {

            Integer varType = variableTypeMap.get(variableTypes[columnCounter]);

            // 4.0 Check if this is a time/date variable: 
            boolean isDateTimeDatum = false;
            String formatCategory = dataTable.getDataVariables().get(columnCounter).getFormatCategory();
            if (formatCategory != null && (formatCategory.equals("time") || formatCategory.equals("date"))) {
                isDateTimeDatum = true;
            }

            String variableFormat = dateVariableFormats[columnCounter];

            switch (varType != null ? varType : 256) {
            case -5:
                // Byte case
                // note: 1 byte signed
                byte byte_datum = dataRowBytes[byte_offset];

                if (dbgLog.isLoggable(Level.FINER)) {
                    dbgLog.finer(i + "-th row " + columnCounter + "=th column byte =" + byte_datum);
                }
                if (byte_datum >= BYTE_MISSING_VALUE) {
                    if (dbgLog.isLoggable(Level.FINER)) {
                        dbgLog.finer(i + "-th row " + columnCounter + "=th column byte MV=" + byte_datum);
                    }
                    dataRow[columnCounter] = MissingValueForTabDelimitedFile;
                } else {
                    dataRow[columnCounter] = byte_datum;
                }

                byte_offset++;
                break;
            case -4:
                // Stata-int (=java's short: 2byte) case
                // note: 2-byte signed int, not java's int
                ByteBuffer int_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 2);
                if (isLittleEndian) {
                    int_buffer.order(ByteOrder.LITTLE_ENDIAN);

                }
                short short_datum = int_buffer.getShort();

                if (dbgLog.isLoggable(Level.FINER)) {
                    dbgLog.finer(i + "-th row " + columnCounter + "=th column stata int =" + short_datum);
                }
                if (short_datum >= INT_MISSIG_VALUE) {
                    if (dbgLog.isLoggable(Level.FINER)) {
                        dbgLog.finer(i + "-th row " + columnCounter + "=th column stata long missing value="
                                + short_datum);
                    }
                    dataRow[columnCounter] = MissingValueForTabDelimitedFile;
                } else {

                    if (isDateTimeDatum) {

                        DecodedDateTime ddt = decodeDateTimeData("short", variableFormat,
                                Short.toString(short_datum));
                        if (dbgLog.isLoggable(Level.FINER)) {
                            dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format="
                                    + ddt.format);
                        }
                        dataRow[columnCounter] = ddt.decodedDateTime;
                        //dateFormat[columnCounter][i] = ddt.format;
                        dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format);

                    } else {
                        dataRow[columnCounter] = short_datum;
                    }
                }
                byte_offset += 2;
                break;
            case -3:
                // stata-Long (= java's int: 4 byte) case
                // note: 4-byte singed, not java's long
                //dbgLog.fine("DATreader: stata long");

                ByteBuffer long_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 4);
                if (isLittleEndian) {
                    long_buffer.order(ByteOrder.LITTLE_ENDIAN);

                }
                int int_datum = long_buffer.getInt();

                if (dbgLog.isLoggable(Level.FINE)) {
                    //dbgLog.fine(i + "-th row " + columnCounter
                    //        + "=th column stata long =" + int_datum);
                }
                if (int_datum >= LONG_MISSING_VALUE) {
                    if (dbgLog.isLoggable(Level.FINE)) {
                        //dbgLog.fine(i + "-th row " + columnCounter
                        //        + "=th column stata long missing value=" + int_datum);
                    }
                    dataRow[columnCounter] = MissingValueForTabDelimitedFile;
                } else {
                    if (isDateTimeDatum) {
                        DecodedDateTime ddt = decodeDateTimeData("int", variableFormat,
                                Integer.toString(int_datum));
                        if (dbgLog.isLoggable(Level.FINER)) {
                            dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format="
                                    + ddt.format);
                        }
                        dataRow[columnCounter] = ddt.decodedDateTime;
                        dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format);

                    } else {
                        dataRow[columnCounter] = int_datum;
                    }

                }
                byte_offset += 4;
                break;
            case -2:
                // float case
                // note: 4-byte
                ByteBuffer float_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 4);
                if (isLittleEndian) {
                    float_buffer.order(ByteOrder.LITTLE_ENDIAN);
                }
                float float_datum = float_buffer.getFloat();

                if (dbgLog.isLoggable(Level.FINER)) {
                    dbgLog.finer(i + "-th row " + columnCounter + "=th column float =" + float_datum);
                }
                if (FLOAT_MISSING_VALUE_SET.contains(float_datum)) {
                    if (dbgLog.isLoggable(Level.FINER)) {
                        dbgLog.finer(i + "-th row " + columnCounter + "=th column float missing value="
                                + float_datum);
                    }
                    dataRow[columnCounter] = MissingValueForTabDelimitedFile;

                } else {

                    if (isDateTimeDatum) {
                        DecodedDateTime ddt = decodeDateTimeData("float", variableFormat,
                                doubleNumberFormatter.format(float_datum));
                        if (dbgLog.isLoggable(Level.FINER)) {
                            dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format="
                                    + ddt.format);
                        }
                        dataRow[columnCounter] = ddt.decodedDateTime;
                        dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format);
                    } else {
                        dataRow[columnCounter] = float_datum;
                        // This may be temporary - but for now (as in, while I'm testing 
                        // 4.0 ingest against 3.* ingest, I need to be able to tell if a 
                        // floating point value was a single, or double float in the 
                        // original STATA file: -- L.A. Jul. 2014
                        dataTable.getDataVariables().get(columnCounter).setFormat("float");
                    }

                }
                byte_offset += 4;
                break;
            case -1:
                // double case
                // note: 8-byte
                ByteBuffer double_buffer = ByteBuffer.wrap(dataRowBytes, byte_offset, 8);
                if (isLittleEndian) {
                    double_buffer.order(ByteOrder.LITTLE_ENDIAN);
                }
                double double_datum = double_buffer.getDouble();

                if (DOUBLE_MISSING_VALUE_SET.contains(double_datum)) {
                    if (dbgLog.isLoggable(Level.FINER)) {
                        dbgLog.finer(i + "-th row " + columnCounter + "=th column double missing value="
                                + double_datum);
                    }
                    dataRow[columnCounter] = MissingValueForTabDelimitedFile;
                } else {

                    if (isDateTimeDatum) {
                        DecodedDateTime ddt = decodeDateTimeData("double", variableFormat,
                                doubleNumberFormatter.format(double_datum));
                        if (dbgLog.isLoggable(Level.FINER)) {
                            dbgLog.finer(i + "-th row , decodedDateTime " + ddt.decodedDateTime + ", format="
                                    + ddt.format);
                        }
                        dataRow[columnCounter] = ddt.decodedDateTime;
                        dataTable.getDataVariables().get(columnCounter).setFormat(ddt.format);
                    } else {
                        dataRow[columnCounter] = doubleNumberFormatter.format(double_datum);
                    }

                }
                byte_offset += 8;
                break;
            case 0:
                // String case
                int strVarLength = StringLengthTable.get(columnCounter);
                String raw_datum = new String(
                        Arrays.copyOfRange(dataRowBytes, byte_offset, (byte_offset + strVarLength)),
                        "ISO-8859-1");
                // TODO: 
                // is it the right thing to do, to default to "ISO-8859-1"?
                // (it may be; since there's no mechanism for specifying
                // alternative encodings in Stata, this may be their default;
                // it just needs to be verified. -- L.A. Jul. 2014)
                String string_datum = getNullStrippedString(raw_datum);
                if (dbgLog.isLoggable(Level.FINER)) {
                    dbgLog.finer(i + "-th row " + columnCounter + "=th column string =" + string_datum);
                }
                if (string_datum.isEmpty()) {
                    if (dbgLog.isLoggable(Level.FINER)) {
                        dbgLog.finer(i + "-th row " + columnCounter + "=th column string missing value="
                                + string_datum);
                    }
                    // TODO: 
                    /* Is this really a missing value case? 
                     * Or is it an honest empty string? 
                     * Is there such a thing as a missing value for a String in Stata?
                     * -- L.A. 4.0
                     */
                    dataRow[columnCounter] = MissingValueForTabDelimitedFile;
                } else {
                    /*
                     * Some special characters, like new lines and tabs need to 
                     * be escaped - otherwise they will break our TAB file 
                     * structure! 
                     * But before we escape anything, all the back slashes 
                     * already in the string need to be escaped themselves.
                     */
                    String escapedString = string_datum.replace("\\", "\\\\");
                    // escape quotes: 
                    escapedString = escapedString.replaceAll("\"", Matcher.quoteReplacement("\\\""));
                    // escape tabs and new lines:
                    escapedString = escapedString.replaceAll("\t", Matcher.quoteReplacement("\\t"));
                    escapedString = escapedString.replaceAll("\n", Matcher.quoteReplacement("\\n"));
                    escapedString = escapedString.replaceAll("\r", Matcher.quoteReplacement("\\r"));
                    // the escaped version of the string is stored in the tab file 
                    // enclosed in double-quotes; this is in order to be able 
                    // to differentiate between an empty string (tab-delimited empty string in 
                    // double quotes) and a missing value (tab-delimited empty string). 
                    // Although the question still remains - is it even possible 
                    // to store an empty string, that's not a missing value, in Stata? 
                    // - see the comment in the missing value case above. -- L.A. 4.0
                    dataRow[columnCounter] = "\"" + escapedString + "\"";
                }
                byte_offset += strVarLength;
                break;
            default:
                dbgLog.fine("unknown variable type found");
                String errorMessage = "unknow variable Type found at data section";
                throw new InvalidObjectException(errorMessage);
            } // switch
        } // for-columnCounter

        // Dump the row of data to the tab-delimited file we are producing:
        pwout.println(StringUtils.join(dataRow, "\t"));

        if (dbgLog.isLoggable(Level.FINE)) {
            //dbgLog.fine(i + "-th row's data={" + StringUtils.join(dataRow, ",") + "};");
        }

    } // for- i (row)

    pwout.close();

    if (dbgLog.isLoggable(Level.FINE)) {
        dbgLog.fine("variableTypes:\n" + Arrays.deepToString(variableTypes));
    }

    dbgLog.fine("DTA Ingest: decodeData(): end.");

}

From source file:com.healthmarketscience.jackcess.Column.java

/**
 * Deserialize a raw byte value for this column into an Object
 * @param data The raw byte value/*w w  w.  ja  v  a2 s  .c  o m*/
 * @param order Byte order in which the raw value is stored
 * @return The deserialized Object
 * @usage _advanced_method_
 */
public Object read(byte[] data, ByteOrder order) throws IOException {
    ByteBuffer buffer = ByteBuffer.wrap(data);
    buffer.order(order);
    if (_type == DataType.BOOLEAN) {
        throw new IOException("Tried to read a boolean from data instead of null mask.");
    } else if (_type == DataType.BYTE) {
        return Byte.valueOf(buffer.get());
    } else if (_type == DataType.INT) {
        return Short.valueOf(buffer.getShort());
    } else if (_type == DataType.LONG) {
        return Integer.valueOf(buffer.getInt());
    } else if (_type == DataType.DOUBLE) {
        return Double.valueOf(buffer.getDouble());
    } else if (_type == DataType.FLOAT) {
        return Float.valueOf(buffer.getFloat());
    } else if (_type == DataType.SHORT_DATE_TIME) {
        return readDateValue(buffer);
    } else if (_type == DataType.BINARY) {
        return data;
    } else if (_type == DataType.TEXT) {
        return decodeTextValue(data);
    } else if (_type == DataType.MONEY) {
        return readCurrencyValue(buffer);
    } else if (_type == DataType.OLE) {
        if (data.length > 0) {
            return readLongValue(data);
        }
        return null;
    } else if (_type == DataType.MEMO) {
        if (data.length > 0) {
            return readLongStringValue(data);
        }
        return null;
    } else if (_type == DataType.NUMERIC) {
        return readNumericValue(buffer);
    } else if (_type == DataType.GUID) {
        return readGUIDValue(buffer, order);
    } else if ((_type == DataType.UNKNOWN_0D) || (_type == DataType.UNKNOWN_11)) {
        // treat like "binary" data
        return data;
    } else if (_type == DataType.COMPLEX_TYPE) {
        return new ComplexValueForeignKey(this, buffer.getInt());
    } else if (_type.isUnsupported()) {
        return rawDataWrapper(data);
    } else {
        throw new IOException("Unrecognized data type: " + _type);
    }
}