Example usage for java.nio ByteBuffer order

List of usage examples for java.nio ByteBuffer order

Introduction

In this page you can find the example usage for java.nio ByteBuffer order.

Prototype

Endianness order

To view the source code for java.nio ByteBuffer order.

Click Source Link

Document

The byte order of this buffer, default is BIG_ENDIAN .

Usage

From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.sav.SAVFileReader.java

void decodeRecordType3and4(BufferedInputStream stream) throws IOException {
    dbgLog.fine("***** decodeRecordType3and4(): start *****");
    Map<String, Map<String, String>> valueLabelTable = new LinkedHashMap<String, Map<String, String>>();

    int safteyCounter = 0;
    while (true) {
        try {/*from  ww  w  .  j  a  va 2s  .c o  m*/
            if (stream == null) {
                throw new IllegalArgumentException("stream == null!");
            }
            // this secton may not exit so first check the 4-byte header value
            //if (stream.markSupported()){
            stream.mark(1000);
            //}
            // 3.0 check the first 4 bytes
            byte[] headerCode = new byte[LENGTH_RECORD_TYPE3_CODE];

            int nbytes_rt3 = stream.read(headerCode, 0, LENGTH_RECORD_TYPE3_CODE);
            // to-do check against nbytes
            //printHexDump(headerCode, "RT3 header test");
            ByteBuffer bb_header_code = ByteBuffer.wrap(headerCode, 0, LENGTH_RECORD_TYPE3_CODE);
            if (isLittleEndian) {
                bb_header_code.order(ByteOrder.LITTLE_ENDIAN);
            }

            int intRT3test = bb_header_code.getInt();
            dbgLog.fine("header test value: RT3=" + intRT3test);
            if (intRT3test != 3) {
                //if (stream.markSupported()){
                dbgLog.fine("iteration=" + safteyCounter);

                // We have encountered a record that's not type 3. This means we've
                // processed all the type 3/4 record pairs. So we want to rewind
                // the stream and return -- so that the appropriate record type
                // reader can be called on it.
                // But before we return, we need to save all the value labels
                // we have found:

                smd.setValueLabelTable(valueLabelTable);

                stream.reset();
                return;
                //}
            }
            // 3.1 how many value-label pairs follow
            byte[] number_of_labels = new byte[LENGTH_RT3_HOW_MANY_LABELS];

            int nbytes_3_1 = stream.read(number_of_labels);
            if (nbytes_3_1 == 0) {
                throw new IOException("RT 3: reading recordType3.1: no byte was read");
            }
            ByteBuffer bb_number_of_labels = ByteBuffer.wrap(number_of_labels, 0, LENGTH_RT3_HOW_MANY_LABELS);
            if (isLittleEndian) {
                bb_number_of_labels.order(ByteOrder.LITTLE_ENDIAN);
            }

            int numberOfValueLabels = bb_number_of_labels.getInt();
            dbgLog.fine("number of value-label pairs=" + numberOfValueLabels);

            ByteBuffer[] tempBB = new ByteBuffer[numberOfValueLabels];

            String valueLabel[] = new String[numberOfValueLabels];

            for (int i = 0; i < numberOfValueLabels; i++) {

                // read 8-byte as value          
                byte[] value = new byte[LENGTH_RT3_VALUE];
                int nbytes_3_value = stream.read(value);

                if (nbytes_3_value == 0) {
                    throw new IOException("RT 3: reading recordType3 value: no byte was read");
                }
                // note these 8 bytes are interpreted later
                // currently no information about which variable's (=> type unknown)
                ByteBuffer bb_value = ByteBuffer.wrap(value, 0, LENGTH_RT3_VALUE);
                if (isLittleEndian) {
                    bb_value.order(ByteOrder.LITTLE_ENDIAN);
                }
                tempBB[i] = bb_value;
                dbgLog.fine("bb_value=" + Hex.encodeHex(bb_value.array()));
                /*
                  double valueD = bb_value.getDouble();                
                  dbgLog.fine("value="+valueD);
                */
                // read 1st byte as unsigned integer = label_length

                // read label_length byte as label
                byte[] labelLengthByte = new byte[LENGTH_RT3_LABEL_LENGTH];

                int nbytes_3_label_length = stream.read(labelLengthByte);

                // add check-routine here

                dbgLog.fine("labelLengthByte" + Hex.encodeHex(labelLengthByte));
                dbgLog.fine("label length = " + labelLengthByte[0]);
                // the net-length of a value label is saved as
                // unsigned byte; however, the length is less than 127
                // byte should be ok
                int rawLabelLength = labelLengthByte[0] & 0xFF;
                dbgLog.fine("rawLabelLength=" + rawLabelLength);
                // -1 =>1-byte already read
                int labelLength = getSAVobsAdjustedBlockLength(rawLabelLength + 1) - 1;
                byte[] valueLabelBytes = new byte[labelLength];
                int nbytes_3_value_label = stream.read(valueLabelBytes);

                // ByteBuffer bb_label = ByteBuffer.wrap(valueLabel,0,labelLength);

                valueLabel[i] = StringUtils.stripEnd(
                        new String(Arrays.copyOfRange(valueLabelBytes, 0, rawLabelLength), defaultCharSet),
                        " ");
                dbgLog.fine(i + "-th valueLabel=" + valueLabel[i] + "<-");

            } // iter rt3

            dbgLog.fine("end of RT3 block");
            dbgLog.fine("start of RT4 block");

            // 4.0 check the first 4 bytes
            byte[] headerCode4 = new byte[LENGTH_RECORD_TYPE4_CODE];

            int nbytes_rt4 = stream.read(headerCode4, 0, LENGTH_RECORD_TYPE4_CODE);

            if (nbytes_rt4 == 0) {
                throw new IOException("RT4: reading recordType4 value: no byte was read");
            }

            //printHexDump(headerCode4, "RT4 header test");

            ByteBuffer bb_header_code_4 = ByteBuffer.wrap(headerCode4, 0, LENGTH_RECORD_TYPE4_CODE);
            if (isLittleEndian) {
                bb_header_code_4.order(ByteOrder.LITTLE_ENDIAN);
            }

            int intRT4test = bb_header_code_4.getInt();
            dbgLog.fine("header test value: RT4=" + intRT4test);

            if (intRT4test != 4) {
                throw new IOException("RT 4: reading recordType4 header: no byte was read");
            }

            // 4.1 read the how-many-variables bytes
            byte[] howManyVariablesfollow = new byte[LENGTH_RT4_HOW_MANY_VARIABLES];

            int nbytes_rt4_1 = stream.read(howManyVariablesfollow, 0, LENGTH_RT4_HOW_MANY_VARIABLES);

            ByteBuffer bb_howManyVariablesfollow = ByteBuffer.wrap(howManyVariablesfollow, 0,
                    LENGTH_RT4_HOW_MANY_VARIABLES);
            if (isLittleEndian) {
                bb_howManyVariablesfollow.order(ByteOrder.LITTLE_ENDIAN);
            }

            int howManyVariablesRT4 = bb_howManyVariablesfollow.getInt();
            dbgLog.fine("how many variables follow: RT4=" + howManyVariablesRT4);

            int length_indicies = LENGTH_RT4_VARIABLE_INDEX * howManyVariablesRT4;
            byte[] variableIdicesBytes = new byte[length_indicies];

            int nbytes_rt4_2 = stream.read(variableIdicesBytes, 0, length_indicies);

            // !!!!! Caution: variableIndex in RT4 starts from 1 NOT ** 0 **
            int[] variableIndex = new int[howManyVariablesRT4];
            int offset = 0;
            for (int i = 0; i < howManyVariablesRT4; i++) {

                ByteBuffer bb_variable_index = ByteBuffer.wrap(variableIdicesBytes, offset,
                        LENGTH_RT4_VARIABLE_INDEX);
                offset += LENGTH_RT4_VARIABLE_INDEX;

                if (isLittleEndian) {
                    bb_variable_index.order(ByteOrder.LITTLE_ENDIAN);
                }

                variableIndex[i] = bb_variable_index.getInt();
                dbgLog.fine(i + "-th variable index number=" + variableIndex[i]);
            }

            dbgLog.fine("variable index set=" + ArrayUtils.toString(variableIndex));
            dbgLog.fine("subtract 1 from variableIndex for getting a variable info");

            boolean isNumeric = OBSwiseTypelList.get(variableIndex[0] - 1) == 0 ? true : false;

            Map<String, String> valueLabelPair = new LinkedHashMap<String, String>();
            if (isNumeric) {
                // numeric variable
                dbgLog.fine("processing of a numeric value-label table");
                for (int j = 0; j < numberOfValueLabels; j++) {
                    valueLabelPair.put(doubleNumberFormatter.format(tempBB[j].getDouble()), valueLabel[j]);
                }
            } else {
                // String variable
                dbgLog.fine("processing of a string value-label table");
                for (int j = 0; j < numberOfValueLabels; j++) {
                    valueLabelPair.put(
                            StringUtils.stripEnd(new String((tempBB[j].array()), defaultCharSet), " "),
                            valueLabel[j]);
                }
            }

            dbgLog.fine("valueLabePair=" + valueLabelPair);
            dbgLog.fine("key variable's (raw) index =" + variableIndex[0]);

            valueLabelTable.put(OBSIndexToVariableName.get(variableIndex[0] - 1), valueLabelPair);

            dbgLog.fine("valueLabelTable=" + valueLabelTable);

            // create a mapping table that finds the key variable for this mapping table
            String keyVariableName = OBSIndexToVariableName.get(variableIndex[0] - 1);
            for (int vn : variableIndex) {
                valueVariableMappingTable.put(OBSIndexToVariableName.get(vn - 1), keyVariableName);
            }

            dbgLog.fine("valueVariableMappingTable:\n" + valueVariableMappingTable);
        } catch (IOException ex) {
            //ex.printStackTrace();
            throw ex;
        }

        safteyCounter++;
        if (safteyCounter >= 1000000) {
            break;
        }
    } //while

    smd.setValueLabelTable(valueLabelTable);

    dbgLog.fine("***** decodeRecordType3and4(): end *****");
}

From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.sav.SAVFileReader.java

void decodeRecordType7(BufferedInputStream stream) throws IOException {
    dbgLog.fine("decodeRecordType7(): start");
    int counter = 0;
    int[] headerSection = new int[2];

    // the variables below may no longer needed; 
    // but they may be useful for debugging/logging purposes.

    /// // RecordType 7 
    /// // Subtype 3
    /// List<Integer> releaseMachineSpecificInfo = new ArrayList<Integer>();
    /// List<String> releaseMachineSpecificInfoHex = new ArrayList<String>();

    /// // Subytpe 4
    /// Map<String, Double> OBSTypeValue = new LinkedHashMap<String, Double>();
    /// Map<String, String> OBSTypeHexValue = new LinkedHashMap<String, String>();    
    //Subtype 11/*from ww w .  j  ava  2  s  .  co m*/
    /// List<Integer> measurementLevel = new ArrayList<Integer>();
    /// List<Integer> columnWidth = new ArrayList<Integer>();
    /// List<Integer> alignment = new ArrayList<Integer>();

    while (true) {
        try {
            if (stream == null) {
                throw new IllegalArgumentException("RT7: stream == null!");
            }
            // first check the 4-byte header value
            //if (stream.markSupported()){
            stream.mark(1000);
            //}
            // 7.0 check the first 4 bytes
            byte[] headerCodeRt7 = new byte[LENGTH_RECORD_TYPE7_CODE];

            int nbytes_rt7 = stream.read(headerCodeRt7, 0, LENGTH_RECORD_TYPE7_CODE);
            // to-do check against nbytes
            //printHexDump(headerCodeRt7, "RT7 header test");
            ByteBuffer bb_header_code_rt7 = ByteBuffer.wrap(headerCodeRt7, 0, LENGTH_RECORD_TYPE7_CODE);
            if (isLittleEndian) {
                bb_header_code_rt7.order(ByteOrder.LITTLE_ENDIAN);
            }

            int intRT7test = bb_header_code_rt7.getInt();
            dbgLog.fine("RT7: header test value=" + intRT7test);
            if (intRT7test != 7) {
                //if (stream.markSupported()){
                //out.print("iteration="+safteyCounter);
                //dbgLog.fine("iteration="+safteyCounter);
                dbgLog.fine("intRT7test failed=" + intRT7test);
                dbgLog.fine("counter=" + counter);
                stream.reset();
                return;
                //}
            }

            // 7.1 check 4-byte integer Sub-Type Code

            byte[] length_sub_type_code = new byte[LENGTH_RT7_SUB_TYPE_CODE];

            int nbytes_rt7_1 = stream.read(length_sub_type_code, 0, LENGTH_RT7_SUB_TYPE_CODE);
            // to-do check against nbytes

            //printHexDump(length_how_many_line_bytes, "RT7 how_many_line_bytes");
            ByteBuffer bb_sub_type_code = ByteBuffer.wrap(length_sub_type_code, 0, LENGTH_RT7_SUB_TYPE_CODE);
            if (isLittleEndian) {
                bb_sub_type_code.order(ByteOrder.LITTLE_ENDIAN);
            }

            int subTypeCode = bb_sub_type_code.getInt();
            dbgLog.fine("RT7: subTypeCode=" + subTypeCode);

            switch (subTypeCode) {
            case 3:
                // 3: Release andMachine-Specific Integer Information

                //parseRT7SubTypefield(stream);

                headerSection = parseRT7SubTypefieldHeader(stream);
                if (headerSection != null) {
                    int unitLength = headerSection[0];
                    int numberOfUnits = headerSection[1];

                    for (int i = 0; i < numberOfUnits; i++) {
                        dbgLog.finer(i + "-th fieldData");
                        byte[] work = new byte[unitLength];

                        int nb = stream.read(work);
                        dbgLog.finer("raw bytes in Hex:" + new String(Hex.encodeHex(work)));
                        ByteBuffer bb_field = ByteBuffer.wrap(work);
                        if (isLittleEndian) {
                            bb_field.order(ByteOrder.LITTLE_ENDIAN);
                        }
                        String dataInHex = new String(Hex.encodeHex(bb_field.array()));
                        /// releaseMachineSpecificInfoHex.add(dataInHex);

                        dbgLog.finer("raw bytes in Hex:" + dataInHex);
                        if (unitLength == 4) {
                            int fieldData = bb_field.getInt();
                            dbgLog.finer("fieldData(int)=" + fieldData);
                            dbgLog.finer("fieldData in Hex=0x" + Integer.toHexString(fieldData));
                            /// releaseMachineSpecificInfo.add(fieldData);
                        }

                    }

                    /// dbgLog.fine("releaseMachineSpecificInfo="+releaseMachineSpecificInfo);
                    /// dbgLog.fine("releaseMachineSpecificInfoHex="+releaseMachineSpecificInfoHex);

                } else {
                    // throw new IOException
                }

                dbgLog.fine("***** end of subType 3 ***** \n");

                break;
            case 4:
                // Release andMachine-SpecificOBS-Type Information
                headerSection = parseRT7SubTypefieldHeader(stream);
                if (headerSection != null) {
                    int unitLength = headerSection[0];
                    int numberOfUnits = headerSection[1];

                    for (int i = 0; i < numberOfUnits; i++) {
                        dbgLog.finer(i + "-th fieldData:" + RecordType7SubType4Fields.get(i));
                        byte[] work = new byte[unitLength];

                        int nb = stream.read(work);

                        dbgLog.finer("raw bytes in Hex:" + new String(Hex.encodeHex(work)));
                        ByteBuffer bb_field = ByteBuffer.wrap(work);
                        dbgLog.finer("byte order=" + bb_field.order().toString());
                        if (isLittleEndian) {
                            bb_field.order(ByteOrder.LITTLE_ENDIAN);
                        }
                        ByteBuffer bb_field_dup = bb_field.duplicate();
                        OBSTypeHexValue.put(RecordType7SubType4Fields.get(i),
                                new String(Hex.encodeHex(bb_field.array())));
                        //                            dbgLog.finer("raw bytes in Hex:"+
                        //                                OBSTypeHexValue.get(RecordType7SubType4Fields.get(i)));
                        if (unitLength == 8) {
                            double fieldData = bb_field.getDouble();
                            /// OBSTypeValue.put(RecordType7SubType4Fields.get(i), fieldData);
                            dbgLog.finer("fieldData(double)=" + fieldData);
                            OBSTypeHexValue.put(RecordType7SubType4Fields.get(i),
                                    Double.toHexString(fieldData));
                            dbgLog.fine("fieldData in Hex=" + Double.toHexString(fieldData));
                        }
                    }
                    /// dbgLog.fine("OBSTypeValue="+OBSTypeValue);
                    /// dbgLog.fine("OBSTypeHexValue="+OBSTypeHexValue);

                } else {
                    // throw new IOException
                }

                dbgLog.fine("***** end of subType 4 ***** \n");
                break;
            case 5:
                // Variable Sets Information
                parseRT7SubTypefield(stream);
                break;
            case 6:
                // Trends date information
                parseRT7SubTypefield(stream);
                break;
            case 7:
                // Multiple response groups
                parseRT7SubTypefield(stream);
                break;
            case 8:
                // Windows Data Entry data
                parseRT7SubTypefield(stream);
                break;
            case 9:
                //
                parseRT7SubTypefield(stream);
                break;
            case 10:
                // TextSmart data
                parseRT7SubTypefield(stream);
                break;
            case 11:
                // Msmt level, col width, & alignment
                //parseRT7SubTypefield(stream);

                headerSection = parseRT7SubTypefieldHeader(stream);
                if (headerSection != null) {
                    int unitLength = headerSection[0];
                    int numberOfUnits = headerSection[1];

                    for (int i = 0; i < numberOfUnits; i++) {
                        dbgLog.finer(i + "-th fieldData");
                        byte[] work = new byte[unitLength];

                        int nb = stream.read(work);
                        dbgLog.finer("raw bytes in Hex:" + new String(Hex.encodeHex(work)));
                        ByteBuffer bb_field = ByteBuffer.wrap(work);
                        if (isLittleEndian) {
                            bb_field.order(ByteOrder.LITTLE_ENDIAN);
                        }
                        dbgLog.finer("raw bytes in Hex:" + new String(Hex.encodeHex(bb_field.array())));

                        if (unitLength == 4) {
                            int fieldData = bb_field.getInt();
                            dbgLog.finer("fieldData(int)=" + fieldData);
                            dbgLog.finer("fieldData in Hex=0x" + Integer.toHexString(fieldData));

                            int remainder = i % 3;
                            dbgLog.finer("remainder=" + remainder);
                            if (remainder == 0) {
                                /// measurementLevel.add(fieldData);
                            } else if (remainder == 1) {
                                /// columnWidth.add(fieldData);
                            } else if (remainder == 2) {
                                /// alignment.add(fieldData);
                            }
                        }

                    }

                } else {
                    // throw new IOException
                }
                /// dbgLog.fine("measurementLevel="+measurementLevel);
                /// dbgLog.fine("columnWidth="+columnWidth);
                /// dbgLog.fine("alignment="+alignment);
                dbgLog.fine("end of subType 11\n");

                break;
            case 12:
                // Windows Data Entry GUID
                parseRT7SubTypefield(stream);
                break;
            case 13:
                // Extended variable names
                // parseRT7SubTypefield(stream);
                headerSection = parseRT7SubTypefieldHeader(stream);

                if (headerSection != null) {
                    int unitLength = headerSection[0];
                    dbgLog.fine("RT7: unitLength=" + unitLength);
                    int numberOfUnits = headerSection[1];
                    dbgLog.fine("RT7: numberOfUnits=" + numberOfUnits);
                    byte[] work = new byte[unitLength * numberOfUnits];
                    int nbtyes13 = stream.read(work);

                    String[] variableShortLongNamePairs = new String(work, "US-ASCII").split("\t");

                    for (int i = 0; i < variableShortLongNamePairs.length; i++) {
                        dbgLog.fine("RT7: " + i + "-th pair" + variableShortLongNamePairs[i]);
                        String[] pair = variableShortLongNamePairs[i].split("=");
                        shortToLongVariableNameTable.put(pair[0], pair[1]);
                    }

                    dbgLog.fine("RT7: shortToLongVarialbeNameTable" + shortToLongVariableNameTable);
                    // We are saving the short-to-long name map; at the
                    // end of ingest, we'll go through the data variables and
                    // change the names accordingly. 

                    // smd.setShortToLongVarialbeNameTable(shortToLongVarialbeNameTable);
                } else {
                    // throw new IOException
                }

                break;
            case 14:
                // Extended strings
                //parseRT7SubTypefield(stream);
                headerSection = parseRT7SubTypefieldHeader(stream);

                if (headerSection != null) {
                    int unitLength = headerSection[0];
                    dbgLog.fine("RT7.14: unitLength=" + unitLength);
                    int numberOfUnits = headerSection[1];
                    dbgLog.fine("RT7.14: numberOfUnits=" + numberOfUnits);
                    byte[] work = new byte[unitLength * numberOfUnits];
                    int nbtyes13 = stream.read(work);

                    String[] extendedVariablesSizePairs = new String(work, defaultCharSet).split("\000\t");

                    for (int i = 0; i < extendedVariablesSizePairs.length; i++) {
                        dbgLog.fine("RT7.14: " + i + "-th pair" + extendedVariablesSizePairs[i]);
                        if (extendedVariablesSizePairs[i].indexOf("=") > 0) {
                            String[] pair = extendedVariablesSizePairs[i].split("=");
                            extendedVariablesSizeTable.put(pair[0], Integer.valueOf(pair[1]));
                        }
                    }

                    dbgLog.fine("RT7.14: extendedVariablesSizeTable" + extendedVariablesSizeTable);
                } else {
                    // throw new IOException
                }

                break;
            case 15:
                // Clementine Metadata
                parseRT7SubTypefield(stream);
                break;
            case 16:
                // 64 bit N of cases
                parseRT7SubTypefield(stream);
                break;
            case 17:
                // File level attributes
                parseRT7SubTypefield(stream);
                break;
            case 18:
                // Variable attributes
                parseRT7SubTypefield(stream);
                break;
            case 19:
                // Extended multiple response groups
                parseRT7SubTypefield(stream);
                break;
            case 20:
                // Character encoding, aka code page.
                // Must be a version 16+ feature (?).
                // Starting v.16, the default character encoding for SAV
                // files is UTF-8; but then it is possible to specify an 
                // alternative encoding here. 
                // A typical use case would be people setting it to "ISO-Latin" 
                // or "windows-1252", or a similar 8-bit encoding to store 
                // text with standard Western European accents.
                // -- L.A.

                headerSection = parseRT7SubTypefieldHeader(stream);

                if (headerSection != null) {
                    int unitLength = headerSection[0];
                    dbgLog.fine("RT7-20: unitLength=" + unitLength);
                    int numberOfUnits = headerSection[1];
                    dbgLog.fine("RT7-20: numberOfUnits=" + numberOfUnits);
                    byte[] rt7st20bytes = new byte[unitLength * numberOfUnits];
                    int nbytes20 = stream.read(rt7st20bytes);

                    String dataCharSet = new String(rt7st20bytes, "US-ASCII");

                    if (dataCharSet != null && !(dataCharSet.equals(""))) {
                        dbgLog.fine("RT7-20: data charset: " + dataCharSet);
                        defaultCharSet = dataCharSet;
                    }
                } /*else {
                  // TODO: 
                  // decide if the exception should actually be thrown here!
                  // -- L.A. 4.0 beta
                  // throw new IOException
                  }*/

                break;
            case 21:
                // Value labels for long strings
                parseRT7SubTypefield(stream);
                break;
            case 22:
                // Missing values for long strings
                parseRT7SubTypefield(stream);
                break;
            default:
                parseRT7SubTypefield(stream);
            }

        } catch (IOException ex) {
            //ex.printStackTrace();
            throw ex;
        }

        counter++;

        if (counter > 20) {
            break;
        }
    }

    dbgLog.fine("RT7: counter=" + counter);
    dbgLog.fine("RT7: decodeRecordType7(): end");
}

From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.sav.SAVFileReader.java

void decodeRecordTypeDataUnCompressed(BufferedInputStream stream) throws IOException {
    dbgLog.fine("***** decodeRecordTypeDataUnCompressed(): start *****");

    if (stream == null) {
        throw new IllegalArgumentException("decodeRecordTypeDataUnCompressed: stream == null!");
    }//from w  w  w  . jav  a2  s  . c  o m

    int varQnty = dataTable.getVarQuantity().intValue();

    // 
    // set-up tab file

    PrintWriter pwout = createOutputWriter(stream);

    boolean hasStringVarContinuousBlock = obsNonVariableBlockSet.size() > 0 ? true : false;
    dbgLog.fine("hasStringVarContinuousBlock=" + hasStringVarContinuousBlock);

    int ii = 0;

    int OBS = LENGTH_SAV_OBS_BLOCK;
    int nOBS = OBSUnitsPerCase;

    dbgLog.fine("OBSUnitsPerCase=" + OBSUnitsPerCase);

    int caseIndex = 0;

    dbgLog.fine("printFormatTable:\n" + printFormatTable);

    variableFormatTypeList = new String[varQnty];
    dateFormatList = new String[varQnty];

    for (int i = 0; i < varQnty; i++) {
        variableFormatTypeList[i] = SPSSConstants.FORMAT_CATEGORY_TABLE
                .get(printFormatTable.get(variableNameList.get(i)));
        dbgLog.fine("i=" + i + "th variableFormatTypeList=" + variableFormatTypeList[i]);
        formatCategoryTable.put(variableNameList.get(i), variableFormatTypeList[i]);
    }
    dbgLog.fine("variableFormatType:\n" + Arrays.deepToString(variableFormatTypeList));
    dbgLog.fine("formatCategoryTable:\n" + formatCategoryTable);

    int numberOfDecimalVariables = 0;

    // TODO: 
    // Make sure the date formats are actually preserved! 
    // (this is something that was collected in the code below and passed
    // to the UNF calculator). 
    // -- L.A. 4.0 alpha

    List<String> casewiseRecordForTabFile = new ArrayList<String>();

    // missing values are written to the tab-delimited file by
    // using the default or user-specified missing-value  strings;
    // however, to calculate UNF/summary statistics,
    // classes for these calculations require their specific 
    // missing values that differ from the above missing-value
    // strings; therefore, after row data for the tab-delimited 
    // file are written, missing values in a row are changed to
    // UNF/summary-statistics-OK ones.

    // data-storage object for sumStat
    ///dataTable2 = new Object[varQnty][caseQnty];
    // storage of date formats to pass to UNF   
    ///dateFormats = new String[varQnty][caseQnty];

    try {
        for (int i = 0;; i++) { // case-wise loop

            byte[] buffer = new byte[OBS * nOBS];

            int nbytesuc = stream.read(buffer);

            StringBuilder sb_stringStorage = new StringBuilder("");

            for (int k = 0; k < nOBS; k++) {
                int offset = OBS * k;

                // uncompressed case
                // numeric missing value == sysmis
                // FF FF FF FF FF FF eF FF(little endian)
                // string missing value
                // 20 20 20 20 20 20 20 20
                // cf: compressed case 
                // numeric type:sysmis == 0xFF
                // string type: missing value == 0xFE
                // 

                boolean isNumeric = OBSwiseTypelList.get(k) == 0 ? true : false;

                if (isNumeric) {
                    dbgLog.finer(k + "-th variable is numeric");
                    // interprete as double
                    ByteBuffer bb_double = ByteBuffer.wrap(buffer, offset, LENGTH_SAV_OBS_BLOCK);
                    if (isLittleEndian) {
                        bb_double.order(ByteOrder.LITTLE_ENDIAN);
                    }
                    //char[] hexpattern =
                    String dphex = new String(Hex.encodeHex(
                            Arrays.copyOfRange(bb_double.array(), offset, offset + LENGTH_SAV_OBS_BLOCK)));
                    dbgLog.finer("dphex=" + dphex);

                    if ((dphex.equals("ffffffffffffefff")) || (dphex.equals("ffefffffffffffff"))) {
                        //casewiseRecordForTabFile.add(systemMissingValue);
                        // add the numeric missing value
                        dbgLog.fine("SAV Reader: adding: Missing Value (numeric)");
                        casewiseRecordForTabFile.add(MissingValueForTextDataFileNumeric);
                    } else {
                        Double ddatum = bb_double.getDouble();
                        dbgLog.fine("SAV Reader: adding: ddatum=" + ddatum);

                        // add this non-missing-value numeric datum
                        casewiseRecordForTabFile.add(doubleNumberFormatter.format(ddatum));
                    }

                } else {
                    dbgLog.finer(k + "-th variable is string");
                    // string case
                    // strip space-padding
                    // do not trim: string might have spaces within it
                    // the missing value (hex) for a string variable is:
                    // "20 20 20 20 20 20 20 20"

                    String strdatum = new String(
                            Arrays.copyOfRange(buffer, offset, (offset + LENGTH_SAV_OBS_BLOCK)),
                            defaultCharSet);
                    dbgLog.finer("str_datum=" + strdatum);
                    // add this non-missing-value string datum 
                    casewiseRecordForTabFile.add(strdatum);

                } // if isNumeric

            } // k-loop

            // String-variable's continuous block exits:
            if (hasStringVarContinuousBlock) {
                // continuous blocks: string case
                // concatenating process
                //dbgLog.fine("concatenating process starts");

                //dbgLog.fine("casewiseRecordForTabFile(before)="+casewiseRecordForTabFile);
                //dbgLog.fine("casewiseRecordForTabFile(before:size)="+casewiseRecordForTabFile.size());

                StringBuilder sb = new StringBuilder("");
                int firstPosition = 0;

                Set<Integer> removeJset = new HashSet<Integer>();
                for (int j = 0; j < nOBS; j++) {
                    dbgLog.finer("j=" + j + "-th type =" + OBSwiseTypelList.get(j));
                    if (OBSwiseTypelList.get(j) == -1) {
                        // String continued fount at j-th 
                        // look back the j-1 
                        firstPosition = j - 1;
                        int lastJ = j;
                        String concatanated = null;

                        removeJset.add(j);
                        sb.append(casewiseRecordForTabFile.get(j - 1));
                        sb.append(casewiseRecordForTabFile.get(j));
                        for (int jc = 1;; jc++) {
                            if (OBSwiseTypelList.get(j + jc) != -1) {
                                // j is the end unit of this string variable
                                concatanated = sb.toString();
                                sb.setLength(0);
                                lastJ = j + jc;
                                break;
                            } else {
                                sb.append(casewiseRecordForTabFile.get(j + jc));
                                removeJset.add(j + jc);
                            }
                        }
                        casewiseRecordForTabFile.set(j - 1, concatanated);

                        //out.println(j-1+"th concatanated="+concatanated);
                        j = lastJ - 1;

                    } // end-of-if: continuous-OBS only
                } // end of loop-j

                List<String> newDataLine = new ArrayList<String>();

                for (int jl = 0; jl < casewiseRecordForTabFile.size(); jl++) {
                    //out.println("jl="+jl+"-th datum =["+casewiseRecordForTabFile.get(jl)+"]");

                    if (!removeJset.contains(jl)) {
                        newDataLine.add(casewiseRecordForTabFile.get(jl));
                    }
                }

                dbgLog.fine("new casewiseRecordForTabFile=" + newDataLine);
                dbgLog.fine("new casewiseRecordForTabFile(size)=" + newDataLine.size());

                casewiseRecordForTabFile = newDataLine;

            } // end-if: stringContinuousVar-exist case

            caseIndex++;
            dbgLog.finer("caseIndex=" + caseIndex);
            for (int k = 0; k < casewiseRecordForTabFile.size(); k++) {

                if (variableTypelList.get(k) > 0) {

                    // See my comments for this padding removal logic
                    // in the "compressed" method -- L.A.

                    String paddRemoved = StringUtils.stripEnd(casewiseRecordForTabFile.get(k).toString(), null);
                    // TODO: clean this up.  For now, just make sure that strings contain at least one blank space.
                    if (paddRemoved.equals("")) {
                        paddRemoved = " ";
                    }

                    //casewiseRecordForTabFile.set(k, "\"" + paddRemoved.replaceAll("\"", Matcher.quoteReplacement("\\\"")) + "\"");
                    casewiseRecordForTabFile.set(k, escapeCharacterString(paddRemoved));

                    // end of String var case

                } // end of variable-type check

                if (casewiseRecordForTabFile.get(k) != null
                        && !casewiseRecordForTabFile.get(k).equals(MissingValueForTextDataFileNumeric)) {

                    // to do date conversion
                    String variableFormatType = variableFormatTypeList[k];
                    dbgLog.finer("k=" + k + "th variable format=" + variableFormatType);

                    int formatDecimalPointPosition = formatDecimalPointPositionList.get(k);

                    if (variableFormatType.equals("date")) {
                        dbgLog.finer("date case");

                        long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString()) * 1000L
                                - SPSS_DATE_OFFSET;

                        String newDatum = sdf_ymd.format(new Date(dateDatum));
                        dbgLog.finer("k=" + k + ":" + newDatum);

                        casewiseRecordForTabFile.set(k, newDatum);
                        dateFormatList[k] = sdf_ymd.toPattern();
                    } else if (variableFormatType.equals("time")) {
                        dbgLog.finer("time case:DTIME or DATETIME or TIME");
                        //formatCategoryTable.put(variableNameList.get(k), "time");
                        // not treating DTIME as date/time; see comment elsewhere in 
                        // the code; 
                        // (but we do need to remember to treat the resulting values 
                        // as character strings, not numerics!)

                        if (printFormatTable.get(variableNameList.get(k)).equals("DTIME")) {

                            if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
                                long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString())
                                        * 1000L - SPSS_DATE_BIAS;
                                String newDatum = sdf_dhms.format(new Date(dateDatum));
                                // Note: DTIME is not a complete date, so we don't save a date format with it
                                dbgLog.finer("k=" + k + ":" + newDatum);
                                casewiseRecordForTabFile.set(k, newDatum);
                            } else {
                                // decimal point included
                                String[] timeData = casewiseRecordForTabFile.get(k).toString().split("\\.");

                                dbgLog.finer(StringUtils.join(timeData, "|"));
                                long dateDatum = Long.parseLong(timeData[0]) * 1000L - SPSS_DATE_BIAS;
                                StringBuilder sb_time = new StringBuilder(sdf_dhms.format(new Date(dateDatum)));

                                if (formatDecimalPointPosition > 0) {
                                    sb_time.append("." + timeData[1].substring(0, formatDecimalPointPosition));
                                }

                                dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                casewiseRecordForTabFile.set(k, sb_time.toString());
                            }
                        } else if (printFormatTable.get(variableNameList.get(k)).equals("DATETIME")) {
                            // TODO: 
                            // (for both datetime and "dateless" time)
                            // keep the longest of the matching formats - i.e., if there are *some*
                            // values in the vector that have thousands of a second, that should be 
                            // part of the saved format!
                            //  -- L.A. Aug. 12 2014 

                            if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
                                long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString())
                                        * 1000L - SPSS_DATE_OFFSET;
                                String newDatum = sdf_ymdhms.format(new Date(dateDatum));
                                dbgLog.finer("k=" + k + ":" + newDatum);
                                casewiseRecordForTabFile.set(k, newDatum);
                                dateFormatList[k] = sdf_ymdhms.toPattern();
                            } else {
                                // decimal point included
                                String[] timeData = casewiseRecordForTabFile.get(k).toString().split("\\.");

                                //dbgLog.finer(StringUtils.join(timeData, "|"));
                                long dateDatum = Long.parseLong(timeData[0]) * 1000L - SPSS_DATE_OFFSET;
                                StringBuilder sb_time = new StringBuilder(
                                        sdf_ymdhms.format(new Date(dateDatum)));
                                //dbgLog.finer(sb_time.toString());

                                if (formatDecimalPointPosition > 0) {
                                    sb_time.append("." + timeData[1].substring(0, formatDecimalPointPosition));
                                }
                                dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                casewiseRecordForTabFile.set(k, sb_time.toString());
                                // datetime with milliseconds:
                                dateFormatList[k] = sdf_ymdhms.toPattern()
                                        + (formatDecimalPointPosition > 0 ? ".S" : "");
                            }
                        } else if (printFormatTable.get(variableNameList.get(k)).equals("TIME")) {
                            if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
                                long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString())
                                        * 1000L;
                                String newDatum = sdf_hms.format(new Date(dateDatum));
                                dbgLog.finer("k=" + k + ":" + newDatum);
                                casewiseRecordForTabFile.set(k, newDatum);
                                if (dateFormatList[k] == null) {
                                    dateFormatList[k] = sdf_hms.toPattern();
                                }
                            } else {
                                // decimal point included
                                String[] timeData = casewiseRecordForTabFile.get(k).toString().split("\\.");

                                //dbgLog.finer(StringUtils.join(timeData, "|"));
                                long dateDatum = Long.parseLong(timeData[0]) * 1000L;
                                StringBuilder sb_time = new StringBuilder(sdf_hms.format(new Date(dateDatum)));
                                //dbgLog.finer(sb_time.toString());

                                if (formatDecimalPointPosition > 0) {
                                    sb_time.append("." + timeData[1].substring(0, formatDecimalPointPosition));
                                }
                                dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                casewiseRecordForTabFile.set(k, sb_time.toString());
                                // time, possibly with milliseconds:
                                String format_hmsS = sdf_hms.toPattern()
                                        + (formatDecimalPointPosition > 0 ? ".S" : "");
                                if (dateFormatList[k] == null
                                        || (format_hmsS.length() > dateFormatList[k].length())) {
                                    dateFormatList[k] = format_hmsS;
                                }
                            }
                        }
                    } else if (variableFormatType.equals("other")) {
                        dbgLog.finer("other non-date/time case");

                        if (printFormatTable.get(variableNameList.get(k)).equals("WKDAY")) {
                            // day of week
                            dbgLog.finer("data k=" + k + ":" + casewiseRecordForTabFile.get(k));
                            dbgLog.finer("data k=" + k + ":" + SPSSConstants.WEEKDAY_LIST
                                    .get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1));
                            String newDatum = SPSSConstants.WEEKDAY_LIST
                                    .get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1);
                            casewiseRecordForTabFile.set(k, newDatum);
                            dbgLog.finer("wkday:k=" + k + ":" + casewiseRecordForTabFile.get(k));
                        } else if (printFormatTable.get(variableNameList.get(k)).equals("MONTH")) {
                            // month
                            dbgLog.finer("data k=" + k + ":" + casewiseRecordForTabFile.get(k));
                            dbgLog.finer("data k=" + k + ":" + SPSSConstants.MONTH_LIST
                                    .get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1));
                            String newDatum = SPSSConstants.MONTH_LIST
                                    .get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1);
                            casewiseRecordForTabFile.set(k, newDatum);
                            dbgLog.finer("month:k=" + k + ":" + casewiseRecordForTabFile.get(k));

                        }
                    }
                    // end of date/time block
                } // end: date-time-datum check

            } // end: loop-k(2nd: variablte-wise-check)

            // write to tab file
            if (casewiseRecordForTabFile.size() > 0) {
                pwout.println(StringUtils.join(casewiseRecordForTabFile, "\t"));
            }

            // numeric contents-check
            for (int l = 0; l < casewiseRecordForTabFile.size(); l++) {
                if (variableFormatTypeList[l].equals("date") || variableFormatTypeList[l].equals("time")
                        || printFormatTable.get(variableNameList.get(l)).equals("WKDAY")
                        || printFormatTable.get(variableNameList.get(l)).equals("MONTH")) {

                } else {
                    if (variableTypelList.get(l) <= 0) {
                        if (casewiseRecordForTabFile.get(l).toString().indexOf(".") >= 0) {
                            decimalVariableSet.add(l);
                        }
                    }
                }
            }

            // reset the case-wise working objects
            casewiseRecordForTabFile.clear();

            if (stream.available() == 0) {
                // reached the end of this file
                // do exit-processing

                dbgLog.fine("reached the end of the file at " + ii + "th iteration");

                break;
            } // if eof processing
        } //i-loop: case(row) iteration

        // close the writer
        pwout.close();

    } catch (IOException ex) {
        throw ex;
    }

    // contents check
    dbgLog.fine("numberOfDecimalVariables=" + numberOfDecimalVariables);
    dbgLog.fine("decimalVariableSet=" + decimalVariableSet);

    dbgLog.fine("***** decodeRecordTypeDataUnCompressed(): end *****");
}

From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.sav.SAVFileReader.java

void decodeRecordType7(BufferedInputStream stream) throws IOException {
    dbgLog.fine("***** decodeRecordType7(): start *****");
    int counter = 0;
    int[] headerSection = new int[2];

    // the variables below may no longer needed; 
    // but they may be useful for debugging/logging purposes.

    /// // RecordType 7 
    /// // Subtype 3
    /// List<Integer> releaseMachineSpecificInfo = new ArrayList<Integer>();
    /// List<String> releaseMachineSpecificInfoHex = new ArrayList<String>();

    /// // Subytpe 4
    /// Map<String, Double> OBSTypeValue = new LinkedHashMap<String, Double>();
    /// Map<String, String> OBSTypeHexValue = new LinkedHashMap<String, String>();    
    //Subtype 11//from   w  w w  . j  a va2s.  c  om
    /// List<Integer> measurementLevel = new ArrayList<Integer>();
    /// List<Integer> columnWidth = new ArrayList<Integer>();
    /// List<Integer> alignment = new ArrayList<Integer>();

    Map<String, String> shortToLongVarialbeNameTable = new LinkedHashMap<String, String>();

    while (true) {
        try {
            if (stream == null) {
                throw new IllegalArgumentException("RT7: stream == null!");
            }
            // first check the 4-byte header value
            //if (stream.markSupported()){
            stream.mark(1000);
            //}
            // 7.0 check the first 4 bytes
            byte[] headerCodeRt7 = new byte[LENGTH_RECORD_TYPE7_CODE];

            int nbytes_rt7 = stream.read(headerCodeRt7, 0, LENGTH_RECORD_TYPE7_CODE);
            // to-do check against nbytes
            //printHexDump(headerCodeRt7, "RT7 header test");
            ByteBuffer bb_header_code_rt7 = ByteBuffer.wrap(headerCodeRt7, 0, LENGTH_RECORD_TYPE7_CODE);
            if (isLittleEndian) {
                bb_header_code_rt7.order(ByteOrder.LITTLE_ENDIAN);
            }

            int intRT7test = bb_header_code_rt7.getInt();
            dbgLog.fine("RT7: header test value=" + intRT7test);
            if (intRT7test != 7) {
                //if (stream.markSupported()){
                //out.print("iteration="+safteyCounter);
                //dbgLog.fine("iteration="+safteyCounter);
                dbgLog.fine("intRT7test failed=" + intRT7test);
                dbgLog.fine("counter=" + counter);
                stream.reset();
                return;
                //}
            }

            // 7.1 check 4-byte integer Sub-Type Code

            byte[] length_sub_type_code = new byte[LENGTH_RT7_SUB_TYPE_CODE];

            int nbytes_rt7_1 = stream.read(length_sub_type_code, 0, LENGTH_RT7_SUB_TYPE_CODE);
            // to-do check against nbytes

            //printHexDump(length_how_many_line_bytes, "RT7 how_many_line_bytes");
            ByteBuffer bb_sub_type_code = ByteBuffer.wrap(length_sub_type_code, 0, LENGTH_RT7_SUB_TYPE_CODE);
            if (isLittleEndian) {
                bb_sub_type_code.order(ByteOrder.LITTLE_ENDIAN);
            }

            int subTypeCode = bb_sub_type_code.getInt();
            dbgLog.fine("RT7: subTypeCode=" + subTypeCode);

            switch (subTypeCode) {
            case 3:
                // 3: Release andMachine-Specific Integer Information

                //parseRT7SubTypefield(stream);

                headerSection = parseRT7SubTypefieldHeader(stream);
                if (headerSection != null) {
                    int unitLength = headerSection[0];
                    int numberOfUnits = headerSection[1];

                    for (int i = 0; i < numberOfUnits; i++) {
                        dbgLog.finer(i + "-th fieldData");
                        byte[] work = new byte[unitLength];

                        int nb = stream.read(work);
                        dbgLog.finer("raw bytes in Hex:" + new String(Hex.encodeHex(work)));
                        ByteBuffer bb_field = ByteBuffer.wrap(work);
                        if (isLittleEndian) {
                            bb_field.order(ByteOrder.LITTLE_ENDIAN);
                        }
                        String dataInHex = new String(Hex.encodeHex(bb_field.array()));
                        /// releaseMachineSpecificInfoHex.add(dataInHex);

                        dbgLog.finer("raw bytes in Hex:" + dataInHex);
                        if (unitLength == 4) {
                            int fieldData = bb_field.getInt();
                            dbgLog.finer("fieldData(int)=" + fieldData);
                            dbgLog.finer("fieldData in Hex=0x" + Integer.toHexString(fieldData));
                            /// releaseMachineSpecificInfo.add(fieldData);
                        }

                    }

                    /// dbgLog.fine("releaseMachineSpecificInfo="+releaseMachineSpecificInfo);
                    /// dbgLog.fine("releaseMachineSpecificInfoHex="+releaseMachineSpecificInfoHex);

                } else {
                    // throw new IOException
                }

                dbgLog.fine("***** end of subType 3 ***** \n");

                break;
            case 4:
                // Release andMachine-SpecificOBS-Type Information
                headerSection = parseRT7SubTypefieldHeader(stream);
                if (headerSection != null) {
                    int unitLength = headerSection[0];
                    int numberOfUnits = headerSection[1];

                    for (int i = 0; i < numberOfUnits; i++) {
                        dbgLog.finer(i + "-th fieldData:" + RecordType7SubType4Fields.get(i));
                        byte[] work = new byte[unitLength];

                        int nb = stream.read(work);

                        dbgLog.finer("raw bytes in Hex:" + new String(Hex.encodeHex(work)));
                        ByteBuffer bb_field = ByteBuffer.wrap(work);
                        dbgLog.finer("byte order=" + bb_field.order().toString());
                        if (isLittleEndian) {
                            bb_field.order(ByteOrder.LITTLE_ENDIAN);
                        }
                        ByteBuffer bb_field_dup = bb_field.duplicate();
                        OBSTypeHexValue.put(RecordType7SubType4Fields.get(i),
                                new String(Hex.encodeHex(bb_field.array())));
                        //                            dbgLog.finer("raw bytes in Hex:"+
                        //                                OBSTypeHexValue.get(RecordType7SubType4Fields.get(i)));
                        if (unitLength == 8) {
                            double fieldData = bb_field.getDouble();
                            /// OBSTypeValue.put(RecordType7SubType4Fields.get(i), fieldData);
                            dbgLog.finer("fieldData(double)=" + fieldData);
                            OBSTypeHexValue.put(RecordType7SubType4Fields.get(i),
                                    Double.toHexString(fieldData));
                            dbgLog.fine("fieldData in Hex=" + Double.toHexString(fieldData));
                        }
                    }
                    /// dbgLog.fine("OBSTypeValue="+OBSTypeValue);
                    /// dbgLog.fine("OBSTypeHexValue="+OBSTypeHexValue);

                } else {
                    // throw new IOException
                }

                dbgLog.fine("***** end of subType 4 ***** \n");
                break;
            case 5:
                // Variable Sets Information
                parseRT7SubTypefield(stream);
                break;
            case 6:
                // Trends date information
                parseRT7SubTypefield(stream);
                break;
            case 7:
                // Multiple response groups
                parseRT7SubTypefield(stream);
                break;
            case 8:
                // Windows Data Entry data
                parseRT7SubTypefield(stream);
                break;
            case 9:
                //
                parseRT7SubTypefield(stream);
                break;
            case 10:
                // TextSmart data
                parseRT7SubTypefield(stream);
                break;
            case 11:
                // Msmt level, col width, & alignment
                //parseRT7SubTypefield(stream);

                headerSection = parseRT7SubTypefieldHeader(stream);
                if (headerSection != null) {
                    int unitLength = headerSection[0];
                    int numberOfUnits = headerSection[1];

                    for (int i = 0; i < numberOfUnits; i++) {
                        dbgLog.finer(i + "-th fieldData");
                        byte[] work = new byte[unitLength];

                        int nb = stream.read(work);
                        dbgLog.finer("raw bytes in Hex:" + new String(Hex.encodeHex(work)));
                        ByteBuffer bb_field = ByteBuffer.wrap(work);
                        if (isLittleEndian) {
                            bb_field.order(ByteOrder.LITTLE_ENDIAN);
                        }
                        dbgLog.finer("raw bytes in Hex:" + new String(Hex.encodeHex(bb_field.array())));

                        if (unitLength == 4) {
                            int fieldData = bb_field.getInt();
                            dbgLog.finer("fieldData(int)=" + fieldData);
                            dbgLog.finer("fieldData in Hex=0x" + Integer.toHexString(fieldData));

                            int remainder = i % 3;
                            dbgLog.finer("remainder=" + remainder);
                            if (remainder == 0) {
                                /// measurementLevel.add(fieldData);
                            } else if (remainder == 1) {
                                /// columnWidth.add(fieldData);
                            } else if (remainder == 2) {
                                /// alignment.add(fieldData);
                            }
                        }

                    }

                } else {
                    // throw new IOException
                }
                /// dbgLog.fine("measurementLevel="+measurementLevel);
                /// dbgLog.fine("columnWidth="+columnWidth);
                /// dbgLog.fine("alignment="+alignment);
                dbgLog.fine("***** end of subType 11 ***** \n");

                break;
            case 12:
                // Windows Data Entry GUID
                parseRT7SubTypefield(stream);
                break;
            case 13:
                // Extended variable names
                // parseRT7SubTypefield(stream);
                headerSection = parseRT7SubTypefieldHeader(stream);

                if (headerSection != null) {
                    int unitLength = headerSection[0];
                    dbgLog.fine("RT7: unitLength=" + unitLength);
                    int numberOfUnits = headerSection[1];
                    dbgLog.fine("RT7: numberOfUnits=" + numberOfUnits);
                    byte[] work = new byte[unitLength * numberOfUnits];
                    int nbtyes13 = stream.read(work);

                    String[] variableShortLongNamePairs = new String(work, "US-ASCII").split("\t");

                    for (int i = 0; i < variableShortLongNamePairs.length; i++) {
                        dbgLog.fine("RT7: " + i + "-th pair" + variableShortLongNamePairs[i]);
                        String[] pair = variableShortLongNamePairs[i].split("=");
                        shortToLongVarialbeNameTable.put(pair[0], pair[1]);
                    }

                    dbgLog.fine("RT7: shortToLongVarialbeNameTable" + shortToLongVarialbeNameTable);
                    smd.setShortToLongVarialbeNameTable(shortToLongVarialbeNameTable);
                } else {
                    // throw new IOException
                }

                break;
            case 14:
                // Extended strings
                //parseRT7SubTypefield(stream);
                headerSection = parseRT7SubTypefieldHeader(stream);

                if (headerSection != null) {
                    int unitLength = headerSection[0];
                    dbgLog.fine("RT7.14: unitLength=" + unitLength);
                    int numberOfUnits = headerSection[1];
                    dbgLog.fine("RT7.14: numberOfUnits=" + numberOfUnits);
                    byte[] work = new byte[unitLength * numberOfUnits];
                    int nbtyes13 = stream.read(work);

                    String[] extendedVariablesSizePairs = new String(work, defaultCharSet).split("\000\t");

                    for (int i = 0; i < extendedVariablesSizePairs.length; i++) {
                        dbgLog.fine("RT7.14: " + i + "-th pair" + extendedVariablesSizePairs[i]);
                        if (extendedVariablesSizePairs[i].indexOf("=") > 0) {
                            String[] pair = extendedVariablesSizePairs[i].split("=");
                            extendedVariablesSizeTable.put(pair[0], Integer.valueOf(pair[1]));
                        }
                    }

                    dbgLog.fine("RT7.14: extendedVariablesSizeTable" + extendedVariablesSizeTable);
                } else {
                    // throw new IOException
                }

                break;
            case 15:
                // Clementine Metadata
                parseRT7SubTypefield(stream);
                break;
            case 16:
                // 64 bit N of cases
                parseRT7SubTypefield(stream);
                break;
            case 17:
                // File level attributes
                parseRT7SubTypefield(stream);
                break;
            case 18:
                // Variable attributes
                parseRT7SubTypefield(stream);
                break;
            case 19:
                // Extended multiple response groups
                parseRT7SubTypefield(stream);
                break;
            case 20:
                // Encoding, aka code page
                parseRT7SubTypefield(stream);
                /* TODO: This needs to be researched; 
                 * Is this field really used, ever?
                headerSection = parseRT7SubTypefieldHeader(stream);
                        
                if (headerSection != null){
                int unitLength = headerSection[0];
                dbgLog.fine("RT7-20: unitLength="+unitLength);
                int numberOfUnits = headerSection[1];
                dbgLog.fine("RT7-20: numberOfUnits="+numberOfUnits);
                byte[] rt7st20bytes = new byte[unitLength*numberOfUnits];
                int nbytes20 = stream.read(rt7st20bytes);
                        
                String dataCharSet = new String(rt7st20bytes,"US-ASCII");
                        
                if (dataCharSet != null && !(dataCharSet.equals(""))) {
                    dbgLog.fine("RT7-20: data charset: "+ dataCharSet);
                    defaultCharSet = dataCharSet; 
                }
                } else {
                // throw new IOException
                }
                 * 
                 */

                break;
            case 21:
                // Value labels for long strings
                parseRT7SubTypefield(stream);
                break;
            case 22:
                // Missing values for long strings
                parseRT7SubTypefield(stream);
                break;
            default:
                parseRT7SubTypefield(stream);
            }

        } catch (IOException ex) {
            //ex.printStackTrace();
            throw ex;
        }

        counter++;

        if (counter > 20) {
            break;
        }
    }

    dbgLog.fine("RT7: counter=" + counter);
    dbgLog.fine("RT7: ***** decodeRecordType7(): end *****");
}

From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.sav.SAVFileReader.java

void decodeRecordTypeDataUnCompressed(BufferedInputStream stream) throws IOException {
    dbgLog.fine("***** decodeRecordTypeDataUnCompressed(): start *****");

    if (stream == null) {
        throw new IllegalArgumentException("decodeRecordTypeDataUnCompressed: stream == null!");
    }//from ww w  .j  ava  2  s  .c om

    Map<String, String> formatCategoryTable = new LinkedHashMap<String, String>();

    // 
    // set-up tab file

    PrintWriter pwout = createOutputWriter(stream);

    boolean hasStringVarContinuousBlock = obsNonVariableBlockSet.size() > 0 ? true : false;
    dbgLog.fine("hasStringVarContinuousBlock=" + hasStringVarContinuousBlock);

    int ii = 0;

    int OBS = LENGTH_SAV_OBS_BLOCK;
    int nOBS = OBSUnitsPerCase;

    dbgLog.fine("OBSUnitsPerCase=" + OBSUnitsPerCase);

    int caseIndex = 0;

    dbgLog.fine("printFormatTable:\n" + printFormatTable);

    dbgLog.fine("printFormatNameTable:\n" + printFormatNameTable);
    variableFormatTypeList = new String[varQnty];

    for (int i = 0; i < varQnty; i++) {
        variableFormatTypeList[i] = SPSSConstants.FORMAT_CATEGORY_TABLE
                .get(printFormatTable.get(variableNameList.get(i)));
        dbgLog.fine("i=" + i + "th variableFormatTypeList=" + variableFormatTypeList[i]);
        formatCategoryTable.put(variableNameList.get(i), variableFormatTypeList[i]);
    }
    dbgLog.fine("variableFormatType:\n" + Arrays.deepToString(variableFormatTypeList));
    dbgLog.fine("formatCategoryTable:\n" + formatCategoryTable);

    // contents (variable) checker concering decimals
    variableTypeFinal = new int[varQnty];
    Arrays.fill(variableTypeFinal, 0);

    int numberOfDecimalVariables = 0;

    List<String> casewiseRecordForTabFile = new ArrayList<String>();
    String[] caseWiseDateFormatForUNF = null;
    List<String> casewiseRecordForUNF = new ArrayList<String>();

    // missing values are written to the tab-delimited file by
    // using the default or user-specified missing-value  strings;
    // however, to calculate UNF/summary statistics,
    // classes for these calculations require their specific 
    // missing values that differ from the above missing-value
    // strings; therefore, after row data for the tab-delimited 
    // file are written, missing values in a row are changed to
    // UNF/summary-statistics-OK ones.

    // data-storage object for sumStat
    dataTable2 = new Object[varQnty][caseQnty];
    // storage of date formats to pass to UNF   
    dateFormats = new String[varQnty][caseQnty];

    try {
        for (int i = 0;; i++) { // case-wise loop

            byte[] buffer = new byte[OBS * nOBS];

            int nbytesuc = stream.read(buffer);

            StringBuilder sb_stringStorage = new StringBuilder("");

            for (int k = 0; k < nOBS; k++) {
                int offset = OBS * k;

                // uncompressed case
                // numeric missing value == sysmis
                // FF FF FF FF FF FF eF FF(little endian)
                // string missing value
                // 20 20 20 20 20 20 20 20
                // cf: compressed case 
                // numeric type:sysmis == 0xFF
                // string type: missing value == 0xFE
                // 

                boolean isNumeric = OBSwiseTypelList.get(k) == 0 ? true : false;

                if (isNumeric) {
                    dbgLog.finer(k + "-th variable is numeric");
                    // interprete as double
                    ByteBuffer bb_double = ByteBuffer.wrap(buffer, offset, LENGTH_SAV_OBS_BLOCK);
                    if (isLittleEndian) {
                        bb_double.order(ByteOrder.LITTLE_ENDIAN);
                    }
                    //char[] hexpattern =
                    String dphex = new String(Hex.encodeHex(
                            Arrays.copyOfRange(bb_double.array(), offset, offset + LENGTH_SAV_OBS_BLOCK)));
                    dbgLog.finer("dphex=" + dphex);

                    if ((dphex.equals("ffffffffffffefff")) || (dphex.equals("ffefffffffffffff"))) {
                        //casewiseRecordForTabFile.add(systemMissingValue);
                        // add the numeric missing value
                        dbgLog.fine("SAV Reader: adding: Missing Value (numeric)");
                        casewiseRecordForTabFile.add(MissingValueForTextDataFileNumeric);
                    } else {
                        Double ddatum = bb_double.getDouble();
                        dbgLog.fine("SAV Reader: adding: ddatum=" + ddatum);

                        // add this non-missing-value numeric datum
                        casewiseRecordForTabFile.add(doubleNumberFormatter.format(ddatum));
                    }

                } else {
                    dbgLog.finer(k + "-th variable is string");
                    // string case
                    // strip space-padding
                    // do not trim: string might have spaces within it
                    // the missing value (hex) for a string variable is:
                    // "20 20 20 20 20 20 20 20"

                    String strdatum = new String(
                            Arrays.copyOfRange(buffer, offset, (offset + LENGTH_SAV_OBS_BLOCK)),
                            defaultCharSet);
                    dbgLog.finer("str_datum=" + strdatum);
                    // add this non-missing-value string datum 
                    casewiseRecordForTabFile.add(strdatum);

                } // if isNumeric

            } // k-loop

            // String-variable's continuous block exits:
            if (hasStringVarContinuousBlock) {
                // continuous blocks: string case
                // concatenating process
                //dbgLog.fine("concatenating process starts");

                //dbgLog.fine("casewiseRecordForTabFile(before)="+casewiseRecordForTabFile);
                //dbgLog.fine("casewiseRecordForTabFile(before:size)="+casewiseRecordForTabFile.size());

                StringBuilder sb = new StringBuilder("");
                int firstPosition = 0;

                Set<Integer> removeJset = new HashSet<Integer>();
                for (int j = 0; j < nOBS; j++) {
                    dbgLog.finer("j=" + j + "-th type =" + OBSwiseTypelList.get(j));
                    if (OBSwiseTypelList.get(j) == -1) {
                        // String continued fount at j-th 
                        // look back the j-1 
                        firstPosition = j - 1;
                        int lastJ = j;
                        String concatanated = null;

                        removeJset.add(j);
                        sb.append(casewiseRecordForTabFile.get(j - 1));
                        sb.append(casewiseRecordForTabFile.get(j));
                        for (int jc = 1;; jc++) {
                            if (OBSwiseTypelList.get(j + jc) != -1) {
                                // j is the end unit of this string variable
                                concatanated = sb.toString();
                                sb.setLength(0);
                                lastJ = j + jc;
                                break;
                            } else {
                                sb.append(casewiseRecordForTabFile.get(j + jc));
                                removeJset.add(j + jc);
                            }
                        }
                        casewiseRecordForTabFile.set(j - 1, concatanated);

                        //out.println(j-1+"th concatanated="+concatanated);
                        j = lastJ - 1;

                    } // end-of-if: continuous-OBS only
                } // end of loop-j

                List<String> newDataLine = new ArrayList<String>();

                for (int jl = 0; jl < casewiseRecordForTabFile.size(); jl++) {
                    //out.println("jl="+jl+"-th datum =["+casewiseRecordForTabFile.get(jl)+"]");

                    if (!removeJset.contains(jl)) {
                        newDataLine.add(casewiseRecordForTabFile.get(jl));
                    }
                }

                dbgLog.fine("new casewiseRecordForTabFile=" + newDataLine);
                dbgLog.fine("new casewiseRecordForTabFile(size)=" + newDataLine.size());

                casewiseRecordForTabFile = newDataLine;

            } // end-if: stringContinuousVar-exist case

            for (int el = 0; el < casewiseRecordForTabFile.size(); el++) {
                casewiseRecordForUNF.add(casewiseRecordForTabFile.get(el));
            }

            caseWiseDateFormatForUNF = new String[casewiseRecordForTabFile.size()];

            caseIndex++;
            dbgLog.finer("caseIndex=" + caseIndex);
            for (int k = 0; k < casewiseRecordForTabFile.size(); k++) {

                if (variableTypelList.get(k) > 0) {
                    // String variable case: set to  -1
                    variableTypeFinal[k] = -1;

                    // See my comments for this padding removal logic
                    // in the "compressed" method -- L.A.

                    String paddRemoved = StringUtils.stripEnd(casewiseRecordForTabFile.get(k).toString(), null);
                    // TODO: clean this up.  For now, just make sure that strings contain at least one blank space.
                    if (paddRemoved.equals("")) {
                        paddRemoved = " ";
                    }

                    casewiseRecordForUNF.set(k, paddRemoved);
                    casewiseRecordForTabFile.set(k,
                            "\"" + paddRemoved.replaceAll("\"", Matcher.quoteReplacement("\\\"")) + "\"");

                    // end of String var case

                } else {
                    // numeric var case
                    if (casewiseRecordForTabFile.get(k).equals(MissingValueForTextDataFileNumeric)) {
                        casewiseRecordForUNF.set(k, null);
                    }

                } // end of variable-type check

                if (casewiseRecordForTabFile.get(k) != null
                        && !casewiseRecordForTabFile.get(k).equals(MissingValueForTextDataFileNumeric)) {

                    // to do date conversion
                    String variableFormatType = variableFormatTypeList[k];
                    dbgLog.finer("k=" + k + "th variable format=" + variableFormatType);

                    int formatDecimalPointPosition = formatDecimalPointPositionList.get(k);

                    if (variableFormatType.equals("date")) {
                        dbgLog.finer("date case");

                        long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString()) * 1000L
                                - SPSS_DATE_OFFSET;

                        String newDatum = sdf_ymd.format(new Date(dateDatum));
                        dbgLog.finer("k=" + k + ":" + newDatum);
                        caseWiseDateFormatForUNF[k] = sdf_ymd.toPattern();

                        casewiseRecordForTabFile.set(k, newDatum);
                        casewiseRecordForUNF.set(k, newDatum);
                        //formatCategoryTable.put(variableNameList.get(k), "date");
                    } else if (variableFormatType.equals("time")) {
                        dbgLog.finer("time case:DTIME or DATETIME or TIME");
                        //formatCategoryTable.put(variableNameList.get(k), "time");

                        if (printFormatTable.get(variableNameList.get(k)).equals("DTIME")) {

                            if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
                                long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString())
                                        * 1000L - SPSS_DATE_BIAS;
                                String newDatum = sdf_dhms.format(new Date(dateDatum));
                                // Note: DTIME is not a complete date, so we don't save a date format with it
                                dbgLog.finer("k=" + k + ":" + newDatum);
                                casewiseRecordForTabFile.set(k, newDatum);
                                casewiseRecordForUNF.set(k, newDatum);
                            } else {
                                // decimal point included
                                String[] timeData = casewiseRecordForTabFile.get(k).toString().split("\\.");

                                dbgLog.finer(StringUtils.join(timeData, "|"));
                                long dateDatum = Long.parseLong(timeData[0]) * 1000L - SPSS_DATE_BIAS;
                                StringBuilder sb_time = new StringBuilder(sdf_dhms.format(new Date(dateDatum)));

                                if (formatDecimalPointPosition > 0) {
                                    sb_time.append("." + timeData[1].substring(0, formatDecimalPointPosition));
                                }

                                dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                casewiseRecordForTabFile.set(k, sb_time.toString());
                                casewiseRecordForUNF.set(k, sb_time.toString());
                            }
                        } else if (printFormatTable.get(variableNameList.get(k)).equals("DATETIME")) {

                            if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
                                long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString())
                                        * 1000L - SPSS_DATE_OFFSET;
                                String newDatum = sdf_ymdhms.format(new Date(dateDatum));
                                caseWiseDateFormatForUNF[k] = sdf_ymdhms.toPattern();
                                dbgLog.finer("k=" + k + ":" + newDatum);
                                casewiseRecordForTabFile.set(k, newDatum);
                                casewiseRecordForUNF.set(k, newDatum);
                            } else {
                                // decimal point included
                                String[] timeData = casewiseRecordForTabFile.get(k).toString().split("\\.");

                                //dbgLog.finer(StringUtils.join(timeData, "|"));
                                long dateDatum = Long.parseLong(timeData[0]) * 1000L - SPSS_DATE_OFFSET;
                                StringBuilder sb_time = new StringBuilder(
                                        sdf_ymdhms.format(new Date(dateDatum)));
                                //dbgLog.finer(sb_time.toString());

                                if (formatDecimalPointPosition > 0) {
                                    sb_time.append("." + timeData[1].substring(0, formatDecimalPointPosition));
                                }
                                caseWiseDateFormatForUNF[k] = sdf_ymdhms.toPattern()
                                        + (formatDecimalPointPosition > 0 ? ".S" : "");
                                dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                casewiseRecordForTabFile.set(k, sb_time.toString());
                                casewiseRecordForUNF.set(k, sb_time.toString());
                            }
                        } else if (printFormatTable.get(variableNameList.get(k)).equals("TIME")) {
                            if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
                                long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString())
                                        * 1000L;
                                String newDatum = sdf_hms.format(new Date(dateDatum));
                                caseWiseDateFormatForUNF[k] = sdf_hms.toPattern();
                                dbgLog.finer("k=" + k + ":" + newDatum);
                                casewiseRecordForTabFile.set(k, newDatum);
                                casewiseRecordForUNF.set(k, newDatum);
                            } else {
                                // decimal point included
                                String[] timeData = casewiseRecordForTabFile.get(k).toString().split("\\.");

                                //dbgLog.finer(StringUtils.join(timeData, "|"));
                                long dateDatum = Long.parseLong(timeData[0]) * 1000L;
                                StringBuilder sb_time = new StringBuilder(sdf_hms.format(new Date(dateDatum)));
                                //dbgLog.finer(sb_time.toString());

                                if (formatDecimalPointPosition > 0) {
                                    sb_time.append("." + timeData[1].substring(0, formatDecimalPointPosition));
                                }
                                caseWiseDateFormatForUNF[k] = this.sdf_hms.toPattern()
                                        + (formatDecimalPointPosition > 0 ? ".S" : "");
                                dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                casewiseRecordForTabFile.set(k, sb_time.toString());
                                casewiseRecordForUNF.set(k, sb_time.toString());
                            }
                        }
                    } else if (variableFormatType.equals("other")) {
                        dbgLog.finer("other non-date/time case");

                        if (printFormatTable.get(variableNameList.get(k)).equals("WKDAY")) {
                            // day of week
                            dbgLog.finer("data k=" + k + ":" + casewiseRecordForTabFile.get(k));
                            dbgLog.finer("data k=" + k + ":" + SPSSConstants.WEEKDAY_LIST
                                    .get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1));
                            String newDatum = SPSSConstants.WEEKDAY_LIST
                                    .get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1);
                            casewiseRecordForTabFile.set(k, newDatum);
                            casewiseRecordForUNF.set(k, newDatum);
                            dbgLog.finer("wkday:k=" + k + ":" + casewiseRecordForTabFile.get(k));
                        } else if (printFormatTable.get(variableNameList.get(k)).equals("MONTH")) {
                            // month
                            dbgLog.finer("data k=" + k + ":" + casewiseRecordForTabFile.get(k));
                            dbgLog.finer("data k=" + k + ":" + SPSSConstants.MONTH_LIST
                                    .get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1));
                            String newDatum = SPSSConstants.MONTH_LIST
                                    .get(Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1);
                            casewiseRecordForTabFile.set(k, newDatum);
                            casewiseRecordForUNF.set(k, newDatum);
                            dbgLog.finer("month:k=" + k + ":" + casewiseRecordForTabFile.get(k));

                        }
                    }
                    // end of date/time block
                } // end: date-time-datum check

            } // end: loop-k(2nd: variablte-wise-check)

            // write to tab file
            if (casewiseRecordForTabFile.size() > 0) {
                pwout.println(StringUtils.join(casewiseRecordForTabFile, "\t"));
            }

            if (casewiseRecordForTabFile.size() > 0) {
                for (int ij = 0; ij < varQnty; ij++) {
                    dataTable2[ij][caseIndex - 1] = casewiseRecordForUNF.get(ij);
                    if (variableFormatTypeList[ij].equals("date")
                            || variableFormatTypeList[ij].equals("time")) {
                        this.dateFormats[ij][caseIndex - 1] = caseWiseDateFormatForUNF[ij];
                    }
                }
            }

            // numeric contents-check
            for (int l = 0; l < casewiseRecordForTabFile.size(); l++) {
                if (variableFormatTypeList[l].equals("date") || variableFormatTypeList[l].equals("time")
                        || printFormatTable.get(variableNameList.get(l)).equals("WKDAY")
                        || printFormatTable.get(variableNameList.get(l)).equals("MONTH")) {
                    variableTypeFinal[l] = -1;
                }

                if (variableTypeFinal[l] == 0) {
                    if (casewiseRecordForTabFile.get(l).toString().indexOf(".") >= 0) {
                        // l-th variable is not integer
                        variableTypeFinal[l] = 1;
                        decimalVariableSet.add(l);
                    }
                }
            }

            // reset the case-wise working objects
            casewiseRecordForTabFile.clear();
            casewiseRecordForUNF.clear();

            if (stream.available() == 0) {
                // reached the end of this file
                // do exit-processing

                dbgLog.fine("***** reached the end of the file at " + ii + "th iteration *****");

                break;
            } // if eof processing
        } //i-loop: case(row) iteration

        // close the writer
        pwout.close();

    } catch (IOException ex) {
        throw ex;
    }

    smd.getFileInformation().put("caseQnty", caseQnty);
    smd.setDecimalVariables(decimalVariableSet);
    smd.setVariableFormatCategory(formatCategoryTable);

    // contents check
    dbgLog.fine("variableType=" + ArrayUtils.toString(variableTypeFinal));
    dbgLog.fine("numberOfDecimalVariables=" + numberOfDecimalVariables);
    dbgLog.fine("decimalVariableSet=" + decimalVariableSet);

    dbgLog.fine("***** decodeRecordTypeDataUnCompressed(): end *****");
}

From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.sav.SAVFileReader.java

void decodeRecordTypeDataCompressed(BufferedInputStream stream) throws IOException {

    dbgLog.fine("***** decodeRecordTypeDataCompressed(): start *****");

    if (stream == null) {
        throw new IllegalArgumentException("decodeRecordTypeDataCompressed: stream == null!");
    }//  w w w.j  a  v  a  2 s .c  om

    PrintWriter pwout = createOutputWriter(stream);

    int varQnty = dataTable.getVarQuantity().intValue();
    int caseQnty = dataTable.getCaseQuantity().intValue();

    dbgLog.fine("varQnty: " + varQnty);

    dateFormatList = new String[varQnty];

    boolean hasStringVarContinuousBlock = obsNonVariableBlockSet.size() > 0 ? true : false;
    dbgLog.fine("hasStringVarContinuousBlock=" + hasStringVarContinuousBlock);

    int ii = 0;

    int OBS = LENGTH_SAV_OBS_BLOCK;
    int nOBS = OBSUnitsPerCase;

    dbgLog.fine("OBSUnitsPerCase=" + OBSUnitsPerCase);

    int caseIndex = 0;

    dbgLog.fine("printFormatTable:\n" + printFormatTable);
    variableFormatTypeList = new String[varQnty];

    for (int i = 0; i < varQnty; i++) {
        variableFormatTypeList[i] = SPSSConstants.FORMAT_CATEGORY_TABLE
                .get(printFormatTable.get(variableNameList.get(i)));
        dbgLog.fine("i=" + i + "th variableFormatTypeList=" + variableFormatTypeList[i]);
        formatCategoryTable.put(variableNameList.get(i), variableFormatTypeList[i]);
    }
    dbgLog.fine("variableFormatType:\n" + Arrays.deepToString(variableFormatTypeList));
    dbgLog.fine("formatCategoryTable:\n" + formatCategoryTable);

    // TODO: 
    // Make sure the date formats are actually preserved! 
    // (this is something that was collected in the code below and passed
    // to the UNF calculator). 
    // -- L.A. 4.0 alpha
    List<String> casewiseRecordForTabFile = new ArrayList<String>();

    try {
        // this compression is applied only to non-float data, i.e. integer;
        // 8-byte float datum is kept in tact
        boolean hasReachedEOF = false;

        OBSERVATION: while (true) {

            dbgLog.fine("SAV Reader: compressed: ii=" + ii + "-th iteration");

            byte[] octate = new byte[LENGTH_SAV_OBS_BLOCK];

            int nbytes = stream.read(octate);

            // processCompressedOBSblock ()

            // (this means process a block of 8 compressed OBS
            // values -- should result in 64 bytes of data total)

            for (int i = 0; i < LENGTH_SAV_OBS_BLOCK; i++) {

                dbgLog.finer("i=" + i + "-th iteration");
                int octate_i = octate[i];
                //dbgLog.fine("octate="+octate_i);
                if (octate_i < 0) {
                    octate_i += 256;
                }
                int byteCode = octate_i;//octate_i & 0xF;
                //out.println("byeCode="+byteCode);

                // processCompressedOBS

                switch (byteCode) {
                case 252:
                    // end of the file
                    dbgLog.fine("SAV Reader: compressed: end of file mark [FC] was found");
                    hasReachedEOF = true;
                    break;
                case 253:
                    // FD: uncompressed data follows after this octate
                    // long string datum or float datum
                    // read the following octate
                    byte[] uncompressedByte = new byte[LENGTH_SAV_OBS_BLOCK];
                    int ucbytes = stream.read(uncompressedByte);
                    int typeIndex = (ii * OBS + i) % nOBS;

                    if ((OBSwiseTypelList.get(typeIndex) > 0) || (OBSwiseTypelList.get(typeIndex) == -1)) {
                        // code= >0 |-1: string or its conitiguous block
                        // decode as a string object
                        String strdatum = new String(
                                Arrays.copyOfRange(uncompressedByte, 0, LENGTH_SAV_OBS_BLOCK), defaultCharSet);
                        //out.println("str_datum="+strdatum+"<-");
                        // add this non-missing-value string datum
                        casewiseRecordForTabFile.add(strdatum);
                        //out.println("casewiseRecordForTabFile(String)="+casewiseRecordForTabFile);
                    } else if (OBSwiseTypelList.get(typeIndex) == -2) {
                        String strdatum = new String(
                                Arrays.copyOfRange(uncompressedByte, 0, LENGTH_SAV_OBS_BLOCK - 1),
                                defaultCharSet);
                        casewiseRecordForTabFile.add(strdatum);
                        //out.println("casewiseRecordForTabFile(String)="+casewiseRecordForTabFile);
                    } else if (OBSwiseTypelList.get(typeIndex) == 0) {
                        // code= 0: numeric

                        ByteBuffer bb_double = ByteBuffer.wrap(uncompressedByte, 0, LENGTH_SAV_OBS_BLOCK);
                        if (isLittleEndian) {
                            bb_double.order(ByteOrder.LITTLE_ENDIAN);
                        }

                        Double ddatum = bb_double.getDouble();
                        // out.println("ddatum="+ddatum);
                        // add this non-missing-value numeric datum
                        casewiseRecordForTabFile.add(doubleNumberFormatter.format(ddatum));
                        dbgLog.fine("SAV Reader: compressed: added value to dataLine: " + ddatum);

                    } else {
                        dbgLog.fine("SAV Reader: out-of-range exception");
                        throw new IOException("out-of-range value was found");
                    }

                    /*
                    // EOF-check after reading this octate
                    if (stream.available() == 0){
                    hasReachedEOF = true;
                    dbgLog.fine(
                    "SAV Reader: *** After reading an uncompressed octate," +
                    " reached the end of the file at "+ii
                    +"th iteration and i="+i+"th octate position [0-start] *****");
                    }
                     */

                    break;
                case 254:
                    // FE: used as the missing value for string variables
                    // an empty case in a string variable also takes this value
                    // string variable does not accept space-only data
                    // cf: uncompressed case
                    // 20 20 20 20 20 20 20 20
                    // add the string missing value
                    // out.println("254: String missing data");

                    casewiseRecordForTabFile.add(" "); // add "." here?

                    // Note that technically this byte flag (254/xFE) means
                    // that *eight* white space characters should be
                    // written to the output stream. This caused me
                    // a great amount of confusion, because it appeared
                    // to me that there was a mismatch between the number
                    // of bytes advertised in the variable metadata and
                    // the number of bytes actually found in the data
                    // section of a compressed SAV file; this is because
                    // these 8 bytes "come out of nowhere"; they are not
                    // written in the data section, but this flag specifies
                    // that they should be added to the output.
                    // Also, as I pointed out above, we are only writing
                    // out one whitespace character, not 8 as instructed.
                    // This appears to be legit; these blocks of 8 spaces
                    // seem to be only used for padding, and all such
                    // multiple padding spaces are stripped anyway during
                    // the post-processing.

                    break;
                case 255:
                    // FF: system missing value for numeric variables
                    // cf: uncompressed case (sysmis)
                    // FF FF FF FF FF FF eF FF(little endian)
                    // add the numeric missing value
                    dbgLog.fine("SAV Reader: compressed: Missing Value, numeric");
                    casewiseRecordForTabFile.add(MissingValueForTextDataFileNumeric);

                    break;
                case 0:
                    // 00: do nothing
                    dbgLog.fine("SAV Reader: compressed: doing nothing (zero); ");

                    break;
                default:
                    //out.println("byte code(default)="+ byteCode);
                    if ((byteCode > 0) && (byteCode < 252)) {
                        // datum is compressed
                        //Integer unCompressed = Integer.valueOf(byteCode -100);
                        // add this uncompressed numeric datum
                        Double unCompressed = Double.valueOf(byteCode - 100);
                        dbgLog.fine("SAV Reader: compressed: default case: " + unCompressed);

                        casewiseRecordForTabFile.add(doubleNumberFormatter.format(unCompressed));
                        // out.println("uncompressed="+unCompressed);
                        // out.println("dataline="+casewiseRecordForTabFile);
                    }
                }// end of switch

                // out.println("end of switch");

                // The-end-of-a-case(row)-processing

                // this line that follows, and the code around it
                // is really confusing:
                int varCounter = (ii * OBS + i + 1) % nOBS;
                // while both OBS and LENGTH_SAV_OBS_BLOCK = 8
                // (OBS was initialized as OBS=LENGTH_SAV_OBS_BLOCK),
                // the 2 values mean different things:
                // LENGTH_SAV_OBS_BLOCK is the number of bytes in one OBS;
                // and OBS is the number of OBS blocks that we process
                // at a time. I.e., we process 8 chunks of 8 bytes at a time.
                // This is how data is organized inside an SAV file:
                // 8 bytes of compression flags, followd by 8x8 or fewer
                // (depending on the flags) bytes of compressed data.
                // I should rename this OBS variable something more
                // meaningful.
                //
                // Also, the "varCounter" variable name is entirely
                // misleading -- it counts not variables, but OBS blocks.

                dbgLog.fine("SAV Reader: compressed: OBS counter=" + varCounter + "(ii=" + ii + ")");

                if ((ii * OBS + i + 1) % nOBS == 0) {

                    //out.println("casewiseRecordForTabFile(before)="+casewiseRecordForTabFile);

                    // out.println("all variables in a case are parsed == nOBS");
                    // out.println("hasStringVarContinuousBlock="+hasStringVarContinuousBlock);

                    // check whether a string-variable's continuous block exits
                    // if so, they must be joined

                    if (hasStringVarContinuousBlock) {

                        // string-variable's continuous-block-concatenating-processing

                        //out.println("concatenating process starts");
                        //out.println("casewiseRecordForTabFile(before)="+casewiseRecordForTabFile);
                        //out.println("casewiseRecordForTabFile(before:size)="+casewiseRecordForTabFile.size());

                        StringBuilder sb = new StringBuilder("");
                        int firstPosition = 0;

                        Set<Integer> removeJset = new HashSet<Integer>();
                        for (int j = 0; j < nOBS; j++) {
                            dbgLog.fine("RTD: j=" + j + "-th type =" + OBSwiseTypelList.get(j));
                            if ((OBSwiseTypelList.get(j) == -1) || (OBSwiseTypelList.get(j) == -2)) {
                                // Continued String variable found at j-th
                                // position. look back the j-1
                                firstPosition = j - 1;
                                int lastJ = j;
                                String concatenated = null;

                                removeJset.add(j);
                                sb.append(casewiseRecordForTabFile.get(j - 1));
                                sb.append(casewiseRecordForTabFile.get(j));

                                for (int jc = 1;; jc++) {
                                    if ((j + jc == nOBS) || ((OBSwiseTypelList.get(j + jc) != -1)
                                            && (OBSwiseTypelList.get(j + jc) != -2))) {

                                        // j is the end unit of this string variable
                                        concatenated = sb.toString();
                                        sb.setLength(0);
                                        lastJ = j + jc;
                                        break;
                                    } else {
                                        sb.append(casewiseRecordForTabFile.get(j + jc));
                                        removeJset.add(j + jc);
                                    }
                                }
                                casewiseRecordForTabFile.set(j - 1, concatenated);

                                //out.println(j-1+"th concatenated="+concatenated);
                                j = lastJ - 1;

                            } // end-of-if: continuous-OBS only

                        } // end of loop-j

                        //out.println("removeJset="+removeJset);

                        // a new list that stores a new case with concatanated string data
                        List<String> newDataLine = new ArrayList<String>();

                        for (int jl = 0; jl < casewiseRecordForTabFile.size(); jl++) {
                            //out.println("jl="+jl+"-th datum =["+casewiseRecordForTabFile.get(jl)+"]");

                            if (!removeJset.contains(jl)) {

                                //                                if (casewiseRecordForTabFile.get(jl).equals(MissingValueForTextDataFileString)){
                                //                                    out.println("NA-S jl= "+jl+"=["+casewiseRecordForTabFile.get(jl)+"]");
                                //                                } else if (casewiseRecordForTabFile.get(jl).equals(MissingValueForTextDataFileNumeric)){
                                //                                    out.println("NA-N jl= "+jl+"=["+casewiseRecordForTabFile.get(jl)+"]");
                                //                                } else if (casewiseRecordForTabFile.get(jl)==null){
                                //                                    out.println("null case jl="+jl+"=["+casewiseRecordForTabFile.get(jl)+"]");
                                //                                } else if (casewiseRecordForTabFile.get(jl).equals("NaN")){
                                //                                    out.println("NaN jl= "+jl+"=["+casewiseRecordForTabFile.get(jl)+"]");
                                //                                } else if (casewiseRecordForTabFile.get(jl).equals("")){
                                //                                    out.println("blank jl= "+jl+"=["+casewiseRecordForTabFile.get(jl)+"]");
                                //                                } else if (casewiseRecordForTabFile.get(jl).equals(" ")){
                                //                                    out.println("space jl= "+jl+"=["+casewiseRecordForTabFile.get(jl)+"]");
                                //                                }

                                newDataLine.add(casewiseRecordForTabFile.get(jl));
                            } else {
                                //                                out.println("Excluded: jl="+jl+"-th datum=["+casewiseRecordForTabFile.get(jl)+"]");
                            }
                        } // end of loop-jl

                        //out.println("new casewiseRecordForTabFile="+newDataLine);
                        //out.println("new casewiseRecordForTabFile(size)="+newDataLine.size());

                        casewiseRecordForTabFile = newDataLine;

                    } // end-if: stringContinuousVar-exist case

                    // caseIndex starts from 1 not 0
                    caseIndex = (ii * OBS + i + 1) / nOBS;

                    for (int k = 0; k < casewiseRecordForTabFile.size(); k++) {

                        dbgLog.fine("k=" + k + "-th variableTypelList=" + variableTypelList.get(k));

                        if (variableTypelList.get(k) > 0) {

                            // Strip the String variables off the
                            // whitespace padding:

                            // [ snipped ]

                            // I've removed the block of code above where
                            // String values were substring()-ed to the
                            // length specified in the variable metadata;
                            // Doing that was not enough, since a string
                            // can still be space-padded inside its
                            // advertised capacity. (note that extended
                            // variables can have many kylobytes of such
                            // padding in them!) Plus it was completely
                            // redundant, since we are stripping all the
                            // trailing white spaces with
                            // StringUtils.stripEnd() below:

                            String paddRemoved = StringUtils
                                    .stripEnd(casewiseRecordForTabFile.get(k).toString(), null);
                            // TODO: clean this up.  For now, just make sure that strings contain at least one blank space.
                            if (paddRemoved.equals("")) {
                                paddRemoved = " ";
                            }
                            //casewiseRecordForTabFile.set(k, "\"" + paddRemoved.replaceAll("\"", Matcher.quoteReplacement("\\\"")) + "\"");
                            casewiseRecordForTabFile.set(k, escapeCharacterString(paddRemoved));

                            // end of String var case

                        } // end of variable-type check

                        if (casewiseRecordForTabFile.get(k) != null && !casewiseRecordForTabFile.get(k)
                                .equals(MissingValueForTextDataFileNumeric)) {

                            String variableFormatType = variableFormatTypeList[k];
                            dbgLog.finer("k=" + k + "th printFormatTable format="
                                    + printFormatTable.get(variableNameList.get(k)));

                            int formatDecimalPointPosition = formatDecimalPointPositionList.get(k);

                            if (variableFormatType.equals("date")) {
                                dbgLog.finer("date case");

                                long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString())
                                        * 1000L - SPSS_DATE_OFFSET;

                                String newDatum = sdf_ymd.format(new Date(dateDatum));
                                dbgLog.finer("k=" + k + ":" + newDatum);
                                /* saving date format */
                                dbgLog.finer("saving dateFormat[k] = " + sdf_ymd.toPattern());
                                casewiseRecordForTabFile.set(k, newDatum);
                                dateFormatList[k] = sdf_ymd.toPattern();
                                //formatCategoryTable.put(variableNameList.get(k), "date");
                            } else if (variableFormatType.equals("time")) {
                                dbgLog.finer("time case:DTIME or DATETIME or TIME");
                                //formatCategoryTable.put(variableNameList.get(k), "time");

                                if (printFormatTable.get(variableNameList.get(k)).equals("DTIME")) {
                                    // We're not even going to try to handle "DTIME"
                                    // values as time/dates in dataverse; this is a weird
                                    // format that nobody uses outside of SPSS.
                                    // (but we do need to remember to treat the resulting values 
                                    // as character strings, not numerics!)

                                    if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
                                        long dateDatum = Long
                                                .parseLong(casewiseRecordForTabFile.get(k).toString()) * 1000L
                                                - SPSS_DATE_BIAS;
                                        String newDatum = sdf_dhms.format(new Date(dateDatum));
                                        dbgLog.finer("k=" + k + ":" + newDatum);
                                        casewiseRecordForTabFile.set(k, newDatum);
                                    } else {
                                        // decimal point included
                                        String[] timeData = casewiseRecordForTabFile.get(k).toString()
                                                .split("\\.");

                                        dbgLog.finer(StringUtils.join(timeData, "|"));
                                        long dateDatum = Long.parseLong(timeData[0]) * 1000L - SPSS_DATE_BIAS;
                                        StringBuilder sb_time = new StringBuilder(
                                                sdf_dhms.format(new Date(dateDatum)));
                                        dbgLog.finer(sb_time.toString());

                                        if (formatDecimalPointPosition > 0) {
                                            sb_time.append(
                                                    "." + timeData[1].substring(0, formatDecimalPointPosition));
                                        }

                                        dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                        casewiseRecordForTabFile.set(k, sb_time.toString());
                                    }
                                } else if (printFormatTable.get(variableNameList.get(k)).equals("DATETIME")) {
                                    // TODO: 
                                    // (for both datetime and "dateless" time)
                                    // keep the longest of the matching formats - i.e., if there are *some*
                                    // values in the vector that have thousands of a second, that should be 
                                    // part of the saved format!
                                    //  -- L.A. Aug. 12 2014 
                                    if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
                                        long dateDatum = Long
                                                .parseLong(casewiseRecordForTabFile.get(k).toString()) * 1000L
                                                - SPSS_DATE_OFFSET;
                                        String newDatum = sdf_ymdhms.format(new Date(dateDatum));
                                        dbgLog.finer("k=" + k + ":" + newDatum);
                                        casewiseRecordForTabFile.set(k, newDatum);
                                        dateFormatList[k] = sdf_ymdhms.toPattern();
                                    } else {
                                        // decimal point included
                                        String[] timeData = casewiseRecordForTabFile.get(k).toString()
                                                .split("\\.");

                                        //dbgLog.finer(StringUtils.join(timeData, "|"));
                                        long dateDatum = Long.parseLong(timeData[0]) * 1000L - SPSS_DATE_OFFSET;
                                        StringBuilder sb_time = new StringBuilder(
                                                sdf_ymdhms.format(new Date(dateDatum)));
                                        //dbgLog.finer(sb_time.toString());

                                        if (formatDecimalPointPosition > 0) {
                                            sb_time.append(
                                                    "." + timeData[1].substring(0, formatDecimalPointPosition));
                                        }
                                        dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                        casewiseRecordForTabFile.set(k, sb_time.toString());
                                        dateFormatList[k] = sdf_ymdhms.toPattern()
                                                + (formatDecimalPointPosition > 0 ? ".S" : "");
                                    }
                                } else if (printFormatTable.get(variableNameList.get(k)).equals("TIME")) {
                                    // TODO: 
                                    // double-check that we are handling "dateless" time correctly... -- L.A. Aug. 2014
                                    if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
                                        long dateDatum = Long
                                                .parseLong(casewiseRecordForTabFile.get(k).toString()) * 1000L;
                                        String newDatum = sdf_hms.format(new Date(dateDatum));
                                        dbgLog.finer("k=" + k + ":" + newDatum);
                                        casewiseRecordForTabFile.set(k, newDatum);
                                        if (dateFormatList[k] == null) {
                                            dateFormatList[k] = sdf_hms.toPattern();
                                        }
                                    } else {
                                        // decimal point included
                                        String[] timeData = casewiseRecordForTabFile.get(k).toString()
                                                .split("\\.");

                                        //dbgLog.finer(StringUtils.join(timeData, "|"));
                                        long dateDatum = Long.parseLong(timeData[0]) * 1000L;
                                        StringBuilder sb_time = new StringBuilder(
                                                sdf_hms.format(new Date(dateDatum)));
                                        //dbgLog.finer(sb_time.toString());

                                        if (formatDecimalPointPosition > 0) {
                                            sb_time.append(
                                                    "." + timeData[1].substring(0, formatDecimalPointPosition));
                                        }
                                        dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                        casewiseRecordForTabFile.set(k, sb_time.toString());

                                        String format_hmsS = sdf_hms.toPattern()
                                                + (formatDecimalPointPosition > 0 ? ".S" : "");
                                        if (dateFormatList[k] == null
                                                || (format_hmsS.length() > dateFormatList[k].length())) {
                                            dateFormatList[k] = format_hmsS;
                                        }
                                    }
                                }

                            } else if (variableFormatType.equals("other")) {
                                dbgLog.finer("other non-date/time case:=" + i);

                                if (printFormatTable.get(variableNameList.get(k)).equals("WKDAY")) {
                                    // day of week
                                    dbgLog.finer("data k=" + k + ":" + casewiseRecordForTabFile.get(k));
                                    dbgLog.finer("data k=" + k + ":" + SPSSConstants.WEEKDAY_LIST.get(
                                            Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1));
                                    String newDatum = SPSSConstants.WEEKDAY_LIST.get(
                                            Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1);
                                    casewiseRecordForTabFile.set(k, newDatum);
                                    dbgLog.finer("wkday:k=" + k + ":" + casewiseRecordForTabFile.get(k));
                                } else if (printFormatTable.get(variableNameList.get(k)).equals("MONTH")) {
                                    // month
                                    dbgLog.finer("data k=" + k + ":" + casewiseRecordForTabFile.get(k));
                                    dbgLog.finer("data k=" + k + ":" + SPSSConstants.MONTH_LIST.get(
                                            Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1));
                                    String newDatum = SPSSConstants.MONTH_LIST.get(
                                            Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1);
                                    casewiseRecordForTabFile.set(k, newDatum);
                                    dbgLog.finer("month:k=" + k + ":" + casewiseRecordForTabFile.get(k));
                                }
                            }

                        } // end: date-time-datum check

                    } // end: loop-k(2nd: variable-wise-check)

                    // write to tab file
                    if (casewiseRecordForTabFile.size() > 0) {
                        pwout.println(StringUtils.join(casewiseRecordForTabFile, "\t"));
                    }

                    // numeric contents-check
                    for (int l = 0; l < casewiseRecordForTabFile.size(); l++) {
                        if (variableFormatTypeList[l].equals("date") || variableFormatTypeList[l].equals("time")
                                || printFormatTable.get(variableNameList.get(l)).equals("WKDAY")
                                || printFormatTable.get(variableNameList.get(l)).equals("MONTH")) {
                            // TODO: 
                            // figure out if any special handling is still needed here in 4.0. 
                            // -- L.A. - Aug. 2014

                        } else {
                            if (variableTypelList.get(l) <= 0) {
                                if (casewiseRecordForTabFile.get(l).toString().indexOf(".") >= 0) {
                                    decimalVariableSet.add(l);
                                }
                            }
                        }
                    }

                    // reset the case-wise working objects
                    casewiseRecordForTabFile.clear();

                    if (caseQnty > 0) {
                        if (caseIndex == caseQnty) {
                            hasReachedEOF = true;
                        }
                    }

                    if (hasReachedEOF) {
                        break;
                    }

                } // if(The-end-of-a-case(row)-processing)

            } // loop-i (OBS unit)

            if ((hasReachedEOF) || (stream.available() == 0)) {
                // reached the end of this file
                // do exit-processing

                dbgLog.fine("***** reached the end of the file at " + ii + "th iteration *****");

                break OBSERVATION;
            }

            ii++;

        } // while loop

        pwout.close();
    } catch (IOException ex) {
        throw ex;
    }

    dbgLog.fine("<<<<<<");
    dbgLog.fine("formatCategoryTable = " + formatCategoryTable);
    dbgLog.fine(">>>>>>");

    dbgLog.fine("decimalVariableSet=" + decimalVariableSet);

    dbgLog.fine("decodeRecordTypeDataCompressed(): end");
}

From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.sav.SAVFileReader.java

void decodeRecordType2(BufferedInputStream stream) throws IOException {
    dbgLog.fine("decodeRecordType2(): start");
    if (stream == null) {
        throw new IllegalArgumentException("stream == null!");
    }// ww  w.  ja v  a 2  s  . co  m

    Map<String, String> printFormatNameTable = new LinkedHashMap<String, String>();
    Map<String, String> variableLabelMap = new LinkedHashMap<String, String>();
    Map<String, List<String>> missingValueTable = new LinkedHashMap<String, List<String>>();
    List<Integer> printFormatList = new ArrayList<Integer>();

    String caseWeightVariableName = null;
    int caseWeightVariableIndex = 0;

    boolean lastVariableIsExtendable = false;
    boolean extendedVariableMode = false;
    boolean obs255 = false;

    String lastVariableName = null;
    String lastExtendedVariable = null;

    // this field repeats as many as the number of variables in
    // this sav file

    // (note that the above statement is not technically correct, this
    //  record repeats not just for every variable in the file, but for
    //  every OBS (8 byte unit); i.e., if a string is split into multiple
    //  OBS units, each one will have its own RT2 record -- L.A.).

    // Each field constists of a fixed (32-byte) segment and
    // then a few variable segments:
    // if the variable has a label (3rd INT4 set to 1), then there's 4 more
    // bytes specifying the length of the label, and then that many bytes
    // holding the label itself (no more than 256).
    // Then if there are optional missing value units (4th INT4 set to 1)
    // there will be 3 more OBS units attached = 24 extra bytes.

    int variableCounter = 0;
    int obsSeqNumber = 0;

    int j;

    dbgLog.fine("RT2: Reading " + OBSUnitsPerCase + " OBS units.");

    for (j = 0; j < OBSUnitsPerCase; j++) {

        dbgLog.fine("RT2: " + j + "-th RT2 unit is being decoded.");
        // 2.0: read the fixed[=non-optional] 32-byte segment
        byte[] recordType2Fixed = new byte[LENGTH_RECORDTYPE2_FIXED];

        try {
            int nbytes = stream.read(recordType2Fixed, 0, LENGTH_RECORDTYPE2_FIXED);

            //printHexDump(recordType2Fixed, "recordType2 part 1");

            if (nbytes == 0) {
                throw new IOException("reading recordType2: no bytes read!");
            }

            int offset = 0;

            // 2.1: create int-view of the bytebuffer for the first 16-byte segment
            int rt2_1st_4_units = 4;
            ByteBuffer[] bb_record_type2_fixed_part1 = new ByteBuffer[rt2_1st_4_units];
            int[] recordType2FixedPart1 = new int[rt2_1st_4_units];
            for (int i = 0; i < rt2_1st_4_units; i++) {

                bb_record_type2_fixed_part1[i] = ByteBuffer.wrap(recordType2Fixed, offset,
                        LENGTH_SAV_INT_BLOCK);

                offset += LENGTH_SAV_INT_BLOCK;
                if (isLittleEndian) {
                    bb_record_type2_fixed_part1[i].order(ByteOrder.LITTLE_ENDIAN);
                }
                recordType2FixedPart1[i] = bb_record_type2_fixed_part1[i].getInt();
            }

            ///dbgLog.fine("recordType2FixedPart="+
            ///        ReflectionToStringBuilder.toString(recordType2FixedPart1, ToStringStyle.MULTI_LINE_STYLE));

            // 1st ([0]) element must be 2 otherwise no longer Record Type 2
            if (recordType2FixedPart1[0] != 2) {
                dbgLog.warning(j + "-th RT header value is no longet RT2! " + recordType2FixedPart1[0]);
                break;
            }
            dbgLog.fine("variable type[must be 2]=" + recordType2FixedPart1[0]);

            // 2.3 variable name: 8 byte(space[x20]-padded)
            // This field is located at the very end of the 32 byte
            // fixed-size RT2 header (bytes 24-31).
            // We are processing it now, so that
            // we can make the decision on whether this variable is part
            // of a compound variable:

            String RawVariableName = getNullStrippedString(new String(
                    Arrays.copyOfRange(recordType2Fixed, 24, (24 + LENGTH_VARIABLE_NAME)), defaultCharSet));
            //offset +=LENGTH_VARIABLE_NAME;
            String variableName = null;
            if (RawVariableName.indexOf(' ') >= 0) {
                variableName = RawVariableName.substring(0, RawVariableName.indexOf(' '));
            } else {
                variableName = RawVariableName;
            }

            // 2nd ([1]) element: numeric variable = 0 :for string variable
            // this block indicates its datum-length, i.e, >0 ;
            // if -1, this RT2 unit is a non-1st RT2 unit for a string variable
            // whose value is longer than 8 character.

            boolean isNumericVariable = false;

            dbgLog.fine("variable type(0: numeric; > 0: String;-1 continue )=" + recordType2FixedPart1[1]);

            //OBSwiseTypelList.add(recordType2FixedPart1[1]);

            int HowManyRt2Units = 1;

            if (recordType2FixedPart1[1] == -1) {
                dbgLog.fine("this RT2 is an 8 bit continuation chunk of an earlier string variable");
                if (obs255) {
                    if (obsSeqNumber < 30) {
                        OBSwiseTypelList.add(recordType2FixedPart1[1]);
                        obsSeqNumber++;
                    } else {
                        OBSwiseTypelList.add(-2);
                        obs255 = false;
                        obsSeqNumber = 0;
                    }
                } else {
                    OBSwiseTypelList.add(recordType2FixedPart1[1]);
                }

                obsNonVariableBlockSet.add(j);
                continue;
            } else if (recordType2FixedPart1[1] == 0) {
                // This is a numeric variable
                extendedVariableMode = false;
                // And as such, it cannot be an extension of a
                // previous, long string variable.
                OBSwiseTypelList.add(recordType2FixedPart1[1]);
                variableCounter++;
                isNumericVariable = true;
                variableTypelList.add(recordType2FixedPart1[1]);
            } else if (recordType2FixedPart1[1] > 0) {

                // This looks like a regular string variable. However,
                // it may still be a part of a compound variable
                // (a String > 255 bytes that was split into 255 byte
                // chunks, stored as individual String variables).

                if (recordType2FixedPart1[1] == 255) {
                    obs255 = true;
                }

                if (lastVariableIsExtendable) {
                    String varNameBase = null;
                    if (lastVariableName.length() > 5) {
                        varNameBase = lastVariableName.substring(0, 5);
                    } else {
                        varNameBase = lastVariableName;
                    }

                    if (extendedVariableMode) {
                        if (variableNameIsAnIncrement(varNameBase, lastExtendedVariable, variableName)) {
                            OBSwiseTypelList.add(-1);
                            lastExtendedVariable = variableName;
                            // OK, we stay in the "extended variable" mode;
                            // but we can't move on to the next OBS (hence the commented out
                            // "continue" below:
                            //continue;
                            // see the next comment below for the explanation.
                            //
                            // Should we also set "extendable" flag to false at this point
                            // if it's shorter than 255 bytes, i.e. the last extended chunk?
                        } else {
                            extendedVariableMode = false;
                        }
                    } else {
                        if (variableNameIsAnIncrement(varNameBase, variableName)) {
                            OBSwiseTypelList.add(-1);
                            extendedVariableMode = true;
                            dbgLog.fine("RT2: in extended variable mode; variable " + variableName);
                            lastExtendedVariable = variableName;
                            // Before we move on to the next OBS unit, we need to check
                            // if this current extended variable has its own label specified;
                            // If so, we need to determine its length, then read and skip
                            // that many bytes.
                            // Hence the commented out "continue" below:
                            //continue;
                        }
                    }
                }

                if (!extendedVariableMode) {
                    // OK, this is a "real"
                    // string variable, and not a continuation chunk of a compound
                    // string.

                    OBSwiseTypelList.add(recordType2FixedPart1[1]);
                    variableCounter++;

                    if (recordType2FixedPart1[1] == 255) {
                        // This variable is 255 bytes long, i.e. this is
                        // either the single "atomic" variable of the
                        // max allowed size, or it's a 255 byte segment
                        // of a compound variable. So we will check
                        // the next variable and see if it is the continuation
                        // of this one.

                        lastVariableIsExtendable = true;
                    } else {
                        lastVariableIsExtendable = false;
                    }

                    if (recordType2FixedPart1[1] % LENGTH_SAV_OBS_BLOCK == 0) {
                        HowManyRt2Units = recordType2FixedPart1[1] / LENGTH_SAV_OBS_BLOCK;
                    } else {
                        HowManyRt2Units = recordType2FixedPart1[1] / LENGTH_SAV_OBS_BLOCK + 1;
                    }
                    variableTypelList.add(recordType2FixedPart1[1]);
                }
            }

            if (!extendedVariableMode) {
                // Again, we only want to do the following steps for the "real"
                // variables, not the chunks of split mega-variables:

                dbgLog.fine("RT2: HowManyRt2Units for this variable=" + HowManyRt2Units);

                lastVariableName = variableName;

                // caseWeightVariableOBSIndex starts from 1: 0 is used for does-not-exist cases
                if (j == (caseWeightVariableOBSIndex - 1)) {
                    caseWeightVariableName = variableName;
                    // TODO: do we need this "index"? -- 4.0 alpha
                    caseWeightVariableIndex = variableCounter;

                    ///smd.setCaseWeightVariableName(caseWeightVariableName);
                    ///smd.getFileInformation().put("caseWeightVariableIndex", caseWeightVariableIndex);
                }

                OBSIndexToVariableName.put(j, variableName);

                //dbgLog.fine("\nvariable name="+variableName+"<-");
                dbgLog.fine("RT2: " + j + "-th variable name=" + variableName + "<-");
                dbgLog.fine("RT2: raw variable: " + RawVariableName);

                variableNameList.add(variableName);
            }

            // 3rd ([2]) element: = 1 variable-label block follows; 0 = no label
            //
            dbgLog.fine("RT: variable label follows?(1:yes; 0: no)=" + recordType2FixedPart1[2]);
            boolean hasVariableLabel = recordType2FixedPart1[2] == 1 ? true : false;
            if ((recordType2FixedPart1[2] != 0) && (recordType2FixedPart1[2] != 1)) {
                throw new IOException("RT2: reading error: value is neither 0 or 1" + recordType2FixedPart1[2]);
            }

            // 2.4 [optional]The length of a variable label followed: 4-byte int
            // 3rd element of 2.1 indicates whether this field exists
            // *** warning: The label block is padded to a multiple of the 4-byte
            // NOT the raw integer value of this 4-byte block

            if (hasVariableLabel) {
                byte[] length_variable_label = new byte[4];
                int nbytes_2_4 = stream.read(length_variable_label);
                if (nbytes_2_4 == 0) {
                    throw new IOException("RT 2: error reading recordType2.4: no bytes read!");
                } else {
                    dbgLog.fine("nbytes_2_4=" + nbytes_2_4);
                }
                ByteBuffer bb_length_variable_label = ByteBuffer.wrap(length_variable_label, 0,
                        LENGTH_VARIABLE_LABEL);
                if (isLittleEndian) {
                    bb_length_variable_label.order(ByteOrder.LITTLE_ENDIAN);
                }
                int rawVariableLabelLength = bb_length_variable_label.getInt();

                dbgLog.fine("rawVariableLabelLength=" + rawVariableLabelLength);
                int variableLabelLength = getSAVintAdjustedBlockLength(rawVariableLabelLength);
                dbgLog.fine("RT2: variableLabelLength=" + variableLabelLength);

                // 2.5 [optional]variable label whose length is found at 2.4

                String variableLabel = "";

                if (rawVariableLabelLength > 0) {
                    byte[] variable_label = new byte[variableLabelLength];
                    int nbytes_2_5 = stream.read(variable_label);
                    if (nbytes_2_5 == 0) {
                        throw new IOException("RT 2: error reading recordType2.5: " + variableLabelLength
                                + " bytes requested, no bytes read!");
                    } else {
                        dbgLog.fine("nbytes_2_5=" + nbytes_2_5);
                    }
                    variableLabel = getNullStrippedString(new String(
                            Arrays.copyOfRange(variable_label, 0, rawVariableLabelLength), defaultCharSet));
                    dbgLog.fine("RT2: variableLabel=" + variableLabel + "<-");

                    dbgLog.fine(variableName + " => " + variableLabel);
                } else {
                    dbgLog.fine("RT2: defaulting to empty variable label.");
                }

                if (!extendedVariableMode) {
                    // We only have any use for this label if it's a "real" variable.
                    // Thinking about it, it doesn't make much sense for the "fake"
                    // variables that are actually chunks of large strings to store
                    // their own labels. But in some files they do. Then failing to read
                    // the bytes would result in getting out of sync with the RT record
                    // borders. So we always read the bytes, but only use them for
                    // the real variable entries.
                    /*String variableLabel = new String(Arrays.copyOfRange(variable_label,
                        0, rawVariableLabelLength),"US-ASCII");*/

                    variableLabelMap.put(variableName, variableLabel);
                }
            }

            if (extendedVariableMode) {
                // there's nothing else left for us to do in this iteration of the loop.
                // Once again, this was not a real variable, but a dummy variable entry
                // created for a chunk of a string variable longer than 255 bytes --
                // that's how SPSS stores them.
                continue;
            }

            // 4th ([3]) element: Missing value type code
            // 0[none], 1, 2, 3 [point-type],-2[range], -3 [range type+ point]

            dbgLog.fine("RT: missing value unit follows?(if 0, none)=" + recordType2FixedPart1[3]);
            boolean hasMissingValues = (validMissingValueCodeSet.contains(recordType2FixedPart1[3])
                    && (recordType2FixedPart1[3] != 0)) ? true : false;

            InvalidData invalidDataInfo = null;

            if (recordType2FixedPart1[3] != 0) {
                invalidDataInfo = new InvalidData(recordType2FixedPart1[3]);
                dbgLog.fine("RT: missing value type=" + invalidDataInfo.getType());
            }

            // 2.2: print/write formats: 4-byte each = 8 bytes

            byte[] printFormt = Arrays.copyOfRange(recordType2Fixed, offset, offset + LENGTH_PRINT_FORMAT_CODE);
            dbgLog.fine("printFrmt=" + new String(Hex.encodeHex(printFormt)));

            offset += LENGTH_PRINT_FORMAT_CODE;
            int formatCode = isLittleEndian ? printFormt[2] : printFormt[1];
            int formatWidth = isLittleEndian ? printFormt[1] : printFormt[2];

            // TODO: 
            // What should we be doing with these "format decimal positions" 
            // in 4.0? 
            // -- L.A. 4.0 alpha

            int formatDecimalPointPosition = isLittleEndian ? printFormt[0] : printFormt[3];
            dbgLog.fine("RT2: format code{5=F, 1=A[String]}=" + formatCode);

            formatDecimalPointPositionList.add(formatDecimalPointPosition);

            if (!SPSSConstants.FORMAT_CODE_TABLE_SAV.containsKey(formatCode)) {
                throw new IOException("Unknown format code was found = " + formatCode);
            } else {
                printFormatList.add(formatCode);
            }

            byte[] writeFormt = Arrays.copyOfRange(recordType2Fixed, offset, offset + LENGTH_WRITE_FORMAT_CODE);

            dbgLog.fine("RT2: writeFrmt=" + new String(Hex.encodeHex(writeFormt)));
            if (writeFormt[3] != 0x00) {
                dbgLog.fine("byte-order(write format): reversal required");
            }

            offset += LENGTH_WRITE_FORMAT_CODE;

            if (!SPSSConstants.ORDINARY_FORMAT_CODE_SET.contains(formatCode)) {
                StringBuilder sb = new StringBuilder(
                        SPSSConstants.FORMAT_CODE_TABLE_SAV.get(formatCode) + formatWidth);
                if (formatDecimalPointPosition > 0) {
                    sb.append("." + formatDecimalPointPosition);
                }
                dbgLog.fine("formattable[i] = " + variableName + " -> " + sb.toString());
                printFormatNameTable.put(variableName, sb.toString());

            }

            printFormatTable.put(variableName, SPSSConstants.FORMAT_CODE_TABLE_SAV.get(formatCode));

            // 2.6 [optional] missing values:4-byte each if exists
            // 4th element of 2.1 indicates the structure of this sub-field

            // Should we perhaps check for this for the "fake" variables too?
            //

            if (hasMissingValues) {
                dbgLog.fine("RT2: decoding missing value: type=" + recordType2FixedPart1[3]);
                int howManyMissingValueUnits = missingValueCodeUnits.get(recordType2FixedPart1[3]);
                //int howManyMissingValueUnits = recordType2FixedPart1[3] > 0 ? recordType2FixedPart1[3] :  0;

                dbgLog.fine("RT2: howManyMissingValueUnits=" + howManyMissingValueUnits);

                byte[] missing_value_code_units = new byte[LENGTH_SAV_OBS_BLOCK * howManyMissingValueUnits];
                int nbytes_2_6 = stream.read(missing_value_code_units);

                if (nbytes_2_6 == 0) {
                    throw new IOException("RT 2: reading recordType2.6: no byte was read");
                } else {
                    dbgLog.fine("nbytes_2_6=" + nbytes_2_6);
                }

                //printHexDump(missing_value_code_units, "missing value");

                if (isNumericVariable) {

                    double[] missingValues = new double[howManyMissingValueUnits];
                    //List<String> mvp = new ArrayList<String>();
                    List<String> mv = new ArrayList<String>();

                    ByteBuffer[] bb_missig_value_code = new ByteBuffer[howManyMissingValueUnits];

                    int offset_start = 0;

                    for (int i = 0; i < howManyMissingValueUnits; i++) {

                        bb_missig_value_code[i] = ByteBuffer.wrap(missing_value_code_units, offset_start,
                                LENGTH_SAV_OBS_BLOCK);

                        offset_start += LENGTH_SAV_OBS_BLOCK;
                        if (isLittleEndian) {
                            bb_missig_value_code[i].order(ByteOrder.LITTLE_ENDIAN);
                        }

                        ByteBuffer temp = bb_missig_value_code[i].duplicate();

                        missingValues[i] = bb_missig_value_code[i].getDouble();
                        if (Double.toHexString(missingValues[i]).equals("-0x1.ffffffffffffep1023")) {
                            dbgLog.fine("1st value is LOWEST");
                            mv.add(Double.toHexString(missingValues[i]));
                        } else if (Double.valueOf(missingValues[i]).equals(Double.MAX_VALUE)) {
                            dbgLog.fine("2nd value is HIGHEST");
                            mv.add(Double.toHexString(missingValues[i]));
                        } else {
                            mv.add(doubleNumberFormatter.format(missingValues[i]));
                        }
                        dbgLog.fine(i + "-th missing value=" + Double.toHexString(missingValues[i]));
                    }

                    dbgLog.fine("variableName=" + variableName);
                    if (recordType2FixedPart1[3] > 0) {
                        // point cases only
                        dbgLog.fine("mv(>0)=" + mv);
                        missingValueTable.put(variableName, mv);
                        invalidDataInfo.setInvalidValues(mv);
                    } else if (recordType2FixedPart1[3] == -2) {
                        dbgLog.fine("mv(-2)=" + mv);
                        // range
                        invalidDataInfo.setInvalidRange(mv);
                    } else if (recordType2FixedPart1[3] == -3) {
                        // mixed case
                        dbgLog.fine("mv(-3)=" + mv);
                        invalidDataInfo.setInvalidRange(mv.subList(0, 2));
                        invalidDataInfo.setInvalidValues(mv.subList(2, 3));
                        missingValueTable.put(variableName, mv.subList(2, 3));
                    }

                    dbgLog.fine("missing value=" + StringUtils.join(missingValueTable.get(variableName), "|"));
                    dbgLog.fine("invalidDataInfo(Numeric):\n" + invalidDataInfo);
                    invalidDataTable.put(variableName, invalidDataInfo);
                } else {
                    // string variable case
                    String[] missingValues = new String[howManyMissingValueUnits];
                    List<String> mv = new ArrayList<String>();
                    int offset_start = 0;
                    int offset_end = LENGTH_SAV_OBS_BLOCK;
                    for (int i = 0; i < howManyMissingValueUnits; i++) {

                        missingValues[i] = StringUtils.stripEnd(new String(
                                Arrays.copyOfRange(missing_value_code_units, offset_start, offset_end),
                                defaultCharSet), " ");
                        dbgLog.fine("missing value=" + missingValues[i] + "<-");

                        offset_start = offset_end;
                        offset_end += LENGTH_SAV_OBS_BLOCK;

                        mv.add(missingValues[i]);
                    }
                    invalidDataInfo.setInvalidValues(mv);
                    missingValueTable.put(variableName, mv);
                    invalidDataTable.put(variableName, invalidDataInfo);
                    dbgLog.fine(
                            "missing value(str)=" + StringUtils.join(missingValueTable.get(variableName), "|"));
                    dbgLog.fine("invalidDataInfo(String):\n" + invalidDataInfo);

                } // string case
                dbgLog.fine("invalidDataTable:\n" + invalidDataTable);
            } // if msv

        } catch (IOException ex) {
            //ex.printStackTrace();
            throw ex;
        } catch (Exception ex) {
            ex.printStackTrace();
            // should we be throwing some exception here?
        }
    } // j-loop

    if (j != OBSUnitsPerCase) {
        dbgLog.fine("RT2: attention! didn't reach the end of the OBS list!");
        throw new IOException("RT2: didn't reach the end of the OBS list!");
    }

    dbgLog.fine("RT2 metadata-related exit-chores");
    ///smd.getFileInformation().put("varQnty", variableCounter);
    dataTable.setVarQuantity(new Long(variableCounter));
    dbgLog.fine("RT2: varQnty=" + variableCounter);

    // 4.0 Initialize variables: 
    List<DataVariable> variableList = new ArrayList<DataVariable>();

    for (int i = 0; i < variableCounter; i++) {
        DataVariable dv = new DataVariable();
        String varName = variableNameList.get(i);
        dbgLog.fine("name: " + varName);
        dv.setName(varName);
        String varLabel = variableLabelMap.get(varName);
        if (varLabel != null && varLabel.length() > 255) {
            // TODO: 
            // variable labels will be changed into type 'TEXT' in the 
            // database - this will eliminate the 255 char. limit. 
            // -- L.A. 4.0 beta11
            dbgLog.fine("Have to truncate label: " + varLabel);
            varLabel = varLabel.substring(0, 255);
        }
        dbgLog.fine("label: " + varLabel);
        dv.setLabel(varLabel);

        dv.setInvalidRanges(new ArrayList<VariableRange>());
        dv.setSummaryStatistics(new ArrayList<SummaryStatistic>());
        dv.setUnf("UNF:6:");
        dv.setCategories(new ArrayList<VariableCategory>());
        variableList.add(dv);

        dv.setFileOrder(i);

        dv.setDataTable(dataTable);
    }

    dataTable.setDataVariables(variableList);

    ///smd.setVariableName(variableNameList.toArray(new String[variableNameList.size()]));
    ///smd.setVariableLabel(variableLabelMap);
    // TODO: 
    // figure out what to do with the missing value table!
    // -- 4.0 alpha
    // well, they were used to generate merged summary statistics for 
    // the variable. So need to verify what the DDI import was doing 
    // with them and replicate the same in 4.0.
    // (add appropriate value labels?)
    ///TODO: 4.0 smd.setMissingValueTable(missingValueTable);
    ///smd.getFileInformation().put("caseWeightVariableName", caseWeightVariableName);

    dbgLog.fine("sumstat:long case=" + Arrays.deepToString(variableTypelList.toArray()));

    dbgLog.fine("RT2: OBSwiseTypelList=" + OBSwiseTypelList);

    dbgLog.fine("decodeRecordType2(): end");
}

From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.sav.SAVFileReader.java

void decodeRecordType2(BufferedInputStream stream) throws IOException {
    dbgLog.fine("***** decodeRecordType2(): start *****");
    if (stream == null) {
        throw new IllegalArgumentException("stream == null!");
    }/*  w w w.jav  a2 s  . c  om*/

    Map<String, String> variableLabelMap = new LinkedHashMap<String, String>();
    Map<String, List<String>> missingValueTable = new LinkedHashMap<String, List<String>>();
    List<Integer> printFormatList = new ArrayList<Integer>();

    String caseWeightVariableName = null;
    int caseWeightVariableIndex = 0;

    boolean lastVariableIsExtendable = false;
    boolean extendedVariableMode = false;
    boolean obs255 = false;

    String lastVariableName = null;
    String lastExtendedVariable = null;

    // this field repeats as many as the number of variables in
    // this sav file

    // (note that the above statement is not technically correct, this
    //  record repeats not just for every variable in the file, but for
    //  every OBS (8 byte unit); i.e., if a string is split into multiple
    //  OBS units, each one will have its own RT2 record -- L.A.).

    // Each field constists of a fixed (32-byte) segment and
    // then a few variable segments:
    // if the variable has a label (3rd INT4 set to 1), then there's 4 more
    // bytes specifying the length of the label, and then that many bytes
    // holding the label itself (no more than 256).
    // Then if there are optional missing value units (4th INT4 set to 1)
    // there will be 3 more OBS units attached = 24 extra bytes.

    int variableCounter = 0;
    int obsSeqNumber = 0;

    int j;

    dbgLog.fine("RT2: Reading " + OBSUnitsPerCase + " OBS units.");

    for (j = 0; j < OBSUnitsPerCase; j++) {

        dbgLog.fine("RT2: \n\n+++++++++++ " + j + "-th RT2 unit is to be decoded +++++++++++");
        // 2.0: read the fixed[=non-optional] 32-byte segment
        byte[] recordType2Fixed = new byte[LENGTH_RECORDTYPE2_FIXED];

        try {
            int nbytes = stream.read(recordType2Fixed, 0, LENGTH_RECORDTYPE2_FIXED);

            //printHexDump(recordType2Fixed, "recordType2 part 1");

            if (nbytes == 0) {
                throw new IOException("reading recordType2: no bytes read!");
            }

            int offset = 0;

            // 2.1: create int-view of the bytebuffer for the first 16-byte segment
            int rt2_1st_4_units = 4;
            ByteBuffer[] bb_record_type2_fixed_part1 = new ByteBuffer[rt2_1st_4_units];
            int[] recordType2FixedPart1 = new int[rt2_1st_4_units];
            for (int i = 0; i < rt2_1st_4_units; i++) {

                bb_record_type2_fixed_part1[i] = ByteBuffer.wrap(recordType2Fixed, offset,
                        LENGTH_SAV_INT_BLOCK);

                offset += LENGTH_SAV_INT_BLOCK;
                if (isLittleEndian) {
                    bb_record_type2_fixed_part1[i].order(ByteOrder.LITTLE_ENDIAN);
                }
                recordType2FixedPart1[i] = bb_record_type2_fixed_part1[i].getInt();
            }

            dbgLog.fine("recordType2FixedPart=" + ReflectionToStringBuilder.toString(recordType2FixedPart1,
                    ToStringStyle.MULTI_LINE_STYLE));

            // 1st ([0]) element must be 2 otherwise no longer Record Type 2
            if (recordType2FixedPart1[0] != 2) {
                dbgLog.info(j + "-th RT header value is no longet RT2! " + recordType2FixedPart1[0]);
                break;
                //throw new IOException("RT2 reading error: The current position is no longer Record Type 2");
            }
            dbgLog.fine("variable type[must be 2]=" + recordType2FixedPart1[0]);

            // 2.3 variable name: 8 byte(space[x20]-padded)
            // This field is located at the very end of the 32 byte
            // fixed-size RT2 header (bytes 24-31).
            // We are processing it now, so that
            // we can make the decision on whether this variable is part
            // of a compound variable:

            String RawVariableName = new String(
                    Arrays.copyOfRange(recordType2Fixed, 24, (24 + LENGTH_VARIABLE_NAME)), defaultCharSet);
            //offset +=LENGTH_VARIABLE_NAME;
            String variableName = null;
            if (RawVariableName.indexOf(' ') >= 0) {
                variableName = RawVariableName.substring(0, RawVariableName.indexOf(' '));
            } else {
                variableName = RawVariableName;
            }

            // 2nd ([1]) element: numeric variable = 0 :for string variable
            // this block indicates its datum-length, i.e, >0 ;
            // if -1, this RT2 unit is a non-1st RT2 unit for a string variable
            // whose value is longer than 8 character.

            boolean isNumericVariable = false;

            dbgLog.fine("variable type(0: numeric; > 0: String;-1 continue )=" + recordType2FixedPart1[1]);

            //OBSwiseTypelList.add(recordType2FixedPart1[1]);

            int HowManyRt2Units = 1;

            if (recordType2FixedPart1[1] == -1) {
                dbgLog.fine("this RT2 is an 8 bit continuation chunk of an earlier string variable");
                if (obs255) {
                    if (obsSeqNumber < 30) {
                        OBSwiseTypelList.add(recordType2FixedPart1[1]);
                        obsSeqNumber++;
                    } else {
                        OBSwiseTypelList.add(-2);
                        obs255 = false;
                        obsSeqNumber = 0;
                    }
                } else {
                    OBSwiseTypelList.add(recordType2FixedPart1[1]);
                }

                obsNonVariableBlockSet.add(j);
                continue;
            } else if (recordType2FixedPart1[1] == 0) {
                // This is a numeric variable
                extendedVariableMode = false;
                // And as such, it cannot be an extension of a
                // previous, long string variable.
                OBSwiseTypelList.add(recordType2FixedPart1[1]);
                variableCounter++;
                isNumericVariable = true;
                variableTypelList.add(recordType2FixedPart1[1]);
            } else if (recordType2FixedPart1[1] > 0) {

                // This looks like a regular string variable. However,
                // it may still be a part of a compound variable
                // (a String > 255 bytes that was split into 255 byte
                // chunks, stored as individual String variables).

                if (recordType2FixedPart1[1] == 255) {
                    obs255 = true;
                }

                if (lastVariableIsExtendable) {
                    String varNameBase = null;
                    if (lastVariableName.length() > 5) {
                        varNameBase = lastVariableName.substring(0, 5);
                    } else {
                        varNameBase = lastVariableName;
                    }

                    if (extendedVariableMode) {
                        if (variableNameIsAnIncrement(varNameBase, lastExtendedVariable, variableName)) {
                            OBSwiseTypelList.add(-1);
                            lastExtendedVariable = variableName;
                            // OK, we stay in the "extended variable" mode;
                            // but we can't move on to the next OBS (hence the commented out
                            // "continue" below:
                            //continue;
                            // see the next comment below for the explanation.
                            //
                            // Should we also set "extendable" flag to false at this point
                            // if it's shorter than 255 bytes, i.e. the last extended chunk?
                        } else {
                            extendedVariableMode = false;
                        }
                    } else {
                        if (variableNameIsAnIncrement(varNameBase, variableName)) {
                            OBSwiseTypelList.add(-1);
                            extendedVariableMode = true;
                            dbgLog.fine("RT2: in extended variable mode; variable " + variableName);
                            lastExtendedVariable = variableName;
                            // Before we move on to the next OBS unit, we need to check
                            // if this current extended variable has its own label specified;
                            // If so, we need to determine its length, then read and skip
                            // that many bytes.
                            // Hence the commented out "continue" below:
                            //continue;
                        }
                    }
                }

                if (!extendedVariableMode) {
                    // OK, this is a "real"
                    // string variable, and not a continuation chunk of a compound
                    // string.

                    OBSwiseTypelList.add(recordType2FixedPart1[1]);
                    variableCounter++;

                    if (recordType2FixedPart1[1] == 255) {
                        // This variable is 255 bytes long, i.e. this is
                        // either the single "atomic" variable of the
                        // max allowed size, or it's a 255 byte segment
                        // of a compound variable. So we will check
                        // the next variable and see if it is the continuation
                        // of this one.

                        lastVariableIsExtendable = true;
                    } else {
                        lastVariableIsExtendable = false;
                    }

                    if (recordType2FixedPart1[1] % LENGTH_SAV_OBS_BLOCK == 0) {
                        HowManyRt2Units = recordType2FixedPart1[1] / LENGTH_SAV_OBS_BLOCK;
                    } else {
                        HowManyRt2Units = recordType2FixedPart1[1] / LENGTH_SAV_OBS_BLOCK + 1;
                    }
                    variableTypelList.add(recordType2FixedPart1[1]);
                }
            }

            if (!extendedVariableMode) {
                // Again, we only want to do the following steps for the "real"
                // variables, not the chunks of split mega-variables:

                dbgLog.fine("RT2: HowManyRt2Units for this variable=" + HowManyRt2Units);

                lastVariableName = variableName;

                // caseWeightVariableOBSIndex starts from 1: 0 is used for does-not-exist cases
                if (j == (caseWeightVariableOBSIndex - 1)) {
                    caseWeightVariableName = variableName;
                    caseWeightVariableIndex = variableCounter;

                    smd.setCaseWeightVariableName(caseWeightVariableName);
                    smd.getFileInformation().put("caseWeightVariableIndex", caseWeightVariableIndex);
                }

                OBSIndexToVariableName.put(j, variableName);

                //dbgLog.fine("\nvariable name="+variableName+"<-");
                dbgLog.fine("RT2: " + j + "-th variable name=" + variableName + "<-");
                dbgLog.fine("RT2: raw variable: " + RawVariableName);

                variableNameList.add(variableName);
            }

            // 3rd ([2]) element: = 1 variable-label block follows; 0 = no label
            //
            dbgLog.fine("RT: variable label follows?(1:yes; 0: no)=" + recordType2FixedPart1[2]);
            boolean hasVariableLabel = recordType2FixedPart1[2] == 1 ? true : false;
            if ((recordType2FixedPart1[2] != 0) && (recordType2FixedPart1[2] != 1)) {
                throw new IOException("RT2: reading error: value is neither 0 or 1" + recordType2FixedPart1[2]);
            }

            // 2.4 [optional]The length of a variable label followed: 4-byte int
            // 3rd element of 2.1 indicates whether this field exists
            // *** warning: The label block is padded to a multiple of the 4-byte
            // NOT the raw integer value of this 4-byte block

            if (hasVariableLabel) {
                byte[] length_variable_label = new byte[4];
                int nbytes_2_4 = stream.read(length_variable_label);
                if (nbytes_2_4 == 0) {
                    throw new IOException("RT 2: error reading recordType2.4: no bytes read!");
                } else {
                    dbgLog.fine("nbytes_2_4=" + nbytes_2_4);
                }
                ByteBuffer bb_length_variable_label = ByteBuffer.wrap(length_variable_label, 0,
                        LENGTH_VARIABLE_LABEL);
                if (isLittleEndian) {
                    bb_length_variable_label.order(ByteOrder.LITTLE_ENDIAN);
                }
                int rawVariableLabelLength = bb_length_variable_label.getInt();

                dbgLog.fine("rawVariableLabelLength=" + rawVariableLabelLength);
                int variableLabelLength = getSAVintAdjustedBlockLength(rawVariableLabelLength);
                dbgLog.fine("RT2: variableLabelLength=" + variableLabelLength);

                // 2.5 [optional]variable label whose length is found at 2.4

                String variableLabel = "";

                if (rawVariableLabelLength > 0) {
                    byte[] variable_label = new byte[variableLabelLength];
                    int nbytes_2_5 = stream.read(variable_label);
                    if (nbytes_2_5 == 0) {
                        throw new IOException("RT 2: error reading recordType2.5: " + variableLabelLength
                                + " bytes requested, no bytes read!");
                    } else {
                        dbgLog.fine("nbytes_2_5=" + nbytes_2_5);
                    }
                    variableLabel = new String(Arrays.copyOfRange(variable_label, 0, rawVariableLabelLength),
                            defaultCharSet);
                    dbgLog.fine("RT2: variableLabel=" + variableLabel + "<-");

                    dbgLog.info(variableName + " => " + variableLabel);
                } else {
                    dbgLog.fine("RT2: defaulting to empty variable label.");
                }

                if (!extendedVariableMode) {
                    // We only have any use for this label if it's a "real" variable.
                    // Thinking about it, it doesn't make much sense for the "fake"
                    // variables that are actually chunks of large strings to store
                    // their own labels. But in some files they do. Then failing to read
                    // the bytes would result in getting out of sync with the RT record
                    // borders. So we always read the bytes, but only use them for
                    // the real variable entries.
                    /*String variableLabel = new String(Arrays.copyOfRange(variable_label,
                        0, rawVariableLabelLength),"US-ASCII");*/

                    variableLabelMap.put(variableName, variableLabel);
                }
            }

            if (extendedVariableMode) {
                // there's nothing else left for us to do in this iteration of the loop.
                // Once again, this was not a real variable, but a dummy variable entry
                // created for a chunk of a string variable longer than 255 bytes --
                // that's how SPSS stores them.
                continue;
            }

            // 4th ([3]) element: Missing value type code
            // 0[none], 1, 2, 3 [point-type],-2[range], -3 [range type+ point]

            dbgLog.fine("RT: missing value unit follows?(if 0, none)=" + recordType2FixedPart1[3]);
            boolean hasMissingValues = (validMissingValueCodeSet.contains(recordType2FixedPart1[3])
                    && (recordType2FixedPart1[3] != 0)) ? true : false;

            InvalidData invalidDataInfo = null;

            if (recordType2FixedPart1[3] != 0) {
                invalidDataInfo = new InvalidData(recordType2FixedPart1[3]);
                dbgLog.fine("RT: missing value type=" + invalidDataInfo.getType());
            }

            // 2.2: print/write formats: 4-byte each = 8 bytes

            byte[] printFormt = Arrays.copyOfRange(recordType2Fixed, offset, offset + LENGTH_PRINT_FORMAT_CODE);
            dbgLog.fine("printFrmt=" + new String(Hex.encodeHex(printFormt)));

            offset += LENGTH_PRINT_FORMAT_CODE;
            int formatCode = isLittleEndian ? printFormt[2] : printFormt[1];
            int formatWidth = isLittleEndian ? printFormt[1] : printFormt[2];
            int formatDecimalPointPosition = isLittleEndian ? printFormt[0] : printFormt[3];
            dbgLog.fine("RT2: format code{5=F, 1=A[String]}=" + formatCode);

            formatDecimalPointPositionList.add(formatDecimalPointPosition);

            if (!SPSSConstants.FORMAT_CODE_TABLE_SAV.containsKey(formatCode)) {
                throw new IOException("Unknown format code was found = " + formatCode);
            } else {
                printFormatList.add(formatCode);
            }

            byte[] writeFormt = Arrays.copyOfRange(recordType2Fixed, offset, offset + LENGTH_WRITE_FORMAT_CODE);

            dbgLog.fine("RT2: writeFrmt=" + new String(Hex.encodeHex(writeFormt)));
            if (writeFormt[3] != 0x00) {
                dbgLog.fine("byte-order(write format): reversal required");
            }

            offset += LENGTH_WRITE_FORMAT_CODE;

            if (!SPSSConstants.ORDINARY_FORMAT_CODE_SET.contains(formatCode)) {
                StringBuilder sb = new StringBuilder(
                        SPSSConstants.FORMAT_CODE_TABLE_SAV.get(formatCode) + formatWidth);
                if (formatDecimalPointPosition > 0) {
                    sb.append("." + formatDecimalPointPosition);
                }
                dbgLog.info("formattable[i] = " + variableName + " -> " + sb.toString());
                printFormatNameTable.put(variableName, sb.toString());

            }

            printFormatTable.put(variableName, SPSSConstants.FORMAT_CODE_TABLE_SAV.get(formatCode));

            // 2.6 [optional] missing values:4-byte each if exists
            // 4th element of 2.1 indicates the structure of this sub-field

            // Should we perhaps check for this for the "fake" variables too?
            //

            if (hasMissingValues) {
                dbgLog.fine("RT2: decoding missing value: type=" + recordType2FixedPart1[3]);
                int howManyMissingValueUnits = missingValueCodeUnits.get(recordType2FixedPart1[3]);
                //int howManyMissingValueUnits = recordType2FixedPart1[3] > 0 ? recordType2FixedPart1[3] :  0;

                dbgLog.fine("RT2: howManyMissingValueUnits=" + howManyMissingValueUnits);

                byte[] missing_value_code_units = new byte[LENGTH_SAV_OBS_BLOCK * howManyMissingValueUnits];
                int nbytes_2_6 = stream.read(missing_value_code_units);

                if (nbytes_2_6 == 0) {
                    throw new IOException("RT 2: reading recordType2.6: no byte was read");
                } else {
                    dbgLog.fine("nbytes_2_6=" + nbytes_2_6);
                }

                //printHexDump(missing_value_code_units, "missing value");

                if (isNumericVariable) {

                    double[] missingValues = new double[howManyMissingValueUnits];
                    //List<String> mvp = new ArrayList<String>();
                    List<String> mv = new ArrayList<String>();

                    ByteBuffer[] bb_missig_value_code = new ByteBuffer[howManyMissingValueUnits];

                    int offset_start = 0;

                    for (int i = 0; i < howManyMissingValueUnits; i++) {

                        bb_missig_value_code[i] = ByteBuffer.wrap(missing_value_code_units, offset_start,
                                LENGTH_SAV_OBS_BLOCK);

                        offset_start += LENGTH_SAV_OBS_BLOCK;
                        if (isLittleEndian) {
                            bb_missig_value_code[i].order(ByteOrder.LITTLE_ENDIAN);
                        }

                        ByteBuffer temp = bb_missig_value_code[i].duplicate();

                        missingValues[i] = bb_missig_value_code[i].getDouble();
                        if (Double.toHexString(missingValues[i]).equals("-0x1.ffffffffffffep1023")) {
                            dbgLog.fine("1st value is LOWEST");
                            mv.add(Double.toHexString(missingValues[i]));
                        } else if (Double.valueOf(missingValues[i]).equals(Double.MAX_VALUE)) {
                            dbgLog.fine("2nd value is HIGHEST");
                            mv.add(Double.toHexString(missingValues[i]));
                        } else {
                            mv.add(doubleNumberFormatter.format(missingValues[i]));
                        }
                        dbgLog.fine(i + "-th missing value=" + Double.toHexString(missingValues[i]));
                    }

                    dbgLog.fine("variableName=" + variableName);
                    if (recordType2FixedPart1[3] > 0) {
                        // point cases only
                        dbgLog.fine("mv(>0)=" + mv);
                        missingValueTable.put(variableName, mv);
                        invalidDataInfo.setInvalidValues(mv);
                    } else if (recordType2FixedPart1[3] == -2) {
                        dbgLog.fine("mv(-2)=" + mv);
                        // range
                        invalidDataInfo.setInvalidRange(mv);
                    } else if (recordType2FixedPart1[3] == -3) {
                        // mixed case
                        dbgLog.fine("mv(-3)=" + mv);
                        invalidDataInfo.setInvalidRange(mv.subList(0, 2));
                        invalidDataInfo.setInvalidValues(mv.subList(2, 3));
                        missingValueTable.put(variableName, mv.subList(2, 3));
                    }

                    dbgLog.fine("missing value=" + StringUtils.join(missingValueTable.get(variableName), "|"));
                    dbgLog.fine("invalidDataInfo(Numeric):\n" + invalidDataInfo);
                    invalidDataTable.put(variableName, invalidDataInfo);
                } else {
                    // string variable case
                    String[] missingValues = new String[howManyMissingValueUnits];
                    List<String> mv = new ArrayList<String>();
                    int offset_start = 0;
                    int offset_end = LENGTH_SAV_OBS_BLOCK;
                    for (int i = 0; i < howManyMissingValueUnits; i++) {

                        missingValues[i] = StringUtils.stripEnd(new String(
                                Arrays.copyOfRange(missing_value_code_units, offset_start, offset_end),
                                defaultCharSet), " ");
                        dbgLog.fine("missing value=" + missingValues[i] + "<-");

                        offset_start = offset_end;
                        offset_end += LENGTH_SAV_OBS_BLOCK;

                        mv.add(missingValues[i]);
                    }
                    invalidDataInfo.setInvalidValues(mv);
                    missingValueTable.put(variableName, mv);
                    invalidDataTable.put(variableName, invalidDataInfo);
                    dbgLog.fine(
                            "missing value(str)=" + StringUtils.join(missingValueTable.get(variableName), "|"));
                    dbgLog.fine("invalidDataInfo(String):\n" + invalidDataInfo);

                } // string case
                dbgLog.fine("invalidDataTable:\n" + invalidDataTable);
            } // if msv

        } catch (IOException ex) {
            //ex.printStackTrace();
            throw ex;
        } catch (Exception ex) {
            ex.printStackTrace();
            // should we be throwing some exception here?
        }
    } // j-loop

    if (j == OBSUnitsPerCase) {
        dbgLog.fine("RT2 metadata-related exit-chores");
        smd.getFileInformation().put("varQnty", variableCounter);
        varQnty = variableCounter;
        dbgLog.fine("RT2: varQnty=" + varQnty);

        smd.setVariableName(variableNameList.toArray(new String[variableNameList.size()]));
        smd.setVariableLabel(variableLabelMap);
        smd.setMissingValueTable(missingValueTable);
        smd.getFileInformation().put("caseWeightVariableName", caseWeightVariableName);

        dbgLog.info("sumstat:long case=" + Arrays.deepToString(variableTypelList.toArray()));

        smd.setVariableFormat(printFormatList);
        smd.setVariableFormatName(printFormatNameTable);

        dbgLog.info("<<<<<<");
        dbgLog.info("printFormatList = " + printFormatList);
        dbgLog.info("printFormatNameTable = " + printFormatNameTable);
        // dbgLog.info("formatCategoryTable = " + formatCategoryTable);
        dbgLog.info(">>>>>>");

        dbgLog.fine("RT2: OBSwiseTypelList=" + OBSwiseTypelList);

        // variableType is determined after the valueTable is finalized
    } else {
        dbgLog.info("RT2: attention! didn't reach the end of the OBS list!");
        throw new IOException("RT2: didn't reach the end of the OBS list!");
    }
    dbgLog.fine("***** decodeRecordType2(): end *****");
}

From source file:edu.harvard.iq.dvn.ingest.statdataio.impl.plugins.sav.SAVFileReader.java

void decodeRecordTypeDataCompressed(BufferedInputStream stream) throws IOException {

    dbgLog.fine("***** decodeRecordTypeDataCompressed(): start *****");

    if (stream == null) {
        throw new IllegalArgumentException("decodeRecordTypeDataCompressed: stream == null!");
    }//www. j av  a  2  s.  co  m
    Map<String, String> formatCategoryTable = new LinkedHashMap<String, String>();

    PrintWriter pwout = createOutputWriter(stream);

    boolean hasStringVarContinuousBlock = obsNonVariableBlockSet.size() > 0 ? true : false;
    dbgLog.fine("hasStringVarContinuousBlock=" + hasStringVarContinuousBlock);

    int ii = 0;

    int OBS = LENGTH_SAV_OBS_BLOCK;
    int nOBS = OBSUnitsPerCase;

    dbgLog.fine("OBSUnitsPerCase=" + OBSUnitsPerCase);

    int caseIndex = 0;

    dbgLog.fine("printFormatTable:\n" + printFormatTable);

    dbgLog.fine("printFormatNameTable:\n" + printFormatNameTable);
    variableFormatTypeList = new String[varQnty];

    dbgLog.fine("varQnty: " + varQnty);

    for (int i = 0; i < varQnty; i++) {
        variableFormatTypeList[i] = SPSSConstants.FORMAT_CATEGORY_TABLE
                .get(printFormatTable.get(variableNameList.get(i)));
        dbgLog.fine("i=" + i + "th variableFormatTypeList=" + variableFormatTypeList[i]);
        formatCategoryTable.put(variableNameList.get(i), variableFormatTypeList[i]);
    }
    dbgLog.fine("variableFormatType:\n" + Arrays.deepToString(variableFormatTypeList));
    dbgLog.fine("formatCategoryTable:\n" + formatCategoryTable);

    // contents (variable) checker concering decimals
    variableTypeFinal = new int[varQnty];
    Arrays.fill(variableTypeFinal, 0);

    List<String> casewiseRecordForUNF = new ArrayList<String>();
    String[] caseWiseDateFormatForUNF = null;
    List<String> casewiseRecordForTabFile = new ArrayList<String>();

    // missing values are written to the tab-delimited file by
    // using the default or user-specified missing-value  strings;
    // however, to calculate UNF/summary statistics,
    // classes for these calculations require their specific 
    // missing values that differ from the above missing-value
    // strings; therefore, after row data for the tab-delimited 
    // file are written, missing values in a row are changed to
    // UNF/summary-statistics-OK ones.

    // data-storage object for sumStat
    dataTable2 = new Object[varQnty][caseQnty];
    // storage of date formats to pass to UNF
    dateFormats = new String[varQnty][caseQnty];

    try {
        // this compression is applied only to non-float data, i.e. integer;
        // 8-byte float datum is kept in tact
        boolean hasReachedEOF = false;

        OBSERVATION: while (true) {

            dbgLog.fine("SAV Reader: compressed: ii=" + ii + "-th iteration");

            byte[] octate = new byte[LENGTH_SAV_OBS_BLOCK];

            int nbytes = stream.read(octate);

            // processCompressedOBSblock ()

            // (this means process a block of 8 compressed OBS
            // values -- should result in 64 bytes of data total)

            for (int i = 0; i < LENGTH_SAV_OBS_BLOCK; i++) {

                dbgLog.finer("i=" + i + "-th iteration");
                int octate_i = octate[i];
                //dbgLog.fine("octate="+octate_i);
                if (octate_i < 0) {
                    octate_i += 256;
                }
                int byteCode = octate_i;//octate_i & 0xF;
                //out.println("byeCode="+byteCode);

                // processCompressedOBS

                switch (byteCode) {
                case 252:
                    // end of the file
                    dbgLog.fine("SAV Reader: compressed: end of file mark [FC] was found");
                    hasReachedEOF = true;
                    break;
                case 253:
                    // FD: uncompressed data follows after this octate
                    // long string datum or float datum
                    // read the following octate
                    byte[] uncompressedByte = new byte[LENGTH_SAV_OBS_BLOCK];
                    int ucbytes = stream.read(uncompressedByte);
                    int typeIndex = (ii * OBS + i) % nOBS;

                    if ((OBSwiseTypelList.get(typeIndex) > 0) || (OBSwiseTypelList.get(typeIndex) == -1)) {
                        // code= >0 |-1: string or its conitiguous block
                        // decode as a string object
                        String strdatum = new String(
                                Arrays.copyOfRange(uncompressedByte, 0, LENGTH_SAV_OBS_BLOCK), defaultCharSet);
                        //out.println("str_datum="+strdatum+"<-");
                        // add this non-missing-value string datum
                        casewiseRecordForTabFile.add(strdatum);
                        //out.println("casewiseRecordForTabFile(String)="+casewiseRecordForTabFile);
                    } else if (OBSwiseTypelList.get(typeIndex) == -2) {
                        String strdatum = new String(
                                Arrays.copyOfRange(uncompressedByte, 0, LENGTH_SAV_OBS_BLOCK - 1),
                                defaultCharSet);
                        casewiseRecordForTabFile.add(strdatum);
                        //out.println("casewiseRecordForTabFile(String)="+casewiseRecordForTabFile);
                    } else if (OBSwiseTypelList.get(typeIndex) == 0) {
                        // code= 0: numeric

                        ByteBuffer bb_double = ByteBuffer.wrap(uncompressedByte, 0, LENGTH_SAV_OBS_BLOCK);
                        if (isLittleEndian) {
                            bb_double.order(ByteOrder.LITTLE_ENDIAN);
                        }

                        Double ddatum = bb_double.getDouble();
                        // out.println("ddatum="+ddatum);
                        // add this non-missing-value numeric datum
                        casewiseRecordForTabFile.add(doubleNumberFormatter.format(ddatum));
                        dbgLog.fine("SAV Reader: compressed: added value to dataLine: " + ddatum);

                    } else {
                        dbgLog.fine("SAV Reader: out-of-range exception");
                        throw new IOException("out-of-range value was found");
                    }

                    /*
                    // EOF-check after reading this octate
                    if (stream.available() == 0){
                    hasReachedEOF = true;
                    dbgLog.fine(
                    "SAV Reader: *** After reading an uncompressed octate," +
                    " reached the end of the file at "+ii
                    +"th iteration and i="+i+"th octate position [0-start] *****");
                    }
                     */

                    break;
                case 254:
                    // FE: used as the missing value for string variables
                    // an empty case in a string variable also takes this value
                    // string variable does not accept space-only data
                    // cf: uncompressed case
                    // 20 20 20 20 20 20 20 20
                    // add the string missing value
                    // out.println("254: String missing data");

                    casewiseRecordForTabFile.add(" "); // add "." here?

                    // Note that technically this byte flag (254/xFE) means
                    // that *eight* white space characters should be
                    // written to the output stream. This caused me
                    // a great amount of confusion, because it appeared
                    // to me that there was a mismatch between the number
                    // of bytes advertised in the variable metadata and
                    // the number of bytes actually found in the data
                    // section of a compressed SAV file; this is because
                    // these 8 bytes "come out of nowhere"; they are not
                    // written in the data section, but this flag specifies
                    // that they should be added to the output.
                    // Also, as I pointed out above, we are only writing
                    // out one whitespace character, not 8 as instructed.
                    // This appears to be legit; these blocks of 8 spaces
                    // seem to be only used for padding, and all such
                    // multiple padding spaces are stripped anyway during
                    // the post-processing.

                    break;
                case 255:
                    // FF: system missing value for numeric variables
                    // cf: uncompressed case (sysmis)
                    // FF FF FF FF FF FF eF FF(little endian)
                    // add the numeric missing value
                    dbgLog.fine("SAV Reader: compressed: Missing Value, numeric");
                    casewiseRecordForTabFile.add(MissingValueForTextDataFileNumeric);

                    break;
                case 0:
                    // 00: do nothing
                    dbgLog.fine("SAV Reader: compressed: doing nothing (zero); ");

                    break;
                default:
                    //out.println("byte code(default)="+ byteCode);
                    if ((byteCode > 0) && (byteCode < 252)) {
                        // datum is compressed
                        //Integer unCompressed = Integer.valueOf(byteCode -100);
                        // add this uncompressed numeric datum
                        Double unCompressed = Double.valueOf(byteCode - 100);
                        dbgLog.fine("SAV Reader: compressed: default case: " + unCompressed);

                        casewiseRecordForTabFile.add(doubleNumberFormatter.format(unCompressed));
                        // out.println("uncompressed="+unCompressed);
                        // out.println("dataline="+casewiseRecordForTabFile);
                    }
                }// end of switch

                // out.println("end of switch");

                // The-end-of-a-case(row)-processing

                // this line that follows, and the code around it
                // is really confusing:
                int varCounter = (ii * OBS + i + 1) % nOBS;
                // while both OBS and LENGTH_SAV_OBS_BLOCK = 8
                // (OBS was initialized as OBS=LENGTH_SAV_OBS_BLOCK),
                // the 2 values mean different things:
                // LENGTH_SAV_OBS_BLOCK is the number of bytes in one OBS;
                // and OBS is the number of OBS blocks that we process
                // at a time. I.e., we process 8 chunks of 8 bytes at a time.
                // This is how data is organized inside an SAV file:
                // 8 bytes of compression flags, followd by 8x8 or fewer
                // (depending on the flags) bytes of compressed data.
                // I should rename this OBS variable something more
                // meaningful.
                //
                // Also, the "varCounter" variable name is entirely
                // misleading -- it counts not variables, but OBS blocks.

                dbgLog.fine("SAV Reader: compressed: OBS counter=" + varCounter + "(ii=" + ii + ")");

                if ((ii * OBS + i + 1) % nOBS == 0) {

                    //out.println("casewiseRecordForTabFile(before)="+casewiseRecordForTabFile);

                    // out.println("all variables in a case are parsed == nOBS");
                    // out.println("hasStringVarContinuousBlock="+hasStringVarContinuousBlock);

                    // check whether a string-variable's continuous block exits
                    // if so, they must be joined

                    if (hasStringVarContinuousBlock) {

                        // string-variable's continuous-block-concatenating-processing

                        //out.println("concatenating process starts");
                        //out.println("casewiseRecordForTabFile(before)="+casewiseRecordForTabFile);
                        //out.println("casewiseRecordForTabFile(before:size)="+casewiseRecordForTabFile.size());

                        StringBuilder sb = new StringBuilder("");
                        int firstPosition = 0;

                        Set<Integer> removeJset = new HashSet<Integer>();
                        for (int j = 0; j < nOBS; j++) {
                            dbgLog.fine("RTD: j=" + j + "-th type =" + OBSwiseTypelList.get(j));
                            if ((OBSwiseTypelList.get(j) == -1) || (OBSwiseTypelList.get(j) == -2)) {
                                // Continued String variable found at j-th
                                // position. look back the j-1
                                firstPosition = j - 1;
                                int lastJ = j;
                                String concatenated = null;

                                removeJset.add(j);
                                sb.append(casewiseRecordForTabFile.get(j - 1));
                                sb.append(casewiseRecordForTabFile.get(j));

                                for (int jc = 1;; jc++) {
                                    if ((j + jc == nOBS) || ((OBSwiseTypelList.get(j + jc) != -1)
                                            && (OBSwiseTypelList.get(j + jc) != -2))) {

                                        // j is the end unit of this string variable
                                        concatenated = sb.toString();
                                        sb.setLength(0);
                                        lastJ = j + jc;
                                        break;
                                    } else {
                                        sb.append(casewiseRecordForTabFile.get(j + jc));
                                        removeJset.add(j + jc);
                                    }
                                }
                                casewiseRecordForTabFile.set(j - 1, concatenated);

                                //out.println(j-1+"th concatenated="+concatenated);
                                j = lastJ - 1;

                            } // end-of-if: continuous-OBS only

                        } // end of loop-j

                        //out.println("removeJset="+removeJset);

                        // a new list that stores a new case with concatanated string data
                        List<String> newDataLine = new ArrayList<String>();

                        for (int jl = 0; jl < casewiseRecordForTabFile.size(); jl++) {
                            //out.println("jl="+jl+"-th datum =["+casewiseRecordForTabFile.get(jl)+"]");

                            if (!removeJset.contains(jl)) {

                                //                                if (casewiseRecordForTabFile.get(jl).equals(MissingValueForTextDataFileString)){
                                //                                    out.println("NA-S jl= "+jl+"=["+casewiseRecordForTabFile.get(jl)+"]");
                                //                                } else if (casewiseRecordForTabFile.get(jl).equals(MissingValueForTextDataFileNumeric)){
                                //                                    out.println("NA-N jl= "+jl+"=["+casewiseRecordForTabFile.get(jl)+"]");
                                //                                } else if (casewiseRecordForTabFile.get(jl)==null){
                                //                                    out.println("null case jl="+jl+"=["+casewiseRecordForTabFile.get(jl)+"]");
                                //                                } else if (casewiseRecordForTabFile.get(jl).equals("NaN")){
                                //                                    out.println("NaN jl= "+jl+"=["+casewiseRecordForTabFile.get(jl)+"]");
                                //                                } else if (casewiseRecordForTabFile.get(jl).equals("")){
                                //                                    out.println("blank jl= "+jl+"=["+casewiseRecordForTabFile.get(jl)+"]");
                                //                                } else if (casewiseRecordForTabFile.get(jl).equals(" ")){
                                //                                    out.println("space jl= "+jl+"=["+casewiseRecordForTabFile.get(jl)+"]");
                                //                                }

                                newDataLine.add(casewiseRecordForTabFile.get(jl));
                            } else {
                                //                                out.println("Excluded: jl="+jl+"-th datum=["+casewiseRecordForTabFile.get(jl)+"]");
                            }
                        } // end of loop-jl

                        //out.println("new casewiseRecordForTabFile="+newDataLine);
                        //out.println("new casewiseRecordForTabFile(size)="+newDataLine.size());

                        casewiseRecordForTabFile = newDataLine;

                    } // end-if: stringContinuousVar-exist case

                    for (int el = 0; el < casewiseRecordForTabFile.size(); el++) {
                        casewiseRecordForUNF.add(casewiseRecordForTabFile.get(el));
                    }

                    caseWiseDateFormatForUNF = new String[casewiseRecordForTabFile.size()];

                    // caseIndex starts from 1 not 0
                    caseIndex = (ii * OBS + i + 1) / nOBS;

                    for (int k = 0; k < casewiseRecordForTabFile.size(); k++) {

                        dbgLog.fine("k=" + k + "-th variableTypelList=" + variableTypelList.get(k));

                        if (variableTypelList.get(k) > 0) {
                            // String variable case: set to  -1
                            variableTypeFinal[k] = -1;

                            // Strip the String variables off the
                            // whitespace padding:

                            // [ snipped ]

                            // I've removed the block of code above where
                            // String values were substring()-ed to the
                            // length specified in the variable metadata;
                            // Doing that was not enough, since a string
                            // can still be space-padded inside its
                            // advertised capacity. (note that extended
                            // variables can have many kylobytes of such
                            // padding in them!) Plus it was completely
                            // redundant, since we are stripping all the
                            // trailing white spaces with
                            // StringUtils.stripEnd() below:

                            String paddRemoved = StringUtils
                                    .stripEnd(casewiseRecordForTabFile.get(k).toString(), null);
                            // TODO: clean this up.  For now, just make sure that strings contain at least one blank space.
                            if (paddRemoved.equals("")) {
                                paddRemoved = " ";
                            }

                            casewiseRecordForUNF.set(k, paddRemoved);
                            casewiseRecordForTabFile.set(k, "\""
                                    + paddRemoved.replaceAll("\"", Matcher.quoteReplacement("\\\"")) + "\"");

                            // end of String var case

                        } else {
                            // numeric var case
                            if (casewiseRecordForTabFile.get(k).equals(MissingValueForTextDataFileNumeric)) {
                                casewiseRecordForUNF.set(k, null);

                            }

                        } // end of variable-type check

                        if (casewiseRecordForTabFile.get(k) != null && !casewiseRecordForTabFile.get(k)
                                .equals(MissingValueForTextDataFileNumeric)) {

                            String variableFormatType = variableFormatTypeList[k];
                            dbgLog.finer("k=" + k + "th printFormatTable format="
                                    + printFormatTable.get(variableNameList.get(k)));

                            int formatDecimalPointPosition = formatDecimalPointPositionList.get(k);

                            if (variableFormatType.equals("date")) {
                                dbgLog.finer("date case");

                                long dateDatum = Long.parseLong(casewiseRecordForTabFile.get(k).toString())
                                        * 1000L - SPSS_DATE_OFFSET;

                                String newDatum = sdf_ymd.format(new Date(dateDatum));
                                dbgLog.finer("k=" + k + ":" + newDatum);
                                caseWiseDateFormatForUNF[k] = sdf_ymd.toPattern();
                                /* saving date format */
                                dbgLog.finer("setting caseWiseDateFormatForUNF[k] = " + sdf_ymd.toPattern());
                                casewiseRecordForTabFile.set(k, newDatum);
                                casewiseRecordForUNF.set(k, newDatum);
                                //formatCategoryTable.put(variableNameList.get(k), "date");
                            } else if (variableFormatType.equals("time")) {
                                dbgLog.finer("time case:DTIME or DATETIME or TIME");
                                //formatCategoryTable.put(variableNameList.get(k), "time");

                                if (printFormatTable.get(variableNameList.get(k)).equals("DTIME")) {

                                    if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
                                        long dateDatum = Long
                                                .parseLong(casewiseRecordForTabFile.get(k).toString()) * 1000L
                                                - SPSS_DATE_BIAS;
                                        String newDatum = sdf_dhms.format(new Date(dateDatum));
                                        dbgLog.finer("k=" + k + ":" + newDatum);
                                        casewiseRecordForTabFile.set(k, newDatum);
                                        casewiseRecordForUNF.set(k, newDatum);
                                    } else {
                                        // decimal point included
                                        String[] timeData = casewiseRecordForTabFile.get(k).toString()
                                                .split("\\.");

                                        dbgLog.finer(StringUtils.join(timeData, "|"));
                                        long dateDatum = Long.parseLong(timeData[0]) * 1000L - SPSS_DATE_BIAS;
                                        StringBuilder sb_time = new StringBuilder(
                                                sdf_dhms.format(new Date(dateDatum)));
                                        dbgLog.finer(sb_time.toString());

                                        if (formatDecimalPointPosition > 0) {
                                            sb_time.append(
                                                    "." + timeData[1].substring(0, formatDecimalPointPosition));
                                        }

                                        dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                        casewiseRecordForTabFile.set(k, sb_time.toString());
                                        casewiseRecordForUNF.set(k, sb_time.toString());
                                    }
                                } else if (printFormatTable.get(variableNameList.get(k)).equals("DATETIME")) {

                                    if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
                                        long dateDatum = Long
                                                .parseLong(casewiseRecordForTabFile.get(k).toString()) * 1000L
                                                - SPSS_DATE_OFFSET;
                                        String newDatum = sdf_ymdhms.format(new Date(dateDatum));
                                        dbgLog.finer("k=" + k + ":" + newDatum);
                                        caseWiseDateFormatForUNF[k] = sdf_ymdhms.toPattern();
                                        casewiseRecordForTabFile.set(k, newDatum);
                                        casewiseRecordForUNF.set(k, newDatum);
                                    } else {
                                        // decimal point included
                                        String[] timeData = casewiseRecordForTabFile.get(k).toString()
                                                .split("\\.");

                                        //dbgLog.finer(StringUtils.join(timeData, "|"));
                                        long dateDatum = Long.parseLong(timeData[0]) * 1000L - SPSS_DATE_OFFSET;
                                        StringBuilder sb_time = new StringBuilder(
                                                sdf_ymdhms.format(new Date(dateDatum)));
                                        //dbgLog.finer(sb_time.toString());

                                        if (formatDecimalPointPosition > 0) {
                                            sb_time.append(
                                                    "." + timeData[1].substring(0, formatDecimalPointPosition));
                                        }
                                        caseWiseDateFormatForUNF[k] = sdf_ymdhms.toPattern()
                                                + (formatDecimalPointPosition > 0 ? ".S" : "");
                                        dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                        casewiseRecordForTabFile.set(k, sb_time.toString());
                                        casewiseRecordForUNF.set(k, sb_time.toString());
                                    }
                                } else if (printFormatTable.get(variableNameList.get(k)).equals("TIME")) {
                                    if (casewiseRecordForTabFile.get(k).toString().indexOf(".") < 0) {
                                        long dateDatum = Long
                                                .parseLong(casewiseRecordForTabFile.get(k).toString()) * 1000L;
                                        String newDatum = sdf_hms.format(new Date(dateDatum));
                                        caseWiseDateFormatForUNF[k] = sdf_hms.toPattern();
                                        dbgLog.finer("k=" + k + ":" + newDatum);
                                        casewiseRecordForTabFile.set(k, newDatum);
                                        casewiseRecordForUNF.set(k, newDatum);
                                    } else {
                                        // decimal point included
                                        String[] timeData = casewiseRecordForTabFile.get(k).toString()
                                                .split("\\.");

                                        //dbgLog.finer(StringUtils.join(timeData, "|"));
                                        long dateDatum = Long.parseLong(timeData[0]) * 1000L;
                                        StringBuilder sb_time = new StringBuilder(
                                                sdf_hms.format(new Date(dateDatum)));
                                        //dbgLog.finer(sb_time.toString());

                                        if (formatDecimalPointPosition > 0) {
                                            sb_time.append(
                                                    "." + timeData[1].substring(0, formatDecimalPointPosition));
                                        }
                                        caseWiseDateFormatForUNF[k] = this.sdf_hms.toPattern()
                                                + (formatDecimalPointPosition > 0 ? ".S" : "");
                                        dbgLog.finer("k=" + k + ":" + sb_time.toString());
                                        casewiseRecordForTabFile.set(k, sb_time.toString());
                                        casewiseRecordForUNF.set(k, sb_time.toString());
                                    }
                                }

                            } else if (variableFormatType.equals("other")) {
                                dbgLog.finer("other non-date/time case:=" + i);

                                if (printFormatTable.get(variableNameList.get(k)).equals("WKDAY")) {
                                    // day of week
                                    dbgLog.finer("data k=" + k + ":" + casewiseRecordForTabFile.get(k));
                                    dbgLog.finer("data k=" + k + ":" + SPSSConstants.WEEKDAY_LIST.get(
                                            Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1));
                                    String newDatum = SPSSConstants.WEEKDAY_LIST.get(
                                            Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1);
                                    casewiseRecordForTabFile.set(k, newDatum);
                                    casewiseRecordForUNF.set(k, newDatum);
                                    dbgLog.finer("wkday:k=" + k + ":" + casewiseRecordForTabFile.get(k));
                                } else if (printFormatTable.get(variableNameList.get(k)).equals("MONTH")) {
                                    // month
                                    dbgLog.finer("data k=" + k + ":" + casewiseRecordForTabFile.get(k));
                                    dbgLog.finer("data k=" + k + ":" + SPSSConstants.MONTH_LIST.get(
                                            Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1));
                                    String newDatum = SPSSConstants.MONTH_LIST.get(
                                            Integer.valueOf(casewiseRecordForTabFile.get(k).toString()) - 1);
                                    casewiseRecordForTabFile.set(k, newDatum);
                                    casewiseRecordForUNF.set(k, newDatum);
                                    dbgLog.finer("month:k=" + k + ":" + casewiseRecordForTabFile.get(k));
                                }
                            }

                        } // end: date-time-datum check

                    } // end: loop-k(2nd: variable-wise-check)

                    // write to tab file
                    if (casewiseRecordForTabFile.size() > 0) {
                        pwout.println(StringUtils.join(casewiseRecordForTabFile, "\t"));
                    }

                    if (casewiseRecordForTabFile.size() > 0) {
                        for (int ij = 0; ij < varQnty; ij++) {
                            dataTable2[ij][caseIndex - 1] = casewiseRecordForUNF.get(ij);

                            if (variableFormatTypeList[ij].equals("date")
                                    || variableFormatTypeList[ij].equals("time")) {
                                this.dateFormats[ij][caseIndex - 1] = caseWiseDateFormatForUNF[ij];
                            }
                        }
                    }

                    // numeric contents-check
                    for (int l = 0; l < casewiseRecordForTabFile.size(); l++) {

                        if (variableFormatTypeList[l].equals("date") || variableFormatTypeList[l].equals("time")
                                || printFormatTable.get(variableNameList.get(l)).equals("WKDAY")
                                || printFormatTable.get(variableNameList.get(l)).equals("MONTH")) {
                            variableTypeFinal[l] = -1;
                        }

                        if (variableTypeFinal[l] == 0) {
                            if (casewiseRecordForTabFile.get(l).toString().indexOf(".") >= 0) {
                                // TODO - check for large numbers
                                // l-th variable is not integer
                                variableTypeFinal[l] = 1;
                                decimalVariableSet.add(l);
                            }
                        }
                    }

                    // reset the case-wise working objects
                    casewiseRecordForUNF.clear();
                    casewiseRecordForTabFile.clear();

                    if (caseQnty > 0) {
                        if (caseIndex == caseQnty) {
                            hasReachedEOF = true;
                        }
                    }

                    if (hasReachedEOF) {
                        break;
                    }

                } // if(The-end-of-a-case(row)-processing)

            } // loop-i (OBS unit)

            if ((hasReachedEOF) || (stream.available() == 0)) {
                // reached the end of this file
                // do exit-processing

                dbgLog.fine("***** reached the end of the file at " + ii + "th iteration *****");

                break OBSERVATION;
            }

            ii++;

        } // while loop

        pwout.close();
    } catch (IOException ex) {
        throw ex;
    }

    smd.setDecimalVariables(decimalVariableSet);
    smd.getFileInformation().put("caseQnty", caseQnty);
    smd.setVariableFormatCategory(formatCategoryTable);
    dbgLog.info("<<<<<<");
    //dbgLog.info("printFormatList = " + printFormatList);
    //dbgLog.info("printFormatNameTable = " + printFormatNameTable);
    dbgLog.info("formatCategoryTable = " + formatCategoryTable);
    dbgLog.info(">>>>>>");

    // contents check
    //out.println("variableType="+ArrayUtils.toString(variableTypeFinal));
    dbgLog.fine("decimalVariableSet=" + decimalVariableSet);
    //out.println("variableTypelList=\n"+ variableTypelList.toString());

    // out.println("dataTable2:\n"+Arrays.deepToString(dataTable2));
    dbgLog.fine("***** decodeRecordTypeDataCompressed(): end *****");
}

From source file:edu.harvard.iq.dataverse.ingest.tabulardata.impl.plugins.sav.SAVFileReader.java

void decodeRecordType1(BufferedInputStream stream) throws IOException {
    dbgLog.fine("***** decodeRecordType1(): start *****");

    if (stream == null) {
        throw new IllegalArgumentException("stream == null!");
    }/*from  ww  w  .j  ava 2  s .c  o m*/
    // how to read each recordType
    // 1. set-up the following objects before reading bytes
    // a. the working byte array
    // b. the storage object
    // the length of this field: 172bytes = 60 + 4 + 12 + 4 + 8 + 84
    // this field consists of 6 distinct blocks

    byte[] recordType1 = new byte[LENGTH_RECORDTYPE1];
    // int caseWeightVariableOBSIndex = 0; 

    try {
        int nbytes = stream.read(recordType1, 0, LENGTH_RECORDTYPE1);

        //printHexDump(recordType1, "recordType1");

        if (nbytes == 0) {
            throw new IOException("reading recordType1: no byte was read");
        }

        // 1.1 60 byte-String that tells the platform/version of SPSS that
        // wrote this file

        int offset_start = 0;
        int offset_end = LENGTH_SPSS_PRODUCT_INFO; // 60 bytes

        String productInfo = new String(Arrays.copyOfRange(recordType1, offset_start, offset_end), "US-ASCII");

        dbgLog.fine("productInfo:\n" + productInfo + "\n");
        dataTable.setOriginalFormatVersion(productInfo);

        // try to parse out the SPSS version that created this data
        // file: 

        String spssVersionTag = null;

        String regexpVersionNumber = ".*Release ([0-9]*)";
        Pattern versionTagPattern = Pattern.compile(regexpVersionNumber);
        Matcher matcher = versionTagPattern.matcher(productInfo);
        if (matcher.find()) {
            spssVersionTag = matcher.group(1);
            dbgLog.fine("SPSS Version Number: " + spssVersionTag);
        }

        // TODO: 
        // try a more elaborate regex (like the one for the "new-style" 
        // productInfo line, below), to select the version number, the 
        // minor version number and the platform (windows vs. mac) separately. 
        // would be cleaner to save just that, rather than the entire 
        // productInfo tag. 
        // -- L.A. 4.0 beta

        if (spssVersionTag == null || spssVersionTag.equals("")) {
            // Later versions of SPSS have different formatting of the
            // productInfo line:
            regexpVersionNumber = ".* IBM SPSS STATISTICS.* ([^ ]*) ([0-9][0-9]*)([^ ]*)";
            versionTagPattern = Pattern.compile(regexpVersionNumber);
            matcher = versionTagPattern.matcher(productInfo);
            if (matcher.find()) {
                String spssPlatformTag = matcher.group(1);
                spssVersionTag = matcher.group(2);
                String spssVersionTagMinor = matcher.group(3);

                dbgLog.fine("SPSS Version Number (new style): " + spssVersionTag);
                dbgLog.fine("SPSS Version/Platform Identification (new style:) " + spssPlatformTag + " "
                        + spssVersionTag + spssVersionTagMinor);
                dataTable
                        .setOriginalFormatVersion(spssVersionTag + spssVersionTagMinor + " " + spssPlatformTag);

            }
        }

        if (spssVersionTag != null && !spssVersionTag.equals("")) {
            spssVersionNumber = Integer.valueOf(spssVersionTag).intValue();

            /*
             *  Starting with SPSS version 16, the default encoding is 
             *  UTF-8. 
             *  But we are only going to use it if the user did not explicitly
             *  specify the encoding on the addfiles page. Then we'd want 
             *  to stick with whatever they entered. 
             *  (also, it appears that (starting with the same version 16?)
             *  it is actually possible to define the locale/character set
             *  in the file - section 7, sub-type 20; TODO: decide which 
             *  one takes precedence, if we have the encoding defined both
             *  in the file and through the UI. -- L.A. 4.0 beta)
             */
            if (spssVersionNumber > 15) {
                if (getDataLanguageEncoding() == null) {
                    //defaultCharSet = "windows-1252"; // temporary! -- L.A. "UTF-8";
                    defaultCharSet = "UTF-8";
                }
            }
        }

        // TODO: 
        // decide if we want to save the [determined/guessed] character set
        // somewhere in the dataset object. 
        // this may be relevant in cases when accented/non-latin characters
        // get ingested incorrectly; 
        // -- L.A. 4.0 beta

        // 1.2) 4-byte file-layout-code (byte-order)

        offset_start = offset_end;
        offset_end += LENGTH_FILE_LAYOUT_CODE; // 4 byte

        ByteBuffer bb_fileLayout_code = ByteBuffer.wrap(recordType1, offset_start, LENGTH_FILE_LAYOUT_CODE);

        ByteBuffer byteOderTest = bb_fileLayout_code.duplicate();
        // interprete the 4 byte as int

        int int2test = byteOderTest.getInt();

        if (int2test == 2 || int2test == 3) {
            dbgLog.fine("integer == " + int2test + ": the byte-oder of the writer is the same "
                    + "as the counterpart of Java: Big Endian");
        } else {
            // Because Java's byte-order is always big endian, 
            // this(!=2) means this sav file was  written on a little-endian machine
            // non-string, multi-bytes blocks must be byte-reversed

            bb_fileLayout_code.order(ByteOrder.LITTLE_ENDIAN);

            int2test = bb_fileLayout_code.getInt();

            if (int2test == 2 || int2test == 3) {
                dbgLog.fine("The sav file was saved on a little endian machine");
                dbgLog.fine("Reveral of the bytes is necessary to decode " + "multi-byte, non-string blocks");

                isLittleEndian = true;

            } else {
                throw new IOException("reading recordType1:unknown file layout code=" + int2test);
            }
        }

        dbgLog.fine("Endian of this platform:" + ByteOrder.nativeOrder().toString());

        // 1.3 4-byte Number_Of_OBS_Units_Per_Case 
        // (= how many RT2 records => how many varilables)

        offset_start = offset_end;
        offset_end += LENGTH_NUMBER_OF_OBS_UNITS_PER_CASE; // 4 byte

        ByteBuffer bb_OBS_units_per_case = ByteBuffer.wrap(recordType1, offset_start,
                LENGTH_NUMBER_OF_OBS_UNITS_PER_CASE);

        if (isLittleEndian) {
            bb_OBS_units_per_case.order(ByteOrder.LITTLE_ENDIAN);
        }

        OBSUnitsPerCase = bb_OBS_units_per_case.getInt();

        dbgLog.fine("RT1: OBSUnitsPerCase=" + OBSUnitsPerCase);

        // 1.4 4-byte Compression_Switch

        offset_start = offset_end;
        offset_end += LENGTH_COMPRESSION_SWITCH; // 4 byte

        ByteBuffer bb_compression_switch = ByteBuffer.wrap(recordType1, offset_start,
                LENGTH_COMPRESSION_SWITCH);

        if (isLittleEndian) {
            bb_compression_switch.order(ByteOrder.LITTLE_ENDIAN);
        }

        int compression_switch = bb_compression_switch.getInt();
        if (compression_switch == 0) {
            // data section is not compressed
            isDataSectionCompressed = false;
            dbgLog.fine("data section is not compressed");
        } else {
            dbgLog.fine("data section is compressed:" + compression_switch);
        }

        // 1.5 4-byte Case-Weight Variable Index
        // warning: this variable index starts from 1, not 0

        offset_start = offset_end;
        offset_end += LENGTH_CASE_WEIGHT_VARIABLE_INDEX; // 4 byte

        ByteBuffer bb_Case_Weight_Variable_Index = ByteBuffer.wrap(recordType1, offset_start,
                LENGTH_CASE_WEIGHT_VARIABLE_INDEX);

        if (isLittleEndian) {
            bb_Case_Weight_Variable_Index.order(ByteOrder.LITTLE_ENDIAN);
        }

        caseWeightVariableOBSIndex = bb_Case_Weight_Variable_Index.getInt();

        /// caseWeightVariableOBSIndex will be used later on to locate 
        /// the weight variable; so we'll be able to mark the corresponding
        /// variables properly. 
        // TODO: make sure case weight variables are properly handled! 
        // -- L.A. 4.0 beta
        ///smd.getFileInformation().put("caseWeightVariableOBSIndex", caseWeightVariableOBSIndex);

        // 1.6 4-byte Number of Cases

        offset_start = offset_end;
        offset_end += LENGTH_NUMBER_OF_CASES; // 4 byte

        ByteBuffer bb_Number_Of_Cases = ByteBuffer.wrap(recordType1, offset_start, LENGTH_NUMBER_OF_CASES);

        if (isLittleEndian) {
            bb_Number_Of_Cases.order(ByteOrder.LITTLE_ENDIAN);
        }

        int numberOfCases = bb_Number_Of_Cases.getInt();

        if (numberOfCases < 0) {
            // -1 if numberOfCases is unknown
            throw new RuntimeException("number of cases is not recorded in the header");
        } else {
            dbgLog.fine("RT1: number of cases is recorded= " + numberOfCases);
            dataTable.setCaseQuantity(new Long(numberOfCases));
            ///caseQnty = numberOfCases;
            ///smd.getFileInformation().put("caseQnty", numberOfCases);
        }

        // 1.7 8-byte compression-bias [not long but double]

        offset_start = offset_end;
        offset_end += LENGTH_COMPRESSION_BIAS; // 8 byte

        ByteBuffer bb_compression_bias = ByteBuffer
                .wrap(Arrays.copyOfRange(recordType1, offset_start, offset_end));

        if (isLittleEndian) {
            bb_compression_bias.order(ByteOrder.LITTLE_ENDIAN);
        }

        Double compressionBias = bb_compression_bias.getDouble();

        // TODO: 
        // check if this "compression bias" is being used anywhere? 
        // doesn't seem to be!
        // -- 4.0 alpha
        if (compressionBias == 100d) {
            // 100 is expected
            dbgLog.fine("compressionBias is 100 as expected");
            ///smd.getFileInformation().put("compressionBias", 100);
        } else {
            dbgLog.fine("compression bias is not 100: " + compressionBias);
            ///smd.getFileInformation().put("compressionBias", compressionBias);
        }

        // 1.8 84-byte File Creation Information (date/time: dd MM yyhh:mm:ss +
        // 64-bytelabel)

        offset_start = offset_end;
        offset_end += LENGTH_FILE_CREATION_INFO; // 84 bytes

        String fileCreationInfo = getNullStrippedString(
                new String(Arrays.copyOfRange(recordType1, offset_start, offset_end), "US-ASCII"));

        dbgLog.fine("fileCreationInfo:\n" + fileCreationInfo + "\n");

        String fileCreationDate = fileCreationInfo.substring(0, length_file_creation_date);
        int dateEnd = length_file_creation_date + length_file_creation_time;
        String fileCreationTime = fileCreationInfo.substring(length_file_creation_date, (dateEnd));
        String fileCreationNote = fileCreationInfo.substring(dateEnd, length_file_creation_label);

        dbgLog.fine("fileDate=" + fileCreationDate);
        dbgLog.fine("fileTime=" + fileCreationTime);
        dbgLog.fine("fileNote" + fileCreationNote);

    } catch (IOException ex) {
        throw ex;
    }

    dbgLog.fine("decodeRecordType1(): end");
}