List of usage examples for org.apache.hadoop.io BytesWritable getLength
@Override public int getLength()
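Every snippet below relies on the same contract: BytesWritable keeps its data in a backing buffer that may be larger than the logical value, so getBytes() can return trailing garbage and only the first getLength() bytes are valid. A minimal sketch of that contract (the class name and literal sizes here are illustrative, not taken from any project below):

import java.util.Arrays;
import org.apache.hadoop.io.BytesWritable;

public class BytesWritableLengthSketch {
    public static void main(String[] args) {
        BytesWritable bw = new BytesWritable("hello".getBytes());
        bw.setCapacity(64); // grow the backing buffer past the valid region

        byte[] raw = bw.getBytes(); // backing array; raw.length is now 64
        int len = bw.getLength();   // 5: only bytes in [0, len) are valid

        // Copy exactly the valid range before handing the bytes elsewhere,
        // as the examples on this page do.
        byte[] valid = Arrays.copyOf(raw, len);
        System.out.println(new String(valid));          // hello
        System.out.println(raw.length + " vs " + len);  // 64 vs 5
    }
}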
From source file:org.apache.pig.piggybank.storage.SequenceFileLoader.java
License:Apache License
protected Object translateWritableToPigDataType(Writable w, byte dataType) {
    switch (dataType) {
    case DataType.CHARARRAY:
        return ((Text) w).toString();
    case DataType.BYTEARRAY:
        BytesWritable bw = (BytesWritable) w;
        // Make a copy
        return new DataByteArray(bw.getBytes(), 0, bw.getLength());
    case DataType.BOOLEAN:
        return ((BooleanWritable) w).get();
    case DataType.INTEGER:
        return ((IntWritable) w).get();
    case DataType.LONG:
        return ((LongWritable) w).get();
    case DataType.FLOAT:
        return ((FloatWritable) w).get();
    case DataType.DOUBLE:
        return ((DoubleWritable) w).get();
    case DataType.BYTE:
        return ((ByteWritable) w).get();
    case DataType.DATETIME:
        return ((DateTimeWritable) w).get();
    }
    return null;
}
From source file:org.apache.sqoop.avro.AvroUtil.java
License:Apache License
/**
 * Convert Sqoop's Java representation to the Avro representation.
 */
public static Object toAvro(Object o, Schema.Field field, boolean bigDecimalFormatString) {
    if (o instanceof BigDecimal && !isDecimal(field)) {
        if (bigDecimalFormatString) {
            // Returns a string representation of this without an exponent field.
            return ((BigDecimal) o).toPlainString();
        } else {
            return o.toString();
        }
    } else if (o instanceof Date) {
        return ((Date) o).getTime();
    } else if (o instanceof Time) {
        return ((Time) o).getTime();
    } else if (o instanceof Timestamp) {
        return ((Timestamp) o).getTime();
    } else if (o instanceof BytesWritable) {
        BytesWritable bw = (BytesWritable) o;
        return ByteBuffer.wrap(bw.getBytes(), 0, bw.getLength());
    } else if (o instanceof BlobRef) {
        BlobRef br = (BlobRef) o;
        // If blob data is stored in an external .lob file, save the ref file
        // as Avro bytes. If materialized inline, save blob data as Avro bytes.
        byte[] bytes = br.isExternal() ? br.toString().getBytes() : br.getData();
        return ByteBuffer.wrap(bytes);
    } else if (o instanceof ClobRef) {
        throw new UnsupportedOperationException("ClobRef not supported");
    }
    // Primitive types (Integer, etc.) are left unchanged.
    return o;
}
From source file:org.apache.sqoop.lib.JdbcWritableBridge.java
License:Apache License
public static void writeBytesWritable(BytesWritable val, int paramIdx, int sqlType,
        PreparedStatement s) throws SQLException {
    if (null == val) {
        s.setNull(paramIdx, sqlType);
    } else {
        // val.getBytes() is only valid in [0, len)
        byte[] rawBytes = val.getBytes();
        int len = val.getLength();
        byte[] outBytes = new byte[len];
        System.arraycopy(rawBytes, 0, outBytes, 0, len);
        s.setBytes(paramIdx, outBytes);
    }
}
From source file:org.apache.sqoop.manager.oracle.SystemImportTest.java
License:Apache License
/**
 * Generates pseudo-random test data across all supported data types in an
 * Oracle database, imports the data into Hadoop, and compares it with the
 * data in Oracle.
 *
 * @throws Exception
 */
@Test
public void importTest() throws Exception {
    // Generate test data in Oracle
    setSqoopTargetDirectory(getSqoopTargetDirectory() + OracleUtils.SYSTEMTEST_TABLE_NAME);
    int numRows = OracleUtils.SYSTEMTEST_NUM_ROWS;
    Connection conn = getTestEnvConnection();
    OraOopOracleQueries.setConnectionTimeZone(conn, "GMT");
    try {
        Statement s = conn.createStatement();
        try {
            s.executeUpdate("CREATE TABLE " + OracleUtils.SYSTEMTEST_TABLE_NAME
                    + " (id NUMBER(10) PRIMARY KEY, bd BINARY_DOUBLE, bf BINARY_FLOAT, "
                    + "b BLOB, c CHAR(12), cl CLOB, d DATE, "
                    + "f FLOAT(126), l LONG, nc NCHAR(30), ncl NCLOB, n NUMBER(9,2), "
                    + "nvc NVARCHAR2(30), r ROWID, u URITYPE, iym INTERVAL YEAR(2) TO "
                    + "MONTH, ids INTERVAL DAY(2) TO SECOND(6), "
                    + "t TIMESTAMP(6), tz TIMESTAMP(6) WITH TIME ZONE, "
                    + "tltz TIMESTAMP(6) WITH LOCAL TIME ZONE, rawcol RAW(21))");
            BinaryDoubleGenerator bdg = new BinaryDoubleGenerator();
            BinaryFloatGenerator bfg = new BinaryFloatGenerator();
            BlobGenerator bg = new BlobGenerator(conn, 2 * 1024, 8 * 1024);
            CharGenerator cg = new CharGenerator(12, 12);
            CharGenerator clobg = new CharGenerator(2 * 1024, 8 * 1024);
            TimestampGenerator dateg = new TimestampGenerator(0);
            FloatGenerator fg = new FloatGenerator(126);
            CharGenerator lg = new CharGenerator(2 * 1024, 8 * 1024);
            NCharGenerator ncg = new NCharGenerator(30, 30);
            NCharGenerator nclobg = new NCharGenerator(2 * 1024, 8 * 1024);
            BigDecimalGenerator ng = new BigDecimalGenerator(9, 2);
            NCharGenerator nvcg = new NCharGenerator(1, 30);
            RowIdGenerator rg = new RowIdGenerator();
            URIGenerator ug = new URIGenerator();
            IntervalYearMonthGenerator iymg = new IntervalYearMonthGenerator(2);
            IntervalDaySecondGenerator idsg = new IntervalDaySecondGenerator(2, 6);
            TimestampGenerator tg = new TimestampGenerator(6);
            TimestampGenerator tzg = new TimestampGenerator(6);
            TimestampGenerator tltzg = new TimestampGenerator(6);
            BytesGenerator rawg = new BytesGenerator(21, 21);
            PreparedStatement ps = conn.prepareStatement("INSERT INTO "
                    + OracleUtils.SYSTEMTEST_TABLE_NAME
                    + " ( id, bd, bf, b, c, cl, d, f, nc, ncl, n, nvc, r, u, iym, "
                    + "ids, t, tz, tltz, rawcol ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, "
                    + "?, ?, ?, ?, ?, sys.UriFactory.getUri(?), ?, ?, ?, ?, ?, ? )");
            try {
                for (int i = 0; i < numRows; i++) {
                    ps.setInt(1, i);
                    methSetBinaryDouble.invoke(ps, 2, bdg.next());
                    methSetBinaryFloat.invoke(ps, 3, bfg.next());
                    ps.setBlob(4, bg.next());
                    ps.setString(5, cg.next());
                    ps.setString(6, clobg.next());
                    ps.setTimestamp(7, dateg.next());
                    ps.setBigDecimal(8, fg.next());
                    ps.setString(9, ncg.next());
                    ps.setString(10, nclobg.next());
                    ps.setBigDecimal(11, ng.next());
                    ps.setString(12, nvcg.next());
                    ps.setRowId(13, rg.next());
                    ps.setString(14, ug.next());
                    ps.setString(15, iymg.next());
                    ps.setString(16, idsg.next());
                    ps.setTimestamp(17, tg.next());
                    ps.setTimestamp(18, tzg.next());
                    ps.setTimestamp(19, tltzg.next());
                    ps.setBytes(20, rawg.next());
                    ps.executeUpdate();
                }
            } finally {
                ps.close();
                conn.commit();
            }
            // Can't bind > 4000 bytes of data to LONG and LOB columns in the same
            // statement, so do LONG by itself
            ps = conn.prepareStatement("UPDATE " + OracleUtils.SYSTEMTEST_TABLE_NAME
                    + " SET l = ? WHERE id = ?");
            try {
                for (int i = 0; i < numRows; i++) {
                    ps.setString(1, lg.next());
                    ps.setInt(2, i);
                    ps.executeUpdate();
                }
            } finally {
                ps.close();
                conn.commit();
            }
            try {
                // Import test data into Hadoop
                int retCode = runImport(OracleUtils.SYSTEMTEST_TABLE_NAME, getSqoopConf(), true);
                assertEquals("Return code should be 0", 0, retCode);
                // Add Sqoop-generated code to the classpath
                String sqoopGenJarPath = "file://" + getSqoopGenLibDirectory() + "/"
                        + getSqoopGenClassName() + ".jar";
                URLClassLoader loader = new URLClassLoader(new URL[] { new URL(sqoopGenJarPath) },
                        getClass().getClassLoader());
                Thread.currentThread().setContextClassLoader(loader);
                // Read test data from Hadoop
                Configuration hadoopConf = getSqoopConf();
                FileSystem hdfs = FileSystem.get(hadoopConf);
                Path path = new Path(getSqoopTargetDirectory());
                FileStatus[] statuses = hdfs.listStatus(path);
                int hadoopRecordCount = 0;
                for (FileStatus status : statuses) {
                    if (status.getPath().getName().startsWith("part-m-")) {
                        SequenceFile.Reader reader = new SequenceFile.Reader(hdfs, status.getPath(), hadoopConf);
                        LongWritable key = new LongWritable();
                        @SuppressWarnings("unchecked")
                        SqoopRecord value = ((Class<SqoopRecord>) reader.getValueClass())
                                .getConstructor().newInstance();
                        ps = conn.prepareStatement("SELECT bd, bf, b, c, cl, d, f, l, nc, "
                                + "ncl, nvc, r, u, iym, ids, t, tz, tltz, rawcol FROM "
                                + OracleUtils.SYSTEMTEST_TABLE_NAME + " WHERE id = ?");
                        while (reader.next(key, value)) {
                            // Compare test data from Hadoop with data in Oracle
                            Map<String, Object> fields = value.getFieldMap();
                            BigDecimal id = (BigDecimal) fields.get("ID");
                            ps.setBigDecimal(1, id);
                            ResultSet rs = ps.executeQuery();
                            assertTrue("Did not find row with id " + id + " in oracle", rs.next());
                            assertEquals("BinaryDouble did not match for row " + id, fields.get("BD"), rs.getDouble(1));
                            assertEquals("BinaryFloat did not match for row " + id, fields.get("BF"), rs.getFloat(2));
                            // LONG column needs to be read before BLOB column
                            assertEquals("Long did not match for row " + id, fields.get("L"), rs.getString(8));
                            BlobRef hadoopBlob = (BlobRef) fields.get("B");
                            Blob oraBlob = rs.getBlob(3);
                            assertTrue("Blob did not match for row " + id,
                                    Arrays.equals(hadoopBlob.getData(), oraBlob.getBytes(1L, (int) oraBlob.length())));
                            assertEquals("Char did not match for row " + id, fields.get("C"), rs.getString(4));
                            ClobRef hadoopClob = (ClobRef) fields.get("CL");
                            Clob oraClob = rs.getClob(5);
                            assertEquals("Clob did not match for row " + id, hadoopClob.getData(),
                                    oraClob.getSubString(1, (int) oraClob.length()));
                            assertEquals("Date did not match for row " + id, fields.get("D"), rs.getString(6));
                            BigDecimal hadoopFloat = (BigDecimal) fields.get("F");
                            BigDecimal oraFloat = rs.getBigDecimal(7);
                            assertEquals("Float did not match for row " + id, hadoopFloat, oraFloat);
                            assertEquals("NChar did not match for row " + id, fields.get("NC"), rs.getString(9));
                            assertEquals("NClob did not match for row " + id, fields.get("NCL"), rs.getString(10));
                            assertEquals("NVarChar did not match for row " + id, fields.get("NVC"), rs.getString(11));
                            assertEquals("RowId did not match for row " + id, fields.get("R"),
                                    new String(rs.getRowId(12).getBytes()));
                            Struct url = (Struct) rs.getObject(13); // TODO: Find a fix for this workaround
                            String urlString = (String) url.getAttributes()[0];
                            if (url.getSQLTypeName().equals("SYS.HTTPURITYPE")) {
                                urlString = "http://" + urlString;
                            } else if (url.getSQLTypeName().equals("SYS.DBURITYPE")) {
                                urlString = "/ORADB" + urlString;
                            }
                            assertEquals("UriType did not match for row " + id, fields.get("U"), urlString);
                            assertEquals("Interval Year to Month did not match for row " + id,
                                    fields.get("IYM"), rs.getString(14));
                            String ids = (String) fields.get("IDS");
                            // Strip trailing zeros to match Oracle format
                            int lastNonZero = ids.length() - 1;
                            while (ids.charAt(lastNonZero) == '0') {
                                lastNonZero--;
                            }
                            ids = ids.substring(0, lastNonZero + 1);
                            assertEquals("Interval Day to Second did not match for row " + id, ids, rs.getString(15));
                            assertEquals("Timestamp did not match for row " + id, fields.get("T"), rs.getString(16));
                            assertEquals("Timestamp with Time Zone did not match for row " + id,
                                    fields.get("TZ"), rs.getString(17));
                            assertEquals("Timestamp with Local Time Zone did not match for row " + id,
                                    fields.get("TLTZ"), rs.getString(18));
                            BytesWritable rawCol = (BytesWritable) fields.get("RAWCOL");
                            byte[] rawColData = Arrays.copyOf(rawCol.getBytes(), rawCol.getLength());
                            assertTrue("RAW did not match for row " + id, Arrays.equals(rawColData, rs.getBytes(19)));
                            assertFalse("Found multiple rows with id " + id + " in oracle", rs.next());
                            hadoopRecordCount++;
                        }
                        reader.close();
                    }
                }
                ResultSet rs = s.executeQuery("SELECT COUNT(*) FROM " + OracleUtils.SYSTEMTEST_TABLE_NAME);
                rs.next();
                int oracleRecordCount = rs.getInt(1);
                assertEquals("Number of records in Hadoop does not match number of records in oracle",
                        hadoopRecordCount, oracleRecordCount);
                rs.close();
            } finally {
                // Delete test data from Hadoop
                cleanupFolders();
            }
        } finally {
            // Delete test data from Oracle
            s.executeUpdate("DROP TABLE " + OracleUtils.SYSTEMTEST_TABLE_NAME);
            s.close();
        }
    } finally {
        closeTestEnvConnection();
    }
}
From source file:org.apache.sqoop.mapreduce.AvroImportMapper.java
License:Apache License
/**
 * Convert a Java object (already converted from its SQL equivalent) to its
 * Avro representation.
 * @param o
 * @return
 */
private Object toAvro(Object o) {
    if (o instanceof BigDecimal) {
        if (bigDecimalFormatString) {
            return ((BigDecimal) o).toPlainString();
        } else {
            return o.toString();
        }
    } else if (o instanceof Date) {
        return ((Date) o).getTime();
    } else if (o instanceof Time) {
        return ((Time) o).getTime();
    } else if (o instanceof Timestamp) {
        return ((Timestamp) o).getTime();
    } else if (o instanceof BytesWritable) {
        BytesWritable bw = (BytesWritable) o;
        return ByteBuffer.wrap(bw.getBytes(), 0, bw.getLength());
    } else if (o instanceof BlobRef) {
        BlobRef br = (BlobRef) o;
        // If blob data is stored in an external .lob file, save the ref file
        // as Avro bytes. If materialized inline, save blob data as Avro bytes.
        byte[] bytes = br.isExternal() ? br.toString().getBytes() : br.getData();
        return ByteBuffer.wrap(bytes);
    } else if (o instanceof ClobRef) {
        throw new UnsupportedOperationException("ClobRef not supported");
    }
    // Primitive types (Integer, etc.) are left unchanged.
    return o;
}
From source file:org.apache.tajo.storage.sequencefile.SequenceFileScanner.java
License:Apache License
/**
 * In Hive, LazyBinarySerDe output is serialized as follows:
 *
 *   start      A     B         A     B           A     B    end
 *   bytes[] -> |-----|---------|--- ... ---|-----|---------|
 *
 * Section A is one null-byte, corresponding to eight struct fields in Section
 * B. Each bit indicates whether the corresponding field is null (0) or not
 * null (1). Each field is a LazyBinaryObject.
 *
 * Following B, there is another section A and B. This pattern repeats until
 * all struct fields are serialized.
 *
 * So Tajo must build its tuple by parsing this Hive-style BinarySerDe layout.
 */
private Tuple makeTuple(BytesWritable value) throws IOException {
    Tuple tuple = new VTuple(schema.getColumns().size());

    int start = 0;
    int length = value.getLength();

    /**
     * Please note that one null byte is followed by eight fields, then more
     * null bytes and fields.
     */
    int structByteEnd = start + length;
    byte[] bytes = value.getBytes();

    byte nullByte = bytes[start];
    int lastFieldByteEnd = start + 1;

    // Go through all bytes in the byte[]
    for (int i = 0; i < schema.getColumns().size(); i++) {
        fieldIsNull[i] = true;
        if ((nullByte & (1 << (i % 8))) != 0) {
            fieldIsNull[i] = false;
            parse(schema.getColumn(i), bytes, lastFieldByteEnd);

            fieldStart[i] = lastFieldByteEnd + elementOffset;
            fieldLength[i] = elementSize;
            lastFieldByteEnd = fieldStart[i] + fieldLength[i];

            for (int j = 0; j < projectionMap.length; j++) {
                if (projectionMap[j] == i) {
                    Datum datum = serde.deserialize(schema.getColumn(i), bytes, fieldStart[i],
                            fieldLength[i], nullChars);
                    tuple.put(i, datum);
                }
            }
        }

        // The next byte is a null byte if there are more bytes to go
        if (7 == (i % 8)) {
            if (lastFieldByteEnd < structByteEnd) {
                nullByte = bytes[lastFieldByteEnd];
                lastFieldByteEnd++;
            } else {
                // Otherwise all fields are null afterwards
                nullByte = 0;
                lastFieldByteEnd++;
            }
        }
    }

    return tuple;
}
From source file:org.apache.tez.runtime.library.common.comparator.TezBytesComparator.java
License:Apache License
@Override
public int getProxy(BytesWritable key) {
    int prefix = 0;
    final int len = key.getLength();
    final byte[] content = key.getBytes();
    int b1 = 0, b2 = 0, b3 = 0;
    // Pack up to the first three valid bytes into a big-endian int prefix;
    // the switch falls through deliberately.
    switch (len) {
    default:
    case 3:
        b3 = content[2] & 0xff;
    case 2:
        b2 = content[1] & 0xff;
    case 1:
        b1 = content[0] & 0xff;
    case 0:
    }
    prefix = (b1 << 16) | (b2 << 8) | (b3);
    return prefix;
}
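As a worked example, a key whose first three bytes are 0x61 0x62 0x63 ("abc") yields the proxy 0x616263, while the two-byte key 0x61 0x62 yields 0x616200: the fall-through reads exactly min(getLength(), 3) leading bytes and leaves the rest zero. Because the bytes are masked unsigned and packed big-endian, proxies order the same way as the key prefixes, so (presumably, as with other ProxyComparator implementations) a full byte-wise key comparison is only needed when two proxies tie.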
From source file:org.commoncrawl.hadoop.io.mapred.ArcFileInputFormatTests.java
License:Apache License
static void validateSplit(FileSystem fs, InputSplit split, List<Pair<Path, List<TestRecord>>> splits,
        RecordReader<Text, BytesWritable> reader) throws IOException, InterruptedException {

    int splitDataIndex = getIndexOfSplit(splits, split);
    Assert.assertTrue(splitDataIndex != -1);

    List<TestRecord> records = splits.get(splitDataIndex).e1;
    int itemIndex = 0;

    // Iterate and validate ...
    Text key = new Text();
    BytesWritable value = new BytesWritable();
    while (reader.next(key, value)) {
        TestRecord testRecord = records.get(itemIndex++);
        // Get the test key as UTF-8 bytes and compare against the raw key bytes.
        // (Text's UTF-8 mapping code replaces invalid characters with '?', which
        // would break this test case, since it deliberately uses invalid
        // characters to form the key.)
        byte[] testKeyBytes = testRecord.url.getBytes(Charset.forName("UTF-8"));
        Assert.assertTrue(ArcFileReaderTests.compareTo(testKeyBytes, 0, testKeyBytes.length,
                key.getBytes(), 0, key.getLength()) == 0);
        // The returned bytes hold the header (encoded in UTF-8) terminated by
        // "\r\n\r\n"; the content follows this terminator. Search for that byte
        // pattern to locate the start of the content, then compare it against the source.
        int indexofHeaderTerminator = ByteArrayUtils.indexOf(value.getBytes(), 0, value.getLength(),
                "\r\n\r\n".getBytes());
        indexofHeaderTerminator += 4;
        Assert.assertTrue(ArcFileReaderTests.compareTo(testRecord.data, 0, testRecord.data.length,
                value.getBytes(), indexofHeaderTerminator, testRecord.data.length) == 0);
    }
    reader.close();

    Assert.assertEquals(itemIndex, ArcFileReaderTests.BASIC_TEST_RECORD_COUNT);

    splits.remove(splitDataIndex);
}
From source file:org.commoncrawl.hadoop.io.mapred.ArcFileRecordReaderTests.java
License:Apache License
@Test
public void TestARCFileRecordReader() throws IOException, InterruptedException {

    JobConf conf = new JobConf();
    FileSystem fs = LocalFileSystem.get(conf);
    Path path = new Path("/tmp/" + File.createTempFile("ARCRecordReader", "test"));

    List<TestRecord> records = ArcFileReaderTests.buildTestRecords(ArcFileReaderTests.BASIC_TEST_RECORD_COUNT);

    FSDataOutputStream os = fs.create(path);
    try {
        // Write the ARC file into memory
        ArcFileReaderTests.writeFirstRecord(os, "test", System.currentTimeMillis());
        long testAttemptTime = System.currentTimeMillis();
        for (TestRecord record : records) {
            ArcFileReaderTests.write(os, record.url, "test", 1, 1, record.data, 0, record.data.length,
                    new NIOHttpHeaders(), "text/html", MD5Hash.digest(record.data).toString(), 12345,
                    testAttemptTime);
        }
        os.flush();
    } finally {
        os.close();
    }

    FileSplit split = new FileSplit(path, 0, fs.getFileStatus(path).getLen(), new String[0]);
    ARCFileRecordReader reader = new ARCFileRecordReader();
    reader.initialize(conf, split);

    int index = 0;

    // Iterate and validate ...
    Text key = reader.createKey();
    BytesWritable value = reader.createValue();
    while (reader.next(key, value)) {
        TestRecord testRecord = records.get(index++);
        // Get the test key as UTF-8 bytes and compare against the raw key bytes.
        // (Text's UTF-8 mapping code replaces invalid characters with '?', which
        // would break this test case, since it deliberately uses invalid
        // characters to form the key.)
        byte[] testKeyBytes = testRecord.url.getBytes(Charset.forName("UTF-8"));
        Assert.assertTrue(ArcFileReaderTests.compareTo(testKeyBytes, 0, testKeyBytes.length,
                key.getBytes(), 0, key.getLength()) == 0);
        // The returned bytes hold the header (encoded in UTF-8) terminated by
        // "\r\n\r\n"; the content follows this terminator. Search for that byte
        // pattern to locate the start of the content, then compare it against the source.
        int indexofHeaderTerminator = ByteArrayUtils.indexOf(value.getBytes(), 0, value.getLength(),
                "\r\n\r\n".getBytes());
        indexofHeaderTerminator += 4;
        Assert.assertTrue(ArcFileReaderTests.compareTo(testRecord.data, 0, testRecord.data.length,
                value.getBytes(), indexofHeaderTerminator, testRecord.data.length) == 0);
    }
    reader.close();

    Assert.assertEquals(index, ArcFileReaderTests.BASIC_TEST_RECORD_COUNT);

    fs.delete(path, false);
}
From source file:org.commoncrawl.hadoop.io.mapreduce.ArcFileInputFormatTests.java
License:Apache License
static void validateSplit(FileSystem fs, InputSplit split, List<Pair<Path, List<TestRecord>>> splits,
        RecordReader<Text, BytesWritable> reader) throws IOException, InterruptedException {

    int splitDataIndex = getIndexOfSplit(splits, split);
    Assert.assertTrue(splitDataIndex != -1);

    List<TestRecord> records = splits.get(splitDataIndex).e1;
    int itemIndex = 0;

    // Iterate and validate ...
    while (reader.nextKeyValue()) {
        Text key = reader.getCurrentKey();
        BytesWritable value = reader.getCurrentValue();
        TestRecord testRecord = records.get(itemIndex++);
        // Get the test key as UTF-8 bytes and compare against the raw key bytes.
        // (Text's UTF-8 mapping code replaces invalid characters with '?', which
        // would break this test case, since it deliberately uses invalid
        // characters to form the key.)
        byte[] testKeyBytes = testRecord.url.getBytes(Charset.forName("UTF-8"));
        Assert.assertTrue(ArcFileReaderTests.compareTo(testKeyBytes, 0, testKeyBytes.length,
                key.getBytes(), 0, key.getLength()) == 0);
        // The returned bytes hold the header (encoded in UTF-8) terminated by
        // "\r\n\r\n"; the content follows this terminator. Search for that byte
        // pattern to locate the start of the content, then compare it against the source.
        int indexofHeaderTerminator = ByteArrayUtils.indexOf(value.getBytes(), 0, value.getLength(),
                "\r\n\r\n".getBytes());
        indexofHeaderTerminator += 4;
        Assert.assertTrue(ArcFileReaderTests.compareTo(testRecord.data, 0, testRecord.data.length,
                value.getBytes(), indexofHeaderTerminator, testRecord.data.length) == 0);
    }
    reader.close();

    Assert.assertEquals(itemIndex, ArcFileReaderTests.BASIC_TEST_RECORD_COUNT);

    splits.remove(splitDataIndex);
}