Example usage for org.apache.hadoop.io BytesWritable getBytes

List of usage examples for org.apache.hadoop.io BytesWritable getBytes

Introduction

On this page you can find example usage for org.apache.hadoop.io BytesWritable getBytes.

Prototype

@Override
public byte[] getBytes() 

Document

Get the data backing the BytesWritable.
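Note that the returned array is the writable's internal buffer, which may be longer than the valid data; only the first getLength() bytes are meaningful. A minimal runnable sketch of the pattern most of the examples below follow (pairing getBytes() with getLength(), or copying the valid range):

import org.apache.hadoop.io.BytesWritable;

import java.util.Arrays;

public class GetBytesDemo {
    public static void main(String[] args) {
        BytesWritable bw = new BytesWritable("abc".getBytes());
        bw.setSize(2); // shrink the valid region; the backing array keeps its capacity

        byte[] backing = bw.getBytes(); // the raw buffer, possibly with trailing padding
        int len = bw.getLength();       // only [0, len) is valid
        byte[] valid = Arrays.copyOf(backing, len);

        System.out.println(new String(valid)); // prints "ab"
    }
}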

Usage

From source file:org.apache.parquet.hadoop.thrift.ThriftBytesWriteSupport.java

License:Apache License

private TProtocol protocol(BytesWritable record) {
    TProtocol protocol = protocolFactory
            .getProtocol(new TIOStreamTransport(new ByteArrayInputStream(record.getBytes())));

    /* Reduce the chance of OOM when data is corrupted. When readBinary is called on TBinaryProtocol,
     * it reads the length of the binary first, so corrupted data could yield a huge integer as the
     * binary length and trigger an OOM. Currently this fix only applies to TBinaryProtocol, which
     * has setReadLength defined. */
    if (IS_READ_LENGTH_SETABLE && protocol instanceof TBinaryProtocol) {
        ((TBinaryProtocol) protocol).setReadLength(record.getLength());
    }

    return protocol;
}
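As a design note: the snippet above hands the protocol the full backing array and then bounds reads with setReadLength. A hedged alternative (not from the Parquet source) is to bound the stream itself to the valid region, which also keeps the protocol from ever seeing padding past getLength():

    // Sketch: limit the transport to the valid bytes up front.
    TProtocol protocol = protocolFactory.getProtocol(new TIOStreamTransport(
            new ByteArrayInputStream(record.getBytes(), 0, record.getLength())));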

From source file:org.apache.pig.builtin.TestOrcStorage.java

License:Apache License

@SuppressWarnings("rawtypes")
private void compareData(Object expected, Object actual) {
    if (expected instanceof Text) {
        assertEquals(String.class, actual.getClass());
        assertEquals(expected.toString(), actual);
    } else if (expected instanceof ShortWritable) {
        assertEquals(Integer.class, actual.getClass());
        assertEquals((int) ((ShortWritable) expected).get(), actual);
    } else if (expected instanceof IntWritable) {
        assertEquals(Integer.class, actual.getClass());
        assertEquals(((IntWritable) expected).get(), actual);
    } else if (expected instanceof LongWritable) {
        assertEquals(Long.class, actual.getClass());
        assertEquals(((LongWritable) expected).get(), actual);
    } else if (expected instanceof FloatWritable) {
        assertEquals(Float.class, actual.getClass());
        assertEquals(((FloatWritable) expected).get(), actual);
    } else if (expected instanceof HiveDecimalWritable) {
        assertEquals(BigDecimal.class, actual.getClass());
        assertEquals(((HiveDecimalWritable) expected).toString(), actual.toString());
    } else if (expected instanceof DoubleWritable) {
        assertEquals(Double.class, actual.getClass());
        assertEquals(((DoubleWritable) expected).get(), actual);
    } else if (expected instanceof BooleanWritable) {
        assertEquals(Boolean.class, actual.getClass());
        assertEquals(((BooleanWritable) expected).get(), actual);
    } else if (expected instanceof TimestampWritable) {
        assertEquals(DateTime.class, actual.getClass());
        assertEquals(((TimestampWritable) expected).getTimestamp().getTime(), ((DateTime) actual).getMillis());
    } else if (expected instanceof BytesWritable) {
        assertEquals(DataByteArray.class, actual.getClass());
        BytesWritable bw = (BytesWritable) expected;
        assertEquals(new DataByteArray(bw.getBytes(), 0, bw.getLength()), actual);
    } else if (expected instanceof ByteWritable) {
        assertEquals(Integer.class, actual.getClass());
        assertEquals((int) ((ByteWritable) expected).get(), actual);
    } else if (expected instanceof OrcStruct) {
        assertEquals(BinSedesTuple.class, actual.getClass());
        // TODO: compare actual values. No getters in OrcStruct
    } else if (expected instanceof ArrayList) {
        assertEquals(DefaultDataBag.class, actual.getClass());
        // TODO: compare actual values. No getters in OrcStruct
    } else if (expected instanceof HashMap) {
        assertEquals(HashMap.class, actual.getClass());
        assertEquals(((HashMap) expected).size(), ((HashMap) actual).size());
        // TODO: compare actual values. No getters in OrcStruct
    } else if (expected == null) {
        assertEquals(expected, actual);
    } else {
        Assert.fail("Unknown object type: " + expected.getClass().getName());
    }
}
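A brief note on the BytesWritable branch above: DataByteArray(byte[], int, int) copies only the requested range, so the comparison is unaffected by any padding that may sit past bw.getLength() in the backing array.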

From source file:org.apache.pig.impl.io.NullableBytesWritable.java

License:Apache License

public Object getValueAsPigType() {
    BytesWritable bw = (BytesWritable) mValue;
    return isNull() ? null : new DataByteArray(bw.getBytes(), 0, bw.getLength());
}
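For illustration only (not from the Pig source), the inverse conversion is a one-liner, since DataByteArray.get() exposes its underlying array and BytesWritable(byte[]) wraps the array it is given without copying:

    DataByteArray dba = new DataByteArray(new byte[] { 1, 2, 3 });
    BytesWritable bw = new BytesWritable(dba.get()); // wraps, does not copy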

From source file:org.apache.pig.impl.util.orc.OrcUtils.java

License:Apache License

public static Object getPrimaryFromOrc(Object obj, PrimitiveObjectInspector poi) {
    Object result = null;
    if (obj == null) {
        return result;
    }
    switch (poi.getPrimitiveCategory()) {
    case FLOAT:
    case DOUBLE:
    case BOOLEAN:
    case INT:
    case LONG:
    case STRING:
        result = poi.getPrimitiveJavaObject(obj);
        break;
    case BYTE:
        result = (int) (Byte) poi.getPrimitiveJavaObject(obj);
        break;
    case SHORT:
        result = (int) (Short) poi.getPrimitiveJavaObject(obj);
        break;
    case BINARY:
        BytesWritable bw = (BytesWritable) obj;
        // Make a copy
        result = new DataByteArray(bw.getBytes(), 0, bw.getLength());
        break;
    case TIMESTAMP:
        java.sql.Timestamp origTimeStamp = (java.sql.Timestamp) poi.getPrimitiveJavaObject(obj);
        result = new DateTime(origTimeStamp.getTime());
        break;
    case DATE:
        java.sql.Date origDate = (java.sql.Date) poi.getPrimitiveJavaObject(obj);
        result = new DateTime(origDate.getTime());
        break;
    case DECIMAL:
        org.apache.hadoop.hive.common.type.HiveDecimal origDecimal = (org.apache.hadoop.hive.common.type.HiveDecimal) poi
                .getPrimitiveJavaObject(obj);
        result = origDecimal.bigDecimalValue();
        break;
    default:
        throw new IllegalArgumentException("Unknown primitive type " + poi.getPrimitiveCategory());
    }
    return result;
}
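Two details worth noting: BYTE and SHORT are widened to Integer, presumably because Pig scripts have no byte or short scalar types, and BINARY is the only branch that needs an explicit copy, because getBytes() exposes the writable's reusable internal buffer.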

From source file:org.apache.pig.piggybank.storage.SequenceFileLoader.java

License:Apache License

protected Object translateWritableToPigDataType(Writable w, byte dataType) {
    switch (dataType) {
    case DataType.CHARARRAY:
        return ((Text) w).toString();
    case DataType.BYTEARRAY:
        BytesWritable bw = (BytesWritable) w;
        // Make a copy
        return new DataByteArray(bw.getBytes(), 0, bw.getLength());
    case DataType.BOOLEAN:
        return ((BooleanWritable) w).get();
    case DataType.INTEGER:
        return ((IntWritable) w).get();
    case DataType.LONG:
        return ((LongWritable) w).get();
    case DataType.FLOAT:
        return ((FloatWritable) w).get();
    case DataType.DOUBLE:
        return ((DoubleWritable) w).get();
    case DataType.BYTE:
        return ((ByteWritable) w).get();
    case DataType.DATETIME:
        return ((DateTimeWritable) w).get();
    }

    return null;
}
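A hedged driver sketch (hypothetical path; key and value types assumed to be LongWritable and BytesWritable) showing how such a translation is typically invoked while reading a SequenceFile:

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path("/tmp/example.seq"), conf);
    try {
        LongWritable key = new LongWritable();
        BytesWritable value = new BytesWritable();
        while (reader.next(key, value)) {
            // DataByteArray copy of the valid bytes, as in the loader above
            Object pigValue = translateWritableToPigDataType(value, DataType.BYTEARRAY);
            System.out.println(pigValue);
        }
    } finally {
        reader.close();
    }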

From source file:org.apache.pirk.inputformat.hadoop.BytesArrayWritable.java

License:Apache License

/**
 * Return the ith element from the underlying array
 * <p>
 * Assumes that the underlying array consists of BytesWritable representations of BigInteger objects
 * <p>
 * Assumes that the underlying BigIntegers are unsigned, but have been stripped of zero padding (and hence the sign bit) -- must add it back in
 * 
 */
public BigInteger getBigInteger(int i) throws IOException {
    BytesWritable element = (BytesWritable) this.get()[i];

    return new BigInteger(pad(element.getBytes()));
}
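The pad helper is not shown on this page. Given the comment about the stripped sign bit, a plausible implementation (hypothetical, not the Pirk source) prepends a zero byte so that BigInteger's two's-complement constructor reads the value as non-negative; note also that this usage presumably relies on the elements being stored at exactly getLength() bytes, since getBytes() can return trailing padding:

    // Hypothetical: restore the sign byte so new BigInteger(byte[]) stays non-negative.
    private static byte[] pad(byte[] data) {
        byte[] padded = new byte[data.length + 1];
        padded[0] = 0x00; // leading zero byte forces a positive two's-complement value
        System.arraycopy(data, 0, padded, 1, data.length);
        return padded;
    }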

From source file:org.apache.sqoop.avro.AvroUtil.java

License:Apache License

/**
 * Convert Sqoop's Java representation to the Avro representation.
 */
public static Object toAvro(Object o, Schema.Field field, boolean bigDecimalFormatString) {
    if (o instanceof BigDecimal && !isDecimal(field)) {
        if (bigDecimalFormatString) {
            // Returns a string representation of this without an exponent field.
            return ((BigDecimal) o).toPlainString();
        } else {
            return o.toString();
        }
    } else if (o instanceof Date) {
        return ((Date) o).getTime();
    } else if (o instanceof Time) {
        return ((Time) o).getTime();
    } else if (o instanceof Timestamp) {
        return ((Timestamp) o).getTime();
    } else if (o instanceof BytesWritable) {
        BytesWritable bw = (BytesWritable) o;
        return ByteBuffer.wrap(bw.getBytes(), 0, bw.getLength());
    } else if (o instanceof BlobRef) {
        BlobRef br = (BlobRef) o;
        // If blob data is stored in an external .lob file, save the ref file
        // as Avro bytes. If materialized inline, save blob data as Avro bytes.
        byte[] bytes = br.isExternal() ? br.toString().getBytes() : br.getData();
        return ByteBuffer.wrap(bytes);
    } else if (o instanceof ClobRef) {
        throw new UnsupportedOperationException("ClobRef not supported");
    }
    // primitive types (Integer, etc) are left unchanged
    return o;
}
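One caveat: ByteBuffer.wrap does not copy, so the returned buffer aliases the writable's reusable backing array. That is fine when the value is serialized immediately, but if the buffer outlives the record, a defensive copy is safer (a sketch, not part of the Sqoop source):

    BytesWritable bw = (BytesWritable) o;
    // Detach from the reused buffer by copying only the valid range [0, getLength()).
    return ByteBuffer.wrap(Arrays.copyOf(bw.getBytes(), bw.getLength()));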

From source file:org.apache.sqoop.lib.JdbcWritableBridge.java

License:Apache License

public static void writeBytesWritable(BytesWritable val, int paramIdx, int sqlType, PreparedStatement s)
        throws SQLException {
    if (null == val) {
        s.setNull(paramIdx, sqlType);
    } else {
        // val.getBytes() is only valid in [0, len)
        byte[] rawBytes = val.getBytes();
        int len = val.getLength();
        byte[] outBytes = new byte[len];
        System.arraycopy(rawBytes, 0, outBytes, 0, len);
        s.setBytes(paramIdx, outBytes);
    }
}
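The manual arraycopy above is equivalent to BytesWritable#copyBytes, which, assuming a Hadoop version recent enough to provide it, shortens the else branch to:

    s.setBytes(paramIdx, val.copyBytes()); // copies exactly [0, getLength())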

From source file:org.apache.sqoop.manager.oracle.SystemImportTest.java

License:Apache License

/**
 * Generates pseudo-random test data across all supported data types in an
 * Oracle database. Imports the data into Hadoop and compares with the data in
 * Oracle.
 *
 * @throws Exception
 */
@Test
public void importTest() throws Exception {
    // Generate test data in oracle
    setSqoopTargetDirectory(getSqoopTargetDirectory() + OracleUtils.SYSTEMTEST_TABLE_NAME);
    int numRows = OracleUtils.SYSTEMTEST_NUM_ROWS;
    Connection conn = getTestEnvConnection();
    OraOopOracleQueries.setConnectionTimeZone(conn, "GMT");
    try {
        Statement s = conn.createStatement();
        try {
            s.executeUpdate("CREATE TABLE " + OracleUtils.SYSTEMTEST_TABLE_NAME
                    + " (id NUMBER(10) PRIMARY KEY, bd BINARY_DOUBLE, bf BINARY_FLOAT, "
                    + "b BLOB, c CHAR(12), cl CLOB, d DATE, "
                    + "f FLOAT(126), l LONG, nc NCHAR(30), ncl NCLOB, n NUMBER(9,2), "
                    + "nvc NVARCHAR2(30), r ROWID, u URITYPE, iym INTERVAL YEAR(2) TO "
                    + "MONTH, ids INTERVAL DAY(2) TO SECOND(6), "
                    + "t TIMESTAMP(6), tz TIMESTAMP(6) WITH TIME ZONE, "
                    + "tltz TIMESTAMP(6) WITH LOCAL TIME ZONE, rawcol RAW(21))");
            BinaryDoubleGenerator bdg = new BinaryDoubleGenerator();
            BinaryFloatGenerator bfg = new BinaryFloatGenerator();
            BlobGenerator bg = new BlobGenerator(conn, 2 * 1024, 8 * 1024);
            CharGenerator cg = new CharGenerator(12, 12);
            CharGenerator clobg = new CharGenerator(2 * 1024, 8 * 1024);
            TimestampGenerator dateg = new TimestampGenerator(0);
            FloatGenerator fg = new FloatGenerator(126);
            CharGenerator lg = new CharGenerator(2 * 1024, 8 * 1024);
            NCharGenerator ncg = new NCharGenerator(30, 30);
            NCharGenerator nclobg = new NCharGenerator(2 * 1024, 8 * 1024);
            BigDecimalGenerator ng = new BigDecimalGenerator(9, 2);
            NCharGenerator nvcg = new NCharGenerator(1, 30);
            RowIdGenerator rg = new RowIdGenerator();
            URIGenerator ug = new URIGenerator();
            IntervalYearMonthGenerator iymg = new IntervalYearMonthGenerator(2);
            IntervalDaySecondGenerator idsg = new IntervalDaySecondGenerator(2, 6);
            TimestampGenerator tg = new TimestampGenerator(6);
            TimestampGenerator tzg = new TimestampGenerator(6);
            TimestampGenerator tltzg = new TimestampGenerator(6);
            BytesGenerator rawg = new BytesGenerator(21, 21);
            PreparedStatement ps = conn.prepareStatement("INSERT INTO " + OracleUtils.SYSTEMTEST_TABLE_NAME
                    + " ( id, bd, bf, b, c, cl, d, f, nc, ncl, n, nvc, r, u, iym, "
                    + "ids, t, tz, tltz, rawcol ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, "
                    + "?, ?, ?, ?, ?, sys.UriFactory.getUri(?), ?, ?, ?, ?, ?, ? )");
            try {
                for (int i = 0; i < numRows; i++) {
                    ps.setInt(1, i);
                    methSetBinaryDouble.invoke(ps, 2, bdg.next());
                    methSetBinaryFloat.invoke(ps, 3, bfg.next());
                    ps.setBlob(4, bg.next());
                    ps.setString(5, cg.next());
                    ps.setString(6, clobg.next());
                    ps.setTimestamp(7, dateg.next());
                    ps.setBigDecimal(8, fg.next());
                    ps.setString(9, ncg.next());
                    ps.setString(10, nclobg.next());
                    ps.setBigDecimal(11, ng.next());
                    ps.setString(12, nvcg.next());
                    ps.setRowId(13, rg.next());
                    ps.setString(14, ug.next());
                    ps.setString(15, iymg.next());
                    ps.setString(16, idsg.next());
                    ps.setTimestamp(17, tg.next());
                    ps.setTimestamp(18, tzg.next());
                    ps.setTimestamp(19, tltzg.next());
                    ps.setBytes(20, rawg.next());
                    ps.executeUpdate();
                }
            } finally {
                ps.close();
                conn.commit();
            }

            // Can't bind > 4000 bytes of data to LONG and LOB columns in the same
            // statement, so do LONG by itself
            ps = conn.prepareStatement(
                    "UPDATE " + OracleUtils.SYSTEMTEST_TABLE_NAME + " SET l = ? WHERE id = ?");
            try {
                for (int i = 0; i < numRows; i++) {
                    ps.setString(1, lg.next());
                    ps.setInt(2, i);
                    ps.executeUpdate();
                }
            } finally {
                ps.close();
                conn.commit();
            }

            try {
                // Import test data into hadoop

                int retCode = runImport(OracleUtils.SYSTEMTEST_TABLE_NAME, getSqoopConf(), true);
                assertEquals("Return code should be 0", 0, retCode);

                // Add sqoop generated code to the classpath
                String sqoopGenJarPath = "file://" + getSqoopGenLibDirectory() + "/" + getSqoopGenClassName()
                        + ".jar";
                URLClassLoader loader = new URLClassLoader(new URL[] { new URL(sqoopGenJarPath) },
                        getClass().getClassLoader());
                Thread.currentThread().setContextClassLoader(loader);

                // Read test data from hadoop
                Configuration hadoopConf = getSqoopConf();
                FileSystem hdfs = FileSystem.get(hadoopConf);
                Path path = new Path(getSqoopTargetDirectory());
                FileStatus[] statuses = hdfs.listStatus(path);
                int hadoopRecordCount = 0;
                for (FileStatus status : statuses) {
                    if (status.getPath().getName().startsWith("part-m-")) {

                        SequenceFile.Reader reader = new SequenceFile.Reader(hdfs, status.getPath(),
                                hadoopConf);
                        LongWritable key = new LongWritable();
                        @SuppressWarnings("unchecked")
                        SqoopRecord value = ((Class<SqoopRecord>) reader.getValueClass()).getConstructor()
                                .newInstance();
                        ps = conn.prepareStatement("SELECT bd, bf, b, c, cl, d, f, l, nc, "
                                + "ncl, nvc, r, u, iym, ids, t, tz, tltz, rawcol FROM "
                                + OracleUtils.SYSTEMTEST_TABLE_NAME + " WHERE id = ?");
                        while (reader.next(key, value)) {
                            // Compare test data from hadoop with data in oracle
                            Map<String, Object> fields = value.getFieldMap();
                            BigDecimal id = (BigDecimal) fields.get("ID");
                            ps.setBigDecimal(1, id);
                            ResultSet rs = ps.executeQuery();
                            assertTrue("Did not find row with id " + id + " in oracle", rs.next());
                            assertEquals("BinaryDouble did not match for row " + id, fields.get("BD"),
                                    rs.getDouble(1));
                            assertEquals("BinaryFloat did not match for row " + id, fields.get("BF"),
                                    rs.getFloat(2));
                            // LONG column needs to be read before BLOB column
                            assertEquals("Long did not match for row " + id, fields.get("L"), rs.getString(8));
                            BlobRef hadoopBlob = (BlobRef) fields.get("B");
                            Blob oraBlob = rs.getBlob(3);
                            assertTrue("Blob did not match for row " + id, Arrays.equals(hadoopBlob.getData(),
                                    oraBlob.getBytes(1L, (int) oraBlob.length())));
                            assertEquals("Char did not match for row " + id, fields.get("C"), rs.getString(4));
                            ClobRef hadoopClob = (ClobRef) fields.get("CL");
                            Clob oraClob = rs.getClob(5);
                            assertEquals("Clob did not match for row " + id, hadoopClob.getData(),
                                    oraClob.getSubString(1, (int) oraClob.length()));
                            assertEquals("Date did not match for row " + id, fields.get("D"), rs.getString(6));
                            BigDecimal hadoopFloat = (BigDecimal) fields.get("F");
                            BigDecimal oraFloat = rs.getBigDecimal(7);
                            assertEquals("Float did not match for row " + id, hadoopFloat, oraFloat);
                            assertEquals("NChar did not match for row " + id, fields.get("NC"),
                                    rs.getString(9));
                            assertEquals("NClob did not match for row " + id, fields.get("NCL"),
                                    rs.getString(10));
                            assertEquals("NVarChar did not match for row " + id, fields.get("NVC"),
                                    rs.getString(11));
                            assertEquals("RowId did not match for row " + id, fields.get("R"),
                                    new String(rs.getRowId(12).getBytes()));
                            Struct url = (Struct) rs.getObject(13); // TODO: Find a fix for
                                                                    // this workaround
                            String urlString = (String) url.getAttributes()[0];
                            if (url.getSQLTypeName().equals("SYS.HTTPURITYPE")) {
                                urlString = "http://" + urlString;
                            } else if (url.getSQLTypeName().equals("SYS.DBURITYPE")) {
                                urlString = "/ORADB" + urlString;
                            }
                            assertEquals("UriType did not match for row " + id, fields.get("U"), urlString);
                            assertEquals("Interval Year to Month did not match for row " + id,
                                    fields.get("IYM"), rs.getString(14));
                            String ids = (String) fields.get("IDS"); // Strip trailing zeros
                                                                     // to match oracle
                                                                     // format
                            int lastNonZero = ids.length() - 1;
                            while (ids.charAt(lastNonZero) == '0') {
                                lastNonZero--;
                            }
                            ids = ids.substring(0, lastNonZero + 1);
                            assertEquals("Interval Day to Second did not match for row " + id, ids,
                                    rs.getString(15));
                            assertEquals("Timestamp did not match for row " + id, fields.get("T"),
                                    rs.getString(16));
                            assertEquals("Timestamp with Time Zone did not match for row " + id,
                                    fields.get("TZ"), rs.getString(17));
                            assertEquals("Timestamp with Local Time Zone did not match for row " + id,
                                    fields.get("TLTZ"), rs.getString(18));
                            BytesWritable rawCol = (BytesWritable) fields.get("RAWCOL");
                            byte[] rawColData = Arrays.copyOf(rawCol.getBytes(), rawCol.getLength());
                            assertTrue("RAW did not match for row " + id,
                                    Arrays.equals(rawColData, rs.getBytes(19)));

                            assertFalse("Found multiple rows with id " + id + " in oracle", rs.next());
                            hadoopRecordCount++;
                        }
                        reader.close();
                    }
                }
                ResultSet rs = s.executeQuery("SELECT COUNT(*) FROM " + OracleUtils.SYSTEMTEST_TABLE_NAME);
                rs.next();
                int oracleRecordCount = rs.getInt(1);
                assertEquals("Number of records in Hadoop does not match number of " + "records in oracle",
                        hadoopRecordCount, oracleRecordCount);
                rs.close();
            } finally {
                // Delete test data from hadoop
                cleanupFolders();
            }
        } finally {
            // Delete test data from oracle
            s.executeUpdate("DROP TABLE " + OracleUtils.SYSTEMTEST_TABLE_NAME);
            s.close();
        }

    } finally {
        closeTestEnvConnection();
    }
}

From source file:org.apache.sqoop.mapreduce.AvroImportMapper.java

License:Apache License

/**
 * Convert a Java type (that has already been converted from the SQL
 * equivalent) to its Avro representation.
 * @param o the Java value to convert
 * @return the Avro-compatible value
 */
private Object toAvro(Object o) {
    if (o instanceof BigDecimal) {
        if (bigDecimalFormatString) {
            return ((BigDecimal) o).toPlainString();
        } else {
            return o.toString();
        }
    } else if (o instanceof Date) {
        return ((Date) o).getTime();
    } else if (o instanceof Time) {
        return ((Time) o).getTime();
    } else if (o instanceof Timestamp) {
        return ((Timestamp) o).getTime();
    } else if (o instanceof BytesWritable) {
        BytesWritable bw = (BytesWritable) o;
        return ByteBuffer.wrap(bw.getBytes(), 0, bw.getLength());
    } else if (o instanceof BlobRef) {
        BlobRef br = (BlobRef) o;
        // If blob data is stored in an external .lob file, save the ref file
        // as Avro bytes. If materialized inline, save blob data as Avro bytes.
        byte[] bytes = br.isExternal() ? br.toString().getBytes() : br.getData();
        return ByteBuffer.wrap(bytes);
    } else if (o instanceof ClobRef) {
        throw new UnsupportedOperationException("ClobRef not suported");
    }
    // primitive types (Integer, etc) are left unchanged
    return o;
}