List of usage examples for org.apache.hadoop.io BytesWritable getLength
@Override public int getLength()
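Every snippet below relies on the same contract: BytesWritable keeps its data in a backing buffer that may be larger than the logical value, so getBytes() can return trailing garbage and only the first getLength() bytes are valid. A minimal sketch of that contract (the class name and literal sizes here are illustrative, not taken from any project below):

import java.util.Arrays;
import org.apache.hadoop.io.BytesWritable;

public class BytesWritableLengthSketch {
    public static void main(String[] args) {
        BytesWritable bw = new BytesWritable("hello".getBytes());
        bw.setCapacity(64); // grow the backing buffer past the valid region

        byte[] raw = bw.getBytes(); // backing array; raw.length is now 64
        int len = bw.getLength();   // 5: only bytes in [0, len) are valid

        // Copy exactly the valid range before handing the bytes elsewhere,
        // as the examples on this page do.
        byte[] valid = Arrays.copyOf(raw, len);
        System.out.println(new String(valid));          // hello
        System.out.println(raw.length + " vs " + len);  // 64 vs 5
    }
}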
From source file:org.apache.pig.piggybank.storage.SequenceFileLoader.java
License:Apache License
protected Object translateWritableToPigDataType(Writable w, byte dataType) {
    switch (dataType) {
    case DataType.CHARARRAY:
        return ((Text) w).toString();
    case DataType.BYTEARRAY:
        BytesWritable bw = (BytesWritable) w;
        // Make a copy
        return new DataByteArray(bw.getBytes(), 0, bw.getLength());
    case DataType.BOOLEAN:
        return ((BooleanWritable) w).get();
    case DataType.INTEGER:
        return ((IntWritable) w).get();
    case DataType.LONG:
        return ((LongWritable) w).get();
    case DataType.FLOAT:
        return ((FloatWritable) w).get();
    case DataType.DOUBLE:
        return ((DoubleWritable) w).get();
    case DataType.BYTE:
        return ((ByteWritable) w).get();
    case DataType.DATETIME:
        return ((DateTimeWritable) w).get();
    }
    return null;
}
From source file:org.apache.sqoop.avro.AvroUtil.java
License:Apache License
/**
 * Convert Sqoop's Java representation to the Avro representation.
 */
public static Object toAvro(Object o, Schema.Field field, boolean bigDecimalFormatString) {
    if (o instanceof BigDecimal && !isDecimal(field)) {
        if (bigDecimalFormatString) {
            // Returns a string representation of this without an exponent field.
            return ((BigDecimal) o).toPlainString();
        } else {
            return o.toString();
        }
    } else if (o instanceof Date) {
        return ((Date) o).getTime();
    } else if (o instanceof Time) {
        return ((Time) o).getTime();
    } else if (o instanceof Timestamp) {
        return ((Timestamp) o).getTime();
    } else if (o instanceof BytesWritable) {
        BytesWritable bw = (BytesWritable) o;
        return ByteBuffer.wrap(bw.getBytes(), 0, bw.getLength());
    } else if (o instanceof BlobRef) {
        BlobRef br = (BlobRef) o;
        // If blob data is stored in an external .lob file, save the ref file
        // as Avro bytes. If materialized inline, save blob data as Avro bytes.
        byte[] bytes = br.isExternal() ? br.toString().getBytes() : br.getData();
        return ByteBuffer.wrap(bytes);
    } else if (o instanceof ClobRef) {
        throw new UnsupportedOperationException("ClobRef not supported");
    }
    // Primitive types (Integer, etc.) are left unchanged.
    return o;
}
From source file:org.apache.sqoop.lib.JdbcWritableBridge.java
License:Apache License
public static void writeBytesWritable(BytesWritable val, int paramIdx, int sqlType,
        PreparedStatement s) throws SQLException {
    if (null == val) {
        s.setNull(paramIdx, sqlType);
    } else {
        // val.getBytes() is only valid in [0, len)
        byte[] rawBytes = val.getBytes();
        int len = val.getLength();
        byte[] outBytes = new byte[len];
        System.arraycopy(rawBytes, 0, outBytes, 0, len);
        s.setBytes(paramIdx, outBytes);
    }
}
From source file:org.apache.sqoop.manager.oracle.SystemImportTest.java
License:Apache License
/**
 * Generates pseudo-random test data across all supported data types in an
 * Oracle database, imports the data into Hadoop, and compares it with the
 * data in Oracle.
 *
 * @throws Exception
 */
@Test
public void importTest() throws Exception {
    // Generate test data in Oracle
    setSqoopTargetDirectory(getSqoopTargetDirectory() + OracleUtils.SYSTEMTEST_TABLE_NAME);
    int numRows = OracleUtils.SYSTEMTEST_NUM_ROWS;
    Connection conn = getTestEnvConnection();
    OraOopOracleQueries.setConnectionTimeZone(conn, "GMT");
    try {
        Statement s = conn.createStatement();
        try {
            s.executeUpdate("CREATE TABLE " + OracleUtils.SYSTEMTEST_TABLE_NAME
                    + " (id NUMBER(10) PRIMARY KEY, bd BINARY_DOUBLE, bf BINARY_FLOAT, "
                    + "b BLOB, c CHAR(12), cl CLOB, d DATE, "
                    + "f FLOAT(126), l LONG, nc NCHAR(30), ncl NCLOB, n NUMBER(9,2), "
                    + "nvc NVARCHAR2(30), r ROWID, u URITYPE, iym INTERVAL YEAR(2) TO "
                    + "MONTH, ids INTERVAL DAY(2) TO SECOND(6), "
                    + "t TIMESTAMP(6), tz TIMESTAMP(6) WITH TIME ZONE, "
                    + "tltz TIMESTAMP(6) WITH LOCAL TIME ZONE, rawcol RAW(21))");
            BinaryDoubleGenerator bdg = new BinaryDoubleGenerator();
            BinaryFloatGenerator bfg = new BinaryFloatGenerator();
            BlobGenerator bg = new BlobGenerator(conn, 2 * 1024, 8 * 1024);
            CharGenerator cg = new CharGenerator(12, 12);
            CharGenerator clobg = new CharGenerator(2 * 1024, 8 * 1024);
            TimestampGenerator dateg = new TimestampGenerator(0);
            FloatGenerator fg = new FloatGenerator(126);
            CharGenerator lg = new CharGenerator(2 * 1024, 8 * 1024);
            NCharGenerator ncg = new NCharGenerator(30, 30);
            NCharGenerator nclobg = new NCharGenerator(2 * 1024, 8 * 1024);
            BigDecimalGenerator ng = new BigDecimalGenerator(9, 2);
            NCharGenerator nvcg = new NCharGenerator(1, 30);
            RowIdGenerator rg = new RowIdGenerator();
            URIGenerator ug = new URIGenerator();
            IntervalYearMonthGenerator iymg = new IntervalYearMonthGenerator(2);
            IntervalDaySecondGenerator idsg = new IntervalDaySecondGenerator(2, 6);
            TimestampGenerator tg = new TimestampGenerator(6);
            TimestampGenerator tzg = new TimestampGenerator(6);
            TimestampGenerator tltzg = new TimestampGenerator(6);
            BytesGenerator rawg = new BytesGenerator(21, 21);
            PreparedStatement ps = conn.prepareStatement("INSERT INTO "
                    + OracleUtils.SYSTEMTEST_TABLE_NAME
                    + " ( id, bd, bf, b, c, cl, d, f, nc, ncl, n, nvc, r, u, iym, "
                    + "ids, t, tz, tltz, rawcol ) VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, "
                    + "?, ?, ?, ?, ?, sys.UriFactory.getUri(?), ?, ?, ?, ?, ?, ? )");
            try {
                for (int i = 0; i < numRows; i++) {
                    ps.setInt(1, i);
                    methSetBinaryDouble.invoke(ps, 2, bdg.next());
                    methSetBinaryFloat.invoke(ps, 3, bfg.next());
                    ps.setBlob(4, bg.next());
                    ps.setString(5, cg.next());
                    ps.setString(6, clobg.next());
                    ps.setTimestamp(7, dateg.next());
                    ps.setBigDecimal(8, fg.next());
                    ps.setString(9, ncg.next());
                    ps.setString(10, nclobg.next());
                    ps.setBigDecimal(11, ng.next());
                    ps.setString(12, nvcg.next());
                    ps.setRowId(13, rg.next());
                    ps.setString(14, ug.next());
                    ps.setString(15, iymg.next());
                    ps.setString(16, idsg.next());
                    ps.setTimestamp(17, tg.next());
                    ps.setTimestamp(18, tzg.next());
                    ps.setTimestamp(19, tltzg.next());
                    ps.setBytes(20, rawg.next());
                    ps.executeUpdate();
                }
            } finally {
                ps.close();
                conn.commit();
            }
            // Can't bind > 4000 bytes of data to LONG and LOB columns in the same
            // statement, so do LONG by itself
            ps = conn.prepareStatement("UPDATE " + OracleUtils.SYSTEMTEST_TABLE_NAME
                    + " SET l = ? WHERE id = ?");
            try {
                for (int i = 0; i < numRows; i++) {
                    ps.setString(1, lg.next());
                    ps.setInt(2, i);
                    ps.executeUpdate();
                }
            } finally {
                ps.close();
                conn.commit();
            }
            try {
                // Import test data into Hadoop
                int retCode = runImport(OracleUtils.SYSTEMTEST_TABLE_NAME, getSqoopConf(), true);
                assertEquals("Return code should be 0", 0, retCode);
                // Add Sqoop-generated code to the classpath
                String sqoopGenJarPath = "file://" + getSqoopGenLibDirectory() + "/"
                        + getSqoopGenClassName() + ".jar";
                URLClassLoader loader = new URLClassLoader(new URL[] { new URL(sqoopGenJarPath) },
                        getClass().getClassLoader());
                Thread.currentThread().setContextClassLoader(loader);
                // Read test data from Hadoop
                Configuration hadoopConf = getSqoopConf();
                FileSystem hdfs = FileSystem.get(hadoopConf);
                Path path = new Path(getSqoopTargetDirectory());
                FileStatus[] statuses = hdfs.listStatus(path);
                int hadoopRecordCount = 0;
                for (FileStatus status : statuses) {
                    if (status.getPath().getName().startsWith("part-m-")) {
                        SequenceFile.Reader reader = new SequenceFile.Reader(hdfs, status.getPath(), hadoopConf);
                        LongWritable key = new LongWritable();
                        @SuppressWarnings("unchecked")
                        SqoopRecord value = ((Class<SqoopRecord>) reader.getValueClass())
                                .getConstructor().newInstance();
                        ps = conn.prepareStatement("SELECT bd, bf, b, c, cl, d, f, l, nc, "
                                + "ncl, nvc, r, u, iym, ids, t, tz, tltz, rawcol FROM "
                                + OracleUtils.SYSTEMTEST_TABLE_NAME + " WHERE id = ?");
                        while (reader.next(key, value)) {
                            // Compare test data from Hadoop with data in Oracle
                            Map<String, Object> fields = value.getFieldMap();
                            BigDecimal id = (BigDecimal) fields.get("ID");
                            ps.setBigDecimal(1, id);
                            ResultSet rs = ps.executeQuery();
                            assertTrue("Did not find row with id " + id + " in oracle", rs.next());
                            assertEquals("BinaryDouble did not match for row " + id, fields.get("BD"), rs.getDouble(1));
                            assertEquals("BinaryFloat did not match for row " + id, fields.get("BF"), rs.getFloat(2));
                            // LONG column needs to be read before BLOB column
                            assertEquals("Long did not match for row " + id, fields.get("L"), rs.getString(8));
                            BlobRef hadoopBlob = (BlobRef) fields.get("B");
                            Blob oraBlob = rs.getBlob(3);
                            assertTrue("Blob did not match for row " + id,
                                    Arrays.equals(hadoopBlob.getData(), oraBlob.getBytes(1L, (int) oraBlob.length())));
                            assertEquals("Char did not match for row " + id, fields.get("C"), rs.getString(4));
                            ClobRef hadoopClob = (ClobRef) fields.get("CL");
                            Clob oraClob = rs.getClob(5);
                            assertEquals("Clob did not match for row " + id, hadoopClob.getData(),
                                    oraClob.getSubString(1, (int) oraClob.length()));
                            assertEquals("Date did not match for row " + id, fields.get("D"), rs.getString(6));
                            BigDecimal hadoopFloat = (BigDecimal) fields.get("F");
                            BigDecimal oraFloat = rs.getBigDecimal(7);
                            assertEquals("Float did not match for row " + id, hadoopFloat, oraFloat);
                            assertEquals("NChar did not match for row " + id, fields.get("NC"), rs.getString(9));
                            assertEquals("NClob did not match for row " + id, fields.get("NCL"), rs.getString(10));
                            assertEquals("NVarChar did not match for row " + id, fields.get("NVC"), rs.getString(11));
                            assertEquals("RowId did not match for row " + id, fields.get("R"),
                                    new String(rs.getRowId(12).getBytes()));
                            Struct url = (Struct) rs.getObject(13); // TODO: Find a fix for this workaround
                            String urlString = (String) url.getAttributes()[0];
                            if (url.getSQLTypeName().equals("SYS.HTTPURITYPE")) {
                                urlString = "http://" + urlString;
                            } else if (url.getSQLTypeName().equals("SYS.DBURITYPE")) {
                                urlString = "/ORADB" + urlString;
                            }
                            assertEquals("UriType did not match for row " + id, fields.get("U"), urlString);
                            assertEquals("Interval Year to Month did not match for row " + id,
                                    fields.get("IYM"), rs.getString(14));
                            String ids = (String) fields.get("IDS");
                            // Strip trailing zeros to match Oracle format
                            int lastNonZero = ids.length() - 1;
                            while (ids.charAt(lastNonZero) == '0') {
                                lastNonZero--;
                            }
                            ids = ids.substring(0, lastNonZero + 1);
                            assertEquals("Interval Day to Second did not match for row " + id, ids, rs.getString(15));
                            assertEquals("Timestamp did not match for row " + id, fields.get("T"), rs.getString(16));
                            assertEquals("Timestamp with Time Zone did not match for row " + id,
                                    fields.get("TZ"), rs.getString(17));
                            assertEquals("Timestamp with Local Time Zone did not match for row " + id,
                                    fields.get("TLTZ"), rs.getString(18));
                            BytesWritable rawCol = (BytesWritable) fields.get("RAWCOL");
                            byte[] rawColData = Arrays.copyOf(rawCol.getBytes(), rawCol.getLength());
                            assertTrue("RAW did not match for row " + id, Arrays.equals(rawColData, rs.getBytes(19)));
                            assertFalse("Found multiple rows with id " + id + " in oracle", rs.next());
                            hadoopRecordCount++;
                        }
                        reader.close();
                    }
                }
                ResultSet rs = s.executeQuery("SELECT COUNT(*) FROM " + OracleUtils.SYSTEMTEST_TABLE_NAME);
                rs.next();
                int oracleRecordCount = rs.getInt(1);
                assertEquals("Number of records in Hadoop does not match number of records in oracle",
                        hadoopRecordCount, oracleRecordCount);
                rs.close();
            } finally {
                // Delete test data from Hadoop
                cleanupFolders();
            }
        } finally {
            // Delete test data from Oracle
            s.executeUpdate("DROP TABLE " + OracleUtils.SYSTEMTEST_TABLE_NAME);
            s.close();
        }
    } finally {
        closeTestEnvConnection();
    }
}
From source file:org.apache.sqoop.mapreduce.AvroImportMapper.java
License:Apache License
/**
 * Convert a Java object (already converted from its SQL equivalent) to its
 * Avro representation.
 * @param o
 * @return
 */
private Object toAvro(Object o) {
    if (o instanceof BigDecimal) {
        if (bigDecimalFormatString) {
            return ((BigDecimal) o).toPlainString();
        } else {
            return o.toString();
        }
    } else if (o instanceof Date) {
        return ((Date) o).getTime();
    } else if (o instanceof Time) {
        return ((Time) o).getTime();
    } else if (o instanceof Timestamp) {
        return ((Timestamp) o).getTime();
    } else if (o instanceof BytesWritable) {
        BytesWritable bw = (BytesWritable) o;
        return ByteBuffer.wrap(bw.getBytes(), 0, bw.getLength());
    } else if (o instanceof BlobRef) {
        BlobRef br = (BlobRef) o;
        // If blob data is stored in an external .lob file, save the ref file
        // as Avro bytes. If materialized inline, save blob data as Avro bytes.
        byte[] bytes = br.isExternal() ? br.toString().getBytes() : br.getData();
        return ByteBuffer.wrap(bytes);
    } else if (o instanceof ClobRef) {
        throw new UnsupportedOperationException("ClobRef not supported");
    }
    // Primitive types (Integer, etc.) are left unchanged.
    return o;
}
From source file:org.apache.tajo.storage.sequencefile.SequenceFileScanner.java
License:Apache License
/**
 * In Hive, LazyBinarySerDe output is serialized as follows:
 *
 *   start      A     B         A     B           A     B    end
 *   bytes[] -> |-----|---------|--- ... ---|-----|---------|
 *
 * Section A is one null-byte, corresponding to eight struct fields in Section
 * B. Each bit indicates whether the corresponding field is null (0) or not
 * null (1). Each field is a LazyBinaryObject.
 *
 * Following B, there is another section A and B. This pattern repeats until
 * all struct fields are serialized.
 *
 * So Tajo must build its tuple by parsing this Hive-style BinarySerDe layout.
 */
private Tuple makeTuple(BytesWritable value) throws IOException {
    Tuple tuple = new VTuple(schema.getColumns().size());

    int start = 0;
    int length = value.getLength();

    /**
     * Please note that one null byte is followed by eight fields, then more
     * null bytes and fields.
     */
    int structByteEnd = start + length;
    byte[] bytes = value.getBytes();

    byte nullByte = bytes[start];
    int lastFieldByteEnd = start + 1;

    // Go through all bytes in the byte[]
    for (int i = 0; i < schema.getColumns().size(); i++) {
        fieldIsNull[i] = true;
        if ((nullByte & (1 << (i % 8))) != 0) {
            fieldIsNull[i] = false;
            parse(schema.getColumn(i), bytes, lastFieldByteEnd);

            fieldStart[i] = lastFieldByteEnd + elementOffset;
            fieldLength[i] = elementSize;
            lastFieldByteEnd = fieldStart[i] + fieldLength[i];

            for (int j = 0; j < projectionMap.length; j++) {
                if (projectionMap[j] == i) {
                    Datum datum = serde.deserialize(schema.getColumn(i), bytes, fieldStart[i],
                            fieldLength[i], nullChars);
                    tuple.put(i, datum);
                }
            }
        }

        // The next byte is a null byte if there are more bytes to go
        if (7 == (i % 8)) {
            if (lastFieldByteEnd < structByteEnd) {
                nullByte = bytes[lastFieldByteEnd];
                lastFieldByteEnd++;
            } else {
                // Otherwise all fields are null afterwards
                nullByte = 0;
                lastFieldByteEnd++;
            }
        }
    }

    return tuple;
}
From source file:org.apache.tez.runtime.library.common.comparator.TezBytesComparator.java
License:Apache License
@Override
public int getProxy(BytesWritable key) {
    int prefix = 0;
    final int len = key.getLength();
    final byte[] content = key.getBytes();
    int b1 = 0, b2 = 0, b3 = 0;
    // Pack up to the first three valid bytes into a big-endian int prefix;
    // the switch falls through deliberately.
    switch (len) {
    default:
    case 3:
        b3 = content[2] & 0xff;
    case 2:
        b2 = content[1] & 0xff;
    case 1:
        b1 = content[0] & 0xff;
    case 0:
    }
    prefix = (b1 << 16) | (b2 << 8) | (b3);
    return prefix;
}
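As a worked example, a key whose first three bytes are 0x61 0x62 0x63 ("abc") yields the proxy 0x616263, while the two-byte key 0x61 0x62 yields 0x616200: the fall-through reads exactly min(getLength(), 3) leading bytes and leaves the rest zero. Because the bytes are masked unsigned and packed big-endian, proxies order the same way as the key prefixes, so (presumably, as with other ProxyComparator implementations) a full byte-wise key comparison is only needed when two proxies tie.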
From source file:org.commoncrawl.hadoop.io.mapred.ArcFileInputFormatTests.java
License:Apache License
static void validateSplit(FileSystem fs, InputSplit split, List<Pair<Path, List<TestRecord>>> splits,
        RecordReader<Text, BytesWritable> reader) throws IOException, InterruptedException {

    int splitDataIndex = getIndexOfSplit(splits, split);
    Assert.assertTrue(splitDataIndex != -1);

    List<TestRecord> records = splits.get(splitDataIndex).e1;
    int itemIndex = 0;

    // Iterate and validate ...
    Text key = new Text();
    BytesWritable value = new BytesWritable();
    while (reader.next(key, value)) {
        TestRecord testRecord = records.get(itemIndex++);
        // Get the test key as UTF-8 bytes and compare against the raw key bytes.
        // (Text's UTF-8 mapping code replaces invalid characters with '?', which
        // would break this test case, since it deliberately uses invalid
        // characters to form the key.)
        byte[] testKeyBytes = testRecord.url.getBytes(Charset.forName("UTF-8"));
        Assert.assertTrue(ArcFileReaderTests.compareTo(testKeyBytes, 0, testKeyBytes.length,
                key.getBytes(), 0, key.getLength()) == 0);
        // The returned bytes hold the header (encoded in UTF-8) terminated by
        // "\r\n\r\n"; the content follows this terminator. Search for that byte
        // pattern to locate the start of the content, then compare it against the source.
        int indexofHeaderTerminator = ByteArrayUtils.indexOf(value.getBytes(), 0, value.getLength(),
                "\r\n\r\n".getBytes());
        indexofHeaderTerminator += 4;
        Assert.assertTrue(ArcFileReaderTests.compareTo(testRecord.data, 0, testRecord.data.length,
                value.getBytes(), indexofHeaderTerminator, testRecord.data.length) == 0);
    }
    reader.close();

    Assert.assertEquals(itemIndex, ArcFileReaderTests.BASIC_TEST_RECORD_COUNT);

    splits.remove(splitDataIndex);
}
From source file:org.commoncrawl.hadoop.io.mapred.ArcFileRecordReaderTests.java
License:Apache License
@Test
public void TestARCFileRecordReader() throws IOException, InterruptedException {

    JobConf conf = new JobConf();
    FileSystem fs = LocalFileSystem.get(conf);
    Path path = new Path("/tmp/" + File.createTempFile("ARCRecordReader", "test"));

    List<TestRecord> records = ArcFileReaderTests.buildTestRecords(ArcFileReaderTests.BASIC_TEST_RECORD_COUNT);

    FSDataOutputStream os = fs.create(path);
    try {
        // Write the ARC file into memory
        ArcFileReaderTests.writeFirstRecord(os, "test", System.currentTimeMillis());
        long testAttemptTime = System.currentTimeMillis();
        for (TestRecord record : records) {
            ArcFileReaderTests.write(os, record.url, "test", 1, 1, record.data, 0, record.data.length,
                    new NIOHttpHeaders(), "text/html", MD5Hash.digest(record.data).toString(), 12345,
                    testAttemptTime);
        }
        os.flush();
    } finally {
        os.close();
    }

    FileSplit split = new FileSplit(path, 0, fs.getFileStatus(path).getLen(), new String[0]);
    ARCFileRecordReader reader = new ARCFileRecordReader();
    reader.initialize(conf, split);

    int index = 0;

    // Iterate and validate ...
    Text key = reader.createKey();
    BytesWritable value = reader.createValue();
    while (reader.next(key, value)) {
        TestRecord testRecord = records.get(index++);
        // Get the test key as UTF-8 bytes and compare against the raw key bytes.
        // (Text's UTF-8 mapping code replaces invalid characters with '?', which
        // would break this test case, since it deliberately uses invalid
        // characters to form the key.)
        byte[] testKeyBytes = testRecord.url.getBytes(Charset.forName("UTF-8"));
        Assert.assertTrue(ArcFileReaderTests.compareTo(testKeyBytes, 0, testKeyBytes.length,
                key.getBytes(), 0, key.getLength()) == 0);
        // The returned bytes hold the header (encoded in UTF-8) terminated by
        // "\r\n\r\n"; the content follows this terminator. Search for that byte
        // pattern to locate the start of the content, then compare it against the source.
        int indexofHeaderTerminator = ByteArrayUtils.indexOf(value.getBytes(), 0, value.getLength(),
                "\r\n\r\n".getBytes());
        indexofHeaderTerminator += 4;
        Assert.assertTrue(ArcFileReaderTests.compareTo(testRecord.data, 0, testRecord.data.length,
                value.getBytes(), indexofHeaderTerminator, testRecord.data.length) == 0);
    }
    reader.close();

    Assert.assertEquals(index, ArcFileReaderTests.BASIC_TEST_RECORD_COUNT);

    fs.delete(path, false);
}
From source file:org.commoncrawl.hadoop.io.mapreduce.ArcFileInputFormatTests.java
License:Apache License
static void validateSplit(FileSystem fs, InputSplit split, List<Pair<Path, List<TestRecord>>> splits,
        RecordReader<Text, BytesWritable> reader) throws IOException, InterruptedException {

    int splitDataIndex = getIndexOfSplit(splits, split);
    Assert.assertTrue(splitDataIndex != -1);

    List<TestRecord> records = splits.get(splitDataIndex).e1;
    int itemIndex = 0;

    // Iterate and validate ...
    while (reader.nextKeyValue()) {
        Text key = reader.getCurrentKey();
        BytesWritable value = reader.getCurrentValue();
        TestRecord testRecord = records.get(itemIndex++);
        // Get the test key as UTF-8 bytes and compare against the raw key bytes.
        // (Text's UTF-8 mapping code replaces invalid characters with '?', which
        // would break this test case, since it deliberately uses invalid
        // characters to form the key.)
        byte[] testKeyBytes = testRecord.url.getBytes(Charset.forName("UTF-8"));
        Assert.assertTrue(ArcFileReaderTests.compareTo(testKeyBytes, 0, testKeyBytes.length,
                key.getBytes(), 0, key.getLength()) == 0);
        // The returned bytes hold the header (encoded in UTF-8) terminated by
        // "\r\n\r\n"; the content follows this terminator. Search for that byte
        // pattern to locate the start of the content, then compare it against the source.
        int indexofHeaderTerminator = ByteArrayUtils.indexOf(value.getBytes(), 0, value.getLength(),
                "\r\n\r\n".getBytes());
        indexofHeaderTerminator += 4;
        Assert.assertTrue(ArcFileReaderTests.compareTo(testRecord.data, 0, testRecord.data.length,
                value.getBytes(), indexofHeaderTerminator, testRecord.data.length) == 0);
    }
    reader.close();

    Assert.assertEquals(itemIndex, ArcFileReaderTests.BASIC_TEST_RECORD_COUNT);

    splits.remove(splitDataIndex);
}