List of usage examples for org.apache.hadoop.io BytesWritable set
public void set(byte[] newData, int offset, int length)
From source file:com.m6d.hive.protobuf.TestProto.java
License:Apache License
public void testNotdefined() throws Exception { Path p = new Path(this.ROOT_DIR, "nada"); SequenceFile.Writer w = SequenceFile.createWriter(this.getFileSystem(), new Configuration(), p, BytesWritable.class, BytesWritable.class); AThing.Builder aThingBuild = AThing.newBuilder(); AThing aThing = aThingBuild.addLuckynumbers(7).addLuckynumbers(4).addToys("car").build(); AList.Builder aListBuild = AList.newBuilder(); AList aList = aListBuild.addAge(2).addAge(3).addThings(aThing).build(); BytesWritable key = new BytesWritable(); BytesWritable value = new BytesWritable(); ByteArrayOutputStream s = new ByteArrayOutputStream(); aList.writeTo(s);/*from w w w .ja v a 2 s . co m*/ ByteArrayOutputStream t = new ByteArrayOutputStream(); aList.writeTo(t); key.set(s.toByteArray(), 0, s.size()); value.set(t.toByteArray(), 0, t.size()); w.append(key, value); w.close(); String jarFile; jarFile = KVAsVSeqFileBinaryInputFormat.class.getProtectionDomain().getCodeSource().getLocation().getFile(); System.out.println("set hive.aux.jars.path=file:///" + jarFile); client.execute("add jar " + jarFile); client.execute("set hive.aux.jars.path=file:///" + jarFile); client.execute("create table nada " + " ROW FORMAT SERDE '" + ProtobufDeserializer.class.getName() + "'" //+ " WITH SERDEPROPERTIES ('KEY_SERIALIZE_CLASS'='" + Ex.AList.class.getName() //+ "','VALUE_SERIALIZE_CLASS'='" + Ex.AList.class.getName() + "' )" + " STORED AS INPUTFORMAT '" + KVAsVSeqFileBinaryInputFormat.class.getName() + "'" + " OUTPUTFORMAT '" + SequenceFileOutputFormat.class.getName() + "'"); client.execute("load data local inpath '" + p.toString() + "' into table nada"); client.execute("SELECT key FROM nada"); List<String> results = client.fetchAll(); String expected = "null"; //expected = "{\"agecount\":2,\"agelist\":[2,3],\"thingscount\":1,\"thingslist\":[{\"luckynumberscount\":2,\"luckynumberslist\":[7,4],\"toyscount\":1,\"toyslist\":[\"car\"]}]}"; Assert.assertEquals(expected, results.get(0)); client.execute("drop 
table nada"); }
From source file:com.ostor.dedup.hadoop.BinaryRecordReader.java
License:Open Source License
/**
 * Delivers the contents of {@code buffer} as a single record, exactly once.
 *
 * @param key   not modified by this method
 * @param value receives a copy of the whole buffer on the first call
 * @return {@code true} on the first call, {@code false} on every later call
 */
public boolean next(LongWritable key, BytesWritable value) {
    if (!written) {
        logger.debug("Setting value of length - " + buffer.length);
        value.set(buffer, 0, buffer.length);
        written = true;
        return true;
    }
    // The single record has already been handed out.
    return false;
}
From source file:com.rim.logdriver.mapred.avro.AvroBlockRecordReader.java
License:Apache License
@Override public boolean next(AvroFileHeader key, BytesWritable value) throws IOException { while (pos >= end) { if (in != null) { in.close();//from w w w . j a v a2 s. com } currentFile++; if (split.getNumPaths() > currentFile) { initCurrentFile(); } else { return false; } } key.set(header); // Get the number of entries in the next block int entries = AvroUtils.readInt(in); byte[] block = AvroUtils.readBytes(in); // Check that the sync marker is what we expect LOG.trace("Verifying sync marker"); byte[] syncMarker = AvroUtils.readBytes(in, DataFileConstants.SYNC_SIZE); if (!Arrays.equals(syncMarker, header.getSyncMarker())) { LOG.error("Sync marker does not match"); return false; } // Now, pack it all back into a byte[], and set the value of value { ByteBuffer bb = ByteBuffer.allocate(10 + 10 + block.length); bb.put(AvroUtils.encodeLong(entries)); bb.put(AvroUtils.encodeLong(block.length)); bb.put(block); byte[] result = new byte[bb.position()]; bb.rewind(); bb.get(result); value.set(result, 0, result.length); pos = in.getPos(); } return true; }
From source file:com.yahoo.sketches.hive.theta.ExcludeSketchUDF.java
License:Apache License
/** * Main logic called by hive if sketchSize is also passed in. Computes the * hash in first sketch excluding the hash in second sketch of two sketches of * same or different column./*from w w w . ja v a 2 s . c o m*/ * * @param firstSketchBytes * first sketch to be included. * @param secondSketchBytes * second sketch to be excluded. * @param hashSeed * Only required if input sketches were constructed using an update seed that was not the default. * @return resulting sketch of exclusion. */ public BytesWritable evaluate(final BytesWritable firstSketchBytes, final BytesWritable secondSketchBytes, final long hashSeed) { Sketch firstSketch = null; if (firstSketchBytes != null && firstSketchBytes.getLength() > 0) { firstSketch = Sketch.wrap(new NativeMemory(firstSketchBytes.getBytes()), hashSeed); } Sketch secondSketch = null; if (secondSketchBytes != null && secondSketchBytes.getLength() > 0) { secondSketch = Sketch.wrap(new NativeMemory(secondSketchBytes.getBytes()), hashSeed); } final AnotB anotb = SetOperation.builder().setSeed(hashSeed).buildANotB(); anotb.update(firstSketch, secondSketch); final byte[] excludeSketchBytes = anotb.getResult().toByteArray(); final BytesWritable result = new BytesWritable(); result.set(excludeSketchBytes, 0, excludeSketchBytes.length); return result; }
From source file:com.yahoo.sketches.hive.theta.SampleSketchUDF.java
License:Apache License
/** * Main logic called by hive, produces new sketch from original using * specified size and sampling probablility. * //from w w w . j a v a 2s.c o m * @param binarySketch * sketch to be sampled passed in as bytes writable. * @param sketchSize * Size to use for the new sketch. * This must be a power of 2 and larger than 16. If zero, DEFAULT is used. * @param probability * The sampling probability to use for the new sketch. * Should be greater than zero and less than or equal to 1.0 * @return The sampled sketch encoded as a BytesWritable */ public BytesWritable evaluate(BytesWritable binarySketch, int sketchSize, float probability) { // Null checks if (binarySketch == null) { return null; } byte[] serializedSketch = binarySketch.getBytes(); if (serializedSketch.length <= 8) { return null; } // The builder will catch errors with improper sketchSize or probability Union union = SetOperation.builder().setP(probability).buildUnion(sketchSize); union.update(new NativeMemory(serializedSketch)); //Union can accept Memory object directly Sketch intermediateSketch = union.getResult(false, null); //to CompactSketch(unordered, on-heap) byte[] resultSketch = intermediateSketch.toByteArray(); BytesWritable result = new BytesWritable(); result.set(resultSketch, 0, resultSketch.length); return result; }
From source file:crunch.MaxTemperature.java
License:Apache License
/**
 * Reads the entire file of the split into {@code value} as one record.
 *
 * @param key   not modified by this method
 * @param value receives the complete file contents on the first successful call
 * @return {@code true} for the call that read the file, {@code false} once it has been processed
 * @throws IOException if the file cannot be opened or fully read
 */
@Override
public boolean next(NullWritable key, BytesWritable value) throws IOException {
    if (processed) {
        return false;
    }

    final byte[] wholeFile = new byte[(int) fileSplit.getLength()];
    final Path path = fileSplit.getPath();
    final FileSystem fileSystem = path.getFileSystem(conf);

    FSDataInputStream stream = null;
    try {
        stream = fileSystem.open(path);
        IOUtils.readFully(stream, wholeFile, 0, wholeFile.length);
        value.set(wholeFile, 0, wholeFile.length);
    } finally {
        IOUtils.closeStream(stream);
    }

    // Only reached when the read succeeded, so a failed attempt is not marked as processed.
    processed = true;
    return true;
}
From source file:hadoop.FrameReader.java
License:Apache License
/**
 * Reads bytes up to the next record delimiter, decodes them into a frame and, if the frame's
 * date lies between {@code startDate} and {@code endDate}, stores the frame in {@code frame}.
 *
 * <p>Bytes are taken from {@code buffer}, which is refilled through {@code fillBuffer} whenever
 * it is exhausted, and the decoded bytes are collected in a temporary list. The date is decoded
 * as a double from the leading bytes of that list.
 *
 * <p>NOTE(review): the statements that added to {@code bytesConsumed} are commented out below,
 * so in the visible code the counter is only ever decreased; the returned value and the
 * {@code maxBytesToConsume} limit should be re-checked against what callers expect.
 *
 * @param frame             receives the decoded frame when its date is inside the range
 * @param maxFrameLength    only forwarded to the recursive call by the live code of this method
 * @param maxBytesToConsume upper bound compared against {@code bytesConsumed} in the loop condition
 * @param startDate         frames dated before this value are skipped and the next frame is read
 * @param endDate           when a frame is dated after this value the method returns 0
 * @return 0 when the frame date is greater than {@code endDate}; otherwise {@code bytesConsumed}
 *         cast to {@code int}
 * @throws IOException if {@code bytesConsumed} exceeds {@code Integer.MAX_VALUE}; presumably
 *                     also propagated from {@code fillBuffer} - confirm
 */
public int readFrame(BytesWritable frame, int maxFrameLength, int maxBytesToConsume, double startDate,
        double endDate) throws IOException {
    /* We're reading data from inputStream, but the head of the stream may be
     * already captured in the previous buffer, so we have several cases:
     *
     * 1. The buffer tail does not contain any character sequence which
     *    matches with the head of delimiter. We count it as a
     *    ambiguous byte count = 0
     *
     * 2. The buffer tail contains a X number of characters,
     *    that forms a sequence, which matches with the
     *    head of delimiter. We count ambiguous byte count = X
     *
     *    // *** eg: A segment of input file is as follows
     *
     *    " record 1792: I found this bug very interesting and
     *    I have completely read about it. record 1793: This bug
     *    can be solved easily record 1794: This ."
     *
     *    delimiter = "record";
     *
     *    supposing:- String at the end of buffer =
     *    "I found this bug very interesting and I have completely re"
     *    There for next buffer = "ad about it. record 179 ...."
     *
     *    The matching characters in the input
     *    buffer tail and delimiter head = "re"
     *    Therefore, ambiguous byte count = 2 **** //
     *
     * 2.1 If the following bytes are the remaining characters of
     *     the delimiter, then we have to capture only up to the starting
     *     position of delimiter. That means, we need not include the
     *     ambiguous characters in str.
     *
     * 2.2 If the following bytes are not the remaining characters of
     *     the delimiter ( as mentioned in the example ),
     *     then we have to include the ambiguous characters in str.
     */
    // NOTE(review): txtLength, separatorLength and startPosn are only referenced by the
    // commented-out code further down.
    int txtLength = 0; //tracks str.getLength(), as an optimization
    int separatorLength = 0; //length of terminating newline
    long bytesConsumed = 0;
    int delPosn = 0; // number of delimiter bytes matched so far
    int ambiguousByteCount = 0; // To capture the ambiguous characters count
    List<Byte> tram = new ArrayList<Byte>(); // decoded bytes of the frame being read
    do {
        int startPosn = bufferPosn; //starting from where we left off the last time
        if (bufferPosn >= bufferLength) {
            // Buffer exhausted: refill it and restart from its beginning.
            startPosn = bufferPosn = 0;
            bufferLength = fillBuffer(in, buffer, ambiguousByteCount > 0);
            if (bufferLength <= 0) // the case of a file that ends with a zero byte is not handled
                break; // EOF
        }
        for (; bufferPosn < bufferLength; ++bufferPosn) {
            if (buffer[bufferPosn] == recordDelimiterBytes[delPosn]) {
                delPosn++;
                if (delPosn >= recordDelimiterBytes.length) {
                    // Whole delimiter matched: step past it and stop scanning.
                    bufferPosn++;
                    break;
                }
            } else if (delPosn != 0) {//it means the last byte is zero
                // decode: the current byte is taken as a count, and that many zero bytes are
                // appended. NOTE(review): this looks like a run-length encoding of zeros in
                // which the delimiter starts with a zero byte - confirm.
                byte nbZeros = buffer[bufferPosn];
                for (int k = 0; k < nbZeros; k++)
                    tram.add((byte) 0);
                delPosn = 0;
            } else {
                //case that we have delPos=0
                // write the byte as is
                tram.add(buffer[bufferPosn]);
            }
        }
        // Disabled accounting / incremental append, kept for reference:
        // int readLength = bufferPosn - startPosn;
        //
        // bytesConsumed += readLength;
        // int appendLength = readLength - delPosn;
        // if (appendLength > maxFrameLength - txtLength) {
        //     appendLength = maxFrameLength - txtLength;
        // }
        // if (appendLength > 0) {
        //
        //     frame.set(FrameReader.transformerByte(tram),0,tram.size());// do this only at the end
        //     txtLength += appendLength;
        // }
        if (delPosn > 0 && delPosn < recordDelimiterBytes.length) {
            // The buffer ended in the middle of a possible delimiter.
            ambiguousByteCount = delPosn;
            bytesConsumed -= ambiguousByteCount; //to be consumed in next
        }
    } while (delPosn < recordDelimiterBytes.length && bytesConsumed < maxBytesToConsume);

    // we have three cases depending on the Date of frame
    // NOTE(review): subList(0, 7) yields 7 elements while getDouble() reads 8 bytes; unless
    // transformerByte pads its result this looks like an off-by-one - confirm. It also throws
    // if fewer than 7 bytes were collected (e.g. when EOF was hit immediately).
    bf.clear();
    bf.put(OldFrameReader.transformerByte(tram.subList(0, 7)));
    bf.flip();
    double frameDate = bf.getDouble();
    if (frameDate > endDate) {
        // we skip the file
        return 0;
    } else if (frameDate < startDate)// we skip the frame
        // Recurse to read the following frame; the bytes of the skipped frame are discarded.
        return readFrame(frame, maxFrameLength, maxBytesToConsume,
                startDate, endDate);
    else {
        // we read the frame
        frame.set(OldFrameReader.transformerByte(tram), 0, tram.size());
    }
    if (bytesConsumed > (long) Integer.MAX_VALUE) {
        throw new IOException("Too many bytes before delimiter: " + bytesConsumed);
    }
    return (int) bytesConsumed;
}
From source file:io.bfscan.data.VByteDocVector.java
License:Apache License
/**
 * Serializes a term-id vector into {@code bytes} as a VInt count followed by one VInt per
 * term id.
 *
 * <p>A {@code null} array is written as an empty vector (count 0). If serialization fails with
 * an {@link IOException}, {@code bytes} is set to zero-length content instead.
 *
 * @param bytes   destination writable
 * @param termids term ids to encode, may be {@code null}
 * @param length  number of leading entries of {@code termids} to encode; ignored when
 *                {@code termids} is {@code null}
 */
public static void toBytesWritable(BytesWritable bytes, int[] termids, int length) {
    final boolean missing = (termids == null);
    final int[] ids = missing ? new int[] {} : termids;
    final int count = missing ? 0 : length;

    try {
        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(sink);

        WritableUtils.writeVInt(out, count);
        int index = 0;
        while (index < count) {
            WritableUtils.writeVInt(out, ids[index]);
            index++;
        }

        byte[] encoded = sink.toByteArray();
        bytes.set(encoded, 0, encoded.length);
    } catch (IOException e) {
        // Fall back to empty content when encoding fails.
        bytes.set(new byte[] {}, 0, 0);
    }
}
From source file:io.covert.binary.analysis.BuildSequenceFile.java
License:Apache License
@Override public int run(String[] args) throws Exception { File inDir = new File(args[0]); Path name = new Path(args[1]); Text key = new Text(); BytesWritable val = new BytesWritable(); Configuration conf = getConf(); FileSystem fs = FileSystem.get(conf); SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, name, Text.class, BytesWritable.class, CompressionType.RECORD);// ww w. jav a 2 s.c o m for (File file : inDir.listFiles()) { if (!file.isFile()) { System.out.println("Skipping " + file + " (not a file) ..."); continue; } FileInputStream fileIn = new FileInputStream(file); ByteArrayOutputStream bytesOut = new ByteArrayOutputStream((int) file.length()); int b; while (-1 != (b = fileIn.read())) { bytesOut.write(b); } fileIn.close(); bytesOut.close(); byte[] bytes = bytesOut.toByteArray(); val.set(bytes, 0, bytes.length); key.set(file.getName()); writer.append(key, val); } writer.close(); return 0; }
From source file:io.covert.binary.analysis.BuildSequenceFileFromTarball.java
License:Apache License
public void load(FileSystem fs, Configuration conf, File inputTarball, Path outputDir) throws Exception { Text key = new Text(); BytesWritable val = new BytesWritable(); Path sequenceName = new Path(outputDir, inputTarball.getName() + ".seq"); System.out.println("Writing to " + sequenceName); SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, sequenceName, Text.class, BytesWritable.class, CompressionType.RECORD); InputStream is = new FileInputStream(inputTarball); if (inputTarball.toString().toLowerCase().endsWith(".gz")) { is = new GZIPInputStream(is); } else if (inputTarball.toString().toLowerCase().endsWith(".bz") || inputTarball.toString().endsWith(".bz2")) { is.read(); // read 'B' is.read(); // read 'Z' is = new CBZip2InputStream(is); }// ww w .j a v a2 s .co m final TarArchiveInputStream debInputStream = (TarArchiveInputStream) new ArchiveStreamFactory() .createArchiveInputStream("tar", is); TarArchiveEntry entry = null; while ((entry = (TarArchiveEntry) debInputStream.getNextEntry()) != null) { if (!entry.isDirectory()) { try { final ByteArrayOutputStream outputFileStream = new ByteArrayOutputStream(); IOUtils.copy(debInputStream, outputFileStream); outputFileStream.close(); byte[] outputFile = outputFileStream.toByteArray(); val.set(outputFile, 0, outputFile.length); MessageDigest md = MessageDigest.getInstance("MD5"); md.update(outputFile); byte[] digest = md.digest(); String hexdigest = ""; for (int i = 0; i < digest.length; i++) { hexdigest += Integer.toString((digest[i] & 0xff) + 0x100, 16).substring(1); } key.set(hexdigest); writer.append(key, val); } catch (IOException e) { System.err.println("Warning: tarball may be truncated: " + inputTarball); // Truncated Tarball break; } } } debInputStream.close(); writer.close(); }