List of usage examples for org.apache.hadoop.io BytesWritable getBytes
@Override public byte[] getBytes()
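Note that getBytes() returns the backing buffer, which is often longer than the valid payload; only the range [0, getLength()) holds real data. Most of the examples below therefore pair getBytes() with getLength(). A minimal sketch of the safe pattern (class and variable names here are illustrative, not from any of the files below):

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.BytesWritable;

public class GetBytesUsage {
    public static void main(String[] args) {
        BytesWritable writable = new BytesWritable("example".getBytes(StandardCharsets.UTF_8));

        // getBytes() returns the backing array, which may be longer than the payload,
        // so bound every read by getLength().
        String text = new String(writable.getBytes(), 0, writable.getLength(), StandardCharsets.UTF_8);

        // copyBytes() returns a trimmed copy containing only the valid bytes.
        byte[] trimmed = writable.copyBytes();

        System.out.println(text + " / " + trimmed.length + " bytes");
    }
}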
From source file:com.philiphubbard.digraph.MRVertex.java
License:Open Source License
public MRVertex(BytesWritable writable, Configuration config) {
    this.config = config;
    byte[] array = writable.getBytes();
    flags = array[1];
    int i = 2;
    id = getInt(array, i);
    i += 4;
    edges = new EdgeLink[2];
    edges[INDEX_EDGES_TO] = null;
    edges[INDEX_EDGES_FROM] = null;
    short numEdgesTo = getShort(array, i);
    i += 2;
    for (int j = 0; j < numEdgesTo; j++) {
        addEdgeTo(getInt(array, i));
        i += 4;
    }
    short numEdgesFrom = getShort(array, i);
    i += 2;
    for (int j = 0; j < numEdgesFrom; j++) {
        addEdgeFrom(getInt(array, i));
        i += 4;
    }
    short numBytesInternal = getShort(array, i);
    i += 2;
    if (numBytesInternal != 0)
        fromWritableInternal(array, i, numBytesInternal);
    iterators = new ArrayList<WeakReference<EdgeHolder>>();
}
From source file:com.philiphubbard.digraph.MRVertex.java
License:Open Source License
protected static byte getFlags(BytesWritable writable) {
    byte[] bytes = writable.getBytes();
    return bytes[1];
}
From source file:com.philiphubbard.digraph.MRVertexTest.java
License:Open Source License
private static void testError() throws IOException {
    Configuration config = new Configuration();
    MRVertex v = new MRVertex(0, config);
    v.addEdgeTo(1);
    v.addEdgeTo(2);
    v.addEdgeFrom(3);
    BytesWritable bw = v.toWritable(MRVertex.EdgeFormat.EDGES_TO_FROM);
    byte b[] = bw.getBytes();

    // Make a hadoop.io.BytesWritable that will have an error, because
    // the byte array is truncated in half.
    byte bTrunc[] = new byte[b.length / 2];
    for (int i = 0; i < bTrunc.length; i++)
        bTrunc[i] = b[i];

    BytesWritable bwTrunc = new BytesWritable(bTrunc);
    MRVertex vTrunc = new MRVertex(bwTrunc, config);

    int numEdges = 0;
    MRVertex.AdjacencyIterator it = vTrunc.createToAdjacencyIterator();
    for (it.begin(); !it.done(); it.next())
        numEdges++;
    it = vTrunc.createFromAdjacencyIterator();
    for (it.begin(); !it.done(); it.next())
        numEdges++;

    // Verify that not all the edges were recovered, but at least no
    // exception occurred.
    assert (numEdges != 3);
}
From source file:com.pinterest.secor.tools.LogFilePrinter.java
License:Apache License
public void printFile(String path) throws Exception {
    FileSystem fileSystem = FileUtil.getFileSystem(path);
    Path fsPath = new Path(path);
    SequenceFile.Reader reader = new SequenceFile.Reader(fileSystem, fsPath, new Configuration());
    LongWritable key = (LongWritable) reader.getKeyClass().newInstance();
    BytesWritable value = (BytesWritable) reader.getValueClass().newInstance();
    System.out.println("reading file " + path);
    while (reader.next(key, value)) {
        if (mPrintOffsetsOnly) {
            System.out.println(Long.toString(key.get()));
        } else {
            System.out.println(Long.toString(key.get()) + ": " + new String(value.getBytes()));
        }
    }
}
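Because the value object is reused across reader.next() calls, new String(value.getBytes()) can pick up stale bytes beyond the current record. A hedged variant of the else-branch above, bounded by the valid length (same locals as in the snippet, not part of the original file):

// Variant of the print statement that only converts the valid portion of the buffer:
System.out.println(Long.toString(key.get()) + ": "
        + new String(value.getBytes(), 0, value.getLength()));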
From source file:com.pinterest.terrapin.hadoop.BaseUploader.java
License:Apache License
/**
 * Validates that the first non-empty partition hfile has the right partitioning function.
 * It reads several keys, then calculates the partition according to the partitioning function
 * the client provided. If the calculated partition number differs from the actual partition
 * number, an exception is thrown. If all partition hfiles are empty, an exception is thrown.
 *
 * @param parts full absolute path for all partitions
 * @param partitionerType type of partitioning function
 * @param numShards total number of partitions
 * @throws IOException if something goes wrong when reading the hfiles
 * @throws IllegalArgumentException if the partitioner type is wrong or all partitions are empty
 */
public void validate(List<Path> parts, PartitionerType partitionerType, int numShards) throws IOException {
    boolean hasNonEmptyPartition = false;
    HColumnDescriptor columnDescriptor = new HColumnDescriptor();
    // Disable block cache to ensure it reads the actual file content.
    columnDescriptor.setBlockCacheEnabled(false);
    for (int shardIndex = 0; shardIndex < parts.size(); shardIndex++) {
        Path fileToBeValidated = parts.get(shardIndex);
        HFile.Reader reader = null;
        try {
            FileSystem fs = FileSystem.newInstance(fileToBeValidated.toUri(), conf);
            CacheConfig cc = new CacheConfig(conf, columnDescriptor);
            reader = HFile.createReader(fs, fileToBeValidated, cc);
            Partitioner partitioner = PartitionerFactory.getPartitioner(partitionerType);
            byte[] rowKey = reader.getFirstRowKey();
            if (rowKey == null) {
                LOG.warn(String.format("empty partition %s", fileToBeValidated.toString()));
                reader.close();
                continue;
            }
            hasNonEmptyPartition = true;
            BytesWritable key = new BytesWritable(rowKey);
            int partition = partitioner.getPartition(key, null, numShards);
            if (partition != shardIndex) {
                throw new IllegalArgumentException(
                        String.format("wrong partition type %s for key %s in partition %d, expected %d",
                                partitionerType.toString(), new String(key.getBytes()), shardIndex, partition));
            }
        } finally {
            if (reader != null) {
                reader.close();
            }
        }
    }
    if (!hasNonEmptyPartition) {
        throw new IllegalArgumentException("all partitions are empty");
    }
}
From source file:com.pinterest.terrapin.hadoop.HFileRecordWriter.java
License:Apache License
@Override
public void write(BytesWritable key, BytesWritable value) throws IOException, InterruptedException {
    // Mapreduce reuses the same objects and hence sometimes they have some
    // additional left over garbage at the end from previous keys. So we need to
    // retrieve byte arrays which do not have the additional garbage; a bare getBytes()
    // call does not work well here.
    byte[] row = new byte[key.getLength()];
    byte[] val = new byte[value.getLength()];
    for (int i = 0; i < row.length; i++) {
        row[i] = key.getBytes()[i];
    }
    for (int i = 0; i < val.length; i++) {
        val[i] = value.getBytes()[i];
    }
    writer.append(new KeyValue(row, Bytes.toBytes("cf"), Bytes.toBytes(""), val));
}
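The element-by-element copy above trims the reused buffers down to their valid lengths. A sketch of an equivalent write() body using java.util.Arrays.copyOf (or BytesWritable.copyBytes() on recent Hadoop versions), assuming the same writer field and column family as the original:

@Override
public void write(BytesWritable key, BytesWritable value) throws IOException, InterruptedException {
    // Arrays.copyOf produces arrays of exactly getLength() bytes, which is what the
    // manual loops in the original accomplish.
    byte[] row = Arrays.copyOf(key.getBytes(), key.getLength());
    byte[] val = Arrays.copyOf(value.getBytes(), value.getLength());
    writer.append(new KeyValue(row, Bytes.toBytes("cf"), Bytes.toBytes(""), val));
}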
From source file:com.rapleaf.hank.hadoop.KeyAndPartitionWritable.java
License:Apache License
public KeyAndPartitionWritable(Domain domain, BytesWritable key) {
    this.key = key;
    int partition = domain.getPartitioner().partition(ByteBuffer.wrap(key.getBytes(), 0, key.getLength()),
            domain.getNumParts());
    this.partition = new IntWritable(partition);
}
From source file:com.rramos.bigdata.utils.GenericUDFSha2.java
License:Apache License
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
    if (digest == null) {
        return null;
    }
    digest.reset();
    if (isStr) {
        Text n = GenericUDFParamUtils.getTextValue(arguments, 0, converters);
        if (n == null) {
            return null;
        }
        digest.update(n.getBytes(), 0, n.getLength());
    } else {
        BytesWritable bWr = GenericUDFParamUtils.getBinaryValue(arguments, 0, converters);
        if (bWr == null) {
            return null;
        }
        digest.update(bWr.getBytes(), 0, bWr.getLength());
    }
    byte[] resBin = digest.digest();
    String resStr = Hex.encodeHexString(resBin);
    output.set(resStr);
    return output;
}
From source file:com.scaleunlimited.classify.datum.ModelDatum.java
License:Apache License
public BaseModel getModel() throws Exception {
    String className = _tupleEntry.getString(MODEL_FN);
    BytesWritable modelData = (BytesWritable) (_tupleEntry.getObject(MODEL_DATA_FN));
    DataInputBuffer dib = new DataInputBuffer();
    dib.reset(modelData.getBytes(), modelData.getLength());
    BaseModel model = (BaseModel) Class.forName(className).newInstance();
    model.readFields(dib);
    return model;
}
From source file:com.shmsoft.dmass.main.Reduce.java
License:Apache License
protected void processMap(MapWritable value) throws IOException {
    Metadata allMetadata = getAllMetadata(value);
    Metadata standardMetadata = getStandardMetadata(allMetadata, outputFileCount);
    columnMetadata.addMetadata(standardMetadata);
    columnMetadata.addMetadata(allMetadata);
    if (!isMaster) {
        columnMetadata.addMetadataValue(DocumentMetadataKeys.MASTER_DUPLICATE,
                UPIFormat.format(outputFileCount));
    }
    String originalFileName = new File(allMetadata.get(DocumentMetadataKeys.DOCUMENT_ORIGINAL_PATH)).getName();

    // add the text to the text folder
    String documentText = allMetadata.get(DocumentMetadataKeys.DOCUMENT_TEXT);
    String textEntryName = ParameterProcessing.TEXT + "/" + UPIFormat.format(outputFileCount) + "_"
            + originalFileName + ".txt";
    if (textEntryName != null) {
        zipFileWriter.addTextFile(textEntryName, documentText);
    }
    columnMetadata.addMetadataValue(DocumentMetadataKeys.LINK_TEXT, textEntryName);

    // add the native file to the native folder
    String nativeEntryName = ParameterProcessing.NATIVE + "/" + UPIFormat.format(outputFileCount) + "_"
            + originalFileName;
    BytesWritable bytesWritable = (BytesWritable) value.get(new Text(ParameterProcessing.NATIVE));
    if (bytesWritable != null) { // some large exception files are not passed
        zipFileWriter.addBinaryFile(nativeEntryName, bytesWritable.getBytes(), bytesWritable.getLength());
        History.appendToHistory(nativeEntryName);
    }
    columnMetadata.addMetadataValue(DocumentMetadataKeys.LINK_NATIVE, nativeEntryName);

    // add the pdf made from native to the PDF folder
    String pdfNativeEntryName = ParameterProcessing.PDF_FOLDER + "/" + UPIFormat.format(outputFileCount) + "_"
            + new File(allMetadata.get(DocumentMetadataKeys.DOCUMENT_ORIGINAL_PATH)).getName() + ".pdf";
    BytesWritable pdfBytesWritable = (BytesWritable) value.get(new Text(ParameterProcessing.NATIVE_AS_PDF));
    if (pdfBytesWritable != null) {
        zipFileWriter.addBinaryFile(pdfNativeEntryName, pdfBytesWritable.getBytes(),
                pdfBytesWritable.getLength());
        History.appendToHistory(pdfNativeEntryName);
    }

    // add exception to the exception folder
    String exception = allMetadata.get(DocumentMetadataKeys.PROCESSING_EXCEPTION);
    if (exception != null) {
        String exceptionEntryName = "exception/" + UPIFormat.format(outputFileCount) + "_"
                + new File(allMetadata.get(DocumentMetadataKeys.DOCUMENT_ORIGINAL_PATH)).getName();
        if (bytesWritable != null) {
            zipFileWriter.addBinaryFile(exceptionEntryName, bytesWritable.getBytes(),
                    bytesWritable.getLength());
        }
        columnMetadata.addMetadataValue(DocumentMetadataKeys.LINK_EXCEPTION, exceptionEntryName);
    }
}