List of usage examples for org.apache.hadoop.fs.FileChecksum#getBytes()
public abstract byte[] getBytes();
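Before the collected examples, here is a minimal, self-contained sketch of the typical call pattern: ask the FileSystem for a file's FileChecksum and encode the raw bytes returned by getBytes() for display or comparison. The path below is a hypothetical placeholder, and note that getFileChecksum() returns null on file systems that do not support checksums.

import java.math.BigInteger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileChecksum;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileChecksumGetBytesExample {
    public static void main(String[] args) throws Exception {
        // Hypothetical HDFS path; adjust to your cluster
        Path path = new Path("hdfs:///tmp/example.txt");
        FileSystem fs = FileSystem.get(path.toUri(), new Configuration());

        FileChecksum checksum = fs.getFileChecksum(path);
        // getFileChecksum() returns null if the file system has no checksum support
        if (checksum != null) {
            // getBytes() returns the raw checksum bytes; hex-encode them for display
            String hex = new BigInteger(1, checksum.getBytes()).toString(16);
            System.out.println(checksum.getAlgorithmName() + ": " + hex);
        }
    }
}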
From source file: com.thinkbiganalytics.nifi.v2.hdfs.ComputeHDFSChecksums.java
License: Apache License
@Override
public void onTrigger(@Nonnull final ProcessContext context, @Nonnull final ProcessSession session)
        throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final FileSystem fs = getFileSystem(context);
    if (fs == null) {
        getLog().error("Couldn't initialize HDFS");
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    String filesJSON = context.getProperty(FILES).evaluateAttributeExpressions(flowFile).getValue();
    String absolutePath = context.getProperty(DIRECTORY).evaluateAttributeExpressions(flowFile).getValue();
    Boolean failIfWrongChecksum = context.getProperty(FAIL_IF_INCORRECT_CHECKSUM)
            .evaluateAttributeExpressions(flowFile).asBoolean();
    Gson jsonParser = new Gson();
    File[] filesList;
    try {
        filesList = jsonParser.fromJson(filesJSON, File[].class);
        if (filesList == null) {
            filesList = new File[0];
        }
        for (File f : filesList) {
            String name = f.getName();
            Path filePath;
            if (absolutePath == null || absolutePath.isEmpty()) {
                filePath = new Path(name);
            } else {
                filePath = new Path(absolutePath, name);
            }
            FileChecksum computed_checksum = fs.getFileChecksum(filePath);
            String b64_checksum = Base64.getEncoder().encodeToString(computed_checksum.getBytes());
            f.setComputedChecksum(
                    new Checksum(b64_checksum.length(), b64_checksum, computed_checksum.getAlgorithmName()));
            if (failIfWrongChecksum && !Objects.equals(b64_checksum, f.getChecksum().getValue())) {
                getLog().error("Checksums don't match! File: " + filePath.toString()
                        + " checksum provided: " + f.getChecksum().getValue()
                        + " checksum computed: " + b64_checksum);
                session.transfer(flowFile, REL_FAILURE);
                return;
            }
        }
    } catch (JsonSyntaxException e) {
        getLog().error("Files list attribute does not contain a proper JSON array");
        session.transfer(flowFile, REL_FAILURE);
        return;
    } catch (FileNotFoundException e) {
        getLog().error("One of the provided files not found.\n" + e.getMessage());
        session.transfer(flowFile, REL_FAILURE);
        return;
    } catch (IOException e) {
        throw new ProcessException(e);
    }
    flowFile = session.putAttribute(flowFile, FILES.getName(), jsonParser.toJson(filesList));
    session.transfer(flowFile, REL_SUCCESS);
}
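This NiFi processor Base64-encodes the raw bytes from getBytes() so the checksum can travel as a string inside the flow file's JSON attribute. The equality check against f.getChecksum().getValue() only works if the provided checksum was produced with the same algorithm and the same Base64 encoding.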
From source file: com.twitter.elephanttwin.indexing.AbstractBlockIndexingJob.java
License: Open Source License
/**
 * Creates a FileIndexDescriptor describing which columns have been indexed
 * and writes it alongside the index files for the input file.
 * @param inputFile the input file that was indexed
 * @param fs the file system where the index files are stored
 * @throws IOException
 */
protected void createIndexDescriptors(FileStatus inputFile, FileSystem fs) throws IOException {
    Path indexFilePath = new Path(getIndex() + inputFile.getPath().toUri().getRawPath());

    FileIndexDescriptor fid = new FileIndexDescriptor();
    fid.setSourcePath(inputFile.getPath().toString());
    fid.setDocType(getExpectedDocType());
    LOG.info("getting checksum from:" + inputFile.getPath());
    FileChecksum cksum = fs.getFileChecksum(inputFile.getPath());
    com.twitter.elephanttwin.gen.FileChecksum fidCksum = null;
    if (cksum != null) {
        fidCksum = new com.twitter.elephanttwin.gen.FileChecksum(cksum.getAlgorithmName(),
                ByteBuffer.wrap(cksum.getBytes()), cksum.getLength());
    }
    fid.setChecksum(fidCksum);
    fid.setIndexedFields(getIndexedFields());
    fid.setIndexType(getIndexType());
    fid.setIndexVersion(getIndexVersion());

    Path idxPath = new Path(indexFilePath + "/" + BlockIndexedFileInputFormat.INDEXMETAFILENAME);
    FSDataOutputStream os = fs.create(idxPath, true);
    @SuppressWarnings("unchecked")
    ThriftWritable<FileIndexDescriptor> writable =
            (ThriftWritable<FileIndexDescriptor>) ThriftWritable.newInstance(fid.getClass());
    writable.set(fid);
    writable.write(os);
    os.close();
}
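Here the raw bytes are wrapped in a ByteBuffer to populate a Thrift-generated FileChecksum struct. Note the null guard: FileSystem.getFileChecksum() returns null on file systems without checksum support, in which case the descriptor's checksum field is simply left unset.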
From source file: com.twitter.elephanttwin.lucene.indexing.AbstractLuceneIndexingJob.java
License: Apache License
/**
 * Populates a FileIndexDescriptor with common fields such as name, checksum, etc.
 * @param path the path of the indexed source file
 * @return the populated FileIndexDescriptor
 * @throws IOException
 */
protected FileIndexDescriptor buildFileIndexDescriptor(Path path) throws IOException {
    FileIndexDescriptor fid = new FileIndexDescriptor();
    fid.setSourcePath(path.toString());
    fid.setDocType(getExpectedDocType());
    FileChecksum cksum = path.getFileSystem(getConf()).getFileChecksum(path);
    com.twitter.elephanttwin.gen.FileChecksum fidCksum = new com.twitter.elephanttwin.gen.FileChecksum(
            cksum.getAlgorithmName(), ByteBuffer.wrap(cksum.getBytes()), cksum.getLength());
    fid.setChecksum(fidCksum);
    fid.setIndexedFields(getIndexedFields());
    fid.setIndexType(getIndexType());
    fid.setIndexVersion(getIndexVersion());
    return fid;
}
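This variant performs the same ByteBuffer wrapping as the previous example but without a null check, so it assumes the underlying file system always returns a checksum; on a file system without checksum support, the call to cksum.getAlgorithmName() would throw a NullPointerException.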
From source file: fr.ens.biologie.genomique.eoulsan.modules.mapping.hadoop.ReadsMapperHadoopModule.java
License: LGPL
/**
 * Compute the checksum of a ZIP file, or use the HDFS checksum if available.
 * @param file the ZIP input file
 * @param conf the Hadoop configuration
 * @return the checksum as a string
 * @throws IOException if an error occurs while creating the checksum
 */
static String computeZipCheckSum(final DataFile file, final Configuration conf) throws IOException {
    final Path path = new Path(file.getSource());
    FileSystem fs = FileSystem.get(path.toUri(), conf);
    final FileChecksum checksum = fs.getFileChecksum(path);

    // If it exists, use the checksum provided by the file system
    if (checksum != null) {
        return new BigInteger(1, checksum.getBytes()).toString(16);
    }

    // Fallback solution
    return computeZipCheckSum(file.open());
}
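Here the raw bytes are rendered as a hex string via BigInteger. One caveat worth noting: toString(16) omits leading zero digits, which is harmless as long as the same conversion is applied consistently wherever the resulting string is compared.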