List of usage examples for org.apache.hadoop.io.MD5Hash.digest
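All of the snippets below call one of the static digest overloads on org.apache.hadoop.io.MD5Hash. As a quick orientation before the examples, here is a minimal sketch of those overloads (the class name and input values are illustrative, not taken from the examples):

import java.io.ByteArrayInputStream;
import java.io.IOException;

import org.apache.hadoop.io.MD5Hash;

public class MD5HashDemo {
    public static void main(String[] args) throws IOException {
        byte[] data = "hello".getBytes("UTF-8");

        MD5Hash fromBytes = MD5Hash.digest(data); // digest a whole byte array
        MD5Hash fromRange = MD5Hash.digest(data, 0, data.length); // digest a sub-range of an array
        MD5Hash fromString = MD5Hash.digest("hello"); // digest the bytes of a String
        MD5Hash fromStream = MD5Hash.digest(new ByteArrayInputStream(data)); // read an InputStream to EOF

        System.out.println(fromBytes); // toString() renders the digest as hex
        System.out.println(fromBytes.getDigest().length); // 16 raw MD5 bytes
    }
}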
From source file:co.nubetech.hiho.dedup.HashUtility.java
License:Apache License
public static MD5Hash getMD5Hash(BooleanWritable key) throws IOException {
    return MD5Hash.digest(key.toString());
}
From source file:co.nubetech.hiho.dedup.HashUtility.java
License:Apache License
public static MD5Hash getMD5Hash(FloatWritable key) throws IOException {
    return MD5Hash.digest(key.toString());
}
From source file:co.nubetech.hiho.dedup.HashUtility.java
License:Apache License
public static MD5Hash getMD5Hash(ByteWritable key) throws IOException {
    return MD5Hash.digest(key.toString());
}
From source file:co.nubetech.hiho.dedup.HashUtility.java
License:Apache License
public static MD5Hash getMD5Hash(DoubleWritable key) throws IOException {
    return MD5Hash.digest(key.toString());
}
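Each of the four HashUtility overloads above hashes the string form of the Writable via MD5Hash.digest(key.toString()), so keys that render to the same string produce the same hash. A call site would look like this (a minimal sketch; the values are illustrative):

import java.io.IOException;

import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.MD5Hash;

import co.nubetech.hiho.dedup.HashUtility;

public class HashUtilityDemo {
    public static void main(String[] args) throws IOException {
        MD5Hash boolHash = HashUtility.getMD5Hash(new BooleanWritable(true));
        MD5Hash doubleHash = HashUtility.getMD5Hash(new DoubleWritable(3.14));
        System.out.println(boolHash + " " + doubleHash);
    }
}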
From source file:com.iflytek.spider.crawl.MD5Signature.java
License:Apache License
public byte[] calculate(Content content, Parse parse) {
    byte[] data = content.getContent();
    // if (data == null) data = content.getUrl().getBytes();
    // StringBuilder buf = new StringBuilder().append(data).append(parse.getText());
    return MD5Hash.digest(data).getDigest();
}
From source file:com.lightboxtechnologies.spectrum.BlockHashMapper.java
License:Apache License
@Override
protected void map(NullWritable k, FileSplit split, Context context) throws IOException, InterruptedException {
    final long startOffset = split.getStart(), endOffset = startOffset + split.getLength();
    LOG.info("startOffset = " + startOffset + "; endOffset = " + endOffset);
    context.setStatus("Offset " + startOffset);

    final Configuration conf = context.getConfiguration();
    final FileSystem fs = FileSystem.get(conf);
    openImgFile(split.getPath(), fs);

    long numBlocks = 0;
    long curOffset = startOffset;
    while (curOffset < endOffset) {
        ImgFile.readFully(curOffset, Buffer);
        for (int i = 0; i < Buffer.length; i += BLOCK_SIZE) {
            BlockOffset.set(curOffset + i);
            // one MD5 per BLOCK_SIZE-byte slice, via the digest(byte[], int, int) overload
            context.write(BlockOffset, MD5Hash.digest(Buffer, i, BLOCK_SIZE));
            ++numBlocks;
        }
        curOffset += Buffer.length;
    }
    LOG.info("This split had " + numBlocks + " blocks in it");
}
From source file:com.mellanox.r4h.DFSClient.java
License:Apache License
/**
 * Get the checksum of the whole file or a range of the file. Note that the
 * range always starts from the beginning of the file.
 *
 * @param src
 *          The file path
 * @param length
 *          the length of the range, i.e., the range is [0, length]
 * @return The checksum
 * @see DistributedFileSystem#getFileChecksum(Path)
 */
public MD5MD5CRC32FileChecksum getFileChecksum(String src, long length) throws IOException {
    checkOpen();
    Preconditions.checkArgument(length >= 0);

    // get block locations for the file range
    LocatedBlocks blockLocations = callGetBlockLocations(namenode, src, 0, length);
    if (null == blockLocations) {
        throw new FileNotFoundException("File does not exist: " + src);
    }
    List<LocatedBlock> locatedblocks = blockLocations.getLocatedBlocks();
    final DataOutputBuffer md5out = new DataOutputBuffer();
    int bytesPerCRC = -1;
    DataChecksum.Type crcType = DataChecksum.Type.DEFAULT;
    long crcPerBlock = 0;
    boolean refetchBlocks = false;
    int lastRetriedIndex = -1;

    // get block checksum for each block
    long remaining = length;
    if (src.contains(HdfsConstants.SEPARATOR_DOT_SNAPSHOT_DIR_SEPARATOR)) {
        remaining = Math.min(length, blockLocations.getFileLength());
    }
    for (int i = 0; i < locatedblocks.size() && remaining > 0; i++) {
        if (refetchBlocks) { // refetch to get fresh tokens
            blockLocations = callGetBlockLocations(namenode, src, 0, length);
            if (null == blockLocations) {
                throw new FileNotFoundException("File does not exist: " + src);
            }
            locatedblocks = blockLocations.getLocatedBlocks();
            refetchBlocks = false;
        }
        LocatedBlock lb = locatedblocks.get(i);
        final ExtendedBlock block = lb.getBlock();
        if (remaining < block.getNumBytes()) {
            block.setNumBytes(remaining);
        }
        remaining -= block.getNumBytes();
        final DatanodeInfo[] datanodes = lb.getLocations();

        // try each datanode location of the block
        final int timeout = 3000 * datanodes.length + dfsClientConf.socketTimeout();
        boolean done = false;
        for (int j = 0; !done && j < datanodes.length; j++) {
            DataOutputStream out = null;
            DataInputStream in = null;

            try {
                // connect to a datanode
                IOStreamPair pair = connectToDN(datanodes[j], timeout, lb);
                out = new DataOutputStream(new BufferedOutputStream(pair.out, HdfsConstants.SMALL_BUFFER_SIZE));
                in = new DataInputStream(pair.in);

                if (LOG.isDebugEnabled()) {
                    LOG.debug("write to " + datanodes[j] + ": " + Op.BLOCK_CHECKSUM + ", block=" + block);
                }
                // get block MD5
                new Sender(out).blockChecksum(block, lb.getBlockToken());

                final BlockOpResponseProto reply = BlockOpResponseProto.parseFrom(PBHelper.vintPrefixed(in));

                String logInfo = "for block " + block + " from datanode " + datanodes[j];
                DataTransferProtoUtil.checkBlockOpStatus(reply, logInfo);

                OpBlockChecksumResponseProto checksumData = reply.getChecksumResponse();

                // read byte-per-checksum
                final int bpc = checksumData.getBytesPerCrc();
                if (i == 0) { // first block
                    bytesPerCRC = bpc;
                } else if (bpc != bytesPerCRC) {
                    throw new IOException(
                            "Byte-per-checksum not matched: bpc=" + bpc + " but bytesPerCRC=" + bytesPerCRC);
                }

                // read crc-per-block
                final long cpb = checksumData.getCrcPerBlock();
                if (locatedblocks.size() > 1 && i == 0) {
                    crcPerBlock = cpb;
                }

                // read md5
                final MD5Hash md5 = new MD5Hash(checksumData.getMd5().toByteArray());
                md5.write(md5out);

                // read crc-type
                final DataChecksum.Type ct;
                if (checksumData.hasCrcType()) {
                    ct = PBHelper.convert(checksumData.getCrcType());
                } else {
                    LOG.debug("Retrieving checksum from an earlier-version DataNode: "
                            + "inferring checksum by reading first byte");
                    ct = inferChecksumTypeByReading(lb, datanodes[j]);
                }

                if (i == 0) { // first block
                    crcType = ct;
                } else if (crcType != DataChecksum.Type.MIXED && crcType != ct) {
                    // if crc types are mixed in a file
                    crcType = DataChecksum.Type.MIXED;
                }

                done = true;

                if (LOG.isDebugEnabled()) {
                    if (i == 0) {
                        LOG.debug("set bytesPerCRC=" + bytesPerCRC + ", crcPerBlock=" + crcPerBlock);
                    }
                    LOG.debug("got reply from " + datanodes[j] + ": md5=" + md5);
                }
            } catch (InvalidBlockTokenException ibte) {
                if (i > lastRetriedIndex) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Got access token error in response to OP_BLOCK_CHECKSUM "
                                + "for file " + src + " for block " + block + " from datanode "
                                + datanodes[j] + ". Will retry the block once.");
                    }
                    lastRetriedIndex = i;
                    done = true; // actually it's not done; but we'll retry
                    i--; // repeat at i-th block
                    refetchBlocks = true;
                    break;
                }
            } catch (IOException ie) {
                LOG.warn("src=" + src + ", datanodes[" + j + "]=" + datanodes[j], ie);
            } finally {
                IOUtils.closeStream(in);
                IOUtils.closeStream(out);
            }
        }

        if (!done) {
            throw new IOException("Fail to get block MD5 for " + block);
        }
    }

    // compute file MD5
    final MD5Hash fileMD5 = MD5Hash.digest(md5out.getData());
    switch (crcType) {
    case CRC32:
        return new MD5MD5CRC32GzipFileChecksum(bytesPerCRC, crcPerBlock, fileMD5);
    case CRC32C:
        return new MD5MD5CRC32CastagnoliFileChecksum(bytesPerCRC, crcPerBlock, fileMD5);
    default:
        // If there is no block allocated for the file,
        // return one with the magic entry that matches what previous
        // hdfs versions return.
        if (locatedblocks.size() == 0) {
            return new MD5MD5CRC32GzipFileChecksum(0, 0, fileMD5);
        }
        // we should never get here since the validity was checked
        // when getCrcType() was called above.
        return null;
    }
}
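Note the structure of the result: each per-block MD5 returned by a datanode is appended to md5out, and the file-level checksum is MD5Hash.digest over that concatenation, i.e. an MD5 of the block MD5s. That is the "MD5MD5" in MD5MD5CRC32FileChecksum, with the CRC32 or CRC32C variant chosen according to the crc-type reported in the block checksum responses.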
From source file:com.shmsoft.dmass.main.FileProcessor.java
License:Apache License
public static MD5Hash createKeyHash(String fileName, Metadata metadata, String originalFileName) throws IOException {
    String extension = Util.getExtension(originalFileName);
    if ("eml".equalsIgnoreCase(extension)) {
        String hashNames = EmailProperties.getInstance().getProperty(EmailProperties.EMAIL_HASH_NAMES);
        String[] hashNamesArr = hashNames.split(",");
        StringBuffer data = new StringBuffer();
        for (String hashName : hashNamesArr) {
            String value = metadata.get(hashName);
            if (value != null) {
                data.append(value);
                data.append(" ");
            }
        }
        return MD5Hash.digest(data.toString());
    } else {
        // use MD5 of the input file as Hadoop key
        FileInputStream fileInputStream = new FileInputStream(fileName);
        MD5Hash key = MD5Hash.digest(fileInputStream);
        fileInputStream.close();
        return key;
    }
}
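In the non-eml branch, MD5Hash.digest(InputStream) reads the stream to EOF to compute the hash, but as written the stream is not closed if digest throws. A try-with-resources variant would be safer (a sketch, not the project's code):

try (FileInputStream fileInputStream = new FileInputStream(fileName)) {
    // use MD5 of the input file as Hadoop key
    return MD5Hash.digest(fileInputStream);
}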
From source file:com.shmsoft.dmass.main.ZipFileProcessor.java
License:Apache License
/**
 * Emit the map with all metadata, native, and text
 *
 * @param fileName
 * @param metadata
 * @throws IOException
 * @throws InterruptedException
 */
@SuppressWarnings("unchecked")
private void emitAsMap(String fileName, Metadata metadata) throws IOException, InterruptedException {
    Project project = Project.getProject();
    if (project.checkSkip()) {
        return;
    }
    //History.appendToHistory("emitAsMap: fileName = " + fileName + " metadata = " + metadata.toString());
    System.out.println("emitAsMap: fileName = " + fileName + " metadata = " + metadata.toString());
    MapWritable mapWritable = createMapWritable(metadata);
    MD5Hash key = MD5Hash.digest(new FileInputStream(fileName));
    if (PlatformUtil.isNix()) {
        getContext().write(key, mapWritable);
        getContext().progress();
    } else {
        List<MapWritable> values = new ArrayList<MapWritable>();
        values.add(mapWritable);
        WindowsReduce.getInstance().reduce(key, values, null);
    }
    // update stats
    Stats.getInstance().increaseItemCount();
}
From source file:com.siriuser.weibocrawl.MD5Signature.java
License:Apache License
public String calculate(String content) {
    byte[] data = content.getBytes();
    return toHexString(MD5Hash.digest(data).getDigest());
}
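Since MD5Hash.toString() already renders the digest as lowercase hex, this helper could likely be shortened to the line below, assuming the surrounding toHexString helper uses the same lowercase encoding:

return MD5Hash.digest(data).toString();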