Example usage for java.util.zip CRC32 getValue

List of usage examples for java.util.zip CRC32 getValue

Introduction

On this page you can find example usages of the java.util.zip.CRC32.getValue() method.

Prototype

@Override
public long getValue() 

Source Link

Document

Returns the CRC-32 value.

Usage

From source file:org.commoncrawl.util.MultiFileMergeUtils.java

public static void main(String[] args) {

    Path testPath = new Path(args[0]);

    LOG.info("Initializing Hadoop Config");

    Configuration conf = new Configuration();

    conf.addResource("nutch-default.xml");
    conf.addResource("nutch-site.xml");
    conf.addResource("mapred-site.xml");
    conf.addResource("hdfs-site.xml");
    conf.addResource("commoncrawl-default.xml");
    conf.addResource("commoncrawl-site.xml");

    conf.setClass(MultiFileInputReader.MULTIFILE_COMPARATOR_CLASS, URLFPV2RawComparator.class,
            RawComparator.class);
    conf.setClass(MultiFileInputReader.MULTIFILE_KEY_CLASS, URLFPV2.class, WritableComparable.class);

    CrawlEnvironment.setHadoopConfig(conf);
    CrawlEnvironment.setDefaultHadoopFSURI("hdfs://ccn01:9000/");

    try {//from   ww w  . java2s  . c  o  m
        FileSystem fs = CrawlEnvironment.getDefaultFileSystem();

        Vector<Path> paths = new Vector<Path>();

        paths.add(new Path(testPath, "part-00000"));
        // paths.add(new Path(testPath,"part-00000"));
        paths.add(new Path(testPath, "part-00001"));

        TreeSet<URLFPV2> directReadSet = new TreeSet<URLFPV2>();
        TreeSet<URLFPV2> multiFileReadSet = new TreeSet<URLFPV2>();

        MultiFileInputReader<URLFPV2> inputReader = new MultiFileInputReader<URLFPV2>(fs, paths, conf);

        KeyAndValueData<URLFPV2> keyValueData = null;
        int multiFileKeyCount = 0;
        while ((keyValueData = inputReader.readNextItem()) != null) {
            LOG.info("Got Key Domain:" + keyValueData._keyObject.getDomainHash() + " URLHash:"
                    + keyValueData._keyObject.getUrlHash() + " Item Count:" + keyValueData._values.size()
                    + " Path[0]:" + keyValueData._values.get(0).source);

            if (keyValueData._values.size() > 1) {
                LOG.error("Got more than one item");
                for (int i = 0; i < keyValueData._values.size(); ++i) {
                    CRC32 crc = new CRC32();
                    crc.update(keyValueData._keyData.getData(), 0, keyValueData._keyData.getLength());
                    LOG.error("Item at[" + i + "] Path:" + keyValueData._values.get(i).source + " CRC:"
                            + crc.getValue());
                }
            }
            if (multiFileKeyCount++ < 1000)
                multiFileReadSet.add((URLFPV2) keyValueData._keyObject.clone());
        }
        inputReader.close();

        addFirstNFPItemsToSet(fs, new Path(testPath, "part-00000"), conf, directReadSet, 1000);
        addFirstNFPItemsToSet(fs, new Path(testPath, "part-00001"), conf, directReadSet, 1000);

        Iterator<URLFPV2> directReadIterator = directReadSet.iterator();
        Iterator<URLFPV2> multiFileReadIterator = multiFileReadSet.iterator();

        for (int i = 0; i < 1000; ++i) {
            URLFPV2 directReadFP = directReadIterator.next();
            URLFPV2 multiFileReadFP = multiFileReadIterator.next();

            if (directReadFP.compareTo(multiFileReadFP) != 0) {
                LOG.info("Mismatch at Index:" + i);
            }
        }

    } catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
    } catch (CloneNotSupportedException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

}

From source file:Main.java

public static long getStringCRC(String localData) {
    if (localData == null)
        return 0;
    CRC32 crc = new CRC32();
    crc.update(localData.getBytes());/*ww  w. ja va 2 s .c om*/
    return crc.getValue();
}

From source file:com.esri.geoportal.harvester.api.base.SimpleScrambler.java

/**
 * Encodes string./*from www  . java2 s  .co  m*/
 * @param txt string to encode
 * @return encoded string or <code>null</code> if error encoding string
 */
public static String encode(String txt) {
    txt = StringUtils.defaultIfEmpty(txt, "");
    try {
        CRC32 crC32 = new CRC32();
        crC32.update(txt.getBytes("UTF-8"));
        long crc = crC32.getValue();
        String crctxt = String.format("%10d%s", crc, txt);
        Base64.Encoder encoder = Base64.getEncoder();
        return encoder.encodeToString(crctxt.getBytes("UTF-8"));
    } catch (UnsupportedEncodingException ex) {
        return null;
    }
}

From source file:com.esri.geoportal.harvester.api.base.SimpleScrambler.java

/**
 * Decodes string./*from   w  w  w.  j a v a2  s .  c om*/
 * @param encoded encoded string to decode
 * @return decoded string or <code>null</code> if error decoding string
 */
public static String decode(String encoded) {
    try {
        encoded = StringUtils.defaultIfEmpty(encoded, "");
        Base64.Decoder decoder = Base64.getDecoder();
        String crctxt = new String(decoder.decode(encoded), "UTF-8");
        if (crctxt.length() < 10) {
            return null;
        }
        long crc = Long.parseLong(StringUtils.trimToEmpty(crctxt.substring(0, 10)));
        String txt = crctxt.substring(10);
        CRC32 crC32 = new CRC32();
        crC32.update(txt.getBytes("UTF-8"));
        if (crc != crC32.getValue()) {
            return null;
        }
        return txt;
    } catch (NumberFormatException | UnsupportedEncodingException ex) {
        return null;
    }
}

From source file:org.apache.tika.server.writer.ZipWriter.java

private static void zipStoreBuffer(ZipArchiveOutputStream zip, String name, byte[] dataBuffer)
        throws IOException {
    ZipEntry zipEntry = new ZipEntry(name != null ? name : UUID.randomUUID().toString());
    zipEntry.setMethod(ZipOutputStream.STORED);

    zipEntry.setSize(dataBuffer.length);
    CRC32 crc32 = new CRC32();
    crc32.update(dataBuffer);/*w w  w .j av  a 2  s .  c o  m*/
    zipEntry.setCrc(crc32.getValue());

    try {
        zip.putArchiveEntry(new ZipArchiveEntry(zipEntry));
    } catch (ZipException ex) {
        if (name != null) {
            zipStoreBuffer(zip, "x-" + name, dataBuffer);
            return;
        }
    }

    zip.write(dataBuffer);

    zip.closeArchiveEntry();
}

From source file:brut.directory.ZipUtils.java

private static void processFolder(final File folder, final ZipOutputStream zipOutputStream,
        final int prefixLength) throws BrutException, IOException {
    for (final File file : folder.listFiles()) {
        if (file.isFile()) {
            final String cleanedPath = BrutIO.sanitizeUnknownFile(folder,
                    file.getPath().substring(prefixLength));
            final ZipEntry zipEntry = new ZipEntry(BrutIO.normalizePath(cleanedPath));

            // aapt binary by default takes in parameters via -0 arsc to list extensions that shouldn't be
            // compressed. We will replicate that behavior
            final String extension = FilenameUtils.getExtension(file.getAbsolutePath());
            if (mDoNotCompress != null
                    && (mDoNotCompress.contains(extension) || mDoNotCompress.contains(zipEntry.getName()))) {
                zipEntry.setMethod(ZipEntry.STORED);
                zipEntry.setSize(file.length());
                BufferedInputStream unknownFile = new BufferedInputStream(new FileInputStream(file));
                CRC32 crc = BrutIO.calculateCrc(unknownFile);
                zipEntry.setCrc(crc.getValue());
                unknownFile.close();/* w w w .j a  v  a  2 s . com*/
            } else {
                zipEntry.setMethod(ZipEntry.DEFLATED);
            }

            zipOutputStream.putNextEntry(zipEntry);
            try (FileInputStream inputStream = new FileInputStream(file)) {
                IOUtils.copy(inputStream, zipOutputStream);
            }
            zipOutputStream.closeEntry();
        } else if (file.isDirectory()) {
            processFolder(file, zipOutputStream, prefixLength);
        }
    }
}

From source file:com.haulmont.cuba.core.sys.logging.LogArchiver.java

private static ArchiveEntry newTailArchive(String name, byte[] tail) {
    ZipArchiveEntry zipEntry = new ZipArchiveEntry(name);
    zipEntry.setSize(tail.length);/*from www. j a v a 2 s . co m*/
    zipEntry.setCompressedSize(zipEntry.getSize());
    CRC32 crc32 = new CRC32();
    crc32.update(tail);
    zipEntry.setCrc(crc32.getValue());
    return zipEntry;
}

From source file:com.hortonworks.registries.storage.tool.shell.ShellMigrationResolver.java

/**
 * Calculates the checksum of these bytes.
 *
 * @param bytes The bytes to calculate the checksum for.
 * @return The crc-32 checksum of the bytes.
 *//*  w  ww  .  j a v  a 2 s  .  co m*/
private static int calculateChecksum(byte[] bytes) {
    final CRC32 crc32 = new CRC32();
    crc32.update(bytes);
    return (int) crc32.getValue();
}

From source file:org.trellisldp.rosid.file.FileUtils.java

/**
 * Partition an identifier into a directory structure
 * @param identifier the identifier/*w  w  w.  jav  a 2  s  .  c o m*/
 * @return a string usable as a directory path
 */
public static String partition(final String identifier) {
    requireNonNull(identifier, "identifier must not be null!");

    final StringJoiner joiner = new StringJoiner(separator);
    final CRC32 hasher = new CRC32();
    hasher.update(identifier.getBytes(UTF_8));
    final String intermediate = Long.toHexString(hasher.getValue());

    range(0, intermediate.length() / LENGTH).limit(MAX)
            .forEach(i -> joiner.add(intermediate.substring(i * LENGTH, (i + 1) * LENGTH)));

    joiner.add(md5Hex(identifier));
    return joiner.toString();
}

From source file:org.bdval.util.ShortHash.java

/**
 * Return a short hash (String of 5 chars, A-Z) of the contents of toHash.
 * @param toHash the content to hash/*  w w w .ja v a2s . com*/
 * @return the short hash
 */
public static String shortHash(final String toHash) {
    if (StringUtils.isBlank(toHash)) {
        return null;
    }

    // Get the CRC32 checksum of the string (CRC will clash less often than the Adler checksum for short strings)
    final CRC32 crc32 = new CRC32();
    crc32.update(toHash.getBytes());
    // Map it from a long to an int with mod
    final int checksum = (int) (crc32.getValue() % Integer.MAX_VALUE);

    final StringBuilder output = new StringBuilder();
    for (int i = 0; i < MASKS.length; i++) {
        // Mask the value, shift it to the right, and mod it to the output-able characters
        final int partial = ((checksum & MASKS[i]) >> MASK_SHIFTS[i]) % HASH_CHARS.length;
        final char asChar = HASH_CHARS[partial];
        output.append(asChar);
    }
    LOG.debug(String.format("hash=%s for string=%s", output.toString(), toHash));
    return output.toString();
}