Example usage for com.google.common.collect Iterators getNext

Introduction

This page shows example usage for com.google.common.collect Iterators getNext.

Prototype

@Nullable
public static <T> T getNext(Iterator<? extends T> iterator, @Nullable T defaultValue) 

Document

Returns the next element in iterator or defaultValue if the iterator is empty.
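For a quick illustration of that contract, here is a minimal, self-contained sketch (the class and variable names are ours, not part of Guava or the example below):

import com.google.common.collect.Iterators;

import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;

public class GetNextDemo {
    public static void main(String[] args) {
        Iterator<String> nonEmpty = Arrays.asList("a", "b").iterator();
        // the iterator has elements: getNext returns the first one and advances the iterator
        System.out.println(Iterators.getNext(nonEmpty, "fallback")); // prints "a"

        Iterator<String> empty = Collections.emptyIterator();
        // the iterator is empty: getNext returns the supplied default instead of throwing
        System.out.println(Iterators.getNext(empty, "fallback"));    // prints "fallback"
    }
}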

Usage

From source file: org.commoncrawl.service.crawler.CrawlLog.java

static void validateLogFlusherCode(final File localDirPath, final Path remotePath, boolean injectErrors)
        throws IOException {

    final Configuration conf = new Configuration();

    final FileSystem fs = FileSystem.get(conf);

    fs.mkdirs(remotePath);

    // reserve a unique crawl log file name, then delete the empty temp file
    File localFile = File.createTempFile("crawlLog", "test", localDirPath);
    localFile.delete();

    LOG.info("Initializing Temp File:" + localFile);
    // initialize
    LogFileHeader fileHeader = initializeLogFileHeaderFromLogFile(localFile);

    LOG.info("Creating SyncedCrawl URL Writer");
    // create synced url writer ...
    SyncedCrawlURLLogWriter crawlURLWriter = new SyncedCrawlURLLogWriter(injectErrors);

    ArrayList<CrawlURL> urlObjects = new ArrayList<CrawlURL>();
    // write 100 url objects
    for (int i = 0; i < 100; ++i) {
        CrawlURL url = new CrawlURL();
        url.setUrl("http://someurl.com/" + i);
        byte[] bytes = MD5.digest("Some Random:" + Math.random() + " Number").getBytes();
        url.setContentRaw(new FlexBuffer(bytes));
        final DataOutputStream crawlLogStream = new DataOutputStream(new FileOutputStream(localFile, true));
        try {
            LOG.info("Appending object to log");
            crawlURLWriter.writeItem(crawlLogStream, url);
        } finally {
            LOG.info("Flushing Log");
            crawlLogStream.flush();
            crawlLogStream.close();
        }
        LOG.info("Updating Header");
        updateLogFileHeader(localFile, fileHeader, 1);

        if (!injectErrors || i % 2 == 0) {
            urlObjects.add(url);
        } else {
            // drop odd entry
            LOG.info("Dropping Odd Entry:" + url.getUrl());
        }
    }

    final ArrayList<CrawlURL> urlObjectsOut = new ArrayList<CrawlURL>();

    HDFSCrawlURLWriter stubWriter = new HDFSCrawlURLWriter() {

        SequenceFileCrawlURLWriter innerWriter = new SequenceFileCrawlURLWriter(conf, fs, remotePath,
                "testNode", 1L);

        @Override
        public void writeCrawlURLItem(Text url, CrawlURL urlObject) throws IOException {
            LOG.info("Got URL:" + url.toString());
            urlObjectsOut.add(urlObject);
            innerWriter.writeCrawlURLItem(url, urlObject);
        }

        @Override
        public void close() throws IOException {
            innerWriter.close();
        }

        @Override
        public List<Path> getFilenames() {
            return innerWriter.getFilenames();
        }
    };

    try {
        LOG.info("Transferring from Local to Remote");
        transferLocalCheckpointLog(localFile, stubWriter, 1L);
    } finally {
        stubWriter.close();
    }
    LOG.info("Validating Input/Output");
    validateInputOutputCrawlURLArrays(urlObjects, urlObjectsOut);
    // read the records back via SequenceFile; getNext yields the first output file, or null if none were written
    urlObjectsOut.clear();
    Path firstFile = Iterators.getNext(stubWriter.getFilenames().iterator(), null);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, firstFile, conf);
    Text key = new Text();
    CrawlURL value = new CrawlURL();
    while (reader.next(key, value)) {
        LOG.info("Got:" + key.toString());
        urlObjectsOut.add(value);
        value = new CrawlURL();
    }
    reader.close();
    LOG.info("Validating Input/Output");
    validateInputOutputCrawlURLArrays(urlObjects, urlObjectsOut);

    LOG.info("Done!");
}
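Here, Iterators.getNext pulls the first Path produced by the stub writer, with null as the default in case no output files were written; the SequenceFile.Reader then replays that file so the round trip can be validated.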