List of usage examples for com.google.common.collect.Iterators.getNext
@Nullable public static <T> T getNext(Iterator<? extends T> iterator, @Nullable T defaultValue)
From source file: org.commoncrawl.service.crawler.CrawlLog.java
static void validateLogFlusherCode(final File localDirPath, final Path remotePath, boolean injectErrors) throws IOException { final Configuration conf = new Configuration(); final FileSystem fs = FileSystem.get(conf); fs.mkdirs(remotePath);/*from w w w . ja v a2 s . c o m*/ // ok create a crawlLog test file File localFile = File.createTempFile("crawlLog", "test", localDirPath); localFile.delete(); LOG.info("Initializing Temp File:" + localFile); // initialize LogFileHeader fileHeader = initializeLogFileHeaderFromLogFile(localFile); LOG.info("Creating SyncedCrawl URL Writer"); // create synced url writer ... SyncedCrawlURLLogWriter crawlURLWriter = new SyncedCrawlURLLogWriter(injectErrors); ArrayList<CrawlURL> urlObjects = new ArrayList<CrawlURL>(); // write a couple of url objects for (int i = 0; i < 100; ++i) { CrawlURL url = new CrawlURL(); url.setUrl("http://someurl.com/" + i); byte bytes[] = MD5.digest("Some Random:" + Math.random() + " Number").getBytes(); url.setContentRaw(new FlexBuffer(bytes)); final DataOutputStream crawlLogStream = new DataOutputStream(new FileOutputStream(localFile, true)); try { LOG.info("Appending object to log"); crawlURLWriter.writeItem(crawlLogStream, url); } finally { LOG.info("Flushing Log"); crawlLogStream.flush(); crawlLogStream.close(); } LOG.info("Updating Header"); updateLogFileHeader(localFile, fileHeader, 1); if (!injectErrors || i % 2 == 0) { urlObjects.add(url); } else { // drop odd entry LOG.info("Dropping Odd Entry:" + url.getUrl()); } } final ArrayList<CrawlURL> urlObjectsOut = new ArrayList<CrawlURL>(); HDFSCrawlURLWriter stubWriter = new HDFSCrawlURLWriter() { SequenceFileCrawlURLWriter innerWriter = new SequenceFileCrawlURLWriter(conf, fs, remotePath, "testNode", 1L); @Override public void writeCrawlURLItem(Text url, CrawlURL urlObject) throws IOException { LOG.info("Got URL:" + url.toString()); urlObjectsOut.add(urlObject); innerWriter.writeCrawlURLItem(url, urlObject); } @Override public void close() throws 
IOException { innerWriter.close(); } public List<Path> getFilenames() { return innerWriter.getFilenames(); } }; try { LOG.info("Transferring from Local to Remote"); transferLocalCheckpointLog(localFile, stubWriter, 1L); } finally { stubWriter.close(); } LOG.info("Validating Input/Output"); validateInputOutputCrawlURLArrays(urlObjects, urlObjectsOut); // read via sequenceFile urlObjectsOut.clear(); Path firstFile = Iterators.getNext(stubWriter.getFilenames().iterator(), null); SequenceFile.Reader reader = new SequenceFile.Reader(fs, firstFile, conf); Text key = new Text(); CrawlURL value = new CrawlURL(); while (reader.next(key, value)) { LOG.info("Got:" + key.toString()); urlObjectsOut.add(value); value = new CrawlURL(); } reader.close(); LOG.info("Validating Input/Output"); validateInputOutputCrawlURLArrays(urlObjects, urlObjectsOut); LOG.info("Done!"); }