Usage examples for org.apache.hadoop.mapred.OutputLogFilter
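OutputLogFilter is a PathFilter that accepts every path except the _logs side directory that classic MapReduce wrote into a job's output directory, so passing it to FileSystem.listStatus() yields only the actual output (part) files. In later Hadoop releases the class was deprecated in favour of Utils.OutputFileUtils.OutputLogFilter. Before the collected examples, a minimal sketch of the pattern they all share; taking the output directory from args[0] is an assumption for illustration, not part of any example below:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.OutputLogFilter;

public class ListJobOutput {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Hypothetical: the job output directory is passed as the first argument.
        Path outputDir = new Path(args[0]);
        // OutputLogFilter rejects the "_logs" directory, keeping only part files.
        Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(outputDir, new OutputLogFilter()));
        for (Path p : outputFiles) {
            System.out.println(p);
        }
    }
}

Every example below uses this same listStatus + FileUtil.stat2Paths idiom to enumerate job output before reading it.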
From source file:crunch.MaxTemperature.java
License:Apache License
public void test() throws Exception {
    Configuration conf = createJobConf();
    Path localInput = new Path("input/ncdc/micro");
    Path input = getInputDir();
    Path output = getOutputDir();

    // Copy input data into test HDFS
    getFileSystem().copyFromLocalFile(localInput, input);

    MaxTemperatureDriver driver = new MaxTemperatureDriver();
    driver.setConf(conf);
    int exitCode = driver.run(new String[] { input.toString(), output.toString() });
    assertThat(exitCode, is(0));

    // Check the output is as expected
    Path[] outputFiles = FileUtil.stat2Paths(getFileSystem().listStatus(output, new OutputLogFilter()));
    assertThat(outputFiles.length, is(1));

    InputStream in = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    assertThat(reader.readLine(), is("1949\t111"));
    assertThat(reader.readLine(), is("1950\t22"));
    assertThat(reader.readLine(), nullValue());
    reader.close();
}
From source file:crunch.MaxTemperature.java
License:Apache License
private void checkOutput(Configuration conf, Path output) throws IOException {
    FileSystem fs = FileSystem.getLocal(conf);
    Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(output, new OutputLogFilter()));
    assertThat(outputFiles.length, is(1));

    BufferedReader actual = asBufferedReader(fs.open(outputFiles[0]));
    BufferedReader expected = asBufferedReader(getClass().getResourceAsStream("/expected.txt"));
    String expectedLine;
    while ((expectedLine = expected.readLine()) != null) {
        assertThat(actual.readLine(), is(expectedLine));
    }
    assertThat(actual.readLine(), nullValue());
    actual.close();
    expected.close();
}
From source file:io.aos.t4f.hadoop.mapred.WordCountTest.java
License:Apache License
@Test
public void testCount() throws Exception {
    createTextInputFile();
    JobClient.runJob(createJobConf());

    Path[] outputFiles = FileUtil.stat2Paths(getFileSystem().listStatus(output, new OutputLogFilter()));
    Assert.assertEquals(1, outputFiles.length);

    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    Assert.assertEquals("a\t2", reader.readLine());
    Assert.assertEquals("b\t1", reader.readLine());
    Assert.assertNull(reader.readLine());
    reader.close();
}
From source file:org.apache.mahout.clustering.cdbw.CDbwMapper.java
License:Apache License
public static Map<Integer, List<VectorWritable>> getRepresentativePoints(Configuration conf) {
    String statePath = conf.get(CDbwDriver.STATE_IN_KEY);
    Map<Integer, List<VectorWritable>> representativePoints = new HashMap<Integer, List<VectorWritable>>();
    try {
        Path path = new Path(statePath);
        FileSystem fs = FileSystem.get(path.toUri(), conf);
        FileStatus[] status = fs.listStatus(path, new OutputLogFilter());
        for (FileStatus s : status) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
            try {
                IntWritable key = new IntWritable(0);
                VectorWritable point = new VectorWritable();
                while (reader.next(key, point)) {
                    List<VectorWritable> repPoints = representativePoints.get(key.get());
                    if (repPoints == null) {
                        repPoints = new ArrayList<VectorWritable>();
                        representativePoints.put(key.get(), repPoints);
                    }
                    repPoints.add(point);
                    point = new VectorWritable();
                }
            } finally {
                reader.close();
            }
        }
        return representativePoints;
    } catch (IOException e) {
        throw new IllegalStateException(e);
    }
}
From source file:org.unigram.likelike.lsh.TestLSHRecommendations.java
License:Apache License
private boolean dfsCheck(Configuration conf, Path outputPath) throws IOException {
    FileSystem fs = FileSystem.getLocal(conf);
    Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(outputPath, new OutputLogFilter()));
    //if (outputFiles != null) {
    //    TestCase.assertEquals(outputFiles.length, 1);
    //} else {
    //    TestCase.fail();
    //}

    BufferedReader reader = this.asBufferedReader(fs.open(outputFiles[0]));
    String line;
    MultiHashMap resultMap = new MultiHashMap();
    while ((line = reader.readLine()) != null) {
        String[] lineArray = line.split("\t");
        resultMap.put(Long.parseLong(lineArray[0]), // target
                Long.parseLong(lineArray[1])); // recommended
    }
    this.check(resultMap);
    return true;
}
From source file:uk.bl.wa.hadoop.datasets.WARCDatasetGeneratorIntegrationTest.java
License:Open Source License
@SuppressWarnings("deprecation") @Test//from www.ja v a2 s . c o m public void testGenerator() throws Exception { // prepare for test // createTextInputFile(); log.info("Checking input file is present..."); // Check that the input file is present: Path[] inputFiles = FileUtil.stat2Paths( getFileSystem().listStatus(new Path(input, "gov.uk-revisit-warcs/"), new OutputLogFilter())); Assert.assertEquals(2, inputFiles.length); // Create a file of the inputs File tmpInputsFile = writeInputFile(inputFiles); // Set up arguments for the job: String[] args = { "-i", tmpInputsFile.getAbsolutePath(), "-o", this.output.getName() }; // Set up the WARCIndexerRunner WARCDatasetGenerator wir = new WARCDatasetGenerator(); // run job // Job configuration: log.info("Setting up job config..."); JobConf jobConf = this.mrCluster.createJobConf(); jobConf.set("mapred.child.java.opts", "-Xmx512m"); wir.createJobConf(jobConf, args); log.info("Running job..."); JobClient.runJob(jobConf); log.info("Job finished, checking the results..."); // check the output exists Path[] outputFiles = FileUtil.stat2Paths(getFileSystem().listStatus(output, new OutputLogFilter())); // Copy the output out of HDFS and onto local FS: for (Path output : outputFiles) { FileOutputStream fout = new FileOutputStream("target/datasets-" + output.getName()); log.info(" --- output : " + output); if (getFileSystem().isFile(output)) { InputStream is = getFileSystem().open(output); IOUtils.copy(is, fout); } else { log.info(" --- ...skipping directory..."); } fout.flush(); fout.close(); } // Did we generate the expected multiple output files?: Assert.assertEquals(4, outputFiles.length); }
From source file:uk.bl.wa.hadoop.indexer.mdx.MDXSeqSampleGeneratorIntegrationTest.java
License:Open Source License
@Test
public void testSeqStats() throws Exception {
    log.info("Checking input file is present...");
    // Check that the input file is present:
    Path[] inputFiles = FileUtil.stat2Paths(
            dfsCluster.getFileSystem().listStatus(new Path(input, "mdx-seq/"), new OutputLogFilter()));
    Assert.assertEquals(1, inputFiles.length);

    // Create a file of the inputs
    File tmpInputsFile = WARCMDXGeneratorIntegrationTest.writeInputFile(inputFiles);

    // Set up arguments for the job:
    String[] args = { "-i", tmpInputsFile.getAbsolutePath(), "-o", this.output.getName() };

    // Set up the WARCIndexerRunner
    MDXSeqSampleGenerator wir = new MDXSeqSampleGenerator();

    // run job
    // Job configuration:
    log.info("Setting up job config...");
    JobConf jobConf = this.mrCluster.createJobConf();
    wir.createJobConf(jobConf, args);
    log.info("Running job...");
    JobClient.runJob(jobConf);
    log.info("Job finished, checking the results...");

    // check the output exists
    Path[] outputFiles = FileUtil
            .stat2Paths(dfsCluster.getFileSystem().listStatus(output, new OutputLogFilter()));
    // Assert.assertEquals(1, outputFiles.length);

    // Copy the output out:
    for (Path output : outputFiles) {
        FileOutputStream fout = new FileOutputStream("target/" + output.getName());
        log.info(" --- output : " + output);
        if (dfsCluster.getFileSystem().isFile(output)) {
            InputStream is = dfsCluster.getFileSystem().open(output);
            IOUtils.copy(is, fout);
        } else {
            log.info(" --- ...skipping directory...");
        }
        fout.close();
    }

    // Check contents of the output:
    // TBA
}
From source file:uk.bl.wa.hadoop.indexer.mdx.MDXSeqStatsGeneratorIntegrationTest.java
License:Open Source License
@Test
public void testSeqStats() throws Exception {
    log.info("Checking input file is present...");
    // Check that the input file is present:
    Path[] inputFiles = FileUtil.stat2Paths(
            dfsCluster.getFileSystem().listStatus(new Path(input, "mdx-seq/"), new OutputLogFilter()));
    Assert.assertEquals(1, inputFiles.length);

    // Create a file of the inputs
    File tmpInputsFile = WARCMDXGeneratorIntegrationTest.writeInputFile(inputFiles);

    // Set up arguments for the job:
    String[] args = { "-i", tmpInputsFile.getAbsolutePath(), "-o", this.output.getName() };

    // Set up the WARCIndexerRunner
    MDXSeqStatsGenerator wir = new MDXSeqStatsGenerator();

    // run job
    // Job configuration:
    log.info("Setting up job config...");
    JobConf jobConf = this.mrCluster.createJobConf();
    wir.createJobConf(jobConf, args);
    log.info("Running job...");
    JobClient.runJob(jobConf);
    log.info("Job finished, checking the results...");

    // check the output exists
    Path[] outputFiles = FileUtil
            .stat2Paths(dfsCluster.getFileSystem().listStatus(output, new OutputLogFilter()));
    // Assert.assertEquals(1, outputFiles.length);

    // Copy the output out:
    for (Path output : outputFiles) {
        FileOutputStream fout = new FileOutputStream("target/" + output.getName());
        log.info(" --- output : " + output);
        if (dfsCluster.getFileSystem().isFile(output)) {
            InputStream is = dfsCluster.getFileSystem().open(output);
            IOUtils.copy(is, fout);
        } else {
            log.info(" --- ...skipping directory...");
        }
        fout.close();
    }

    // Check contents of the output:
    // TBA
}
From source file:uk.bl.wa.hadoop.indexer.mdx.WARCMDXGeneratorIntegrationTest.java
License:Open Source License
@SuppressWarnings("deprecation") @Test//from w w w . j a v a 2s. co m public void testMDXGenerator() throws Exception { // prepare for test // createTextInputFile(); log.info("Checking input file is present..."); // Check that the input file is present: Path[] inputFiles = FileUtil.stat2Paths( getFileSystem().listStatus(new Path(input, "gov.uk-revisit-warcs/"), new OutputLogFilter())); Assert.assertEquals(2, inputFiles.length); // Create a file of the inputs File tmpInputsFile = writeInputFile(inputFiles); // Set up arguments for the job: String[] args = { "-i", tmpInputsFile.getAbsolutePath(), "-o", this.output.getName() }; // Set up the WARCIndexerRunner WARCMDXGenerator wir = new WARCMDXGenerator(); // run job // Job configuration: log.info("Setting up job config..."); JobConf jobConf = this.mrCluster.createJobConf(); jobConf.setInt(WARCMDXGenerator.WARC_HADOOP_NUM_REDUCERS, 1); jobConf.set("mapred.child.java.opts", "-Xmx512m"); wir.createJobConf(jobConf, args); log.info("Running job..."); JobClient.runJob(jobConf); log.info("Job finished, checking the results..."); // check the output exists Path[] outputFiles = FileUtil.stat2Paths(getFileSystem().listStatus(output, new OutputLogFilter())); // Default is 1 reducers (as knitting together multiple sequence files // is not a mere matter of concatentation): Assert.assertEquals(1, outputFiles.length); // Copy the output out of HDFS and onto local FS: FileOutputStream fout = new FileOutputStream(outputSeq); for (Path output : outputFiles) { log.info(" --- output : " + output); if (getFileSystem().isFile(output)) { InputStream is = getFileSystem().open(output); IOUtils.copy(is, fout); } else { log.info(" --- ...skipping directory..."); } fout.flush(); } fout.close(); // Check contents of the output: Configuration config = new Configuration(); Path path = new Path(outputSeq.getAbsolutePath()); SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(config), path, config); WritableComparable key = (WritableComparable) reader.getKeyClass().newInstance(); Writable value = (Writable) reader.getValueClass().newInstance(); MDX mdx; int counter = 0; while (reader.next(key, value)) { mdx = new MDX(value.toString()); System.out.println( "Key is: " + key + " record_type: " + mdx.getRecordType() + " SURT: " + mdx.getUrlAsSURT()); counter++; } assertEquals(114, counter); reader.close(); // Now test the MDXSeqMerger testSeqMerger(outputFiles); }
From source file:uk.bl.wa.hadoop.indexer.mdx.WARCMDXGeneratorIntegrationTest.java
License:Open Source License
private void testSeqMerger(Path[] inputFiles) throws Exception {
    // Create a file of the inputs
    File tmpInputsFile = writeInputFile(inputFiles);

    // Set up arguments for the job:
    String[] args = { "-i", tmpInputsFile.getAbsolutePath(), "-o", this.outputMerged.getName(), "-r", "1" };

    // Set up the WARCIndexerRunner
    MDXSeqMerger msm = new MDXSeqMerger();

    // run job
    log.info("Setting up job config...");
    JobConf jobConf = this.mrCluster.createJobConf();
    msm.createJobConf(jobConf, args);
    log.info("Running job...");
    JobClient.runJob(jobConf);
    log.info("Job finished, checking the results...");

    // Copy the output out of HDFS and onto local FS:
    FileOutputStream fout = new FileOutputStream(outputMergedSeq);
    Path[] outputFiles = FileUtil.stat2Paths(getFileSystem().listStatus(outputMerged, new OutputLogFilter()));
    for (Path output : outputFiles) {
        log.info(" --- output : " + output);
        if (getFileSystem().isFile(output)) {
            InputStream is = getFileSystem().open(output);
            IOUtils.copy(is, fout);
        } else {
            log.info(" --- ...skipping directory...");
        }
        fout.flush();
    }
    fout.close();
}