Example usage for org.apache.hadoop.mapred.OutputLogFilter


Introduction

This page collects real-world usage examples for org.apache.hadoop.mapred.OutputLogFilter, a PathFilter that rejects any path containing "_logs", so that listing a MapReduce job's output directory returns only the actual output files and skips the job's log side-directory. In later Hadoop releases the class is deprecated in favor of org.apache.hadoop.mapred.Utils.OutputFileUtils.OutputLogFilter.

Prototype

OutputLogFilter()
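Before the sources themselves, here is a minimal sketch of the pattern they all share: list a job's output directory with listStatus, passing an OutputLogFilter so the "_logs" directory is dropped, then convert the statuses to paths with FileUtil.stat2Paths. The class name (ListJobOutput) and the path /tmp/job-output are illustrative placeholders, not taken from any of the sources below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.OutputLogFilter;

public class ListJobOutput {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path outputDir = new Path("/tmp/job-output"); // placeholder output directory
        FileSystem fs = FileSystem.get(outputDir.toUri(), conf);

        // OutputLogFilter rejects paths containing "_logs", so only the
        // actual job output files (e.g. part-00000) are returned.
        Path[] outputFiles = FileUtil.stat2Paths(
                fs.listStatus(outputDir, new OutputLogFilter()));
        for (Path p : outputFiles) {
            System.out.println(p);
        }
    }
}

Every example below follows this same listStatus-plus-stat2Paths idiom, differing only in what it does with the resulting output files.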

Usage

From source file:crunch.MaxTemperature.java

License:Apache License

public void test() throws Exception {
        Configuration conf = createJobConf();

        Path localInput = new Path("input/ncdc/micro");
        Path input = getInputDir();
        Path output = getOutputDir();

        // Copy input data into test HDFS
        getFileSystem().copyFromLocalFile(localInput, input);

        MaxTemperatureDriver driver = new MaxTemperatureDriver();
        driver.setConf(conf);

        int exitCode = driver.run(new String[] { input.toString(), output.toString() });
        assertThat(exitCode, is(0));

        // Check the output is as expected
        Path[] outputFiles = FileUtil.stat2Paths(getFileSystem().listStatus(output, new OutputLogFilter()));
        assertThat(outputFiles.length, is(1));

        InputStream in = getFileSystem().open(outputFiles[0]);
        BufferedReader reader = new BufferedReader(new InputStreamReader(in));
        assertThat(reader.readLine(), is("1949\t111"));
        assertThat(reader.readLine(), is("1950\t22"));
        assertThat(reader.readLine(), nullValue());
        reader.close();
    }

From source file:crunch.MaxTemperature.java

License:Apache License

private void checkOutput(Configuration conf, Path output) throws IOException {
        FileSystem fs = FileSystem.getLocal(conf);
        Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(output, new OutputLogFilter()));
        assertThat(outputFiles.length, is(1));

        BufferedReader actual = asBufferedReader(fs.open(outputFiles[0]));
        BufferedReader expected = asBufferedReader(getClass().getResourceAsStream("/expected.txt"));
        String expectedLine;
        while ((expectedLine = expected.readLine()) != null) {
            assertThat(actual.readLine(), is(expectedLine));
        }
        assertThat(actual.readLine(), nullValue());
        actual.close();
        expected.close();
    }

From source file:io.aos.t4f.hadoop.mapred.WordCountTest.java

License:Apache License

@Test
public void testCount() throws Exception {
    createTextInputFile();
    JobClient.runJob(createJobConf());
    Path[] outputFiles = FileUtil.stat2Paths(getFileSystem().listStatus(output, new OutputLogFilter()));
    Assert.assertEquals(1, outputFiles.length);
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    Assert.assertEquals("a\t2", reader.readLine());
    Assert.assertEquals("b\t1", reader.readLine());
    Assert.assertNull(reader.readLine());
    reader.close();
}

From source file:org.apache.mahout.clustering.cdbw.CDbwMapper.java

License:Apache License

public static Map<Integer, List<VectorWritable>> getRepresentativePoints(Configuration conf) {
    String statePath = conf.get(CDbwDriver.STATE_IN_KEY);
    Map<Integer, List<VectorWritable>> representativePoints = new HashMap<Integer, List<VectorWritable>>();
    try {
        Path path = new Path(statePath);
        FileSystem fs = FileSystem.get(path.toUri(), conf);
        FileStatus[] status = fs.listStatus(path, new OutputLogFilter());
        for (FileStatus s : status) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf);
            try {
                IntWritable key = new IntWritable(0);
                VectorWritable point = new VectorWritable();
                while (reader.next(key, point)) {
                    List<VectorWritable> repPoints = representativePoints.get(key.get());
                    if (repPoints == null) {
                        repPoints = new ArrayList<VectorWritable>();
                        representativePoints.put(key.get(), repPoints);
                    }
                    repPoints.add(point);
                    point = new VectorWritable();
                }
            } finally {
                reader.close();
            }
        }
        return representativePoints;
    } catch (IOException e) {
        throw new IllegalStateException(e);
    }
}

From source file:org.unigram.likelike.lsh.TestLSHRecommendations.java

License:Apache License

private boolean dfsCheck(Configuration conf, Path outputPath) throws IOException {
    FileSystem fs = FileSystem.getLocal(conf);
    Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(outputPath, new OutputLogFilter()));

    //if (outputFiles != null) {
    //    TestCase.assertEquals(outputFiles.length, 1);
    //} else {
    //    TestCase.fail();
    //}

    BufferedReader reader = this.asBufferedReader(fs.open(outputFiles[0]));

    String line;
    MultiHashMap resultMap = new MultiHashMap();
    while ((line = reader.readLine()) != null) {
        String[] lineArray = line.split("\t");
        resultMap.put(Long.parseLong(lineArray[0]), // target
                Long.parseLong(lineArray[1])); // recommended
    }
    this.check(resultMap);
    return true;
}

From source file:uk.bl.wa.hadoop.datasets.WARCDatasetGeneratorIntegrationTest.java

License:Open Source License

@SuppressWarnings("deprecation")
@Test
public void testGenerator() throws Exception {
    // prepare for test
    // createTextInputFile();

    log.info("Checking input file is present...");
    // Check that the input file is present:
    Path[] inputFiles = FileUtil.stat2Paths(
            getFileSystem().listStatus(new Path(input, "gov.uk-revisit-warcs/"), new OutputLogFilter()));
    Assert.assertEquals(2, inputFiles.length);
    // Create a file of the inputs
    File tmpInputsFile = writeInputFile(inputFiles);

    // Set up arguments for the job:
    String[] args = { "-i", tmpInputsFile.getAbsolutePath(), "-o", this.output.getName() };

    // Set up the WARCIndexerRunner
    WARCDatasetGenerator wir = new WARCDatasetGenerator();

    // run job
    // Job configuration:
    log.info("Setting up job config...");
    JobConf jobConf = this.mrCluster.createJobConf();
    jobConf.set("mapred.child.java.opts", "-Xmx512m");
    wir.createJobConf(jobConf, args);
    log.info("Running job...");
    JobClient.runJob(jobConf);
    log.info("Job finished, checking the results...");

    // check the output exists
    Path[] outputFiles = FileUtil.stat2Paths(getFileSystem().listStatus(output, new OutputLogFilter()));

    // Copy the output out of HDFS and onto local FS:
    for (Path output : outputFiles) {
        FileOutputStream fout = new FileOutputStream("target/datasets-" + output.getName());
        log.info(" --- output : " + output);
        if (getFileSystem().isFile(output)) {
            InputStream is = getFileSystem().open(output);
            IOUtils.copy(is, fout);
        } else {
            log.info(" --- ...skipping directory...");
        }
        fout.flush();
        fout.close();
    }

    // Did we generate the expected multiple output files?:
    Assert.assertEquals(4, outputFiles.length);

}

From source file:uk.bl.wa.hadoop.indexer.mdx.MDXSeqSampleGeneratorIntegrationTest.java

License:Open Source License

@Test
public void testSeqStats() throws Exception {
    log.info("Checking input file is present...");
    // Check that the input file is present:
    Path[] inputFiles = FileUtil.stat2Paths(
            dfsCluster.getFileSystem().listStatus(new Path(input, "mdx-seq/"), new OutputLogFilter()));
    Assert.assertEquals(1, inputFiles.length);

    // Create a file of the inputs
    File tmpInputsFile = WARCMDXGeneratorIntegrationTest.writeInputFile(inputFiles);

    // Set up arguments for the job:
    String[] args = { "-i", tmpInputsFile.getAbsolutePath(), "-o", this.output.getName() };

    // Set up the WARCIndexerRunner
    MDXSeqSampleGenerator wir = new MDXSeqSampleGenerator();

    // run job
    // Job configuration:
    log.info("Setting up job config...");
    JobConf jobConf = this.mrCluster.createJobConf();
    wir.createJobConf(jobConf, args);
    log.info("Running job...");
    JobClient.runJob(jobConf);
    log.info("Job finished, checking the results...");

    // check the output exists
    Path[] outputFiles = FileUtil
            .stat2Paths(dfsCluster.getFileSystem().listStatus(output, new OutputLogFilter()));
    // Assert.assertEquals(1, outputFiles.length);

    // Copy the output out:
    for (Path output : outputFiles) {
        FileOutputStream fout = new FileOutputStream("target/" + output.getName());
        log.info(" --- output : " + output);
        if (dfsCluster.getFileSystem().isFile(output)) {
            InputStream is = dfsCluster.getFileSystem().open(output);
            IOUtils.copy(is, fout);
        } else {
            log.info(" --- ...skipping directory...");
        }
        fout.close();
    }

    // Check contents of the output:
    // TBA
}

From source file:uk.bl.wa.hadoop.indexer.mdx.MDXSeqStatsGeneratorIntegrationTest.java

License:Open Source License

@Test
public void testSeqStats() throws Exception {
    log.info("Checking input file is present...");
    // Check that the input file is present:
    Path[] inputFiles = FileUtil.stat2Paths(
            dfsCluster.getFileSystem().listStatus(new Path(input, "mdx-seq/"), new OutputLogFilter()));
    Assert.assertEquals(1, inputFiles.length);

    // Create a file of the inputs
    File tmpInputsFile = WARCMDXGeneratorIntegrationTest.writeInputFile(inputFiles);

    // Set up arguments for the job:
    String[] args = { "-i", tmpInputsFile.getAbsolutePath(), "-o", this.output.getName() };

    // Set up the WARCIndexerRunner
    MDXSeqStatsGenerator wir = new MDXSeqStatsGenerator();

    // run job
    // Job configuration:
    log.info("Setting up job config...");
    JobConf jobConf = this.mrCluster.createJobConf();
    wir.createJobConf(jobConf, args);
    log.info("Running job...");
    JobClient.runJob(jobConf);
    log.info("Job finished, checking the results...");

    // check the output exists
    Path[] outputFiles = FileUtil
            .stat2Paths(dfsCluster.getFileSystem().listStatus(output, new OutputLogFilter()));
    // Assert.assertEquals(1, outputFiles.length);

    // Copy the output out:
    for (Path output : outputFiles) {
        FileOutputStream fout = new FileOutputStream("target/" + output.getName());
        log.info(" --- output : " + output);
        if (dfsCluster.getFileSystem().isFile(output)) {
            InputStream is = dfsCluster.getFileSystem().open(output);
            IOUtils.copy(is, fout);
        } else {
            log.info(" --- ...skipping directory...");
        }
        fout.close();
    }

    // Check contents of the output:
    // TBA
}

From source file:uk.bl.wa.hadoop.indexer.mdx.WARCMDXGeneratorIntegrationTest.java

License:Open Source License

@SuppressWarnings("deprecation")
@Test
public void testMDXGenerator() throws Exception {
    // prepare for test
    // createTextInputFile();

    log.info("Checking input file is present...");
    // Check that the input file is present:
    Path[] inputFiles = FileUtil.stat2Paths(
            getFileSystem().listStatus(new Path(input, "gov.uk-revisit-warcs/"), new OutputLogFilter()));
    Assert.assertEquals(2, inputFiles.length);
    // Create a file of the inputs
    File tmpInputsFile = writeInputFile(inputFiles);

    // Set up arguments for the job:
    String[] args = { "-i", tmpInputsFile.getAbsolutePath(), "-o", this.output.getName() };

    // Set up the WARCIndexerRunner
    WARCMDXGenerator wir = new WARCMDXGenerator();

    // run job
    // Job configuration:
    log.info("Setting up job config...");
    JobConf jobConf = this.mrCluster.createJobConf();
    jobConf.setInt(WARCMDXGenerator.WARC_HADOOP_NUM_REDUCERS, 1);
    jobConf.set("mapred.child.java.opts", "-Xmx512m");
    wir.createJobConf(jobConf, args);
    log.info("Running job...");
    JobClient.runJob(jobConf);
    log.info("Job finished, checking the results...");

    // check the output exists
    Path[] outputFiles = FileUtil.stat2Paths(getFileSystem().listStatus(output, new OutputLogFilter()));
    // Default is 1 reducer (as knitting together multiple sequence files
    // is not a mere matter of concatenation):
    Assert.assertEquals(1, outputFiles.length);

    // Copy the output out of HDFS and onto local FS:
    FileOutputStream fout = new FileOutputStream(outputSeq);
    for (Path output : outputFiles) {
        log.info(" --- output : " + output);
        if (getFileSystem().isFile(output)) {
            InputStream is = getFileSystem().open(output);
            IOUtils.copy(is, fout);
        } else {
            log.info(" --- ...skipping directory...");
        }
        fout.flush();
    }
    fout.close();

    // Check contents of the output:
    Configuration config = new Configuration();
    Path path = new Path(outputSeq.getAbsolutePath());
    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(config), path, config);
    WritableComparable key = (WritableComparable) reader.getKeyClass().newInstance();
    Writable value = (Writable) reader.getValueClass().newInstance();

    MDX mdx;
    int counter = 0;
    while (reader.next(key, value)) {
        mdx = new MDX(value.toString());
        System.out.println(
                "Key is: " + key + " record_type: " + mdx.getRecordType() + " SURT: " + mdx.getUrlAsSURT());
        counter++;
    }
    assertEquals(114, counter);
    reader.close();

    // Now test the MDXSeqMerger
    testSeqMerger(outputFiles);
}

From source file:uk.bl.wa.hadoop.indexer.mdx.WARCMDXGeneratorIntegrationTest.java

License:Open Source License

private void testSeqMerger(Path[] inputFiles) throws Exception {

    // Create a file of the inputs
    File tmpInputsFile = writeInputFile(inputFiles);

    // Set up arguments for the job:
    String[] args = { "-i", tmpInputsFile.getAbsolutePath(), "-o", this.outputMerged.getName(), "-r", "1" };

    // Set up the WARCIndexerRunner
    MDXSeqMerger msm = new MDXSeqMerger();

    // run job
    log.info("Setting up job config...");
    JobConf jobConf = this.mrCluster.createJobConf();
    msm.createJobConf(jobConf, args);
    log.info("Running job...");
    JobClient.runJob(jobConf);
    log.info("Job finished, checking the results...");

    // Copy the output out of HDFS and onto local FS:
    FileOutputStream fout = new FileOutputStream(outputMergedSeq);
    Path[] outputFiles = FileUtil.stat2Paths(getFileSystem().listStatus(outputMerged, new OutputLogFilter()));
    for (Path output : outputFiles) {
        log.info(" --- output : " + output);
        if (getFileSystem().isFile(output)) {
            InputStream is = getFileSystem().open(output);
            IOUtils.copy(is, fout);
        } else {
            log.info(" --- ...skipping directory...");
        }
        fout.flush();
    }
    fout.close();

}