Example usage for org.apache.hadoop.fs Path suffix


Introduction

This page collects usage examples for the org.apache.hadoop.fs.Path#suffix method.

Prototype

public Path suffix(String suffix) 

Document

Adds a suffix to the final name in the path.
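As a quick illustration (the paths and class name here are hypothetical), note that the suffix is concatenated onto the last component of the path, so a suffix beginning with a separator effectively names a child path. This is why the examples below can glob with outputPath.suffix("/part-*"):

import org.apache.hadoop.fs.Path;

public class PathSuffixDemo {
    public static void main(String[] args) {
        Path out = new Path("/data/out");

        // The suffix is appended to the final name component.
        System.out.println(out.suffix(".tmp"));    // prints /data/out.tmp

        // A suffix starting with "/" therefore resolves like a child path.
        System.out.println(out.suffix("/part-*")); // prints /data/out/part-*
    }
}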

Usage

From source file:org.apache.avro.mapreduce.TestWordCount.java

License:Apache License

@Test
public void testAvroSpecificOutput() throws Exception {
    Job job = new Job();

    FileInputFormat.setInputPaths(job, new Path(
            getClass().getResource("/org/apache/avro/mapreduce/mapreduce-test-input.txt").toURI().toString()));
    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(LineCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setReducerClass(SpecificStatsReducer.class);
    AvroJob.setOutputKeySchema(job, TextStats.SCHEMA$);

    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    Path outputPath = new Path(tmpFolder.getRoot().getPath() + "/out-specific");
    FileOutputFormat.setOutputPath(job, outputPath);

    Assert.assertTrue(job.waitForCompletion(true));

    // Check that the results from the MapReduce were as expected.
    FileSystem fileSystem = FileSystem.get(job.getConfiguration());
    FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/part-*"));
    Assert.assertEquals(1, outputFiles.length);
    DataFileReader<TextStats> reader = new DataFileReader<TextStats>(
            new FsInput(outputFiles[0].getPath(), job.getConfiguration()),
            new SpecificDatumReader<TextStats>());
    Map<String, Integer> counts = new HashMap<String, Integer>();
    for (TextStats record : reader) {
        counts.put(record.name.toString(), record.count);
    }
    reader.close();

    Assert.assertEquals(3, counts.get("apple").intValue());
    Assert.assertEquals(2, counts.get("banana").intValue());
    Assert.assertEquals(1, counts.get("carrot").intValue());
}

From source file:org.apache.avro.mapreduce.TestWordCount.java

License:Apache License

@Test
public void testAvroReflectOutput() throws Exception {
    Job job = new Job();

    FileInputFormat.setInputPaths(job, new Path(
            getClass().getResource("/org/apache/avro/mapreduce/mapreduce-test-input.txt").toURI().toString()));
    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(LineCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setReducerClass(ReflectStatsReducer.class);
    AvroJob.setOutputKeySchema(job, REFLECT_STATS_SCHEMA);

    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    Path outputPath = new Path(tmpFolder.getRoot().getPath() + "/out-reflect");
    FileOutputFormat.setOutputPath(job, outputPath);

    Assert.assertTrue(job.waitForCompletion(true));

    // Check that the results from the MapReduce were as expected.
    FileSystem fileSystem = FileSystem.get(job.getConfiguration());
    FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/part-*"));
    Assert.assertEquals(1, outputFiles.length);
    DataFileReader<ReflectStats> reader = new DataFileReader<ReflectStats>(
            new FsInput(outputFiles[0].getPath(), job.getConfiguration()),
            new ReflectDatumReader<ReflectStats>());
    Map<String, Integer> counts = new HashMap<String, Integer>();
    for (ReflectStats record : reader) {
        counts.put(record.name.toString(), record.count);
    }
    reader.close();

    Assert.assertEquals(3, counts.get("apple").intValue());
    Assert.assertEquals(2, counts.get("banana").intValue());
    Assert.assertEquals(1, counts.get("carrot").intValue());
}

From source file:org.apache.avro.mapreduce.TestWordCount.java

License:Apache License

@Test
public void testAvroInput() throws Exception {
    Job job = new Job();

    FileInputFormat.setInputPaths(job, new Path(
            getClass().getResource("/org/apache/avro/mapreduce/mapreduce-test-input.avro").toURI().toString()));
    job.setInputFormatClass(AvroKeyInputFormat.class);
    AvroJob.setInputKeySchema(job, TextStats.SCHEMA$);

    job.setMapperClass(StatCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setReducerClass(SpecificStatsReducer.class);
    AvroJob.setOutputKeySchema(job, TextStats.SCHEMA$);

    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    Path outputPath = new Path(tmpFolder.getRoot().getPath() + "/out-specific-input");
    FileOutputFormat.setOutputPath(job, outputPath);

    Assert.assertTrue(job.waitForCompletion(true));

    // Check that the results from the MapReduce were as expected.
    FileSystem fileSystem = FileSystem.get(job.getConfiguration());
    FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/part-*"));
    Assert.assertEquals(1, outputFiles.length);
    DataFileReader<TextStats> reader = new DataFileReader<TextStats>(
            new FsInput(outputFiles[0].getPath(), job.getConfiguration()),
            new SpecificDatumReader<TextStats>());
    Map<String, Integer> counts = new HashMap<String, Integer>();
    for (TextStats record : reader) {
        counts.put(record.name.toString(), record.count);
    }
    reader.close();

    Assert.assertEquals(3, counts.get("apple").intValue());
    Assert.assertEquals(2, counts.get("banana").intValue());
    Assert.assertEquals(1, counts.get("carrot").intValue());
}

From source file:org.apache.avro.mapreduce.TestWordCount.java

License:Apache License

@Test
public void testReflectInput() throws Exception {
    Job job = new Job();
    FileInputFormat.setInputPaths(job, new Path(
            getClass().getResource("/org/apache/avro/mapreduce/mapreduce-test-input.avro").toURI().toString()));
    job.setInputFormatClass(AvroKeyInputFormat.class);
    AvroJob.setInputKeySchema(job, REFLECT_STATS_SCHEMA);

    job.setMapperClass(ReflectCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setReducerClass(ReflectStatsReducer.class);
    AvroJob.setOutputKeySchema(job, REFLECT_STATS_SCHEMA);

    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    Path outputPath = new Path(tmpFolder.getRoot().getPath() + "/out-reflect-input");
    FileOutputFormat.setOutputPath(job, outputPath);

    Assert.assertTrue(job.waitForCompletion(true));

    // Check that the results from the MapReduce were as expected.
    FileSystem fileSystem = FileSystem.get(job.getConfiguration());
    FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/part-*"));
    Assert.assertEquals(1, outputFiles.length);
    DataFileReader<ReflectStats> reader = new DataFileReader<ReflectStats>(
            new FsInput(outputFiles[0].getPath(), job.getConfiguration()),
            new ReflectDatumReader<ReflectStats>());
    Map<String, Integer> counts = new HashMap<String, Integer>();
    for (ReflectStats record : reader) {
        counts.put(record.name.toString(), record.count);
    }
    reader.close();

    Assert.assertEquals(3, counts.get("apple").intValue());
    Assert.assertEquals(2, counts.get("banana").intValue());
    Assert.assertEquals(1, counts.get("carrot").intValue());
}

From source file:org.apache.avro.mapreduce.TestWordCount.java

License:Apache License

/**
 * Tests the MR output to text files when using AvroKey and AvroValue records.
 */
@Test
public void testAvroUsingTextFileOutput() throws Exception {
    Job job = new Job();

    FileInputFormat.setInputPaths(job, new Path(
            getClass().getResource("/org/apache/avro/mapreduce/mapreduce-test-input.txt").toURI().toString()));
    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(LineCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setReducerClass(AvroSumReducer.class);
    AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
    AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT));

    job.setOutputFormatClass(TextOutputFormat.class);
    Path outputPath = new Path(tmpFolder.getRoot().getPath() + "/out-text");
    FileOutputFormat.setOutputPath(job, outputPath);

    Assert.assertTrue(job.waitForCompletion(true));

    // Check that the results from the MapReduce were as expected.
    FileSystem fileSystem = FileSystem.get(job.getConfiguration());
    FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/part-*"));
    Assert.assertEquals(1, outputFiles.length);
    Path filePath = outputFiles[0].getPath();
    InputStream inputStream = filePath.getFileSystem(job.getConfiguration()).open(filePath);
    Assert.assertNotNull(inputStream);
    BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
    try {
        Assert.assertTrue(reader.ready());
        Assert.assertEquals("apple\t3", reader.readLine());
        Assert.assertEquals("banana\t2", reader.readLine());
        Assert.assertEquals("carrot\t1", reader.readLine());
        Assert.assertFalse(reader.ready());
    } finally {
        reader.close();
    }
}

From source file:org.apache.beam.runners.spark.translation.streaming.Checkpoint.java

License:Apache License

private static void write(FileSystem fileSystem, Path checkpointFilePath, byte[] value) throws IOException {
    Path tmpPath = checkpointFilePath.suffix(TEMP_FILE_SUFFIX);
    Path backupPath = checkpointFilePath.suffix(BACKUP_FILE_SUFFIX);
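    // Rotate the current checkpoint to the backup path, then write the new
    // value to a temp file and rename it into place, so a failed write never
    // corrupts the existing checkpoint.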
    if (fileSystem.exists(checkpointFilePath)) {
        if (fileSystem.exists(backupPath)) {
            fileSystem.delete(backupPath, false);
        }
        fileSystem.rename(checkpointFilePath, backupPath);
    }
    FSDataOutputStream os = fileSystem.create(tmpPath, true);
    os.write(value);
    os.close();
    fileSystem.rename(tmpPath, checkpointFilePath);
}

From source file:org.apache.beam.runners.spark.translation.streaming.Checkpoint.java

License:Apache License

private static byte[] read(FileSystem fileSystem, Path checkpointFilePath) throws IOException {
    Path backupCheckpointPath = checkpointFilePath.suffix(".bak");
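    // Read from the primary checkpoint if it exists; otherwise fall back to the backup.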
    FSDataInputStream is = null;
    if (fileSystem.exists(checkpointFilePath)) {
        is = fileSystem.open(checkpointFilePath);
    } else if (fileSystem.exists(backupCheckpointPath)) {
        is = fileSystem.open(backupCheckpointPath);
    }
    return is != null ? IOUtils.toByteArray(is) : null;
}

From source file:org.apache.hama.examples.SpMV.java

License:Apache License

/**
 * Parses the command line in standard form.
 */
private static void parseArgs(HamaConfiguration conf, String[] args) {
    if (args.length < 3) {
        printUsage();
        System.exit(-1);
    }

    conf.set(inputMatrixPathString, args[0]);
    conf.set(inputVectorPathString, args[1]);

    Path path = new Path(args[2]);
    path = path.suffix(intermediate);
    conf.set(outputPathString, path.toString());

    if (args.length == 4) {
        try {
            int taskCount = Integer.parseInt(args[3]);
            if (taskCount < 0) {
                printUsage();
                throw new IllegalArgumentException(
                        "The number of requested tasks can't be negative. Actual value: "
                                + String.valueOf(taskCount));
            }
            conf.setInt(requestedBspTasksString, taskCount);
        } catch (NumberFormatException e) {
            printUsage();
            throw new IllegalArgumentException(
                    "The requested task count must be an integer. Cannot parse value: " + args[3]);
        }
    }
}

From source file:org.apache.mahout.text.SequenceFilesFromMailArchivesTest.java

License:Apache License

@Test
public void testMapReduce() throws Exception {

    Path tmpDir = getTestTempDirPath();
    Path mrOutputDir = new Path(tmpDir, "mail-archives-out-mr");
    Configuration configuration = getConfiguration();
    FileSystem fs = FileSystem.get(configuration);

    File expectedInputFile = new File(inputDir.toString());

    String[] args = { "-Dhadoop.tmp.dir=" + configuration.get("hadoop.tmp.dir"), "--input",
            expectedInputFile.getAbsolutePath(), "--output", mrOutputDir.toString(), "--charset", "UTF-8",
            "--keyPrefix", "TEST", "--method", "mapreduce", "--body", "--subject", "--separator", "" };

    // run the application's main method
    SequenceFilesFromMailArchives.main(args);

    // the app should create a single SequenceFile named "part-m-00000" in the output dir
    FileStatus[] fileStatuses = fs.listStatus(mrOutputDir.suffix("/part-m-00000"));
    assertEquals(1, fileStatuses.length); // only one
    assertEquals("part-m-00000", fileStatuses[0].getPath().getName());
    SequenceFileIterator<Text, Text> iterator = new SequenceFileIterator<Text, Text>(
            mrOutputDir.suffix("/part-m-00000"), true, configuration);

    Assert.assertTrue("First key/value pair not found!", iterator.hasNext());
    Pair<Text, Text> record = iterator.next();

    File parentFileSubSubDir = new File(new File(new File(new File("TEST"), "subdir"), "subsubdir"),
            "mail-messages-2.gz");

    String expected = record.getFirst().toString();
    if (SystemUtils.IS_OS_WINDOWS) {
        expected = expected.replace("/", "\\");
    }
    Assert.assertEquals(new File(parentFileSubSubDir, testVars[0][0]).toString(), expected);
    Assert.assertEquals(testVars[0][1] + testVars[0][2], record.getSecond().toString());
    Assert.assertTrue("Second key/value pair not found!", iterator.hasNext());

    record = iterator.next();
    expected = record.getFirst().toString();
    if (SystemUtils.IS_OS_WINDOWS) {
        expected = expected.replace("/", "\\");
    }
    Assert.assertEquals(new File(parentFileSubSubDir, testVars[1][0]).toString(), expected);
    Assert.assertEquals(testVars[1][1] + testVars[1][2], record.getSecond().toString());

    // test other file
    File parentFile = new File(new File(new File("TEST"), "subdir"), "mail-messages.gz");
    record = iterator.next();
    expected = record.getFirst().toString();
    if (SystemUtils.IS_OS_WINDOWS) {
        expected = expected.replace("/", "\\");
    }
    Assert.assertEquals(new File(parentFile, testVars[0][0]).toString(), expected);
    Assert.assertEquals(testVars[0][1] + testVars[0][2], record.getSecond().toString());
    Assert.assertTrue("Second key/value pair not found!", iterator.hasNext());

    record = iterator.next();
    expected = record.getFirst().toString();
    if (SystemUtils.IS_OS_WINDOWS) {
        expected = expected.replace("/", "\\");
    }
    Assert.assertEquals(new File(parentFile, testVars[1][0]).toString(), expected);
    Assert.assertEquals(testVars[1][1] + testVars[1][2], record.getSecond().toString());
    Assert.assertFalse("Only four key/value pairs expected!", iterator.hasNext());
}

From source file:org.apache.mahout.text.TestSequenceFilesFromDirectory.java

License:Apache License

private static void checkMRResultFiles(Configuration conf, Path outputDir, String[][] data, String prefix)
        throws IOException {
    FileSystem fs = FileSystem.get(conf);

    // output exists?
    FileStatus[] fileStatuses = fs.listStatus(outputDir.suffix("/part-m-00000"), PathFilters.logsCRCFilter());
    assertEquals(1, fileStatuses.length); // only one
    assertEquals("part-m-00000", fileStatuses[0].getPath().getName());
    Map<String, String> fileToData = Maps.newHashMap();
    for (String[] aData : data) {
        System.out.printf("map.put: %s %s\n", prefix + Path.SEPARATOR + aData[0], aData[1]);
        fileToData.put(prefix + Path.SEPARATOR + aData[0], aData[1]);
    }

    // read a chunk to check content
    SequenceFileIterator<Text, Text> iterator = new SequenceFileIterator<Text, Text>(fileStatuses[0].getPath(),
            true, conf);
    try {
        while (iterator.hasNext()) {
            Pair<Text, Text> record = iterator.next();
            String retrievedData = fileToData.get(record.getFirst().toString().trim());

            System.out.printf("MR> %s >> %s\n", record.getFirst().toString().trim(),
                    record.getSecond().toString().trim());
            assertNotNull(retrievedData);
            assertEquals(retrievedData, record.getSecond().toString().trim());
        }
    } finally {
        Closeables.close(iterator, true);
    }
}