Example usage for org.apache.hadoop.fs Path suffix


Introduction

This page collects usage examples for the org.apache.hadoop.fs.Path#suffix method.

Prototype

public Path suffix(String suffix) 

Document

Adds a suffix to the final name in the path.
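As a quick illustration (the paths and class name here are hypothetical), note that the suffix is concatenated onto the last component of the path, so a suffix beginning with a separator effectively names a child path. This is why the examples below can glob with outputPath.suffix("/part-*"):

import org.apache.hadoop.fs.Path;

public class PathSuffixDemo {
    public static void main(String[] args) {
        Path out = new Path("/data/out");

        // The suffix is appended to the final name component.
        System.out.println(out.suffix(".tmp"));    // prints /data/out.tmp

        // A suffix starting with "/" therefore resolves like a child path.
        System.out.println(out.suffix("/part-*")); // prints /data/out/part-*
    }
}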

Usage

From source file:org.apache.avro.mapreduce.TestWordCount.java

License:Apache License

@Test
public void testAvroSpecificOutput() throws Exception {
    Job job = new Job();

    FileInputFormat.setInputPaths(job, new Path(
            getClass().getResource("/org/apache/avro/mapreduce/mapreduce-test-input.txt").toURI().toString()));
    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(LineCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setReducerClass(SpecificStatsReducer.class);
    AvroJob.setOutputKeySchema(job, TextStats.SCHEMA$);

    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    Path outputPath = new Path(tmpFolder.getRoot().getPath() + "/out-specific");
    FileOutputFormat.setOutputPath(job, outputPath);

    Assert.assertTrue(job.waitForCompletion(true));

    // Check that the results from the MapReduce were as expected.
    FileSystem fileSystem = FileSystem.get(job.getConfiguration());
    FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/part-*"));
    Assert.assertEquals(1, outputFiles.length);
    DataFileReader<TextStats> reader = new DataFileReader<TextStats>(
            new FsInput(outputFiles[0].getPath(), job.getConfiguration()),
            new SpecificDatumReader<TextStats>());
    Map<String, Integer> counts = new HashMap<String, Integer>();
    for (TextStats record : reader) {
        counts.put(record.name.toString(), record.count);
    }
    reader.close();

    Assert.assertEquals(3, counts.get("apple").intValue());
    Assert.assertEquals(2, counts.get("banana").intValue());
    Assert.assertEquals(1, counts.get("carrot").intValue());
}

From source file:org.apache.avro.mapreduce.TestWordCount.java

License:Apache License

@Test
public void testAvroReflectOutput() throws Exception {
    Job job = new Job();

    FileInputFormat.setInputPaths(job, new Path(
            getClass().getResource("/org/apache/avro/mapreduce/mapreduce-test-input.txt").toURI().toString()));
    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(LineCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setReducerClass(ReflectStatsReducer.class);
    AvroJob.setOutputKeySchema(job, REFLECT_STATS_SCHEMA);

    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    Path outputPath = new Path(tmpFolder.getRoot().getPath() + "/out-reflect");
    FileOutputFormat.setOutputPath(job, outputPath);

    Assert.assertTrue(job.waitForCompletion(true));

    // Check that the results from the MapReduce were as expected.
    FileSystem fileSystem = FileSystem.get(job.getConfiguration());
    FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/part-*"));
    Assert.assertEquals(1, outputFiles.length);
    DataFileReader<ReflectStats> reader = new DataFileReader<ReflectStats>(
            new FsInput(outputFiles[0].getPath(), job.getConfiguration()),
            new ReflectDatumReader<ReflectStats>());
    Map<String, Integer> counts = new HashMap<String, Integer>();
    for (ReflectStats record : reader) {
        counts.put(record.name.toString(), record.count);
    }
    reader.close();

    Assert.assertEquals(3, counts.get("apple").intValue());
    Assert.assertEquals(2, counts.get("banana").intValue());
    Assert.assertEquals(1, counts.get("carrot").intValue());
}

From source file:org.apache.avro.mapreduce.TestWordCount.java

License:Apache License

@Test
public void testAvroInput() throws Exception {
    Job job = new Job();

    FileInputFormat.setInputPaths(job, new Path(
            getClass().getResource("/org/apache/avro/mapreduce/mapreduce-test-input.avro").toURI().toString()));
    job.setInputFormatClass(AvroKeyInputFormat.class);
    AvroJob.setInputKeySchema(job, TextStats.SCHEMA$);

    job.setMapperClass(StatCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setReducerClass(SpecificStatsReducer.class);
    AvroJob.setOutputKeySchema(job, TextStats.SCHEMA$);

    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    Path outputPath = new Path(tmpFolder.getRoot().getPath() + "/out-specific-input");
    FileOutputFormat.setOutputPath(job, outputPath);

    Assert.assertTrue(job.waitForCompletion(true));

    // Check that the results from the MapReduce were as expected.
    FileSystem fileSystem = FileSystem.get(job.getConfiguration());
    FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/part-*"));
    Assert.assertEquals(1, outputFiles.length);
    DataFileReader<TextStats> reader = new DataFileReader<TextStats>(
            new FsInput(outputFiles[0].getPath(), job.getConfiguration()),
            new SpecificDatumReader<TextStats>());
    Map<String, Integer> counts = new HashMap<String, Integer>();
    for (TextStats record : reader) {
        counts.put(record.name.toString(), record.count);
    }
    reader.close();

    Assert.assertEquals(3, counts.get("apple").intValue());
    Assert.assertEquals(2, counts.get("banana").intValue());
    Assert.assertEquals(1, counts.get("carrot").intValue());
}

From source file:org.apache.avro.mapreduce.TestWordCount.java

License:Apache License

@Test
public void testReflectInput() throws Exception {
    Job job = new Job();
    FileInputFormat.setInputPaths(job, new Path(
            getClass().getResource("/org/apache/avro/mapreduce/mapreduce-test-input.avro").toURI().toString()));
    job.setInputFormatClass(AvroKeyInputFormat.class);
    AvroJob.setInputKeySchema(job, REFLECT_STATS_SCHEMA);

    job.setMapperClass(ReflectCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setReducerClass(ReflectStatsReducer.class);
    AvroJob.setOutputKeySchema(job, REFLECT_STATS_SCHEMA);

    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    Path outputPath = new Path(tmpFolder.getRoot().getPath() + "/out-reflect-input");
    FileOutputFormat.setOutputPath(job, outputPath);

    Assert.assertTrue(job.waitForCompletion(true));

    // Check that the results from the MapReduce were as expected.
    FileSystem fileSystem = FileSystem.get(job.getConfiguration());
    FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/part-*"));
    Assert.assertEquals(1, outputFiles.length);
    DataFileReader<ReflectStats> reader = new DataFileReader<ReflectStats>(
            new FsInput(outputFiles[0].getPath(), job.getConfiguration()),
            new ReflectDatumReader<ReflectStats>());
    Map<String, Integer> counts = new HashMap<String, Integer>();
    for (ReflectStats record : reader) {
        counts.put(record.name.toString(), record.count);
    }
    reader.close();

    Assert.assertEquals(3, counts.get("apple").intValue());
    Assert.assertEquals(2, counts.get("banana").intValue());
    Assert.assertEquals(1, counts.get("carrot").intValue());
}

From source file:org.apache.avro.mapreduce.TestWordCount.java

License:Apache License

/**
 * Tests the MR output to text files when using AvroKey and AvroValue records.
 */
@Test
public void testAvroUsingTextFileOutput() throws Exception {
    Job job = new Job();

    FileInputFormat.setInputPaths(job, new Path(
            getClass().getResource("/org/apache/avro/mapreduce/mapreduce-test-input.txt").toURI().toString()));
    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(LineCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setReducerClass(AvroSumReducer.class);
    AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
    AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT));

    job.setOutputFormatClass(TextOutputFormat.class);
    Path outputPath = new Path(tmpFolder.getRoot().getPath() + "/out-text");
    FileOutputFormat.setOutputPath(job, outputPath);

    Assert.assertTrue(job.waitForCompletion(true));

    // Check that the results from the MapReduce were as expected.
    FileSystem fileSystem = FileSystem.get(job.getConfiguration());
    FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/part-*"));
    Assert.assertEquals(1, outputFiles.length);
    Path filePath = outputFiles[0].getPath();
    InputStream inputStream = filePath.getFileSystem(job.getConfiguration()).open(filePath);
    Assert.assertNotNull(inputStream);
    BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
    try {
        Assert.assertTrue(reader.ready());
        Assert.assertEquals("apple\t3", reader.readLine());
        Assert.assertEquals("banana\t2", reader.readLine());
        Assert.assertEquals("carrot\t1", reader.readLine());
        Assert.assertFalse(reader.ready());
    } finally {
        reader.close();
    }
}

From source file:org.apache.beam.runners.spark.translation.streaming.Checkpoint.java

License:Apache License

private static void write(FileSystem fileSystem, Path checkpointFilePath, byte[] value) throws IOException {
    Path tmpPath = checkpointFilePath.suffix(TEMP_FILE_SUFFIX);
    Path backupPath = checkpointFilePath.suffix(BACKUP_FILE_SUFFIX);
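    // Rotate the current checkpoint to the backup path, then write the new
    // value to a temp file and rename it into place, so a failed write never
    // corrupts the existing checkpoint.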
    if (fileSystem.exists(checkpointFilePath)) {
        if (fileSystem.exists(backupPath)) {
            fileSystem.delete(backupPath, false);
        }
        fileSystem.rename(checkpointFilePath, backupPath);
    }
    FSDataOutputStream os = fileSystem.create(tmpPath, true);
    os.write(value);
    os.close();
    fileSystem.rename(tmpPath, checkpointFilePath);
}

From source file:org.apache.beam.runners.spark.translation.streaming.Checkpoint.java

License:Apache License

private static byte[] read(FileSystem fileSystem, Path checkpointFilePath) throws IOException {
    Path backupCheckpointPath = checkpointFilePath.suffix(".bak");
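    // Read from the primary checkpoint if it exists; otherwise fall back to the backup.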
    FSDataInputStream is = null;
    if (fileSystem.exists(checkpointFilePath)) {
        is = fileSystem.open(checkpointFilePath);
    } else if (fileSystem.exists(backupCheckpointPath)) {
        is = fileSystem.open(backupCheckpointPath);
    }
    return is != null ? IOUtils.toByteArray(is) : null;
}

From source file:org.apache.hama.examples.SpMV.java

License:Apache License

/**
 * Parses the command line in standard form.
 */
private static void parseArgs(HamaConfiguration conf, String[] args) {
    if (args.length < 3) {
        printUsage();
        System.exit(-1);
    }

    conf.set(inputMatrixPathString, args[0]);
    conf.set(inputVectorPathString, args[1]);

    Path path = new Path(args[2]);
    path = path.suffix(intermediate);
    conf.set(outputPathString, path.toString());

    if (args.length == 4) {
        try {
            int taskCount = Integer.parseInt(args[3]);
            if (taskCount < 0) {
                printUsage();
                throw new IllegalArgumentException(
                        "The number of requested tasks can't be negative. Actual value: "
                                + String.valueOf(taskCount));
            }
            conf.setInt(requestedBspTasksString, taskCount);
        } catch (NumberFormatException e) {
            printUsage();
            throw new IllegalArgumentException(
                    "The requested task count must be an integer. Cannot parse value: " + args[3]);
        }
    }
}

From source file:org.apache.mahout.text.SequenceFilesFromMailArchivesTest.java

License:Apache License

@Test
public void testMapReduce() throws Exception {

    Path tmpDir = getTestTempDirPath();
    Path mrOutputDir = new Path(tmpDir, "mail-archives-out-mr");
    Configuration configuration = getConfiguration();
    FileSystem fs = FileSystem.get(configuration);

    File expectedInputFile = new File(inputDir.toString());

    String[] args = { "-Dhadoop.tmp.dir=" + configuration.get("hadoop.tmp.dir"), "--input",
            expectedInputFile.getAbsolutePath(), "--output", mrOutputDir.toString(), "--charset", "UTF-8",
            "--keyPrefix", "TEST", "--method", "mapreduce", "--body", "--subject", "--separator", "" };

    // run the application's main method
    SequenceFilesFromMailArchives.main(args);

    // the app should create a single SequenceFile named "part-m-00000" in the output dir
    FileStatus[] fileStatuses = fs.listStatus(mrOutputDir.suffix("/part-m-00000"));
    assertEquals(1, fileStatuses.length); // only one
    assertEquals("part-m-00000", fileStatuses[0].getPath().getName());
    SequenceFileIterator<Text, Text> iterator = new SequenceFileIterator<Text, Text>(
            mrOutputDir.suffix("/part-m-00000"), true, configuration);

    Assert.assertTrue("First key/value pair not found!", iterator.hasNext());
    Pair<Text, Text> record = iterator.next();

    File parentFileSubSubDir = new File(new File(new File(new File("TEST"), "subdir"), "subsubdir"),
            "mail-messages-2.gz");

    String expected = record.getFirst().toString();
    if (SystemUtils.IS_OS_WINDOWS) {
        expected = expected.replace("/", "\\");
    }
    Assert.assertEquals(new File(parentFileSubSubDir, testVars[0][0]).toString(), expected);
    Assert.assertEquals(testVars[0][1] + testVars[0][2], record.getSecond().toString());
    Assert.assertTrue("Second key/value pair not found!", iterator.hasNext());

    record = iterator.next();
    expected = record.getFirst().toString();
    if (SystemUtils.IS_OS_WINDOWS) {
        expected = expected.replace("/", "\\");
    }
    Assert.assertEquals(new File(parentFileSubSubDir, testVars[1][0]).toString(), expected);
    Assert.assertEquals(testVars[1][1] + testVars[1][2], record.getSecond().toString());

    // test other file
    File parentFile = new File(new File(new File("TEST"), "subdir"), "mail-messages.gz");
    record = iterator.next();
    expected = record.getFirst().toString();
    if (SystemUtils.IS_OS_WINDOWS) {
        expected = expected.replace("/", "\\");
    }
    Assert.assertEquals(new File(parentFile, testVars[0][0]).toString(), expected);
    Assert.assertEquals(testVars[0][1] + testVars[0][2], record.getSecond().toString());
    Assert.assertTrue("Second key/value pair not found!", iterator.hasNext());

    record = iterator.next();
    expected = record.getFirst().toString();
    if (SystemUtils.IS_OS_WINDOWS) {
        expected = expected.replace("/", "\\");
    }
    Assert.assertEquals(new File(parentFile, testVars[1][0]).toString(), expected);
    Assert.assertEquals(testVars[1][1] + testVars[1][2], record.getSecond().toString());
    Assert.assertFalse("Only four key/value pairs expected!", iterator.hasNext());
}

From source file:org.apache.mahout.text.TestSequenceFilesFromDirectory.java

License:Apache License

private static void checkMRResultFiles(Configuration conf, Path outputDir, String[][] data, String prefix)
        throws IOException {
    FileSystem fs = FileSystem.get(conf);

    // output exists?
    FileStatus[] fileStatuses = fs.listStatus(outputDir.suffix("/part-m-00000"), PathFilters.logsCRCFilter());
    assertEquals(1, fileStatuses.length); // only one
    assertEquals("part-m-00000", fileStatuses[0].getPath().getName());
    Map<String, String> fileToData = Maps.newHashMap();
    for (String[] aData : data) {
        System.out.printf("map.put: %s %s\n", prefix + Path.SEPARATOR + aData[0], aData[1]);
        fileToData.put(prefix + Path.SEPARATOR + aData[0], aData[1]);
    }

    // read a chunk to check content
    SequenceFileIterator<Text, Text> iterator = new SequenceFileIterator<Text, Text>(fileStatuses[0].getPath(),
            true, conf);
    try {
        while (iterator.hasNext()) {
            Pair<Text, Text> record = iterator.next();
            String retrievedData = fileToData.get(record.getFirst().toString().trim());

            System.out.printf("MR> %s >> %s\n", record.getFirst().toString().trim(),
                    record.getSecond().toString().trim());
            assertNotNull(retrievedData);
            assertEquals(retrievedData, record.getSecond().toString().trim());
        }
    } finally {
        Closeables.close(iterator, true);
    }
}