List of usage examples for org.apache.hadoop.fs Path suffix
public Path suffix(String suffix)
From source file:org.apache.avro.mapreduce.TestWordCount.java
License:Apache License
@Test public void testAvroSpecificOutput() throws Exception { Job job = new Job(); FileInputFormat.setInputPaths(job, new Path( getClass().getResource("/org/apache/avro/mapreduce/mapreduce-test-input.txt").toURI().toString())); job.setInputFormatClass(TextInputFormat.class); job.setMapperClass(LineCountMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setReducerClass(SpecificStatsReducer.class); AvroJob.setOutputKeySchema(job, TextStats.SCHEMA$); job.setOutputFormatClass(AvroKeyOutputFormat.class); Path outputPath = new Path(tmpFolder.getRoot().getPath() + "/out-specific"); FileOutputFormat.setOutputPath(job, outputPath); Assert.assertTrue(job.waitForCompletion(true)); // Check that the results from the MapReduce were as expected. FileSystem fileSystem = FileSystem.get(job.getConfiguration()); FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/part-*")); Assert.assertEquals(1, outputFiles.length); DataFileReader<TextStats> reader = new DataFileReader<TextStats>( new FsInput(outputFiles[0].getPath(), job.getConfiguration()), new SpecificDatumReader<TextStats>()); Map<String, Integer> counts = new HashMap<String, Integer>(); for (TextStats record : reader) { counts.put(record.name.toString(), record.count); }/*from w w w . j a v a2 s.c o m*/ reader.close(); Assert.assertEquals(3, counts.get("apple").intValue()); Assert.assertEquals(2, counts.get("banana").intValue()); Assert.assertEquals(1, counts.get("carrot").intValue()); }
From source file:org.apache.avro.mapreduce.TestWordCount.java
License:Apache License
@Test public void testAvroReflectOutput() throws Exception { Job job = new Job(); FileInputFormat.setInputPaths(job, new Path( getClass().getResource("/org/apache/avro/mapreduce/mapreduce-test-input.txt").toURI().toString())); job.setInputFormatClass(TextInputFormat.class); job.setMapperClass(LineCountMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setReducerClass(ReflectStatsReducer.class); AvroJob.setOutputKeySchema(job, REFLECT_STATS_SCHEMA); job.setOutputFormatClass(AvroKeyOutputFormat.class); Path outputPath = new Path(tmpFolder.getRoot().getPath() + "/out-reflect"); FileOutputFormat.setOutputPath(job, outputPath); Assert.assertTrue(job.waitForCompletion(true)); // Check that the results from the MapReduce were as expected. FileSystem fileSystem = FileSystem.get(job.getConfiguration()); FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/part-*")); Assert.assertEquals(1, outputFiles.length); DataFileReader<ReflectStats> reader = new DataFileReader<ReflectStats>( new FsInput(outputFiles[0].getPath(), job.getConfiguration()), new ReflectDatumReader<ReflectStats>()); Map<String, Integer> counts = new HashMap<String, Integer>(); for (ReflectStats record : reader) { counts.put(record.name.toString(), record.count); }/* ww w . j a v a 2s. c o m*/ reader.close(); Assert.assertEquals(3, counts.get("apple").intValue()); Assert.assertEquals(2, counts.get("banana").intValue()); Assert.assertEquals(1, counts.get("carrot").intValue()); }
From source file:org.apache.avro.mapreduce.TestWordCount.java
License:Apache License
@Test public void testAvroInput() throws Exception { Job job = new Job(); FileInputFormat.setInputPaths(job, new Path( getClass().getResource("/org/apache/avro/mapreduce/mapreduce-test-input.avro").toURI().toString())); job.setInputFormatClass(AvroKeyInputFormat.class); AvroJob.setInputKeySchema(job, TextStats.SCHEMA$); job.setMapperClass(StatCountMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setReducerClass(SpecificStatsReducer.class); AvroJob.setOutputKeySchema(job, TextStats.SCHEMA$); job.setOutputFormatClass(AvroKeyOutputFormat.class); Path outputPath = new Path(tmpFolder.getRoot().getPath() + "/out-specific-input"); FileOutputFormat.setOutputPath(job, outputPath); Assert.assertTrue(job.waitForCompletion(true)); // Check that the results from the MapReduce were as expected. FileSystem fileSystem = FileSystem.get(job.getConfiguration()); FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/part-*")); Assert.assertEquals(1, outputFiles.length); DataFileReader<TextStats> reader = new DataFileReader<TextStats>( new FsInput(outputFiles[0].getPath(), job.getConfiguration()), new SpecificDatumReader<TextStats>()); Map<String, Integer> counts = new HashMap<String, Integer>(); for (TextStats record : reader) { counts.put(record.name.toString(), record.count); }/*ww w .j ava 2 s. c o m*/ reader.close(); Assert.assertEquals(3, counts.get("apple").intValue()); Assert.assertEquals(2, counts.get("banana").intValue()); Assert.assertEquals(1, counts.get("carrot").intValue()); }
From source file:org.apache.avro.mapreduce.TestWordCount.java
License:Apache License
@Test public void testReflectInput() throws Exception { Job job = new Job(); FileInputFormat.setInputPaths(job, new Path( getClass().getResource("/org/apache/avro/mapreduce/mapreduce-test-input.avro").toURI().toString())); job.setInputFormatClass(AvroKeyInputFormat.class); AvroJob.setInputKeySchema(job, REFLECT_STATS_SCHEMA); job.setMapperClass(ReflectCountMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setReducerClass(ReflectStatsReducer.class); AvroJob.setOutputKeySchema(job, REFLECT_STATS_SCHEMA); job.setOutputFormatClass(AvroKeyOutputFormat.class); Path outputPath = new Path(tmpFolder.getRoot().getPath() + "/out-reflect-input"); FileOutputFormat.setOutputPath(job, outputPath); Assert.assertTrue(job.waitForCompletion(true)); // Check that the results from the MapReduce were as expected. FileSystem fileSystem = FileSystem.get(job.getConfiguration()); FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/part-*")); Assert.assertEquals(1, outputFiles.length); DataFileReader<ReflectStats> reader = new DataFileReader<ReflectStats>( new FsInput(outputFiles[0].getPath(), job.getConfiguration()), new ReflectDatumReader<ReflectStats>()); Map<String, Integer> counts = new HashMap<String, Integer>(); for (ReflectStats record : reader) { counts.put(record.name.toString(), record.count); }/*from w ww.j a v a 2s . com*/ reader.close(); Assert.assertEquals(3, counts.get("apple").intValue()); Assert.assertEquals(2, counts.get("banana").intValue()); Assert.assertEquals(1, counts.get("carrot").intValue()); }
From source file:org.apache.avro.mapreduce.TestWordCount.java
License:Apache License
/** * Tests the MR output to text files when using AvroKey and AvroValue records. *//*from w ww .jav a 2 s .c o m*/ @Test public void testAvroUsingTextFileOutput() throws Exception { Job job = new Job(); FileInputFormat.setInputPaths(job, new Path( getClass().getResource("/org/apache/avro/mapreduce/mapreduce-test-input.txt").toURI().toString())); job.setInputFormatClass(TextInputFormat.class); job.setMapperClass(LineCountMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setReducerClass(AvroSumReducer.class); AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); job.setOutputFormatClass(TextOutputFormat.class); Path outputPath = new Path(tmpFolder.getRoot().getPath() + "/out-text"); FileOutputFormat.setOutputPath(job, outputPath); Assert.assertTrue(job.waitForCompletion(true)); // Check that the results from the MapReduce were as expected. FileSystem fileSystem = FileSystem.get(job.getConfiguration()); FileStatus[] outputFiles = fileSystem.globStatus(outputPath.suffix("/part-*")); Assert.assertEquals(1, outputFiles.length); Path filePath = outputFiles[0].getPath(); InputStream inputStream = filePath.getFileSystem(job.getConfiguration()).open(filePath); Assert.assertNotNull(inputStream); BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream)); try { Assert.assertTrue(reader.ready()); Assert.assertEquals("apple\t3", reader.readLine()); Assert.assertEquals("banana\t2", reader.readLine()); Assert.assertEquals("carrot\t1", reader.readLine()); Assert.assertFalse(reader.ready()); } finally { reader.close(); } }
From source file:org.apache.beam.runners.spark.translation.streaming.Checkpoint.java
License:Apache License
private static void write(FileSystem fileSystem, Path checkpointFilePath, byte[] value) throws IOException { Path tmpPath = checkpointFilePath.suffix(TEMP_FILE_SUFFIX); Path backupPath = checkpointFilePath.suffix(BACKUP_FILE_SUFFIX); if (fileSystem.exists(checkpointFilePath)) { if (fileSystem.exists(backupPath)) { fileSystem.delete(backupPath, false); }//from w ww . j a v a 2 s . c o m fileSystem.rename(checkpointFilePath, backupPath); } FSDataOutputStream os = fileSystem.create(tmpPath, true); os.write(value); os.close(); fileSystem.rename(tmpPath, checkpointFilePath); }
From source file:org.apache.beam.runners.spark.translation.streaming.Checkpoint.java
License:Apache License
/**
 * Reads the whole content of the checkpoint file, falling back to the backup file
 * ({@code checkpointFilePath + ".bak"}) when the checkpoint itself does not exist.
 *
 * @param fileSystem file system on which the paths are resolved
 * @param checkpointFilePath location of the checkpoint file
 * @return the bytes of the checkpoint (or of its backup), or {@code null} when
 *     neither file exists
 * @throws IOException if checking, opening or reading a file throws
 */
private static byte[] read(FileSystem fileSystem, Path checkpointFilePath) throws IOException {
    // NOTE(review): presumably ".bak" is meant to match the backup suffix used when
    // writing checkpoints - confirm and share one constant if so.
    Path backupCheckpointPath = checkpointFilePath.suffix(".bak");

    FSDataInputStream is = null;
    if (fileSystem.exists(checkpointFilePath)) {
        is = fileSystem.open(checkpointFilePath);
    } else if (fileSystem.exists(backupCheckpointPath)) {
        is = fileSystem.open(backupCheckpointPath);
    }
    if (is == null) {
        return null;
    }

    try {
        return IOUtils.toByteArray(is);
    } finally {
        // The stream was opened here, so it is closed here as well.
        is.close();
    }
}
From source file:org.apache.hama.examples.SpMV.java
License:Apache License
/**
 * Parses the command line in standard form and stores the values in {@code conf}.
 *
 * <ul>
 *   <li>{@code args[0]} is stored under the {@code inputMatrixPathString} key;</li>
 *   <li>{@code args[1]} is stored under the {@code inputVectorPathString} key;</li>
 *   <li>{@code args[2]}, with the {@code intermediate} suffix appended, is stored
 *       under the {@code outputPathString} key;</li>
 *   <li>when exactly four arguments are given, {@code args[3]} is parsed as the
 *       non-negative number of requested tasks and stored under the
 *       {@code requestedBspTasksString} key.</li>
 * </ul>
 *
 * <p>With fewer than three arguments the usage is printed and the JVM exits with
 * status {@code -1}.
 *
 * @param conf configuration that receives the parsed values
 * @param args raw command line arguments
 * @throws IllegalArgumentException if the task count is negative or not an integer
 */
private static void parseArgs(HamaConfiguration conf, String[] args) {
    if (args.length < 3) {
        printUsage();
        System.exit(-1);
    }

    conf.set(inputMatrixPathString, args[0]);
    conf.set(inputVectorPathString, args[1]);

    Path path = new Path(args[2]);
    path = path.suffix(intermediate);
    conf.set(outputPathString, path.toString());

    if (args.length == 4) {
        try {
            int taskCount = Integer.parseInt(args[3]);
            if (taskCount < 0) {
                printUsage();
                throw new IllegalArgumentException(
                        "The number of requested tasks can't be negative. Actual value: " + taskCount);
            }
            conf.setInt(requestedBspTasksString, taskCount);
        } catch (NumberFormatException e) {
            printUsage();
            // Keep the parse failure as the cause so the stack trace shows its origin.
            throw new IllegalArgumentException(
                    "The format of requested task count is int. Can not parse value: " + args[3], e);
        }
    }
}
From source file:org.apache.mahout.text.SequenceFilesFromMailArchivesTest.java
License:Apache License
@Test public void testMapReduce() throws Exception { Path tmpDir = getTestTempDirPath(); Path mrOutputDir = new Path(tmpDir, "mail-archives-out-mr"); Configuration configuration = getConfiguration(); FileSystem fs = FileSystem.get(configuration); File expectedInputFile = new File(inputDir.toString()); String[] args = { "-Dhadoop.tmp.dir=" + configuration.get("hadoop.tmp.dir"), "--input", expectedInputFile.getAbsolutePath(), "--output", mrOutputDir.toString(), "--charset", "UTF-8", "--keyPrefix", "TEST", "--method", "mapreduce", "--body", "--subject", "--separator", "" }; // run the application's main method SequenceFilesFromMailArchives.main(args); // app should create a single SequenceFile named "chunk-0" in the output dir FileStatus[] fileStatuses = fs.listStatus(mrOutputDir.suffix("/part-m-00000")); assertEquals(1, fileStatuses.length); // only one assertEquals("part-m-00000", fileStatuses[0].getPath().getName()); SequenceFileIterator<Text, Text> iterator = new SequenceFileIterator<Text, Text>( mrOutputDir.suffix("/part-m-00000"), true, configuration); Assert.assertTrue("First key/value pair not found!", iterator.hasNext()); Pair<Text, Text> record = iterator.next(); File parentFileSubSubDir = new File(new File(new File(new File("TEST"), "subdir"), "subsubdir"), "mail-messages-2.gz"); String expected = record.getFirst().toString(); if (SystemUtils.IS_OS_WINDOWS) { expected = expected.replace("/", "\\"); }//w ww .j a va 2 s.c om Assert.assertEquals(new File(parentFileSubSubDir, testVars[0][0]).toString(), expected); Assert.assertEquals(testVars[0][1] + testVars[0][2], record.getSecond().toString()); Assert.assertTrue("Second key/value pair not found!", iterator.hasNext()); record = iterator.next(); expected = record.getFirst().toString(); if (SystemUtils.IS_OS_WINDOWS) { expected = expected.replace("/", "\\"); } Assert.assertEquals(new File(parentFileSubSubDir, testVars[1][0]).toString(), expected); Assert.assertEquals(testVars[1][1] + testVars[1][2], 
record.getSecond().toString()); // test other file File parentFile = new File(new File(new File("TEST"), "subdir"), "mail-messages.gz"); record = iterator.next(); expected = record.getFirst().toString(); if (SystemUtils.IS_OS_WINDOWS) { expected = expected.replace("/", "\\"); } Assert.assertEquals(new File(parentFile, testVars[0][0]).toString(), expected); Assert.assertEquals(testVars[0][1] + testVars[0][2], record.getSecond().toString()); Assert.assertTrue("Second key/value pair not found!", iterator.hasNext()); record = iterator.next(); expected = record.getFirst().toString(); if (SystemUtils.IS_OS_WINDOWS) { expected = expected.replace("/", "\\"); } Assert.assertEquals(new File(parentFile, testVars[1][0]).toString(), expected); Assert.assertEquals(testVars[1][1] + testVars[1][2], record.getSecond().toString()); Assert.assertFalse("Only four key/value pairs expected!", iterator.hasNext()); }
From source file:org.apache.mahout.text.TestSequenceFilesFromDirectory.java
License:Apache License
private static void checkMRResultFiles(Configuration conf, Path outputDir, String[][] data, String prefix) throws IOException { FileSystem fs = FileSystem.get(conf); // output exists? FileStatus[] fileStatuses = fs.listStatus(outputDir.suffix("/part-m-00000"), PathFilters.logsCRCFilter()); assertEquals(1, fileStatuses.length); // only one assertEquals("part-m-00000", fileStatuses[0].getPath().getName()); Map<String, String> fileToData = Maps.newHashMap(); for (String[] aData : data) { System.out.printf("map.put: %s %s\n", prefix + Path.SEPARATOR + aData[0], aData[1]); fileToData.put(prefix + Path.SEPARATOR + aData[0], aData[1]); }//from w w w .j av a 2 s .co m // read a chunk to check content SequenceFileIterator<Text, Text> iterator = new SequenceFileIterator<Text, Text>(fileStatuses[0].getPath(), true, conf); try { while (iterator.hasNext()) { Pair<Text, Text> record = iterator.next(); String retrievedData = fileToData.get(record.getFirst().toString().trim()); System.out.printf("MR> %s >> %s\n", record.getFirst().toString().trim(), record.getSecond().toString().trim()); assertNotNull(retrievedData); assertEquals(retrievedData, record.getSecond().toString().trim()); } } finally { Closeables.close(iterator, true); } }