List of usage examples for the org.apache.hadoop.fs.FileSystem.exists method
public boolean exists(Path f) throws IOException
From source file:com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java
License:Apache License
/** Test FijiTableInputFormat in a MapReduce job. */ @Test// ww w . j a va 2s .c o m public void testMapReduceJob() throws Exception { final Path outputFile = createOutputFile(); // Create a test job. final Job job = setupJob("testMapReduceJob", outputFile, TestMapper.class, TestReducer.class, null, // start key null, // limit key null); // filter // Run the job. assertTrue("Hadoop job failed", job.waitForCompletion(true)); // Check to make sure output exists. final FileSystem fs = FileSystem.get(job.getConfiguration()); assertTrue(fs.exists(outputFile.getParent())); // Verify that the output matches what's expected. final FSDataInputStream in = fs.open(outputFile); final Set<String> output = Sets.newHashSet(IOUtils.toString(in).trim().split("\n")); final ImmutableMap.Builder<String, Set<String>> builder = ImmutableMap.builder(); for (String line : output) { final String[] keyValue = line.split("\t"); final String emailDomain = keyValue[0]; final Set<String> names = Sets.newHashSet(keyValue[1].split(",")); builder.put(emailDomain, names); } final Map<String, Set<String>> actual = builder.build(); final Map<String, Set<String>> expected = ImmutableMap.<String, Set<String>>builder() .put("usermail.example.com", Sets.newHashSet("Aaron Kimball", "Christophe Bisciglia", "Kiyan Ahmadizadeh", "Garrett Wu")) .put("gmail.com", Sets.newHashSet("John Doe", "Jane Doe")).build(); assertEquals("Result of job wasn't what was expected", expected, actual); // Clean up. fs.delete(outputFile.getParent(), true); IOUtils.closeQuietly(in); // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that // causes it to close other thread's filesystem objects. For more information // see: https://issues.apache.org/jira/browse/HADOOP-7973 }
From source file:com.moz.fiji.mapreduce.output.TestFijiHFileOutputFormat.java
License:Apache License
/**
 * Asserts the content of an HFile.
 *
 * <p>Fails if the file does not exist, if it holds a different number of entries than
 * {@code values}, or if any entry differs from the expected one at the same position.</p>
 *
 * @param path Path of the HFile to validate the content of.
 * @param values Expected KeyValue entries, in order.
 * @throws IOException on I/O error.
 */
private void assertHFileContent(Path path, KeyValue... values) throws IOException {
  final FileSystem fs = path.getFileSystem(mConf);
  assertTrue(String.format("HFile '%s' does not exist.", path), fs.exists(path));

  final List<KeyValue> kvs = loadHFile(path, mConf);

  // JUnit's assertEquals takes (expected, actual); passing them in that order keeps the
  // "expected:<...> but was:<...>" failure message truthful.
  assertEquals(
      String.format("Unexpected number of entries in HFile '%s'.", path),
      values.length, kvs.size());
  for (int i = 0; i < values.length; ++i) {
    assertEquals(
        String.format("Unexpected entry at index %d of HFile '%s'.", i, path),
        values[i], kvs.get(i));
  }
}
From source file:com.moz.fiji.mapreduce.output.TestFijiHFileOutputFormat.java
License:Apache License
/**
 * Writes two entries sharing the row key "row-key" while the configured maximum HFile size is
 * {@code entry1.getLength() + 1}, and checks where they end up.
 *
 * <p>Expects both entries in HFile "00000" of the default locality group directory, no
 * "00001" file, and no directory for the in-memory locality group.</p>
 *
 * @throws Exception on error writing or reading the HFiles.
 */
@Test
public void testMaxHFileSizeSameRow() throws Exception {
  final HFileKeyValue entry1 = entry("row-key", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
  final HFileKeyValue entry2 = entry("row-key", mDefaultLGId, "b", 1L, makeBytes(0, 1024));

  // The limit is just above one entry, so the second write exceeds it.
  mConf.setInt(FijiHFileOutputFormat.CONF_HREGION_MAX_FILESIZE, entry1.getLength() + 1);

  final TaskAttemptID taskAttemptId =
      FijiMRPlatformBridge.get().newTaskAttemptID("jobTracker_jtPort", 314, TaskType.MAP, 159, 2);
  final TaskAttemptContext context =
      FijiMRPlatformBridge.get().newTaskAttemptContext(mConf, taskAttemptId);
  final Path outputDir =
      mFormat.getDefaultWorkFile(context, FijiHFileOutputFormat.OUTPUT_EXTENSION);
  final FileSystem fs = outputDir.getFileSystem(mConf);

  final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);
  writer.write(entry1, NW);
  writer.write(entry2, NW);
  writer.close(context);

  final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
  assertTrue(fs.exists(defaultDir));

  // Nothing was written to the in-memory locality group.
  final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
  assertFalse(fs.exists(inMemoryDir));

  // Both entries belong to the same row and are expected in a single HFile.
  assertHFileContent(new Path(defaultDir, "00000"), entry1.getKeyValue(), entry2.getKeyValue());
  assertFalse(fs.exists(new Path(defaultDir, "00001")));

  mFormat.getOutputCommitter(context).commitTask(context);
}
From source file:com.moz.fiji.mapreduce.output.TestFijiHFileOutputFormat.java
License:Apache License
@Test public void testMaxHFileSizeNewRow() throws Exception { final HFileKeyValue entry1 = entry("row-key1", mDefaultLGId, "a", 1L, makeBytes(0, 1024)); final HFileKeyValue entry2 = entry("row-key2", mDefaultLGId, "b", 1L, makeBytes(0, 1024)); mConf.setInt(FijiHFileOutputFormat.CONF_HREGION_MAX_FILESIZE, entry1.getLength() + 1); final TaskAttemptID taskAttemptId = FijiMRPlatformBridge.get().newTaskAttemptID("jobTracker_jtPort", 314, TaskType.MAP, 159, 2);// ww w .j ava 2 s . co m final TaskAttemptContext context = FijiMRPlatformBridge.get().newTaskAttemptContext(mConf, taskAttemptId); final Path outputDir = mFormat.getDefaultWorkFile(context, FijiHFileOutputFormat.OUTPUT_EXTENSION); final FileSystem fs = outputDir.getFileSystem(mConf); final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context); writer.write(entry1, NW); writer.write(entry2, NW); writer.close(context); final Path defaultDir = new Path(outputDir, mDefaultLGId.toString()); assertTrue(fs.exists(defaultDir)); final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString()); assertFalse(fs.exists(inMemoryDir)); assertHFileContent(new Path(defaultDir, "00000"), entry1.getKeyValue()); assertHFileContent(new Path(defaultDir, "00001"), entry2.getKeyValue()); assertFalse(fs.exists(new Path(defaultDir, "00002"))); mFormat.getOutputCommitter(context).commitTask(context); }
From source file:com.moz.fiji.mapreduce.output.TestFijiHFileOutputFormat.java
License:Apache License
@Test public void testMultipleLayouts() throws Exception { final TaskAttemptID taskAttemptId = FijiMRPlatformBridge.get().newTaskAttemptID("jobTracker_jtPort", 314, TaskType.MAP, 159, 2);/*from w ww . j a v a 2 s . c om*/ final TaskAttemptContext context = FijiMRPlatformBridge.get().newTaskAttemptContext(mConf, taskAttemptId); final Path outputDir = mFormat.getDefaultWorkFile(context, FijiHFileOutputFormat.OUTPUT_EXTENSION); final FileSystem fs = outputDir.getFileSystem(mConf); final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context); final HFileKeyValue defaultEntry = entry("row-key", mDefaultLGId, "a", 1L, makeBytes(0, 1024)); writer.write(defaultEntry, NW); final HFileKeyValue inMemoryEntry = entry("row-key", mInMemoryLGId, "a", 1L, makeBytes(2, 1024)); writer.write(inMemoryEntry, NW); try { // Test with an invalid locality group ID: final ColumnId invalid = new ColumnId(1234); assertTrue(!mLayout.getLocalityGroupIdNameMap().containsKey(invalid)); writer.write(entry("row-key", invalid, "a", 1L, HConstants.EMPTY_BYTE_ARRAY), NW); fail("Output format did not fail on unknown locality group IDs."); } catch (IllegalArgumentException iae) { LOG.info("Expected error: " + iae); } writer.close(context); final Path defaultDir = new Path(outputDir, mDefaultLGId.toString()); assertTrue(fs.exists(defaultDir)); final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString()); assertTrue(fs.exists(inMemoryDir)); assertHFileContent(new Path(defaultDir, "00000"), defaultEntry.getKeyValue()); assertHFileContent(new Path(inMemoryDir, "00000"), inMemoryEntry.getKeyValue()); mFormat.getOutputCommitter(context).commitTask(context); }
From source file:com.moz.fiji.mapreduce.output.TestFijiHFileOutputFormat.java
License:Apache License
/**
 * Writes a regular entry followed by DeleteCell, DeleteColumn and DeleteFamily entries to the
 * default locality group, one row each.
 *
 * <p>Expects all four entries, in write order, in HFile "00000" of the default locality
 * group directory, and no "00001" file.</p>
 *
 * @throws Exception on error writing or reading the HFiles.
 */
@Test
public void testTombstonesInHFile() throws Exception {
  // One regular cell plus one tombstone of each type.
  final HFileKeyValue putEntry =
      entry("row-key1", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
  final HFileKeyValue cellTombstone =
      entry("row-key2", mDefaultLGId, "a", 1L, HFileKeyValue.Type.DeleteCell);
  final HFileKeyValue columnTombstone =
      entry("row-key3", mDefaultLGId, "a", 1L, HFileKeyValue.Type.DeleteColumn);
  final HFileKeyValue familyTombstone =
      entry("row-key4", mDefaultLGId, "a", 1L, HFileKeyValue.Type.DeleteFamily);

  final TaskAttemptID attemptId =
      FijiMRPlatformBridge.get().newTaskAttemptID("jobTracker_jtPort", 314, TaskType.MAP, 159, 2);
  final TaskAttemptContext attemptContext =
      FijiMRPlatformBridge.get().newTaskAttemptContext(mConf, attemptId);
  final Path workDir =
      mFormat.getDefaultWorkFile(attemptContext, FijiHFileOutputFormat.OUTPUT_EXTENSION);
  final FileSystem fileSystem = workDir.getFileSystem(mConf);

  // Write the four entries in row-key order, then flush.
  final RecordWriter<HFileKeyValue, NullWritable> recordWriter =
      mFormat.getRecordWriter(attemptContext);
  recordWriter.write(putEntry, NW);
  recordWriter.write(cellTombstone, NW);
  recordWriter.write(columnTombstone, NW);
  recordWriter.write(familyTombstone, NW);
  recordWriter.close(attemptContext);

  final Path defaultGroupDir = new Path(workDir, mDefaultLGId.toString());
  assertTrue(fileSystem.exists(defaultGroupDir));

  // Everything is expected in the first HFile; there must be no second one.
  final Path firstHFile = new Path(defaultGroupDir, "00000");
  assertHFileContent(
      firstHFile,
      putEntry.getKeyValue(),
      cellTombstone.getKeyValue(),
      columnTombstone.getKeyValue(),
      familyTombstone.getKeyValue());
  final Path secondHFile = new Path(defaultGroupDir, "00001");
  assertFalse(fileSystem.exists(secondHFile));

  mFormat.getOutputCommitter(attemptContext).commitTask(attemptContext);
}
From source file:com.moz.fiji.schema.testutil.IntegrationHelper.java
License:Apache License
/** * Copies a local file into the default filesystem (which is HDFS if you've started an * HBase cluster).//from w w w. j av a2 s . c o m * * @param localFile The file to copy. * @param destPath A relative destination path to be used within the shared tmp dir. * @return The path of the file in HDFS. */ public Path copyToDfs(File localFile, String destPath) throws IOException { Path target = getDfsPath(destPath); FileSystem fs = FileSystem.get(getConf()); if (!fs.exists(target)) { // Only copy if it doesn't already exist. FileUtil.copy(localFile, fs, target, false, getConf()); } return target; }
From source file:com.mozilla.grouperfish.transforms.coclustering.pig.storage.MahoutVectorStorage.java
License:Apache License
public static void cleanupOnFailureImpl(String location, Job job) throws IOException { Path path = new Path(location); FileSystem fs = path.getFileSystem(job.getConfiguration()); if (fs.exists(path)) { fs.delete(path, true);//from ww w. j a v a2 s.co m } }
From source file:com.mvad.flink.demo.streaming.lib.sink.bucketing.BucketingSink.java
License:Apache License
@Override public void open(Configuration parameters) throws Exception { super.open(parameters); subtaskIndex = getRuntimeContext().getIndexOfThisSubtask(); state = new State<T>(); Path baseDirectory = new Path(basePath); hadoopConf = HadoopFileSystem.getHadoopConfiguration(); FileSystem fs = baseDirectory.getFileSystem(hadoopConf); refTruncate = reflectTruncate(fs);/* www.ja v a 2 s . com*/ long currentProcessingTime = ((StreamingRuntimeContext) getRuntimeContext()).getCurrentProcessingTime(); checkForInactiveBuckets(currentProcessingTime); ((StreamingRuntimeContext) getRuntimeContext()) .registerTimer(currentProcessingTime + inactiveBucketCheckInterval, this); this.clock = new Clock() { @Override public long currentTimeMillis() { return ((StreamingRuntimeContext) getRuntimeContext()).getCurrentProcessingTime(); } }; // delete pending/in-progress files that might be left if we fail while // no checkpoint has yet been done try { if (fs.exists(baseDirectory) && cleanupOnOpen) { RemoteIterator<LocatedFileStatus> bucketFiles = fs.listFiles(baseDirectory, true); while (bucketFiles.hasNext()) { LocatedFileStatus file = bucketFiles.next(); if (file.getPath().toString().endsWith(pendingSuffix)) { // only delete files that contain our subtask index if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) { LOG.debug("(OPEN) Deleting leftover pending file {}", file.getPath().toString()); fs.delete(file.getPath(), true); } } if (file.getPath().toString().endsWith(inProgressSuffix)) { // only delete files that contain our subtask index if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) { LOG.debug("(OPEN) Deleting leftover in-progress file {}", file.getPath().toString()); fs.delete(file.getPath(), true); } } } } } catch (IOException e) { LOG.error("Error while deleting leftover pending/in-progress files: {}", e); throw new RuntimeException("Error while deleting leftover pending/in-progress files.", e); } }