Example usage for org.apache.hadoop.fs FileSystem exists

Introduction

On this page you can find usage examples for org.apache.hadoop.fs.FileSystem#exists.

Prototype

public boolean exists(Path f) throws IOException 

Document

Check if a path exists.
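
As a quick reference before the examples below, here is a minimal, self-contained sketch of calling exists(Path) against the default filesystem; the default Configuration and the path /tmp/example.txt are placeholders for illustration, not taken from any example on this page.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemExistsExample {
    public static void main(String[] args) throws IOException {
        // Resolve the filesystem configured by fs.defaultFS (HDFS, local, ...).
        final Configuration conf = new Configuration();
        final FileSystem fs = FileSystem.get(conf);

        // exists() returns true for both files and directories, and
        // false (rather than throwing) when the path is absent.
        final Path path = new Path("/tmp/example.txt"); // placeholder path
        if (fs.exists(path)) {
            System.out.println(path + " exists");
        } else {
            System.out.println(path + " does not exist");
        }
    }
}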

Usage

From source file: com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java

License: Apache License

/** Test FijiTableInputFormat in a MapReduce job. */
@Test
public void testMapReduceJob() throws Exception {
    final Path outputFile = createOutputFile();
    // Create a test job.
    final Job job = setupJob("testMapReduceJob", outputFile, TestMapper.class, TestReducer.class,
            null,  // start key
            null,  // limit key
            null); // filter

    // Run the job.
    assertTrue("Hadoop job failed", job.waitForCompletion(true));

    // Check to make sure output exists.
    final FileSystem fs = FileSystem.get(job.getConfiguration());
    assertTrue(fs.exists(outputFile.getParent()));

    // Verify that the output matches what's expected.
    final FSDataInputStream in = fs.open(outputFile);
    final Set<String> output = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));
    final ImmutableMap.Builder<String, Set<String>> builder = ImmutableMap.builder();
    for (String line : output) {
        final String[] keyValue = line.split("\t");
        final String emailDomain = keyValue[0];
        final Set<String> names = Sets.newHashSet(keyValue[1].split(","));

        builder.put(emailDomain, names);
    }
    final Map<String, Set<String>> actual = builder.build();
    final Map<String, Set<String>> expected = ImmutableMap.<String, Set<String>>builder()
            .put("usermail.example.com",
                    Sets.newHashSet("Aaron Kimball", "Christophe Bisciglia", "Kiyan Ahmadizadeh", "Garrett Wu"))
            .put("gmail.com", Sets.newHashSet("John Doe", "Jane Doe")).build();
    assertEquals("Result of job wasn't what was expected", expected, actual);

    // Clean up.
    fs.delete(outputFile.getParent(), true);

    IOUtils.closeQuietly(in);
    // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
    // causes it to close other thread's filesystem objects. For more information
    // see: https://issues.apache.org/jira/browse/HADOOP-7973
}

From source file: com.moz.fiji.mapreduce.output.TestFijiHFileOutputFormat.java

License: Apache License

/**
 * Asserts the content of an HFile.
 *
 * @param path Path of the HFile to validate the content of.
 * @param values Expected KeyValue entries, in order.
 * @throws IOException on I/O error.
 */
private void assertHFileContent(Path path, KeyValue... values) throws IOException {
    final FileSystem fs = path.getFileSystem(mConf);
    assertTrue(String.format("HFile '%s' does not exist.", path), fs.exists(path));
    final List<KeyValue> kvs = loadHFile(path, mConf);
    assertEquals(values.length, kvs.size());
    for (int i = 0; i < values.length; ++i) {
        assertEquals(values[i], kvs.get(i));
    }
}

From source file: com.moz.fiji.mapreduce.output.TestFijiHFileOutputFormat.java

License: Apache License

@Test
public void testMaxHFileSizeSameRow() throws Exception {
    final HFileKeyValue entry1 = entry("row-key", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    final HFileKeyValue entry2 = entry("row-key", mDefaultLGId, "b", 1L, makeBytes(0, 1024));

    mConf.setInt(FijiHFileOutputFormat.CONF_HREGION_MAX_FILESIZE, entry1.getLength() + 1);

    final TaskAttemptID taskAttemptId = FijiMRPlatformBridge.get().newTaskAttemptID("jobTracker_jtPort", 314,
            TaskType.MAP, 159, 2);
    final TaskAttemptContext context = FijiMRPlatformBridge.get().newTaskAttemptContext(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, FijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);
    writer.write(entry1, NW);
    writer.write(entry2, NW);
    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertFalse(fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), entry1.getKeyValue(), entry2.getKeyValue());
    assertFalse(fs.exists(new Path(defaultDir, "00001")));

    mFormat.getOutputCommitter(context).commitTask(context);
}

From source file: com.moz.fiji.mapreduce.output.TestFijiHFileOutputFormat.java

License: Apache License

@Test
public void testMaxHFileSizeNewRow() throws Exception {
    final HFileKeyValue entry1 = entry("row-key1", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    final HFileKeyValue entry2 = entry("row-key2", mDefaultLGId, "b", 1L, makeBytes(0, 1024));

    mConf.setInt(FijiHFileOutputFormat.CONF_HREGION_MAX_FILESIZE, entry1.getLength() + 1);

    final TaskAttemptID taskAttemptId = FijiMRPlatformBridge.get().newTaskAttemptID("jobTracker_jtPort", 314,
            TaskType.MAP, 159, 2);
    final TaskAttemptContext context = FijiMRPlatformBridge.get().newTaskAttemptContext(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, FijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);
    writer.write(entry1, NW);
    writer.write(entry2, NW);
    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertFalse(fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), entry1.getKeyValue());
    assertHFileContent(new Path(defaultDir, "00001"), entry2.getKeyValue());
    assertFalse(fs.exists(new Path(defaultDir, "00002")));

    mFormat.getOutputCommitter(context).commitTask(context);
}

From source file: com.moz.fiji.mapreduce.output.TestFijiHFileOutputFormat.java

License: Apache License

@Test
public void testMultipleLayouts() throws Exception {
    final TaskAttemptID taskAttemptId = FijiMRPlatformBridge.get().newTaskAttemptID("jobTracker_jtPort", 314,
            TaskType.MAP, 159, 2);
    final TaskAttemptContext context = FijiMRPlatformBridge.get().newTaskAttemptContext(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, FijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);

    final HFileKeyValue defaultEntry = entry("row-key", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    writer.write(defaultEntry, NW);
    final HFileKeyValue inMemoryEntry = entry("row-key", mInMemoryLGId, "a", 1L, makeBytes(2, 1024));
    writer.write(inMemoryEntry, NW);

    try {
        // Test with an invalid locality group ID:
        final ColumnId invalid = new ColumnId(1234);
        assertFalse(mLayout.getLocalityGroupIdNameMap().containsKey(invalid));
        writer.write(entry("row-key", invalid, "a", 1L, HConstants.EMPTY_BYTE_ARRAY), NW);
        fail("Output format did not fail on unknown locality group IDs.");
    } catch (IllegalArgumentException iae) {
        LOG.info("Expected error: " + iae);
    }

    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertTrue(fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), defaultEntry.getKeyValue());
    assertHFileContent(new Path(inMemoryDir, "00000"), inMemoryEntry.getKeyValue());

    mFormat.getOutputCommitter(context).commitTask(context);
}

From source file: com.moz.fiji.mapreduce.output.TestFijiHFileOutputFormat.java

License: Apache License

@Test
public void testTombstonesInHFile() throws Exception {
    final HFileKeyValue put = entry("row-key1", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    final HFileKeyValue deleteCell = entry("row-key2", mDefaultLGId, "a", 1L, HFileKeyValue.Type.DeleteCell);
    final HFileKeyValue deleteColumn = entry("row-key3", mDefaultLGId, "a", 1L,
            HFileKeyValue.Type.DeleteColumn);
    final HFileKeyValue deleteFamily = entry("row-key4", mDefaultLGId, "a", 1L,
            HFileKeyValue.Type.DeleteFamily);

    final TaskAttemptID taskAttemptId = FijiMRPlatformBridge.get().newTaskAttemptID("jobTracker_jtPort", 314,
            TaskType.MAP, 159, 2);
    final TaskAttemptContext context = FijiMRPlatformBridge.get().newTaskAttemptContext(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, FijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);
    writer.write(put, NW);
    writer.write(deleteCell, NW);
    writer.write(deleteColumn, NW);
    writer.write(deleteFamily, NW);
    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    assertHFileContent(new Path(defaultDir, "00000"), put.getKeyValue(), deleteCell.getKeyValue(),
            deleteColumn.getKeyValue(), deleteFamily.getKeyValue());
    assertFalse(fs.exists(new Path(defaultDir, "00001")));

    mFormat.getOutputCommitter(context).commitTask(context);
}

From source file: com.moz.fiji.schema.testutil.IntegrationHelper.java

License: Apache License

/**
 * Copies a local file into the default filesystem (which is HDFS if you've started an
 * HBase cluster).
 *
 * @param localFile The file to copy.
 * @param destPath A relative destination path to be used within the shared tmp dir.
 * @return The path of the file in HDFS.
 */
public Path copyToDfs(File localFile, String destPath) throws IOException {
    Path target = getDfsPath(destPath);
    FileSystem fs = FileSystem.get(getConf());
    if (!fs.exists(target)) {
        // Only copy if it doesn't already exist.
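        // Note: this check-then-copy is not atomic; a concurrent writer could
        // create the target between the exists() call and the copy.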
        FileUtil.copy(localFile, fs, target, false, getConf());
    }
    return target;
}

From source file: com.mozilla.grouperfish.transforms.coclustering.pig.storage.MahoutVectorStorage.java

License: Apache License

public static void cleanupOnFailureImpl(String location, Job job) throws IOException {
    Path path = new Path(location);
    FileSystem fs = path.getFileSystem(job.getConfiguration());
    if (fs.exists(path)) {
        fs.delete(path, true);
    }
}

From source file: com.mvad.flink.demo.streaming.lib.sink.bucketing.BucketingSink.java

License: Apache License

@Override
public void open(Configuration parameters) throws Exception {
    super.open(parameters);

    subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();

    state = new State<T>();

    Path baseDirectory = new Path(basePath);
    hadoopConf = HadoopFileSystem.getHadoopConfiguration();
    FileSystem fs = baseDirectory.getFileSystem(hadoopConf);
    refTruncate = reflectTruncate(fs);

    long currentProcessingTime = ((StreamingRuntimeContext) getRuntimeContext()).getCurrentProcessingTime();

    checkForInactiveBuckets(currentProcessingTime);

    ((StreamingRuntimeContext) getRuntimeContext())
            .registerTimer(currentProcessingTime + inactiveBucketCheckInterval, this);

    this.clock = new Clock() {
        @Override
        public long currentTimeMillis() {
            return ((StreamingRuntimeContext) getRuntimeContext()).getCurrentProcessingTime();
        }
    };

    // delete pending/in-progress files that might be left if we fail while
    // no checkpoint has yet been done
    try {
        if (fs.exists(baseDirectory) && cleanupOnOpen) {
            RemoteIterator<LocatedFileStatus> bucketFiles = fs.listFiles(baseDirectory, true);

            while (bucketFiles.hasNext()) {
                LocatedFileStatus file = bucketFiles.next();
                if (file.getPath().toString().endsWith(pendingSuffix)) {
                    // only delete files that contain our subtask index
                    if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                        LOG.debug("(OPEN) Deleting leftover pending file {}", file.getPath().toString());
                        fs.delete(file.getPath(), true);
                    }
                }
                if (file.getPath().toString().endsWith(inProgressSuffix)) {
                    // only delete files that contain our subtask index
                    if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                        LOG.debug("(OPEN) Deleting leftover in-progress file {}", file.getPath().toString());
                        fs.delete(file.getPath(), true);
                    }
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Error while deleting leftover pending/in-progress files.", e);
        throw new RuntimeException("Error while deleting leftover pending/in-progress files.", e);
    }
}