Example usage for org.apache.hadoop.fs Path getParent

Introduction

On this page you can find example usages for org.apache.hadoop.fs Path getParent.

Prototype

public Path getParent() 

Document

Returns the parent of a path, or null if the path is at the root.
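
As a minimal sketch (the path below is made up for illustration), this shows what getParent returns at each level, including the null case at the root:

import org.apache.hadoop.fs.Path;

public class GetParentExample {
    public static void main(String[] args) {
        Path file = new Path("/user/data/output.bson");

        // The parent of a nested path is its containing directory.
        Path dir = file.getParent();             // /user/data

        // Walking up repeatedly eventually reaches the root...
        Path root = dir.getParent().getParent(); // /

        // ...whose parent is null.
        System.out.println(root.getParent());    // prints "null"
    }
}

The examples below rely on this in two ways: to place a sibling file (such as a hidden ".splits" file) next to an output file, and to derive the containing directory of a given output path.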

Usage

From source file: com.mongodb.hadoop.BSONFileOutputFormat.java

License: Apache License

@Override
public RecordWriter<K, V> getRecordWriter(final TaskAttemptContext context) throws IOException {
    // Open data output stream

    Path outPath = getDefaultWorkFile(context, ".bson");
    LOG.info("output going into " + outPath);

    FileSystem fs = outPath.getFileSystem(context.getConfiguration());
    FSDataOutputStream outFile = fs.create(outPath);

    FSDataOutputStream splitFile = null;
    if (MongoConfigUtil.getBSONOutputBuildSplits(context.getConfiguration())) {
        Path splitPath = new Path(outPath.getParent(), "." + outPath.getName() + ".splits");
        splitFile = fs.create(splitPath);
    }

    long splitSize = BSONSplitter.getSplitSize(context.getConfiguration(), null);
    return new BSONFileRecordWriter<K, V>(outFile, splitFile, splitSize);
}

From source file: com.mongodb.hadoop.hive.output.HiveBSONFileOutputFormat.java

License: Apache License

/**
 * Create the final output file.
 *
 * @param jc              the job configuration
 * @param fileOutputPath  the file that the output should be directed at
 * @param valueClass      the value class used to create
 * @param tableProperties the tableInfo for this file's corresponding table
 * @return RecordWriter for the output file
 */
@Override
public RecordWriter getHiveRecordWriter(final JobConf jc, final Path fileOutputPath,
        final Class<? extends Writable> valueClass, final boolean isCompressed,
        final Properties tableProperties, final Progressable progress) throws IOException {

    LOG.info("Output going into " + fileOutputPath);

    FileSystem fs = fileOutputPath.getFileSystem(jc);
    FSDataOutputStream outFile = fs.create(fileOutputPath);

    FSDataOutputStream splitFile = null;
    if (MongoConfigUtil.getBSONOutputBuildSplits(jc)) {
        Path splitPath = new Path(fileOutputPath.getParent(), "." + fileOutputPath.getName() + ".splits");
        splitFile = fs.create(splitPath);
    }

    long splitSize = BSONSplitter.getSplitSize(jc, null);

    return new HiveBSONFileRecordWriter(outFile, splitFile, splitSize);
}

From source file: com.mongodb.hadoop.mapred.BSONFileOutputFormat.java

License: Apache License

public RecordWriter<K, V> getRecordWriter(final FileSystem ignored, final JobConf job, final String name,
        final Progressable progress) throws IOException {
    Path outPath = getDefaultWorkFile(job, name, ".bson");
    LOG.info("output going into " + outPath);

    FileSystem fs = outPath.getFileSystem(job);
    FSDataOutputStream outFile = fs.create(outPath);

    FSDataOutputStream splitFile = null;
    if (MongoConfigUtil.getBSONOutputBuildSplits(job)) {
        Path splitPath = new Path(outPath.getParent(), "." + outPath.getName() + ".splits");
        splitFile = fs.create(splitPath);
    }

    long splitSize = BSONSplitter.getSplitSize(job, null);

    return new BSONFileRecordWriter<K, V>(outFile, splitFile, splitSize);
}

From source file: com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java

License: Apache License

public Job setupJob(String jobName, Path outputFile, Class<? extends Mapper> mapperClass,
        Class<? extends Reducer> reducerClass, EntityId startKey, EntityId limitKey, FijiRowFilter filter)
        throws Exception {
    final Job job = new Job(createConfiguration());
    final Configuration conf = job.getConfiguration();

    // Get settings for test.
    final FijiDataRequest request = FijiDataRequest.builder()
            .addColumns(ColumnsDef.create().add("info", "name").add("info", "email")).build();

    job.setJarByClass(IntegrationTestFijiTableInputFormat.class);

    // Setup the InputFormat.
    FijiTableInputFormat.configureJob(job, getFooTable().getURI(), request, startKey, limitKey, filter);
    job.setInputFormatClass(HBaseFijiTableInputFormat.class);

    // Duplicate functionality from MapReduceJobBuilder, since we are not using it here:
    final List<Path> jarFiles = Lists.newArrayList();
    final FileSystem fs = FileSystem.getLocal(conf);
    for (String cpEntry : System.getProperty("java.class.path").split(":")) {
        if (cpEntry.endsWith(".jar")) {
            jarFiles.add(fs.makeQualified(new Path(cpEntry)));
        }
    }
    DistributedCacheJars.addJarsToDistributedCache(job, jarFiles);

    // Create a test job.
    job.setJobName(jobName);

    // Setup the OutputFormat.
    TextOutputFormat.setOutputPath(job, outputFile.getParent());
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Set the mapper class.
    if (null != mapperClass) {
        job.setMapperClass(mapperClass);
    }
    // Set the reducer class.
    if (null != reducerClass) {
        job.setReducerClass(reducerClass);
    }

    return job;
}

From source file: com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java

License: Apache License

/** Test FijiTableInputFormat in a map-only job. */
@Test
public void testMapJob() throws Exception {
    final Path outputFile = createOutputFile();
    // Create a test job.
    final Job job = setupJob("testMapJob", outputFile, TestMapper.class, null, // reducer class
            null, // start key
            null, // limit key
            null); // filter

    // Run the job.
    assertTrue("Hadoop job failed", job.waitForCompletion(true));

    // Check to make sure output exists.
    final FileSystem fs = FileSystem.get(job.getConfiguration());
    assertTrue(fs.exists(outputFile.getParent()));

    // Verify that the output matches what's expected.
    final FSDataInputStream in = fs.open(outputFile);
    final Set<String> actual = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));
    final Set<String> expected = Sets.newHashSet("usermail.example.com\tAaron Kimball", "gmail.com\tJohn Doe",
            "usermail.example.com\tChristophe Bisciglia", "usermail.example.com\tKiyan Ahmadizadeh",
            "gmail.com\tJane Doe", "usermail.example.com\tGarrett Wu");
    assertEquals("Result of job wasn't what was expected", expected, actual);

    // Clean up.
    fs.delete(outputFile.getParent(), true);

    IOUtils.closeQuietly(in);
    // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
    // causes it to close other thread's filesystem objects. For more information
    // see: https://issues.apache.org/jira/browse/HADOOP-7973
}

From source file: com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java

License: Apache License

/** Test FijiTableInputFormat in a map-only job with start and limit keys. */
@Test
public void testMapJobWithStartAndLimitKeys() throws Exception {
    final Path outputFile = createOutputFile();
    // Set the same entity IDs for start and limit, and we should get just the start row
    final EntityId startEntityId = getFooTable().getEntityId("jane.doe@gmail.com");
    final byte[] endRowKey = startEntityId.getHBaseRowKey();
    final EntityId rawLimitEntityId = HBaseEntityId
            .fromHBaseRowKey(Arrays.copyOf(endRowKey, endRowKey.length + 1));

    // Create a test job.
    final Job job = setupJob("testMapJobWithStartAndLimitKeys", outputFile, TestMapper.class, null, // reducer class
            startEntityId, rawLimitEntityId, null); // filter

    // Run the job.
    assertTrue("Hadoop job failed", job.waitForCompletion(true));

    // Check to make sure output exists.
    final FileSystem fs = FileSystem.get(job.getConfiguration());
    assertTrue(fs.exists(outputFile.getParent()));

    // Verify that the output matches what's expected.
    final FSDataInputStream in = fs.open(outputFile);
    final Set<String> actual = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));
    final Set<String> expected = Sets.newHashSet("gmail.com\tJane Doe");
    assertEquals("Result of job wasn't what was expected", expected, actual);

    // Clean up.
    fs.delete(outputFile.getParent(), true);

    IOUtils.closeQuietly(in);
    // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
    // causes it to close other thread's filesystem objects. For more information
    // see: https://issues.apache.org/jira/browse/HADOOP-7973
}

From source file: com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java

License: Apache License

/** Test FijiTableInputFormat in a map-only job with a row filter. */
@Test
public void testMapJobWithFilter() throws Exception {
    final FijiRowFilter filter = new ColumnValueEqualsRowFilter("info", "email",
            new DecodedCell<String>(Schema.create(Schema.Type.STRING), "aaron@usermail.example.com"));
    final Path outputFile = createOutputFile();
    // Create a test job.
    final Job job = setupJob("testMapJobWithFilter", outputFile, TestMapper.class, null, // reducer class
            null, // start key
            null, // limit key
            filter);

    // Run the job.
    assertTrue("Hadoop job failed", job.waitForCompletion(true));

    // Check to make sure output exists.
    final FileSystem fs = FileSystem.get(job.getConfiguration());
    assertTrue(fs.exists(outputFile.getParent()));

    // Verify that the output matches what's expected.
    final FSDataInputStream in = fs.open(outputFile);
    final Set<String> actual = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));
    final Set<String> expected = Sets.newHashSet("usermail.example.com\tAaron Kimball");
    assertEquals("Result of job wasn't what was expected", expected, actual);

    // Clean up.
    fs.delete(outputFile.getParent(), true);

    IOUtils.closeQuietly(in);
    // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
    // causes it to close other thread's filesystem objects. For more information
    // see: https://issues.apache.org/jira/browse/HADOOP-7973
}

From source file: com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java

License: Apache License

/** Test FijiTableInputFormat in a MapReduce job. */
@Test
public void testMapReduceJob() throws Exception {
    final Path outputFile = createOutputFile();
    // Create a test job.
    final Job job = setupJob("testMapReduceJob", outputFile, TestMapper.class, TestReducer.class, null, // start key
            null, // limit key
            null); // filter

    // Run the job.
    assertTrue("Hadoop job failed", job.waitForCompletion(true));

    // Check to make sure output exists.
    final FileSystem fs = FileSystem.get(job.getConfiguration());
    assertTrue(fs.exists(outputFile.getParent()));

    // Verify that the output matches what's expected.
    final FSDataInputStream in = fs.open(outputFile);
    final Set<String> output = Sets.newHashSet(IOUtils.toString(in).trim().split("\n"));
    final ImmutableMap.Builder<String, Set<String>> builder = ImmutableMap.builder();
    for (String line : output) {
        final String[] keyValue = line.split("\t");
        final String emailDomain = keyValue[0];
        final Set<String> names = Sets.newHashSet(keyValue[1].split(","));

        builder.put(emailDomain, names);
    }
    final Map<String, Set<String>> actual = builder.build();
    final Map<String, Set<String>> expected = ImmutableMap.<String, Set<String>>builder()
            .put("usermail.example.com",
                    Sets.newHashSet("Aaron Kimball", "Christophe Bisciglia", "Kiyan Ahmadizadeh", "Garrett Wu"))
            .put("gmail.com", Sets.newHashSet("John Doe", "Jane Doe")).build();
    assertEquals("Result of job wasn't what was expected", expected, actual);

    // Clean up.
    fs.delete(outputFile.getParent(), true);

    IOUtils.closeQuietly(in);
    // NOTE: fs should get closed here, but doesn't because of a bug with FileSystem that
    // causes it to close other thread's filesystem objects. For more information
    // see: https://issues.apache.org/jira/browse/HADOOP-7973
}

From source file: com.moz.fiji.mapreduce.lib.examples.News20BulkImporter.java

License: Apache License

/**
 * Reads a single news article and writes its contents to a new fiji row,
 * indexed by the article's name (a string consisting of the parent folder and
 * this article's hash) and the a priori categorization of this article.
 *
 * @param key The fully qualified path to the current file we're reading.
 * @param value The raw data to insert into this column.
 * @param context The context to write to.
 * @throws IOException if there is an error.
 */
@Override
public void produce(Text key, Text value, FijiTableContext context) throws IOException {
    Path qualifiedPath = new Path(key.toString());

    // Category is specified on the containing folder.
    String category = qualifiedPath.getParent().getName();
    // Name is the concatenation of category and file name.
    String name = category + "." + qualifiedPath.getName();

    // write name, category, and raw article.
    EntityId entity = context.getEntityId(name);
    context.put(entity, FAMILY, ARTICLE_NAME_QUALIFIER, name);
    context.put(entity, FAMILY, CATEGORY_QUALIFIER, category);
    context.put(entity, FAMILY, RAW_ARTICLE_QUALIFIER, value.toString());
}

From source file: com.mvad.flink.demo.streaming.lib.sink.bucketing.BucketingSink.java

License: Apache License

/**
 * Opens a new part file.
 *
 * <p>
 * This closes the old bucket file and retrieves a new bucket path from the {@code Bucketer}.
 */
private void openNewPartFile(Path bucketPath, BucketState<T> bucketState) throws Exception {
    closeCurrentPartFile(bucketState);

    FileSystem fs = new Path(basePath).getFileSystem(hadoopConf);

    if (!fs.exists(bucketPath)) {
        try {
            if (fs.mkdirs(bucketPath)) {
                LOG.debug("Created new bucket directory: {}", bucketPath);
            }
        } catch (IOException e) {
            throw new RuntimeException("Could not create new bucket path.", e);
        }
    }

    Path partPath = new Path(bucketPath,
            partPrefix + "-" + subtaskIndex + "-" + bucketState.partCounter + partSuffix);

    // This should work since there is only one parallel subtask that tries names with
    // our subtask id. Otherwise we would run into concurrency issues here.
    while (fs.exists(partPath) || fs
            .exists(new Path(partPath.getParent(), pendingPrefix + partPath.getName()).suffix(pendingSuffix))) {
        bucketState.partCounter++;
        partPath = new Path(bucketPath,
                partPrefix + "-" + subtaskIndex + "-" + bucketState.partCounter + partSuffix);
    }

    // increase, so we don't have to check for this name next time
    bucketState.partCounter++;

    LOG.debug("Next part path is {}", partPath.toString());
    bucketState.currentFile = partPath.toString();

    Path inProgressPath = new Path(partPath.getParent(), inProgressPrefix + partPath.getName())
            .suffix(inProgressSuffix);

    // If we don't already have a writer for this bucket, create one
    if (bucketState.writer == null) {
        bucketState.writer = writerTemplate.duplicate();
    }

    bucketState.writer.open(fs, inProgressPath);
    bucketState.isWriterOpen = true;
}