List of usage examples for org.apache.hadoop.fs.Path SEPARATOR
String SEPARATOR — the path-separator string ("/") used when composing Hadoop Path strings.
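Path.SEPARATOR is the URI-style separator that Hadoop's Path class uses regardless of the local platform's File.separator, which makes it the safe choice when assembling path strings by hand. A minimal sketch of the pattern the examples below repeat (the class name, user directory, and job id here are made up for illustration):

import org.apache.hadoop.fs.Path;

public class SeparatorDemo {
    public static void main(String[] args) {
        // Join components with Path.SEPARATOR ("/") rather than File.separator,
        // which would be "\" on Windows and produce an invalid Hadoop path.
        String staging = ".staging" + Path.SEPARATOR + "job_0001";
        Path p = new Path("/user" + Path.SEPARATOR + "alice", staging);
        System.out.println(p); // prints /user/alice/.staging/job_0001
    }
}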
From source file:HBaseBloomFilterSemiJoinSystemTest.java
License:Apache License
@Test
public void testBloomFilterSemiJoinDirectly()
        throws IOException, InterruptedException, NoSuchFieldException, IllegalAccessException {
    NavigableMap<ByteBuffer, ListMultimap<ByteBuffer, BloomFilter>> regionIndex =
            NonAggregatingRegionObserver.buildIndex("test_table", util.getConfiguration(),
                    util.getTestFileSystem(),
                    new Path(util.getDefaultRootDirPath() + Path.SEPARATOR + "test_table"));
    assertSame("Unexpected number of regions.", 3, regionIndex.size());

    NavigableMap<HRegionInfo, ServerName> regions = table.getRegionLocations();
    for (Map.Entry<HRegionInfo, ServerName> entry : regions.entrySet()) {
        LOG.info("Using Region: " + entry.getKey() + " Server: " + entry.getValue());
    }

    for (Map.Entry<ByteBuffer, ListMultimap<ByteBuffer, BloomFilter>> entry : regionIndex.entrySet()) {
        assertSame("Unexpected number of HFiles.", 1,
                entry.getValue().get(ByteBuffer.wrap(ROW_COLBF_CF)).size());
    }

    CompoundBloomFilterBase bfEntryCreator = new CompoundBloomFilterBase();
    double falsePositivesCounter = 0.0;
    for (int i = 0; i < NUM_ROWS; i++) {
        byte[] key1 = toBytes("aaa" + i);
        byte[] key2 = toBytes("bbb" + i);
        byte[] key3 = toBytes("ccc" + i);
        // note: as written this assertion is vacuous; it checks only that the
        // concatenated message string is non-null
        assertNotNull("Could not find a region for key: " + new String(key2));

        // creates bbbXaaaX bf entry keys that must match region0 [bbb0, bbb999]
        // ROW_COL BF [bbb0aaa0, bbb999aaa999]
        byte[] bfMatchKey = bfEntryCreator.createBloomKey(key2, 0, key2.length, key1, 0, key1.length);
        BloomFilter bfMatch = bloomFilterForRowCol(regionIndex, key2);
        // bloom filters never return false negatives
        assertTrue("Unexpected result from the bloom filter: " + new String(bfMatchKey),
                bfMatch.contains(bfMatchKey, 0, bfMatchKey.length, null));

        // creates bbbXcccX bf entry keys that don't exist in region2 [bbb0, bbb999]
        // ROW_COL BF [bbb0aaa0, bbb999aaa999] but will match the index and therefore
        // may produce false positives
        byte[] bfNoMatchKeyFalsePositives =
                bfEntryCreator.createBloomKey(key2, 0, key2.length, key3, 0, key3.length);
        BloomFilter bfNoMatchFalsePositives = bloomFilterForRowCol(regionIndex, key2);
        if (bfNoMatchFalsePositives.contains(bfNoMatchKeyFalsePositives, 0,
                bfNoMatchKeyFalsePositives.length, null)) {
            falsePositivesCounter++;
        }

        // only for rows in the lower half whose index starts with a digit below 5
        if (i <= NUM_ROWS / 2 && Integer.parseInt((i + "").charAt(0) + "") < 5) {
            // creates cccXaaaX bf entry keys that don't exist in region3 [ccc0, ccc999]
            // ROW_COL BF [ccc0bbb0, ccc999bbb999] but won't match the index
            // (and therefore won't produce false positives)
            byte[] bfNoMatchKeyNoFalsePositives =
                    bfEntryCreator.createBloomKey(key3, 0, key3.length, key1, 0, key1.length);
            BloomFilter bfNoMatchNoFalsePositives = bloomFilterForRowCol(regionIndex, key3);
            assertFalse("Unexpected result from the bloom filter: " + new String(bfNoMatchKeyNoFalsePositives),
                    bfNoMatchNoFalsePositives.contains(bfNoMatchKeyNoFalsePositives, 0,
                            bfNoMatchKeyNoFalsePositives.length, null));
        }
    }

    double falsePositiveRate = falsePositivesCounter / NUM_ROWS;
    LOG.info("False positive rate: {}", falsePositiveRate);
    assertTrue("Unexpectedly high percentage of false positives: " + falsePositiveRate,
            falsePositiveRate < 0.1);
}
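In this test, Path.SEPARATOR only joins the HBase root directory with the table name to locate the table's files for buildIndex. The Bloom keys themselves come from CompoundBloomFilterBase.createBloomKey, which (in the HBase versions this test appears to target) concatenates a row key with a column qualifier, matching the per-HFile ROW_COL Bloom filters that the index stores.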
From source file:be.uantwerpen.adrem.hadoop.util.Tools.java
License:Apache License
public static String createPath(String... parts) {
    StringBuilder path = new StringBuilder();
    for (String part : parts) {
        path.append(part);
        path.append(Path.SEPARATOR);
    }
    // drop the trailing separator
    return path.substring(0, path.length() - 1);
}
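For example, createPath("tmp", "fim", "output") returns "tmp/fim/output". Note that the final substring call assumes at least one part was passed; with no arguments, path.length() - 1 is -1 and the call throws StringIndexOutOfBoundsException.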
From source file:be.uantwerpen.adrem.hadoop.util.Tools.java
License:Apache License
public static void cleanupSubdirsExcept(String dir, Collection<String> toKeep) {
    Path path = new Path(dir);
    try {
        for (FileStatus fs : path.getFileSystem(new Configuration()).listStatus(path)) {
            String[] sp = fs.getPath().toString().split(Path.SEPARATOR);
            String filename = sp[sp.length - 1];
            if (toKeep.contains(filename)) {
                cleanDirs(fs.getPath().toString() + Path.SEPARATOR + "_SUCCESS");
                continue;
            }
            cleanDirs(fs.getPath().toString());
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:cascading.flow.tez.Hadoop2TezFlow.java
License:Open Source License
private String createStagingRoot() {
    return ".staging" + Path.SEPARATOR + getID();
}
From source file:cascading.flow.tez.planner.Hadoop2TezFlowStepJob.java
License:Open Source License
String createStepStagingPath() {
    String result = "";

    if (HadoopUtil.isLocal(jobConfiguration))
        result = jobConfiguration.get("hadoop.tmp.dir") + Path.SEPARATOR;

    String flowStagingPath = ((Hadoop2TezFlow) flowStep.getFlow()).getFlowStagingPath();

    return result + flowStagingPath + Path.SEPARATOR + flowStep.getID();
}
From source file:cascading.tap.hadoop.Hadoop18TapUtil.java
License:Open Source License
private static Path getTaskOutputPath(JobConf conf) {
    String taskId = conf.get("mapred.task.id");

    Path p = new Path(FileOutputFormat.getOutputPath(conf),
            TEMPORARY_PATH + Path.SEPARATOR + "_" + taskId);

    try {
        FileSystem fs = p.getFileSystem(conf);
        return p.makeQualified(fs);
    } catch (IOException ie) {
        return p;
    }
}
From source file:cascading.tap.hadoop.util.Hadoop18TapUtil.java
License:Open Source License
private static Path getTaskOutputPath(Configuration conf) {
    String taskId = conf.get("mapred.task.id", conf.get("mapreduce.task.id"));

    Path p = new Path(FileOutputFormat.getOutputPath(asJobConfInstance(conf)),
            TEMPORARY_PATH + Path.SEPARATOR + "_" + taskId);

    try {
        FileSystem fs = p.getFileSystem(conf);
        return p.makeQualified(fs);
    } catch (IOException ie) {
        return p;
    }
}
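This second variant differs from the JobConf version above in two ways: it accepts a plain Configuration (converted with asJobConfInstance for FileOutputFormat), and it falls back to the newer "mapreduce.task.id" property when the legacy "mapred.task.id" key is unset.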
From source file:cc.slda.AnnotateDocuments.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg()
            .withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg()
            .withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg()
            .withDescription("number of reducers").create(NUM_REDUCERS));
    options.addOption(OptionBuilder.withArgName(PCUTOFF).hasArg()
            .withDescription("probability of topic assignment").create(PCUTOFF));
    options.addOption(OptionBuilder.withArgName(INDEX).hasArg()
            .withDescription("path to data directory containing term and title indices").create(INDEX));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(INDEX)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String indexPath = cmdline.getOptionValue(INDEX);
    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS)
            ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1;

    float cutoff = 0.9f;
    if (cmdline.hasOption(PCUTOFF)) {
        cutoff = Float.parseFloat(cmdline.getOptionValue(PCUTOFF));
    }

    LOG.info("Tool: " + AnnotateDocuments.class.getSimpleName());
    LOG.info(" - indices path: " + indexPath);
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of reducers: " + reduceTasks);
    LOG.info(" - log(probCutoff): " + Math.log(cutoff));

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    Job job = Job.getInstance(conf);
    job.setJobName(AnnotateDocuments.class.getSimpleName());
    job.setJarByClass(AnnotateDocuments.class);

    String termIndex = indexPath + Path.SEPARATOR + TERM;
    String titleIndex = indexPath + Path.SEPARATOR + TITLE;

    Path termIndexPath = new Path(termIndex);
    Path titleIndexPath = new Path(titleIndex);

    Preconditions.checkArgument(fs.exists(termIndexPath), "Missing term index files... " + termIndexPath);
    DistributedCache.addCacheFile(termIndexPath.toUri(), job.getConfiguration());
    Preconditions.checkArgument(fs.exists(titleIndexPath), "Missing title index files... " + titleIndexPath);
    DistributedCache.addCacheFile(titleIndexPath.toUri(), job.getConfiguration());

    job.setNumReduceTasks(reduceTasks);
    // set the cutoff on the job's configuration: Job.getInstance copies conf,
    // so mutating conf after this point would not reach the tasks
    job.getConfiguration().setFloat(PCUTOFF, cutoff);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(HMapSIW.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(HMapSIW.class);

    job.setMapperClass(MyMapper.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file:ch.sentric.hbase.coprocessor.LoadWithTableDescriptorExample.java
License:Apache License
public static void main(String[] args) throws IOException {
    Configuration conf = HBaseConfiguration.create();
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(fs.getUri() + Path.SEPARATOR + "coprocessor-1.0-SNAPSHOT.jar");

    HTableDescriptor htd = new HTableDescriptor("testtable");
    htd.addFamily(new HColumnDescriptor("colfam1"));
    // coprocessor attribute format: <jar path>|<coprocessor class>|<priority>
    htd.setValue("COPROCESSOR$1", path.toString() + "|"
            + ProspectiveSearchRegionObserver.class.getCanonicalName() + "|"
            + Coprocessor.PRIORITY_USER);

    HBaseAdmin admin = new HBaseAdmin(conf);
    admin.createTable(htd);

    System.out.println(admin.getTableDescriptor(Bytes.toBytes("testtable")));
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputCommitter.java
License:Apache License
@Override
public void commitJob(JobContext context) throws IOException {
    Configuration configuration = context.getConfiguration();
    MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration);
    BasicMapReduceTaskContext taskContext = classLoader.getTaskContextProvider().get(this.taskContext);

    String outputDatasetName = configuration.get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET);
    PartitionedFileSet outputDataset = taskContext.getDataset(outputDatasetName);
    Partitioning partitioning = outputDataset.getPartitioning();

    Set<PartitionKey> partitionsToAdd = new HashSet<>();
    Set<String> relativePaths = new HashSet<>();

    // Go over all files in the temporary directory and keep track of partitions to add for them
    FileStatus[] allCommittedTaskPaths = getAllCommittedTaskPaths(context);
    for (FileStatus committedTaskPath : allCommittedTaskPaths) {
        FileSystem fs = committedTaskPath.getPath().getFileSystem(configuration);
        RemoteIterator<LocatedFileStatus> fileIter = fs.listFiles(committedTaskPath.getPath(), true);
        while (fileIter.hasNext()) {
            Path path = fileIter.next().getPath();
            String relativePath = getRelative(committedTaskPath.getPath(), path);

            int lastPathSepIdx = relativePath.lastIndexOf(Path.SEPARATOR);
            if (lastPathSepIdx == -1) {
                // this shouldn't happen because each relative path should consist of at least
                // one partition key and the output file name
                LOG.warn("Skipping path '{}'. Its relative path '{}' has fewer than two parts",
                        path, relativePath);
                continue;
            }

            // relativePath = "../key1/key2/part-m-00000"
            // relativeDir  = "../key1/key2"
            // fileName     = "part-m-00000"
            String relativeDir = relativePath.substring(0, lastPathSepIdx);
            String fileName = relativePath.substring(lastPathSepIdx + 1);

            Path finalDir = new Path(FileOutputFormat.getOutputPath(context), relativeDir);
            Path finalPath = new Path(finalDir, fileName);
            if (fs.exists(finalPath)) {
                throw new FileAlreadyExistsException("Final output path " + finalPath + " already exists");
            }

            PartitionKey partitionKey = getPartitionKey(partitioning, relativeDir);
            partitionsToAdd.add(partitionKey);
            relativePaths.add(relativeDir);
        }
    }

    // We need to copy to the parent of the FileOutputFormat's outputDir, since we added a
    // _temporary_jobId suffix to the original outputDir.
    Path finalOutput = FileOutputFormat.getOutputPath(context);
    FileSystem fs = finalOutput.getFileSystem(configuration);
    for (FileStatus stat : getAllCommittedTaskPaths(context)) {
        mergePaths(fs, stat, finalOutput);
    }

    // compute the metadata to be written to every output partition
    Map<String, String> metadata = ConfigurationUtil.getNamedConfigurations(
            this.taskContext.getConfiguration(),
            PartitionedFileSetArguments.OUTPUT_PARTITION_METADATA_PREFIX);

    // create all the necessary partitions
    for (PartitionKey partitionKey : partitionsToAdd) {
        PartitionOutput partitionOutput = outputDataset.getPartitionOutput(partitionKey);
        partitionOutput.setMetadata(metadata);
        partitionOutput.addPartition();
    }

    // close the TaskContext, which flushes dataset operations
    try {
        taskContext.flushOperations();
    } catch (Exception e) {
        Throwables.propagateIfPossible(e, IOException.class);
        throw new IOException(e);
    }

    // delete the job-specific _temporary folder and create a _done file in the o/p folder
    cleanupJob(context);

    // mark all the final output paths with a _SUCCESS file, if configured to do so (default = true)
    if (configuration.getBoolean(SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, true)) {
        for (String relativePath : relativePaths) {
            Path pathToMark = new Path(finalOutput, relativePath);
            Path markerPath = new Path(pathToMark, SUCCEEDED_FILE_NAME);
            fs.createNewFile(markerPath);
        }
    }
}
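Path.SEPARATOR does the key work in the first loop: lastIndexOf(Path.SEPARATOR) splits each file's relative output path into its partition directory (e.g. "key1/key2") and file name, and the partition directory is then decoded back into a PartitionKey and reused at the end to place the per-directory _SUCCESS markers.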