The directory separator, a slash.


From source file:HBaseBloomFilterSemiJoinSystemTest.java

License:Apache License

/**
 * Builds the bloom filter index of {@code test_table} and probes it with matching and non-matching
 * row/column keys, then checks the observed false positive rate against a 0.1 ceiling.
 *
 * <p>NOTE(review): this listing is truncated — several continuation lines and closing braces are
 * missing, so the method does not compile as shown.
 */
public void testBloomFilterSemiJoinDirectly()
        throws IOException, InterruptedException, NoSuchFieldException, IllegalAccessException {
    // Index layout: region key -> (key -> bloom filters), built from the table directory under the root dir.
    NavigableMap<ByteBuffer, ListMultimap<ByteBuffer, BloomFilter>> regionIndex = NonAggregatingRegionObserver
            .buildIndex("test_table", util.getConfiguration(), util.getTestFileSystem(),
                    new Path(util.getDefaultRootDirPath() + Path.SEPARATOR + "test_table"));

    // NOTE(review): assertSame presumably checks reference identity on the boxed ints; assertEquals
    // looks like the intended comparison — confirm.
    assertSame("Unexpected number of regions.", 3, regionIndex.size());

    // Log every region and the server hosting it (diagnostic output only).
    NavigableMap<HRegionInfo, ServerName> regions = table.getRegionLocations();
    for (Map.Entry<HRegionInfo, ServerName> entry : regions.entrySet()) {
        LOG.info("Using Region: " + entry.getKey() + " Server: " + entry.getValue());
    }

    for (Map.Entry<ByteBuffer, ListMultimap<ByteBuffer, BloomFilter>> entry : regionIndex.entrySet()) {
        // NOTE(review): the actual-value argument of this assertion is missing from the listing.
        assertSame("Unexpected number of HFiles.", 1,

    // Used below only to compose row+column bloom keys via createBloomKey.
    CompoundBloomFilterBase bfEntryCreator = new CompoundBloomFilterBase();
    double falsePositivesCounter = 0.0;

    for (int i = 0; i < NUM_ROWS; i++) {
        byte[] key1 = toBytes("aaa" + i);
        byte[] key2 = toBytes("bbb" + i);
        byte[] key3 = toBytes("ccc" + i);

        // NOTE(review): only a message string is passed here, so this assertion cannot fail as
        // written — presumably a looked-up region was meant to be the second argument.
        assertNotNull("Could not find a region for key: " + new String(key2));

        // creates bbbXaaaX bf entry keys that must match region0 [bbb0, bbb999] ROW_COL BF [bbb0aaa0, bbb999aaa999]
        byte[] bfMatchKey = bfEntryCreator.createBloomKey(key2, 0, key2.length, key1, 0, key1.length);
        BloomFilter bfMatch = bloomFilterForRowCol(regionIndex, key2);

        // bloom filters don't return false negatives, so a key that was added must be reported as present
        assertTrue("Unexpected result from the bloom filter: " + new String(bfMatchKey),
                bfMatch.contains(bfMatchKey, 0, bfMatchKey.length, null));

        // creates bbbXcccX bf entry keys that don't exist in region2 [bbb0, bbb999] ROW_COL BF [bbb0aaa0, bbb999aaa999]
        // but will match the index and therefore may provide false positives
        // NOTE(review): the comment above says region0 for the same key range — confirm which is right.
        byte[] bfNoMatchKeyFalsePositives = bfEntryCreator.createBloomKey(key2, 0, key2.length, key3, 0,
        BloomFilter bfNoMatchFalsePositives = bloomFilterForRowCol(regionIndex, key2);

        // NOTE(review): the body of this branch is missing from the listing; presumably it increments
        // falsePositivesCounter.
        if (bfNoMatchFalsePositives.contains(bfNoMatchKeyFalsePositives, 0, bfNoMatchKeyFalsePositives.length,
                null)) {

        // Only rows in the lower half of the range whose first decimal digit is below 5 take this branch.
        if (i <= NUM_ROWS / 2 && Integer.parseInt((i + "").charAt(0) + "") < 5) {
            // creates cccXaaaX bf entry keys that don't exist in region3 [ccc0, ccc999] ROW_COL BF [ccc0bbb0, ccc999bbb999]
            // but won't match the index (and therefore won't provide false positives)
            byte[] bfNoMatchKeyNoFalsePositives = bfEntryCreator.createBloomKey(key3, 0, key3.length, key1, 0,
            BloomFilter bfNoMatchNoFalsePositives = bloomFilterForRowCol(regionIndex, key3);
            assertFalse("Unexpected result from the bloom filter: " + new String(bfNoMatchKeyNoFalsePositives),
                    bfNoMatchNoFalsePositives.contains(bfNoMatchKeyNoFalsePositives, 0,
                            bfNoMatchKeyNoFalsePositives.length, null));
    // Rate is the counter divided by the number of probed rows; the test requires it to stay below 0.1.
    double falsePositiveRate = falsePositivesCounter / NUM_ROWS;
    LOG.info("False positive Rate: {} ", falsePositiveRate);
    assertTrue("Unexpectedly high percentage of false positives: " + falsePositiveRate,
            falsePositiveRate < 0.1);

From source file:be.uantwerpen.adrem.hadoop.util.Tools.java

License:Apache License

/**
 * Concatenates the given parts into one string and returns it without its last character.
 *
 * <p>NOTE(review): the loop body is truncated in this listing. Dropping the last character suggests a
 * separator was appended after every part — confirm against the original source. When {@code parts}
 * is empty the builder is empty, so {@code substring(0, -1)} throws.
 *
 * @param parts the components to concatenate, in order
 * @return the concatenated string minus its final character
 */
public static String createPath(String... parts) {
    StringBuilder path = new StringBuilder();
    for (String part : parts) {
        path.append(part);
    // Strip the trailing character left behind by the loop.
    return path.substring(0, path.length() - 1);

From source file:be.uantwerpen.adrem.hadoop.util.Tools.java

License:Apache License

/**
 * Lists the entries directly under {@code dir} and, for each entry whose last path segment is in
 * {@code toKeep}, calls {@code cleanDirs} on that entry's {@code _SUCCESS} child path.
 *
 * <p>NOTE(review): the listing is truncated — what happens to entries not in {@code toKeep}, and how
 * the {@link IOException} is handled, is not visible here.
 *
 * @param dir    the directory whose entries are inspected
 * @param toKeep names (last path segment) of the entries that take the visible branch
 */
public static void cleanupSubdirsExcept(String dir, Collection<String> toKeep) {
    Path path = new Path(dir);
    try {
        // The file system is resolved from the path using a freshly created Configuration.
        for (FileStatus fs : path.getFileSystem(new Configuration()).listStatus(path)) {
            // The last segment after splitting on the separator is the entry's own name.
            String[] sp = fs.getPath().toString().split(Path.SEPARATOR);
            String filename = sp[sp.length - 1];
            if (toKeep.contains(filename)) {
                cleanDirs(fs.getPath().toString() + Path.SEPARATOR + "_SUCCESS");
    } catch (IOException e) {

From source file:cascading.flow.tez.Hadoop2TezFlow.java

License:Open Source License

/**
 * Builds the staging root for this flow: {@code .staging}, the path separator, then the flow ID.
 *
 * @return the relative staging root path as a string
 */
private String createStagingRoot() {
    return ".staging" + Path.SEPARATOR + getID();

From source file:cascading.flow.tez.planner.Hadoop2TezFlowStepJob.java

License:Open Source License

/**
 * Builds the staging path for this step: the flow staging path, the separator, then the step ID.
 * When {@code HadoopUtil.isLocal} reports true for the job configuration, the result is prefixed
 * with the value of {@code hadoop.tmp.dir} and a separator.
 *
 * @return the step staging path as a string
 */
String createStepStagingPath() {
    String result = "";

    // Prefix with hadoop.tmp.dir only when isLocal(jobConfiguration) is true; otherwise the prefix stays empty.
    if (HadoopUtil.isLocal(jobConfiguration))
        result = jobConfiguration.get("hadoop.tmp.dir") + Path.SEPARATOR;

    String flowStagingPath = ((Hadoop2TezFlow) flowStep.getFlow()).getFlowStagingPath();

    return result + flowStagingPath + Path.SEPARATOR + flowStep.getID();

From source file:cascading.tap.hadoop.Hadoop18TapUtil.java

License:Open Source License

/**
 * Builds the task output path: {@code TEMPORARY_PATH/_<taskId>} under the job's output path, where
 * the task ID is read from {@code mapred.task.id}.
 *
 * @param conf the job configuration supplying the task ID and the output path
 * @return the path qualified against its file system, or the unqualified path if the file system
 *         cannot be obtained
 */
private static Path getTaskOutputPath(JobConf conf) {
    String taskId = conf.get("mapred.task.id");

    Path p = new Path(FileOutputFormat.getOutputPath(conf), TEMPORARY_PATH + Path.SEPARATOR + "_" + taskId);

    try {
        FileSystem fs = p.getFileSystem(conf);
        return p.makeQualified(fs);
    } catch (IOException ie) {
        // Best effort: fall back to the unqualified path rather than failing.
        return p;

From source file:cascading.tap.hadoop.util.Hadoop18TapUtil.java

License:Open Source License

/**
 * Builds the task output path: {@code TEMPORARY_PATH/_<taskId>} under the job's output path.
 *
 * @param conf the configuration supplying the task ID and, via {@code asJobConfInstance}, the output path
 * @return the path qualified against its file system, or the unqualified path if the file system
 *         cannot be obtained
 */
private static Path getTaskOutputPath(Configuration conf) {
    // Prefer mapred.task.id; the value of mapreduce.task.id is passed as the default.
    String taskId = conf.get("mapred.task.id", conf.get("mapreduce.task.id"));

    Path p = new Path(FileOutputFormat.getOutputPath(asJobConfInstance(conf)),
            TEMPORARY_PATH + Path.SEPARATOR + "_" + taskId);

    try {
        FileSystem fs = p.getFileSystem(conf);
        return p.makeQualified(fs);
    } catch (IOException ie) {
        // Best effort: fall back to the unqualified path rather than failing.
        return p;

From source file:cc.slda.AnnotateDocuments.java

License:Apache License

/** Runs this tool. */
// Parses the command line, validates the term/title index files, registers them as cache files,
// configures input/output paths and removes any existing output directory.
// Returns -1 on a parse error or when INPUT, OUTPUT or INDEX is missing, 0 otherwise.
// NOTE(review): this listing is truncated — some option definitions, closing braces and the job
// setup/submission between the output-directory delete and the final log line are missing.
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    // NOTE(review): the next three lines are fragments of three separate option definitions
    // (reducers, cutoff, index); their missing parts are not in this listing.
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .withDescription("probability of topic assignment").create(PCUTOFF));
            .withDescription("path to data directory containing term and title indices").create(INDEX));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;

    // INPUT, OUTPUT and INDEX are mandatory; print usage and bail out when any is absent.
    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(INDEX)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        return -1;

    String indexPath = cmdline.getOptionValue(INDEX);
    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    // Defaults to a single reducer when NUM_REDUCERS is not given.
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    // Cutoff defaults to 0.9 unless PCUTOFF is supplied.
    float cutoff = 0.9f;
    if (cmdline.hasOption(PCUTOFF)) {
        cutoff = Float.parseFloat(cmdline.getOptionValue(PCUTOFF));
    LOG.info("Tool: " + AnnotateDocuments.class.getSimpleName());
    LOG.info(" - indices path: " + indexPath);
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of reducers: " + reduceTasks);
    LOG.info(" - log(probCutoff): " + Math.log(cutoff));

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    Job job = Job.getInstance(conf);

    String termIndex = indexPath + Path.SEPARATOR + TERM;
    String titleIndex = indexPath + Path.SEPARATOR + TITLE;

    Path termIndexPath = new Path(termIndex);
    Path titleIndexPath = new Path(titleIndex);

    // Both index files must exist; each is then added as a cache file on the job's own configuration.
    Preconditions.checkArgument(fs.exists(termIndexPath), "Missing term index files... " + termIndexPath);
    DistributedCache.addCacheFile(termIndexPath.toUri(), job.getConfiguration());
    Preconditions.checkArgument(fs.exists(titleIndexPath), "Missing title index files... " + titleIndexPath);
    DistributedCache.addCacheFile(titleIndexPath.toUri(), job.getConfiguration());

    // NOTE(review): this writes to conf after Job.getInstance(conf) was called; if the Job holds its
    // own copy of the Configuration, the cutoff never reaches the job. The cache files above use
    // job.getConfiguration() instead — confirm which object is intended.
    conf.setFloat(PCUTOFF, cutoff);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));



    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(conf).delete(outputDir, true);

    // NOTE(review): no job submission is visible between these two lines, so the logged duration
    // covers nothing in this listing.
    long startTime = System.currentTimeMillis();
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;

From source file:ch.sentric.hbase.coprocessor.LoadWithTableDescriptorExample.java

License:Apache License

/**
 * Creates the table {@code testtable} with column family {@code colfam1} and a {@code COPROCESSOR$1}
 * attribute naming the coprocessor jar, the observer class and its priority.
 *
 * @param args unused
 * @throws IOException declared by the method; the visible body does not catch it
 */
public static void main(String[] args) throws IOException {
    Configuration conf = HBaseConfiguration.create();

    // The jar location is the file system URI followed by the separator and the jar file name.
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(fs.getUri() + Path.SEPARATOR + "coprocessor-1.0-SNAPSHOT.jar");

    HTableDescriptor htd = new HTableDescriptor("testtable");
    htd.addFamily(new HColumnDescriptor("colfam1"));
    // Attribute value is pipe-delimited: <jar path>|<observer class name>|<priority>.
    htd.setValue("COPROCESSOR$1", path.toString() + "|"
            + ProspectiveSearchRegionObserver.class.getCanonicalName() + "|" + Coprocessor.PRIORITY_USER);

    // NOTE(review): the admin handle is not closed in the visible code — confirm against the full source.
    HBaseAdmin admin = new HBaseAdmin(conf);
    admin.createTable(htd);


From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputCommitter.java

License:Apache License

public void commitJob(JobContext context) throws IOException {
    Configuration configuration = context.getConfiguration();
    MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration);
    BasicMapReduceTaskContext taskContext = classLoader.getTaskContextProvider().get(this.taskContext);

    String outputDatasetName = configuration.get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET);
    PartitionedFileSet outputDataset = taskContext.getDataset(outputDatasetName);
    Partitioning partitioning = outputDataset.getPartitioning();

    Set<PartitionKey> partitionsToAdd = new HashSet<>();
    Set<String> relativePaths = new HashSet<>();
    // Go over all files in the temporary directory and keep track of partitions to add for them
    FileStatus[] allCommittedTaskPaths = getAllCommittedTaskPaths(context);
    for (FileStatus committedTaskPath : allCommittedTaskPaths) {
        FileSystem fs = committedTaskPath.getPath().getFileSystem(configuration);
        RemoteIterator<LocatedFileStatus> fileIter = fs.listFiles(committedTaskPath.getPath(), true);
        while (fileIter.hasNext()) {
            Path path = fileIter.next().getPath();
            String relativePath = getRelative(committedTaskPath.getPath(), path);

            int lastPathSepIdx = relativePath.lastIndexOf(Path.SEPARATOR);
            if (lastPathSepIdx == -1) {
                // this shouldn't happen because each relative path should consist of at least one partition key and
                // the output file name
                LOG.warn("Skipping path '{}'. It's relative path '{}' has fewer than two parts", path,
                        relativePath);// w ww  .j a v  a 2  s  .c  om
            // relativePath = "../key1/key2/part-m-00000"
            // relativeDir = "../key1/key2"
            // fileName = "part-m-00000"
            String relativeDir = relativePath.substring(0, lastPathSepIdx);
            String fileName = relativePath.substring(lastPathSepIdx + 1);

            Path finalDir = new Path(FileOutputFormat.getOutputPath(context), relativeDir);
            Path finalPath = new Path(finalDir, fileName);
            if (fs.exists(finalPath)) {
                throw new FileAlreadyExistsException("Final output path " + finalPath + " already exists");
            PartitionKey partitionKey = getPartitionKey(partitioning, relativeDir);

    // We need to copy to the parent of the FileOutputFormat's outputDir, since we added a _temporary_jobId suffix to
    // the original outputDir.
    Path finalOutput = FileOutputFormat.getOutputPath(context);
    FileSystem fs = finalOutput.getFileSystem(configuration);
    for (FileStatus stat : getAllCommittedTaskPaths(context)) {
        mergePaths(fs, stat, finalOutput);

    // compute the metadata to be written to every output partition
    Map<String, String> metadata = ConfigurationUtil.getNamedConfigurations(this.taskContext.getConfiguration(),

    // create all the necessary partitions
    for (PartitionKey partitionKey : partitionsToAdd) {
        PartitionOutput partitionOutput = outputDataset.getPartitionOutput(partitionKey);

    // close the TaskContext, which flushes dataset operations
    try {
    } catch (Exception e) {
        Throwables.propagateIfPossible(e, IOException.class);
        throw new IOException(e);

    // delete the job-specific _temporary folder and create a _done file in the o/p folder

    // mark all the final output paths with a _SUCCESS file, if configured to do so (default = true)
    if (configuration.getBoolean(SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, true)) {
        for (String relativePath : relativePaths) {
            Path pathToMark = new Path(finalOutput, relativePath);
            Path markerPath = new Path(pathToMark, SUCCEEDED_FILE_NAME);