Usage examples for org.apache.hadoop.fs.Path#makeQualified
@Deprecated
public Path makeQualified(FileSystem fs)
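The single-argument form resolves a relative or scheme-less path against the given FileSystem's URI and working directory, returning a fully qualified Path. It is deprecated; Path.makeQualified(URI, Path) or FileSystem.makeQualified(Path) are the non-deprecated equivalents. The sketch below is illustrative only; the path and configuration are hypothetical and not drawn from the examples that follow.

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;

  public class MakeQualifiedExample {
    public static void main(String[] args) throws Exception {
      Configuration conf = new Configuration();
      FileSystem fs = FileSystem.get(conf);

      // A relative, scheme-less path (hypothetical).
      Path relative = new Path("data/input");

      // Deprecated form, as used in the examples on this page.
      Path qualifiedOld = relative.makeQualified(fs);

      // Non-deprecated equivalent on FileSystem.
      Path qualifiedNew = fs.makeQualified(relative);

      // Both print a fully qualified URI; the scheme and authority
      // depend on fs.defaultFS, e.g. hdfs://namenode:8020/user/<user>/data/input
      System.out.println(qualifiedOld);
      System.out.println(qualifiedNew);
    }
  }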
From source file:com.inmobi.databus.distcp.MirrorStreamService.java
License:Apache License
LinkedHashMap<FileStatus, Path> prepareForCommit(Path tmpOut) throws Exception {
  /* tmpOut would be like -
   * /databus/system/tmp/distcp_mirror_<srcCluster>_<destCluster>/
   * After distcp, paths inside tmpOut would be eg:
   *
   * /databus/system/distcp_mirror_ua1_uj1
   *   /databus/streams/<streamName>/2012/1/13/15/7/
   *   <hostname>-<streamName>-2012-01-16-07-21_00000.gz
   *
   * tmpStreamRoot eg:
   * /databus/system/distcp_mirror_<srcCluster>_<destCluster>/databus/streams/
   */
  Path tmpStreamRoot = new Path(tmpOut.makeQualified(getDestFs()).toString() + File.separator
      + getSrcCluster().getUnqaulifiedFinalDestDirRoot());
  LOG.debug("tmpStreamRoot [" + tmpStreamRoot + "]");

  /* tmpStreamRoot eg -
   * /databus/system/tmp/distcp_mirror_<srcCluster>_<destCluster>/databus/streams/
   *
   * Multiple streams can get mirrored from the same cluster.
   * Streams can get processed in any order, but we have to retain the order
   * of paths within a stream. */
  FileStatus[] fileStatuses = getDestFs().listStatus(tmpStreamRoot);

  // Retain the order of commitPaths
  LinkedHashMap<FileStatus, Path> commitPaths = new LinkedHashMap<FileStatus, Path>();
  if (fileStatuses != null) {
    for (FileStatus streamRoot : fileStatuses) {
      // For each stream: list the paths in order of YYYY/mm/DD/HH/MM
      LOG.debug("StreamRoot [" + streamRoot.getPath() + "] streamName ["
          + streamRoot.getPath().getName() + "]");
      List<FileStatus> streamPaths = new ArrayList<FileStatus>();
      createListing(getDestFs(), streamRoot, streamPaths);
      Collections.sort(streamPaths, new DatePathComparator());
      LOG.debug("createListing size: [" + streamPaths.size() + "]");
      createCommitPaths(commitPaths, streamPaths);
    }
  }
  return commitPaths;
}
From source file:com.inmobi.databus.local.LocalStreamServiceTest.java
License:Apache License
private void createMockForFileSystem(FileSystem fs, Cluster cluster) throws Exception {
  FileStatus[] files = createTestData(2, "/databus/data/stream", true);
  FileStatus[] stream1 = createTestData(2, "/databus/data/stream1/collector", true);
  FileStatus[] stream3 = createTestData(number_files, "/databus/data/stream1/collector1/file", true);
  FileStatus[] stream4 = createTestData(number_files, "/databus/data/stream1/collector2/file", true);
  FileStatus[] stream2 = createTestData(2, "/databus/data/stream2/collector", true);
  FileStatus[] stream5 = createTestData(number_files, "/databus/data/stream2/collector1/file", true);
  FileStatus[] stream6 = createTestData(number_files, "/databus/data/stream2/collector2/file", true);

  when(fs.getWorkingDirectory()).thenReturn(new Path("/tmp/"));
  when(fs.getUri()).thenReturn(new URI("localhost"));
  when(fs.listStatus(cluster.getDataDir())).thenReturn(files);
  when(fs.listStatus(new Path("/databus/data/stream1"))).thenReturn(stream1);
  when(fs.listStatus(new Path("/databus/data/stream1/collector1"), any(CollectorPathFilter.class)))
      .thenReturn(stream3);
  when(fs.listStatus(new Path("/databus/data/stream2"))).thenReturn(stream2);
  when(fs.listStatus(new Path("/databus/data/stream1/collector2"), any(CollectorPathFilter.class)))
      .thenReturn(stream4);
  when(fs.listStatus(new Path("/databus/data/stream2/collector1"), any(CollectorPathFilter.class)))
      .thenReturn(stream5);
  when(fs.listStatus(new Path("/databus/data/stream2/collector2"), any(CollectorPathFilter.class)))
      .thenReturn(stream6);

  Path file = mock(Path.class);
  when(file.makeQualified(any(FileSystem.class)))
      .thenReturn(new Path("/databus/data/stream1/collector1/"));
}
From source file:com.inmobi.grill.driver.hive.HiveDriver.java
License:Apache License
void addPersistentPath(QueryContext context) throws IOException {
  String hiveQuery;
  if (context.isPersistent()
      && context.getConf().getBoolean(GrillConfConstants.GRILL_ADD_INSERT_OVEWRITE, true)) {
    // Store persistent data into the user-specified location.
    // If absent, use the default home directory.
    String resultSetParentDir = context.getResultSetParentDir();
    StringBuilder builder;
    Path resultSetPath;
    if (StringUtils.isNotBlank(resultSetParentDir)) {
      resultSetPath = new Path(resultSetParentDir, context.getQueryHandle().toString());
      // Create the query
      builder = new StringBuilder("INSERT OVERWRITE DIRECTORY ");
    } else {
      // Write to /tmp/grillreports
      resultSetPath = new Path(GrillConfConstants.GRILL_RESULT_SET_PARENT_DIR_DEFAULT,
          context.getQueryHandle().toString());
      builder = new StringBuilder("INSERT OVERWRITE LOCAL DIRECTORY ");
    }
    context.setResultSetPath(
        resultSetPath.makeQualified(resultSetPath.getFileSystem(context.getConf())).toString());
    builder.append('"').append(resultSetPath).append("\" ");
    String outputDirFormat = context.getConf().get(GrillConfConstants.GRILL_OUTPUT_DIRECTORY_FORMAT);
    if (outputDirFormat != null) {
      builder.append(outputDirFormat);
    }
    builder.append(' ').append(context.getDriverQuery()).append(' ');
    hiveQuery = builder.toString();
  } else {
    hiveQuery = context.getDriverQuery();
  }
  LOG.info("Hive driver query:" + hiveQuery);
  context.setDriverQuery(hiveQuery);
}
From source file:com.lightboxtechnologies.spectrum.ExtractData.java
License:Apache License
public int run(String[] args) throws Exception {
  if (args.length != 4) {
    System.err.println("Usage: ExtractData <imageID> <friendly_name> <extents_file> <evidence file>");
    return 2;
  }

  final String imageID = args[0];
  final String friendlyName = args[1];
  final String extentsPath = args[2];
  final String image = args[3];

  Configuration conf = getConf();
  final Job job = SKJobFactory.createJobFromConf(imageID, friendlyName, "ExtractData", conf);
  job.setJarByClass(ExtractData.class);
  job.setMapperClass(ExtractDataMapper.class);
  job.setReducerClass(KeyValueSortReducer.class);
  job.setNumReduceTasks(1);

  // job ctor copies the Configuration we pass it, get the real one
  conf = job.getConfiguration();
  conf.setLong("timestamp", System.currentTimeMillis());

  job.setInputFormatClass(RawFileInputFormat.class);
  RawFileInputFormat.addInputPath(job, new Path(image));

  job.setOutputFormatClass(HFileOutputFormat.class);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(KeyValue.class);

  conf.setInt("mapreduce.job.jvm.numtasks", -1);

  final FileSystem fs = FileSystem.get(conf);
  Path hfileDir = new Path("/texaspete/ev/tmp", UUID.randomUUID().toString());
  hfileDir = hfileDir.makeQualified(fs);
  LOG.info("Hashes will be written temporarily to " + hfileDir);

  HFileOutputFormat.setOutputPath(job, hfileDir);

  final Path extp = new Path(extentsPath);
  final URI extents = extp.toUri();
  LOG.info("extents file is " + extents);

  DistributedCache.addCacheFile(extents, conf);
  conf.set("com.lbt.extentsname", extp.getName());

  // job.getConfiguration().setBoolean("mapred.task.profile", true);
  // job.getConfiguration().setBoolean("mapreduce.task.profile", true);

  HBaseTables.summon(conf, HBaseTables.HASH_TBL_B, HBaseTables.HASH_COLFAM_B);
  HBaseTables.summon(conf, HBaseTables.ENTRIES_TBL_B, HBaseTables.ENTRIES_COLFAM_B);

  final boolean result = job.waitForCompletion(true);
  if (result) {
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
    HBaseConfiguration.addHbaseResources(conf);
    loader.setConf(conf);
    LOG.info("Loading hashes into hbase");
    chmodR(fs, hfileDir);
    loader.doBulkLoad(hfileDir, new HTable(conf, HBaseTables.HASH_TBL_B));
    // result = fs.delete(hfileDir, true);
  }
  return result ? 0 : 1;
}
From source file:com.moz.fiji.mapreduce.kvstore.lib.FileStoreHelper.java
License:Apache License
/**
 * Serializes file- and DistributedCache-specific properties associated
 * with the KeyValueStore that owns this FileStoreHelper to the specified configuration.
 *
 * @param conf the configuration to populate.
 * @throws IOException if there's an error serializing the state.
 */
public void storeToConf(KeyValueStoreConfiguration conf) throws IOException {
  if (mInputPaths.isEmpty()) {
    throw new IOException("Required attribute not set: input path");
  }

  if (mUseDCache && !"local".equals(conf.get("mapreduce.jobtracker.address", ""))) {
    // If we're scheduled to use the distributed cache, and we're not in the LocalJobRunner,
    // add these files to the DistributedCache.

    // TODO(aaron): This does not handle any sort of MapperTester, etc.
    // We need a separate flag that tells this to ignore mUseDCache if we're in a test
    // environment, and just use the original input file specs.
    final String dCachePrefix = getCachePrefix();

    // Associate this randomly chosen prefix id with this KVStore implementation.
    conf.set(CONF_DCACHE_PREFIX_KEY, dCachePrefix);

    // Add the input paths to the DistributedCache and translate path names.
    int uniqueId = 0;
    // TODO: getExpandedInputPaths() should use the Configuration from conf, not our getConf().
    for (Path inputPath : getExpandedInputPaths()) {
      FileSystem fs = inputPath.getFileSystem(conf.getDelegate());
      Path absolutePath = inputPath.makeQualified(fs);
      String uriStr = absolutePath.toString() + "#" + dCachePrefix + "-" + uniqueId;
      LOG.debug("Adding to DistributedCache: " + uriStr);
      uniqueId++;
      try {
        DistributedCache.addCacheFile(new URI(uriStr), conf.getDelegate());
      } catch (URISyntaxException use) {
        throw new IOException("Could not construct URI for file: " + uriStr, use);
      }
    }

    // Ensure that symlinks are created for cached files.
    DistributedCache.createSymlink(conf.getDelegate());

    // Now save the cache prefix into the local state. We couldn't set this earlier,
    // because we wanted getExpandedInputPaths() to actually unglob things. That
    // function will behave differently if mDCachePrefix is already initialized.
    mDCachePrefix = dCachePrefix;
  } else {
    // Just put the regular HDFS paths in the Configuration.
    conf.setStrings(CONF_PATHS_KEY,
        Lists.toArray(Lists.map(mInputPaths, new Lists.ToStringFn<Path>()), String.class));
  }
}
From source file:com.moz.fiji.mapreduce.output.HFileMapReduceJobOutput.java
License:Apache License
/**
 * Configures the partitioner for generating HFiles.
 *
 * <p>Each generated HFile should fit within a region of the target table.
 * Additionally, it's optimal to have only one HFile to load into each region, since a
 * read from that region will require reading from each HFile under management (until
 * compaction happens and merges them all back into one HFile).</p>
 *
 * <p>To achieve this, we configure a TotalOrderPartitioner that will partition the
 * records output from the Mapper based on their rank in a total ordering of the
 * keys. The <code>startKeys</code> argument should contain a list of the first key in
 * each of those partitions.</p>
 *
 * @param job The job to configure.
 * @param startKeys A list of keys that will mark the boundaries between the partitions
 *     for the sorted map output records.
 * @throws IOException If there is an error.
 */
public static void configurePartitioner(Job job, List<HFileKeyValue> startKeys) throws IOException {
  FijiMRPlatformBridge.get().setTotalOrderPartitionerClass(job);

  LOG.info("Configuring " + startKeys.size() + " reduce partitions.");
  job.setNumReduceTasks(startKeys.size());

  // Write the file that the TotalOrderPartitioner reads to determine where to partition records.
  Path partitionFilePath = new Path(job.getWorkingDirectory(), "partitions_" + System.currentTimeMillis());
  LOG.info("Writing partition information to " + partitionFilePath);

  final FileSystem fs = partitionFilePath.getFileSystem(job.getConfiguration());
  partitionFilePath = partitionFilePath.makeQualified(fs);
  writePartitionFile(job.getConfiguration(), partitionFilePath, startKeys);

  // Add it to the distributed cache.
  try {
    final URI cacheUri = new URI(partitionFilePath.toString() + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    DistributedCache.addCacheFile(cacheUri, job.getConfiguration());
  } catch (URISyntaxException e) {
    throw new IOException(e);
  }
  DistributedCache.createSymlink(job.getConfiguration());
}
From source file:com.moz.fiji.schema.mapreduce.DistributedCacheJars.java
License:Apache License
/**
 * @param conf Configuration to get the FileSystem from.
 * @param jarDirectory The directory of jars to get.
 * @return A list of qualified paths to the jars in jarDirectory.
 * @throws IOException if there's a problem.
 */
public static List<String> getJarsFromDirectory(Configuration conf, File jarDirectory) throws IOException {
  if (!jarDirectory.isDirectory()) {
    throw new IOException("Attempted to add jars from non-directory: " + jarDirectory.getCanonicalPath());
  }
  List<String> allJars = new ArrayList<String>();
  FileSystem fileSystem = FileSystem.getLocal(conf);
  for (File jar : jarDirectory.listFiles()) {
    if (jar.exists() && !jar.isDirectory() && jar.getName().endsWith(".jar")) {
      Path jarPath = new Path(jar.getCanonicalPath());
      String qualifiedPath = jarPath.makeQualified(fileSystem).toString();
      allJars.add(qualifiedPath);
    }
  }
  return allJars;
}
From source file:com.phantom.hadoop.examples.Sort.java
License:Apache License
/**
 * The main driver for the sort program. Invoke this method to submit the
 * map/reduce job.
 *
 * @throws IOException When there are communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  JobClient client = new JobClient(conf);
  ClusterStatus cluster = client.getClusterStatus();
  int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
  String sort_reduces = conf.get(REDUCES_PER_HOST);
  if (sort_reduces != null) {
    num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
  }
  Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
  Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
  Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
  Class<? extends Writable> outputValueClass = BytesWritable.class;
  List<String> otherArgs = new ArrayList<String>();
  InputSampler.Sampler<K, V> sampler = null;
  for (int i = 0; i < args.length; ++i) {
    try {
      if ("-r".equals(args[i])) {
        num_reduces = Integer.parseInt(args[++i]);
      } else if ("-inFormat".equals(args[i])) {
        inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
      } else if ("-outFormat".equals(args[i])) {
        outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
      } else if ("-outKey".equals(args[i])) {
        outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
      } else if ("-outValue".equals(args[i])) {
        outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
      } else if ("-totalOrder".equals(args[i])) {
        double pcnt = Double.parseDouble(args[++i]);
        int numSamples = Integer.parseInt(args[++i]);
        int maxSplits = Integer.parseInt(args[++i]);
        if (0 >= maxSplits)
          maxSplits = Integer.MAX_VALUE;
        sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits);
      } else {
        otherArgs.add(args[i]);
      }
    } catch (NumberFormatException except) {
      System.out.println("ERROR: Integer expected instead of " + args[i]);
      return printUsage();
    } catch (ArrayIndexOutOfBoundsException except) {
      System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
      return printUsage(); // exits
    }
  }

  // Set user-supplied (possibly default) job configs
  job = new Job(conf);
  job.setJobName("sorter");
  job.setJarByClass(Sort.class);
  job.setMapperClass(Mapper.class);
  job.setReducerClass(Reducer.class);
  job.setNumReduceTasks(num_reduces);
  job.setInputFormatClass(inputFormatClass);
  job.setOutputFormatClass(outputFormatClass);
  job.setOutputKeyClass(outputKeyClass);
  job.setOutputValueClass(outputValueClass);

  // Make sure there are exactly 2 parameters left.
  if (otherArgs.size() != 2) {
    System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2.");
    return printUsage();
  }
  FileInputFormat.setInputPaths(job, otherArgs.get(0));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(1)));

  if (sampler != null) {
    System.out.println("Sampling input to effect total-order sort...");
    job.setPartitionerClass(TotalOrderPartitioner.class);
    Path inputDir = FileInputFormat.getInputPaths(job)[0];
    inputDir = inputDir.makeQualified(inputDir.getFileSystem(conf));
    Path partitionFile = new Path(inputDir, "_sortPartitioning");
    TotalOrderPartitioner.setPartitionFile(conf, partitionFile);
    InputSampler.<K, V>writePartitionFile(job, sampler);
    URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
    DistributedCache.addCacheFile(partitionUri, conf);
  }

  System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
      + FileInputFormat.getInputPaths(job)[0] + " into " + FileOutputFormat.getOutputPath(job)
      + " with " + num_reduces + " reduces.");
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date end_time = new Date();
  System.out.println("Job ended: " + end_time);
  System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
  return ret;
}
From source file:com.splicemachine.mrio.api.SpliceTableMapReduceUtil.java
License:Apache License
/**
 * Add the jars containing the given classes to the job's configuration
 * such that JobClient will ship them to the cluster and add them to
 * the DistributedCache.
 */
public static void addDependencyJars(Configuration conf, Class... classes) throws IOException {
  FileSystem localFs = FileSystem.getLocal(conf);
  Set<String> jars = new HashSet<String>();

  // Add jars that are already in the tmpjars variable
  jars.addAll(conf.getStringCollection("tmpjars"));

  // Add jars containing the specified classes
  for (Class clazz : classes) {
    if (clazz == null)
      continue;

    String pathStr = findOrCreateJar(clazz);
    if (pathStr == null) {
      LOG.warn("Could not find jar for class " + clazz + " in order to ship it to the cluster.");
      continue;
    }
    Path path = new Path(pathStr);
    if (!localFs.exists(path)) {
      LOG.warn("Could not validate jar file " + path + " for class " + clazz);
      continue;
    }
    jars.add(path.makeQualified(localFs).toString());
  }
  if (jars.isEmpty())
    return;

  conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[0])));
}
From source file:com.splout.db.hadoop.DeployerCMD.java
License:Apache License
@SuppressWarnings("deprecation") @Override//ww w. j a v a 2 s.c om public int run(String[] args) throws Exception { JCommander jComm = new JCommander(this); jComm.setProgramName("Tablespaces Deployer"); try { jComm.parse(args); } catch (ParameterException e) { System.out.println(e.getMessage()); jComm.usage(); return -1; } catch (Throwable t) { t.printStackTrace(); jComm.usage(); return -1; } StoreDeployerTool deployer = new StoreDeployerTool(qnode, getConf()); ArrayList<TablespaceDepSpec> deployments = new ArrayList<TablespaceDepSpec>(); if (configFile != null) { deployments = JSONSerDe.deSer(Files.toString(new File(configFile), Charset.forName("UTF-8")), new TypeReference<ArrayList<TablespaceDepSpec>>() { }); } else { Path rootPath = new Path(root); if (tablespaceName == null && tablespaces.size() == 0) { System.err.println( "Tablespace name for root folder or tablespaces contained in them is lacking. Either use tablespacename or tablespaces option."); jComm.usage(); return -1; } if (tablespaceName != null && tablespaces.size() > 0) { System.err.println( "Can't use tablespacename and tablespaces at the same time. Root is to be either a generated tablespace or a folder with multiple generated tablespaces."); jComm.usage(); return -1; } if (tablespaceName != null) { deployments.add(new TablespaceDepSpec(tablespaceName, rootPath.toString(), replicationFactor, initStatements)); } for (String tb : tablespaces) { Path tablespacePath = new Path(rootPath, tb); deployments.add( new TablespaceDepSpec(tb, tablespacePath.toString(), replicationFactor, initStatements)); } } // Checking for file existence for (TablespaceDepSpec spec : deployments) { Path tablespacePath = new Path(spec.getSourcePath()); FileSystem fs = tablespacePath.getFileSystem(getConf()); if (!fs.exists(tablespacePath)) { System.out.println("ERROR: Path [" + tablespacePath.makeQualified(fs) + "] not found."); return 1; } } deployer.deploy(deployments); return 0; }