Example usage for org.apache.hadoop.fs Path makeQualified


Introduction

On this page you can find usage examples for org.apache.hadoop.fs.Path.makeQualified.

Prototype

@Deprecated
public Path makeQualified(FileSystem fs) 

Document

Returns a qualified path object for the FileSystem's working directory. This overload is deprecated in favor of makeQualified(URI, Path).
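
As a minimal sketch of what qualification means (a default Configuration is assumed, and the directory names in the comments are hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedSketch {
    public static void main(String[] args) throws Exception {
        // With no fs.defaultFS configured this resolves to the local file system.
        FileSystem fs = FileSystem.get(new Configuration());

        // A relative path picks up the scheme and working directory of fs,
        // e.g. "data/input" -> "file:/home/user/data/input" on a local FS.
        Path qualified = new Path("data/input").makeQualified(fs);
        System.out.println(qualified);
    }
}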

Usage

From source file:voldemort.store.readonly.mr.azkaban.VoldemortSwapJob.java

License:Apache License

public void run() throws Exception {
    String dataDir = swapConf.getDataDir();
    String storeName = swapConf.getStoreName();
    int httpTimeoutMs = swapConf.getHttpTimeoutMs();
    long pushVersion = swapConf.getPushVersion();
    Cluster cluster = swapConf.getCluster();
    ExecutorService executor = Executors.newCachedThreadPool();

    // Read the hadoop configuration settings
    JobConf conf = new JobConf();
    Path dataPath = new Path(dataDir);
    dataDir = dataPath.makeQualified(FileSystem.get(conf)).toString();

    /*
     * Replace the default protocol and port with the one derived as above
     */
    String existingProtocol = "";
    String existingPort = "";
    String[] pathComponents = dataDir.split(":");
    if (pathComponents.length >= 3) {
        existingProtocol = pathComponents[0];
        existingPort = pathComponents[2].split("/")[0];
    }
    info("Existing protocol = " + existingProtocol + " and port = " + existingPort);
    if (hdfsFetcherProtocol.length() > 0 && hdfsFetcherPort.length() > 0) {
        dataDir = dataDir.replaceFirst(existingProtocol, this.hdfsFetcherProtocol);
        dataDir = dataDir.replaceFirst(existingPort, this.hdfsFetcherPort);
    }

    // Create admin client
    AdminClient client = new AdminClient(cluster,
            new AdminClientConfig().setMaxConnectionsPerNode(cluster.getNumberOfNodes())
                    .setAdminConnectionTimeoutSec(httpTimeoutMs / 1000)
                    .setMaxBackoffDelayMs(swapConf.getMaxBackoffDelayMs()));

    if (pushVersion == -1L) {

        // Need to retrieve max version
        ArrayList<String> stores = new ArrayList<String>();
        stores.add(storeName);
        Map<String, Long> pushVersions = client.getROMaxVersion(stores);

        if (pushVersions == null || !pushVersions.containsKey(storeName)) {
            throw new RuntimeException("Push version could not be determined for store " + storeName);
        }
        pushVersion = pushVersions.get(storeName);
        pushVersion++;
    }

    // do the swap
    info("Initiating swap of " + storeName + " with dataDir:" + dataDir);
    AdminStoreSwapper swapper = new AdminStoreSwapper(cluster, executor, client, httpTimeoutMs,
            swapConf.getRollback(), swapConf.getRollback());
    swapper.swapStoreData(storeName, dataDir, pushVersion);
    info("Swap complete.");
    executor.shutdownNow();
    executor.awaitTermination(10, TimeUnit.SECONDS);
}
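
For illustration only (the host and port below are hypothetical), the string returned by makeQualified is a full URI, which is why splitting on ':' in the code above recovers the protocol and port:

String qualified = "hdfs://namenode:9000/swap/data"; // hypothetical makeQualified result
String[] parts = qualified.split(":");
// parts = ["hdfs", "//namenode", "9000/swap/data"]
String protocol = parts[0];           // "hdfs"
String port = parts[2].split("/")[0]; // "9000"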

From source file:voldemort.store.readonly.mr.HadoopStoreJobRunner.java

License:Apache License

private static void addDepJars(Configuration conf, Class<?>[] deps, List<String> additionalJars)
        throws IOException {
    FileSystem localFs = FileSystem.getLocal(conf);
    Set<String> depJars = new HashSet<String>();
    for (Class<?> dep : deps) {
        String tmp = findInClasspath(dep.getCanonicalName());
        if (tmp != null) {
            Path path = new Path(tmp);
            depJars.add(path.makeQualified(localFs).toString());
        }
    }

    for (String additional : additionalJars) {
        Path path = new Path(additional);
        depJars.add(path.makeQualified(localFs).toString());
    }

    String[] tmpjars = conf.get("tmpjars", "").split(",");
    for (String tmpjar : tmpjars) {
        if (!StringUtils.isEmpty(tmpjar)) {
            depJars.add(tmpjar.trim());
        }
    }
    conf.set("tmpjars", StringUtils.join(depJars.iterator(), ','));
}

From source file:voldemort.store.readwrite.mr.HadoopRWStoreJobRunner.java

License:Apache License

public static void addDepJars(Configuration conf, Class<?>[] deps, List<String> additionalJars)
        throws IOException {
    FileSystem localFs = FileSystem.getLocal(conf);
    Set<String> depJars = new HashSet<String>();
    for (Class<?> dep : deps) {
        String tmp = findInClasspath(dep.getCanonicalName());
        if (tmp != null) {
            Path path = new Path(tmp);
            depJars.add(path.makeQualified(localFs).toString());
        }
    }

    for (String additional : additionalJars) {
        Path path = new Path(additional);
        depJars.add(path.makeQualified(localFs).toString());
    }

    String[] tmpjars = conf.get("tmpjars", "").split(",");
    for (String tmpjar : tmpjars) {
        if (!StringUtils.isEmpty(tmpjar)) {
            depJars.add(tmpjar.trim());
        }
    }
    conf.set("tmpjars", StringUtils.join(depJars.iterator(), ','));
}
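
A hedged usage sketch for the helper above (the dependency class and the extra jar path are hypothetical); the qualified jar URIs land in the job's tmpjars property, which Hadoop's distributed cache uses to ship the jars to task nodes:

import java.util.Collections;
import java.util.List;
import org.apache.hadoop.conf.Configuration;

// Hypothetical usage: ship commons-lang plus one extra jar with the job.
Configuration conf = new Configuration();
Class<?>[] deps = { org.apache.commons.lang.StringUtils.class };
List<String> extraJars = Collections.singletonList("/opt/jars/extra-lib.jar"); // hypothetical path
addDepJars(conf, deps, extraJars);
// conf.get("tmpjars") now holds the comma-separated, fully qualified jar URIs.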

From source file:weka.distributed.spark.CanopyClustererSparkJob.java

License:Open Source License

protected void writeCanopyAssignments(String outputPath, JavaPairRDD<Integer, ?> assignments)
        throws IOException {
    String[] pathOnly = new String[1];
    Configuration conf = getFSConfigurationForPath(outputPath, pathOnly);

    FileSystem fs = FileSystem.get(conf);
    Path p = new Path(pathOnly[0]);
    p = p.makeQualified(fs);

    assignments.saveAsNewAPIHadoopFile(pathOnly[0], NullWritable.class, Text.class,
            SparkJob.NoKeyTextOutputFormat.class, conf);
}

From source file:weka.distributed.spark.RandomizedDataSparkJob.java

License:Open Source License

/**
 * Writes the randomized splits out to the output directory. Spark ensures
 * that each key goes to a separate part-xxxxx file in the output.
 *
 * @param outputPath the path to write to
 * @param sortedByFold the RDD containing the sorted dataset keyed by fold
 * @throws IOException if a problem occurs
 */
protected void writeRandomizedSplits(String outputPath, JavaRDD<Instance> sortedByFold) throws IOException {
    String[] pathOnly = new String[1];
    Configuration conf = getFSConfigurationForPath(outputPath, pathOnly);

    FileSystem fs = FileSystem.get(conf);
    Path p = new Path(pathOnly[0]);
    p = p.makeQualified(fs);

    if (!p.toString().toLowerCase().startsWith("file:/")) {
        m_finalOutputDirectory = p.toString();
    } else {
        m_finalOutputDirectory = pathOnly[0];
    }

    if (m_saveTextualSplits) {
        sortedByFold.saveAsTextFile(m_finalOutputDirectory);
    } else {
        sortedByFold.saveAsObjectFile(m_finalOutputDirectory);
    }
    // sortedByFold.saveAsNewAPIHadoopFile(pathOnly[0], NullWritable.class,
    // Text.class, SparkJob.NoKeyTextOutputFormat.class, conf);

}

From source file:weka.distributed.spark.RandomizedDataSparkJob.java

License:Open Source License

protected void writeRandomizedSplits(String outputPath, JavaPairRDD<Integer, Object> sortedByFold)
        throws IOException {
    String[] pathOnly = new String[1];
    Configuration conf = getFSConfigurationForPath(outputPath, pathOnly);

    FileSystem fs = FileSystem.get(conf);
    Path p = new Path(pathOnly[0]);
    p = p.makeQualified(fs);

    if (!p.toString().toLowerCase().startsWith("file:/")) {
        m_finalOutputDirectory = p.toString();
    } else {
        m_finalOutputDirectory = pathOnly[0];
    }

    // sortedByFold.saveAsNewAPIHadoopFile(pathOnly[0], NullWritable.class,
    // Text.class, SparkJob.NoKeyTextOutputFormat.class, conf);
    //
    sortedByFold.saveAsNewAPIHadoopFile(pathOnly[0], IntWritable.class, Text.class, TextOutputFormat.class,
            conf);
}

From source file:weka.distributed.spark.SparkUtils.java

License:Open Source License

/**
 * Takes an input path and returns a fully qualified absolute one. Handles
 * local and non-local file systems. Original path can be a relative one. In
 * the case of a local file system, the relative path (relative to the working
 * directory) is made into an absolute one. In the case of a file system such
 * as HDFS, an absolute path will require an additional '/' - E.g.
 * {@code hdfs://host:port//users/fred/input} - otherwise it will be treated as
 * relative (to the user's home directory in HDFS). In either case, the
 * returned path will be an absolute one.
 *
 * @param original original path (either relative or absolute) on a file
 *          system
 * @return absolute path
 * @throws IOException if a problem occurs
 */
public static String resolveLocalOrOtherFileSystemPath(String original) throws IOException {

    String[] pathOnly = new String[1];
    Configuration conf = getFSConfigurationForPath(original, pathOnly);
    String result;

    FileSystem fs = FileSystem.get(conf);
    Path p = new Path(pathOnly[0]);
    p = p.makeQualified(fs);

    if (!p.toString().toLowerCase().startsWith("file:/")) {
        result = p.toString();
    } else {
        result = pathOnly[0];
    }

    return result;
}
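
A hedged usage sketch (paths, host, and port are hypothetical; actual output depends on the configured file systems and working directory):

// Relative local path: made absolute, with the "file:/" prefix dropped.
String local = SparkUtils.resolveLocalOrOtherFileSystemPath("output/results");
// e.g. "/home/user/output/results"

// HDFS path with the extra '/' the javadoc describes: scheme and authority are kept.
String remote = SparkUtils.resolveLocalOrOtherFileSystemPath("hdfs://host:9000//users/fred/input");
// e.g. "hdfs://host:9000/users/fred/input"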

From source file:weka.distributed.spark.WekaScoringSparkJob.java

License:Open Source License

/**
 * Write the supplied data to the output directory
 *
 * @param outputPath the path to write to
 * @param scoredData the data to write
 * @throws IOException if a problem occurs
 */
protected void writeScoredData(String outputPath, JavaRDD<Instance> scoredData) throws IOException {
    String[] pathOnly = new String[1];
    Configuration conf = getFSConfigurationForPath(outputPath, pathOnly);

    FileSystem fs = FileSystem.get(conf);
    Path p = new Path(pathOnly[0]);
    p = p.makeQualified(fs);

    String finalOutputDir = "";
    if (!p.toString().toLowerCase().startsWith("file:/")) {
        finalOutputDir = p.toString();
    } else {
        finalOutputDir = pathOnly[0];
    }

    scoredData.saveAsTextFile(finalOutputDir);

    // write the header of the scored data out to the output directory in both
    // ARFF and CSV format
    String outArff = outputPath + "/scoredHeader.arff";
    String outCSV = outputPath + "/scoredHeader.csv";

    PrintWriter writer = null;
    try {
        writer = openTextFileForWrite(outArff);
        writer.println(m_scoredOutputFormat);
        writer.flush();
        writer.close();

        writer = openTextFileForWrite(outCSV);
        StringBuilder b = new StringBuilder();
        for (int i = 0; i < m_scoredOutputFormat.numAttributes(); i++) {
            b.append(m_scoredOutputFormat.attribute(i).name()).append(",");
        }
        b.setLength(b.length() - 1);
        writer.println(b.toString());
        writer.flush();
        writer.close();
    } finally {
        if (writer != null) {
            writer.close();
        }
    }

}