Example usage for org.apache.hadoop.fs FileSystem get

List of usage examples for org.apache.hadoop.fs FileSystem get

Introduction

This page collects example usages of org.apache.hadoop.fs.FileSystem#get(Configuration).

Prototype

public static FileSystem get(Configuration conf) throws IOException 

Document

Returns the configured FileSystem implementation.
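For quick reference, the sketch below shows the typical call pattern: a Configuration picks up core-site.xml/hdfs-site.xml from the classpath, and FileSystem.get(conf) returns the implementation selected by fs.defaultFS. The class name and the /tmp path are placeholders, not taken from the examples that follow.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemGetSketch {

    public static void main(String[] args) throws IOException {
        // Loads *-site.xml resources from the classpath; fs.defaultFS determines
        // whether a local, HDFS, or other FileSystem implementation is returned.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // List the entries under a placeholder directory.
        for (FileStatus status : fs.listStatus(new Path("/tmp"))) {
            System.out.println(status.getPath() + "\t" + status.getLen());
        }
    }
}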

Usage

From source file:com.ambimmort.webos.plugins.vfs4hdfs.HdfsFileSystem.java

License:Apache License

/**
 * @see org.apache.commons.vfs2.provider.AbstractFileSystem#resolveFile(org.apache.commons.vfs2.FileName)
 */
@Override
public FileObject resolveFile(final FileName name) throws FileSystemException {

    synchronized (this) {
        if (null == this.fs) {
            final String hdfsUri = name.getRootURI();
            final Configuration conf = new Configuration(true);
            conf.set(FileSystem.FS_DEFAULT_NAME_KEY, hdfsUri);
            this.fs = null;
            try {
                fs = FileSystem.get(conf);
            } catch (final IOException e) {
                log.error("Error connecting to filesystem " + hdfsUri, e);
                throw new FileSystemException("Error connecting to filesystem " + hdfsUri, e);
            }
        }
    }

    final boolean useCache = null != getContext().getFileSystemManager().getFilesCache();
    FileObject file;
    if (useCache) {
        file = this.getFileFromCache(name);
    } else {
        file = null;
    }
    if (null == file) {
        String path = null;
        try {
            path = URLDecoder.decode(name.getPath(), "UTF-8");
        } catch (final UnsupportedEncodingException e) {
            path = name.getPath();
        }
        final Path filePath = new Path(path);
        file = new HdfsFileObject((AbstractFileName) name, this, fs, filePath);
        if (useCache) {
            this.putFileToCache(file);
        }
    }
    /**
     * resync the file information if requested
     */
    if (getFileSystemManager().getCacheStrategy().equals(CacheStrategy.ON_RESOLVE)) {
        file.refresh();
    }
    return file;
}

From source file:com.amintor.hdfs.client.kerberizedhdfsclient.KerberizedHDFSClient.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) {

    try {
        Configuration conf = new Configuration();
        conf.addResource(new FileInputStream(HDFS_SITE_LOCATION));
        conf.addResource(new FileInputStream(CORE_SITE_LOCATION));
        String authType = conf.get("hadoop.security.authentication");
        System.out.println("Authentication Type:" + authType);
        if (authType.trim().equalsIgnoreCase("kerberos")) {
            // Login through UGI keytab
            UserGroupInformation.setConfiguration(conf);
            UserGroupInformation.loginUserFromKeytab("vijay", "/Users/vsingh/Software/vijay.keytab");
            FileSystem hdFS = FileSystem.get(conf);
            FileStatus[] listStatus = hdFS.listStatus(new Path(args[0]));
            for (FileStatus statusFile : listStatus) {
                System.out.print("Replication:" + statusFile.getReplication() + "\t");
                System.out.print("Owner:" + statusFile.getOwner() + "\t");
                System.out.print("Group:" + statusFile.getGroup() + "\t");
                System.out.println("Path:" + statusFile.getPath() + "\t");
            }

        }
    } catch (IOException ex) {
        Logger.getLogger(KerberizedHDFSClient.class.getName()).log(Level.SEVERE, null, ex);
    }
}

From source file:com.anhth12.lambda.BatchUpdateFunction.java

@Override
public Void call(JavaPairRDD<K, M> newData, Time timestamp) throws Exception {
    if (newData.take(1).isEmpty()) {
        log.info("No data in current generation's RDD; nothing to do");
        return null;
    }

    log.info("Beginning update at {}", timestamp);

    Configuration hadoopConf = sparkContext.hadoopConfiguration();

    JavaPairRDD<K, M> pastData;
    Path inputPathPattern = new Path(dataDirString + "/*/part-*");
    FileSystem fs = FileSystem.get(hadoopConf);
    FileStatus[] inputPathStatuses = fs.globStatus(inputPathPattern);

    if (inputPathStatuses == null || inputPathStatuses.length == 0) {
        log.info("No past data at path(s) {}", inputPathPattern);
        pastData = null;
    } else {
        log.info("Found past data at path(s) like {}", inputPathStatuses[0].getPath());
        Configuration updatedConf = new Configuration(hadoopConf);
        updatedConf.set(FileInputFormat.INPUT_DIR, joinFSPaths(fs, inputPathStatuses));
        JavaPairRDD<Writable, Writable> pastWriteableData = (JavaPairRDD<Writable, Writable>) sparkContext
                .newAPIHadoopRDD(updatedConf, SequenceFileInputFilter.class, keyWritableClass,
                        messageWritableClass);
        pastData = pastWriteableData.mapToPair(
                new WritableToValueFunction<>(keyClass, messageClass, keyWritableClass, messageWritableClass));

    }

    try (TopicProducer<String, U> producer = new TopicProducerImpl<>(updateBroker, updateTopic)) {
        updateInstance.runUpdate(sparkContext, timestamp.milliseconds(), newData, pastData, modelDirString,
                producer);
    }
    return null;
}

From source file:com.anhth12.lambda.BatchUpdateFunction2.java

@Override
public Void call(JavaRDD<MessageAndMetadata> newData, Time timestamp) throws Exception {
    if (newData.take(1).isEmpty()) {
        log.info("No data in current generation's RDD; nothing to do");
        return null;
    }

    log.info("Beginning update at {}", timestamp);

    JavaPairRDD<K, M> newDataKM = newData.mapToPair(new PairFunction<MessageAndMetadata, K, M>() {

        @Override
        public Tuple2<K, M> call(MessageAndMetadata t) throws Exception {

            return (Tuple2<K, M>) new Tuple2<>(new String(t.getKey()), new String(t.getPayload()));
        }
    });

    Configuration hadoopConf = sparkContext.hadoopConfiguration();

    JavaPairRDD<K, M> pastData;
    Path inputPathPattern = new Path(dataDirString + "/*/part-*");
    FileSystem fs = FileSystem.get(hadoopConf);
    FileStatus[] inputPathStatuses = fs.globStatus(inputPathPattern);

    if (inputPathStatuses == null || inputPathStatuses.length == 0) {
        log.info("No past data at path(s) {}", inputPathPattern);
        pastData = null;
    } else {
        log.info("Found past data at path(s) like {}", inputPathStatuses[0].getPath());
        Configuration updatedConf = new Configuration(hadoopConf);
        updatedConf.set(FileInputFormat.INPUT_DIR, joinFSPaths(fs, inputPathStatuses));
        JavaPairRDD<Writable, Writable> pastWriteableData = (JavaPairRDD<Writable, Writable>) sparkContext
                .newAPIHadoopRDD(updatedConf, SequenceFileInputFilter.class, keyWritableClass,
                        messageWritableClass);
        pastData = pastWriteableData.mapToPair(
                new WritableToValueFunction<>(keyClass, messageClass, keyWritableClass, messageWritableClass));

    }
    try (TopicProducer<String, U> producer = new TopicProducerImpl<>(updateBroker, updateTopic)) {
        updateInstance.runUpdate(sparkContext, timestamp.milliseconds(), newDataKM, pastData, modelDirString,
                producer);
    }
    return null;

}

From source file:com.anhth12.lambda.ml.MLUpdate.java

@Override
public void runUpdate(JavaSparkContext sparkContext, long timestamp, JavaPairRDD<String, M> newKeyMessageData,
        JavaPairRDD<String, M> pastKeyMessageData, String modelDirString,
        TopicProducer<String, String> modelUpdateTopic) throws IOException, InterruptedException {

    Preconditions.checkNotNull(newKeyMessageData);

    JavaRDD<M> newData = newKeyMessageData.values();
    JavaRDD<M> pastData = pastKeyMessageData == null ? null : pastKeyMessageData.values();

    if (newData != null) {
        newData.cache();
        newData.foreachPartition(Functions.<Iterator<M>>noOp());
    }
    if (pastData != null) {
        pastData.cache();
        pastData.foreachPartition(Functions.<Iterator<M>>noOp());
    }

    List<HyperParamValues<?>> hyperParamValues = getHyperParamValues();

    int valuesPerHyperParam = HyperParams.chooseValuesPerHyperParam(hyperParamValues.size(), candidates);

    List<List<?>> hyperParameterCombos = HyperParams.chooseHyperParameterCombos(hyperParamValues, candidates,
            valuesPerHyperParam);

    FileSystem fs = FileSystem.get(sparkContext.hadoopConfiguration());

    Path modelDir = new Path(modelDirString);
    Path tempModelPath = new Path(modelDir, ".temporary");
    Path candiatesPath = new Path(tempModelPath, Long.toString(System.currentTimeMillis()));
    fs.mkdirs(candiatesPath);

    Path bestCandidatePath = findBestCandidatePath(sparkContext, newData, pastData, hyperParameterCombos,
            candiatesPath);

    Path finalPath = new Path(modelDir, Long.toString(System.currentTimeMillis()));
    if (bestCandidatePath == null) {
        log.info("Unable to build any model");
    } else {
        fs.rename(bestCandidatePath, finalPath);
    }

    fs.delete(candiatesPath, true);

    Path bestModelPath = new Path(finalPath, MODEL_FILE_NAME);

    if (fs.exists(bestModelPath)) {
        PMML bestModel;
        try (InputStream in = new GZIPInputStream(fs.open(bestModelPath), 1 << 16)) {
            bestModel = PMMLUtils.read(in);
        }

        modelUpdateTopic.send("MODEL", PMMLUtils.toString(bestModel));
        publishAdditionalModelData(sparkContext, bestModel, newData, pastData, candiatesPath, modelUpdateTopic);
    }

    if (newData != null) {
        newData.unpersist();
    }

    if (pastData != null) {
        pastData.unpersist();
    }

}

From source file:com.anhth12.lambda.ml.MLUpdate.java

private Path findBestCandidatePath(JavaSparkContext sparkContext, JavaRDD<M> newData, JavaRDD<M> pastData,
        List<List<?>> hyperParameterCombos, Path candiatesPath) throws InterruptedException, IOException {

    Map<Path, Double> pathToEval = new HashMap<>(candidates);
    if (evalParallelism > 1) {
        Collection<Future<Tuple2<Path, Double>>> futures = new ArrayList<>(candidates);
        ExecutorService executor = Executors.newFixedThreadPool(evalParallelism);

        try {
            for (int i = 0; i < candidates; i++) {
                futures.add(executor.submit(new BuildAndEvalWorker(i, hyperParameterCombos, sparkContext,
                        newData, pastData, candiatesPath)));
            }
        } finally {
            executor.shutdown();
        }

        for (Future<Tuple2<Path, Double>> future : futures) {
            Tuple2<Path, Double> pathEval;
            try {
                pathEval = future.get();
            } catch (ExecutionException ex) {
                throw new IllegalStateException(ex);
            }
            pathToEval.put(pathEval._1, pathEval._2);
        }
    } else {
        for (int i = 0; i < candidates; i++) {
            Tuple2<Path, Double> pathEval = new BuildAndEvalWorker(i, hyperParameterCombos, sparkContext,
                    newData, pastData, candiatesPath).call();
            pathToEval.put(pathEval._1, pathEval._2);
        }
    }

    FileSystem fs = FileSystem.get(sparkContext.hadoopConfiguration());

    Path bestCandidatePath = null;

    double bestEval = Double.NEGATIVE_INFINITY;

    for (Map.Entry<Path, Double> pathEval : pathToEval.entrySet()) {
        Path path = pathEval.getKey();
        Double eval = pathEval.getValue();

        if ((bestCandidatePath == null) || ((eval != null && eval > bestEval) && fs.exists(path))) {
            log.info("Best eval / path is now {} / {}", eval, path);
            if (eval != null) {
                bestEval = eval;
            }
            bestCandidatePath = path;
        }
    }

    return bestCandidatePath;
}

From source file:com.antbrains.crf.hadoop.CalcFeatureWeights.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    if (otherArgs.length != 3 && otherArgs.length != 4) {
        System.err.println("CalcFeatureWeights <inDir> <tmpDir> <outDir> [startStep]");
        System.exit(-1);
    }
    int startStep = 1;
    if (otherArgs.length == 4) {
        startStep = Integer.valueOf(otherArgs[otherArgs.length - 1]);
    }
    FileSystem fs = FileSystem.get(conf);
    if (startStep <= 1) {
        System.out.println("calc");
        fs.delete(new Path(otherArgs[1]), true);
        Job job = new Job(conf, CalcFeatureWeights.class.getSimpleName());
        job.setNumReduceTasks(1);
        job.setJarByClass(CalcFeatureWeights.class);
        job.setMapperClass(CalcFeatureMapper.class);
        job.setReducerClass(CalcFeatureReducer.class);

        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        job.setInputFormatClass(SequenceFileInputFormat.class);

        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(MyKey.class);

        job.setOutputKeyClass(MyKey.class);
        job.setOutputValueClass(MyValue.class);
        FileInputFormat.setInputPaths(job, new Path(otherArgs[0]));

        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        boolean res = job.waitForCompletion(true);
        if (!res) {
            System.err.println("step1 failed");
            return;
        }
    }

    if (startStep <= 2)
    // sort
    {
        fs.delete(new Path(otherArgs[2]), true);
        System.out.println("sort");
        Job job = new Job(conf, CalcFeatureWeights.class.getSimpleName());

        job.setNumReduceTasks(1);
        job.setJarByClass(CalcFeatureWeights.class);
        job.setMapperClass(IdentityMapper.class);
        job.setReducerClass(IdentityReducer.class);

        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        job.setInputFormatClass(SequenceFileInputFormat.class);

        job.setMapOutputKeyClass(MyKey.class);
        job.setMapOutputValueClass(MyValue.class);
        job.setOutputKeyClass(MyKey.class);
        job.setOutputValueClass(MyValue.class);

        FileInputFormat.setInputPaths(job, new Path(otherArgs[1]));

        FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

        boolean res = job.waitForCompletion(true);
        if (!res) {
            System.err.println("step2 failed");
            return;
        }
    }

}

From source file:com.antbrains.crf.hadoop.ParallelTraining2.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    // Validate the argument count before otherArgs[3] and otherArgs[4] are read.
    if (otherArgs.length != 5) {
        System.err.println(
                "ParallelTraining2 <instanceDir> <outDir> <featurecount> <training-params> <out-iter>");
        System.exit(-1);
    }
    FileSystem fs = FileSystem.get(conf);
    TrainingParams params = SgdCrf.loadParams(otherArgs[3]);
    System.out.println(new Gson().toJson(params));
    int featureCount = Integer.valueOf(otherArgs[2]);
    // conf.set("tc", object2String(tc));
    int outIter = Integer.valueOf(otherArgs[4]);

    String prevOutDir = "";
    for (int i = 1; i <= outIter; i++) {
        System.out.println("iterator: " + i);
        conf.set("pt.iterate", i + "");
        conf.set("pt.featureCount", featureCount + "");

        conf.set("pt.params", object2String(params));
        String outDir = otherArgs[1] + "/result" + i;

        if (i > 1) {
            conf.set("paramDir", prevOutDir);
        }
        prevOutDir = outDir;
        fs.delete(new Path(outDir), true);

        Job job = new Job(conf, ParallelTraining2.class.getSimpleName());

        job.setJarByClass(ParallelTraining2.class);
        job.setMapperClass(TrainingMapper.class);
        job.setReducerClass(TrainingReducer.class);

        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        System.out.println("outDir: " + outDir);
        FileOutputFormat.setOutputPath(job, new Path(outDir));

        boolean res = job.waitForCompletion(true);
        if (!res) {
            System.err.println("iter " + i + " failed");
            break;
        }
    }
}

From source file:com.asakusafw.bulkloader.common.FileNameUtil.java

License:Apache License

/**
 * Resolves the raw path.
 * @param conf current configuration
 * @param rawPaths raw paths
 * @param executionId current execution ID
 * @param user current user name
 * @return the resolved full path
 * @throws BulkLoaderSystemException if failed to resolve the path
 * @since 0.4.0
 */
public static List<Path> createPaths(Configuration conf, List<String> rawPaths, String executionId, String user)
        throws BulkLoaderSystemException {
    String basePathString = ConfigurationLoader.getProperty(Constants.PROP_KEY_BASE_PATH);
    Path basePath;
    if (basePathString == null || basePathString.isEmpty()) {
        basePath = null;
    } else {
        basePath = new Path(basePathString);
    }
    VariableTable variables = Constants.createVariableTable();
    variables.defineVariable(Constants.HDFS_PATH_VARIABLE_USER, user);
    variables.defineVariable(Constants.HDFS_PATH_VARIABLE_EXECUTION_ID, executionId);
    FileSystem fs;
    try {
        if (basePath == null) {
            fs = FileSystem.get(conf);
        } else {
            fs = FileSystem.get(basePath.toUri(), conf);
            basePath = fs.makeQualified(basePath);
        }
    } catch (IOException e) {
        throw new BulkLoaderSystemException(e, CLASS, "TG-COMMON-00019", rawPaths);
    }
    List<Path> results = new ArrayList<>();
    for (String rawPath : rawPaths) {
        String resolved = variables.parse(rawPath, false);
        Path fullPath;
        if (basePath == null) {
            fullPath = fs.makeQualified(new Path(resolved));
        } else {
            fullPath = new Path(basePath, resolved);
        }
        results.add(fullPath);
    }
    return results;
}

From source file:com.asakusafw.compiler.directio.DirectFileIoProcessorRunTest.java

License:Apache License

private List<Path> find(String target) throws IOException {
    FileSystem fs = FileSystem.get(tester.configuration());
    FileStatus[] list = fs.globStatus(getPath(target));
    if (list == null) {
        return Collections.emptyList();
    }
    List<Path> results = new ArrayList<>();
    for (FileStatus file : list) {
        results.add(file.getPath());
    }
    return results;
}