List of usage examples for org.apache.hadoop.fs.FileSystem.get
public static FileSystem get(Configuration conf) throws IOException
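All of the examples below follow the same basic pattern. As a minimal, self-contained sketch of that pattern (the class name and path here are illustrative, not taken from any of the source files):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemGetExample {
    public static void main(String[] args) throws IOException {
        // Reads core-site.xml/hdfs-site.xml from the classpath; fs.defaultFS
        // determines which FileSystem implementation (HDFS, local, ...) is returned.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Illustrative path; any operation on the returned FileSystem works the same way.
        Path path = new Path("/tmp/example.txt");
        System.out.println(path + " exists? " + fs.exists(path));
    }
}

The overload FileSystem.get(URI, Configuration), used in the FileNameUtil example below, selects the filesystem for an explicit URI instead of the configured default.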
From source file:com.ambimmort.webos.plugins.vfs4hdfs.HdfsFileSystem.java
License:Apache License
/**
 * @see org.apache.commons.vfs2.provider.AbstractFileSystem#resolveFile(org.apache.commons.vfs2.FileName)
 */
@Override
public FileObject resolveFile(final FileName name) throws FileSystemException {
    synchronized (this) {
        if (null == this.fs) {
            final String hdfsUri = name.getRootURI();
            final Configuration conf = new Configuration(true);
            conf.set(FileSystem.FS_DEFAULT_NAME_KEY, hdfsUri);
            this.fs = null;
            try {
                fs = FileSystem.get(conf);
            } catch (final IOException e) {
                log.error("Error connecting to filesystem " + hdfsUri, e);
                throw new FileSystemException("Error connecting to filesystem " + hdfsUri, e);
            }
        }
    }
    final boolean useCache = null != getContext().getFileSystemManager().getFilesCache();
    FileObject file;
    if (useCache) {
        file = this.getFileFromCache(name);
    } else {
        file = null;
    }
    if (null == file) {
        String path = null;
        try {
            path = URLDecoder.decode(name.getPath(), "UTF-8");
        } catch (final UnsupportedEncodingException e) {
            path = name.getPath();
        }
        final Path filePath = new Path(path);
        file = new HdfsFileObject((AbstractFileName) name, this, fs, filePath);
        if (useCache) {
            this.putFileToCache(file);
        }
    }
    /**
     * resync the file information if requested
     */
    if (getFileSystemManager().getCacheStrategy().equals(CacheStrategy.ON_RESOLVE)) {
        file.refresh();
    }
    return file;
}
From source file:com.amintor.hdfs.client.kerberizedhdfsclient.KerberizedHDFSClient.java
/**
 * @param args the command line arguments
 */
public static void main(String[] args) {
    try {
        Configuration conf = new Configuration();
        conf.addResource(new FileInputStream(HDFS_SITE_LOCATION));
        conf.addResource(new FileInputStream(CORE_SITE_LOCATION));
        String authType = conf.get("hadoop.security.authentication");
        System.out.println("Authentication Type:" + authType);
        if (authType.trim().equalsIgnoreCase("kerberos")) {
            // Login through UGI keytab
            UserGroupInformation.setConfiguration(conf);
            UserGroupInformation.loginUserFromKeytab("vijay", "/Users/vsingh/Software/vijay.keytab");
            FileSystem hdFS = FileSystem.get(conf);
            FileStatus[] listStatus = hdFS.listStatus(new Path(args[0]));
            for (FileStatus statusFile : listStatus) {
                System.out.print("Replication:" + statusFile.getReplication() + "\t");
                System.out.print("Owner:" + statusFile.getOwner() + "\t");
                System.out.print("Group:" + statusFile.getGroup() + "\t");
                System.out.println("Path:" + statusFile.getPath() + "\t");
            }
        }
    } catch (IOException ex) {
        Logger.getLogger(KerberizedHDFSClient.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:com.anhth12.lambda.BatchUpdateFunction.java
@Override
public Void call(JavaPairRDD<K, M> newData, Time timestamp) throws Exception {
    if (newData.take(1).isEmpty()) {
        log.info("No data in current generation's RDD; nothing to do");
        return null;
    }
    log.info("Beginning update at {}", timestamp);
    Configuration hadoopConf = sparkContext.hadoopConfiguration();
    JavaPairRDD<K, M> pastData;
    Path inputPathPattern = new Path(dataDirString + "/*/part-*");
    FileSystem fs = FileSystem.get(hadoopConf);
    FileStatus[] inputPathStatuses = fs.globStatus(inputPathPattern);
    if (inputPathStatuses == null || inputPathStatuses.length == 0) {
        log.info("No past data at path(s) {}", inputPathPattern);
        pastData = null;
    } else {
        log.info("Found past data at path(s) like {}", inputPathStatuses[0].getPath());
        Configuration updatedConf = new Configuration(hadoopConf);
        updatedConf.set(FileInputFormat.INPUT_DIR, joinFSPaths(fs, inputPathStatuses));
        JavaPairRDD<Writable, Writable> pastWriteableData = (JavaPairRDD<Writable, Writable>) sparkContext
                .newAPIHadoopRDD(updatedConf, SequenceFileInputFilter.class, keyWritableClass,
                        messageWritableClass);
        pastData = pastWriteableData.mapToPair(
                new WritableToValueFunction<>(keyClass, messageClass, keyWritableClass, messageWritableClass));
    }
    try (TopicProducer<String, U> producer = new TopicProducerImpl<>(updateBroker, updateTopic)) {
        updateInstance.runUpdate(sparkContext, timestamp.milliseconds(), newData, pastData, modelDirString,
                producer);
    }
    return null;
}
From source file:com.anhth12.lambda.BatchUpdateFunction2.java
@Override
public Void call(JavaRDD<MessageAndMetadata> newData, Time timestamp) throws Exception {
    if (newData.take(1).isEmpty()) {
        log.info("No data in current generation's RDD; nothing to do");
        return null;
    }
    log.info("Beginning update at {}", timestamp);
    JavaPairRDD<K, M> newDataKM = newData.mapToPair(new PairFunction<MessageAndMetadata, K, M>() {
        @Override
        public Tuple2<K, M> call(MessageAndMetadata t) throws Exception {
            return (Tuple2<K, M>) new Tuple2<>(new String(t.getKey()), new String(t.getPayload()));
        }
    });
    Configuration hadoopConf = sparkContext.hadoopConfiguration();
    JavaPairRDD<K, M> pastData;
    Path inputPathPattern = new Path(dataDirString + "/*/part-*");
    FileSystem fs = FileSystem.get(hadoopConf);
    FileStatus[] inputPathStatuses = fs.globStatus(inputPathPattern);
    if (inputPathStatuses == null || inputPathStatuses.length == 0) {
        log.info("No past data at path(s) {}", inputPathPattern);
        pastData = null;
    } else {
        log.info("Found past data at path(s) like {}", inputPathStatuses[0].getPath());
        Configuration updatedConf = new Configuration(hadoopConf);
        updatedConf.set(FileInputFormat.INPUT_DIR, joinFSPaths(fs, inputPathStatuses));
        JavaPairRDD<Writable, Writable> pastWriteableData = (JavaPairRDD<Writable, Writable>) sparkContext
                .newAPIHadoopRDD(updatedConf, SequenceFileInputFilter.class, keyWritableClass,
                        messageWritableClass);
        pastData = pastWriteableData.mapToPair(
                new WritableToValueFunction<>(keyClass, messageClass, keyWritableClass, messageWritableClass));
    }
    try (TopicProducer<String, U> producer = new TopicProducerImpl<>(updateBroker, updateTopic)) {
        updateInstance.runUpdate(sparkContext, timestamp.milliseconds(), newDataKM, pastData, modelDirString,
                producer);
    }
    return null;
}
From source file:com.anhth12.lambda.ml.MLUpdate.java
@Override
public void runUpdate(JavaSparkContext sparkContext, long timestamp, JavaPairRDD<String, M> newKeyMessageData,
        JavaPairRDD<String, M> pastKeyMessageData, String modelDirString,
        TopicProducer<String, String> modelUpdateTopic) throws IOException, InterruptedException {
    Preconditions.checkNotNull(newKeyMessageData);
    JavaRDD<M> newData = newKeyMessageData.values();
    JavaRDD<M> pastData = pastKeyMessageData == null ? null : pastKeyMessageData.values();
    if (newData != null) {
        newData.cache();
        newData.foreachPartition(Functions.<Iterator<M>>noOp());
    }
    if (pastData != null) {
        pastData.cache();
        pastData.foreachPartition(Functions.<Iterator<M>>noOp());
    }
    List<HyperParamValues<?>> hyperParamValues = getHyperParamValues();
    int valuesPerHyperParam = HyperParams.chooseValuesPerHyperParam(hyperParamValues.size(), candidates);
    List<List<?>> hyperParameterCombos = HyperParams.chooseHyperParameterCombos(hyperParamValues, candidates,
            valuesPerHyperParam);
    FileSystem fs = FileSystem.get(sparkContext.hadoopConfiguration());
    Path modelDir = new Path(modelDirString);
    Path tempModelPath = new Path(modelDir, ".temporary");
    Path candiatesPath = new Path(tempModelPath, Long.toString(System.currentTimeMillis()));
    fs.mkdirs(candiatesPath);
    Path bestCandidatePath = findBestCandidatePath(sparkContext, newData, pastData, hyperParameterCombos,
            candiatesPath);
    Path finalPath = new Path(modelDir, Long.toString(System.currentTimeMillis()));
    if (bestCandidatePath == null) {
        log.info("Unable to build any model");
    } else {
        fs.rename(bestCandidatePath, finalPath);
    }
    fs.delete(candiatesPath, true);
    Path bestModelPath = new Path(finalPath, MODEL_FILE_NAME);
    if (fs.exists(bestModelPath)) {
        PMML bestModel;
        // Read the PMML model file that was renamed into the final directory
        try (InputStream in = new GZIPInputStream(fs.open(bestModelPath), 1 << 16)) {
            bestModel = PMMLUtils.read(in);
        }
        modelUpdateTopic.send("MODEL", PMMLUtils.toString(bestModel));
        publishAdditionalModelData(sparkContext, bestModel, newData, pastData, candiatesPath, modelUpdateTopic);
    }
    if (newData != null) {
        newData.unpersist();
    }
    if (pastData != null) {
        pastData.unpersist();
    }
}
From source file:com.anhth12.lambda.ml.MLUpdate.java
private Path findBestCandidatePath(JavaSparkContext sparkContext, JavaRDD<M> newData, JavaRDD<M> pastData,
        List<List<?>> hyperParameterCombos, Path candiatesPath) throws InterruptedException, IOException {
    Map<Path, Double> pathToEval = new HashMap<>(candidates);
    if (evalParallelism > 1) {
        Collection<Future<Tuple2<Path, Double>>> futures = new ArrayList<>(candidates);
        ExecutorService executor = Executors.newFixedThreadPool(evalParallelism);
        try {
            for (int i = 0; i < candidates; i++) {
                futures.add(executor.submit(new BuildAndEvalWorker(i, hyperParameterCombos, sparkContext,
                        newData, pastData, candiatesPath)));
            }
        } finally {
            executor.shutdown();
        }
        for (Future<Tuple2<Path, Double>> future : futures) {
            Tuple2<Path, Double> pathEval;
            try {
                pathEval = future.get();
            } catch (ExecutionException ex) {
                throw new IllegalStateException(ex);
            }
            pathToEval.put(pathEval._1, pathEval._2);
        }
    } else {
        for (int i = 0; i < candidates; i++) {
            Tuple2<Path, Double> pathEval = new BuildAndEvalWorker(i, hyperParameterCombos, sparkContext,
                    newData, pastData, candiatesPath).call();
            pathToEval.put(pathEval._1, pathEval._2);
        }
    }
    FileSystem fs = FileSystem.get(sparkContext.hadoopConfiguration());
    Path bestCandidatePath = null;
    double bestEval = Double.NEGATIVE_INFINITY;
    for (Map.Entry<Path, Double> pathEval : pathToEval.entrySet()) {
        Path path = pathEval.getKey();
        Double eval = pathEval.getValue();
        if ((bestCandidatePath == null) || (eval != null && eval > bestEval) && fs.exists(path)) {
            log.info("Best eval / path is now {} / {}", eval, path);
            if (eval != null) {
                bestEval = eval;
            }
            bestCandidatePath = path;
        }
    }
    return bestCandidatePath;
}
From source file:com.antbrains.crf.hadoop.CalcFeatureWeights.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3 && otherArgs.length != 4) {
        System.err.println("CalcFeatureWeights <inDir> <tmpDir> <outDir> [startStep]");
        System.exit(-1);
    }
    int startStep = 1;
    if (otherArgs.length == 4) {
        startStep = Integer.valueOf(otherArgs[otherArgs.length - 1]);
    }
    FileSystem fs = FileSystem.get(conf);
    if (startStep <= 1) {
        System.out.println("calc");
        fs.delete(new Path(otherArgs[1]), true);
        Job job = new Job(conf, CalcFeatureWeights.class.getSimpleName());
        job.setNumReduceTasks(1);
        job.setJarByClass(CalcFeatureWeights.class);
        job.setMapperClass(CalcFeatureMapper.class);
        job.setReducerClass(CalcFeatureReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(MyKey.class);
        job.setOutputKeyClass(MyKey.class);
        job.setOutputValueClass(MyValue.class);
        FileInputFormat.setInputPaths(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
        boolean res = job.waitForCompletion(true);
        if (!res) {
            System.err.println("step1 failed");
            return;
        }
    }
    if (startStep <= 2) { // sort
        fs.delete(new Path(otherArgs[2]), true);
        System.out.println("sort");
        Job job = new Job(conf, CalcFeatureWeights.class.getSimpleName());
        job.setNumReduceTasks(1);
        job.setJarByClass(CalcFeatureWeights.class);
        job.setMapperClass(IdentityMapper.class);
        job.setReducerClass(IdentityReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapOutputKeyClass(MyKey.class);
        job.setMapOutputValueClass(MyValue.class);
        job.setOutputKeyClass(MyKey.class);
        job.setOutputValueClass(MyValue.class);
        FileInputFormat.setInputPaths(job, new Path(otherArgs[1]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));
        boolean res = job.waitForCompletion(true);
        if (!res) {
            System.err.println("step2 failed");
            return;
        }
    }
}
From source file:com.antbrains.crf.hadoop.ParallelTraining2.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    FileSystem fs = FileSystem.get(conf);
    TrainingParams params = SgdCrf.loadParams(otherArgs[3]);
    System.out.println(new Gson().toJson(params));
    if (otherArgs.length != 5) {
        System.err.println(
                "ParallelTraining2 <instanceDir> <outDir> <featurecount> <training-params> <out-iter>");
        System.exit(-1);
    }
    int featureCount = Integer.valueOf(otherArgs[2]);
    // conf.set("tc", object2String(tc));
    int outIter = Integer.valueOf(otherArgs[4]);
    String prevOutDir = "";
    for (int i = 1; i <= outIter; i++) {
        System.out.println("iterator: " + i);
        conf.set("pt.iterate", i + "");
        conf.set("pt.featureCount", featureCount + "");
        conf.set("pt.params", object2String(params));
        String outDir = otherArgs[1] + "/result" + i;
        if (i > 1) {
            conf.set("paramDir", prevOutDir);
        }
        prevOutDir = outDir;
        fs.delete(new Path(outDir), true);
        Job job = new Job(conf, ParallelTraining2.class.getSimpleName());
        job.setJarByClass(ParallelTraining2.class);
        job.setMapperClass(TrainingMapper.class);
        job.setReducerClass(TrainingReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        System.out.println("outDir: " + outDir);
        FileOutputFormat.setOutputPath(job, new Path(outDir));
        boolean res = job.waitForCompletion(true);
        if (!res) {
            System.err.println("iter " + i + " failed");
            break;
        }
    }
}
From source file:com.asakusafw.bulkloader.common.FileNameUtil.java
License:Apache License
/**
 * Resolves the raw path.
 * @param conf current configuration
 * @param rawPaths raw paths
 * @param executionId current execution ID
 * @param user current user name
 * @return the resolved full path
 * @throws BulkLoaderSystemException if failed to resolve the path
 * @since 0.4.0
 */
public static List<Path> createPaths(Configuration conf, List<String> rawPaths, String executionId, String user)
        throws BulkLoaderSystemException {
    String basePathString = ConfigurationLoader.getProperty(Constants.PROP_KEY_BASE_PATH);
    Path basePath;
    if (basePathString == null || basePathString.isEmpty()) {
        basePath = null;
    } else {
        basePath = new Path(basePathString);
    }
    VariableTable variables = Constants.createVariableTable();
    variables.defineVariable(Constants.HDFS_PATH_VARIABLE_USER, user);
    variables.defineVariable(Constants.HDFS_PATH_VARIABLE_EXECUTION_ID, executionId);
    FileSystem fs;
    try {
        if (basePath == null) {
            fs = FileSystem.get(conf);
        } else {
            fs = FileSystem.get(basePath.toUri(), conf);
            basePath = fs.makeQualified(basePath);
        }
    } catch (IOException e) {
        throw new BulkLoaderSystemException(e, CLASS, "TG-COMMON-00019", rawPaths);
    }
    List<Path> results = new ArrayList<>();
    for (String rawPath : rawPaths) {
        String resolved = variables.parse(rawPath, false);
        Path fullPath;
        if (basePath == null) {
            fullPath = fs.makeQualified(new Path(resolved));
        } else {
            fullPath = new Path(basePath, resolved);
        }
        results.add(fullPath);
    }
    return results;
}
From source file:com.asakusafw.compiler.directio.DirectFileIoProcessorRunTest.java
License:Apache License
private List<Path> find(String target) throws IOException {
    FileSystem fs = FileSystem.get(tester.configuration());
    FileStatus[] list = fs.globStatus(getPath(target));
    if (list == null) {
        return Collections.emptyList();
    }
    List<Path> results = new ArrayList<>();
    for (FileStatus file : list) {
        results.add(file.getPath());
    }
    return results;
}