List of usage examples for org.apache.hadoop.fs.FileSystem.getLocal
public static LocalFileSystem getLocal(Configuration conf) throws IOException
From source file:ivory.smrf.retrieval.RunQueryLocal_cascade.java
License:Apache License
/**
 * Creates the local cascade query runner.
 *
 * <p>Obtains the local Hadoop file system and passes it, together with the
 * command-line arguments, to a new {@code BatchQueryRunner_cascade}. A failure
 * while building the runner is logged and the constructor still returns, so
 * {@code runner} may be left unassigned in that case.
 *
 * @param args command-line arguments forwarded to the batch query runner
 * @throws IOException if the local file system cannot be obtained; the other
 *         declared exceptions are kept so existing callers keep compiling
 */
public RunQueryLocal_cascade(String[] args)
        throws SAXException, IOException, ParserConfigurationException, Exception, NotBoundException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    try {
        sLogger.info("initilaize runquery ...");
        runner = new BatchQueryRunner_cascade(args, fs);
    } catch (Exception e) {
        // Construction stays best-effort, but the failure is reported through
        // the class logger (with stack trace) instead of being dumped to stderr.
        sLogger.error("Failed to initialize BatchQueryRunner_cascade", e);
    }
}
From source file:ml.shifu.dtrain.util.HDFSUtils.java
License:Apache License
/** * Get local FileSystem//from w ww.jav a 2 s . com * * @throws RuntimeException * if any IOException to retrieve local file system. */ public static FileSystem getLocalFS() { if (lfs == null) { synchronized (HDFSUtils.class) { if (lfs == null) { try { // initialization lfs = FileSystem.getLocal(conf).getRaw(); } catch (IOException e) { LOG.error("Error on creating local FileSystem object.", e); throw new RuntimeException(e); } } } } return lfs; }
From source file:ml.shifu.guagua.hadoop.io.GuaguaOptionsParser.java
License:Apache License
/** * Modify configuration according user-specified generic options * //w w w .j a v a 2 s . c o m * @param conf * Configuration to be modified * @param line * User-specified generic options */ private void processGeneralOptions(Configuration conf, CommandLine line) throws IOException { if (line.hasOption("fs")) { FileSystem.setDefaultUri(conf, line.getOptionValue("fs")); } if (line.hasOption("jt")) { conf.set("mapred.job.tracker", line.getOptionValue("jt")); } if (line.hasOption("conf")) { String[] values = line.getOptionValues("conf"); for (String value : values) { conf.addResource(new Path(value)); } } if (line.hasOption("libjars")) { conf.set("tmpjars", validateFiles(line.getOptionValue("libjars"), conf)); // setting libjars in client classpath URL[] libjars = getLibJars(conf); if (libjars != null && libjars.length > 0) { conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader())); Thread.currentThread().setContextClassLoader( new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader())); } } if (line.hasOption("files")) { conf.set("tmpfiles", validateFiles(line.getOptionValue("files"), conf)); } if (line.hasOption("archives")) { conf.set("tmparchives", validateFiles(line.getOptionValue("archives"), conf)); } if (line.hasOption('D')) { String[] property = line.getOptionValues('D'); for (String prop : property) { String[] keyval = prop.split("=", 2); if (keyval.length == 2) { conf.set(keyval[0], keyval[1]); } } } conf.setBoolean("mapred.used.genericoptionsparser", true); // tokensFile if (line.hasOption("tokenCacheFile")) { String fileName = line.getOptionValue("tokenCacheFile"); // check if the local file exists try { FileSystem localFs = FileSystem.getLocal(conf); Path p = new Path(fileName); if (!localFs.exists(p)) { throw new FileNotFoundException("File " + fileName + " does not exist."); } LOG.debug("setting conf tokensFile: {}", fileName); conf.set("mapreduce.job.credentials.json", localFs.makeQualified(p).toString()); } catch 
(IOException e) { throw new RuntimeException(e); } } }
From source file:ml.shifu.guagua.hadoop.io.GuaguaOptionsParser.java
License:Apache License
/** * If libjars are set in the conf, parse the libjars. *///from w ww .j av a 2 s. c om public static URL[] getLibJars(Configuration conf) throws IOException { String jars = conf.get("tmpjars"); if (jars == null) { return null; } String[] files = jars.split(FILE_SEPERATOR); List<URL> cp = new ArrayList<URL>(); for (String file : files) { Path tmp = new Path(file); if (tmp.getFileSystem(conf).equals(FileSystem.getLocal(conf))) { cp.add(FileSystem.getLocal(conf).pathToFile(tmp).toURI().toURL()); } } return cp.toArray(new URL[0]); }
From source file:ml.shifu.guagua.hadoop.io.GuaguaOptionsParser.java
License:Apache License
/** * Take input as a comma separated list of files and verifies if they exist. It defaults for file:/// if the files * specified do not have a scheme. it returns the paths uri converted defaulting to file:///. So an input of * /home/user/file1,/home/user/file2 would return file:///home/user/file1,file:///home/user/file2 *//*from w w w . j a va 2s.c o m*/ @SuppressWarnings("deprecation") private String validateFiles(String files, Configuration conf) throws IOException { if (files == null) return null; String[] fileArr = files.split(FILE_SEPERATOR); String[] finalArr = new String[fileArr.length]; for (int i = 0; i < fileArr.length; i++) { String tmp = fileArr[i]; String finalPath; URI pathURI; try { pathURI = new URI(tmp); } catch (URISyntaxException e) { throw new IllegalArgumentException(e); } Path path = new Path(pathURI); FileSystem localFs = FileSystem.getLocal(conf); if (pathURI.getScheme() == null) { // default to the local file system // check if the file exists or not first if (!localFs.exists(path)) { throw new FileNotFoundException("File " + tmp + " does not exist."); } finalPath = path.makeQualified(localFs).toString(); } else { // check if the file exists in this file system // we need to recreate this filesystem object to copy // these files to the file system jobtracker is running // on. FileSystem fs = path.getFileSystem(conf); if (!fs.exists(path)) { throw new FileNotFoundException("File " + tmp + " does not exist."); } finalPath = path.makeQualified(fs).toString(); } finalArr[i] = finalPath; } return StringUtils.arrayToString(finalArr); }
From source file:ml.shifu.guagua.hadoop.util.HDPUtils.java
License:Apache License
/** * Copy local file to HDFS. This is used to set as classpath by distributed cache. *///from ww w .j a va 2 s . co m private static Path shipToHDFS(Configuration conf, String fileName) throws IOException { Path dst = new Path("tmp", fileName.substring(fileName.lastIndexOf(File.separator) + 1)); FileSystem fs = dst.getFileSystem(conf); OutputStream os = null; InputStream is = null; try { is = FileSystem.getLocal(conf).open(new Path(fileName)); os = fs.create(dst); IOUtils.copyBytes(is, os, 4096, true); } finally { org.apache.commons.io.IOUtils.closeQuietly(is); // IOUtils should not close stream to HDFS quietly if (os != null) { os.close(); } } return dst; }
From source file:ml.shifu.guagua.mapreduce.GuaguaOptionsParser.java
License:Apache License
/** * Take input as a comma separated list of files and verifies if they exist. It defaults for file:/// if the files * specified do not have a scheme. it returns the paths uri converted defaulting to file:///. So an input of * /home/user/file1,/home/user/file2 would return file:///home/user/file1,file:///home/user/file2 *//*from w ww . j av a2s .co m*/ private String validateFiles(String files, Configuration conf) throws IOException { if (files == null) return null; String[] fileArr = files.split(FILE_SEPERATOR); String[] finalArr = new String[fileArr.length]; for (int i = 0; i < fileArr.length; i++) { String tmp = fileArr[i]; String finalPath; URI pathURI; try { pathURI = new URI(tmp); } catch (URISyntaxException e) { throw new IllegalArgumentException(e); } Path path = new Path(pathURI.toString()); FileSystem localFs = FileSystem.getLocal(conf); if (pathURI.getScheme() == null) { // default to the local file system // check if the file exists or not first if (!localFs.exists(path)) { throw new FileNotFoundException("File " + tmp + " does not exist."); } finalPath = path.makeQualified(localFs).toString(); } else { // check if the file exists in this file system // we need to recreate this filesystem object to copy // these files to the file system jobtracker is running // on. FileSystem fs = path.getFileSystem(conf); if (!fs.exists(path)) { throw new FileNotFoundException("File " + tmp + " does not exist."); } finalPath = path.makeQualified(fs).toString(); } finalArr[i] = finalPath; } return StringUtils.arrayToString(finalArr); }
From source file:ml.shifu.guagua.yarn.GuaguaOptionsParser.java
License:Apache License
/** * Take input as a comma separated list of files and verifies if they exist. It defaults for file:/// if the files * specified do not have a scheme. it returns the paths uri converted defaulting to file:///. So an input of * /home/user/file1,/home/user/file2 would return file:///home/user/file1,file:///home/user/file2 *//*from w w w.ja v a2s . c om*/ private String validateFiles(String files, Configuration conf) throws IOException { if (files == null) return null; String[] fileArr = files.split(FILE_SEPERATOR); String[] finalArr = new String[fileArr.length]; for (int i = 0; i < fileArr.length; i++) { String tmp = fileArr[i]; String finalPath; URI pathURI; try { pathURI = new URI(tmp); } catch (URISyntaxException e) { throw new IllegalArgumentException(e); } Path path = new Path(pathURI); FileSystem localFs = FileSystem.getLocal(conf); if (pathURI.getScheme() == null) { // default to the local file system // check if the file exists or not first if (!localFs.exists(path)) { throw new FileNotFoundException("File " + tmp + " does not exist."); } finalPath = path.makeQualified(localFs).toString(); } else { // check if the file exists in this file system // we need to recreate this filesystem object to copy // these files to the file system jobtracker is running // on. FileSystem fs = path.getFileSystem(conf); if (!fs.exists(path)) { throw new FileNotFoundException("File " + tmp + " does not exist."); } finalPath = path.makeQualified(fs).toString(); } finalArr[i] = finalPath; } return StringUtils.arrayToString(finalArr); }
From source file:ml.shifu.shifu.core.processor.ExportModelProcessor.java
License:Apache License
@Override public int run() throws Exception { setUp(ModelStep.EXPORT);//from w w w. jav a2 s. com int status = 0; File pmmls = new File("pmmls"); FileUtils.forceMkdir(pmmls); if (StringUtils.isBlank(type)) { type = PMML; } String modelsPath = pathFinder.getModelsPath(SourceType.LOCAL); if (type.equalsIgnoreCase(ONE_BAGGING_MODEL)) { if (!"nn".equalsIgnoreCase(modelConfig.getAlgorithm()) && !CommonUtils.isTreeModel(modelConfig.getAlgorithm())) { log.warn("Currently one bagging model is only supported in NN/GBT/RF algorithm."); } else { List<BasicML> models = ModelSpecLoaderUtils.loadBasicModels(modelsPath, ALGORITHM.valueOf(modelConfig.getAlgorithm().toUpperCase())); if (models.size() < 1) { log.warn("No model is found in {}.", modelsPath); } else { log.info("Convert nn models into one binary bagging model."); Configuration conf = new Configuration(); Path output = new Path(pathFinder.getBaggingModelPath(SourceType.LOCAL), "model.b" + modelConfig.getAlgorithm()); if ("nn".equalsIgnoreCase(modelConfig.getAlgorithm())) { BinaryNNSerializer.save(modelConfig, columnConfigList, models, FileSystem.getLocal(conf), output); } else if (CommonUtils.isTreeModel(modelConfig.getAlgorithm())) { List<List<TreeNode>> baggingTrees = new ArrayList<List<TreeNode>>(); for (int i = 0; i < models.size(); i++) { TreeModel tm = (TreeModel) models.get(i); // TreeModel only has one TreeNode instance although it is list inside baggingTrees.add(tm.getIndependentTreeModel().getTrees().get(0)); } int[] inputOutputIndex = DTrainUtils .getNumericAndCategoricalInputAndOutputCounts(this.columnConfigList); // numerical + categorical = # of all input int inputCount = inputOutputIndex[0] + inputOutputIndex[1]; BinaryDTSerializer.save(modelConfig, columnConfigList, baggingTrees, modelConfig.getParams().get("Loss").toString(), inputCount, FileSystem.getLocal(conf), output); } log.info("Please find one unified bagging model in local {}.", output); } } } else if (type.equalsIgnoreCase(PMML)) { // typical 
pmml generation List<BasicML> models = ModelSpecLoaderUtils.loadBasicModels(modelsPath, ALGORITHM.valueOf(modelConfig.getAlgorithm().toUpperCase())); PMMLTranslator translator = PMMLConstructorFactory.produce(modelConfig, columnConfigList, isConcise(), false); for (int index = 0; index < models.size(); index++) { String path = "pmmls" + File.separator + modelConfig.getModelSetName() + Integer.toString(index) + ".pmml"; log.info("\t Start to generate " + path); PMML pmml = translator.build(Arrays.asList(new BasicML[] { models.get(index) })); PMMLUtils.savePMML(pmml, path); } } else if (type.equalsIgnoreCase(ONE_BAGGING_PMML_MODEL)) { // one unified bagging pmml generation log.info("Convert models into one bagging pmml model {} format", type); if (!"nn".equalsIgnoreCase(modelConfig.getAlgorithm())) { log.warn("Currently one bagging pmml model is only supported in NN algorithm."); } else { List<BasicML> models = ModelSpecLoaderUtils.loadBasicModels(modelsPath, ALGORITHM.valueOf(modelConfig.getAlgorithm().toUpperCase())); PMMLTranslator translator = PMMLConstructorFactory.produce(modelConfig, columnConfigList, isConcise(), true); String path = "pmmls" + File.separator + modelConfig.getModelSetName() + ".pmml"; log.info("\t Start to generate one unified model to: " + path); PMML pmml = translator.build(models); PMMLUtils.savePMML(pmml, path); } } else if (type.equalsIgnoreCase(COLUMN_STATS)) { saveColumnStatus(); } else if (type.equalsIgnoreCase(WOE_MAPPING)) { List<ColumnConfig> exportCatColumns = new ArrayList<ColumnConfig>(); List<String> catVariables = getRequestVars(); for (ColumnConfig columnConfig : this.columnConfigList) { if (CollectionUtils.isEmpty(catVariables) || isRequestColumn(catVariables, columnConfig)) { exportCatColumns.add(columnConfig); } } if (CollectionUtils.isNotEmpty(exportCatColumns)) { List<String> woeMappings = new ArrayList<String>(); for (ColumnConfig columnConfig : exportCatColumns) { String woeMapText = 
rebinAndExportWoeMapping(columnConfig); woeMappings.add(woeMapText); } FileUtils.write(new File("woemapping.txt"), StringUtils.join(woeMappings, ",\n")); } } else if (type.equalsIgnoreCase(WOE)) { List<String> woeInfos = new ArrayList<String>(); for (ColumnConfig columnConfig : this.columnConfigList) { if (columnConfig.getBinLength() > 1 && ((columnConfig.isCategorical() && CollectionUtils.isNotEmpty(columnConfig.getBinCategory())) || (columnConfig.isNumerical() && CollectionUtils.isNotEmpty(columnConfig.getBinBoundary()) && columnConfig.getBinBoundary().size() > 1))) { List<String> varWoeInfos = generateWoeInfos(columnConfig); if (CollectionUtils.isNotEmpty(varWoeInfos)) { woeInfos.addAll(varWoeInfos); woeInfos.add(""); } } FileUtils.writeLines(new File("varwoe_info.txt"), woeInfos); } } else if (type.equalsIgnoreCase(CORRELATION)) { // export correlation into mapping list if (!ShifuFileUtils.isFileExists(pathFinder.getLocalCorrelationCsvPath(), SourceType.LOCAL)) { log.warn("The correlation file doesn't exist. Please make sure you have ran `shifu stats -c`."); return 2; } return exportVariableCorr(); } else { log.error("Unsupported output format - {}", type); status = -1; } clearUp(ModelStep.EXPORT); log.info("Done."); return status; }
From source file:ml.shifu.shifu.util.HDFSUtils.java
License:Apache License
/** * Get local FileSystem/* ww w. j a v a2 s . co m*/ * * @throws IOException if any IOException to retrieve local file system. */ public static FileSystem getLocalFS() { if (lfs == null) { synchronized (HDFSUtils.class) { if (lfs == null) { try { // initialization lfs = FileSystem.getLocal(conf).getRaw(); } catch (IOException e) { LOG.error("Error on creating local FileSystem object.", e); throw new ShifuException(ShifuErrorCode.ERROR_GET_LOCAL_SYSTEM); } } } } return lfs; }