Example usage for org.apache.hadoop.fs FileSystem getLocal

List of usage examples for org.apache.hadoop.fs FileSystem getLocal

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.getLocal.

Prototype

public static LocalFileSystem getLocal(Configuration conf) throws IOException 

Source Link

Document

Get the local FileSystem.

Usage

From source file:ivory.smrf.retrieval.RunQueryLocal_cascade.java

License:Apache License

/**
 * Creates a query runner that works against the local file system.
 *
 * <p>Failures raised while constructing the underlying {@code BatchQueryRunner_cascade} are
 * logged and not rethrown, so {@code runner} may remain unassigned after this constructor
 * returns. Only obtaining the local file system happens outside the guarded section.
 *
 * @param args arguments forwarded unchanged to {@code BatchQueryRunner_cascade}
 * @throws IOException if the local file system cannot be obtained
 */
public RunQueryLocal_cascade(String[] args)
        throws SAXException, IOException, ParserConfigurationException, Exception, NotBoundException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    try {
        sLogger.info("initilaize runquery ...");
        runner = new BatchQueryRunner_cascade(args, fs);
    } catch (Exception e) {
        // Keep the original best-effort behaviour (no rethrow), but report the failure through
        // the class logger instead of dumping the stack trace to stderr.
        sLogger.error("Failed to initialize BatchQueryRunner_cascade", e);
    }
}

From source file:ml.shifu.dtrain.util.HDFSUtils.java

License:Apache License

/**
 * Returns the shared raw local {@link FileSystem}, creating it on first use.
 *
 * <p>Creation is guarded by the {@code HDFSUtils} class lock and happens at most once.
 * NOTE(review): the declaration of {@code lfs} is not visible here; the unsynchronized first
 * read is only safe if that field is {@code volatile} - confirm.
 *
 * @return the raw local file system
 * @throws RuntimeException
 *             if any IOException to retrieve local file system.
 */
public static FileSystem getLocalFS() {
    // Fast path: already created, no locking needed.
    if (lfs != null) {
        return lfs;
    }
    synchronized (HDFSUtils.class) {
        // Re-check under the lock; another thread may have created it meanwhile.
        if (lfs == null) {
            try {
                lfs = FileSystem.getLocal(conf).getRaw();
            } catch (IOException e) {
                LOG.error("Error on creating local FileSystem object.", e);
                throw new RuntimeException(e);
            }
        }
        return lfs;
    }
}

From source file:ml.shifu.guagua.hadoop.io.GuaguaOptionsParser.java

License:Apache License

/**
 * Applies the user-specified generic options to the configuration.
 *
 * <p>Options are handled in this order: {@code fs}, {@code jt}, {@code conf}, {@code libjars},
 * {@code files}, {@code archives}, {@code D}, {@code tokenCacheFile}. An {@link IOException}
 * raised while checking the token cache file (including a missing file) is rethrown wrapped in
 * a {@link RuntimeException}.
 *
 * @param conf
 *            Configuration to be modified
 * @param line
 *            User-specified generic options
 * @throws IOException
 *             if validating the listed files or resolving the lib jars fails
 */
private void processGeneralOptions(Configuration conf, CommandLine line) throws IOException {
    if (line.hasOption("fs")) {
        final String defaultFsUri = line.getOptionValue("fs");
        FileSystem.setDefaultUri(conf, defaultFsUri);
    }
    if (line.hasOption("jt")) {
        final String jobTracker = line.getOptionValue("jt");
        conf.set("mapred.job.tracker", jobTracker);
    }
    if (line.hasOption("conf")) {
        for (String resource : line.getOptionValues("conf")) {
            conf.addResource(new Path(resource));
        }
    }
    if (line.hasOption("libjars")) {
        final String qualifiedJars = validateFiles(line.getOptionValue("libjars"), conf);
        conf.set("tmpjars", qualifiedJars);
        // Expose the same jars on the client classpath: once through the configuration's
        // class loader and once through the current thread's context class loader.
        final URL[] jarUrls = getLibJars(conf);
        if (jarUrls != null && jarUrls.length > 0) {
            conf.setClassLoader(new URLClassLoader(jarUrls, conf.getClassLoader()));
            final Thread current = Thread.currentThread();
            current.setContextClassLoader(new URLClassLoader(jarUrls, current.getContextClassLoader()));
        }
    }
    if (line.hasOption("files")) {
        final String qualifiedFiles = validateFiles(line.getOptionValue("files"), conf);
        conf.set("tmpfiles", qualifiedFiles);
    }
    if (line.hasOption("archives")) {
        final String qualifiedArchives = validateFiles(line.getOptionValue("archives"), conf);
        conf.set("tmparchives", qualifiedArchives);
    }
    if (line.hasOption('D')) {
        for (String definition : line.getOptionValues('D')) {
            // key=value, split at the first '='; entries without '=' are ignored
            final int eq = definition.indexOf('=');
            if (eq >= 0) {
                conf.set(definition.substring(0, eq), definition.substring(eq + 1));
            }
        }
    }
    conf.setBoolean("mapred.used.genericoptionsparser", true);

    // tokensFile
    if (!line.hasOption("tokenCacheFile")) {
        return;
    }
    final String tokenFile = line.getOptionValue("tokenCacheFile");
    try {
        // the token file must exist on the local file system
        final FileSystem localFs = FileSystem.getLocal(conf);
        final Path tokenPath = new Path(tokenFile);
        if (!localFs.exists(tokenPath)) {
            throw new FileNotFoundException("File " + tokenFile + " does not exist.");
        }

        LOG.debug("setting conf tokensFile: {}", tokenFile);
        conf.set("mapreduce.job.credentials.json", localFs.makeQualified(tokenPath).toString());
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:ml.shifu.guagua.hadoop.io.GuaguaOptionsParser.java

License:Apache License

/**
 * Resolves the jars listed under the {@code tmpjars} configuration key to URLs.
 *
 * <p>Only entries whose file system equals the local file system are returned; all other
 * entries are skipped.
 *
 * @param conf
 *            configuration to read {@code tmpjars} from
 * @return URLs of the local jars, or {@code null} if {@code tmpjars} is not set
 * @throws IOException
 *             if a file system cannot be obtained for an entry
 */
public static URL[] getLibJars(Configuration conf) throws IOException {
    final String jarList = conf.get("tmpjars");
    if (null == jarList) {
        return null;
    }
    final List<URL> localJars = new ArrayList<URL>();
    for (String jar : jarList.split(FILE_SEPERATOR)) {
        final Path jarPath = new Path(jar);
        if (!jarPath.getFileSystem(conf).equals(FileSystem.getLocal(conf))) {
            // not on the local file system, cannot be put on the client classpath
            continue;
        }
        localJars.add(FileSystem.getLocal(conf).pathToFile(jarPath).toURI().toURL());
    }
    return localJars.toArray(new URL[localJars.size()]);
}

From source file:ml.shifu.guagua.hadoop.io.GuaguaOptionsParser.java

License:Apache License

/**
 * Takes a list of files separated by {@code FILE_SEPERATOR}, verifies that each one exists and
 * returns the list with every entry fully qualified. Entries without a scheme are checked
 * against, and qualified with, the local file system; entries with a scheme use the file system
 * their path resolves to. So an input of /home/user/file1,/home/user/file2 would return
 * file:///home/user/file1,file:///home/user/file2
 *
 * @param files
 *            separated list of files, may be {@code null}
 * @param conf
 *            configuration used to resolve file systems
 * @return the qualified paths joined into one string, or {@code null} if {@code files} is null
 * @throws IllegalArgumentException
 *             if an entry is not a valid URI
 * @throws FileNotFoundException
 *             if an entry does not exist
 */
@SuppressWarnings("deprecation")
private String validateFiles(String files, Configuration conf) throws IOException {
    if (files == null) {
        return null;
    }
    final String[] inputs = files.split(FILE_SEPERATOR);
    final String[] qualified = new String[inputs.length];
    int idx = 0;
    for (String input : inputs) {
        URI uri;
        try {
            uri = new URI(input);
        } catch (URISyntaxException e) {
            throw new IllegalArgumentException(e);
        }
        final Path path = new Path(uri);
        final FileSystem localFs = FileSystem.getLocal(conf);
        // No scheme: default to the local file system. Otherwise use the file system the path
        // itself resolves to.
        final FileSystem targetFs = (uri.getScheme() == null) ? localFs : path.getFileSystem(conf);
        if (!targetFs.exists(path)) {
            throw new FileNotFoundException("File " + input + " does not exist.");
        }
        qualified[idx++] = path.makeQualified(targetFs).toString();
    }
    return StringUtils.arrayToString(qualified);
}

From source file:ml.shifu.guagua.hadoop.util.HDPUtils.java

License:Apache License

/**
 * Copies a local file to {@code tmp/<file name>} on the file system that {@code conf} resolves
 * for that destination path. This is used to set as classpath by distributed cache.
 *
 * @param conf
 *            configuration used to resolve both file systems
 * @param fileName
 *            local file to copy; only the part after the last {@link File#separator} is used
 *            as the destination name
 * @return the destination path
 * @throws IOException
 *             if opening, copying or closing a stream fails
 */
private static Path shipToHDFS(Configuration conf, String fileName) throws IOException {
    final String baseName = fileName.substring(fileName.lastIndexOf(File.separator) + 1);
    final Path target = new Path("tmp", baseName);
    final FileSystem targetFs = target.getFileSystem(conf);
    InputStream source = null;
    OutputStream sink = null;
    try {
        source = FileSystem.getLocal(conf).open(new Path(fileName));
        sink = targetFs.create(target);
        // NOTE(review): the trailing 'true' presumably asks copyBytes to close both streams
        // when done - confirm against the Hadoop IOUtils documentation.
        IOUtils.copyBytes(source, sink, 4096, true);
    } finally {
        org.apache.commons.io.IOUtils.closeQuietly(source);
        // The destination stream is closed non-quietly so that a failing close surfaces as an
        // IOException instead of being ignored.
        if (sink != null) {
            sink.close();
        }
    }
    return target;
}

From source file:ml.shifu.guagua.mapreduce.GuaguaOptionsParser.java

License:Apache License

/**
 * Takes a list of files separated by {@code FILE_SEPERATOR}, verifies that each one exists and
 * returns the list with every entry fully qualified. Entries without a scheme are checked
 * against, and qualified with, the local file system; entries with a scheme use the file system
 * their path resolves to. So an input of /home/user/file1,/home/user/file2 would return
 * file:///home/user/file1,file:///home/user/file2
 *
 * @param files
 *            separated list of files, may be {@code null}
 * @param conf
 *            configuration used to resolve file systems
 * @return the qualified paths joined into one string, or {@code null} if {@code files} is null
 * @throws IllegalArgumentException
 *             if an entry is not a valid URI
 * @throws FileNotFoundException
 *             if an entry does not exist
 */
private String validateFiles(String files, Configuration conf) throws IOException {
    if (files == null) {
        return null;
    }
    final String[] inputs = files.split(FILE_SEPERATOR);
    final String[] qualified = new String[inputs.length];
    int idx = 0;
    for (String input : inputs) {
        URI uri;
        try {
            uri = new URI(input);
        } catch (URISyntaxException e) {
            throw new IllegalArgumentException(e);
        }
        final Path path = new Path(uri.toString());
        final FileSystem localFs = FileSystem.getLocal(conf);
        // No scheme: default to the local file system. Otherwise use the file system the path
        // itself resolves to.
        final FileSystem targetFs = (uri.getScheme() == null) ? localFs : path.getFileSystem(conf);
        if (!targetFs.exists(path)) {
            throw new FileNotFoundException("File " + input + " does not exist.");
        }
        qualified[idx++] = path.makeQualified(targetFs).toString();
    }
    return StringUtils.arrayToString(qualified);
}

From source file:ml.shifu.guagua.yarn.GuaguaOptionsParser.java

License:Apache License

/**
 * Takes a list of files separated by {@code FILE_SEPERATOR}, verifies that each one exists and
 * returns the list with every entry fully qualified. Entries without a scheme are checked
 * against, and qualified with, the local file system; entries with a scheme use the file system
 * their path resolves to. So an input of /home/user/file1,/home/user/file2 would return
 * file:///home/user/file1,file:///home/user/file2
 *
 * @param files
 *            separated list of files, may be {@code null}
 * @param conf
 *            configuration used to resolve file systems
 * @return the qualified paths joined into one string, or {@code null} if {@code files} is null
 * @throws IllegalArgumentException
 *             if an entry is not a valid URI
 * @throws FileNotFoundException
 *             if an entry does not exist
 */
private String validateFiles(String files, Configuration conf) throws IOException {
    if (files == null) {
        return null;
    }
    final String[] inputs = files.split(FILE_SEPERATOR);
    final String[] qualified = new String[inputs.length];
    int idx = 0;
    for (String input : inputs) {
        URI uri;
        try {
            uri = new URI(input);
        } catch (URISyntaxException e) {
            throw new IllegalArgumentException(e);
        }
        final Path path = new Path(uri);
        final FileSystem localFs = FileSystem.getLocal(conf);
        // No scheme: default to the local file system. Otherwise use the file system the path
        // itself resolves to.
        final FileSystem targetFs = (uri.getScheme() == null) ? localFs : path.getFileSystem(conf);
        if (!targetFs.exists(path)) {
            throw new FileNotFoundException("File " + input + " does not exist.");
        }
        qualified[idx++] = path.makeQualified(targetFs).toString();
    }
    return StringUtils.arrayToString(qualified);
}

From source file:ml.shifu.shifu.core.processor.ExportModelProcessor.java

License:Apache License

/**
 * Runs the export step for the configured {@code type}; a blank type defaults to {@code PMML}.
 *
 * <p>Depending on the type this writes one binary bagging model, one PMML file per model, one
 * unified PMML file, column stats, a WOE mapping file ({@code woemapping.txt}), a WOE info file
 * ({@code varwoe_info.txt}) or the variable correlation export. The {@code pmmls} directory is
 * always created first.
 *
 * @return {@code 0} when a supported type was handled (including cases that only log a
 *         warning), {@code -1} for an unsupported type, {@code 2} when the correlation file is
 *         missing, or the result of {@code exportVariableCorr()} for the correlation export
 * @throws Exception
 *             if any export step fails
 */
@Override
public int run() throws Exception {
    setUp(ModelStep.EXPORT);

    int status = 0;
    File pmmls = new File("pmmls");
    FileUtils.forceMkdir(pmmls);

    if (StringUtils.isBlank(type)) {
        type = PMML;
    }

    String modelsPath = pathFinder.getModelsPath(SourceType.LOCAL);
    if (type.equalsIgnoreCase(ONE_BAGGING_MODEL)) {
        if (!"nn".equalsIgnoreCase(modelConfig.getAlgorithm())
                && !CommonUtils.isTreeModel(modelConfig.getAlgorithm())) {
            log.warn("Currently one bagging model is only supported in NN/GBT/RF algorithm.");
        } else {
            List<BasicML> models = ModelSpecLoaderUtils.loadBasicModels(modelsPath,
                    ALGORITHM.valueOf(modelConfig.getAlgorithm().toUpperCase()));
            if (models.size() < 1) {
                log.warn("No model is found in {}.", modelsPath);
            } else {
                log.info("Convert nn models into one binary bagging model.");
                Configuration conf = new Configuration();
                Path output = new Path(pathFinder.getBaggingModelPath(SourceType.LOCAL),
                        "model.b" + modelConfig.getAlgorithm());
                if ("nn".equalsIgnoreCase(modelConfig.getAlgorithm())) {
                    BinaryNNSerializer.save(modelConfig, columnConfigList, models, FileSystem.getLocal(conf),
                            output);
                } else if (CommonUtils.isTreeModel(modelConfig.getAlgorithm())) {
                    List<List<TreeNode>> baggingTrees = new ArrayList<List<TreeNode>>();
                    for (int i = 0; i < models.size(); i++) {
                        TreeModel tm = (TreeModel) models.get(i);
                        // TreeModel only has one TreeNode instance although it is list inside
                        baggingTrees.add(tm.getIndependentTreeModel().getTrees().get(0));
                    }

                    int[] inputOutputIndex = DTrainUtils
                            .getNumericAndCategoricalInputAndOutputCounts(this.columnConfigList);
                    // numerical + categorical = # of all input
                    int inputCount = inputOutputIndex[0] + inputOutputIndex[1];

                    BinaryDTSerializer.save(modelConfig, columnConfigList, baggingTrees,
                            modelConfig.getParams().get("Loss").toString(), inputCount,
                            FileSystem.getLocal(conf), output);
                }
                log.info("Please find one unified bagging model in local {}.", output);
            }
        }
    } else if (type.equalsIgnoreCase(PMML)) {
        // typical pmml generation
        List<BasicML> models = ModelSpecLoaderUtils.loadBasicModels(modelsPath,
                ALGORITHM.valueOf(modelConfig.getAlgorithm().toUpperCase()));

        PMMLTranslator translator = PMMLConstructorFactory.produce(modelConfig, columnConfigList, isConcise(),
                false);

        for (int index = 0; index < models.size(); index++) {
            String path = "pmmls" + File.separator + modelConfig.getModelSetName() + Integer.toString(index)
                    + ".pmml";
            log.info("\t Start to generate " + path);
            PMML pmml = translator.build(Arrays.asList(new BasicML[] { models.get(index) }));
            PMMLUtils.savePMML(pmml, path);
        }
    } else if (type.equalsIgnoreCase(ONE_BAGGING_PMML_MODEL)) {
        // one unified bagging pmml generation
        log.info("Convert models into one bagging pmml model {} format", type);
        if (!"nn".equalsIgnoreCase(modelConfig.getAlgorithm())) {
            log.warn("Currently one bagging pmml model is only supported in NN algorithm.");
        } else {
            List<BasicML> models = ModelSpecLoaderUtils.loadBasicModels(modelsPath,
                    ALGORITHM.valueOf(modelConfig.getAlgorithm().toUpperCase()));
            PMMLTranslator translator = PMMLConstructorFactory.produce(modelConfig, columnConfigList,
                    isConcise(), true);
            String path = "pmmls" + File.separator + modelConfig.getModelSetName() + ".pmml";
            log.info("\t Start to generate one unified model to: " + path);
            PMML pmml = translator.build(models);
            PMMLUtils.savePMML(pmml, path);
        }
    } else if (type.equalsIgnoreCase(COLUMN_STATS)) {
        saveColumnStatus();
    } else if (type.equalsIgnoreCase(WOE_MAPPING)) {
        List<ColumnConfig> exportCatColumns = new ArrayList<ColumnConfig>();
        List<String> catVariables = getRequestVars();
        for (ColumnConfig columnConfig : this.columnConfigList) {
            // no requested variables means every column is exported
            if (CollectionUtils.isEmpty(catVariables) || isRequestColumn(catVariables, columnConfig)) {
                exportCatColumns.add(columnConfig);
            }
        }

        if (CollectionUtils.isNotEmpty(exportCatColumns)) {
            List<String> woeMappings = new ArrayList<String>();
            for (ColumnConfig columnConfig : exportCatColumns) {
                String woeMapText = rebinAndExportWoeMapping(columnConfig);
                woeMappings.add(woeMapText);
            }
            FileUtils.write(new File("woemapping.txt"), StringUtils.join(woeMappings, ",\n"));
        }
    } else if (type.equalsIgnoreCase(WOE)) {
        List<String> woeInfos = new ArrayList<String>();
        for (ColumnConfig columnConfig : this.columnConfigList) {
            // only columns that have more than one bin and non-empty bin definitions
            if (columnConfig.getBinLength() > 1 && ((columnConfig.isCategorical()
                    && CollectionUtils.isNotEmpty(columnConfig.getBinCategory()))
                    || (columnConfig.isNumerical() && CollectionUtils.isNotEmpty(columnConfig.getBinBoundary())
                            && columnConfig.getBinBoundary().size() > 1))) {
                List<String> varWoeInfos = generateWoeInfos(columnConfig);
                if (CollectionUtils.isNotEmpty(varWoeInfos)) {
                    woeInfos.addAll(varWoeInfos);
                    // blank line between the info blocks of two columns
                    woeInfos.add("");
                }
            }
        }
        // Write the collected lines once. This used to run inside the loop above and rewrote
        // the whole file for every column; the final content is the same. An empty column list
        // now yields an empty file instead of no file.
        FileUtils.writeLines(new File("varwoe_info.txt"), woeInfos);
    } else if (type.equalsIgnoreCase(CORRELATION)) {
        // export correlation into mapping list
        // NOTE(review): both returns in this branch skip clearUp(ModelStep.EXPORT) below -
        // confirm that is intended.
        if (!ShifuFileUtils.isFileExists(pathFinder.getLocalCorrelationCsvPath(), SourceType.LOCAL)) {
            log.warn("The correlation file doesn't exist. Please make sure you have ran `shifu stats -c`.");
            return 2;
        }
        return exportVariableCorr();
    } else {
        log.error("Unsupported output format - {}", type);
        status = -1;
    }

    clearUp(ModelStep.EXPORT);

    log.info("Done.");

    return status;
}

From source file:ml.shifu.shifu.util.HDFSUtils.java

License:Apache License

/**
 * Get local FileSystem/* ww  w.  j  a v a2  s  . co  m*/
 *
 * @throws IOException if any IOException to retrieve local file system.
 */
public static FileSystem getLocalFS() {
    if (lfs == null) {
        synchronized (HDFSUtils.class) {
            if (lfs == null) {
                try {
                    // initialization
                    lfs = FileSystem.getLocal(conf).getRaw();
                } catch (IOException e) {
                    LOG.error("Error on creating local FileSystem object.", e);
                    throw new ShifuException(ShifuErrorCode.ERROR_GET_LOCAL_SYSTEM);
                }
            }
        }
    }
    return lfs;
}