Example usage for org.apache.hadoop.fs FileSystem exists

Introduction

This page collects example usages of the exists method of org.apache.hadoop.fs.FileSystem.

Prototype

public boolean exists(Path f) throws IOException

Source Link

Document

Check if a path exists.

Usage

From source file:com.linkedin.cubert.utils.AvroUtils.java

License:Open Source License

/**
 * Creates an Avro container file at {@code path} holding only the schema derived from
 * {@code fileSchema}, unless something already exists at that path.
 *
 * The file is created with permissions rwxr-xr-x on the file system returned by
 * {@code FileSystem.get} for a fresh {@code JobConf}.
 *
 * @param fileSchema the block schema that is converted to the Avro schema of the new file
 * @param path       location of the file to create
 * @throws IOException if the existence check, the file creation or the write fails
 */
public static void createFileIfNotExists(BlockSchema fileSchema, String path) throws IOException {
    Configuration conf = new JobConf();
    FileSystem fs = FileSystem.get(conf);
    Path filePath = new Path(path);
    if (fs.exists(filePath)) {
        return;
    }

    Schema avroSchema = convertFromBlockSchema("CUBERT_MV_RECORD", fileSchema);
    System.out.println("Creating avro file with schema = " + avroSchema);
    GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(avroSchema);
    DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(datumWriter);

    FSDataOutputStream fout = FileSystem.create(fs, filePath,
            new FsPermission(FsAction.ALL, FsAction.READ_EXECUTE, FsAction.READ_EXECUTE));
    try {
        writer.create(avroSchema, fout);
        writer.flush();
    } finally {
        try {
            writer.close();
        } finally {
            // The writer only closes a stream it managed to open; closing the stream
            // here as well releases it when create() failed, and is a no-op otherwise.
            fout.close();
        }
    }
}

From source file:com.linkedin.cubert.utils.CodeDictionary.java

License:Open Source License

/**
 * Writes the key-to-code dictionary to {@code path}, one "key code" pair per line.
 *
 * If a file already exists at {@code path} it is first renamed to {@code path + ".old"},
 * replacing any previous ".old" file.
 *
 * @param fs   the file system to write to
 * @param path destination file
 * @throws IOException if any file system operation or the write fails
 */
public void write(FileSystem fs, Path path) throws IOException {
    // if the path exists, rename the existing file with ".old" suffix
    if (fs.exists(path)) {
        Path renamePath = new Path(path.toString() + ".old");
        fs.delete(renamePath, false);
        // NOTE(review): the boolean result of rename() is ignored; if the rename fails
        // the create() below presumably overwrites the existing file - confirm intent.
        fs.rename(path, renamePath);
    }

    // Write data to file
    FSDataOutputStream ostream = fs.create(path);
    // NOTE(review): the writer uses the platform default charset - confirm the reader agrees.
    BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(ostream));
    try {
        for (Map.Entry<String, Integer> entry : keyToCodeMap.entrySet()) {
            String line = String.format("%s %d\n", entry.getKey(), entry.getValue());
            writer.write(line);
        }
        writer.flush();
    } finally {
        // Closing the writer also closes the wrapped output stream, so the stream is
        // released even when one of the writes above throws.
        writer.close();
    }
}

From source file:com.linkedin.hadoop.jobs.HdfsWaitJob.java

License:Apache License

/**
 * Method checkDirectory loops through the folders pointed to by dirPath, and will
 * cause the job to succeed if any of the folders are fresh enough. However, if the
 * parameter checkExactPath is true, this method only checks for the existence of
 * dirPath in HDFS./* w ww.ja  v a  2s . c o m*/
 *
 * @param dirPath The path to the directory we are searching for fresh folders
 * @param freshness The timeframe in which the folder has to have been modified by
 * @param checkExactPath The boolean that decides if we only check for the existence of dirPath in HDFS
 * @throws IOException If there is an HDFS exception
 * @return A boolean value corresponding to whether a fresh folder was found
 */
public boolean checkDirectory(String dirPath, long freshness, boolean checkExactPath)
        throws IOException, NullPointerException {
    FileSystem fileSys = FileSystem.get(getConf());

    if (fileSys == null) {
        String errMessage = "ERROR: The file system trying to be accessed does not exist. JOB TERMINATED.";
        log.info(errMessage);
        throw new NullPointerException(errMessage);
    }

    if (checkExactPath) {
        if (fileSys.exists(new Path(dirPath))) {
            log.info("SUCCESS: The exact path: " + dirPath + " was found in HDFS. Program now quitting.");
            return true;
        }
        log.info("STATUS: The exact path: " + dirPath + " was not found during latest polling.");
        return false;
    }

    FileStatus[] status = fileSys.listStatus(new Path(dirPath));

    if (status == null) {
        String errMessage = "ERROR: dirPath -> " + dirPath + " is empty or does not exist. JOB TERMINATED.";
        log.info(errMessage);
        throw new IOException(errMessage);
    }

    for (FileStatus file : status) {
        if (file.isDirectory()) {
            long timeModified = file.getModificationTime();
            if ((System.currentTimeMillis() - timeModified) <= freshness) {
                String fileName = file.getPath().toString();
                log.info("We found this fresh folder in the filePath: "
                        + fileName.substring(fileName.lastIndexOf("/") + 1));
                log.info("SUCCESS: Program now quitting after successfully finding a fresh folder.");
                return true;
            }
        }
    }
    log.info("STATUS: No fresh folders found during latest polling.");
    return false;
}

From source file:com.linkedin.mapred.AvroUtils.java

License:Open Source License

/**
 * Recursively registers every non-ignored file found beneath a root path as an
 * Avro input path of the job. A root path that does not exist adds nothing.
 *
 * @param conf The JobConf receiving the input paths.
 * @param path The root path to walk.
 * @throws IOException if the file system cannot be queried
 * @throws IllegalArgumentException if {@code path} itself is an ignored path
 */
public static void addAllSubPaths(JobConf conf, Path path) throws IOException {
    if (shouldPathBeIgnored(path)) {
        throw new IllegalArgumentException(String.format("Path[%s] should be ignored.", path));
    }

    final FileSystem fileSystem = path.getFileSystem(conf);
    if (!fileSystem.exists(path)) {
        return;
    }

    for (FileStatus entry : fileSystem.listStatus(path)) {
        final Path entryPath = entry.getPath();
        if (shouldPathBeIgnored(entryPath)) {
            continue;
        }
        if (entry.isDir()) {
            // descend into sub-directories
            addAllSubPaths(conf, entryPath);
        } else {
            AvroInputFormat.addInputPath(conf, entryPath);
        }
    }
}

From source file:com.linkedin.mapred.AvroUtils.java

License:Open Source License

/**
 * Enumerates all the files under a given path.
 * /*from   w  w w . j ava 2  s.  co m*/
 * @param conf The JobConf.
 * @param basePath The base path.
 * @return A list of files found under the base path.
 * @throws IOException
 */
public static List<Path> enumerateFiles(JobConf conf, Path basePath) throws IOException {
    if (shouldPathBeIgnored(basePath)) {
        throw new IllegalArgumentException(String.format("Path[%s] should be ignored.", basePath));
    }
    List<Path> paths = new ArrayList<Path>();
    FileSystem fs = basePath.getFileSystem(conf);

    if (!fs.exists(basePath)) {
        return Collections.emptyList();
    }

    for (FileStatus s : fs.listStatus(basePath)) {
        if (!shouldPathBeIgnored(s.getPath())) {
            if (s.isDir()) {
                paths.addAll(enumerateFiles(conf, s.getPath()));
            } else {
                paths.add(s.getPath());
            }
        }
    }
    return paths;
}

From source file:com.linkedin.mlease.regression.jobs.RegressionAdmmTrain.java

License:Open Source License

/**
 * Runs the ADMM regression training job.
 *
 * Steps, in order:
 * 1. read the job settings and reject any REGULARIZER other than 1 or 2;
 * 2. build the lambda -> rho map (rho defaults to 1 for lambda <= 100, else 10) and
 *    write it under {@code <outBasePath>/lambda-rho};
 * 3. optionally load a per-feature lambda map and locate a sample test file;
 * 4. optionally (initializeBoostRate > 0 and reg == 2) run a RegressionNaiveTrain job
 *    and use the mean of its models as the initial z;
 * 5. iterate up to NUM_ITERS times: write u and z, run the map-reduce job, recompute z
 *    from the averaged models, and stop early once the largest change in z is below
 *    EPSILON and liblinearEpsilon has decayed to at most 0.00001;
 * 6. write the final z under {@code <outBasePath>/final-model} and, if REMOVE_TMP_DIR
 *    is set, delete intermediate directories.
 *
 * @throws Exception if configuration is invalid or any job / file system operation fails
 */
@Override
public void run() throws Exception {
    _logger.info("Now running Regression Train using ADMM...");
    JobConfig props = super.getJobConfig();
    String outBasePath = props.getString(OUTPUT_BASE_PATH);
    JobConf conf = super.createJobConf();

    // Various configs
    int nblocks = props.getInt(NUM_BLOCKS);
    int niter = props.getInt(NUM_ITERS, 10);
    //Aggressive decay of liblinear_epsilon
    boolean aggressiveLiblinearEpsilonDecay = props.getBoolean(AGGRESSIVE_LIBLINEAR_EPSILON_DECAY, false);
    // Getting the value of the regularizer L1/L2
    int reg = props.getInt(REGULARIZER);
    if ((reg != 1) && (reg != 2)) {
        throw new IOException("Only L1 and L2 regularization supported!");
    }
    int numClickReplicates = props.getInt(NUM_CLICK_REPLICATES, 1);
    boolean ignoreValue = props.getBoolean(BINARY_FEATURE, false);
    float initializeBoostRate = props.getFloat(INITIALIZE_BOOST_RATE, 0);
    float rhoAdaptCoefficient = props.getFloat(RHO_ADAPT_COEFFICIENT, 0);

    // handling lambda and rho
    // initialize z and u and compute z-u and write to hadoop
    Map<String, LinearModel> z = new HashMap<String, LinearModel>(); // lambda ->
    List<String> lambdastr = props.getStringList(LAMBDA, ",");
    List<String> rhostr = props.getStringList(RHO, null, ",");
    if (rhostr != null) {
        if (rhostr.size() != lambdastr.size())
            throw new IOException(
                    "The number of rho's should be exactly the same as the number of lambda's. OR: don't claim rho!");
    }
    Map<Float, Float> lambdaRho = new HashMap<Float, Float>();
    for (int j = 0; j < lambdastr.size(); j++) {
        float lambda = Float.parseFloat(lambdastr.get(j));
        float rho;
        if (rhostr != null) {
            rho = Float.parseFloat(rhostr.get(j));
        } else {
            // no explicit rho configured: pick a default based on the size of lambda
            if (lambda <= 100) {
                rho = 1;
            } else {
                rho = 10;
            }
        }
        lambdaRho.put(lambda, rho);
        z.put(String.valueOf(lambda), new LinearModel());
    }

    // Get specific lambda treatment for some features
    String lambdaMapPath = props.getString(LAMBDA_MAP, "");
    Map<String, Float> lambdaMap = new HashMap<String, Float>();
    if (!lambdaMapPath.equals("")) {
        AvroHdfsFileReader reader = new AvroHdfsFileReader(conf);
        ReadLambdaMapConsumer consumer = new ReadLambdaMapConsumer();
        reader.build(lambdaMapPath, consumer);
        consumer.done();
        lambdaMap = consumer.get();
    }
    _logger.info("Lambda Map has size = " + String.valueOf(lambdaMap.size()));
    // Write lambda_rho mapping into file
    String rhoPath = outBasePath + "/lambda-rho/part-r-00000.avro";
    writeLambdaRho(conf, rhoPath, lambdaRho);

    // test-loglik computation
    boolean testLoglikPerIter = props.getBoolean(TEST_LOGLIK_PER_ITER, false);
    DataFileWriter<GenericRecord> testRecordWriter = null;
    // test if the test file exists
    String testPath = props.getString(TEST_PATH, "");
    // NOTE(review): this assignment discards the TEST_LOGLIK_PER_ITER value read above, so
    // only the existence of the test path decides - confirm whether the two should be AND-ed.
    testLoglikPerIter = Util.checkPath(testPath);
    if (testLoglikPerIter) {
        List<Path> testPathList = AvroUtils.enumerateFiles(conf, new Path(testPath));
        if (testPathList.size() > 0) {
            // only the first file found under the test path is used as the sample
            testPath = testPathList.get(0).toString();
            _logger.info("Sample test path = " + testPath);

            AvroHdfsFileWriter<GenericRecord> writer = new AvroHdfsFileWriter<GenericRecord>(conf,
                    outBasePath + "/sample-test-loglik/write-test-00000.avro", SampleTestLoglik.SCHEMA$);
            testRecordWriter = writer.get();
        }
    }
    if (testRecordWriter == null) {
        testLoglikPerIter = false;
        _logger.info(
                "test.loglik.per.iter=false or test path doesn't exist or is empty! So we will not output test loglik per iteration.");
    } else {
        // the writer is closed right away; nothing is written through it in this method
        testRecordWriter.close();
    }

    MutableFloat bestTestLoglik = new MutableFloat(-9999999);
    //Initialize z by mean model
    if (initializeBoostRate > 0 && reg == 2) {
        _logger.info("Now start mean model initializing......");
        // Different paths for L1 vs L2 set from job file
        String initalModelPath;
        initalModelPath = outBasePath + "/initialModel";

        Path initalModelPathFromNaiveTrain = new Path(outBasePath, "models");
        JobConfig propsIni = JobConfig.clone(props);
        if (!propsIni.containsKey(LIBLINEAR_EPSILON)) {
            propsIni.put(LIBLINEAR_EPSILON, 0.01);
        }
        propsIni.put(RegressionNaiveTrain.HEAVY_PER_ITEM_TRAIN, "true");
        propsIni.put(LAMBDA_MAP, lambdaMapPath);
        propsIni.put(REMOVE_TMP_DIR, "false");

        // run job
        RegressionNaiveTrain initializationJob = new RegressionNaiveTrain(
                super.getJobId() + "_ADMMInitialization", propsIni);
        initializationJob.run();

        // move the naive-train output to <outBasePath>/initialModel, replacing any old copy
        FileSystem fs = initalModelPathFromNaiveTrain.getFileSystem(conf);
        if (fs.exists(new Path(initalModelPath))) {
            fs.delete(new Path(initalModelPath), true);
        }
        fs.rename(initalModelPathFromNaiveTrain, new Path(initalModelPath));
        // set up lambda
        Set<Float> lambdaSet = new HashSet<Float>();
        for (String l : lambdastr) {
            lambdaSet.add(Float.parseFloat(l));
        }
        // Compute Mean model as initial model
        z = LinearModelUtils.meanModel(conf, initalModelPath, nblocks, lambdaSet.size(), true);

        if (testLoglikPerIter) {
            updateLogLikBestModel(conf, 0, z, testPath, ignoreValue, bestTestLoglik, outBasePath,
                    numClickReplicates);
        }
    }

    double mindiff = 99999999;
    float liblinearEpsilon = 0.01f;
    // declared outside the loop because the cleanup after the loop needs the last iteration index
    int i;
    for (i = 1; i <= niter; i++) {
        _logger.info("Now starting iteration " + String.valueOf(i));
        // set up configuration
        props.put(AbstractAvroJob.OUTPUT_PATH, outBasePath + "/iter-" + String.valueOf(i));
        conf = createJobConf(AdmmMapper.class, AdmmReducer.class,
                Pair.getPairSchema(Schema.create(Type.INT), RegressionPrepareOutput.SCHEMA$),
                RegressionTrainOutput.SCHEMA$);
        conf.setPartitionerClass(AdmmPartitioner.class);
        //AvroUtils.setSpecificReducerInput(conf, true);
        conf.setInt(NUM_BLOCKS, nblocks);
        //Added for L1/L2
        conf.setInt(REGULARIZER, reg);
        conf.setLong(REPORT_FREQUENCY, props.getLong(REPORT_FREQUENCY, 1000000));
        //boolean ignoreValue = props.getBoolean(BINARY_FEATURE, false);
        conf.setBoolean(BINARY_FEATURE, ignoreValue);
        conf.setBoolean(SHORT_FEATURE_INDEX, props.getBoolean(SHORT_FEATURE_INDEX, false));

        boolean penalizeIntercept = props.getBoolean(PENALIZE_INTERCEPT, false);
        String interceptKey = props.getString(INTERCEPT_KEY, LibLinearDataset.INTERCEPT_NAME);
        conf.set(INTERCEPT_KEY, interceptKey);
        //int schemaType = props.getInt(SCHEMA_TYPE, 1);

        // compute and store u into file
        // u = uplusx - z
        String uPath = outBasePath + "/iter-" + String.valueOf(i) + "/u/part-r-00000.avro";
        if (i == 1) {
            // first iteration: u starts out as an empty model map
            LinearModelUtils.writeLinearModel(conf, uPath, new HashMap<String, LinearModel>());
            if (initializeBoostRate > 0 && reg == 2) {

                conf.setFloat(RHO_ADAPT_RATE, initializeBoostRate);
            }
        } else {
            String uplusxPath = outBasePath + "/iter-" + String.valueOf(i - 1) + "/model";
            computeU(conf, uPath, uplusxPath, z);
            if (rhoAdaptCoefficient > 0) {
                // rho adapt rate decays exponentially with the iteration number
                float curRhoAdaptRate = (float) Math.exp(-(i - 1) * rhoAdaptCoefficient);
                conf.setFloat(RHO_ADAPT_RATE, curRhoAdaptRate);
            }
        }
        // write z into file
        String zPath = outBasePath + "/iter-" + String.valueOf(i) + "/init-value/part-r-00000.avro";
        LinearModelUtils.writeLinearModel(conf, zPath, z);

        // run job
        String outpath = outBasePath + "/iter-" + String.valueOf(i) + "/model";
        conf.set(U_PATH, uPath);
        conf.set(INIT_VALUE_PATH, zPath);
        conf.set(LAMBDA_RHO_MAP, rhoPath);
        if (i > 1 && mindiff < 0.001 && !aggressiveLiblinearEpsilonDecay) // need to get a more accurate estimate from liblinear
        {
            liblinearEpsilon = liblinearEpsilon / 10;
        } else if (aggressiveLiblinearEpsilonDecay && i > 5) {
            liblinearEpsilon = liblinearEpsilon / 10;
        }
        conf.setFloat(LIBLINEAR_EPSILON, liblinearEpsilon);
        //Added for logging aggressive decay
        _logger.info("Liblinear Epsilon for iter = " + String.valueOf(i) + " is: "
                + String.valueOf(liblinearEpsilon));
        _logger.info("aggressiveLiblinearEpsilonDecay=" + aggressiveLiblinearEpsilonDecay);
        AvroOutputFormat.setOutputPath(conf, new Path(outpath));
        AvroUtils.addAvroCacheFiles(conf, new Path(uPath));
        AvroUtils.addAvroCacheFiles(conf, new Path(zPath));
        AvroUtils.addAvroCacheFiles(conf, new Path(rhoPath));
        conf.setNumReduceTasks(nblocks * lambdastr.size());
        AvroJob.setInputSchema(conf, RegressionPrepareOutput.SCHEMA$);
        AvroUtils.runAvroJob(conf);
        // Load the result from the last iteration
        // compute z and u given x

        Map<String, LinearModel> xbar = LinearModelUtils.meanModel(conf, outpath, nblocks, lambdaRho.size(),
                true);
        Map<String, LinearModel> ubar = LinearModelUtils.meanModel(conf, uPath, nblocks, lambdaRho.size(),
                false);
        // keep a copy of the previous z so the change can be measured after the update
        Map<String, LinearModel> lastz = new HashMap<String, LinearModel>();
        for (String k : z.keySet()) {
            lastz.put(k, z.get(k).copy());
        }
        for (String lambda : xbar.keySet()) {
            // NOTE(review): assumes every key of xbar is also a key of z and that the parsed
            // float is a key of lambdaRho; otherwise the lines below throw - confirm.
            LinearModel thisz = z.get(lambda);
            thisz.clear();
            float l = Float.parseFloat(lambda);
            float r = lambdaRho.get(l);
            double weight;
            //L2 regularization
            if (reg == 2) {
                _logger.info("Running code for regularizer = " + String.valueOf(reg));
                weight = nblocks * r / (l + nblocks * r);
                Map<String, Double> weightmap = new HashMap<String, Double>();
                for (String k : lambdaMap.keySet()) {
                    weightmap.put(k, nblocks * r / (lambdaMap.get(k) + nblocks * r + 0.0));
                }
                thisz.linearCombine(1.0, weight, xbar.get(lambda), weightmap);
                if (!ubar.isEmpty()) {
                    thisz.linearCombine(1.0, weight, ubar.get(lambda), weightmap);
                }
                if (!penalizeIntercept) {
                    // unpenalized intercept: take it directly from xbar (+ ubar when present)
                    if (ubar.isEmpty()) {
                        thisz.setIntercept(xbar.get(lambda).getIntercept());
                    } else {
                        thisz.setIntercept(xbar.get(lambda).getIntercept() + ubar.get(lambda).getIntercept());
                    }
                }
                z.put(lambda, thisz);
            } else {
                // L1 regularization

                _logger.info("Running code for regularizer = " + String.valueOf(reg));
                weight = l / (r * nblocks + 0.0);
                // NOTE(review): weightmap is filled here but not used in this branch - confirm.
                Map<String, Double> weightmap = new HashMap<String, Double>();
                for (String k : lambdaMap.keySet()) {
                    weightmap.put(k, lambdaMap.get(k) / (r * nblocks + 0.0));
                }
                // LinearModel thisz = new LinearModel();
                thisz.linearCombine(1.0, 1.0, xbar.get(lambda));
                if (!ubar.isEmpty()) {
                    thisz.linearCombine(1.0, 1.0, ubar.get(lambda));
                }
                // Iterative Thresholding
                // NOTE(review): coefficients with |val| <= weight are left unchanged here; a
                // standard soft-threshold would set them to 0 - confirm this is intended.
                Map<String, Double> thisCoefficients = thisz.getCoefficients();
                for (String k : thisCoefficients.keySet()) {
                    double val = thisCoefficients.get(k);
                    if (val > weight) {
                        thisCoefficients.put(k, val - weight);
                    } else if (val < -weight) {
                        thisCoefficients.put(k, val + weight);
                    }
                }
                thisz.setCoefficients(thisCoefficients);
                if (!penalizeIntercept) {
                    if (ubar.isEmpty()) {
                        thisz.setIntercept(xbar.get(lambda).getIntercept());
                    } else {
                        thisz.setIntercept(xbar.get(lambda).getIntercept() + ubar.get(lambda).getIntercept());
                    }
                }
                z.put(lambda, thisz);
            }
        }
        xbar.clear();
        ubar.clear();
        // Output max difference between last z and this z
        mindiff = 99999999;
        double maxdiff = 0;
        for (String k : z.keySet()) {
            LinearModel tmp = lastz.get(k);
            if (tmp == null)
                tmp = new LinearModel();
            // tmp becomes (last z - this z); its largest absolute entry is the change for this lambda
            tmp.linearCombine(1, -1, z.get(k));
            double diff = tmp.maxAbsValue();
            _logger.info(
                    "For lambda=" + k + ": Max Difference between last z and this z = " + String.valueOf(diff));
            tmp.clear();
            if (mindiff > diff)
                mindiff = diff;
            if (maxdiff < diff)
                maxdiff = diff;
        }
        double epsilon = props.getDouble(EPSILON, 0.0001);
        // remove tmp files?
        if (props.getBoolean(REMOVE_TMP_DIR, false) && i >= 2) {
            FileSystem fs = FileSystem.get(conf);
            fs.delete(new Path(outBasePath + "/iter-" + String.valueOf(i - 1)), true);
        }
        // Output testloglik and update best model
        if (testLoglikPerIter) {
            updateLogLikBestModel(conf, i, z, testPath, ignoreValue, bestTestLoglik, outBasePath,
                    numClickReplicates);
        }

        // converged: z is stable across all lambdas and liblinear already runs at high precision
        if (maxdiff < epsilon && liblinearEpsilon <= 0.00001) {
            break;
        }
    }

    // write z into file
    String zPath = outBasePath + "/final-model/part-r-00000.avro";
    LinearModelUtils.writeLinearModel(conf, zPath, z);
    // remove tmp files?
    if (props.getBoolean(REMOVE_TMP_DIR, false)) {
        FileSystem fs = FileSystem.get(conf);
        Path initalModelPath = new Path(outBasePath + "/initialModel");
        if (fs.exists(initalModelPath)) {
            fs.delete(initalModelPath, true);
        }
        // i is niter + 1 after a full run or the breaking iteration after early exit, so this
        // window covers the iteration directories that may still be present
        for (int j = i - 2; j <= i; j++) {
            Path deletepath = new Path(outBasePath + "/iter-" + String.valueOf(j));
            if (fs.exists(deletepath)) {
                fs.delete(deletepath, true);
            }
        }
        fs.delete(new Path(outBasePath + "/tmp-data"), true);
    }

}

From source file:com.linkedin.mlease.regression.jobs.RegressionTest.java

License:Open Source License

/**
 * Runs one test job per configured lambda against the final model and, when a
 * best-model directory exists under the model base path, one more job against it.
 * Nothing is run when the "input.paths" setting is empty.
 *
 * @throws Exception if job configuration or execution fails
 */
@Override
public void run() throws Exception {
    JobConfig jobProps = super.getJobConfig();
    JobConf jobConf = super.createJobConf();
    if (jobProps.getString("input.paths").equals("")) {
        _logger.info("test.input.paths is empty! So no test will be done!");
        return;
    }

    // set up configuration
    _logger.info("Now starting test...");
    List<String> lambdas = jobProps.getStringList(LAMBDA, ",");
    String outputRoot = jobProps.getString(OUTPUT_BASE_PATH);
    for (String lambda : lambdas) {
        String lambdaOutput = outputRoot + "/lambda-" + lambda;
        jobProps.put(AbstractAvroJob.OUTPUT_PATH, lambdaOutput);
        jobConf = createJobConf(AdmmTestMapper.class, AdmmTestReducer.class);
        AvroOutputFormat.setOutputPath(jobConf, new Path(lambdaOutput));
        String finalModelPath = jobProps.getString(MODEL_BASE_PATH) + "/final-model";
        AvroUtils.addAvroCacheFiles(jobConf, new Path(finalModelPath));
        jobConf.set(MODEL_PATH, finalModelPath);
        jobConf.setFloat(LAMBDA, Float.parseFloat(lambda));
        jobConf.setBoolean(BINARY_FEATURE, jobProps.getBoolean(BINARY_FEATURE, false));
        AvroJob.setInputSchema(jobConf, AvroUtils.getAvroInputSchema(jobConf));
        AvroUtils.runAvroJob(jobConf);
    }

    // also do full prediction on best-model if it exists
    FileSystem fileSystem = FileSystem.get(jobConf);
    String bestModelPath = jobProps.getString(MODEL_BASE_PATH) + "/best-model";
    if (!fileSystem.exists(new Path(bestModelPath))) {
        return;
    }
    String bestModelOutput = outputRoot + "/best-model";
    jobProps.put(AbstractAvroJob.OUTPUT_PATH, bestModelOutput);
    jobConf = createJobConf(AdmmTestMapper.class, AdmmTestReducer.class);
    AvroOutputFormat.setOutputPath(jobConf, new Path(bestModelOutput));
    AvroUtils.addAvroCacheFiles(jobConf, new Path(bestModelPath));
    jobConf.set(MODEL_PATH, bestModelPath);
    // the best-model run passes -1 in place of a real lambda
    jobConf.setFloat(LAMBDA, -1);
    jobConf.setBoolean(BINARY_FEATURE, jobProps.getBoolean(BINARY_FEATURE, false));
    AvroJob.setInputSchema(jobConf, AvroUtils.getAvroInputSchema(jobConf));
    AvroUtils.runAvroJob(jobConf);
}

From source file:com.linkedin.mlease.regression.jobs.RegressionTestLoglik.java

License:Open Source License

/**
 * Computes the test log-likelihood for the prediction output of every configured
 * lambda whose input directory exists, and for the best-model output when present.
 * Nothing is computed when the "get.test.loglik" setting is false.
 *
 * @throws Exception if job configuration or execution fails
 */
@Override
public void run() throws Exception {
    JobConfig jobProps = super.getJobConfig();
    JobConf jobConf = super.createJobConf();
    FileSystem fileSystem = FileSystem.get(jobConf);
    if (!jobProps.getBoolean("get.test.loglik", true)) {
        _logger.info("get.test.loglik is false, so no test loglik will be generated!");
        return;
    }

    // set up configuration
    _logger.info("Now starting computing test-loglik...");
    List<String> lambdas = jobProps.getStringList(LAMBDA, ",");
    String outputRoot = jobProps.getString(OUTPUT_BASE_PATH);
    String inputRoot = jobProps.getString(INPUT_BASE_PATHS);
    if (lambdas != null) {
        for (String lambda : lambdas) {
            String lambdaInput = inputRoot + "/lambda-" + lambda;
            String lambdaOutput = outputRoot + "/lambda-" + lambda + "/_loglik";
            if (fileSystem.exists(new Path(lambdaInput))) {
                jobProps.put(AbstractAvroJob.INPUT_PATHS, lambdaInput);
                jobProps.put(AbstractAvroJob.OUTPUT_PATH, lambdaOutput);
                jobConf = super.createJobConf(AdmmTestLoglikMapper.class, AdmmTestLoglikReducer.class,
                        AdmmTestLoglikCombiner.class,
                        Pair.getPairSchema(Schema.create(Type.STRING), RegressionTestLoglikOutput.SCHEMA$),
                        RegressionTestLoglikOutput.SCHEMA$);
                _logger.info("Computing loglik...");
                AvroOutputFormat.setOutputPath(jobConf, new Path(lambdaOutput));
                AvroJob.setInputSchema(jobConf, AvroUtils.getAvroInputSchema(jobConf));
                AvroUtils.runAvroJob(jobConf);
            } else {
                // missing input for this lambda: report it and move on to the next one
                _logger.info("Can't find the input path " + lambdaInput);
            }
        }
    }

    // also do testloglik on best-model if it exists
    String bestModelInput = inputRoot + "/best-model";
    if (!fileSystem.exists(new Path(bestModelInput))) {
        return;
    }
    String bestModelOutput = outputRoot + "/best-model/_loglik";
    jobProps.put(AbstractAvroJob.INPUT_PATHS, bestModelInput);
    jobProps.put(AbstractAvroJob.OUTPUT_PATH, bestModelOutput);
    // unlike the per-lambda jobs, this job is configured without the combiner class
    jobConf = super.createJobConf(AdmmTestLoglikMapper.class, AdmmTestLoglikReducer.class,
            Pair.getPairSchema(Schema.create(Type.STRING), RegressionTestLoglikOutput.SCHEMA$),
            RegressionTestLoglikOutput.SCHEMA$);
    _logger.info("Computing loglik...");
    AvroOutputFormat.setOutputPath(jobConf, new Path(bestModelOutput));
    AvroJob.setInputSchema(jobConf, AvroUtils.getAvroInputSchema(jobConf));
    AvroUtils.runAvroJob(jobConf);
}

From source file:com.linkedin.mlease.utils.Util.java

License:Open Source License

/**
 * Tells whether the given path string names something that exists.
 *
 * The path is checked on the file system it resolves to (via
 * {@code Path.getFileSystem}) rather than on the default file system, so the
 * check also applies to a path whose URI names another file system.
 *
 * @param pathstr the path to check; {@code null} or the empty string yields {@code false}
 * @return {@code true} if the path exists, {@code false} otherwise
 * @throws IOException if the file system cannot be reached or queried
 */
public static boolean checkPath(String pathstr) throws IOException {
    // an empty string cannot be turned into a Path, so answer directly
    if (pathstr == null || pathstr.isEmpty()) {
        return false;
    }
    Path path = new Path(pathstr);
    Configuration conf = new Configuration();
    FileSystem fs = path.getFileSystem(conf);
    return fs.exists(path);
}

From source file:com.linkedin.oneclick.wordcount.WordCount.java

License:Apache License

/**
 * Recursively deletes {@code path} if it exists and hands the same path back.
 *
 * @param conf configuration used to resolve the file system of {@code path}
 * @param path the path to remove
 * @return the {@code path} argument, unchanged
 * @throws IOException if the file system cannot be queried or the delete fails
 */
static Path clean(Configuration conf, Path path) throws IOException {
    final FileSystem fileSystem = path.getFileSystem(conf);
    final boolean present = fileSystem.exists(path);
    if (present) {
        fileSystem.delete(path, true);
    }
    return path;
}