Example usage for org.apache.hadoop.fs FileSystem exists

List of usage examples for org.apache.hadoop.fs FileSystem exists

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.exists.

Prototype

public boolean exists(Path f) throws IOException 

Source Link

Document

Check if a path exists.

Usage

From source file:com.ibm.bi.dml.yarn.DMLYarnClient.java

License:Open Source License

/**
 * /*  w  w  w  .j a va  2s  .c  o m*/
 * @param conf
 * @param yconf
 * @param appId
 * @return
 */
private String readMessageToHDFSWorkingDir(DMLConfig conf, YarnConfiguration yconf, ApplicationId appId) {
    String ret = null;

    //construct working directory (consistent with client)
    String hdfsWD = DMLAppMasterUtils.constructHDFSWorkingDir(conf, appId);
    Path msgPath = new Path(hdfsWD, DMLYarnClient.DML_STOPMSG_NAME);

    //write given message to hdfs
    try {
        FileSystem fs = FileSystem.get(yconf);
        if (fs.exists(msgPath)) {
            FSDataInputStream fin = fs.open(msgPath);
            BufferedReader br = new BufferedReader(new InputStreamReader(fin));
            ret = br.readLine();
            fin.close();
            LOG.debug("Stop message read from HDFS file " + msgPath + ": " + ret);
        }
    } catch (Exception ex) {
        LOG.error("Failed to read stop message from HDFS file: " + msgPath, ex);
    }

    return ret;
}

From source file:com.ibm.jaql.io.hadoop.CompositeOutputAdapter.java

License:Apache License

/**
 * Runs the output-spec check of every wrapped output against its own sub-configuration and
 * ensures that a "_temporary" directory exists below each configured output path.
 *
 * @param ignored file system handle that is only forwarded to the wrapped outputs
 * @param conf job configuration (not consulted here; the per-output sub-configurations are)
 * @throws IOException if a wrapped check or a file system operation fails
 */
@Override
public void checkOutputSpecs(FileSystem ignored, JobConf conf) throws IOException {
    for (int idx = 0; idx < outputs.length; idx++) {
        outputs[idx].checkOutputSpecs(ignored, subconfs[idx]);

        // HACK (per the original author): Hadoop 0.18 special-cases FileOutputFormat. The
        // temporary directory is normally created by the Task or LocalJobRunner, which also
        // promote the temporary files to the parent on completion. It is created here instead
        // when it is missing.
        Path target = FileOutputFormat.getOutputPath(subconfs[idx]);
        if (target == null) {
            continue; // this output has no file-based output path
        }

        // "_temporary" mirrors MRConstants.TEMP_DIR_NAME, which is not public.
        Path tempDir = new Path(target, "_temporary");
        FileSystem tempFs = tempDir.getFileSystem(subconfs[idx]);
        boolean alreadyPresent = tempFs.exists(tempDir);
        if (!alreadyPresent) {
            tempFs.mkdirs(tempDir);
        }
    }
}

From source file:com.ibm.jaql.io.hadoop.FileOutputConfigurator.java

License:Apache License

/**
 * Configures the job for sequential output written directly to {@code location}.
 *
 * <p>Existing output is overwritten: a plain file is deleted, and a directory is deleted
 * recursively, but only if it contains nothing except map-reduce output entries
 * ({@code part-N}, {@code .part-N.crc}, {@code .}, {@code ..}).
 *
 * @param conf job configuration to update
 * @throws IOException if the target is a directory containing an entry that is not
 *         map-reduce output
 * @throws Exception if serializer registration or a file system operation fails
 */
public void setSequential(JobConf conf) throws Exception {
    registerSerializers(conf);

    // For an expression, the location is the final file name
    Path outPath = new Path(location);
    FileSystem fs = outPath.getFileSystem(conf);
    outPath = outPath.makeQualified(fs);
    if (fs.exists(outPath)) {
        // TODO: Jaql currently has overwrite semantics; add flag to control this
        if (fs.isFile(outPath)) {
            fs.delete(outPath, false);
        } else {
            // Look for anything that is NOT map-reduce output. The filter accepts at most one
            // such entry: a single offender is enough to refuse the recursive delete.
            FileStatus[] nonMR = fs.listStatus(outPath, new PathFilter() {
                boolean onlyOne = true;

                public boolean accept(Path path) {
                    String name = path.getName();
                    // The dot before "crc" is matched literally, so only real checksum
                    // files (".part-N.crc") count as map-reduce output.
                    if (name.matches("([.][.]?)|([.]part-[0-9]+[.]crc)|(part-[0-9]+)")) {
                        return false;
                    }
                    if (onlyOne) {
                        onlyOne = false;
                        return true;
                    }
                    return false;
                }
            });
            if (nonMR.length > 0) {
                throw new IOException(
                        "directory exists and is not a map-reduce output directory: " + nonMR[0].getPath());
            }
            fs.delete(outPath, true);
        }
    }

    // In sequential mode, we will write directly to the output file
    // and bypass the _temporary directory and rename of the standard
    // FileOutputCommitter by using our own DirectFileOutputCommitter.
    FileOutputFormat.setOutputPath(conf, outPath.getParent());
    conf.setClass("mapred.output.committer.class", DirectFileOutputCommiter.class, OutputCommitter.class);
}

From source file:com.ibm.jaql.lang.expr.system.RUtil.java

License:Apache License

/**
 * Function that puts a local file into HDFS.
 * @param localPath/*from ww  w .ja  v  a  2 s.  c  o m*/
 * @param hdfsPath
 * @return
 */
public static boolean saveToHDFS(String localPath, String hdfsPath) {
    try {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        int bufferSize = 4 * 1024;
        byte[] buffer = new byte[bufferSize];
        InputStream input = new BufferedInputStream(new FileInputStream(localPath), bufferSize);

        Path outputPath = new Path(hdfsPath);
        if (fs.exists(outputPath)) {
            if (!fs.isFile(outputPath)) {
                throw new IOException("Output path is a directory that already exists.");
            }
            LOG.info("Output path" + outputPath + " already exists. Overwriting it.");
        }
        FSDataOutputStream output = fs.create(outputPath, true);

        int numBytesRead;
        while ((numBytesRead = input.read(buffer)) > 0) {
            output.write(buffer, 0, numBytesRead);
        }
        input.close();
        output.close();
        return true;
    } catch (IOException e) {
        LOG.info("Error in writing file to HDFS.", e);
        return false;
    }
}

From source file:com.ibm.jaql.UtilForTest.java

License:Apache License

/**
 * Recursively deletes a directory, but only when the system property
 * {@code test.cleanup} is set to {@code "true"}.
 *
 * @param dir path of the directory to delete
 * @throws IOException if a file system operation fails
 */
public static void cleanUpHDFS(String dir) throws IOException {
    String cleanupFlag = System.getProperty("test.cleanup");
    if (!"true".equals(cleanupFlag)) {
        return; // cleanup not requested
    }

    FileSystem hdfs = FileSystem.get(new Configuration());
    Path target = new Path(dir);
    if (hdfs.exists(target)) {
        hdfs.delete(target, true);
    }
}

From source file:com.ibm.stocator.fs.swift2d.systemtests.SwiftTestUtils.java

License:Open Source License

/**
 * Asserts that something exists at the given path, whatever its type
 * (file or directory).
 *
 * @param fileSystem filesystem to examine
 * @param message text placed at the start of the assertion failure message
 * @param path path in the filesystem
 * @throws IOException IO problems
 */
public static void assertPathExists(FileSystem fileSystem, String message, Path path) throws IOException {
    if (fileSystem.exists(path)) {
        return;
    }
    // nothing at the path: fail and name the parent to help locate the problem
    fail(message + ": not found " + path + " in " + path.getParent());
}

From source file:com.idvp.platform.hdfs.BucketWriter.java

License:Apache License

/**
 * Renames the bucket file from its temporary (.tmp) path to its permanent location.
 *
 * <p>Paths and file system are passed in rather than read from fields. Per the original
 * author's note, the writer instance is reused for the next file when it rolls on
 * rollCount or rollSize, so its fields may no longer point at a previous file that still
 * has to be closed and renamed. This method may also be called later from the scheduled
 * thread after a close timeout.
 *
 * @param bucketPath current path of the bucket file
 * @param targetPath path the bucket file should be renamed to
 * @param fs file system on which the rename is performed
 * @throws IOException if the rename call fails
 * @throws InterruptedException if the timed call is interrupted
 */
private void renameBucket(String bucketPath, String targetPath, final FileSystem fs)
        throws IOException, InterruptedException {
    if (!bucketPath.equals(targetPath)) {
        final Path from = new Path(bucketPath);
        final Path to = new Path(targetPath);

        CallRunner<Void> renameTask = () -> {
            // both file system calls below could block, hence the timeout wrapper
            if (fs.exists(from)) {
                LOG.info("Renaming " + from + " to " + to);
                renameTries.incrementAndGet();
                fs.rename(from, to);
            }
            return null;
        };
        callWithTimeout(renameTask);
    }
}

From source file:com.iflytek.spider.crawl.CrawlDb.java

License:Apache License

/**
 * Merges the fetch and parse data of the given segments into the crawl db.
 *
 * <p>A lock file is created in the crawl db for the duration of the update. If the job
 * throws or reports failure, the lock file and the job's output directory are removed and
 * the crawl db is not replaced.
 *
 * @param crawlDb crawl db directory
 * @param segments segment directories whose fetch/parse data is merged
 * @param additionsAllowed value stored in the job configuration under
 *        {@code CRAWLDB_ADDITIONS_ALLOWED}
 * @param force passed to {@code LockUtil.createLockFile}
 * @throws IOException if a file system operation fails or the job does not succeed
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if the job cannot be submitted because a class is missing
 */
public void update(Path crawlDb, Path[] segments, boolean additionsAllowed, boolean force)
        throws IOException, InterruptedException, ClassNotFoundException {
    FileSystem fs = FileSystem.get(getConf());
    Path lock = new Path(crawlDb, LOCK_NAME);
    LockUtil.createLockFile(fs, lock, force);
    if (LOG.isInfoEnabled()) {
        LOG.info("CrawlDb update: starting");
        LOG.info("CrawlDb update: db: " + crawlDb);
        LOG.info("CrawlDb update: segments: " + Arrays.asList(segments));
        LOG.info("CrawlDb update: additions allowed: " + additionsAllowed);
    }

    Job job = CrawlDb.createJob(getConf(), crawlDb);
    job.getConfiguration().setBoolean(CRAWLDB_ADDITIONS_ALLOWED, additionsAllowed);
    for (int i = 0; i < segments.length; i++) {
        Path fetch = new Path(segments[i], CrawlDatum.FETCH_DIR_NAME);
        Path parse = new Path(segments[i], CrawlDatum.PARSE_DIR_NAME);
        if (fs.exists(fetch)) {
            FileInputFormat.addInputPath(job, fetch);
        }
        if (fs.exists(parse)) {
            FileInputFormat.addInputPath(job, parse);
        } else {
            LOG.info(" - skipping invalid segment " + segments[i]);
        }
    }

    if (LOG.isInfoEnabled()) {
        LOG.info("CrawlDb update: Merging segment data into db.");
    }

    boolean succeeded = false;
    try {
        // waitForCompletion reports job failure through its return value, not an exception;
        // installing the output of a failed job would replace the db with incomplete data.
        if (!job.waitForCompletion(true)) {
            throw new IOException("CrawlDb update: job did not complete successfully");
        }
        succeeded = true;
    } finally {
        // One cleanup path for every failure, including unchecked exceptions.
        if (!succeeded) {
            LockUtil.removeLockFile(fs, lock);
            Path outPath = FileOutputFormat.getOutputPath(job);
            if (outPath != null && fs.exists(outPath)) {
                fs.delete(outPath, true);
            }
        }
    }

    CrawlDb.install(job, crawlDb);
    if (LOG.isInfoEnabled()) {
        LOG.info("CrawlDb update: done");
    }
}

From source file:com.iflytek.spider.crawl.CrawlDb.java

License:Apache License

/**
 * Makes the output of the given job the current crawl db and releases the crawl db lock.
 *
 * <p>The existing current db, if any, is first moved to "old"; the job output is then
 * renamed to the current db, after which "old" is deleted.
 *
 * <p>NOTE(review): the boolean results of both {@code rename} calls are ignored, so "old"
 * is deleted even if moving the new db into place reported failure. Confirm whether that
 * is acceptable for the file systems in use.
 *
 * @param job finished job whose output path holds the new crawl db
 * @param crawlDb crawl db directory
 * @throws IOException if a file system operation fails
 */
public static void install(Job job, Path crawlDb) throws IOException {
    Path freshDb = FileOutputFormat.getOutputPath(job);
    FileSystem fs = FileSystem.get(job.getConfiguration());
    Path backup = new Path(crawlDb, "old");
    Path live = new Path(crawlDb, CURRENT_NAME);

    // Move the live db aside, replacing any stale backup.
    if (fs.exists(live)) {
        if (fs.exists(backup)) {
            fs.delete(backup, true);
        }
        fs.rename(live, backup);
    }

    // Promote the job output to be the live db.
    fs.mkdirs(crawlDb);
    fs.rename(freshDb, live);

    // The backup is no longer needed.
    if (fs.exists(backup)) {
        fs.delete(backup, true);
    }

    LockUtil.removeLockFile(fs, new Path(crawlDb, LOCK_NAME));
}

From source file:com.iflytek.spider.parse.ParseSegment.java

License:Apache License

/**
 * Parses every entry listed under the directory given as the first argument.
 *
 * <p>For each entry, existing "crawl_parse" and "parse_data" sub-directories are deleted
 * recursively before {@code parse} is called on it. With no arguments the usage text is
 * printed to standard error and the JVM exits with status -1.
 *
 * @param args command line arguments; {@code args[0]} is the segments directory
 * @return 0 once all entries have been processed
 * @throws Exception if listing, deleting or parsing fails
 */
public int run(String[] args) throws Exception {
    if (args.length == 0) {
        System.err.println("Usage: ParseSegment segments");
        System.exit(-1);
    }

    FileSystem fs = FileSystem.get(getConf());
    String[] staleOutputs = { "crawl_parse", "parse_data" };
    for (FileStatus segment : fs.listStatus(new Path(args[0]))) {
        Path segmentDir = segment.getPath();
        // drop output of an earlier parse so the segment can be parsed again
        for (String outputName : staleOutputs) {
            Path stale = new Path(segmentDir, outputName);
            if (fs.exists(stale)) {
                fs.delete(stale, true);
            }
        }
        parse(segmentDir);
    }
    return 0;
}