Example usage for org.apache.hadoop.fs FileSystem exists

List of usage examples for org.apache.hadoop.fs FileSystem exists

Introduction

On this page you can find example usages of the exists method of org.apache.hadoop.fs.FileSystem.

Prototype

public boolean exists(Path f) throws IOException 

Source Link

Document

Check if a path exists.

Usage

From source file:com.datatorrent.stram.client.StramAppLauncher.java

License:Apache License

/**
 * Submit application to the cluster and return the app id.
 * Sets the context class loader for application dependencies.
 *
 * <p>Optional security settings (token lifetimes, key tab file, token refresh
 * factor) are read from the configuration and copied onto the logical plan
 * before the client is started. Numeric settings are trimmed before parsing,
 * so values with surrounding whitespace are accepted.
 *
 * @param appConfig factory used to create the logical plan of the application
 * @return ApplicationId
 * @throws Exception if any step of the launch fails
 */
public ApplicationId launchApp(AppFactory appConfig) throws Exception {
    loadDependencies();
    Configuration conf = propertiesBuilder.conf;
    conf.setEnum(StreamingApplication.ENVIRONMENT, StreamingApplication.Environment.CLUSTER);
    LogicalPlan dag = appConfig.createApp(propertiesBuilder);

    // Long.parseLong rejects surrounding whitespace, so parse the trimmed value
    // that the emptiness check was performed on.
    String hdfsTokenMaxLifeTime = conf.get(StramClientUtils.HDFS_TOKEN_MAX_LIFE_TIME);
    if (hdfsTokenMaxLifeTime != null && hdfsTokenMaxLifeTime.trim().length() > 0) {
        dag.setAttribute(LogicalPlan.HDFS_TOKEN_LIFE_TIME, Long.parseLong(hdfsTokenMaxLifeTime.trim()));
    }
    String rmTokenMaxLifeTime = conf.get(StramClientUtils.RM_TOKEN_MAX_LIFE_TIME);
    if (rmTokenMaxLifeTime != null && rmTokenMaxLifeTime.trim().length() > 0) {
        dag.setAttribute(LogicalPlan.RM_TOKEN_LIFE_TIME, Long.parseLong(rmTokenMaxLifeTime.trim()));
    }

    String keyTabFile = conf.get(StramClientUtils.KEY_TAB_FILE);
    String authKeyTab = conf.get(StramUserLogin.DT_AUTH_KEYTAB);
    if (keyTabFile != null) {
        dag.setAttribute(LogicalPlan.KEY_TAB_FILE, keyTabFile);
    } else if (authKeyTab != null) {
        // Copy the local key tab to the DFS root directory unless it is already there.
        Path localKeyTabPath = new Path(authKeyTab);
        FileSystem fs = StramClientUtils.newFileSystemInstance(conf);
        try {
            Path destPath = new Path(StramClientUtils.getDTDFSRootDir(fs, conf), localKeyTabPath.getName());
            if (!fs.exists(destPath)) {
                fs.copyFromLocalFile(false, false, localKeyTabPath, destPath);
            }
            dag.setAttribute(LogicalPlan.KEY_TAB_FILE, destPath.toString());
        } finally {
            fs.close();
        }
    }

    String tokenRefreshFactor = conf.get(StramClientUtils.TOKEN_ANTICIPATORY_REFRESH_FACTOR);
    if (tokenRefreshFactor != null && tokenRefreshFactor.trim().length() > 0) {
        dag.setAttribute(LogicalPlan.TOKEN_REFRESH_ANTICIPATORY_FACTOR,
                Double.parseDouble(tokenRefreshFactor.trim()));
    }

    StramClient client = new StramClient(conf, dag);
    try {
        client.start();

        // Insertion-ordered set: configured lib jars first, then deploy jars, without duplicates.
        LinkedHashSet<String> libjars = Sets.newLinkedHashSet();
        String libjarsCsv = conf.get(LIBJARS_CONF_KEY_NAME);
        if (libjarsCsv != null) {
            String[] jars = StringUtils.splitByWholeSeparator(libjarsCsv, StramClient.LIB_JARS_SEP);
            libjars.addAll(Arrays.asList(jars));
        }
        if (deployJars != null) {
            for (File deployJar : deployJars) {
                libjars.add(deployJar.getAbsolutePath());
            }
        }

        client.setResources(libjars);
        client.setFiles(conf.get(FILES_CONF_KEY_NAME));
        client.setArchives(conf.get(ARCHIVES_CONF_KEY_NAME));
        client.setOriginalAppId(conf.get(ORIGINAL_APP_ID));
        client.setQueueName(conf.get(QUEUE_NAME));
        client.startApplication();
        return client.getApplicationReport().getApplicationId();
    } finally {
        client.stop();
    }
}

From source file:com.datatorrent.stram.StramClient.java

License:Apache License

/**
 * Copies the recovery state of a previous application run into the directory
 * of the new application.
 *
 * <p>The snapshot is restored from {@code origAppDir}, rewritten with the new
 * application id and saved under the new application path together with the
 * recovery log. Sub directories of {@code origAppDir} that do not yet exist
 * under the new path are copied as well.
 *
 * @param origAppDir application directory of the previous run
 * @throws IOException if reading or writing the state fails
 * @throws IllegalArgumentException if no snapshot is found in {@code origAppDir}
 */
public void copyInitialState(Path origAppDir) throws IOException {
    // locate previous snapshot
    String newAppDir = this.dag.assertAppPath();

    FSRecoveryHandler recoveryHandler = new FSRecoveryHandler(origAppDir.toString(), conf);
    // read snapshot against new dependencies
    Object snapshot = recoveryHandler.restore();
    if (snapshot == null) {
        throw new IllegalArgumentException("No previous application state found in " + origAppDir);
    }

    FileSystem fs = null;
    try {
        InputStream logIs = recoveryHandler.getLog();
        try {
            // modify snapshot state to switch app id
            ((StreamingContainerManager.CheckpointState) snapshot).setApplicationId(this.dag, conf);
            Path checkpointPath = new Path(newAppDir, LogicalPlan.SUBDIR_CHECKPOINTS);

            // newInstance returns a handle owned by this method; it is closed in the outer finally
            fs = FileSystem.newInstance(origAppDir.toUri(), conf);
            // remove the path that was created by the storage agent during deserialization and replacement
            fs.delete(checkpointPath, true);

            // write snapshot to new location
            recoveryHandler = new FSRecoveryHandler(newAppDir, conf);
            recoveryHandler.save(snapshot);
            OutputStream logOs = recoveryHandler.rotateLog();
            try {
                IOUtils.copy(logIs, logOs);
                logOs.flush();
            } finally {
                logOs.close();
            }
        } finally {
            logIs.close();
        }

        // copy sub directories that are not present in target
        FileStatus[] lFiles = fs.listStatus(origAppDir);
        for (FileStatus f : lFiles) {
            if (f.isDirectory()) {
                String targetPath = f.getPath().toString().replace(origAppDir.toString(), newAppDir);
                if (!fs.exists(new Path(targetPath))) {
                    LOG.debug("Copying {} to {}", f.getPath(), targetPath);
                    FileUtil.copy(fs, f.getPath(), fs, new Path(targetPath), false, conf);
                } else {
                    LOG.debug("Ignoring {} as it already exists under {}", f.getPath(), targetPath);
                }
            }
        }
    } finally {
        if (fs != null) {
            fs.close();
        }
    }

}

From source file:com.datatorrent.stram.util.FSUtil.java

License:Apache License

/**
 * Creates the given directory, tolerating file systems that raise an
 * exception when the directory is already present.
 *
 * @param fs   file system to create the directory on
 * @param dest directory to create
 * @return the result of {@code fs.mkdirs(dest)}, or {@code false} when the
 *         call failed but the path exists
 * @throws IOException if creation failed and the path does not exist
 */
public static boolean mkdirs(FileSystem fs, Path dest) throws IOException {
    try {
        return fs.mkdirs(dest);
    } catch (IOException mkdirFailure) {
        // some file system (MapR) throw exception if folder exists
        if (fs.exists(dest)) {
            return false;
        }
        throw mkdirFailure;
    }
}

From source file:com.davidgildeh.hadoop.utils.FileUtils.java

License:Apache License

/**
 * Verifies that a path exists. When it does not, the problem is logged, the
 * file system is closed and a FileNotFoundException is thrown.
 *
 * @param fileSystem        The file system the path is looked up on
 * @param path              The path of the file to check
 * @throws IOException      FileNotFoundException if the path is missing, or
 *                          any error raised while checking or closing
 */
private static void checkFileExists(FileSystem fileSystem, Path path) throws IOException {

    if (fileSystem.exists(path)) {
        return;
    }

    // Same text is used for the log entry and the exception
    final String message = "Path '" + path.toString() + "' does not exist.";
    LOG.error(message);
    fileSystem.close();
    throw new FileNotFoundException(message);
}

From source file:com.digitalpebble.behemoth.gate.GATEDriver.java

License:Apache License

/**
 * Configures and runs the job that processes a Behemoth corpus with a zipped
 * GATE application.
 *
 * <p>Expected arguments: {@code in out path_gate_file [-XML]}.
 *
 * @param args command line arguments
 * @return 0 when the job completed, -1 on wrong usage, when the GATE
 *         application cannot be found, or when the job failed
 * @throws Exception if the job cannot be set up
 */
public int run(String[] args) throws Exception {

    if (args.length < 3 || args.length > 4) {
        String syntax = "com.digitalpebble.behemoth.gate.GATEDriver in out path_gate_file [-XML]";
        System.err.println(syntax);
        return -1;
    }

    final FileSystem fs = FileSystem.get(getConf());

    boolean dumpGATEXML = false;

    for (String arg : args) {
        if (arg.equalsIgnoreCase("-xml")) {
            dumpGATEXML = true;
        }
    }

    Path inputPath = new Path(args[0]);
    Path outputPath = new Path(args[1]);
    String zip_application_path = args[2];

    // check that the GATE application has been stored on HDFS
    Path zap = new Path(zip_application_path);
    if (!fs.exists(zap)) {
        System.err
                .println("The GATE application " + zip_application_path + " can't be found on HDFS - aborting");
        return -1;
    }

    JobConf job = new JobConf(getConf());
    // MUST not forget the line below
    job.setJarByClass(this.getClass());

    job.setJobName("Processing " + args[0] + " with GATE application from " + zip_application_path);

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);

    job.setOutputKeyClass(Text.class);

    if (dumpGATEXML) {
        job.setOutputValueClass(Text.class);
        job.setMapperClass(GATEXMLMapper.class);
    } else {
        job.setOutputValueClass(BehemothDocument.class);
        job.setMapperClass(GATEMapper.class);
    }

    // detect if any filters have been defined
    // and activate the reducer accordingly
    boolean isFilterRequired = BehemothReducer.isRequired(job);
    if (isFilterRequired) {
        job.setReducerClass(BehemothReducer.class);
    } else {
        job.setNumReduceTasks(0);
    }

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    // push the zipped_gate_application onto the DistributedCache
    DistributedCache.addCacheArchive(new URI(zip_application_path), job);

    job.set("gate.application.path", zip_application_path);

    try {
        long start = System.currentTimeMillis();
        JobClient.runJob(job);
        long finish = System.currentTimeMillis();
        if (LOG.isInfoEnabled()) {
            LOG.info("GATEDriver completed. Timing: " + (finish - start) + " ms");
        }
    } catch (Exception e) {
        LOG.error("Exception caught", e);
        // leave even partial output in place, but report the failure to the
        // caller instead of returning a success code
        return -1;
    }

    return 0;
}

From source file:com.digitalpebble.behemoth.uima.UIMADriver.java

License:Apache License

/**
 * Configures and runs the map-only job that processes a Behemoth corpus with
 * a UIMA pear file.
 *
 * <p>Expected arguments: {@code in out path_pear_file}.
 *
 * @param args command line arguments
 * @return 0 when the job completed, -1 on wrong usage, when the pear file
 *         cannot be found, or when the job failed
 * @throws Exception if the job cannot be set up or the output cannot be
 *         deleted after a failure
 */
public int run(String[] args) throws Exception {

    if (args.length != 3) {
        String syntax = "com.digitalpebble.behemoth.uima.UIMADriver in out path_pear_file";
        System.err.println(syntax);
        return -1;
    }

    final FileSystem fs = FileSystem.get(getConf());

    Path inputPath = new Path(args[0]);
    Path outputPath = new Path(args[1]);
    String pearPath = args[2];

    // check that the UIMA pear file has been stored on HDFS
    Path zap = new Path(pearPath);
    if (!fs.exists(zap)) {
        System.err.println("The UIMA application " + pearPath + " can't be found on HDFS - aborting");
        return -1;
    }

    JobConf job = new JobConf(getConf());
    job.setJarByClass(this.getClass());
    job.setJobName("Processing with UIMA application : " + pearPath);

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(BehemothDocument.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BehemothDocument.class);

    job.setMapperClass(UIMAMapper.class);

    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    // push the UIMA pear onto the DistributedCache
    DistributedCache.addCacheFile(new URI(pearPath), job);

    job.set("uima.pear.path", pearPath);

    try {
        long start = System.currentTimeMillis();
        JobClient.runJob(job);
        long finish = System.currentTimeMillis();
        if (LOG.isInfoEnabled()) {
            LOG.info("UIMADriver completed. Timing: " + (finish - start) + " ms");
        }
    } catch (Exception e) {
        LOG.error("Exception", e);
        // discard partial output and report the failure to the caller
        fs.delete(outputPath, true);
        return -1;
    }

    return 0;
}

From source file:com.digitalpebble.behemoth.util.ContentExtractor.java

License:Apache License

/**
 * Extracts the content found under {@code inputf} into archives stored in a
 * newly created directory {@code outputf}, together with an index file.
 *
 * @param inputf  path whose entries are processed (an entry named
 *                {@code _SUCCESS} is skipped)
 * @param outputf output directory; must not exist yet
 * @return 0 on success, -1 if the output directory already exists
 * @throws IOException      if reading the input or writing the output fails
 * @throws ArchiveException declared for the archive handling performed by
 *                          the helpers called here
 */
private int generateDocs(String inputf, String outputf) throws IOException, ArchiveException {

    Path input = new Path(inputf);
    Path dirPath = new Path(outputf);

    FileSystem fsout = FileSystem.get(dirPath.toUri(), getConf());

    if (fsout.exists(dirPath)) {
        System.err.println("Output " + outputf + " already exists");
        return -1;
    }
    fsout.mkdirs(dirPath);

    maxNumEntriesInArchive = getConf().getInt(numEntriesPerArchiveParamName, 10000);

    // index file; create() makes the file itself, so no separate
    // createNewFile() call is needed in the freshly created directory
    Path indexPath = new Path(dirPath, "index");
    index = fsout.create(indexPath);

    boolean completed = false;
    try {
        createArchive(dirPath);

        FileSystem fs = input.getFileSystem(getConf());
        FileStatus[] statuses = fs.listStatus(input);
        int[] count = { 0 };
        for (FileStatus status : statuses) {
            Path suPath = status.getPath();
            if (suPath.getName().equals("_SUCCESS")) {
                continue;
            }
            generateDocs(suPath, dirPath, count);
        }
        completed = true;
    } finally {
        // Release the index stream and the archive even when processing failed.
        try {
            if (index != null) {
                index.close();
            }
        } finally {
            if (currentArchive != null) {
                // finish() is only attempted after a clean run; after a failure
                // the archive is simply closed
                if (completed) {
                    currentArchive.finish();
                }
                currentArchive.close();
            }
        }
    }

    return 0;
}

From source file:com.ebay.erl.mobius.core.criterion.TupleRestrictions.java

License:Apache License

/**
 * Create a tuple criterion that only accepts tuples when the value 
 * of the <code>column</code> are presented in the given <code>file</code>
 * <p>
 * 
 * The assumption of the file is that, it's single column and one to many
 * line text file.  Each line is read into a case insensitive set, and 
 * using the set to check the value of the <code>column</code> within
 * the set or not.
 * <p>
 * 
 * The set is loaded on the first evaluation and is only kept once the whole
 * file has been read, so a failed load is retried on the next evaluation
 * instead of leaving a partially filled set behind.
 * 
 * 
 * @param column the name of a column to be tested that whether its value is in 
 * the given <code>file</code> or not
 * 
 * @param file a single column and multiple lines of file that contains strings/numbers,
 * each line is treated as a single unit.
 *
 * @return an instance of {@link TupleCriterion} that extracts only the records 
 * when the value of its <code>column</code> are presented in the given 
 * <code>file</code>.
 * 
 * @throws FileNotFoundException if the given file cannot be found.
 */
public static TupleCriterion within(final String column, File file) throws FileNotFoundException {
    final File f = TupleRestrictions.checkFileExist(file);

    return new TupleCriterion() {

        private static final long serialVersionUID = -1121221619118915652L;
        private Set<String> set;

        /**
         * Adds the file to the "tmpfiles" entry of the configuration, in
         * front of any value already present.
         */
        @Override
        public void setConf(Configuration conf) {
            try {
                String existing = conf.get("tmpfiles");
                String validated = validateFiles(f.getAbsolutePath(), conf);
                if (existing == null || existing.trim().length() == 0) {
                    conf.set("tmpfiles", validated);
                } else {
                    conf.set("tmpfiles", validated + "," + existing);
                }

            } catch (IOException e) {
                throw new IllegalArgumentException(e);
            }
        }

        /**
         * COPIED FROM org.apache.hadoop.util.GenericOptionsParser
         */
        private String validateFiles(String files, Configuration conf) throws IOException {
            if (files == null) {
                return null;
            }
            String[] fileArr = files.split(",");
            String[] finalArr = new String[fileArr.length];
            for (int i = 0; i < fileArr.length; i++) {
                String tmp = fileArr[i];
                String finalPath;
                Path path = new Path(tmp);
                URI pathURI = path.toUri();
                FileSystem localFs = FileSystem.getLocal(conf);
                if (pathURI.getScheme() == null) {
                    // default to the local file system
                    // check if the file exists or not first
                    if (!localFs.exists(path)) {
                        throw new FileNotFoundException("File " + tmp + " does not exist.");
                    }
                    finalPath = path.makeQualified(localFs).toString();
                } else {
                    // check if the file exists in this file system
                    // we need to recreate this filesystem object to copy
                    // these files to the file system jobtracker is running
                    // on.
                    FileSystem fs = path.getFileSystem(conf);
                    if (!fs.exists(path)) {
                        throw new FileNotFoundException("File " + tmp + " does not exist.");
                    }
                    finalPath = path.makeQualified(fs).toString();
                    try {
                        fs.close();
                    } catch (IOException ignored) {
                        // best effort, as in the code this was copied from
                    }
                }
                finalArr[i] = finalPath;
            }
            return StringUtils.arrayToString(finalArr);
        }

        @Override
        protected boolean evaluate(Tuple tuple, Configuration configuration) {
            if (set == null) {
                // Fill a local set and publish it only after a complete read.
                Set<String> loaded = new CaseInsensitiveTreeSet();
                BufferedReader br = null;
                try {
                    // The file is opened by its simple name relative to the
                    // working directory; presumably that is where the file
                    // registered through "tmpfiles" ends up - verify.
                    br = new BufferedReader(new FileReader(new File(f.getName())));
                    String newLine;
                    while ((newLine = br.readLine()) != null) {
                        loaded.add(newLine);
                    }
                } catch (IOException e) {
                    throw new RuntimeException(e);
                } finally {
                    if (br != null) {
                        try {
                            br.close();
                        } catch (IOException ignored) {
                            // nothing useful can be done if closing fails
                        }
                    }
                }
                this.set = loaded;
            }

            String value = tuple.getString(column);
            if (value != null) {
                return this.set.contains(value);
            } else {
                return false;
            }
        }

        @Override
        public String[] getInvolvedColumns() {
            return new String[] { column };
        }
    };
}

From source file:com.ebay.erl.mobius.core.JobSetup.java

License:Apache License

/**
 * Removes the output folder, recursively, if it is present on the file
 * system obtained from the given configuration.
 *
 * @param outputFolder folder to remove; qualified against the file system first
 * @param conf         configuration used to obtain the file system
 * @throws IOException if the file system cannot be accessed
 */
private static void ensureOutputDelete(Path outputFolder, Configuration conf) throws IOException {
    final FileSystem fileSystem = FileSystem.get(conf);
    final Path qualifiedFolder = fileSystem.makeQualified(outputFolder);

    if (!fileSystem.exists(qualifiedFolder)) {
        return;
    }

    LOGGER.info("Deleting " + qualifiedFolder.toString());
    fileSystem.delete(qualifiedFolder, true);
}

From source file:com.ebay.erl.mobius.core.mapred.ConfigurableJob.java

License:Apache License

/**
 * Samples the job input, selects partition boundaries from the sorted samples
 * and writes them to the partition file used by TotalOrderPartitioner.
 *
 * <p>If the number of distinct boundaries written plus one differs from the
 * configured number of reducers, the number of reduce tasks of the job is
 * changed to match.
 *
 * @param job     job to sample and to register the partition file with
 * @param sampler sampler used to draw the samples from the job input
 * @throws RuntimeException wrapping any IOException raised while sampling or
 *         writing
 */
private static void writePartitionFile(JobConf job, Sampler sampler) {
    try {
        ////////////////////////////////////////////////
        // first, getting samples from the data sources
        ////////////////////////////////////////////////
        LOGGER.info("Running local sampling for job [" + job.getJobName() + "]");
        InputFormat inf = job.getInputFormat();
        Object[] samples = sampler.getSample(inf, job);
        LOGGER.info("Samples retrieved, sorting...");

        ////////////////////////////////////////////////
        // sort the samples
        ////////////////////////////////////////////////
        RawComparator comparator = job.getOutputKeyComparator();
        Arrays.sort(samples, comparator);

        if (job.getBoolean("mobius.print.sample", false)) {
            PrintWriter pw = new PrintWriter(
                    new OutputStreamWriter(new GZIPOutputStream(new BufferedOutputStream(new FileOutputStream(
                            new File(job.get("mobius.sample.file", "./samples.txt.gz")))))));
            try {
                for (Object obj : samples) {
                    pw.println(obj);
                }
                pw.flush();
            } finally {
                // always release the sample dump file
                pw.close();
            }
        }

        ////////////////////////////////////////////////
        // start to write partition files
        ////////////////////////////////////////////////

        FileSystem fs = FileSystem.get(job);
        Path partitionFile = fs.makeQualified(new Path(TotalOrderPartitioner.getPartitionFile(job)));
        // pick a name that is not taken yet
        while (fs.exists(partitionFile)) {
            partitionFile = new Path(partitionFile.toString() + "." + System.currentTimeMillis());
        }
        fs.deleteOnExit(partitionFile);
        TotalOrderPartitioner.setPartitionFile(job, partitionFile);
        LOGGER.info("write partition file to:" + partitionFile.toString());

        int reducersNbr = job.getNumReduceTasks();
        Set<Object> wroteSamples = new HashSet<Object>();

        SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, partitionFile, Tuple.class,
                NullWritable.class);

        try {
            // NOTE(review): both operands are ints, so this is an integer
            // division (truncated, and it fails when reducersNbr is 0).
            // Left unchanged because it affects the selected boundaries -
            // confirm whether a float division was intended.
            float avgReduceSize = samples.length / reducersNbr;

            int lastBegin = 0;
            for (int i = 0; i < samples.length;) {
                // trying to distribute the load for every reducer evenly,
                // dividing the <code>samples</code> into a set of blocks
                // separated by boundaries, objects that selected from the
                // <code>samples</code> array, and each blocks should have
                // about the same size.

                // find the last index of element that equals to samples[i], as
                // such element might appear multiple times in the samples.
                int upperBound = Util.findUpperBound(samples, samples[i], comparator);

                int lowerBound = i;

                // the repeat time of samples[i], if the key itself is too big
                // select it as boundary
                int currentElemSize = upperBound - lowerBound + 1;

                if (currentElemSize > avgReduceSize * 2) // greater than two times of average reducer size
                {
                    // the current element is too big, greater than
                    // two times of the <code>avgReduceSize</code>, 
                    // put itself as boundary
                    writer.append(((DataJoinKey) samples[i]).getKey(), NullWritable.get());
                    wroteSamples.add(((DataJoinKey) samples[i]).getKey());

                    // immediate put the next element to the boundary,
                    // the next element starts at <code> upperBound+1
                    // </code>, to prevent the current one consume even 
                    // more.
                    if (upperBound + 1 < samples.length) {
                        writer.append(((DataJoinKey) samples[upperBound + 1]).getKey(), NullWritable.get());
                        wroteSamples.add(((DataJoinKey) samples[upperBound + 1]).getKey());

                        // move on to the next element of <code>samples[upperBound+1]</code>
                        lastBegin = Util.findUpperBound(samples, samples[upperBound + 1], comparator) + 1;
                        i = lastBegin;
                    } else {
                        break;
                    }
                } else {
                    // current element is small enough to be consider
                    // with previous group
                    int size = upperBound - lastBegin;
                    if (size > avgReduceSize) {
                        // by including the current elements, we have
                        // found a block that's big enough, select it
                        // as boundary
                        writer.append(((DataJoinKey) samples[i]).getKey(), NullWritable.get());
                        wroteSamples.add(((DataJoinKey) samples[i]).getKey());

                        i = upperBound + 1;
                        lastBegin = i;
                    } else {
                        i = upperBound + 1;
                    }
                }
            }
        } finally {
            // close the partition file even when writing a boundary failed
            writer.close();
        }

        // if the number of wrote samples doesn't equals to number of
        // reducer minus one, then it means the key spaces is too small
        // hence TotalOrderPartitioner won't work, it works only if 
        // the partition boundaries are distinct.
        //
        // we need to change the number of reducers
        if (wroteSamples.size() + 1 != reducersNbr) {
            LOGGER.info("Write complete, but key space is too small, sample size=" + wroteSamples.size()
                    + ", reducer size:" + (reducersNbr));
            LOGGER.info("Set the reducer size to:" + (wroteSamples.size() + 1));

            // add 1 because the wrote samples define boundary, ex, if
            // the sample size is two with two element [300, 1000], then 
            // there should be 3 reducers, one for handling i<300, one 
            // for 300<=i<1000, and another one for 1000<=i
            job.setNumReduceTasks((wroteSamples.size() + 1));
        }

        samples = null;
    } catch (IOException e) {
        LOGGER.error(e.getMessage(), e);
        throw new RuntimeException(e);
    }
}