Example usage for org.apache.hadoop.fs FileSystem exists

Introduction

This page collects usage examples for org.apache.hadoop.fs.FileSystem#exists, drawn from open source projects.

Prototype

public boolean exists(Path f) throws IOException 

Document

Check if a path exists.
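
Before the project examples below, here is a minimal, self-contained sketch of the call. The default Configuration and the /tmp/example.txt path are illustrative placeholders, not taken from any of the projects quoted on this page.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ExistsExample {
    public static void main(String[] args) throws IOException {
        // Resolve the default file system (e.g. HDFS) from the configuration.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        Path path = new Path("/tmp/example.txt");

        // exists() returns true if the path names a file or directory.
        if (fs.exists(path)) {
            System.out.println(path + " exists");
        } else {
            System.out.println(path + " does not exist");
        }
    }
}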

Usage

From source file: com.liferay.hadoop.store.HDFSStore.java

License: Open Source License

@Override
public void updateFile(long companyId, long repositoryId, long newRepositoryId, String fileName)
        throws PortalException, SystemException {

    Path sourcePath = HadoopManager.getFullVersionFilePath(companyId, repositoryId, fileName, VERSION_DEFAULT);
    Path targetPath = HadoopManager.getFullVersionFilePath(companyId, newRepositoryId, fileName,
            VERSION_DEFAULT);

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        if (fileSystem.exists(targetPath)) {
            throw new DuplicateFileException(fileName);
        }

        if (!fileSystem.exists(sourcePath)) {
            throw new PortalException("File " + sourcePath.toUri().toString() + " does not exist");
        }

        boolean renamed = fileSystem.rename(sourcePath, targetPath);

        if (!renamed) {
            throw new SystemException("File name directory was not renamed from "
                    + sourcePath.toUri().toString() + " to " + targetPath.toUri().toString());
        }
    } catch (IOException ioe) {
        throw new SystemException(ioe);
    }
}
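
Note that the exists check followed by rename is not atomic: another writer could create targetPath between the two calls, so the DuplicateFileException guard is best-effort rather than a hard guarantee.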

From source file: com.liferay.hadoop.store.HDFSStore.java

License: Open Source License

public void updateFile(long companyId, long repositoryId, String fileName, String newFileName)
        throws PortalException, SystemException {

    Path sourcePath = HadoopManager.getFullVersionFilePath(companyId, repositoryId, fileName, VERSION_DEFAULT);
    Path targetPath = HadoopManager.getFullVersionFilePath(companyId, repositoryId, newFileName,
            VERSION_DEFAULT);

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        if (fileSystem.exists(targetPath)) {
            throw new DuplicateFileException(fileName);
        }

        if (!fileSystem.exists(sourcePath)) {
            throw new PortalException("File " + sourcePath.toUri().toString() + " does not exist");
        }

        boolean renamed = fileSystem.rename(sourcePath, targetPath);

        if (!renamed) {
            throw new SystemException("File name directory was not renamed from "
                    + sourcePath.toUri().toString() + " to " + targetPath.toUri().toString());
        }
    } catch (IOException ioe) {
        throw new SystemException(ioe);
    }
}

From source file: com.liferay.hadoop.store.HDFSStore.java

License: Open Source License

protected void deleteEmptyAncestors(long companyId, long repositoryId, Path path) throws SystemException {

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        FileStatus[] listStatus = fileSystem.listStatus(path);

        if ((listStatus == null) || (listStatus.length > 0)) {
            return;
        }

        Path parentPath = path.getParent();

        if (fileSystem.delete(path, true) && fileSystem.exists(parentPath)) {

            deleteEmptyAncestors(companyId, repositoryId, parentPath);
        }
    } catch (IOException ioe) {
        throw new SystemException(ioe);
    }
}
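
Here exists acts as a recursion guard: after the empty directory is deleted, the upward walk only continues if the parent path is still present.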

From source file: com.liferay.hadoop.util.HadoopManager.java

License: Open Source License

public static void runJob(StoreEvent storeEvent) throws IOException {
    FileSystem fileSystem = getFileSystem();

    if (_servletContext == null) {
        return;
    }

    JobClient jobClient = getJobClient();

    Path inputPath = new Path("/index".concat(storeEvent.getRootPath().toString()).concat("/*"));
    Path outputPath = new Path("/wordcount".concat(storeEvent.getRootPath().toString()).concat("/results"));

    try {
        if (_runningJob == null) {
            if (!fileSystem.exists(_jobPath)) {
                FSDataOutputStream outputStream = null;

                try {
                    outputStream = fileSystem.create(_jobPath);

                    InputStream inputStream = _servletContext
                            .getResourceAsStream("/WEB-INF/lib/hadoop-job.jar");

                    StreamUtil.transfer(inputStream, outputStream, false);
                } finally {
                    StreamUtil.cleanUp(outputStream);
                }
            }

            if (fileSystem.exists(outputPath)) {
                fileSystem.rename(outputPath,
                        outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
            }

            _jobConf = new JobConf(_sharedJobConf);

            _jobConf.setJobName("Word Count");
            _jobConf.setJarByClass(Map.class);
            _jobConf.setOutputKeyClass(Text.class);
            _jobConf.setOutputValueClass(IntWritable.class);
            _jobConf.setMapperClass(Map.class);
            _jobConf.setCombinerClass(Reduce.class);
            _jobConf.setReducerClass(Reduce.class);
            _jobConf.setInputFormat(TextInputFormat.class);
            _jobConf.setOutputFormat(TextOutputFormat.class);

            DistributedCache.addArchiveToClassPath(_jobPath, _jobConf, fileSystem);

            FileInputFormat.setInputPaths(_jobConf, inputPath);
            FileOutputFormat.setOutputPath(_jobConf, outputPath);

            _runningJob = jobClient.submitJob(_jobConf);
        }

        int jobState = _runningJob.getJobState();

        if ((jobState != JobStatus.RUNNING) && (jobState != JobStatus.PREP)) {

            System.out.println("Re-issuing the word count job.");

            if (fileSystem.exists(outputPath)) {
                fileSystem.rename(outputPath,
                        outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
            }

            _runningJob = jobClient.submitJob(_jobConf);
        }
    } catch (Exception ioe) {
        ioe.printStackTrace();
    }
}
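
exists serves two purposes here: the job jar is uploaded to HDFS only when it is not already there, and a previous run's output directory is renamed out of the way before resubmission, since Hadoop will not write to an existing output path.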

From source file: com.lightboxtechnologies.spectrum.ExtractMapper.java

License: Apache License

protected Map<String, Object> process_extent_large(FSDataInputStream file, FileSystem fs, Path outPath,
        Map<String, ?> map, Context context) throws IOException {
    context.getCounter(FileTypes.BIG).increment(1);

    final Map<String, Object> rec = new HashMap<String, Object>();

    OutputStream fout = null;
    try {
        fout = fs.create(outPath, true);
        hashAndExtract(rec, fout, file, map, context);
    } finally {
        IOUtils.closeQuietly(fout);
    }

    final String hash = new String(Hex.encodeHex((byte[]) rec.get("md5")));
    final Path subDir = new Path("/texaspete/ev", hashFolder(hash)), hashPath = new Path(subDir, hash);
    fs.mkdirs(subDir);

    if (fs.exists(hashPath)) {
        context.getCounter(FileTypes.BIG_DUPES).increment(1);
    } else if (!fs.rename(outPath, hashPath)) {
        LOG.warn("Could not rename " + outPath + " to " + hashPath);
        context.getCounter(FileTypes.PROBLEMS).increment(1);
    }
    final StreamProxy content = new FileProxy(hashPath.toString());
    rec.put("Content", content);
    return rec;
}
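
Here exists drives content-addressed de-duplication: the extracted data is renamed to a path derived from its MD5 hash only when no file with that hash already exists; otherwise it is counted as a duplicate.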

From source file: com.lightboxtechnologies.spectrum.HDFSArchiver.java

License: Apache License

public static int runPipeline(String src, String dst) throws IOException {
    final Configuration conf = new Configuration();
    final FileSystem fs = FileSystem.get(conf);

    final Path rpath = new Path(src);
    final Path zpath = new Path(dst);

    if (!fs.exists(rpath)) {
        throw new IOException("Source path does not exist.");
    }

    OutputStream out = null;
    try {
        out = zpath.getFileSystem(conf).create(zpath);
        zip(fs, rpath, out);
        out.close();
    } finally {
        IOUtils.closeQuietly(out);
    }

    return 0;
}
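
A plain precondition check: the archiver fails fast with an IOException when the source path is missing, before any output file is created.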

From source file: com.linkedin.cubert.analyzer.physical.ShuffleRewriter.java

License: Open Source License

private JsonNode rewriteDictionary(JsonNode job) {
    ObjectNode newJob = (ObjectNode) cloneNode(job);

    ObjectNode shuffle = (ObjectNode) newJob.get("shuffle");
    if (shuffle == null)
        throw new RuntimeException("Shuffle description missing. Cannot rewrite.");

    newJob.put("reducers", 1);

    // Determine if this is a refresh job or a fresh dictionary creation by looking at
    // STORE location
    String storePath = job.get("output").get("path").getTextValue();
    String dictionaryPath = storePath + "/part-r-00000.avro";
    boolean refresh = false;

    try {
        FileSystem fs = FileSystem.get(new JobConf());
        refresh = fs.exists(new Path(dictionaryPath));
    } catch (IOException e) {
        // we will not refresh
    }

    // Rewrite map
    JsonNode relationName = shuffle.get("name");

    ObjectNode mapSideOperator = JsonUtils.createObjectNode("operator", "USER_DEFINED_TUPLE_OPERATOR", "class",
            "com.linkedin.cubert.operator.DictionaryRefreshMapSideOperator", "input",
            JsonUtils.createArrayNode(relationName), "output", relationName, "columns", shuffle.get("columns"));

    copyLine(shuffle, mapSideOperator, "[MAP] ");

    for (JsonNode map : newJob.path("map")) {
        if (!map.has("operators") || map.get("operators").isNull())
            ((ObjectNode) map).put("operators", JsonUtils.createArrayNode());
        ArrayNode operators = (ArrayNode) map.get("operators");
        operators.add(mapSideOperator);
    }

    // Rewrite shuffle
    shuffle.put("name", relationName);
    shuffle.put("type", "SHUFFLE");
    shuffle.put("partitionKeys", JsonUtils.createArrayNode(CommonUtils.array("colname", "colvalue")));
    shuffle.put("pivotKeys", JsonUtils.createArrayNode(CommonUtils.array("colname", "colvalue")));
    if (shuffle.has("columns"))
        shuffle.remove("columns");
    if (shuffle.has("dictionaryPath"))
        shuffle.remove("dictionaryPath");
    shuffle.remove("input");

    // Rewrite reduce
    if (!newJob.has("reduce") || newJob.get("reduce").isNull())
        newJob.put("reduce", JsonUtils.createArrayNode());
    ArrayNode reduceJob = (ArrayNode) newJob.get("reduce");

    ObjectNode reduceSideOperator = (ObjectNode) cloneNode(mapSideOperator);
    reduceSideOperator.put("class", "com.linkedin.cubert.operator.DictionaryRefreshReduceSideOperator");
    copyLine(shuffle, reduceSideOperator, "[REDUCE] ");
    reduceJob.insert(0, reduceSideOperator);

    // Rewrite cached files
    if (refresh) {
        String newStorePath = storePath + "/tmp";
        String newDictionaryPath = newStorePath + "/part-r-00000.avro";

        // put the existing dictionary file in dist cache
        if (!newJob.has("cachedFiles") || newJob.get("cachedFiles").isNull())
            newJob.put("cachedFiles", JsonUtils.createArrayNode());
        ArrayNode cachedFiles = (ArrayNode) newJob.get("cachedFiles");
        cachedFiles.add(dictionaryPath + "#dictionary");

        // tell the operators to use cached dictionary
        mapSideOperator.put("dictionary", dictionaryPath + "#dictionary");
        reduceSideOperator.put("dictionary", dictionaryPath + "#dictionary");

        // the output path is changed to <original path>/tmp
        ((ObjectNode) newJob.get("output")).put("path", newStorePath);

        // put onCompletion for this job to move the new dictionary to the parent
        // folder
        ArrayNode onCompletion = mapper.createArrayNode();
        // onCompletion.add(JsonUtils.createObjectNode("type",
        // "rm",
        // "paths",
        // JsonUtils.createArrayNode(storePath
        // + "/dictionary.avro")));
        onCompletion.add(JsonUtils.createObjectNode("type", "mv", "paths",
                JsonUtils.createArrayNode(new String[] { newDictionaryPath, dictionaryPath })));
        onCompletion.add(
                JsonUtils.createObjectNode("type", "rm", "paths", JsonUtils.createArrayNode(newStorePath)));
        newJob.put("onCompletion", onCompletion);
    }

    return newJob;
}
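
exists is used as a probe here: if a dictionary file from a previous run is found at the store location, the job is rewritten as a refresh that ships the old dictionary through the distributed cache; otherwise it creates the dictionary from scratch.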

From source file: com.linkedin.cubert.pig.piggybank.storage.avro.AvroStorage.java

License: Apache License

/**
 * Get avro schema of input path. There are three cases:
 * 1. if path is a file, then return its avro schema;
 * 2. if path is a first-level directory (no sub-directories), then
 * return the avro schema of one underlying file;
 * 3. if path contains sub-directories, then recursively check
 * whether all of them share the same schema and return it
 * if so or throw an exception if not.
 *
 * @param path input path
 * @param fs file system
 * @return avro schema of data
 * @throws IOException if underlying sub-directories do not share the same schema; or if input path is empty or does not exist
 */
@SuppressWarnings("deprecation")
protected Schema getAvroSchema(Path path, FileSystem fs) throws IOException {
    if (!fs.exists(path) || !AvroStorageUtils.PATH_FILTER.accept(path))
        return null;

    /* if path is first level directory or is a file */
    if (!fs.isDirectory(path)) {
        return getSchema(path, fs);
    }

    FileStatus[] ss = fs.listStatus(path, AvroStorageUtils.PATH_FILTER);
    Schema schema = null;
    if (ss.length > 0) {
        if (AvroStorageUtils.noDir(ss))
            return getSchema(path, fs);

        /*otherwise, check whether schemas of underlying directories are the same */
        for (FileStatus s : ss) {
            Schema newSchema = getAvroSchema(s.getPath(), fs);
            if (schema == null) {
                schema = newSchema;
                if (!checkSchema) {
                    System.out.println("Do not check schema; use schema of " + s.getPath());
                    return schema;
                }
            } else if (newSchema != null && !schema.equals(newSchema)) {
                throw new IOException("Input path is " + path + ". Sub-direcotry " + s.getPath()
                        + " contains different schema " + newSchema + " than " + schema);
            }
        }
    }

    if (schema == null)
        System.err.println("Cannot get avro schema! Input path " + path + " might be empty.");

    return schema;
}
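
The exists call at the top short-circuits the recursion: missing or filtered-out paths yield a null schema rather than an error.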

From source file: com.linkedin.cubert.plan.physical.GenerateDictionary.java

License: Open Source License

public static void mergeDictionaries(Configuration conf, Path dir) throws IOException {
    Map<String, CodeDictionary> dictionaries = new HashMap<String, CodeDictionary>();
    FileSystem fs = FileSystem.get(conf);

    Path currentDictPath = new Path(dir, "dictionary");
    Schema schema = getSchema();

    // Read the existing dictionaries
    if (fs.exists(currentDictPath)) {
        dictionaries.putAll(loadDictionary(currentDictPath.toString(), true, conf));

        // move the current dictionary to new file
        Path oldPath = new Path(dir, "_dictionary.old");
        fs.delete(oldPath, true);
        fs.rename(currentDictPath, oldPath);
    }

    // Read the new entries
    Path globPath = new Path(dir, "tmp/part-*");
    FileStatus[] allStatus = fs.globStatus(globPath);
    for (FileStatus status : allStatus) {
        DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
        DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(
                new FsInput(status.getPath(), conf), datumReader);
        GenericRecord record = null;
        while (dataFileReader.hasNext()) {
            record = dataFileReader.next();
            String colName = record.get("colname").toString();
            String colValue = record.get("colvalue").toString();

            CodeDictionary dict = dictionaries.get(colName);
            if (dict == null) {
                dict = new CodeDictionary();
                dictionaries.put(colName, dict);
            }

            dict.addKey(colValue);
        }

        // Release the reader before moving on to the next part file.
        dataFileReader.close();
    }

    // Write the dictionaries back
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);
    FSDataOutputStream out = fs.create(currentDictPath);

    dataFileWriter.create(schema, out);
    Record record = new Record(schema);

    for (Map.Entry<String, CodeDictionary> entry : dictionaries.entrySet()) {
        String colName = entry.getKey();
        CodeDictionary dict = entry.getValue();

        for (String colValue : dict.keySet()) {
            int code = dict.getCodeForKey(colValue);
            record.put("colname", colName);
            record.put("colvalue", colValue);
            record.put("code", code);

            dataFileWriter.append(record);
        }
    }
    dataFileWriter.close();
}
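
exists again distinguishes a refresh from a first run: an existing dictionary is loaded into memory and the old file is renamed to _dictionary.old before the merged dictionary is written back.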

From source file: com.linkedin.cubert.plan.physical.JobExecutor.java

License: Open Source License

protected void setLibjars() throws IOException {
    if (!root.has("libjars"))
        return;

    FileSystem localFs = FileSystem.getLocal(conf);

    for (JsonNode node : asArray(root, "libjars")) {
        Path path = new Path(node.getTextValue());

        if (localFs.exists(path)) {
            Path dstPath = new Path(tmpDir, path.getName());
            fs.copyFromLocalFile(path, dstPath);

            path = dstPath;
        }

        DistributedCache.addFileToClassPath(path, conf, fs);
    }
}
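
Here the check runs against the local file system: libjars that exist locally are first copied into HDFS so that DistributedCache.addFileToClassPath can reference them; paths not found locally are assumed to already be in HDFS.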