Example usage for org.apache.hadoop.fs FileSystem exists

List of usage examples for org.apache.hadoop.fs FileSystem exists

Introduction

On this page you can find example usage of org.apache.hadoop.fs FileSystem exists.

Prototype

public boolean exists(Path f) throws IOException 

Source Link

Document

Check if a path exists.
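
Before the full examples below, here is a minimal, self-contained sketch of calling exists against the default file system. The configuration and the /tmp/example.txt path are hypothetical and only illustrate the call; in practice the Configuration typically comes from the running job or cluster.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemExistsExample {

    public static void main(String[] args) throws IOException {
        // Resolve the FileSystem configured by fs.defaultFS (local FS if nothing is set).
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Hypothetical path, used only to illustrate the exists() call.
        Path path = new Path("/tmp/example.txt");

        if (fs.exists(path)) {
            System.out.println(path + " exists, length = " + fs.getFileStatus(path).getLen());
        } else {
            System.out.println(path + " does not exist");
        }
    }
}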

Usage

From source file:com.kylinolap.dict.DictionaryManager.java

License:Apache License

private String unpackDataSet(String tempHDFSDir, String dataSetName) throws IOException {

    InputStream in = this.getClass().getResourceAsStream("/com/kylinolap/dict/" + dataSetName + ".txt");
    if (in == null) // data set resource not found
        return null;

    ByteArrayOutputStream buf = new ByteArrayOutputStream();
    IOUtils.copy(in, buf);
    in.close();
    byte[] bytes = buf.toByteArray();

    Path tmpDataSetPath = new Path(
            tempHDFSDir + "/dict/temp_dataset/" + dataSetName + "_" + bytes.length + ".txt");

    FileSystem fs = HadoopUtil.getFileSystem(tempHDFSDir);
    boolean writtenNewFile = false;
    if (!fs.exists(tmpDataSetPath) || fs.getFileStatus(tmpDataSetPath).getLen() != bytes.length) {
        fs.mkdirs(tmpDataSetPath.getParent());
        FSDataOutputStream out = fs.create(tmpDataSetPath);
        IOUtils.copy(new ByteArrayInputStream(bytes), out);
        out.close();
        writtenNewFile = true;
    }

    String qualifiedPath = tmpDataSetPath.makeQualified(fs.getUri(), new Path("/")).toString();
    if (writtenNewFile)
        logger.info("Dictionary temp data set file written to " + qualifiedPath);
    return qualifiedPath;
}

From source file:com.kylinolap.job.hadoop.AbstractHadoopJob.java

License:Apache License

protected void deletePath(Configuration conf, Path path) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(path)) {
        fs.delete(path, true);
    }
}

From source file:com.kylinolap.job.hadoop.cube.StorageCleanupJob.java

License:Apache License

private void cleanUnusedHdfsFiles(Configuration conf) throws IOException {
    JobEngineConfig engineConfig = new JobEngineConfig(KylinConfig.getInstanceFromEnv());
    CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());

    FileSystem fs = FileSystem.get(conf);
    List<String> allHdfsPathsNeedToBeDeleted = new ArrayList<String>();
    // GlobFilter filter = new
    // GlobFilter(KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory()
    // + "/kylin-.*");
    FileStatus[] fStatus = fs.listStatus(new Path(KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory()));
    for (FileStatus status : fStatus) {
        String path = status.getPath().getName();
        // System.out.println(path);
        if (path.startsWith(JobInstance.JOB_WORKING_DIR_PREFIX)) {
            String kylinJobPath = engineConfig.getHdfsWorkingDirectory() + "/" + path;
            allHdfsPathsNeedToBeDeleted.add(kylinJobPath);
        }
    }

    List<JobInstance> allJobs = JobDAO.getInstance(KylinConfig.getInstanceFromEnv()).listAllJobs();
    for (JobInstance jobInstance : allJobs) {
        // only remove FINISHED and DISCARDED job intermediate files
        if (isJobInUse(jobInstance)) {
            String path = JobInstance.getJobWorkingDir(jobInstance, engineConfig);
            allHdfsPathsNeedToBeDeleted.remove(path);
            log.info("Remove " + path + " from deletion list, as the path belongs to job "
                    + jobInstance.getUuid() + " with status " + jobInstance.getStatus());
        }
    }

    // remove every segment working dir from deletion list
    for (CubeInstance cube : cubeMgr.listAllCubes()) {
        for (CubeSegment seg : cube.getSegments()) {
            String jobUuid = seg.getLastBuildJobID();
            if (jobUuid != null && !jobUuid.equals("")) {
                String path = JobInstance.getJobWorkingDir(jobUuid, engineConfig.getHdfsWorkingDirectory());
                allHdfsPathsNeedToBeDeleted.remove(path);
                log.info("Remove " + path + " from deletion list, as the path belongs to segment " + seg
                        + " of cube " + cube.getName());
            }
        }
    }

    if (delete) {
        // remove files
        for (String hdfsPath : allHdfsPathsNeedToBeDeleted) {
            log.info("Deleting hdfs path " + hdfsPath);
            Path p = new Path(hdfsPath);
            if (fs.exists(p)) {
                fs.delete(p, true);
                log.info("Deleted hdfs path " + hdfsPath);
            } else {
                log.info("Hdfs path " + hdfsPath + "does not exist");
            }
        }
    } else {
        System.out.println("--------------- HDFS Path To Be Deleted ---------------");
        for (String hdfsPath : allHdfsPathsNeedToBeDeleted) {
            System.out.println(hdfsPath);
        }
        System.out.println("-------------------------------------------------------");
    }

}

From source file:com.kylinolap.job.hadoop.hbase.CreateHTableJob.java

License:Apache License

@SuppressWarnings("deprecation")
public byte[][] getSplits(Configuration conf, Path path) throws Exception {
    FileSystem fs = path.getFileSystem(conf);
    if (!fs.exists(path)) {
        System.err.println("Path " + path + " not found, no region split, HTable will be one region");
        return null;
    }

    List<byte[]> rowkeyList = new ArrayList<byte[]>();
    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(fs, path, conf);
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
        while (reader.next(key, value)) {
            rowkeyList.add(((Text) key).copyBytes());
        }
    } catch (Exception e) {
        e.printStackTrace();
        throw e;
    } finally {
        IOUtils.closeStream(reader);
    }

    System.out.println((rowkeyList.size() + 1) + " regions");
    System.out.println(rowkeyList.size() + " splits");
    for (byte[] split : rowkeyList) {
        System.out.println(StringUtils.byteToHexString(split));
    }

    byte[][] retValue = rowkeyList.toArray(new byte[rowkeyList.size()][]);
    return retValue.length == 0 ? null : retValue;
}

From source file:com.liferay.hadoop.action.HadoopJob.java

License:Open Source License

public String doExecute(HttpServletRequest request, HttpServletResponse response) throws Exception {

    response.setContentType(ContentTypes.TEXT_PLAIN_UTF8);

    PrintWriter writer = response.getWriter();

    FileSystem fileSystem = HadoopManager.getFileSystem();

    JobClient jobClient = HadoopManager.getJobClient();

    writer.println("-- Job Status --");

    Path inputPath = new Path("/index/*/*");
    Path outputPath = new Path("/wordcount/results");

    try {
        if (_runningJob == null) {
            writer.println("Creating job");

            if (fileSystem.exists(_jobPath)) {
                fileSystem.delete(_jobPath, false);
            }

            if (!fileSystem.exists(_jobPath)) {
                writer.println("Deploying the job code to cluster");

                FSDataOutputStream outputStream = null;

                try {
                    outputStream = fileSystem.create(_jobPath);

                    ServletContext servletContext = HadoopManager.getServletContext();

                    InputStream inputStream = servletContext.getResourceAsStream("/WEB-INF/lib/hadoop-job.jar");

                    StreamUtil.transfer(inputStream, outputStream, false);
                } finally {
                    StreamUtil.cleanUp(outputStream);
                }

                writer.println("Job code deployed to cluster");
            }

            if (fileSystem.exists(outputPath)) {
                writer.println("A previous job output was found, backing it up");

                fileSystem.rename(outputPath,
                        outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
            }

            _jobConf = HadoopManager.createNewJobConf();

            _jobConf.setJobName("Word Count");

            writer.println("Job '" + _jobConf.getJobName() + "' is being configured");

            _jobConf.setJarByClass(Map.class);
            _jobConf.setOutputKeyClass(Text.class);
            _jobConf.setOutputValueClass(IntWritable.class);
            _jobConf.setMapperClass(Map.class);
            _jobConf.setCombinerClass(Reduce.class);
            _jobConf.setReducerClass(Reduce.class);
            _jobConf.setInputFormat(TextInputFormat.class);
            _jobConf.setOutputFormat(TextOutputFormat.class);

            writer.println("Job code deployed to distributed cache's classpath");

            DistributedCache.addArchiveToClassPath(_jobPath, _jobConf, fileSystem);

            FileInputFormat.setInputPaths(_jobConf, inputPath);
            FileOutputFormat.setOutputPath(_jobConf, outputPath);

            writer.println("Submitting job the first time");

            _runningJob = jobClient.submitJob(_jobConf);

            writer.println("Job submitted");
        }

        int jobState = _runningJob.getJobState();

        writer.println(
                "Job status: " + jobState + " (RUNNING = 1, SUCCEEDED = 2, FAILED = 3, PREP = 4, KILLED = 5)");

        if ((jobState != JobStatus.RUNNING) && (jobState != JobStatus.PREP)) {

            writer.println("Re-issuing the job");

            if (fileSystem.exists(outputPath)) {
                writer.println("A previous job output was found, backing it up");

                fileSystem.rename(outputPath,
                        outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
            }

            writer.println("Submitting job the first time");

            _runningJob = jobClient.submitJob(_jobConf);

            writer.println("Job submitted");
        }
    } catch (Exception ioe) {
        writer.println("Job error: ");

        ioe.printStackTrace(writer);
    }

    writer.flush();
    writer.close();

    return null;
}

From source file:com.liferay.hadoop.store.HDFSStore.java

License:Open Source License

@Override
public void deleteFile(long companyId, long repositoryId, String fileName, String versionLabel)
        throws PortalException, SystemException {

    Path fullPath = HadoopManager.getFullVersionFilePath(companyId, repositoryId, fileName, versionLabel);

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        if (fileSystem.exists(fullPath)) {
            fileSystem.delete(fullPath, true);
        }

        Path parentPath = fullPath.getParent();

        deleteEmptyAncestors(companyId, repositoryId, parentPath);
    } catch (IOException ioe) {
        throw new SystemException(ioe);
    }
}

From source file:com.liferay.hadoop.store.HDFSStore.java

License:Open Source License

@Override
public InputStream getFileAsStream(long companyId, long repositoryId, String fileName, String versionLabel)
        throws PortalException, SystemException {

    Path fullPath = HadoopManager.getFullVersionFilePath(companyId, repositoryId, fileName, versionLabel);

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        if (!fileSystem.exists(fullPath)) {
            throw new PortalException("File " + fullPath.toUri().toString() + " does not exist");
        }

        return fileSystem.open(fullPath);
    } catch (IOException ioe) {
        throw new SystemException(ioe);
    }
}

From source file:com.liferay.hadoop.store.HDFSStore.java

License:Open Source License

@Override
public long getFileSize(long companyId, long repositoryId, String fileName)
        throws PortalException, SystemException {

    Path fullPath = HadoopManager.getFullVersionFilePath(companyId, repositoryId, fileName, VERSION_DEFAULT);

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        if (!fileSystem.exists(fullPath)) {
            throw new PortalException("File " + fullPath.toUri().toString() + " does not exist");
        }

        FileStatus fileStatus = fileSystem.getFileStatus(fullPath);

        return fileStatus.getLen();
    } catch (IOException ioe) {
        throw new SystemException(ioe);
    }
}

From source file:com.liferay.hadoop.store.HDFSStore.java

License:Open Source License

@Override
public boolean hasDirectory(long companyId, long repositoryId, String dirName)
        throws PortalException, SystemException {

    Path fullPath = HadoopManager.getFullDirPath(companyId, repositoryId, dirName);

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        return fileSystem.exists(fullPath);
    } catch (IOException ioe) {
        throw new SystemException(ioe);
    }
}

From source file:com.liferay.hadoop.store.HDFSStore.java

License:Open Source License

@Override
public boolean hasFile(long companyId, long repositoryId, String fileName, String versionLabel)
        throws PortalException, SystemException {

    Path fullPath = HadoopManager.getFullVersionFilePath(companyId, repositoryId, fileName, versionLabel);

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        return fileSystem.exists(fullPath);
    } catch (IOException ioe) {
        throw new SystemException(ioe);
    }
}