List of usage examples for org.apache.hadoop.fs.FileSystem.exists
public boolean exists(Path f) throws IOException
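exists returns true if the given path is present (as a file or a directory) and false otherwise; an IOException signals an I/O failure while talking to the file system, not a missing path. A minimal self-contained sketch, assuming a default-configured file system and an illustrative /tmp/example.txt path (neither is taken from the examples below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ExistsExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Resolves fs.defaultFS from the configuration (local FS if unset).
        FileSystem fs = FileSystem.get(conf);

        Path p = new Path("/tmp/example.txt"); // illustrative path
        if (fs.exists(p)) {
            System.out.println(p + " exists, " + fs.getFileStatus(p).getLen() + " bytes");
        } else {
            System.out.println(p + " does not exist");
        }
    }
}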
From source file: com.kylinolap.dict.DictionaryManager.java
License: Apache License
private String unpackDataSet(String tempHDFSDir, String dataSetName) throws IOException {
    InputStream in = this.getClass().getResourceAsStream("/com/kylinolap/dict/" + dataSetName + ".txt");
    if (in == null) // data set resource not found
        return null;

    ByteArrayOutputStream buf = new ByteArrayOutputStream();
    IOUtils.copy(in, buf);
    in.close();
    byte[] bytes = buf.toByteArray();

    Path tmpDataSetPath = new Path(
            tempHDFSDir + "/dict/temp_dataset/" + dataSetName + "_" + bytes.length + ".txt");

    FileSystem fs = HadoopUtil.getFileSystem(tempHDFSDir);
    boolean writtenNewFile = false;
    // rewrite only when the file is missing or its size differs from the resource
    if (!fs.exists(tmpDataSetPath) || fs.getFileStatus(tmpDataSetPath).getLen() != bytes.length) {
        fs.mkdirs(tmpDataSetPath.getParent());
        FSDataOutputStream out = fs.create(tmpDataSetPath);
        IOUtils.copy(new ByteArrayInputStream(bytes), out);
        out.close();
        writtenNewFile = true;
    }

    String qualifiedPath = tmpDataSetPath.makeQualified(fs.getUri(), new Path("/")).toString();
    if (writtenNewFile)
        logger.info("Dictionary temp data set file written to " + qualifiedPath);
    return qualifiedPath;
}
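A side note on the pattern above: when the file is present, the exists call plus getFileStatus costs two NameNode round trips. A sketch of a single-call equivalent, using a hypothetical needsRewrite helper (FileSystem.getFileStatus throws FileNotFoundException for a missing path):

import java.io.FileNotFoundException;
import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// True when the file is absent or its length differs from the expected size.
static boolean needsRewrite(FileSystem fs, Path path, long expectedLen) throws IOException {
    try {
        return fs.getFileStatus(path).getLen() != expectedLen; // one RPC
    } catch (FileNotFoundException missing) {
        return true;
    }
}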
From source file: com.kylinolap.job.hadoop.AbstractHadoopJob.java
License: Apache License
protected void deletePath(Configuration conf, Path path) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(path)) {
        fs.delete(path, true);
    }
}
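The exists guard here is defensive rather than strictly required: on HDFS at least, FileSystem.delete simply returns false for a missing path instead of throwing, and pre-checking leaves a small window in which another client can remove the path first. A sketch of the guard-free variant, same signature as above:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

protected void deletePath(Configuration conf, Path path) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    // Recursive delete; returns false (no exception) when the path is absent.
    fs.delete(path, true);
}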
From source file: com.kylinolap.job.hadoop.cube.StorageCleanupJob.java
License: Apache License
private void cleanUnusedHdfsFiles(Configuration conf) throws IOException {
    JobEngineConfig engineConfig = new JobEngineConfig(KylinConfig.getInstanceFromEnv());
    CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());

    FileSystem fs = FileSystem.get(conf);
    List<String> allHdfsPathsNeedToBeDeleted = new ArrayList<String>();

    // collect every job working dir under the HDFS working directory
    FileStatus[] fStatus = fs.listStatus(new Path(KylinConfig.getInstanceFromEnv().getHdfsWorkingDirectory()));
    for (FileStatus status : fStatus) {
        String path = status.getPath().getName();
        if (path.startsWith(JobInstance.JOB_WORKING_DIR_PREFIX)) {
            String kylinJobPath = engineConfig.getHdfsWorkingDirectory() + "/" + path;
            allHdfsPathsNeedToBeDeleted.add(kylinJobPath);
        }
    }

    List<JobInstance> allJobs = JobDAO.getInstance(KylinConfig.getInstanceFromEnv()).listAllJobs();
    for (JobInstance jobInstance : allJobs) {
        // only FINISHED and DISCARDED jobs' intermediate files stay on the
        // deletion list; paths of in-use jobs are taken off it
        if (isJobInUse(jobInstance)) {
            String path = JobInstance.getJobWorkingDir(jobInstance, engineConfig);
            allHdfsPathsNeedToBeDeleted.remove(path);
            log.info("Remove " + path + " from deletion list, as the path belongs to job "
                    + jobInstance.getUuid() + " with status " + jobInstance.getStatus());
        }
    }

    // remove every segment working dir from deletion list
    for (CubeInstance cube : cubeMgr.listAllCubes()) {
        for (CubeSegment seg : cube.getSegments()) {
            String jobUuid = seg.getLastBuildJobID();
            if (jobUuid != null && !jobUuid.equals("")) {
                String path = JobInstance.getJobWorkingDir(jobUuid, engineConfig.getHdfsWorkingDirectory());
                allHdfsPathsNeedToBeDeleted.remove(path);
                log.info("Remove " + path + " from deletion list, as the path belongs to segment " + seg
                        + " of cube " + cube.getName());
            }
        }
    }

    if (delete) {
        // remove files
        for (String hdfsPath : allHdfsPathsNeedToBeDeleted) {
            log.info("Deleting hdfs path " + hdfsPath);
            Path p = new Path(hdfsPath);
            if (fs.exists(p)) {
                fs.delete(p, true);
                log.info("Deleted hdfs path " + hdfsPath);
            } else {
                log.info("Hdfs path " + hdfsPath + " does not exist");
            }
        }
    } else {
        System.out.println("--------------- HDFS Path To Be Deleted ---------------");
        for (String hdfsPath : allHdfsPathsNeedToBeDeleted) {
            System.out.println(hdfsPath);
        }
        System.out.println("-------------------------------------------------------");
    }
}
From source file: com.kylinolap.job.hadoop.hbase.CreateHTableJob.java
License: Apache License
@SuppressWarnings("deprecation") public byte[][] getSplits(Configuration conf, Path path) throws Exception { FileSystem fs = path.getFileSystem(conf); if (fs.exists(path) == false) { System.err.println("Path " + path + " not found, no region split, HTable will be one region"); return null; }/*from w w w . j a va 2s. c om*/ List<byte[]> rowkeyList = new ArrayList<byte[]>(); SequenceFile.Reader reader = null; try { reader = new SequenceFile.Reader(fs, path, conf); Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf); Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf); while (reader.next(key, value)) { rowkeyList.add(((Text) key).copyBytes()); } } catch (Exception e) { e.printStackTrace(); throw e; } finally { IOUtils.closeStream(reader); } System.out.println((rowkeyList.size() + 1) + " regions"); System.out.println(rowkeyList.size() + " splits"); for (byte[] split : rowkeyList) { System.out.println(StringUtils.byteToHexString(split)); } byte[][] retValue = rowkeyList.toArray(new byte[rowkeyList.size()][]); return retValue.length == 0 ? null : retValue; }
From source file: com.liferay.hadoop.action.HadoopJob.java
License: Open Source License
public String doExecute(HttpServletRequest request, HttpServletResponse response) throws Exception {
    response.setContentType(ContentTypes.TEXT_PLAIN_UTF8);

    PrintWriter writer = response.getWriter();

    FileSystem fileSystem = HadoopManager.getFileSystem();
    JobClient jobClient = HadoopManager.getJobClient();

    writer.println("-- Job Status --");

    Path inputPath = new Path("/index/*/*");
    Path outputPath = new Path("/wordcount/results");

    try {
        if (_runningJob == null) {
            writer.println("Creating job");

            if (fileSystem.exists(_jobPath)) {
                fileSystem.delete(_jobPath, false);
            }

            if (!fileSystem.exists(_jobPath)) {
                writer.println("Deploying the job code to cluster");

                FSDataOutputStream outputStream = null;

                try {
                    outputStream = fileSystem.create(_jobPath);

                    ServletContext servletContext = HadoopManager.getServletContext();

                    InputStream inputStream = servletContext.getResourceAsStream("/WEB-INF/lib/hadoop-job.jar");

                    StreamUtil.transfer(inputStream, outputStream, false);
                } finally {
                    StreamUtil.cleanUp(outputStream);
                }

                writer.println("Job code deployed to cluster");
            }

            if (fileSystem.exists(outputPath)) {
                writer.println("A previous job output was found, backing it up");

                fileSystem.rename(outputPath,
                        outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
            }

            _jobConf = HadoopManager.createNewJobConf();

            _jobConf.setJobName("Word Count");

            writer.println("Job '" + _jobConf.getJobName() + "' is being configured");

            _jobConf.setJarByClass(Map.class);
            _jobConf.setOutputKeyClass(Text.class);
            _jobConf.setOutputValueClass(IntWritable.class);
            _jobConf.setMapperClass(Map.class);
            _jobConf.setCombinerClass(Reduce.class);
            _jobConf.setReducerClass(Reduce.class);
            _jobConf.setInputFormat(TextInputFormat.class);
            _jobConf.setOutputFormat(TextOutputFormat.class);

            writer.println("Job code deployed to distributed cache's classpath");

            DistributedCache.addArchiveToClassPath(_jobPath, _jobConf, fileSystem);

            FileInputFormat.setInputPaths(_jobConf, inputPath);
            FileOutputFormat.setOutputPath(_jobConf, outputPath);

            writer.println("Submitting job the first time");

            _runningJob = jobClient.submitJob(_jobConf);

            writer.println("Job submitted");
        }

        int jobState = _runningJob.getJobState();

        writer.println(
                "Job status: " + jobState + " (RUNNING = 1, SUCCEEDED = 2, FAILED = 3, PREP = 4, KILLED = 5)");

        if ((jobState != JobStatus.RUNNING) && (jobState != JobStatus.PREP)) {
            writer.println("Re-issuing the job");

            if (fileSystem.exists(outputPath)) {
                writer.println("A previous job output was found, backing it up");

                fileSystem.rename(outputPath,
                        outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
            }

            writer.println("Re-submitting job");

            _runningJob = jobClient.submitJob(_jobConf);

            writer.println("Job submitted");
        }
    } catch (Exception ioe) {
        writer.println("Job error: ");

        ioe.printStackTrace(writer);
    }

    writer.flush();
    writer.close();

    return null;
}
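exists does double duty above: it guards the job jar redeployment, and (twice) it guards moving an existing output directory aside, since FileOutputFormat fails a job whose output path already exists. That backup step could be factored into a small helper; a sketch, using a hypothetical backupIfPresent name:

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Moves an existing output directory aside so FileOutputFormat can recreate it.
static void backupIfPresent(FileSystem fs, Path outputPath) throws IOException {
    if (fs.exists(outputPath)) {
        fs.rename(outputPath,
                outputPath.getParent().suffix("/.results-" + System.currentTimeMillis()));
    }
}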
From source file: com.liferay.hadoop.store.HDFSStore.java
License: Open Source License
@Override
public void deleteFile(long companyId, long repositoryId, String fileName, String versionLabel)
        throws PortalException, SystemException {

    Path fullPath = HadoopManager.getFullVersionFilePath(companyId, repositoryId, fileName, versionLabel);

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        if (fileSystem.exists(fullPath)) {
            fileSystem.delete(fullPath, true);
        }

        Path parentPath = fullPath.getParent();

        deleteEmptyAncestors(companyId, repositoryId, parentPath);
    } catch (IOException ioe) {
        throw new SystemException(ioe);
    }
}
From source file: com.liferay.hadoop.store.HDFSStore.java
License: Open Source License
@Override
public InputStream getFileAsStream(long companyId, long repositoryId, String fileName, String versionLabel)
        throws PortalException, SystemException {

    Path fullPath = HadoopManager.getFullVersionFilePath(companyId, repositoryId, fileName, versionLabel);

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        if (!fileSystem.exists(fullPath)) {
            throw new PortalException("File " + fullPath.toUri().toString() + " does not exist");
        }

        return fileSystem.open(fullPath);
    } catch (IOException ioe) {
        throw new SystemException(ioe);
    }
}
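Worth noting: FileSystem.open throws FileNotFoundException for a missing path anyway, so the up-front exists call mainly lets the code raise a domain-specific PortalException. A sketch of the catch-based alternative for the try body above (java.io.FileNotFoundException; assumes PortalException accepts a (String, Throwable) pair):

try {
    return fileSystem.open(fullPath);
} catch (FileNotFoundException fnfe) {
    throw new PortalException("File " + fullPath.toUri().toString() + " does not exist", fnfe);
}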
From source file: com.liferay.hadoop.store.HDFSStore.java
License: Open Source License
@Override
public long getFileSize(long companyId, long repositoryId, String fileName)
        throws PortalException, SystemException {

    Path fullPath = HadoopManager.getFullVersionFilePath(companyId, repositoryId, fileName, VERSION_DEFAULT);

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        if (!fileSystem.exists(fullPath)) {
            throw new PortalException("File " + fullPath.toUri().toString() + " does not exist");
        }

        FileStatus fileStatus = fileSystem.getFileStatus(fullPath);

        return fileStatus.getLen();
    } catch (IOException ioe) {
        throw new SystemException(ioe);
    }
}
From source file: com.liferay.hadoop.store.HDFSStore.java
License: Open Source License
@Override
public boolean hasDirectory(long companyId, long repositoryId, String dirName)
        throws PortalException, SystemException {

    Path fullPath = HadoopManager.getFullDirPath(companyId, repositoryId, dirName);

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        return fileSystem.exists(fullPath);
    } catch (IOException ioe) {
        throw new SystemException(ioe);
    }
}
From source file: com.liferay.hadoop.store.HDFSStore.java
License: Open Source License
@Override
public boolean hasFile(long companyId, long repositoryId, String fileName, String versionLabel)
        throws PortalException, SystemException {

    Path fullPath = HadoopManager.getFullVersionFilePath(companyId, repositoryId, fileName, versionLabel);

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        return fileSystem.exists(fullPath);
    } catch (IOException ioe) {
        throw new SystemException(ioe);
    }
}
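When the question is specifically "is this a file?" rather than "does anything exist at this path?", FileSystem.isFile(Path) (and isDirectory(Path) for the hasDirectory case above) distinguishes files from directories of the same name; recent Hadoop releases deprecate both in favor of inspecting getFileStatus. A sketch of hasFile rewritten with the type-aware check, reusing the method shape from the example:

@Override
public boolean hasFile(long companyId, long repositoryId, String fileName, String versionLabel)
        throws PortalException, SystemException {

    Path fullPath = HadoopManager.getFullVersionFilePath(companyId, repositoryId, fileName, versionLabel);

    try {
        FileSystem fileSystem = HadoopManager.getFileSystem();

        // True only for an actual file, not a directory at the same path.
        return fileSystem.isFile(fullPath);
    } catch (IOException ioe) {
        throw new SystemException(ioe);
    }
}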