List of usage examples for org.apache.hadoop.fs.FileStatus.isDirectory()
public boolean isDirectory()
From source file:com.yahoo.glimmer.util.MergeSortTool.java
License:Open Source License
/**
 * Command-line entry point: merges the sorted text files matched by the input glob into a
 * single output file and optionally records the number of merged lines.
 *
 * <p>Decompression, merge and compression all happen in the calling thread (an MR job with a
 * single reducer might be quicker).
 *
 * @param args command-line arguments; input glob and output filename are required, the
 *             line-count filename is optional
 * @return {@code 0} on success, {@code 1} when the glob matches nothing or matches a directory
 * @throws Exception if argument parsing, file-system access or the merge itself fails
 */
@Override
public int run(String[] args) throws Exception {
    SimpleJSAP jsap = new SimpleJSAP(MergeSortTool.class.getName(),
            "Merges alpha numerically sorted text files on HDFS",
            new Parameter[] {
                    new FlaggedOption(INPUT_ARG, JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'i',
                            INPUT_ARG, "input filenames glob eg. .../part-r-?????/sortedlines.text"),
                    new FlaggedOption(OUTPUT_ARG, JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, 'o',
                            OUTPUT_ARG, "output filename"),
                    new FlaggedOption(COUNT_ARG, JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'c',
                            COUNT_ARG,
                            "optionally create a file containing a count of the number of lines merged in text"),
            });

    JSAPResult jsapResult = jsap.parse(args);
    if (!jsapResult.success()) {
        System.err.print(jsap.getUsage());
        System.exit(1);
    }

    Path inputGlobPath = new Path(jsapResult.getString(INPUT_ARG));
    Configuration config = getConf();
    FileSystem fs = FileSystem.get(config);

    // globStatus returns null (not an empty array) when the pattern contains no glob
    // characters and the path does not exist, so both cases must be treated as "no match".
    FileStatus[] sources = fs.globStatus(inputGlobPath);
    if (sources == null || sources.length == 0) {
        System.err.println("No files matching input glob:" + inputGlobPath.toString());
        return 1;
    }

    List<Path> sourcePaths = new ArrayList<Path>(sources.length);
    for (FileStatus source : sources) {
        if (source.isDirectory()) {
            System.err.println(source.getPath().toString() + " is a directory.");
            return 1;
        }
        sourcePaths.add(source.getPath());
    }

    Path outputPath = new Path(jsapResult.getString(OUTPUT_ARG));
    CompressionCodecFactory factory = new CompressionCodecFactory(config);

    // The counts file is optional: the stream stays null when the option is absent, and every
    // use of it below is guarded accordingly.
    FSDataOutputStream countsOutputStream = null;
    try {
        if (jsapResult.contains(COUNT_ARG)) {
            countsOutputStream = fs.create(new Path(jsapResult.getString(COUNT_ARG)));
        }

        int lineCount = MergeSortTool.mergeSort(fs, sourcePaths, outputPath, factory);
        System.out.println("Merged " + lineCount + " lines into " + outputPath.toString());

        if (countsOutputStream != null) {
            countsOutputStream.writeBytes("" + lineCount + '\n');
            countsOutputStream.flush();
        }
    } finally {
        // Close even when the merge fails so the stream is not leaked.
        if (countsOutputStream != null) {
            countsOutputStream.close();
        }
    }
    return 0;
}
From source file:com.yahoo.labs.samoa.streams.fs.HDFSFileStreamSource.java
License:Apache License
/**
 * Resolves the list of files this source will read from.
 *
 * <p>If {@code path} is a directory, every regular file directly inside it that matches
 * {@code *.ext} (or {@code *} when {@code ext} is {@code null}) is recorded; otherwise
 * {@code path} itself is recorded. The current index is reset to {@code -1}.
 *
 * @param config Hadoop configuration used to obtain the file system
 * @param path   file or directory to read from
 * @param ext    file extension to filter on, or {@code null} for no filtering
 * @throws RuntimeException wrapping the {@link IOException} if the file system cannot be queried
 */
public void init(Configuration config, String path, String ext) {
    this.config = config;
    this.filePaths = new ArrayList<String>();
    Path root = new Path(path);

    try {
        FileSystem fileSystem = FileSystem.get(config);
        if (!fileSystem.getFileStatus(root).isDirectory()) {
            this.filePaths.add(path);
        } else {
            String pattern = (ext != null) ? "*." + ext : "*";
            for (FileStatus candidate : fileSystem.globStatus(new Path(path, pattern))) {
                if (candidate.isFile()) {
                    this.filePaths.add(candidate.getPath().toString());
                }
            }
        }
    } catch (IOException ioe) {
        throw new RuntimeException("Failed getting list of files at:" + path, ioe);
    }

    this.currentIndex = -1;
}
From source file:de.huberlin.wbi.hiway.scheduler.Scheduler.java
License:Apache License
/**
 * Walks the application directories below the configured sandbox directory, copies every
 * {@code *.log} file found there to the local file system and feeds each of its lines to
 * {@code addEntryToDB} as a {@code JsonReportEntry}.
 *
 * <p>Any I/O or JSON failure prints the stack trace and terminates the JVM with exit code -1.
 */
protected void parseLogs() {
    String baseDirName = conf.get(HiWayConfiguration.HIWAY_AM_DIRECTORY_BASE,
            HiWayConfiguration.HIWAY_AM_DIRECTORY_BASE_DEFAULT);
    String sandboxDirName = conf.get(HiWayConfiguration.HIWAY_AM_DIRECTORY_CACHE,
            HiWayConfiguration.HIWAY_AM_DIRECTORY_CACHE_DEFAULT);
    Path baseDir = new Path(new Path(hdfs.getUri()), baseDirName);
    Path sandboxDir = new Path(baseDir, sandboxDirName);

    try {
        for (FileStatus appEntry : hdfs.listStatus(sandboxDir)) {
            if (!appEntry.isDirectory()) {
                continue;
            }
            Path appDir = appEntry.getPath();

            for (FileStatus child : hdfs.listStatus(appDir)) {
                Path remoteLog = child.getPath();
                if (!remoteLog.getName().endsWith(".log")) {
                    continue;
                }

                // The local copy is named after the application directory, not the log file.
                Path localCopy = new Path(appDir.getName());
                System.out.println("Parsing log " + localCopy.toString());
                hdfs.copyToLocalFile(false, remoteLog, localCopy);

                try (BufferedReader reader = new BufferedReader(
                        new FileReader(new File(localCopy.toString())))) {
                    for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                        addEntryToDB(new JsonReportEntry(line));
                    }
                }
            }
        }
    } catch (IOException | JSONException e) {
        e.printStackTrace();
        System.exit(-1);
    }
}
From source file:de.l3s.streamcorpus.terrier.ThriftFileCollectionInputFormat.java
License:Apache License
public static List<FileSplit> getSplitsForFile(FileStatus status, Configuration conf, int numLinesPerSplit) throws IOException { List<FileSplit> splits = new ArrayList<FileSplit>(); Path fileName = status.getPath(); if (status.isDirectory()) { throw new IOException("Not a file: " + fileName); }//from w w w . ja va2 s . c o m FileSystem fs = fileName.getFileSystem(conf); LineReader lr = null; try { FSDataInputStream in = fs.open(fileName); lr = new LineReader(in, conf); Text line = new Text(); int numLines = 0; long begin = 0; long length = 0; int num = -1; while ((num = lr.readLine(line)) > 0) { numLines++; length += num; if (numLines == numLinesPerSplit) { splits.add(createFileSplit(fileName, begin, length)); begin += length; length = 0; numLines = 0; } } if (numLines != 0) { splits.add(createFileSplit(fileName, begin, length)); } } finally { if (lr != null) { lr.close(); } } return splits; }
From source file:eagle.security.hdfs.entity.FileStatusEntity.java
License:Apache License
/**
 * Builds an entity by copying the attributes of a Hadoop {@link FileStatus}.
 *
 * @param status the file status to copy from
 * @throws IOException declared because reading the symlink target may throw it
 */
public FileStatusEntity(FileStatus status) throws IOException {
    // The path is not copied here; that assignment is disabled in this constructor.
    length = status.getLen();
    isdir = status.isDirectory();
    block_replication = status.getReplication();
    blocksize = status.getBlockSize();
    modification_time = status.getModificationTime();
    access_time = status.getAccessTime();
    permission = status.getPermission();
    owner = status.getOwner();
    group = status.getGroup();

    // The symlink target is only read when the status actually is a symlink.
    if (status.isSymlink()) {
        symlink = status.getSymlink();
    }
}
From source file:eagle.service.security.hdfs.HDFSFileSystem.java
License:Apache License
/** * Browse Resources of passed Path and its sub directories * Note :: Sub directories only to determine the Sensitivity Type of Root Directories * @param filePath/*from w w w . j a va2 s .c o m*/ * @return fileSystemResponseObj{ with SubDirectoryMap and ListOf FileStatus} * @throws Exception */ public HDFSFileSystemResponseEntity browseResources(String filePath) throws Exception { LOG.info("HDFS File Path : " + filePath + " and EndPoint : " + hdfsEndPoint); FileSystem hdfsFileSystem = null; FileStatus[] listStatus; HDFSFileSystemResponseEntity response = new HDFSFileSystemResponseEntity(); Map<String, List<String>> subdirectoriesMap = new HashMap<String, List<String>>(); try { Configuration config = createConfig(); hdfsFileSystem = getFileSystem(config); Path path = new Path(filePath); listStatus = hdfsFileSystem.listStatus(path); LOG.info(" Browsing Sub Directories .... "); // Browse Sub- directories for (FileStatus fileStatus : listStatus) { FileStatus[] fileStatusList = null; if (fileStatus.isDirectory()) fileStatusList = hdfsFileSystem.listStatus(new Path(fileStatus.getPath().toUri().getPath())); if (fileStatusList != null && fileStatusList.length > 0) subdirectoriesMap.put(fileStatus.getPath().toUri().getPath(), /* Key would be Parent */ getSubDirectories(fileStatusList) /* Value Would be Child Paths */); } response.setFileList(Arrays.asList(listStatus)); response.setSubDirectoriesMap(subdirectoriesMap); } catch (Exception ex) { LOG.error(" Exception when browsing files for the path " + filePath, ex.getMessage()); throw new Exception( " Exception When browsing Files/Directories in HDFS .. Message : " + ex.getMessage()); } finally { //Close the file system if (hdfsFileSystem != null) hdfsFileSystem.close(); } return response; }
From source file:eagle.service.security.hdfs.HDFSFileSystem.java
License:Apache License
/**
 * Collects the paths of those entries in the listing that are directories.
 *
 * @param listStatus entries to inspect
 * @return the URI path component of every directory entry, in listing order
 * @throws IOException declared by the signature; nothing in this method body throws it
 */
public List<String> getSubDirectories(FileStatus[] listStatus) throws IOException {
    List<String> directoryPaths = new ArrayList<>();
    for (int i = 0; i < listStatus.length; i++) {
        FileStatus entry = listStatus[i];
        if (!entry.isDirectory()) {
            continue;
        }
        String entryPath = entry.getPath().toUri().getPath();
        directoryPaths.add(entryPath);
    }
    return directoryPaths;
}
From source file:eagle.service.security.hdfs.HDFSResourceUtils.java
License:Apache License
/** * Filter Only directories /*from w ww. j av a2s . co m*/ * @param fileStatuses * @return */ public static List<String> filterDirectories(List<FileStatus> fileStatuses) { List<String> directories = new ArrayList<>(); for (FileStatus fileStatus : fileStatuses) { if (fileStatus.isDirectory()) directories.add(fileStatus.getPath().toUri().getPath()); } return directories; }
From source file:edu.nyu.vida.data_polygamy.utils.FrameworkUtils.java
License:BSD License
public static <K, V> void merge(Path fromDirectory, Path toFile, Class<K> keyClass, Class<V> valueClass) throws IOException { Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); SequenceFile.Writer writer = SequenceFile.createWriter(conf, SequenceFile.Writer.file(toFile), SequenceFile.Writer.keyClass(keyClass), SequenceFile.Writer.valueClass(valueClass)); for (FileStatus status : fs.listStatus(fromDirectory)) { if (status.isDirectory()) { System.out.println("Skip directory '" + status.getPath().getName() + "'"); continue; }//from w w w .j a v a2s.co m Path file = status.getPath(); if (file.getName().startsWith("_")) { System.out.println("Skip \"_\"-file '" + file.getName() + "'"); //There are files such "_SUCCESS"-named in jobs' ouput folders continue; } //System.out.println("Merging '" + file.getName() + "'"); SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(file)); Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf); Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf); while (reader.next(key, value)) { writer.append(key, value); } reader.close(); } writer.close(); }
From source file:edu.nyu.vida.data_polygamy.utils.GetMergeFiles.java
License:BSD License
public static void main(String[] args) throws IllegalArgumentException, IOException, URISyntaxException { String fromDirectory = args[0]; String toEventsDirectory = args[1]; String toOutliersDirectory = args[2]; String metadataFile = args[3]; // Detecting datasets. HashSet<String> datasets = new HashSet<String>(); FileReader fileReader = new FileReader(metadataFile); BufferedReader bufferedReader = new BufferedReader(fileReader); String line;//from ww w .j a va 2s .c om while ((line = bufferedReader.readLine()) != null) { String[] parts = line.split(","); datasets.add(parts[0]); } bufferedReader.close(); // Downloading relationships. String relationshipPatternStr = "([a-zA-Z0-9]{4}\\-[a-zA-Z0-9]{4})\\-([a-zA-Z0-9]{4}\\-[a-zA-Z0-9]{4})"; Pattern relationshipPattern = Pattern.compile(relationshipPatternStr); Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); FileSystem localFS = FileSystem.getLocal(conf); for (FileStatus status : fs.listStatus(new Path(fs.getHomeDirectory() + "/" + fromDirectory))) { if (!status.isDirectory()) { continue; } Path file = status.getPath(); Matcher m = relationshipPattern.matcher(file.getName()); if (!m.find()) continue; String ds1 = m.group(1); String ds2 = m.group(2); if (!datasets.contains(ds1)) continue; if (!datasets.contains(ds2)) continue; for (FileStatus statusDir : fs.listStatus(file)) { if (!statusDir.isDirectory()) { continue; } Path fromPath = statusDir.getPath(); String toPathStr; if (fromPath.getName().contains("events")) { toPathStr = toEventsDirectory + "/" + fromPath.getParent().getName() + "-" + fromPath.getName(); } else { toPathStr = toOutliersDirectory + "/" + fromPath.getParent().getName() + "-" + fromPath.getName(); } Path toPath = new Path(toPathStr); System.out.println("Copying:"); System.out.println(" From: " + fromPath.toString()); System.out.println(" To: " + toPath.toString()); FileUtil.copyMerge(fs, // HDFS File System fromPath, // HDFS path localFS, // Local File System toPath, 
// Local Path false, // Do not delete HDFS path conf, // Configuration null); } } }