List of usage examples for org.apache.hadoop.fs FileStatus getPath
public Path getPath()
From source file:cascading.hcatalog.CascadingHCatUtil.java
License:Apache License
/**
 * Collects the paths of the plain files found directly inside a Hive partition's
 * storage location, skipping any file whose name fully matches the regex
 * configured under {@code HCatTap.IGNORE_FILE_IN_PARTITION_REGEX}.
 *
 * <p>Sub-directories are not descended into. If listing the partition fails with
 * an {@link IOException}, the failure is handed to {@code logError} and whatever
 * was collected so far is returned (assuming {@code logError} itself returns
 * normally - not visible from here).
 *
 * @param part    the Hive partition whose storage descriptor location is listed
 * @param jobConf job configuration supplying the ignore regex and the file system
 * @return the URI path component of every non-ignored file in the partition directory
 */
protected static List<String> getFilesInHivePartition(Partition part, JobConf jobConf) {
    List<String> files = newArrayList();

    // With the default "" the pattern only matches an empty name, so nothing is ignored.
    String ignoreFileRegex = jobConf.get(HCatTap.IGNORE_FILE_IN_PARTITION_REGEX, "");
    Pattern ignorePattern = Pattern.compile(ignoreFileRegex);

    try {
        Path partitionDir = new Path(part.getSd().getLocation());
        FileStatus[] entries = partitionDir.getFileSystem(jobConf).listStatus(partitionDir);

        for (FileStatus entry : entries) {
            if (entry.isDir()) {
                continue;
            }

            Path entryPath = entry.getPath();
            if (ignorePattern.matcher(entryPath.getName()).matches()) {
                LOG.debug("Ignoring path {} since matches ignore regex {}", entryPath.toUri().getPath(),
                        ignoreFileRegex);
                continue;
            }

            files.add(entryPath.toUri().getPath());
        }
    } catch (IOException e) {
        logError("Unable to read the content of partition '" + part.getSd().getLocation() + "'", e);
    }

    return files;
}
From source file:cascading.platform.hadoop.BaseHadoopPlatform.java
License:Open Source License
@Override public void copyToLocal(String outputFile) throws IOException { if (!isUseCluster()) return;/*from www .j ava 2 s . c o m*/ Path path = new Path(safeFileName(outputFile)); if (!fileSys.exists(path)) throw new FileNotFoundException("data file not found: " + outputFile); File file = new File(outputFile); if (file.exists()) file.delete(); if (fileSys.isFile(path)) { // its a file, so just copy it over FileUtil.copy(fileSys, path, file, false, configuration); return; } // it's a directory file.mkdirs(); FileStatus contents[] = fileSys.listStatus(path); for (FileStatus fileStatus : contents) { Path currentPath = fileStatus.getPath(); if (currentPath.getName().startsWith("_")) // filter out temp and log dirs continue; FileUtil.copy(fileSys, currentPath, new File(file, currentPath.getName()), false, configuration); } }
From source file:cascading.scheme.DeprecatedAvroScheme.java
License:Apache License
/** * This method peeks at the source data to get a schema when none has been provided. * * @param flowProcess The cascading FlowProcess object for this flow. * @param tap The cascading Tap object. * @return Schema The schema of the peeked at data, or Schema.NULL if none exists. *///from ww w .j a va 2 s.c o m private Schema getSourceSchema(FlowProcess<? extends Configuration> flowProcess, Tap tap) throws IOException { if (tap instanceof CompositeTap) { tap = (Tap) ((CompositeTap) tap).getChildTaps().next(); } final String path = tap.getIdentifier(); Path p = new Path(path); final FileSystem fs = p.getFileSystem(flowProcess.getConfigCopy()); // Get all the input dirs List<FileStatus> statuses = new LinkedList<FileStatus>(Arrays.asList(fs.globStatus(p, filter))); // Now get all the things that are one level down for (FileStatus status : new LinkedList<FileStatus>(statuses)) { if (status.isDir()) for (FileStatus child : Arrays.asList(fs.listStatus(status.getPath(), filter))) { if (child.isDir()) { statuses.addAll(Arrays.asList(fs.listStatus(child.getPath(), filter))); } else if (fs.isFile(child.getPath())) { statuses.add(child); } } } for (FileStatus status : statuses) { Path statusPath = status.getPath(); if (fs.isFile(statusPath)) { // no need to open them all InputStream stream = null; DataFileStream reader = null; try { stream = new BufferedInputStream(fs.open(statusPath)); reader = new DataFileStream(stream, new GenericDatumReader()); return reader.getSchema(); } finally { if (reader == null) { if (stream != null) { stream.close(); } } else { reader.close(); } } } } // couldn't find any Avro files, return null schema return Schema.create(Schema.Type.NULL); }
From source file:cascading.tap.hadoop.BaseDistCacheTap.java
License:Open Source License
@Override public TupleEntryIterator openForRead(FlowProcess<? extends Configuration> flowProcess, RecordReader input) throws IOException { // always read via Hadoop FileSystem if in standalone/local mode, or if an RecordReader is provided if (HadoopUtil.isLocal(flowProcess.getConfig()) || input != null) { LOG.info("delegating to parent"); return super.openForRead(flowProcess, input); }/* w w w . j av a 2s . co m*/ Path[] cachedFiles = getLocalCacheFiles(flowProcess); if (cachedFiles == null || cachedFiles.length == 0) return super.openForRead(flowProcess, null); List<Path> paths = new ArrayList<>(); List<Tap> taps = new ArrayList<>(); if (isSimpleGlob()) { FileSystem fs = FileSystem.get(flowProcess.getConfig()); FileStatus[] statuses = fs.globStatus(getHfs().getPath()); for (FileStatus status : statuses) paths.add(status.getPath()); } else { paths.add(getHfs().getPath()); } for (Path pathToFind : paths) { for (Path path : cachedFiles) { if (path.toString().endsWith(pathToFind.getName())) { LOG.info("found {} in distributed cache", path); taps.add(new Lfs(getScheme(), path.toString())); } } } if (paths.isEmpty()) // not in cache, read from HDFS { LOG.info("could not find files in local resource path. delegating to parent: {}", super.getIdentifier()); return super.openForRead(flowProcess, input); } return new MultiSourceTap(taps.toArray(new Tap[taps.size()])).openForRead(flowProcess, input); }
From source file:cascading.tap.hadoop.BaseDistCacheTap.java
License:Open Source License
private void registerHfs(FlowProcess<? extends Configuration> process, Configuration conf, Hfs hfs) throws IOException { if (isSimpleGlob()) { FileSystem fs = FileSystem.get(conf); FileStatus[] statuses = fs.globStatus(getHfs().getPath()); if (statuses == null || statuses.length == 0) throw new TapException(String.format( "glob expression %s does not match any files on the filesystem", getHfs().getPath())); for (FileStatus fileStatus : statuses) registerURI(conf, fileStatus.getPath()); } else {//from w w w.ja v a 2s . com registerURI(conf, hfs.getPath()); } hfs.sourceConfInitComplete(process, conf); }
From source file:cascading.tap.hadoop.Hadoop18TapUtil.java
License:Open Source License
/**
 * Recursively moves a task's output into its final location.
 *
 * <p>A file is renamed to its final path; if the rename fails, the existing
 * destination is deleted and the rename is retried once. A directory has its
 * final counterpart created and each of its children is processed recursively.
 * Anything that is neither a file nor a directory is left untouched.
 *
 * @param conf         job configuration, read for the task id and the task output path
 * @param fs           file system holding the outputs
 * @param jobOutputDir the job's output directory
 * @param taskOutput   the task output file or directory to move
 * @throws IOException if a stale destination cannot be deleted or the retried rename fails
 */
private static void moveTaskOutputs(JobConf conf, FileSystem fs, Path jobOutputDir, Path taskOutput)
        throws IOException {
    String taskId = conf.get("mapred.task.id");

    if (!fs.isFile(taskOutput)) {
        if (!fs.getFileStatus(taskOutput).isDir()) {
            return;
        }

        FileStatus[] children = fs.listStatus(taskOutput);
        Path targetDir = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        fs.mkdirs(targetDir);

        if (children == null) {
            return;
        }
        for (FileStatus child : children) {
            moveTaskOutputs(conf, fs, jobOutputDir, child.getPath());
        }
        return;
    }

    Path target = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));

    boolean moved = fs.rename(taskOutput, target);
    if (!moved) {
        // The destination is probably left over from an earlier attempt:
        // clear it and retry exactly once.
        boolean cleared = fs.delete(target, true);
        if (!cleared) {
            throw new IOException("Failed to delete earlier output of task: " + taskId);
        }

        boolean movedOnRetry = fs.rename(taskOutput, target);
        if (!movedOnRetry) {
            throw new IOException("Failed to save output of task: " + taskId);
        }
    }

    LOG.debug("Moved " + taskOutput + " to " + target);
}
From source file:cascading.tap.hadoop.Hfs.java
License:Open Source License
private void getChildPaths(Configuration conf, Set<String> results, int trim, Path path, int depth) throws IOException { if (depth == 0) { String substring = path.toString().substring(trim); String identifier = getIdentifier(); if (identifier == null || identifier.isEmpty()) results.add(new Path(substring).toString()); else//w w w . j a v a 2 s . c om results.add(new Path(identifier, substring).toString()); return; } FileStatus[] statuses = getFileSystem(conf).listStatus(path, HIDDEN_FILES_FILTER); if (statuses == null) return; for (FileStatus fileStatus : statuses) getChildPaths(conf, results, trim, fileStatus.getPath(), depth - 1); }
From source file:cascading.tap.hadoop.util.Hadoop18TapUtil.java
License:Open Source License
/**
 * Recursively moves a task's output into its final location.
 *
 * <p>Files are renamed to their final path; when the rename fails the existing
 * destination is deleted and the rename is attempted once more. Directories get
 * a matching final directory and their children are moved recursively.
 *
 * @param conf         configuration, read for the task id ({@code mapred.task.id},
 *                     falling back to {@code mapreduce.task.id}) and the task output path
 * @param fs           file system holding the outputs
 * @param jobOutputDir the job's output directory
 * @param taskOutput   the task output file or directory to move
 * @throws IOException if a stale destination cannot be deleted or the retried rename fails
 */
private static void moveTaskOutputs(Configuration conf, FileSystem fs, Path jobOutputDir, Path taskOutput)
        throws IOException {
    String taskId = conf.get("mapred.task.id", conf.get("mapreduce.task.id"));

    if (fs.isFile(taskOutput)) {
        Path destination = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));

        if (!fs.rename(taskOutput, destination)) {
            // Remove what is presumably an earlier attempt's output, then retry once.
            if (!fs.delete(destination, true)) {
                throw new IOException("Failed to delete earlier output of task: " + taskId);
            }

            if (!fs.rename(taskOutput, destination)) {
                throw new IOException("Failed to save output of task: " + taskId);
            }
        }

        LOG.debug("Moved {} to {}", taskOutput, destination);
        return;
    }

    if (!fs.getFileStatus(taskOutput).isDir()) {
        return;
    }

    FileStatus[] entries = fs.listStatus(taskOutput);
    Path destinationDir = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
    fs.mkdirs(destinationDir);

    if (entries == null) {
        return;
    }

    for (FileStatus entry : entries) {
        moveTaskOutputs(conf, fs, jobOutputDir, entry.getPath());
    }
}
From source file:cc.solr.lucene.store.hdfs.ChangeFileExt.java
License:Apache License
public static void main(String[] args) throws IOException { Path p = new Path(args[0]); FileSystem fileSystem = FileSystem.get(p.toUri(), new Configuration()); FileStatus[] listStatus = fileSystem.listStatus(p); for (FileStatus fileStatus : listStatus) { Path path = fileStatus.getPath(); fileSystem.rename(path, new Path(path.toString() + ".lf")); }// ww w.j a va 2 s .c o m }
From source file:cc.solr.lucene.store.hdfs.ConvertDirectory.java
License:Apache License
public static void convert(Path path) throws IOException { FileSystem fileSystem = FileSystem.get(path.toUri(), new Configuration()); if (!fileSystem.exists(path)) { System.out.println(path + " does not exists."); return;//from w ww . jav a2s.c o m } FileStatus fileStatus = fileSystem.getFileStatus(path); if (fileStatus.isDir()) { FileStatus[] listStatus = fileSystem.listStatus(path); for (FileStatus status : listStatus) { convert(status.getPath()); } } else { System.out.println("Converting file [" + path + "]"); HdfsMetaBlock block = new HdfsMetaBlock(); block.realPosition = 0; block.logicalPosition = 0; block.length = fileStatus.getLen(); FSDataOutputStream outputStream = fileSystem.append(path); block.write(outputStream); outputStream.writeInt(1); outputStream.writeLong(fileStatus.getLen()); outputStream.writeInt(HdfsFileWriter.VERSION); outputStream.close(); } }