Usage examples for org.apache.hadoop.fs.FileStatus.getPath()
public Path getPath()
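Before the examples from real projects below, here is a minimal sketch of the method in context (the /data path and default Configuration are placeholders, not taken from any of the sources):

    FileSystem fs = FileSystem.get(new Configuration());
    for (FileStatus status : fs.listStatus(new Path("/data"))) {
        // getPath() returns the fully qualified Path of the listed entry
        System.out.println(status.getPath());
    }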
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.HoplogUtil.java
License:Apache License
private static long expiredTime(FileStatus file, Map<String, Long> expiredHoplogs) {
    String expiredMarkerName = file.getPath().getName() + AbstractHoplogOrganizer.EXPIRED_HOPLOG_EXTENSION;
    long expiredTimeStamp = -1;
    if (expiredHoplogs.containsKey(expiredMarkerName)) {
        expiredTimeStamp = expiredHoplogs.get(expiredMarkerName);
    }
    return expiredTimeStamp;
}
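Note that -1 serves as a sentinel value here: it is returned whenever no expired-marker entry exists for the given hoplog file.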
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.HoplogUtil.java
License:Apache License
/**
 * @param regionPath
 * @param fs
 * @return list of latest checkpoint files of all buckets in the region
 * @throws IOException
 */
public static Collection<FileStatus> getCheckpointFiles(Path regionPath, FileSystem fs) throws IOException {
    ArrayList<FileStatus> latestSnapshots = new ArrayList<FileStatus>();

    Collection<Collection<FileStatus>> allBuckets = getBucketHoplogs(regionPath, fs,
        AbstractHoplogOrganizer.MAJOR_HOPLOG_EXTENSION, 0, 0);

    // extract the latest major compacted hoplog from each bucket
    for (Collection<FileStatus> bucket : allBuckets) {
        FileStatus latestSnapshot = null;
        for (FileStatus file : bucket) {
            if (latestSnapshot == null) {
                latestSnapshot = file;
            } else {
                String name1 = latestSnapshot.getPath().getName();
                String name2 = file.getPath().getName();
                if (HoplogComparator.compareByName(name1, name2) > 0) {
                    latestSnapshot = file;
                }
            }
        }
        if (latestSnapshot != null) {
            latestSnapshots.add(latestSnapshot);
        }
    }

    return latestSnapshots;
}
From source file:com.github.dongjinleekr.hadoop.examples.DistributedCacheExample.java
License:Apache License
public static void printCachePath(Configuration conf) throws IOException, URISyntaxException {
    FileSystem fs = FileSystem.get(conf);
    URI[] archives = DistributedCache.getCacheArchives(conf);
    for (URI archive : archives) {
        HarFileSystem hfs = new HarFileSystem();
        String cacheUri = String.format("har://hdfs-%s:%d%s", fs.getUri().getHost(), fs.getUri().getPort(),
            archive.toString());
        System.out.println(cacheUri);
        hfs.initialize(new URI(cacheUri), conf);

        FileStatus root = hfs.listStatus(new Path("."))[0];
        FileStatus[] children = hfs.listStatus(root.getPath());
        for (FileStatus child : children) {
            System.out.println(child.getPath());
        }

        IOUtils.closeStream(hfs);
    }
}
From source file:com.github.dryangkun.hbase.tidx.hive.HiveHFileOutputFormat.java
License:Apache License
@Override
public RecordWriter getHiveRecordWriter(final JobConf jc, final Path finalOutPath,
    Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties,
    final Progressable progressable) throws IOException {

    // Read configuration for the target path, first from jobconf, then from table properties
    String hfilePath = getFamilyPath(jc, tableProperties);
    if (hfilePath == null) {
        throw new RuntimeException("Please set " + HFILE_FAMILY_PATH + " to target location for HFiles");
    }

    // Target path's last component is also the column family name.
    final Path columnFamilyPath = new Path(hfilePath);
    final String columnFamilyName = columnFamilyPath.getName();
    final byte[] columnFamilyNameBytes = Bytes.toBytes(columnFamilyName);
    final Job job = new Job(jc);
    setCompressOutput(job, isCompressed);
    setOutputPath(job, finalOutPath);

    // Create the HFile writer
    final org.apache.hadoop.mapreduce.TaskAttemptContext tac = ShimLoader.getHadoopShims()
        .newTaskAttemptContext(job.getConfiguration(), progressable);

    final Path outputdir = FileOutputFormat.getOutputPath(tac);
    final org.apache.hadoop.mapreduce.RecordWriter<ImmutableBytesWritable, KeyValue> fileWriter =
        getFileWriter(tac);

    // Individual columns are going to be pivoted to HBase cells,
    // and for each row, they need to be written out in order
    // of column name, so sort the column names now, creating a
    // mapping to their column position. However, the first
    // column is interpreted as the row key.
    String columnList = tableProperties.getProperty("columns");
    String[] columnArray = columnList.split(",");
    final SortedMap<byte[], Integer> columnMap = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
    int i = 0;
    for (String columnName : columnArray) {
        if (i != 0) {
            columnMap.put(Bytes.toBytes(columnName), i);
        }
        ++i;
    }

    return new RecordWriter() {
        @Override
        public void close(boolean abort) throws IOException {
            try {
                fileWriter.close(null);
                if (abort) {
                    return;
                }
                // Move the hfiles file(s) from the task output directory to the
                // location specified by the user.
                FileSystem fs = outputdir.getFileSystem(jc);
                fs.mkdirs(columnFamilyPath);
                Path srcDir = outputdir;
                for (;;) {
                    FileStatus[] files = fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER);
                    if ((files == null) || (files.length == 0)) {
                        throw new IOException("No family directories found in " + srcDir);
                    }
                    if (files.length != 1) {
                        throw new IOException("Multiple family directories found in " + srcDir);
                    }
                    srcDir = files[0].getPath();
                    if (srcDir.getName().equals(columnFamilyName)) {
                        break;
                    }
                }
                for (FileStatus regionFile : fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER)) {
                    fs.rename(regionFile.getPath(), new Path(columnFamilyPath, regionFile.getPath().getName()));
                }
                // Hive actually wants a file as task output (not a directory), so
                // replace the empty directory with an empty file to keep it happy.
                fs.delete(outputdir, true);
                fs.createNewFile(outputdir);
            } catch (InterruptedException ex) {
                throw new IOException(ex);
            }
        }

        private void writeText(Text text) throws IOException {
            // Decompose the incoming text row into fields.
            String s = text.toString();
            String[] fields = s.split("\u0001");
            assert (fields.length <= (columnMap.size() + 1));

            // First field is the row key.
            byte[] rowKeyBytes = Bytes.toBytes(fields[0]);

            // Remaining fields are cells addressed by column name within row.
            for (Map.Entry<byte[], Integer> entry : columnMap.entrySet()) {
                byte[] columnNameBytes = entry.getKey();
                int iColumn = entry.getValue();
                String val;
                if (iColumn >= fields.length) {
                    // trailing blank field
                    val = "";
                } else {
                    val = fields[iColumn];
                    if ("\\N".equals(val)) {
                        // omit nulls
                        continue;
                    }
                }
                byte[] valBytes = Bytes.toBytes(val);
                KeyValue kv = new KeyValue(rowKeyBytes, columnFamilyNameBytes, columnNameBytes, valBytes);
                try {
                    fileWriter.write(null, kv);
                } catch (IOException e) {
                    LOG.error("Failed while writing row: " + s);
                    throw e;
                } catch (InterruptedException ex) {
                    throw new IOException(ex);
                }
            }
        }

        private void writePut(PutWritable put) throws IOException {
            ImmutableBytesWritable row = new ImmutableBytesWritable(put.getPut().getRow());
            SortedMap<byte[], List<Cell>> cells = put.getPut().getFamilyCellMap();
            for (Map.Entry<byte[], List<Cell>> entry : cells.entrySet()) {
                Collections.sort(entry.getValue(), new CellComparator());
                for (Cell c : entry.getValue()) {
                    try {
                        fileWriter.write(row, KeyValueUtil.copyToNewKeyValue(c));
                    } catch (InterruptedException e) {
                        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
                    }
                }
            }
        }

        @Override
        public void write(Writable w) throws IOException {
            if (w instanceof Text) {
                writeText((Text) w);
            } else if (w instanceof PutWritable) {
                writePut((PutWritable) w);
            } else {
                throw new IOException("Unexpected writable " + w);
            }
        }
    };
}
From source file:com.github.joshelser.accumulo.DelimitedIngest.java
License:Apache License
private List<Path> convertInputToPaths() throws IOException {
    List<String> inputs = args.getInput();
    List<Path> paths = new ArrayList<>(inputs.size());
    for (String input : inputs) {
        Path p = new Path(input);
        FileSystem fs = p.getFileSystem(conf);
        FileStatus fstat = fs.getFileStatus(p);
        if (fstat.isFile()) {
            paths.add(p);
        } else if (fstat.isDirectory()) {
            for (FileStatus child : fs.listStatus(p)) {
                if (child.isFile()) {
                    paths.add(child.getPath());
                }
            }
        } else {
            throw new IllegalStateException("Unable to handle that which is not file nor directory: " + p);
        }
    }
    return paths;
}
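Note that directory inputs are expanded only one level deep: entries nested in subdirectories fail the child.isFile() check and are silently skipped rather than traversed.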
From source file:com.github.libsml.commons.util.HadoopUtils.java
License:Apache License
public static String readString(Path path, Configuration conf) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    FileStatus[] statuses = fs.listStatus(path);
    StringBuilder re = new StringBuilder();
    for (FileStatus status : statuses) {
        if (status.isFile() && !status.getPath().getName().equals("_SUCCESS")) {
            FSDataInputStream streaming = fs.open(status.getPath());
            BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(streaming));
            re.append(bufferedReader.readLine() + System.lineSeparator());
        }
    }
    return re.toString();
}
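This helper reads only the first line of each part file (skipping Hadoop's _SUCCESS marker), so it is best suited to jobs whose outputs are single-line values. Callers may also want to close the reader, which the snippet as written never does.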
From source file:com.github.sadikovi.hadoop.riff.RiffOutputCommitter.java
License:Open Source License
private static void recurListFiles(FileSystem fs, FileStatus fileStatus, List<FileStatus> foundFiles,
    boolean fetchOneFile) throws IOException {
    if (fetchOneFile && !foundFiles.isEmpty())
        return;
    if (fileStatus.isDirectory()) {
        FileStatus[] list = fs.listStatus(fileStatus.getPath(), PartFileFilter.instance);
        for (int i = 0; i < list.length; i++) {
            recurListFiles(fs, list[i], foundFiles, fetchOneFile);
        }
    } else {
        // file status is a file, add to the list
        foundFiles.add(fileStatus);
    }
}
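A possible invocation of this helper (hypothetical output path; assumes a FileSystem already bound to the job configuration) might look like:

    List<FileStatus> found = new ArrayList<>();
    FileStatus root = fs.getFileStatus(new Path("/riff/output"));
    // pass fetchOneFile=true to stop recursing after the first matching part file
    recurListFiles(fs, root, found, true);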
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.java
License:Open Source License
/**
 * Returns an array of FileStatus objects whose path names match pathPattern
 * and are accepted by the user-supplied path filter. Results are sorted by
 * their path names.
 *
 * Returns null if pathPattern has no glob and the path does not exist.
 * Returns an empty array if pathPattern has a glob and no path matches it.
 *
 * @param pathPattern A regular expression specifying the path pattern.
 * @param filter A user-supplied path filter.
 * @return An array of FileStatus objects.
 * @throws IOException if an error occurs.
 */
@Override
public FileStatus[] globStatus(Path pathPattern, PathFilter filter) throws IOException {
    checkOpen();

    LOG.debug("GHFS.globStatus: {}", pathPattern);
    // URI does not handle glob expressions nicely, for the purpose of
    // fully-qualifying a path we can URI-encode them.
    // Using toString() to avoid Path(URI) constructor.
    Path encodedPath = new Path(pathPattern.toUri().toString());
    // We convert pathPattern to GCS path and then to Hadoop path to ensure that it ends up in
    // the correct format. See note in getHadoopPath for more information.
    Path fixedPath = getHadoopPath(getGcsPath(encodedPath));
    // Decode URI-encoded path back into a glob path.
    fixedPath = new Path(URI.create(fixedPath.toString()));
    LOG.debug("GHFS.globStatus fixedPath: {} => {}", pathPattern, fixedPath);

    if (shouldUseFlatGlob(fixedPath)) {
        String pathString = fixedPath.toString();
        String prefixString = trimToPrefixWithoutGlob(pathString);
        Path prefixPath = new Path(prefixString);
        URI prefixUri = getGcsPath(prefixPath);

        if (prefixString.endsWith("/") && !prefixPath.toString().endsWith("/")) {
            // Path strips a trailing slash unless it's the 'root' path. We want to keep the trailing
            // slash so that we don't wastefully list sibling files which may match the directory-name
            // as a strict prefix but would've been omitted due to not containing the '/' at the end.
            prefixUri = FileInfo.convertToDirectoryPath(gcsfs.getPathCodec(), prefixUri);
        }

        // Get everything matching the non-glob prefix.
        LOG.debug("Listing everything with prefix '{}'", prefixUri);
        List<FileInfo> fileInfos = gcsfs.listAllFileInfoForPrefix(prefixUri);
        if (fileInfos.isEmpty()) {
            // Let the superclass define the proper logic for finding no matches.
            return super.globStatus(fixedPath, filter);
        }

        // Perform the core globbing logic in the helper filesystem.
        GoogleHadoopFileSystem helperFileSystem =
            ListHelperGoogleHadoopFileSystem.createInstance(gcsfs, fileInfos);
        FileStatus[] returnList = helperFileSystem.globStatus(pathPattern, filter);

        // If the return list contains directories, we should repair them if they're 'implicit'.
        if (enableAutoRepairImplicitDirectories) {
            List<URI> toRepair = new ArrayList<>();
            for (FileStatus status : returnList) {
                // Modification time of 0 indicates implicit directory.
                if (status.isDir() && status.getModificationTime() == 0) {
                    toRepair.add(getGcsPath(status.getPath()));
                }
            }
            if (!toRepair.isEmpty()) {
                LOG.warn("Discovered {} implicit directories to repair within return values.", toRepair.size());
                gcsfs.repairDirs(toRepair);
            }
        }
        return returnList;
    } else {
        FileStatus[] ret = super.globStatus(fixedPath, filter);
        if (ret == null) {
            if (enableAutoRepairImplicitDirectories) {
                LOG.debug("GHFS.globStatus returned null for '{}', attempting possible repair.", pathPattern);
                if (gcsfs.repairPossibleImplicitDirectory(getGcsPath(fixedPath))) {
                    LOG.warn("Success repairing '{}', re-globbing.", pathPattern);
                    ret = super.globStatus(fixedPath, filter);
                }
            }
        }
        return ret;
    }
}
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.java
License:Open Source License
/**
 * Converts the given FileStatus to its string representation.
 *
 * @param stat FileStatus to convert.
 * @return String representation of the given FileStatus.
 */
private static String fileStatusToString(FileStatus stat) {
    assert stat != null;

    return String.format("path: %s, isDir: %s, len: %d, owner: %s", stat.getPath().toString(), stat.isDir(),
        stat.getLen(), stat.getOwner());
}
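For a hypothetical 1 KB file, this format string would produce output along the lines of: path: gs://my-bucket/dir/file, isDir: false, len: 1024, owner: hadoop (the bucket, file, and owner names here are illustrative only).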
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemNewUriFormatIntegrationTest.java
License:Open Source License
@Test
public void testPathsOnlyValidInNewUriScheme() throws IOException {
    GoogleHadoopFileSystem typedFs = (GoogleHadoopFileSystem) ghfs;

    Path directory = new Path(
        String.format("gs://%s/testPathsOnlyValidInNewUriScheme/", typedFs.getRootBucketName()));
    Path p = new Path(directory, "foo#bar#baz");
    try {
        ghfs.getFileStatus(p);
        Assert.fail("Expected FileNotFoundException.");
    } catch (FileNotFoundException fnfe) {
        // expected.
    }

    ghfsHelper.writeFile(p, "SomeText", 100, false);

    FileStatus status = ghfs.getFileStatus(p);
    Assert.assertEquals(p, status.getPath());

    ghfs.delete(directory, true);
}