Usage examples for org.apache.hadoop.fs.FileStatus.getPath()
public Path getPath()
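Before the examples from real projects below, here is a minimal sketch of the method in context (the /data path and default Configuration are placeholders, not taken from any of the sources):

    FileSystem fs = FileSystem.get(new Configuration());
    for (FileStatus status : fs.listStatus(new Path("/data"))) {
        // getPath() returns the fully qualified Path of the listed entry
        System.out.println(status.getPath());
    }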
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.HoplogUtil.java
License:Apache License
private static long expiredTime(FileStatus file, Map<String, Long> expiredHoplogs) {
    String expiredMarkerName = file.getPath().getName() + AbstractHoplogOrganizer.EXPIRED_HOPLOG_EXTENSION;
    long expiredTimeStamp = -1;
    if (expiredHoplogs.containsKey(expiredMarkerName)) {
        expiredTimeStamp = expiredHoplogs.get(expiredMarkerName);
    }
    return expiredTimeStamp;
}
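Note that -1 serves as a sentinel value here: it is returned whenever no expired-marker entry exists for the given hoplog file.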
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.HoplogUtil.java
License:Apache License
/**
 * @param regionPath
 * @param fs
 * @return list of latest checkpoint files of all buckets in the region
 * @throws IOException
 */
public static Collection<FileStatus> getCheckpointFiles(Path regionPath, FileSystem fs) throws IOException {
    ArrayList<FileStatus> latestSnapshots = new ArrayList<FileStatus>();

    Collection<Collection<FileStatus>> allBuckets = getBucketHoplogs(regionPath, fs,
        AbstractHoplogOrganizer.MAJOR_HOPLOG_EXTENSION, 0, 0);

    // extract the latest major compacted hoplog from each bucket
    for (Collection<FileStatus> bucket : allBuckets) {
        FileStatus latestSnapshot = null;
        for (FileStatus file : bucket) {
            if (latestSnapshot == null) {
                latestSnapshot = file;
            } else {
                String name1 = latestSnapshot.getPath().getName();
                String name2 = file.getPath().getName();
                if (HoplogComparator.compareByName(name1, name2) > 0) {
                    latestSnapshot = file;
                }
            }
        }
        if (latestSnapshot != null) {
            latestSnapshots.add(latestSnapshot);
        }
    }

    return latestSnapshots;
}
From source file:com.github.dongjinleekr.hadoop.examples.DistributedCacheExample.java
License:Apache License
public static void printCachePath(Configuration conf) throws IOException, URISyntaxException {
    FileSystem fs = FileSystem.get(conf);
    URI[] archives = DistributedCache.getCacheArchives(conf);
    for (URI archive : archives) {
        HarFileSystem hfs = new HarFileSystem();
        String cacheUri = String.format("har://hdfs-%s:%d%s", fs.getUri().getHost(), fs.getUri().getPort(),
            archive.toString());
        System.out.println(cacheUri);
        hfs.initialize(new URI(cacheUri), conf);

        FileStatus root = hfs.listStatus(new Path("."))[0];
        FileStatus[] children = hfs.listStatus(root.getPath());
        for (FileStatus child : children) {
            System.out.println(child.getPath());
        }

        IOUtils.closeStream(hfs);
    }
}
From source file:com.github.dryangkun.hbase.tidx.hive.HiveHFileOutputFormat.java
License:Apache License
@Override
public RecordWriter getHiveRecordWriter(final JobConf jc, final Path finalOutPath,
    Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties,
    final Progressable progressable) throws IOException {

    // Read configuration for the target path, first from jobconf, then from table properties
    String hfilePath = getFamilyPath(jc, tableProperties);
    if (hfilePath == null) {
        throw new RuntimeException("Please set " + HFILE_FAMILY_PATH + " to target location for HFiles");
    }

    // Target path's last component is also the column family name.
    final Path columnFamilyPath = new Path(hfilePath);
    final String columnFamilyName = columnFamilyPath.getName();
    final byte[] columnFamilyNameBytes = Bytes.toBytes(columnFamilyName);
    final Job job = new Job(jc);
    setCompressOutput(job, isCompressed);
    setOutputPath(job, finalOutPath);

    // Create the HFile writer
    final org.apache.hadoop.mapreduce.TaskAttemptContext tac = ShimLoader.getHadoopShims()
        .newTaskAttemptContext(job.getConfiguration(), progressable);

    final Path outputdir = FileOutputFormat.getOutputPath(tac);
    final org.apache.hadoop.mapreduce.RecordWriter<ImmutableBytesWritable, KeyValue> fileWriter =
        getFileWriter(tac);

    // Individual columns are going to be pivoted to HBase cells,
    // and for each row, they need to be written out in order
    // of column name, so sort the column names now, creating a
    // mapping to their column position. However, the first
    // column is interpreted as the row key.
    String columnList = tableProperties.getProperty("columns");
    String[] columnArray = columnList.split(",");
    final SortedMap<byte[], Integer> columnMap = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
    int i = 0;
    for (String columnName : columnArray) {
        if (i != 0) {
            columnMap.put(Bytes.toBytes(columnName), i);
        }
        ++i;
    }

    return new RecordWriter() {
        @Override
        public void close(boolean abort) throws IOException {
            try {
                fileWriter.close(null);
                if (abort) {
                    return;
                }
                // Move the hfiles file(s) from the task output directory to the
                // location specified by the user.
                FileSystem fs = outputdir.getFileSystem(jc);
                fs.mkdirs(columnFamilyPath);
                Path srcDir = outputdir;
                for (;;) {
                    FileStatus[] files = fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER);
                    if ((files == null) || (files.length == 0)) {
                        throw new IOException("No family directories found in " + srcDir);
                    }
                    if (files.length != 1) {
                        throw new IOException("Multiple family directories found in " + srcDir);
                    }
                    srcDir = files[0].getPath();
                    if (srcDir.getName().equals(columnFamilyName)) {
                        break;
                    }
                }
                for (FileStatus regionFile : fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER)) {
                    fs.rename(regionFile.getPath(), new Path(columnFamilyPath, regionFile.getPath().getName()));
                }
                // Hive actually wants a file as task output (not a directory), so
                // replace the empty directory with an empty file to keep it happy.
                fs.delete(outputdir, true);
                fs.createNewFile(outputdir);
            } catch (InterruptedException ex) {
                throw new IOException(ex);
            }
        }

        private void writeText(Text text) throws IOException {
            // Decompose the incoming text row into fields.
            String s = text.toString();
            String[] fields = s.split("\u0001");
            assert (fields.length <= (columnMap.size() + 1));

            // First field is the row key.
            byte[] rowKeyBytes = Bytes.toBytes(fields[0]);

            // Remaining fields are cells addressed by column name within row.
            for (Map.Entry<byte[], Integer> entry : columnMap.entrySet()) {
                byte[] columnNameBytes = entry.getKey();
                int iColumn = entry.getValue();
                String val;
                if (iColumn >= fields.length) {
                    // trailing blank field
                    val = "";
                } else {
                    val = fields[iColumn];
                    if ("\\N".equals(val)) {
                        // omit nulls
                        continue;
                    }
                }
                byte[] valBytes = Bytes.toBytes(val);
                KeyValue kv = new KeyValue(rowKeyBytes, columnFamilyNameBytes, columnNameBytes, valBytes);
                try {
                    fileWriter.write(null, kv);
                } catch (IOException e) {
                    LOG.error("Failed while writing row: " + s);
                    throw e;
                } catch (InterruptedException ex) {
                    throw new IOException(ex);
                }
            }
        }

        private void writePut(PutWritable put) throws IOException {
            ImmutableBytesWritable row = new ImmutableBytesWritable(put.getPut().getRow());
            SortedMap<byte[], List<Cell>> cells = put.getPut().getFamilyCellMap();
            for (Map.Entry<byte[], List<Cell>> entry : cells.entrySet()) {
                Collections.sort(entry.getValue(), new CellComparator());
                for (Cell c : entry.getValue()) {
                    try {
                        fileWriter.write(row, KeyValueUtil.copyToNewKeyValue(c));
                    } catch (InterruptedException e) {
                        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
                    }
                }
            }
        }

        @Override
        public void write(Writable w) throws IOException {
            if (w instanceof Text) {
                writeText((Text) w);
            } else if (w instanceof PutWritable) {
                writePut((PutWritable) w);
            } else {
                throw new IOException("Unexpected writable " + w);
            }
        }
    };
}
From source file:com.github.joshelser.accumulo.DelimitedIngest.java
License:Apache License
private List<Path> convertInputToPaths() throws IOException {
    List<String> inputs = args.getInput();
    List<Path> paths = new ArrayList<>(inputs.size());
    for (String input : inputs) {
        Path p = new Path(input);
        FileSystem fs = p.getFileSystem(conf);
        FileStatus fstat = fs.getFileStatus(p);
        if (fstat.isFile()) {
            paths.add(p);
        } else if (fstat.isDirectory()) {
            for (FileStatus child : fs.listStatus(p)) {
                if (child.isFile()) {
                    paths.add(child.getPath());
                }
            }
        } else {
            throw new IllegalStateException("Unable to handle that which is not file nor directory: " + p);
        }
    }
    return paths;
}
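Note that directory inputs are expanded only one level deep: entries nested in subdirectories fail the child.isFile() check and are silently skipped rather than traversed.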
From source file:com.github.libsml.commons.util.HadoopUtils.java
License:Apache License
public static String readString(Path path, Configuration conf) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    FileStatus[] statuses = fs.listStatus(path);
    StringBuilder re = new StringBuilder();
    for (FileStatus status : statuses) {
        if (status.isFile() && !status.getPath().getName().equals("_SUCCESS")) {
            FSDataInputStream streaming = fs.open(status.getPath());
            BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(streaming));
            re.append(bufferedReader.readLine() + System.lineSeparator());
        }
    }
    return re.toString();
}
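This helper reads only the first line of each part file (skipping Hadoop's _SUCCESS marker), so it is best suited to jobs whose outputs are single-line values. Callers may also want to close the reader, which the snippet as written never does.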
From source file:com.github.sadikovi.hadoop.riff.RiffOutputCommitter.java
License:Open Source License
private static void recurListFiles(FileSystem fs, FileStatus fileStatus, List<FileStatus> foundFiles,
    boolean fetchOneFile) throws IOException {
    if (fetchOneFile && !foundFiles.isEmpty())
        return;
    if (fileStatus.isDirectory()) {
        FileStatus[] list = fs.listStatus(fileStatus.getPath(), PartFileFilter.instance);
        for (int i = 0; i < list.length; i++) {
            recurListFiles(fs, list[i], foundFiles, fetchOneFile);
        }
    } else {
        // file status is a file, add to the list
        foundFiles.add(fileStatus);
    }
}
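A possible invocation of this helper (hypothetical output path; assumes a FileSystem already bound to the job configuration) might look like:

    List<FileStatus> found = new ArrayList<>();
    FileStatus root = fs.getFileStatus(new Path("/riff/output"));
    // pass fetchOneFile=true to stop recursing after the first matching part file
    recurListFiles(fs, root, found, true);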
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.java
License:Open Source License
/**
 * Returns an array of FileStatus objects whose path names match pathPattern
 * and are accepted by the user-supplied path filter. Results are sorted by
 * their path names.
 *
 * Returns null if pathPattern has no glob and the path does not exist.
 * Returns an empty array if pathPattern has a glob and no path matches it.
 *
 * @param pathPattern A regular expression specifying the path pattern.
 * @param filter A user-supplied path filter.
 * @return An array of FileStatus objects.
 * @throws IOException if an error occurs.
 */
@Override
public FileStatus[] globStatus(Path pathPattern, PathFilter filter) throws IOException {
    checkOpen();

    LOG.debug("GHFS.globStatus: {}", pathPattern);
    // URI does not handle glob expressions nicely, for the purpose of
    // fully-qualifying a path we can URI-encode them.
    // Using toString() to avoid Path(URI) constructor.
    Path encodedPath = new Path(pathPattern.toUri().toString());
    // We convert pathPattern to GCS path and then to Hadoop path to ensure that it ends up in
    // the correct format. See note in getHadoopPath for more information.
    Path fixedPath = getHadoopPath(getGcsPath(encodedPath));
    // Decode URI-encoded path back into a glob path.
    fixedPath = new Path(URI.create(fixedPath.toString()));
    LOG.debug("GHFS.globStatus fixedPath: {} => {}", pathPattern, fixedPath);

    if (shouldUseFlatGlob(fixedPath)) {
        String pathString = fixedPath.toString();
        String prefixString = trimToPrefixWithoutGlob(pathString);
        Path prefixPath = new Path(prefixString);
        URI prefixUri = getGcsPath(prefixPath);

        if (prefixString.endsWith("/") && !prefixPath.toString().endsWith("/")) {
            // Path strips a trailing slash unless it's the 'root' path. We want to keep the trailing
            // slash so that we don't wastefully list sibling files which may match the directory-name
            // as a strict prefix but would've been omitted due to not containing the '/' at the end.
            prefixUri = FileInfo.convertToDirectoryPath(gcsfs.getPathCodec(), prefixUri);
        }

        // Get everything matching the non-glob prefix.
        LOG.debug("Listing everything with prefix '{}'", prefixUri);
        List<FileInfo> fileInfos = gcsfs.listAllFileInfoForPrefix(prefixUri);
        if (fileInfos.isEmpty()) {
            // Let the superclass define the proper logic for finding no matches.
            return super.globStatus(fixedPath, filter);
        }

        // Perform the core globbing logic in the helper filesystem.
        GoogleHadoopFileSystem helperFileSystem =
            ListHelperGoogleHadoopFileSystem.createInstance(gcsfs, fileInfos);
        FileStatus[] returnList = helperFileSystem.globStatus(pathPattern, filter);

        // If the return list contains directories, we should repair them if they're 'implicit'.
        if (enableAutoRepairImplicitDirectories) {
            List<URI> toRepair = new ArrayList<>();
            for (FileStatus status : returnList) {
                // Modification time of 0 indicates implicit directory.
                if (status.isDir() && status.getModificationTime() == 0) {
                    toRepair.add(getGcsPath(status.getPath()));
                }
            }
            if (!toRepair.isEmpty()) {
                LOG.warn("Discovered {} implicit directories to repair within return values.", toRepair.size());
                gcsfs.repairDirs(toRepair);
            }
        }
        return returnList;
    } else {
        FileStatus[] ret = super.globStatus(fixedPath, filter);
        if (ret == null) {
            if (enableAutoRepairImplicitDirectories) {
                LOG.debug("GHFS.globStatus returned null for '{}', attempting possible repair.", pathPattern);
                if (gcsfs.repairPossibleImplicitDirectory(getGcsPath(fixedPath))) {
                    LOG.warn("Success repairing '{}', re-globbing.", pathPattern);
                    ret = super.globStatus(fixedPath, filter);
                }
            }
        }
        return ret;
    }
}
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.java
License:Open Source License
/**
 * Converts the given FileStatus to its string representation.
 *
 * @param stat FileStatus to convert.
 * @return String representation of the given FileStatus.
 */
private static String fileStatusToString(FileStatus stat) {
    assert stat != null;

    return String.format("path: %s, isDir: %s, len: %d, owner: %s", stat.getPath().toString(), stat.isDir(),
        stat.getLen(), stat.getOwner());
}
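For a hypothetical 1 KB file, this format string would produce output along the lines of: path: gs://my-bucket/dir/file, isDir: false, len: 1024, owner: hadoop (the bucket, file, and owner names here are illustrative only).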
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemNewUriFormatIntegrationTest.java
License:Open Source License
@Test
public void testPathsOnlyValidInNewUriScheme() throws IOException {
    GoogleHadoopFileSystem typedFs = (GoogleHadoopFileSystem) ghfs;

    Path directory = new Path(
        String.format("gs://%s/testPathsOnlyValidInNewUriScheme/", typedFs.getRootBucketName()));
    Path p = new Path(directory, "foo#bar#baz");
    try {
        ghfs.getFileStatus(p);
        Assert.fail("Expected FileNotFoundException.");
    } catch (FileNotFoundException fnfe) {
        // expected.
    }

    ghfsHelper.writeFile(p, "SomeText", 100, false);

    FileStatus status = ghfs.getFileStatus(p);
    Assert.assertEquals(p, status.getPath());

    ghfs.delete(directory, true);
}