Example usage for org.apache.hadoop.fs FileStatus getPath

List of usage examples for org.apache.hadoop.fs FileStatus getPath

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileStatus.getPath, collected from open source projects.

Prototype

public Path getPath() 
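
getPath returns the Path of the file or directory that this FileStatus describes.

Below is a minimal, self-contained sketch of the most common pattern: list a directory and print each entry's Path. The directory name "/tmp/data" and the class name GetPathDemo are illustrative only and do not come from the projects quoted below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetPathDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path dir = new Path("/tmp/data");
        FileSystem fs = dir.getFileSystem(conf);
        for (FileStatus status : fs.listStatus(dir)) {
            // getPath() yields the fully qualified Path of the listed entry.
            System.out.println(status.getPath());
        }
    }
}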

Usage

From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.HoplogUtil.java

License:Apache License

private static long expiredTime(FileStatus file, Map<String, Long> expiredHoplogs) {
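    // An expired hoplog is tracked by a companion marker file named
    // "<hoplog name><expired extension>"; look that marker up in the map.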
    String expiredMarkerName = file.getPath().getName() + AbstractHoplogOrganizer.EXPIRED_HOPLOG_EXTENSION;

    long expiredTimeStamp = -1;
    if (expiredHoplogs.containsKey(expiredMarkerName)) {
        expiredTimeStamp = expiredHoplogs.get(expiredMarkerName);
    }
    return expiredTimeStamp;
}

From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.HoplogUtil.java

License:Apache License

/**
 * @param regionPath path of the region directory to scan
 * @param fs file system hosting the region directory
 * @return list of latest checkpoint files of all buckets in the region
 * @throws IOException
 */
public static Collection<FileStatus> getCheckpointFiles(Path regionPath, FileSystem fs) throws IOException {
    ArrayList<FileStatus> latestSnapshots = new ArrayList<FileStatus>();

    Collection<Collection<FileStatus>> allBuckets = getBucketHoplogs(regionPath, fs,
            AbstractHoplogOrganizer.MAJOR_HOPLOG_EXTENSION, 0, 0);

    // extract the latest major compacted hoplog from each bucket
    for (Collection<FileStatus> bucket : allBuckets) {
        FileStatus latestSnapshot = null;
        for (FileStatus file : bucket) {
            if (latestSnapshot == null) {
                latestSnapshot = file;
            } else {
                String name1 = latestSnapshot.getPath().getName();
                String name2 = file.getPath().getName();

                if (HoplogComparator.compareByName(name1, name2) > 0) {
                    latestSnapshot = file;
                }
            }
        }

        if (latestSnapshot != null) {
            latestSnapshots.add(latestSnapshot);
        }
    }

    return latestSnapshots;
}

From source file:com.github.dongjinleekr.hadoop.examples.DistributedCacheExample.java

License:Apache License

public static void printCachePath(Configuration conf) throws IOException, URISyntaxException {
    FileSystem fs = FileSystem.get(conf);
    URI[] archives = DistributedCache.getCacheArchives(conf);

    for (URI archive : archives) {
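        // Mount each archive as a HAR (Hadoop Archive) file system; the URI
        // takes the form har://hdfs-<host>:<port><archive path>.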
        HarFileSystem hfs = new HarFileSystem();
        String cacheUri = String.format("har://hdfs-%s:%d%s", fs.getUri().getHost(), fs.getUri().getPort(),
                archive.toString());
        System.out.println(cacheUri);

        hfs.initialize(new URI(cacheUri), conf);

        FileStatus root = hfs.listStatus(new Path("."))[0];
        FileStatus[] children = hfs.listStatus(root.getPath());

        for (FileStatus child : children) {
            System.out.println(child.getPath());
        }

        IOUtils.closeStream(hfs);
    }
}

From source file:com.github.dryangkun.hbase.tidx.hive.HiveHFileOutputFormat.java

License:Apache License

@Override
public RecordWriter getHiveRecordWriter(final JobConf jc, final Path finalOutPath,
        Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties,
        final Progressable progressable) throws IOException {

    // Read configuration for the target path, first from jobconf, then from table properties
    String hfilePath = getFamilyPath(jc, tableProperties);
    if (hfilePath == null) {
        throw new RuntimeException("Please set " + HFILE_FAMILY_PATH + " to target location for HFiles");
    }

    // Target path's last component is also the column family name.
    final Path columnFamilyPath = new Path(hfilePath);
    final String columnFamilyName = columnFamilyPath.getName();
    final byte[] columnFamilyNameBytes = Bytes.toBytes(columnFamilyName);
    final Job job = new Job(jc);
    setCompressOutput(job, isCompressed);
    setOutputPath(job, finalOutPath);

    // Create the HFile writer
    final org.apache.hadoop.mapreduce.TaskAttemptContext tac = ShimLoader.getHadoopShims()
            .newTaskAttemptContext(job.getConfiguration(), progressable);

    final Path outputdir = FileOutputFormat.getOutputPath(tac);
    final org.apache.hadoop.mapreduce.RecordWriter<ImmutableBytesWritable, KeyValue> fileWriter = getFileWriter(
            tac);

    // Individual columns are going to be pivoted to HBase cells,
    // and for each row, they need to be written out in order
    // of column name, so sort the column names now, creating a
    // mapping to their column position.  However, the first
    // column is interpreted as the row key.
    String columnList = tableProperties.getProperty("columns");
    String[] columnArray = columnList.split(",");
    final SortedMap<byte[], Integer> columnMap = new TreeMap<byte[], Integer>(Bytes.BYTES_COMPARATOR);
    int i = 0;
    for (String columnName : columnArray) {
        if (i != 0) {
            columnMap.put(Bytes.toBytes(columnName), i);
        }
        ++i;
    }

    return new RecordWriter() {

        @Override
        public void close(boolean abort) throws IOException {
            try {
                fileWriter.close(null);
                if (abort) {
                    return;
                }
                // Move the HFile(s) from the task output directory to the
                // location specified by the user.
                FileSystem fs = outputdir.getFileSystem(jc);
                fs.mkdirs(columnFamilyPath);
                Path srcDir = outputdir;
                for (;;) {
                    FileStatus[] files = fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER);
                    if ((files == null) || (files.length == 0)) {
                        throw new IOException("No family directories found in " + srcDir);
                    }
                    if (files.length != 1) {
                        throw new IOException("Multiple family directories found in " + srcDir);
                    }
                    srcDir = files[0].getPath();
                    if (srcDir.getName().equals(columnFamilyName)) {
                        break;
                    }
                }
                for (FileStatus regionFile : fs.listStatus(srcDir, FileUtils.STAGING_DIR_PATH_FILTER)) {
                    fs.rename(regionFile.getPath(), new Path(columnFamilyPath, regionFile.getPath().getName()));
                }
                // Hive actually wants a file as task output (not a directory), so
                // replace the empty directory with an empty file to keep it happy.
                fs.delete(outputdir, true);
                fs.createNewFile(outputdir);
            } catch (InterruptedException ex) {
                throw new IOException(ex);
            }
        }

        private void writeText(Text text) throws IOException {
            // Decompose the incoming text row into fields.
            String s = text.toString();
            String[] fields = s.split("\u0001");
            assert (fields.length <= (columnMap.size() + 1));
            // First field is the row key.
            byte[] rowKeyBytes = Bytes.toBytes(fields[0]);
            // Remaining fields are cells addressed by column name within row.
            for (Map.Entry<byte[], Integer> entry : columnMap.entrySet()) {
                byte[] columnNameBytes = entry.getKey();
                int iColumn = entry.getValue();
                String val;
                if (iColumn >= fields.length) {
                    // trailing blank field
                    val = "";
                } else {
                    val = fields[iColumn];
                    if ("\\N".equals(val)) {
                        // omit nulls
                        continue;
                    }
                }
                byte[] valBytes = Bytes.toBytes(val);
                KeyValue kv = new KeyValue(rowKeyBytes, columnFamilyNameBytes, columnNameBytes, valBytes);
                try {
                    fileWriter.write(null, kv);
                } catch (IOException e) {
                    LOG.error("Failed while writing row: " + s);
                    throw e;
                } catch (InterruptedException ex) {
                    throw new IOException(ex);
                }
            }
        }

        private void writePut(PutWritable put) throws IOException {
            ImmutableBytesWritable row = new ImmutableBytesWritable(put.getPut().getRow());
            SortedMap<byte[], List<Cell>> cells = put.getPut().getFamilyCellMap();
            for (Map.Entry<byte[], List<Cell>> entry : cells.entrySet()) {
                Collections.sort(entry.getValue(), new CellComparator());
                for (Cell c : entry.getValue()) {
                    try {
                        fileWriter.write(row, KeyValueUtil.copyToNewKeyValue(c));
                    } catch (InterruptedException e) {
                        throw (InterruptedIOException) new InterruptedIOException().initCause(e);
                    }
                }
            }
        }

        @Override
        public void write(Writable w) throws IOException {
            if (w instanceof Text) {
                writeText((Text) w);
            } else if (w instanceof PutWritable) {
                writePut((PutWritable) w);
            } else {
                throw new IOException("Unexpected writable " + w);
            }
        }
    };
}

From source file:com.github.joshelser.accumulo.DelimitedIngest.java

License:Apache License

private List<Path> convertInputToPaths() throws IOException {
    List<String> inputs = args.getInput();
    List<Path> paths = new ArrayList<>(inputs.size());
    for (String input : inputs) {
        Path p = new Path(input);
        FileSystem fs = p.getFileSystem(conf);
        FileStatus fstat = fs.getFileStatus(p);
        if (fstat.isFile()) {
            paths.add(p);
        } else if (fstat.isDirectory()) {
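            // Expand the directory one level deep; nested directories are skipped.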
            for (FileStatus child : fs.listStatus(p)) {
                if (child.isFile()) {
                    paths.add(child.getPath());
                }
            }
        } else {
            throw new IllegalStateException("Unable to handle that which is not file nor directory: " + p);
        }
    }
    return paths;
}

From source file:com.github.libsml.commons.util.HadoopUtils.java

License:Apache License

public static String readString(Path path, Configuration conf) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    FileStatus[] statuses = fs.listStatus(path);
    StringBuilder re = new StringBuilder();
    for (FileStatus status : statuses) {
        // Skip directories and the _SUCCESS marker left by MapReduce jobs.
        if (status.isFile() && !status.getPath().getName().equals("_SUCCESS")) {
            // Use try-with-resources so the stream is closed after the read.
            try (BufferedReader bufferedReader = new BufferedReader(
                    new InputStreamReader(fs.open(status.getPath())))) {
                re.append(bufferedReader.readLine()).append(System.lineSeparator());
            }
        }
    }
    return re.toString();
}

From source file:com.github.sadikovi.hadoop.riff.RiffOutputCommitter.java

License:Open Source License

private static void recurListFiles(FileSystem fs, FileStatus fileStatus, List<FileStatus> foundFiles,
        boolean fetchOneFile) throws IOException {
    if (fetchOneFile && !foundFiles.isEmpty()) {
        return;
    }
    if (fileStatus.isDirectory()) {
        FileStatus[] list = fs.listStatus(fileStatus.getPath(), PartFileFilter.instance);
        for (int i = 0; i < list.length; i++) {
            recurListFiles(fs, list[i], foundFiles, fetchOneFile);
        }
    } else {
        // file status is a file, add to the list
        foundFiles.add(fileStatus);
    }
}

From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.java

License:Open Source License

/**
 * Returns an array of FileStatus objects whose path names match pathPattern
 * and are accepted by the user-supplied path filter. Results are sorted by
 * their path names.
 *
 * Return null if pathPattern has no glob and the path does not exist.
 * Return an empty array if pathPattern has a glob and no path matches it.
 *
 * @param pathPattern A regular expression specifying the path pattern.
 * @param filter A user-supplied path filter.
 * @return An array of FileStatus objects.
 * @throws IOException if an error occurs.
 */
@Override
public FileStatus[] globStatus(Path pathPattern, PathFilter filter) throws IOException {

    checkOpen();

    LOG.debug("GHFS.globStatus: {}", pathPattern);
    // URI does not handle glob expressions nicely, for the purpose of
    // fully-qualifying a path we can URI-encode them.
    // Using toString() to avoid Path(URI) constructor.
    Path encodedPath = new Path(pathPattern.toUri().toString());
    // We convert pathPattern to GCS path and then to Hadoop path to ensure that it ends up in
    // the correct format. See note in getHadoopPath for more information.
    Path fixedPath = getHadoopPath(getGcsPath(encodedPath));
    // Decode URI-encoded path back into a glob path.
    fixedPath = new Path(URI.create(fixedPath.toString()));
    LOG.debug("GHFS.globStatus fixedPath: {} => {}", pathPattern, fixedPath);

    if (shouldUseFlatGlob(fixedPath)) {
        String pathString = fixedPath.toString();
        String prefixString = trimToPrefixWithoutGlob(pathString);
        Path prefixPath = new Path(prefixString);
        URI prefixUri = getGcsPath(prefixPath);

        if (prefixString.endsWith("/") && !prefixPath.toString().endsWith("/")) {
            // Path strips a trailing slash unless it's the 'root' path. We want to keep the trailing
            // slash so that we don't wastefully list sibling files which may match the directory-name
            // as a strict prefix but would've been omitted due to not containing the '/' at the end.
            prefixUri = FileInfo.convertToDirectoryPath(gcsfs.getPathCodec(), prefixUri);
        }

        // Get everything matching the non-glob prefix.
        LOG.debug("Listing everything with prefix '{}'", prefixUri);
        List<FileInfo> fileInfos = gcsfs.listAllFileInfoForPrefix(prefixUri);
        if (fileInfos.isEmpty()) {
            // Let the superclass define the proper logic for finding no matches.
            return super.globStatus(fixedPath, filter);
        }

        // Perform the core globbing logic in the helper filesystem.
        GoogleHadoopFileSystem helperFileSystem = ListHelperGoogleHadoopFileSystem.createInstance(gcsfs,
                fileInfos);
        FileStatus[] returnList = helperFileSystem.globStatus(pathPattern, filter);

        // If the return list contains directories, we should repair them if they're 'implicit'.
        if (enableAutoRepairImplicitDirectories) {
            List<URI> toRepair = new ArrayList<>();
            for (FileStatus status : returnList) {
                // Modification time of 0 indicates implicit directory.
                if (status.isDir() && status.getModificationTime() == 0) {
                    toRepair.add(getGcsPath(status.getPath()));
                }
            }
            if (!toRepair.isEmpty()) {
                LOG.warn("Discovered {} implicit directories to repair within return values.", toRepair.size());
                gcsfs.repairDirs(toRepair);
            }
        }
        return returnList;
    } else {
        FileStatus[] ret = super.globStatus(fixedPath, filter);
        if (ret == null) {
            if (enableAutoRepairImplicitDirectories) {
                LOG.debug("GHFS.globStatus returned null for '{}', attempting possible repair.", pathPattern);
                if (gcsfs.repairPossibleImplicitDirectory(getGcsPath(fixedPath))) {
                    LOG.warn("Success repairing '{}', re-globbing.", pathPattern);
                    ret = super.globStatus(fixedPath, filter);
                }
            }
        }
        return ret;
    }
}

From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemBase.java

License:Open Source License

/**
 * Converts the given FileStatus to its string representation.
 *
 * @param stat FileStatus to convert.
 * @return String representation of the given FileStatus.
 */
private static String fileStatusToString(FileStatus stat) {
    assert stat != null;

    return String.format("path: %s, isDir: %s, len: %d, owner: %s", stat.getPath().toString(), stat.isDir(),
            stat.getLen(), stat.getOwner());
}

From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystemNewUriFormatIntegrationTest.java

License:Open Source License

@Test
public void testPathsOnlyValidInNewUriScheme() throws IOException {
    GoogleHadoopFileSystem typedFs = (GoogleHadoopFileSystem) ghfs;

    Path directory = new Path(
            String.format("gs://%s/testPathsOnlyValidInNewUriScheme/", typedFs.getRootBucketName()));
    Path p = new Path(directory, "foo#bar#baz");
    try {
        ghfs.getFileStatus(p);
        Assert.fail("Expected FileNotFoundException.");
    } catch (FileNotFoundException fnfe) {
        // expected.
    }

    ghfsHelper.writeFile(p, "SomeText", 100, false);

    FileStatus status = ghfs.getFileStatus(p);
    Assert.assertEquals(p, status.getPath());
    ghfs.delete(directory, true);
}