Example usage for org.apache.hadoop.fs FileStatus getPath

Introduction

On this page you can find example usage of org.apache.hadoop.fs.FileStatus.getPath().

Prototype

public Path getPath() 
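
getPath() returns the Path of the file or directory that the FileStatus describes. As a minimal sketch of the typical pattern (the class name ListPaths and the directory /tmp/data below are placeholders, not from any of the sources listed here), you can list a directory and print each entry's path:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListPaths {
    public static void main(String[] args) throws IOException {
        Path dir = new Path("/tmp/data"); // placeholder directory
        FileSystem fs = dir.getFileSystem(new Configuration());

        // listStatus() returns one FileStatus per entry; getPath() yields
        // the fully qualified Path of that entry.
        for (FileStatus status : fs.listStatus(dir)) {
            System.out.println(status.getPath());
        }
    }
}

The real-world examples below follow the same pattern: obtain FileStatus objects from a FileSystem, then call getPath() to get at the underlying Path.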

Usage

From source file: cascading.hcatalog.CascadingHCatUtil.java

License: Apache License

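// Lists the data files of a Hive partition, skipping subdirectories and any file whose name matches the ignore regex.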
protected static List<String> getFilesInHivePartition(Partition part, JobConf jobConf) {
    List<String> result = newArrayList();

    String ignoreFileRegex = jobConf.get(HCatTap.IGNORE_FILE_IN_PARTITION_REGEX, "");
    Pattern ignoreFilePattern = Pattern.compile(ignoreFileRegex);

    try {
        Path partitionDirPath = new Path(part.getSd().getLocation());
        FileStatus[] partitionContent = partitionDirPath.getFileSystem(jobConf).listStatus(partitionDirPath);
        for (FileStatus currStatus : partitionContent) {
            if (!currStatus.isDir()) {
                if (!ignoreFilePattern.matcher(currStatus.getPath().getName()).matches()) {
                    result.add(currStatus.getPath().toUri().getPath());
                } else {
                    LOG.debug("Ignoring path {} since matches ignore regex {}",
                            currStatus.getPath().toUri().getPath(), ignoreFileRegex);
                }
            }
        }

    } catch (IOException e) {
        logError("Unable to read the content of partition '" + part.getSd().getLocation() + "'", e);
    }

    return result;
}

From source file: cascading.platform.hadoop.BaseHadoopPlatform.java

License: Open Source License

@Override
public void copyToLocal(String outputFile) throws IOException {
    if (!isUseCluster())
        return;

    Path path = new Path(safeFileName(outputFile));

    if (!fileSys.exists(path))
        throw new FileNotFoundException("data file not found: " + outputFile);

    File file = new File(outputFile);

    if (file.exists())
        file.delete();

    if (fileSys.isFile(path)) {
        // it's a file, so just copy it over
        FileUtil.copy(fileSys, path, file, false, configuration);
        return;
    }

    // it's a directory
    file.mkdirs();

    FileStatus[] contents = fileSys.listStatus(path);

    for (FileStatus fileStatus : contents) {
        Path currentPath = fileStatus.getPath();

        if (currentPath.getName().startsWith("_")) // filter out temp and log dirs
            continue;

        FileUtil.copy(fileSys, currentPath, new File(file, currentPath.getName()), false, configuration);
    }
}

From source file: cascading.scheme.DeprecatedAvroScheme.java

License: Apache License

/**
 * This method peeks at the source data to get a schema when none has been provided.
 *
 * @param flowProcess The cascading FlowProcess object for this flow.
 * @param tap         The cascading Tap object.
 * @return Schema The schema of the peeked-at data, or Schema.NULL if none exists.
 */
private Schema getSourceSchema(FlowProcess<? extends Configuration> flowProcess, Tap tap) throws IOException {

    if (tap instanceof CompositeTap) {
        tap = (Tap) ((CompositeTap) tap).getChildTaps().next();
    }
    final String path = tap.getIdentifier();
    Path p = new Path(path);
    final FileSystem fs = p.getFileSystem(flowProcess.getConfigCopy());
    // Get all the input dirs
    List<FileStatus> statuses = new LinkedList<FileStatus>(Arrays.asList(fs.globStatus(p, filter)));
    // Now get all the things that are one level down
    for (FileStatus status : new LinkedList<FileStatus>(statuses)) {
        if (status.isDir())
            for (FileStatus child : Arrays.asList(fs.listStatus(status.getPath(), filter))) {
                if (child.isDir()) {
                    statuses.addAll(Arrays.asList(fs.listStatus(child.getPath(), filter)));
                } else if (fs.isFile(child.getPath())) {
                    statuses.add(child);
                }
            }
    }
    for (FileStatus status : statuses) {
        Path statusPath = status.getPath();
        if (fs.isFile(statusPath)) {
            // no need to open them all
            InputStream stream = null;
            DataFileStream reader = null;
            try {
                stream = new BufferedInputStream(fs.open(statusPath));
                reader = new DataFileStream(stream, new GenericDatumReader());
                return reader.getSchema();
            } finally {
                if (reader == null) {
                    if (stream != null) {
                        stream.close();
                    }
                } else {
                    reader.close();
                }
            }

        }
    }
    // couldn't find any Avro files, return null schema
    return Schema.create(Schema.Type.NULL);
}

From source file: cascading.tap.hadoop.BaseDistCacheTap.java

License: Open Source License

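// Prefers files already shipped via the distributed cache; falls back to reading from HDFS otherwise.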
@Override
public TupleEntryIterator openForRead(FlowProcess<? extends Configuration> flowProcess, RecordReader input)
        throws IOException {
    // always read via Hadoop FileSystem if in standalone/local mode, or if a RecordReader is provided
    if (HadoopUtil.isLocal(flowProcess.getConfig()) || input != null) {
        LOG.info("delegating to parent");
        return super.openForRead(flowProcess, input);
    }

    Path[] cachedFiles = getLocalCacheFiles(flowProcess);

    if (cachedFiles == null || cachedFiles.length == 0)
        return super.openForRead(flowProcess, null);

    List<Path> paths = new ArrayList<>();
    List<Tap> taps = new ArrayList<>();

    if (isSimpleGlob()) {
        FileSystem fs = FileSystem.get(flowProcess.getConfig());
        FileStatus[] statuses = fs.globStatus(getHfs().getPath());

        for (FileStatus status : statuses)
            paths.add(status.getPath());
    } else {
        paths.add(getHfs().getPath());
    }

    for (Path pathToFind : paths) {
        for (Path path : cachedFiles) {
            if (path.toString().endsWith(pathToFind.getName())) {
                LOG.info("found {} in distributed cache", path);
                taps.add(new Lfs(getScheme(), path.toString()));
            }
        }
    }

    if (taps.isEmpty()) // not in cache, read from HDFS
    {
        LOG.info("could not find files in local resource path. delegating to parent: {}",
                super.getIdentifier());
        return super.openForRead(flowProcess, input);
    }

    return new MultiSourceTap(taps.toArray(new Tap[taps.size()])).openForRead(flowProcess, input);
}

From source file: cascading.tap.hadoop.BaseDistCacheTap.java

License: Open Source License

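// Expands a simple glob (if any) and registers each matching path with the configuration; fails if the glob matches nothing.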
private void registerHfs(FlowProcess<? extends Configuration> process, Configuration conf, Hfs hfs)
        throws IOException {
    if (isSimpleGlob()) {
        FileSystem fs = FileSystem.get(conf);
        FileStatus[] statuses = fs.globStatus(getHfs().getPath());

        if (statuses == null || statuses.length == 0)
            throw new TapException(String.format(
                    "glob expression %s does not match any files on the filesystem", getHfs().getPath()));

        for (FileStatus fileStatus : statuses)
            registerURI(conf, fileStatus.getPath());
    } else {
        registerURI(conf, hfs.getPath());
    }

    hfs.sourceConfInitComplete(process, conf);
}

From source file: cascading.tap.hadoop.Hadoop18TapUtil.java

License: Open Source License

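// Moves a task's output files into the job output directory, recursing into subdirectories and replacing stale output.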
private static void moveTaskOutputs(JobConf conf, FileSystem fs, Path jobOutputDir, Path taskOutput)
        throws IOException {
    String taskId = conf.get("mapred.task.id");

    if (fs.isFile(taskOutput)) {
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        if (!fs.rename(taskOutput, finalOutputPath)) {
            if (!fs.delete(finalOutputPath, true)) {
                throw new IOException("Failed to delete earlier output of task: " + taskId);
            }
            if (!fs.rename(taskOutput, finalOutputPath)) {
                throw new IOException("Failed to save output of task: " + taskId);
            }
        }
        LOG.debug("Moved " + taskOutput + " to " + finalOutputPath);
    } else if (fs.getFileStatus(taskOutput).isDir()) {
        FileStatus[] paths = fs.listStatus(taskOutput);
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        fs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths) {
                moveTaskOutputs(conf, fs, jobOutputDir, path.getPath());
            }
        }
    }
}

From source file: cascading.tap.hadoop.Hfs.java

License: Open Source License

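// Recursively collects child paths to the requested depth, trimming the leading prefix from each result.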
private void getChildPaths(Configuration conf, Set<String> results, int trim, Path path, int depth)
        throws IOException {
    if (depth == 0) {
        String substring = path.toString().substring(trim);
        String identifier = getIdentifier();

        if (identifier == null || identifier.isEmpty())
            results.add(new Path(substring).toString());
        else
            results.add(new Path(identifier, substring).toString());

        return;
    }

    FileStatus[] statuses = getFileSystem(conf).listStatus(path, HIDDEN_FILES_FILTER);

    if (statuses == null)
        return;

    for (FileStatus fileStatus : statuses)
        getChildPaths(conf, results, trim, fileStatus.getPath(), depth - 1);
}

From source file: cascading.tap.hadoop.util.Hadoop18TapUtil.java

License: Open Source License

private static void moveTaskOutputs(Configuration conf, FileSystem fs, Path jobOutputDir, Path taskOutput)
        throws IOException {
    String taskId = conf.get("mapred.task.id", conf.get("mapreduce.task.id"));

    if (fs.isFile(taskOutput)) {
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        if (!fs.rename(taskOutput, finalOutputPath)) {
            if (!fs.delete(finalOutputPath, true))
                throw new IOException("Failed to delete earlier output of task: " + taskId);

            if (!fs.rename(taskOutput, finalOutputPath))
                throw new IOException("Failed to save output of task: " + taskId);
        }

        LOG.debug("Moved {} to {}", taskOutput, finalOutputPath);
    } else if (fs.getFileStatus(taskOutput).isDir()) {
        FileStatus[] paths = fs.listStatus(taskOutput);
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        fs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths)
                moveTaskOutputs(conf, fs, jobOutputDir, path.getPath());
        }
    }
}

From source file: cc.solr.lucene.store.hdfs.ChangeFileExt.java

License: Apache License

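// Renames every entry directly under the given directory, appending a ".lf" extension.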
public static void main(String[] args) throws IOException {
    Path p = new Path(args[0]);
    FileSystem fileSystem = FileSystem.get(p.toUri(), new Configuration());
    FileStatus[] listStatus = fileSystem.listStatus(p);
    for (FileStatus fileStatus : listStatus) {
        Path path = fileStatus.getPath();
        fileSystem.rename(path, new Path(path.toString() + ".lf"));
    }
}

From source file: cc.solr.lucene.store.hdfs.ConvertDirectory.java

License: Apache License

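// Recursively converts files under the given path, appending an HdfsMetaBlock and trailer fields to each regular file.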
public static void convert(Path path) throws IOException {
    FileSystem fileSystem = FileSystem.get(path.toUri(), new Configuration());
    if (!fileSystem.exists(path)) {
        System.out.println(path + " does not exist.");
        return;
    }
    FileStatus fileStatus = fileSystem.getFileStatus(path);
    if (fileStatus.isDir()) {
        FileStatus[] listStatus = fileSystem.listStatus(path);
        for (FileStatus status : listStatus) {
            convert(status.getPath());
        }
    } else {
        System.out.println("Converting file [" + path + "]");
        HdfsMetaBlock block = new HdfsMetaBlock();
        block.realPosition = 0;
        block.logicalPosition = 0;
        block.length = fileStatus.getLen();
        FSDataOutputStream outputStream = fileSystem.append(path);
        block.write(outputStream);
        outputStream.writeInt(1);
        outputStream.writeLong(fileStatus.getLen());
        outputStream.writeInt(HdfsFileWriter.VERSION);
        outputStream.close();
    }
}