Example usage for org.apache.hadoop.fs FileStatus getPath

Introduction

On this page you can find example usage of org.apache.hadoop.fs.FileStatus.getPath().

Prototype

public Path getPath() 
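
getPath() returns the Path of the file or directory that the FileStatus describes. As a minimal sketch of the typical pattern (the class name ListPaths and the directory /tmp/data below are placeholders, not from any of the sources listed here), you can list a directory and print each entry's path:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListPaths {
    public static void main(String[] args) throws IOException {
        Path dir = new Path("/tmp/data"); // placeholder directory
        FileSystem fs = dir.getFileSystem(new Configuration());

        // listStatus() returns one FileStatus per entry; getPath() yields
        // the fully qualified Path of that entry.
        for (FileStatus status : fs.listStatus(dir)) {
            System.out.println(status.getPath());
        }
    }
}

The real-world examples below follow the same pattern: obtain FileStatus objects from a FileSystem, then call getPath() to get at the underlying Path.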

Usage

From source file: cascading.hcatalog.CascadingHCatUtil.java

License: Apache License

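// Lists the data files of a Hive partition, skipping subdirectories and any file whose name matches the ignore regex.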
protected static List<String> getFilesInHivePartition(Partition part, JobConf jobConf) {
    List<String> result = newArrayList();

    String ignoreFileRegex = jobConf.get(HCatTap.IGNORE_FILE_IN_PARTITION_REGEX, "");
    Pattern ignoreFilePattern = Pattern.compile(ignoreFileRegex);

    try {
        Path partitionDirPath = new Path(part.getSd().getLocation());
        FileStatus[] partitionContent = partitionDirPath.getFileSystem(jobConf).listStatus(partitionDirPath);
        for (FileStatus currStatus : partitionContent) {
            if (!currStatus.isDir()) {
                if (!ignoreFilePattern.matcher(currStatus.getPath().getName()).matches()) {
                    result.add(currStatus.getPath().toUri().getPath());
                } else {
                    LOG.debug("Ignoring path {} since matches ignore regex {}",
                            currStatus.getPath().toUri().getPath(), ignoreFileRegex);
                }
            }
        }

    } catch (IOException e) {
        logError("Unable to read the content of partition '" + part.getSd().getLocation() + "'", e);
    }

    return result;
}

From source file: cascading.platform.hadoop.BaseHadoopPlatform.java

License: Open Source License

@Override
public void copyToLocal(String outputFile) throws IOException {
    if (!isUseCluster())
        return;

    Path path = new Path(safeFileName(outputFile));

    if (!fileSys.exists(path))
        throw new FileNotFoundException("data file not found: " + outputFile);

    File file = new File(outputFile);

    if (file.exists())
        file.delete();

    if (fileSys.isFile(path)) {
        // it's a file, so just copy it over
        FileUtil.copy(fileSys, path, file, false, configuration);
        return;
    }

    // it's a directory
    file.mkdirs();

    FileStatus[] contents = fileSys.listStatus(path);

    for (FileStatus fileStatus : contents) {
        Path currentPath = fileStatus.getPath();

        if (currentPath.getName().startsWith("_")) // filter out temp and log dirs
            continue;

        FileUtil.copy(fileSys, currentPath, new File(file, currentPath.getName()), false, configuration);
    }
}

From source file: cascading.scheme.DeprecatedAvroScheme.java

License: Apache License

/**
 * This method peeks at the source data to get a schema when none has been provided.
 *
 * @param flowProcess The cascading FlowProcess object for this flow.
 * @param tap         The cascading Tap object.
 * @return Schema The schema of the peeked-at data, or Schema.NULL if none exists.
 */
private Schema getSourceSchema(FlowProcess<? extends Configuration> flowProcess, Tap tap) throws IOException {

    if (tap instanceof CompositeTap) {
        tap = (Tap) ((CompositeTap) tap).getChildTaps().next();
    }
    final String path = tap.getIdentifier();
    Path p = new Path(path);
    final FileSystem fs = p.getFileSystem(flowProcess.getConfigCopy());
    // Get all the input dirs
    List<FileStatus> statuses = new LinkedList<FileStatus>(Arrays.asList(fs.globStatus(p, filter)));
    // Now get all the things that are one level down
    for (FileStatus status : new LinkedList<FileStatus>(statuses)) {
        if (status.isDir())
            for (FileStatus child : Arrays.asList(fs.listStatus(status.getPath(), filter))) {
                if (child.isDir()) {
                    statuses.addAll(Arrays.asList(fs.listStatus(child.getPath(), filter)));
                } else if (fs.isFile(child.getPath())) {
                    statuses.add(child);
                }
            }
    }
    for (FileStatus status : statuses) {
        Path statusPath = status.getPath();
        if (fs.isFile(statusPath)) {
            // no need to open them all
            InputStream stream = null;
            DataFileStream reader = null;
            try {
                stream = new BufferedInputStream(fs.open(statusPath));
                reader = new DataFileStream(stream, new GenericDatumReader());
                return reader.getSchema();
            } finally {
                if (reader == null) {
                    if (stream != null) {
                        stream.close();
                    }
                } else {
                    reader.close();
                }
            }

        }
    }
    // couldn't find any Avro files, return null schema
    return Schema.create(Schema.Type.NULL);
}

From source file: cascading.tap.hadoop.BaseDistCacheTap.java

License: Open Source License

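// Prefers files already shipped via the distributed cache; falls back to reading from HDFS otherwise.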
@Override
public TupleEntryIterator openForRead(FlowProcess<? extends Configuration> flowProcess, RecordReader input)
        throws IOException {
    // always read via Hadoop FileSystem if in standalone/local mode, or if a RecordReader is provided
    if (HadoopUtil.isLocal(flowProcess.getConfig()) || input != null) {
        LOG.info("delegating to parent");
        return super.openForRead(flowProcess, input);
    }

    Path[] cachedFiles = getLocalCacheFiles(flowProcess);

    if (cachedFiles == null || cachedFiles.length == 0)
        return super.openForRead(flowProcess, null);

    List<Path> paths = new ArrayList<>();
    List<Tap> taps = new ArrayList<>();

    if (isSimpleGlob()) {
        FileSystem fs = FileSystem.get(flowProcess.getConfig());
        FileStatus[] statuses = fs.globStatus(getHfs().getPath());

        for (FileStatus status : statuses)
            paths.add(status.getPath());
    } else {
        paths.add(getHfs().getPath());
    }

    for (Path pathToFind : paths) {
        for (Path path : cachedFiles) {
            if (path.toString().endsWith(pathToFind.getName())) {
                LOG.info("found {} in distributed cache", path);
                taps.add(new Lfs(getScheme(), path.toString()));
            }
        }
    }

    if (taps.isEmpty()) // not in cache, read from HDFS
    {
        LOG.info("could not find files in local resource path. delegating to parent: {}",
                super.getIdentifier());
        return super.openForRead(flowProcess, input);
    }

    return new MultiSourceTap(taps.toArray(new Tap[taps.size()])).openForRead(flowProcess, input);
}

From source file: cascading.tap.hadoop.BaseDistCacheTap.java

License: Open Source License

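// Expands a simple glob (if any) and registers each matching path with the configuration; fails if the glob matches nothing.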
private void registerHfs(FlowProcess<? extends Configuration> process, Configuration conf, Hfs hfs)
        throws IOException {
    if (isSimpleGlob()) {
        FileSystem fs = FileSystem.get(conf);
        FileStatus[] statuses = fs.globStatus(getHfs().getPath());

        if (statuses == null || statuses.length == 0)
            throw new TapException(String.format(
                    "glob expression %s does not match any files on the filesystem", getHfs().getPath()));

        for (FileStatus fileStatus : statuses)
            registerURI(conf, fileStatus.getPath());
    } else {
        registerURI(conf, hfs.getPath());
    }

    hfs.sourceConfInitComplete(process, conf);
}

From source file: cascading.tap.hadoop.Hadoop18TapUtil.java

License: Open Source License

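// Moves a task's output files into the job output directory, recursing into subdirectories and replacing stale output.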
private static void moveTaskOutputs(JobConf conf, FileSystem fs, Path jobOutputDir, Path taskOutput)
        throws IOException {
    String taskId = conf.get("mapred.task.id");

    if (fs.isFile(taskOutput)) {
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        if (!fs.rename(taskOutput, finalOutputPath)) {
            if (!fs.delete(finalOutputPath, true)) {
                throw new IOException("Failed to delete earlier output of task: " + taskId);
            }
            if (!fs.rename(taskOutput, finalOutputPath)) {
                throw new IOException("Failed to save output of task: " + taskId);
            }
        }
        LOG.debug("Moved " + taskOutput + " to " + finalOutputPath);
    } else if (fs.getFileStatus(taskOutput).isDir()) {
        FileStatus[] paths = fs.listStatus(taskOutput);
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        fs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths) {
                moveTaskOutputs(conf, fs, jobOutputDir, path.getPath());
            }
        }
    }
}

From source file: cascading.tap.hadoop.Hfs.java

License: Open Source License

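// Recursively collects child paths to the requested depth, trimming the leading prefix from each result.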
private void getChildPaths(Configuration conf, Set<String> results, int trim, Path path, int depth)
        throws IOException {
    if (depth == 0) {
        String substring = path.toString().substring(trim);
        String identifier = getIdentifier();

        if (identifier == null || identifier.isEmpty())
            results.add(new Path(substring).toString());
        else
            results.add(new Path(identifier, substring).toString());

        return;
    }

    FileStatus[] statuses = getFileSystem(conf).listStatus(path, HIDDEN_FILES_FILTER);

    if (statuses == null)
        return;

    for (FileStatus fileStatus : statuses)
        getChildPaths(conf, results, trim, fileStatus.getPath(), depth - 1);
}

From source file: cascading.tap.hadoop.util.Hadoop18TapUtil.java

License: Open Source License

private static void moveTaskOutputs(Configuration conf, FileSystem fs, Path jobOutputDir, Path taskOutput)
        throws IOException {
    String taskId = conf.get("mapred.task.id", conf.get("mapreduce.task.id"));

    if (fs.isFile(taskOutput)) {
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        if (!fs.rename(taskOutput, finalOutputPath)) {
            if (!fs.delete(finalOutputPath, true))
                throw new IOException("Failed to delete earlier output of task: " + taskId);

            if (!fs.rename(taskOutput, finalOutputPath))
                throw new IOException("Failed to save output of task: " + taskId);
        }

        LOG.debug("Moved {} to {}", taskOutput, finalOutputPath);
    } else if (fs.getFileStatus(taskOutput).isDir()) {
        FileStatus[] paths = fs.listStatus(taskOutput);
        Path finalOutputPath = getFinalPath(jobOutputDir, taskOutput, getTaskOutputPath(conf));
        fs.mkdirs(finalOutputPath);
        if (paths != null) {
            for (FileStatus path : paths)
                moveTaskOutputs(conf, fs, jobOutputDir, path.getPath());
        }
    }
}

From source file: cc.solr.lucene.store.hdfs.ChangeFileExt.java

License: Apache License

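// Renames every entry directly under the given directory, appending a ".lf" extension.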
public static void main(String[] args) throws IOException {
    Path p = new Path(args[0]);
    FileSystem fileSystem = FileSystem.get(p.toUri(), new Configuration());
    FileStatus[] listStatus = fileSystem.listStatus(p);
    for (FileStatus fileStatus : listStatus) {
        Path path = fileStatus.getPath();
        fileSystem.rename(path, new Path(path.toString() + ".lf"));
    }
}

From source file: cc.solr.lucene.store.hdfs.ConvertDirectory.java

License: Apache License

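// Recursively converts files under the given path, appending an HdfsMetaBlock and trailer fields to each regular file.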
public static void convert(Path path) throws IOException {
    FileSystem fileSystem = FileSystem.get(path.toUri(), new Configuration());
    if (!fileSystem.exists(path)) {
        System.out.println(path + " does not exist.");
        return;
    }
    FileStatus fileStatus = fileSystem.getFileStatus(path);
    if (fileStatus.isDir()) {
        FileStatus[] listStatus = fileSystem.listStatus(path);
        for (FileStatus status : listStatus) {
            convert(status.getPath());
        }
    } else {
        System.out.println("Converting file [" + path + "]");
        HdfsMetaBlock block = new HdfsMetaBlock();
        block.realPosition = 0;
        block.logicalPosition = 0;
        block.length = fileStatus.getLen();
        FSDataOutputStream outputStream = fileSystem.append(path);
        block.write(outputStream);
        outputStream.writeInt(1);
        outputStream.writeLong(fileStatus.getLen());
        outputStream.writeInt(HdfsFileWriter.VERSION);
        outputStream.close();
    }
}