List of usage examples for org.apache.hadoop.fs FileUtil stat2Paths
public static Path[] stat2Paths(FileStatus[] stats)
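stat2Paths converts an array of FileStatus objects, typically returned by FileSystem.listStatus, into the corresponding array of Path objects. Before the real-world examples below, here is a minimal self-contained sketch of the common idiom; the directory name "/tmp/data" and the class name Stat2PathsExample are placeholders for illustration, not part of any project listed here.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class Stat2PathsExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path dir = new Path("/tmp/data"); // placeholder directory
        FileSystem fs = dir.getFileSystem(conf);

        // listStatus returns FileStatus[]; stat2Paths extracts the Path from each entry
        FileStatus[] stats = fs.listStatus(dir);
        Path[] paths = FileUtil.stat2Paths(stats);

        for (Path p : paths) {
            System.out.println(p);
        }
    }
}

Most of the examples that follow apply exactly this pattern, sometimes passing a PathFilter to listStatus to restrict which entries are converted.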
From source file:org.mrgeo.hdfs.tile.FileSplit.java
License:Apache License
public void generateSplits(Path parent, Configuration conf) throws IOException {
    List<FileSplitInfo> list = new ArrayList<FileSplitInfo>();

    // get a Hadoop file system handle
    final FileSystem fs = getFileSystem(parent);

    // get the list of paths of the subdirectories of the parent
    final Path[] paths = FileUtil.stat2Paths(fs.listStatus(parent));
    Arrays.sort(paths);

    int partition = 0;

    // look inside each subdirectory for a data dir and keep track
    for (final Path p : paths) {
        Path mapfile = null;
        final FileStatus[] dirFiles = fs.listStatus(p);
        for (final FileStatus dirFile : dirFiles) {
            if (dirFile.getPath().getName().equals("data")) {
                mapfile = dirFile.getPath().getParent();
                break;
            }
        }

        if (mapfile != null) {
            RasterWritable val = new RasterWritable();
            MapFile.Reader reader = createMapFileReader(conf, mapfile);
            TileIdWritable firstKey = (TileIdWritable) reader.getClosest(new TileIdWritable(0), val);
            TileIdWritable lastKey = (TileIdWritable) reader.getClosest(new TileIdWritable(Long.MAX_VALUE), val,
                    true);
            if (firstKey != null && lastKey != null) {
                list.add(new FileSplit.FileSplitInfo(firstKey.get(), lastKey.get(), mapfile.getName(), partition));
            }
            partition++;
        }
    }
    splits = list.toArray(new FileSplit.FileSplitInfo[list.size()]);
}
From source file:org.mrgeo.hdfs.tile.HdfsMrsTileReader.java
License:Apache License
@Override
public long calculateTileCount() {
    int count = 0;
    try {
        final FileSystem fs = imagePath.getFileSystem(conf);
        final Path[] names = FileUtil.stat2Paths(fs.listStatus(imagePath));
        Arrays.sort(names);

        final DataOutputBuffer key = new DataOutputBuffer();
        for (final Path name : names) {
            final FileStatus[] dirFiles = fs.listStatus(name);
            for (final FileStatus dirFile : dirFiles) {
                if (dirFile.getPath().getName().equals("index")) {
                    SequenceFile.Reader index = new SequenceFile.Reader(fs, dirFile.getPath(), conf);
                    try {
                        while (index.nextRawKey(key) >= 0) {
                            count++;
                        }
                    } finally {
                        index.close();
                    }
                }
            }
        }
        return count;
    } catch (final IOException e) {
        throw new MrsTileException(e);
    }
}
From source file:org.mrgeo.hdfs.tile.SplitFile.java
License:Apache License
private String[] findPartitions(final String splitFileDir) throws IOException {
    Path path = new Path(splitFileDir);
    final ArrayList<String> partitions = new ArrayList<String>();

    // get a Hadoop file system handle
    final FileSystem fs = path.getFileSystem(conf);

    // get the list of paths of the subdirectories of the parent
    final Path[] paths = FileUtil.stat2Paths(fs.listStatus(path));
    Arrays.sort(paths);

    // look inside each subdirectory for a data dir and keep track
    for (final Path p : paths) {
        boolean isMapFileDir = false;
        final FileStatus[] dirFiles = fs.listStatus(p);
        for (final FileStatus dirFile : dirFiles) {
            if (dirFile.getPath().getName().equals("data")) {
                isMapFileDir = true;
                break;
            }
        }

        if (isMapFileDir) {
            // need to be relative to the path, so we can just use getName()
            partitions.add(p.getName());
        }
    }
    return partitions.toArray(new String[0]);
}
From source file:org.springframework.data.hadoop.test.HadoopClusterTests.java
License:Apache License
@Test
public void testConfiguredConfigurationWithJobRun() throws Exception {
    assertTrue(ctx.containsBean("hadoopConfiguration"));
    Configuration config = (Configuration) ctx.getBean("hadoopConfiguration");
    assertNotNull(config);

    Path inDir = new Path("testing/jobconf/input");
    Path outDir = new Path("testing/jobconf/output");

    FileSystem fs = FileSystem.get(config);
    fs.delete(inDir, true);
    fs.delete(outDir, true);
    fs.mkdirs(inDir);

    OutputStream os = fs.create(new Path(inDir, "text.txt"));
    Writer wr = new OutputStreamWriter(os);
    wr.write("b a\n");
    wr.close();

    JobRunner runner = (JobRunner) ctx.getBean("runner");
    runner.call();

    Path[] outputFiles = FileUtil
            .stat2Paths(fs.listStatus(outDir, new Utils.OutputFileUtils.OutputFilesFilter()));
    assertEquals(1, outputFiles.length);

    InputStream in = fs.open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    assertEquals("foo\t1", reader.readLine());
    assertNull(reader.readLine());
    reader.close();
}
From source file:org.springframework.data.hadoop.test.junit.AbstractMapReduceTests.java
License:Apache License
/**
 * Finds an array of output file {@link Path}s resulting from
 * a mapreduce job run.
 *
 * @param outputDirectory the path to the job's output directory
 * @return list of output files
 * @throws FileNotFoundException if the given path was not found
 * @throws IOException if a general access error occurred
 */
protected Path[] getOutputFilePaths(Path outputDirectory) throws FileNotFoundException, IOException {
    return FileUtil.stat2Paths(
            getFileSystem().listStatus(outputDirectory, new Utils.OutputFileUtils.OutputFilesFilter()));
}
From source file:org.trafodion.sql.HBaseAccess.HBulkLoadClient.java
License:Apache License
public boolean doBulkLoad(String prepLocation, String tableName, boolean quasiSecure, boolean snapshot)
        throws Exception {
    if (logger.isDebugEnabled())
        logger.debug("HBulkLoadClient.doBulkLoad() - start");
    if (logger.isDebugEnabled())
        logger.debug("HBulkLoadClient.doBulkLoad() - Prep Location: " + prepLocation + ", Table Name:"
                + tableName + ", quasisecure : " + quasiSecure + ", snapshot: " + snapshot);

    HTable table = new HTable(config, tableName);
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(config);

    Path prepPath = new Path(prepLocation);
    prepPath = prepPath.makeQualified(prepPath.toUri(), null);
    FileSystem prepFs = FileSystem.get(prepPath.toUri(), config);
    Path[] hFams = FileUtil.stat2Paths(prepFs.listStatus(prepPath));

    if (quasiSecure) {
        throw new Exception(
                "HBulkLoadClient.doBulkLoad() - cannot perform load. Trafodion on secure HBase mode is not implemented yet");
    } else {
        if (logger.isDebugEnabled())
            logger.debug("HBulkLoadClient.doBulkLoad() - adjusting hfiles permissions");
        for (Path hfam : hFams) {
            Path[] hfiles = FileUtil.stat2Paths(prepFs.listStatus(hfam));
            prepFs.setPermission(hfam, PERM_ALL_ACCESS);
            for (Path hfile : hfiles) {
                if (logger.isDebugEnabled())
                    logger.debug("HBulkLoadClient.doBulkLoad() - adjusting hfile permissions:" + hfile);
                prepFs.setPermission(hfile, PERM_ALL_ACCESS);
            }
            // create _tmp dir used as temp space for Hfile processing
            FileSystem.mkdirs(prepFs, new Path(hfam, "_tmp"), PERM_ALL_ACCESS);
        }
        if (logger.isDebugEnabled())
            logger.debug(
                    "HBulkLoadClient.doBulkLoad() - bulk load started. Loading directly from preparation directory");
        doSnapshotNBulkLoad(prepPath, tableName, table, loader, snapshot);
        if (logger.isDebugEnabled())
            logger.debug("HBulkLoadClient.doBulkLoad() - bulk load is done ");
    }
    return true;
}
From source file:org.trafodion.sql.HBaseAccess.SequenceFileWriter.java
License:Apache License
public boolean hdfsMergeFiles(String srcPathStr, String dstPathStr) throws Exception {
    if (logger.isDebugEnabled())
        logger.debug("SequenceFileWriter.hdfsMergeFiles() - start");
    if (logger.isDebugEnabled())
        logger.debug("SequenceFileWriter.hdfsMergeFiles() - source Path: " + srcPathStr
                + ", destination File:" + dstPathStr);

    try {
        Path srcPath = new Path(srcPathStr);
        srcPath = srcPath.makeQualified(srcPath.toUri(), null);
        FileSystem srcFs = FileSystem.get(srcPath.toUri(), conf);

        Path dstPath = new Path(dstPathStr);
        dstPath = dstPath.makeQualified(dstPath.toUri(), null);
        FileSystem dstFs = FileSystem.get(dstPath.toUri(), conf);

        if (dstFs.exists(dstPath)) {
            if (logger.isDebugEnabled())
                logger.debug("SequenceFileWriter.hdfsMergeFiles() - destination files exists");
            // for this prototype we just delete the file -- will change in next code drops
            dstFs.delete(dstPath, false);
            // The caller should already have checked existence of file -- throw exception
            //throw new FileAlreadyExistsException(dstPath.toString());
        }

        Path tmpSrcPath = new Path(srcPath, "tmp");
        FileSystem.mkdirs(srcFs, tmpSrcPath, srcFs.getFileStatus(srcPath).getPermission());
        logger.debug("SequenceFileWriter.hdfsMergeFiles() - tmp folder created.");

        Path[] files = FileUtil.stat2Paths(srcFs.listStatus(srcPath));
        for (Path f : files) {
            srcFs.rename(f, tmpSrcPath);
        }

        // copyMerge and use false for the delete option since it removes the whole directory
        if (logger.isDebugEnabled())
            logger.debug("SequenceFileWriter.hdfsMergeFiles() - copyMerge");
        FileUtil.copyMerge(srcFs, tmpSrcPath, dstFs, dstPath, false, conf, null);

        if (logger.isDebugEnabled())
            logger.debug("SequenceFileWriter.hdfsMergeFiles() - delete intermediate files");
        srcFs.delete(tmpSrcPath, true);
    } catch (IOException e) {
        if (logger.isDebugEnabled())
            logger.debug("SequenceFileWriter.hdfsMergeFiles() --exception:" + e);
        throw e;
    }
    return true;
}
From source file:org.trafodion.sql.HBaseAccess.SequenceFileWriter.java
License:Apache License
public boolean hdfsCleanUnloadPath(String uldPathStr /*, boolean checkExistence, String mergeFileStr*/)
        throws Exception {
    if (logger.isDebugEnabled())
        logger.debug("SequenceFileWriter.hdfsCleanUnloadPath() - start");
    logger.debug("SequenceFileWriter.hdfsCleanUnloadPath() - unload Path: " + uldPathStr);

    try {
        Path uldPath = new Path(uldPathStr);
        uldPath = uldPath.makeQualified(uldPath.toUri(), null);
        FileSystem srcFs = FileSystem.get(uldPath.toUri(), conf);
        if (!srcFs.exists(uldPath)) {
            // unload location does not exist. hdfscreate will create it later
            // nothing to do
            logger.debug("SequenceFileWriter.hdfsCleanUnloadPath() -- unload location does not exist.");
            return true;
        }

        Path[] files = FileUtil.stat2Paths(srcFs.listStatus(uldPath));
        logger.debug("SequenceFileWriter.hdfsCleanUnloadPath() - delete files");
        for (Path f : files) {
            srcFs.delete(f, false);
        }
    } catch (IOException e) {
        logger.debug("SequenceFileWriter.hdfsCleanUnloadPath() -exception:" + e);
        throw e;
    }
    return true;
}
From source file:org.unigram.likelike.lsh.TestLSHRecommendations.java
License:Apache License
private boolean dfsCheck(Configuration conf, Path outputPath) throws IOException {
    FileSystem fs = FileSystem.getLocal(conf);
    Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(outputPath, new OutputLogFilter()));

    //if (outputFiles != null) {
    //    TestCase.assertEquals(outputFiles.length, 1);
    //} else {
    //    TestCase.fail();
    //}

    BufferedReader reader = this.asBufferedReader(fs.open(outputFiles[0]));
    String line;
    MultiHashMap resultMap = new MultiHashMap();
    while ((line = reader.readLine()) != null) {
        String[] lineArray = line.split("\t");
        resultMap.put(Long.parseLong(lineArray[0]), // target
                Long.parseLong(lineArray[1])); // recommended
    }
    this.check(resultMap);
    return true;
}
From source file:ph.fingra.hadoop.mapred.common.CopyToLocalFile.java
License:Apache License
public void dirToFile(String srcdir, String dstfile) throws IOException {
    FileSystem fs = FileSystem.get(URI.create(srcdir), getConf());
    FileSystem local = FileSystem.getLocal(getConf());
    Path srcPath = new Path(srcdir);
    Path dstPath = new Path(dstfile);

    // delete existing destination local file
    if (local.exists(dstPath)) {
        local.delete(dstPath, true);
    }

    // get hdfs file list
    PathFilter resultFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(ConstantVars.RESULT_FILE_PREFIX);
        }
    };

    FileStatus[] status = fs.listStatus(srcPath, resultFileFilter);
    Path[] listedPaths = FileUtil.stat2Paths(status);

    if (listedPaths.length > 0) {
        // create local output stream
        FSDataOutputStream out = local.create(dstPath);
        for (int i = 0; i < listedPaths.length; i++) {
            // create hdfs input stream
            FSDataInputStream in = fs.open(listedPaths[i]);
            byte[] buffer = new byte[256];
            int bytesRead = 0;
            while ((bytesRead = in.read(buffer)) > 0) {
                out.write(buffer, 0, bytesRead);
            }
            in.close();
        }
        out.close();
    }
    return;
}