Example usage for org.apache.hadoop.fs FileUtil stat2Paths

List of usage examples for org.apache.hadoop.fs FileUtil stat2Paths

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileUtil stat2Paths.

Prototype

public static Path[] stat2Paths(FileStatus[] stats) 

Document

Convert an array of FileStatus to an array of Path.
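
As a quick orientation before the examples below, here is a minimal, self-contained sketch of the usual pattern: list a directory with FileSystem.listStatus and pass the resulting FileStatus array to FileUtil.stat2Paths to get plain Path objects. The directory "/tmp/example" is only a placeholder and is not taken from any of the source files below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class Stat2PathsExample {
    public static void main(String[] args) throws IOException {
        // placeholder directory; point this at a real HDFS or local path
        Path dir = new Path("/tmp/example");

        Configuration conf = new Configuration();
        FileSystem fs = dir.getFileSystem(conf);

        // listStatus returns FileStatus entries; stat2Paths keeps only their paths
        FileStatus[] stats = fs.listStatus(dir);
        Path[] paths = FileUtil.stat2Paths(stats);

        for (Path p : paths) {
            System.out.println(p);
        }
    }
}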

Usage

From source file:org.mrgeo.hdfs.tile.FileSplit.java

License:Apache License

public void generateSplits(Path parent, Configuration conf) throws IOException {
    List<FileSplitInfo> list = new ArrayList<FileSplitInfo>();

    // get a Hadoop file system handle
    final FileSystem fs = getFileSystem(parent);

    // get the list of paths of the subdirectories of the parent
    final Path[] paths = FileUtil.stat2Paths(fs.listStatus(parent));

    Arrays.sort(paths);

    int partition = 0;
    // look inside each subdirectory for a data dir and keep track
    for (final Path p : paths) {
        Path mapfile = null;
        final FileStatus[] dirFiles = fs.listStatus(p);
        for (final FileStatus dirFile : dirFiles) {
            if (dirFile.getPath().getName().equals("data")) {
                mapfile = dirFile.getPath().getParent();
                break;
            }
        }

        if (mapfile != null) {
            RasterWritable val = new RasterWritable();
            MapFile.Reader reader = createMapFileReader(conf, mapfile);
            TileIdWritable firstKey = (TileIdWritable) reader.getClosest(new TileIdWritable(0), val);
            TileIdWritable lastKey = (TileIdWritable) reader.getClosest(new TileIdWritable(Long.MAX_VALUE), val,
                    true);
            if (firstKey != null && lastKey != null) {
                list.add(new FileSplit.FileSplitInfo(firstKey.get(), lastKey.get(), mapfile.getName(),
                        partition));
            }

            partition++;
        }
    }

    splits = list.toArray(new FileSplit.FileSplitInfo[list.size()]);
}

From source file:org.mrgeo.hdfs.tile.HdfsMrsTileReader.java

License:Apache License

@Override
public long calculateTileCount() {
    int count = 0;
    try {
        final FileSystem fs = imagePath.getFileSystem(conf);
        final Path[] names = FileUtil.stat2Paths(fs.listStatus(imagePath));
        Arrays.sort(names);
        final DataOutputBuffer key = new DataOutputBuffer();

        for (final Path name : names) {
            final FileStatus[] dirFiles = fs.listStatus(name);
            for (final FileStatus dirFile : dirFiles) {
                if (dirFile.getPath().getName().equals("index")) {
                    SequenceFile.Reader index = new SequenceFile.Reader(fs, dirFile.getPath(), conf);
                    try {
                        while (index.nextRawKey(key) >= 0) {
                            count++;
                        }
                    } finally {
                        index.close();
                    }
                }
            }
        }
        return count;
    } catch (final IOException e) {
        throw new MrsTileException(e);
    }
}

From source file:org.mrgeo.hdfs.tile.SplitFile.java

License:Apache License

private String[] findPartitions(final String splitFileDir) throws IOException {

    Path path = new Path(splitFileDir);
    final ArrayList<String> partitions = new ArrayList<String>();

    // get a Hadoop file system handle
    final FileSystem fs = path.getFileSystem(conf);

    // get the list of paths of the subdirectories of the parent
    final Path[] paths = FileUtil.stat2Paths(fs.listStatus(path));

    Arrays.sort(paths);

    // look inside each subdirectory for a data dir and keep track
    for (final Path p : paths) {
        boolean isMapFileDir = false;
        final FileStatus[] dirFiles = fs.listStatus(p);
        for (final FileStatus dirFile : dirFiles) {
            if (dirFile.getPath().getName().equals("data")) {
                isMapFileDir = true;
                break;
            }
        }

        if (isMapFileDir) {
            // need to be relative to the path, so we can just use getName()
            partitions.add(p.getName());
        }
    }

    return partitions.toArray(new String[0]);
}

From source file:org.springframework.data.hadoop.test.HadoopClusterTests.java

License:Apache License

@Test
public void testConfiguredConfigurationWithJobRun() throws Exception {
    assertTrue(ctx.containsBean("hadoopConfiguration"));
    Configuration config = (Configuration) ctx.getBean("hadoopConfiguration");
    assertNotNull(config);

    Path inDir = new Path("testing/jobconf/input");
    Path outDir = new Path("testing/jobconf/output");
    FileSystem fs = FileSystem.get(config);
    fs.delete(inDir, true);
    fs.delete(outDir, true);
    fs.mkdirs(inDir);

    OutputStream os = fs.create(new Path(inDir, "text.txt"));
    Writer wr = new OutputStreamWriter(os);
    wr.write("b a\n");
    wr.close();

    JobRunner runner = (JobRunner) ctx.getBean("runner");
    runner.call();

    Path[] outputFiles = FileUtil
            .stat2Paths(fs.listStatus(outDir, new Utils.OutputFileUtils.OutputFilesFilter()));

    assertEquals(1, outputFiles.length);

    InputStream in = fs.open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    assertEquals("foo\t1", reader.readLine());
    assertNull(reader.readLine());
    reader.close();
}

From source file:org.springframework.data.hadoop.test.junit.AbstractMapReduceTests.java

License:Apache License

/**
 * Finds the array of output file {@link Path}s resulting from
 * a mapreduce job run.
 *
 * @param outputDirectory the path to the job's output directory
 * @return list of output files
 * @throws FileNotFoundException if the given path was not found
 * @throws IOException if a general access error occurred
 */
protected Path[] getOutputFilePaths(Path outputDirectory) throws FileNotFoundException, IOException {
    return FileUtil.stat2Paths(
            getFileSystem().listStatus(outputDirectory, new Utils.OutputFileUtils.OutputFilesFilter()));
}

From source file:org.trafodion.sql.HBaseAccess.HBulkLoadClient.java

License:Apache License

public boolean doBulkLoad(String prepLocation, String tableName, boolean quasiSecure, boolean snapshot)
        throws Exception {
    if (logger.isDebugEnabled())
        logger.debug("HBulkLoadClient.doBulkLoad() - start");
    if (logger.isDebugEnabled())
        logger.debug("HBulkLoadClient.doBulkLoad() - Prep Location: " + prepLocation + ", Table Name:"
                + tableName + ", quasisecure : " + quasiSecure + ", snapshot: " + snapshot);

    HTable table = new HTable(config, tableName);
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(config);
    Path prepPath = new Path(prepLocation);
    prepPath = prepPath.makeQualified(prepPath.toUri(), null);
    FileSystem prepFs = FileSystem.get(prepPath.toUri(), config);

    Path[] hFams = FileUtil.stat2Paths(prepFs.listStatus(prepPath));

    if (quasiSecure) {
        throw new Exception(
                "HBulkLoadClient.doBulkLoad() - cannot perform load. Trafodion on secure HBase mode is not implemented yet");
    } else {
        if (logger.isDebugEnabled())
            logger.debug("HBulkLoadClient.doBulkLoad() - adjusting hfiles permissions");
        for (Path hfam : hFams) {
            Path[] hfiles = FileUtil.stat2Paths(prepFs.listStatus(hfam));
            prepFs.setPermission(hfam, PERM_ALL_ACCESS);
            for (Path hfile : hfiles) {
                if (logger.isDebugEnabled())
                    logger.debug("HBulkLoadClient.doBulkLoad() - adjusting hfile permissions:" + hfile);
                prepFs.setPermission(hfile, PERM_ALL_ACCESS);

            }
            //create _tmp dir used as temp space for Hfile processing
            FileSystem.mkdirs(prepFs, new Path(hfam, "_tmp"), PERM_ALL_ACCESS);
        }
        if (logger.isDebugEnabled())
            logger.debug(
                    "HBulkLoadClient.doBulkLoad() - bulk load started. Loading directly from preparation directory");
        doSnapshotNBulkLoad(prepPath, tableName, table, loader, snapshot);
        if (logger.isDebugEnabled())
            logger.debug("HBulkLoadClient.doBulkLoad() - bulk load is done ");
    }
    return true;
}

From source file:org.trafodion.sql.HBaseAccess.SequenceFileWriter.java

License:Apache License

public boolean hdfsMergeFiles(String srcPathStr, String dstPathStr) throws Exception {
    if (logger.isDebugEnabled())
        logger.debug("SequenceFileWriter.hdfsMergeFiles() - start");
    if (logger.isDebugEnabled())
        logger.debug("SequenceFileWriter.hdfsMergeFiles() - source Path: " + srcPathStr + ", destination File:"
                + dstPathStr);
    try {
        Path srcPath = new Path(srcPathStr);
        srcPath = srcPath.makeQualified(srcPath.toUri(), null);
        FileSystem srcFs = FileSystem.get(srcPath.toUri(), conf);

        Path dstPath = new Path(dstPathStr);
        dstPath = dstPath.makeQualified(dstPath.toUri(), null);
        FileSystem dstFs = FileSystem.get(dstPath.toUri(), conf);

        if (dstFs.exists(dstPath)) {
            if (logger.isDebugEnabled())
                logger.debug("SequenceFileWriter.hdfsMergeFiles() - destination files exists");
            // for this prototype we just delete the file-- will change in next code drops
            dstFs.delete(dstPath, false);
            // The caller should already have checked existence of file-- throw exception 
            //throw new FileAlreadyExistsException(dstPath.toString());
        }

        Path tmpSrcPath = new Path(srcPath, "tmp");

        FileSystem.mkdirs(srcFs, tmpSrcPath, srcFs.getFileStatus(srcPath).getPermission());
        logger.debug("SequenceFileWriter.hdfsMergeFiles() - tmp folder created.");
        Path[] files = FileUtil.stat2Paths(srcFs.listStatus(srcPath));
        for (Path f : files) {
            srcFs.rename(f, tmpSrcPath);
        }
        // copyMerge and use false for the delete option since it removes the whole directory
        if (logger.isDebugEnabled())
            logger.debug("SequenceFileWriter.hdfsMergeFiles() - copyMerge");
        FileUtil.copyMerge(srcFs, tmpSrcPath, dstFs, dstPath, false, conf, null);

        if (logger.isDebugEnabled())
            logger.debug("SequenceFileWriter.hdfsMergeFiles() - delete intermediate files");
        srcFs.delete(tmpSrcPath, true);
    } catch (IOException e) {
        if (logger.isDebugEnabled())
            logger.debug("SequenceFileWriter.hdfsMergeFiles() --exception:" + e);
        throw e;
    }

    return true;
}

From source file:org.trafodion.sql.HBaseAccess.SequenceFileWriter.java

License:Apache License

public boolean hdfsCleanUnloadPath(String uldPathStr
/*, boolean checkExistence, String mergeFileStr*/) throws Exception {
    if (logger.isDebugEnabled())
        logger.debug("SequenceFileWriter.hdfsCleanUnloadPath() - start");
    logger.debug("SequenceFileWriter.hdfsCleanUnloadPath() - unload Path: " + uldPathStr);

    try {
        Path uldPath = new Path(uldPathStr);
        uldPath = uldPath.makeQualified(uldPath.toUri(), null);
        FileSystem srcFs = FileSystem.get(uldPath.toUri(), conf);
        if (!srcFs.exists(uldPath)) {
            //unload location does not exist. hdfscreate will create it later
            //nothing to do 
            logger.debug("SequenceFileWriter.hdfsCleanUnloadPath() -- unload location does not exist.");
            return true;
        }

        Path[] files = FileUtil.stat2Paths(srcFs.listStatus(uldPath));
        logger.debug("SequenceFileWriter.hdfsCleanUnloadPath() - delete files");
        for (Path f : files) {
            srcFs.delete(f, false);
        }
    } catch (IOException e) {
        logger.debug("SequenceFileWriter.hdfsCleanUnloadPath() -exception:" + e);
        throw e;
    }

    return true;
}

From source file:org.unigram.likelike.lsh.TestLSHRecommendations.java

License:Apache License

private boolean dfsCheck(Configuration conf, Path outputPath) throws IOException {
    FileSystem fs = FileSystem.getLocal(conf);
    Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(outputPath, new OutputLogFilter()));

    //if (outputFiles != null) {
    //    TestCase.assertEquals(outputFiles.length, 1);
    //} else {
    //    TestCase.fail();
    //}

    BufferedReader reader = this.asBufferedReader(fs.open(outputFiles[0]));

    String line;
    MultiHashMap resultMap = new MultiHashMap();
    while ((line = reader.readLine()) != null) {
        String[] lineArray = line.split("\t");
        resultMap.put(Long.parseLong(lineArray[0]), // target 
                Long.parseLong(lineArray[1])); // recommended

    }
    this.check(resultMap);
    return true;
}

From source file:ph.fingra.hadoop.mapred.common.CopyToLocalFile.java

License:Apache License

public void dirToFile(String srcdir, String dstfile) throws IOException {

    FileSystem fs = FileSystem.get(URI.create(srcdir), getConf());
    FileSystem local = FileSystem.getLocal(getConf());
    Path srcPath = new Path(srcdir);
    Path dstPath = new Path(dstfile);

    // delete existing destination local file
    if (local.exists(dstPath)) {
        local.delete(dstPath, true);
    }

    // get hdfs file list
    PathFilter resultFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(ConstantVars.RESULT_FILE_PREFIX);
        }
    };

    FileStatus[] status = fs.listStatus(srcPath, resultFileFilter);

    Path[] listedPaths = FileUtil.stat2Paths(status);

    if (listedPaths.length > 0) {
        // create local output stream
        FSDataOutputStream out = local.create(dstPath);
        for (int i = 0; i < listedPaths.length; i++) {
            // create hdfs input stream
            FSDataInputStream in = fs.open(listedPaths[i]);
            byte buffer[] = new byte[256];
            int bytesRead = 0;
            while ((bytesRead = in.read(buffer)) > 0) {
                out.write(buffer, 0, bytesRead);
            }
            in.close();
        }
        out.close();
    }

    return;
}