Example usage for org.apache.hadoop.fs FileStatus getPath

List of usage examples for org.apache.hadoop.fs FileStatus getPath

Introduction

On this page you can find example usage for org.apache.hadoop.fs.FileStatus.getPath.

Prototype

public Path getPath() 
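
A minimal, self-contained sketch of the typical pattern (the directory "/tmp/data" is only a placeholder for illustration): list a directory with FileSystem.listStatus and read each entry's fully qualified path via FileStatus.getPath().

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetPathExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // "/tmp/data" is a placeholder path used only for illustration
        for (FileStatus status : fs.listStatus(new Path("/tmp/data"))) {
            // getPath() returns the fully qualified Path of the listed entry
            System.out.println(status.getPath());
        }
    }
}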

Usage

From source file:TestIndexMergeMR.java

License:Open Source License

public void testIndexMergeMR() throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    String indexdir = "indexdir";
    String indexdir1 = "indexdir1";
    int filenum = 10;
    int recnum = 1000;
    short idx = 0;
    TestUtil.genifdfindex(indexdir, filenum, recnum, idx, true);
    StringBuffer sb = new StringBuffer();
    FileStatus[] ss = fs.listStatus(new Path(indexdir));
    for (FileStatus fileStatus : ss) {
        sb.append(fileStatus.getPath().toString()).append(",");
    }
    IndexMergeMR.running(sb.substring(0, sb.length() - 1), indexdir1, conf);

    IFormatDataFile ifdf = new IFormatDataFile(conf);
    ifdf.open(indexdir1 + "/part-00000");
    for (int i = 0; i < 100; i++) {
        ifdf.next().show();
    }

    ifdf.close();

    fs.delete(new Path(indexdir), true);
    fs.delete(new Path(indexdir1), true);

}

From source file:DupleInputFormat.java

License:Apache License

/** 
 * Generate the list of files and make them into FileSplits.
 * @param job the job context
 * @throws IOException
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    // times that each file exists in the files List
    ArrayList<Integer> times = new ArrayList<Integer>();
    ArrayList<Path> paths = new ArrayList<Path>();

    for (FileStatus file : files) {
        Path path = file.getPath();
        long length = file.getLen();
        if (length != 0) {
            FileSystem fs = path.getFileSystem(job.getConfiguration());
            BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);

            int index;
            if ((index = paths.indexOf(path)) != -1)
                times.set(index, times.get(index) + 1);
            else {
                times.add(0);
                paths.add(path);
                index = times.size() - 1;
            }

            // the file is not splittable: create a single split covering the whole file
            splits.add(makeSplit(path, 0, length, blkLocations[0].getHosts(), times.get(index)));

        } else {
            //Create empty hosts array for zero length files
            splits.add(makeSplit(path, 0, length, new String[0]));
        }
    }
    // Save the number of input files for metrics/loadgen
    job.getConfiguration().setLong(NUM_INPUT_FILES, files.size());
    //LOG.debug("Total # of splits: " + splits.size());
    return splits;
}

From source file:Vectors.java

License:Apache License

public static Vector readSequenceFile(Path path, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    for (FileStatus fileStatus : fs.listStatus(path)) {
        if (fileStatus.getPath().getName().contains("part-")) {
            SequenceFile.Reader reader = null;
            try {
                reader = new SequenceFile.Reader(fs, fileStatus.getPath(), conf);
                Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
                VectorWritable value = (VectorWritable) ReflectionUtils.newInstance(reader.getValueClass(),
                        conf);
                reader.next(key, value);
                return value.get();
            } finally {
                IOUtils.closeStream(reader);
            }
        }
    }
    return null;
}

From source file:UtilIndexMR.java

License:Open Source License

public static void indexmrtest(String datadir, String indexdir, int filenum, int recnum, boolean var,
        boolean compress, boolean seq, boolean overwrite, boolean column, String idx, boolean removefile)
        throws Exception {

    if (column) {
        UtilIndexStorage.writeColumnFDF(datadir, filenum, recnum, (short) -1, var, compress, seq, overwrite);
    } else {
        UtilIndexStorage.writeFDF(datadir, filenum, recnum, (short) -1, var, compress, seq, overwrite);
    }

    FileStatus[] ss = fs.listStatus(new Path(datadir));
    StringBuffer sb = new StringBuffer();
    for (FileStatus fileStatus : ss) {
        sb.append(fileStatus.getPath().toString()).append(",");
    }
    System.out.println(sb.toString());
    IndexMR.running(conf, sb.substring(0, sb.length() - 1), column, idx, indexdir);

    IFormatDataFile ifdf = new IFormatDataFile(conf);
    ifdf.open(indexdir + "/part-00000");
    ifdf.seek(filenum * recnum / 2);
    for (int i = 0; i < 10; i++) {
        ifdf.next().show();
    }

    ifdf.close();
    fs.delete(new Path(indexdir + "/_logs"), true);
    if (removefile) {
        fs.delete(new Path(datadir), true);
        fs.delete(new Path(indexdir), true);
    }
}

From source file:AggregatedLogsPurger.java

License:Apache License

public boolean purge() throws IOException {
    LocalDateTime now = LocalDateTime.now();
    LocalDateTime deleteLogsOlderThanTime = now.minusDays(deleteOlderThanDays);

    //Identify which log dirs should be deleted
    FileSystem fs = rootLogDir.getFileSystem(conf);
    try {

        long totalBytes = 0;
        for (FileStatus userDir : fs.listStatus(rootLogDir)) {
            if (userDir.isDirectory()) {
                Path userDirPath = new Path(userDir.getPath(), suffix);
                System.out.println("Checking for userDir : " + userDirPath);
                for (FileStatus appDir : fs.listStatus(userDirPath)) {
                    LocalDateTime appDirDate = getAppDirDateTime(appDir.getModificationTime());
                    if (appDirDate.isBefore(deleteLogsOlderThanTime)) {
                        long size = getLengthRecursively(fs, appDir.getPath());
                        System.out.println(appDir.getPath() + ", " + appDir.getOwner() + ", "
                                + appDirDate.toString() + ", size=" + size);
                        totalBytes += size;
                        if (shouldDelete) {
                            System.out.println("Deleting " + appDir.getPath());
                            fs.delete(appDir.getPath(), true);
                        }
                    }
                }
            }
        }
        System.out.println("Savings : " + totalBytes);
    } catch (IOException e) {
        e.printStackTrace();
        return false;
    } finally {
        fs.close();
    }
    return true;
}

From source file:AggregatedLogsPurger.java

License:Apache License

private long getLengthRecursively(FileSystem fs, Path path) throws IOException {
    long size = 0;
    for (FileStatus status : fs.listStatus(path)) {
        if (status.isDirectory()) {
            // recurse into subdirectories and add their sizes
            size += getLengthRecursively(fs, status.getPath());
        } else {
            size += status.getLen();
        }
    }
    return size;
}

From source file:DisplayClustering.java

License:Apache License

protected static void loadClustersWritable(Path output) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(output.toUri(), conf);
    for (FileStatus s : fs.listStatus(output, new ClustersFilter())) {
        List<Cluster> clusters = readClustersWritable(s.getPath());
        CLUSTERS.add(clusters);
    }
}

From source file:RawParascaleFileSystem.java

License:Apache License

/**
 * {@inheritDoc}
 */
@Override
public BlockLocation[] getFileBlockLocations(final FileStatus file, final long start, final long len)
        throws IOException {
    ChunkLocator newChunkLocator = null;
    if (file.getLen() < start + len) {
        throw new IOException("start+len must be less than or equal to the file length");
    }
    final ArrayList<BlockLocation> locations = new ArrayList<BlockLocation>();
    try {
        newChunkLocator = newChunkLocator();
        final Path makeQualified = file.getPath().makeQualified(this.getUri(), this.getWorkingDirectory());

        // sorted by offset
        final ChunkLocation[] chunkLocations = newChunkLocator.getChunkLocations(pathToFile(makeQualified),
                getVirtualFSFromPath(makeQualified, true));
        long begin = start;
        long length = len;
        for (final ChunkLocation chunkLocation : chunkLocations) {
            final ChunkInfo chunkInfo = chunkLocation.getChunkInfo();
            final StorageNodeInfo[] storageNodeInfo = chunkLocation.getStorageNodeInfo();
            if (length <= 0) {
                // stop when length exceeded
                break;
            }
            if (begin < chunkInfo.getChunkOffset()) {
                // skip if location not reached yet
                continue;
            }
            final List<String> hosts = new ArrayList<String>(0);
            for (int j = 0; j < storageNodeInfo.length; j++) {
                // select all enabled and running nodes
                if (storageNodeInfo[j].isUp() && storageNodeInfo[j].isEnabled()) {
                    hosts.add(storageNodeInfo[j].getNodeName());
                }
            }
            final long lengthInChunk = chunkInfo.getChunkLength() - (begin - chunkInfo.getChunkOffset());
            final BlockLocation blockLocation = new BlockLocation(null, hosts.toArray(new String[0]), begin,
                    lengthInChunk < length ? lengthInChunk : length);
            begin += blockLocation.getLength();
            length -= blockLocation.getLength();
            locations.add(blockLocation);

        }
        if (pLog.isDebugEnabled()) {
            pLog.debug("Fetched " + locations.size() + " chunk locations for " + makeQualified);
        }

        return locations.toArray(new BlockLocation[0]);

    } catch (final ChunkStorageException e) {
        throw new IOException(
                "can not fetch chunk locations " + newChunkLocator == null ? "" : newChunkLocator.toString(),
                e);
    } finally {
        if (newChunkLocator != null) {
            newChunkLocator.close();
        }
    }
}

From source file:a.TestConcatExample.java

License:Apache License

@Test
public void concatIsPermissive() throws IOException, URISyntaxException {
    MiniDFSCluster cluster = null;
    final Configuration conf = WebHdfsTestUtil.createConf();
    conf.set("dfs.namenode.fs-limits.min-block-size", "1000"); // Allow tiny blocks for the test
    try {
        cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
        cluster.waitActive();
        final FileSystem webHdfs = WebHdfsTestUtil.getWebHdfsFileSystem(conf, WebHdfsFileSystem.SCHEME);
        final FileSystem dfs = cluster.getFileSystem();

        final FileSystem fs = dfs; // WebHDFS has a bug in getLocatedBlocks

        Path root = new Path("/dir");
        fs.mkdirs(root);

        short origRep = 3;
        short secondRep = (short) (origRep - 1);
        Path f1 = new Path("/dir/f1");
        long size1 = writeFile(fs, f1, /* blocksize */ 4096, origRep, 5);
        long f1NumBlocks = fs.getFileBlockLocations(f1, 0, size1).length;
        assertEquals(5, f1NumBlocks);

        Path f2 = new Path("/dir/f2");
        long size2 = writeFile(fs, f2, /* blocksize (must be a multiple of 512 for checksums) */ 4096 - 512, secondRep, 4);
        long f2NumBlocks = fs.getFileBlockLocations(f2, 0, size2).length;
        assertEquals(5, f2NumBlocks);

        fs.concat(f1, new Path[] { f2 });
        FileStatus[] fileStatuses = fs.listStatus(root);

        // Only one file should remain
        assertEquals(1, fileStatuses.length);
        FileStatus fileStatus = fileStatuses[0];

        // And it should be named after the first file
        assertEquals("f1", fileStatus.getPath().getName());

        // The entire file takes the replication of the first argument
        assertEquals(origRep, fileStatus.getReplication());

        // As expected, the concatenated file's length is the sum of the two original files
        assertEquals(size1 + size2, fileStatus.getLen());

        // And we should have the same number of blocks
        assertEquals(f1NumBlocks + f2NumBlocks,
                fs.getFileBlockLocations(fileStatus.getPath(), 0, size1 + size2).length);
    } finally {
        if (cluster != null) {
            cluster.shutdown();
        }

    }
}

From source file:acromusashi.stream.bolt.hdfs.HdfsPreProcessor.java

License:Open Source License

/**
 * Preprocessing for the HDFSSink.<br>
 * Searches for temporary files left over from a previous run and renames them
 * so they are treated as completed output files.
 * 
 * @param hdfs HDFS file system
 * @param baseUrl base URL of the output directory
 * @param baseName base name of the output files
 * @param tmpSuffix suffix appended to temporary file names
 */
public static void execute(FileSystem hdfs, String baseUrl, String baseName, String tmpSuffix) {
    String baseRealUrl = baseUrl;

    if (baseRealUrl.endsWith("/") == false) {
        baseRealUrl = baseRealUrl + "/";
    }

    String targetPattern = baseRealUrl + baseName + "[0-9]*" + tmpSuffix + "*";
    Path targetPathPattern = new Path(targetPattern);

    FileStatus[] targetTmpFiles = null;

    try {
        targetTmpFiles = hdfs.globStatus(targetPathPattern);
    } catch (IOException ioex) {
        logger.warn("Failed to search preprocess target files. Skip preprocess.", ioex);
        return;
    }

    if (targetTmpFiles.length == 0) {
        String logFormat = "Preprocess target files not exist. Path={0}";
        String logMessage = MessageFormat.format(logFormat, targetPattern);
        logger.info(logMessage);
        return;
    }

    if (logger.isInfoEnabled() == true) {
        printTargetPathList(targetTmpFiles);
    }

    for (FileStatus targetTmpFile : targetTmpFiles) {
        renameTmpFile(hdfs, targetTmpFile.getPath().toString(), tmpSuffix);
    }

}