List of usage examples for org.apache.hadoop.fs FileStatus getPath
public Path getPath()
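getPath() returns the Path of the file or directory that a FileStatus describes; as the examples below show, it is most often called while iterating the results of FileSystem.listStatus(). A minimal sketch of that pattern (the "/tmp" directory and the default Configuration are assumptions for illustration, not part of any example below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListPaths {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Each FileStatus returned by listStatus() carries the entry's Path
        for (FileStatus status : fs.listStatus(new Path("/tmp"))) {
            System.out.println(status.getPath() + (status.isDirectory() ? " (dir)" : ""));
        }
    }
}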
From source file:TestIndexMergeMR.java
License:Open Source License
public void testIndexMergeMR() throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    String indexdir = "indexdir";
    String indexdir1 = "indexdir1";
    int filenum = 10;
    int recnum = 1000;
    short idx = 0;
    TestUtil.genifdfindex(indexdir, filenum, recnum, idx, true);

    // Join the paths of all index files into a comma-separated list
    StringBuffer sb = new StringBuffer();
    FileStatus[] ss = fs.listStatus(new Path(indexdir));
    for (FileStatus fileStatus : ss) {
        sb.append(fileStatus.getPath().toString()).append(",");
    }
    IndexMergeMR.running(sb.substring(0, sb.length() - 1), indexdir1, conf);

    IFormatDataFile ifdf = new IFormatDataFile(conf);
    ifdf.open(indexdir1 + "/part-00000");
    for (int i = 0; i < 100; i++) {
        ifdf.next().show();
    }
    ifdf.close();

    fs.delete(new Path(indexdir), true);
    fs.delete(new Path(indexdir1), true);
}
From source file:DupleInputFormat.java
License:Apache License
/**
 * Generate the list of files and make them into FileSplits.
 * @param job the job context
 * @throws IOException
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    // times that each file appears in the files list
    ArrayList<Integer> times = new ArrayList<Integer>();
    ArrayList<Path> paths = new ArrayList<Path>();
    for (FileStatus file : files) {
        Path path = file.getPath();
        long length = file.getLen();
        if (length != 0) {
            FileSystem fs = path.getFileSystem(job.getConfiguration());
            BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
            int index;
            if ((index = paths.indexOf(path)) != -1) {
                times.set(index, times.get(index) + 1);
            } else {
                times.add(0);
                paths.add(path);
                index = times.size() - 1;
            }
            // not splitable
            splits.add(makeSplit(path, 0, length, blkLocations[0].getHosts(), times.get(index)));
        } else {
            // Create empty hosts array for zero length files
            splits.add(makeSplit(path, 0, length, new String[0]));
        }
    }
    // Save the number of input files for metrics/loadgen
    job.getConfiguration().setLong(NUM_INPUT_FILES, files.size());
    //LOG.debug("Total # of splits: " + splits.size());
    return splits;
}
From source file:Vectors.java
License:Apache License
public static Vector readSequenceFile(Path path, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    for (FileStatus fileStatus : fs.listStatus(path)) {
        if (fileStatus.getPath().getName().contains("part-")) {
            SequenceFile.Reader reader = null;
            try {
                reader = new SequenceFile.Reader(fs, fileStatus.getPath(), conf);
                Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
                VectorWritable value = (VectorWritable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
                reader.next(key, value);
                return value.get();
            } finally {
                IOUtils.closeStream(reader);
            }
        }
    }
    return null;
}
From source file:UtilIndexMR.java
License:Open Source License
public static void indexmrtest(String datadir, String indexdir, int filenum, int recnum, boolean var,
        boolean compress, boolean seq, boolean overwrite, boolean column, String idx, boolean removefile)
        throws Exception {
    if (column) {
        UtilIndexStorage.writeColumnFDF(datadir, filenum, recnum, (short) -1, var, compress, seq, overwrite);
    } else {
        UtilIndexStorage.writeFDF(datadir, filenum, recnum, (short) -1, var, compress, seq, overwrite);
    }

    // Join the paths of all data files into a comma-separated list
    FileStatus[] ss = fs.listStatus(new Path(datadir));
    StringBuffer sb = new StringBuffer();
    for (FileStatus fileStatus : ss) {
        sb.append(fileStatus.getPath().toString()).append(",");
    }
    System.out.println(sb.toString());

    IndexMR.running(conf, sb.substring(0, sb.length() - 1), column, idx, indexdir);

    IFormatDataFile ifdf = new IFormatDataFile(conf);
    ifdf.open(indexdir + "/part-00000");
    ifdf.seek(filenum * recnum / 2);
    for (int i = 0; i < 10; i++) {
        ifdf.next().show();
    }
    ifdf.close();

    fs.delete(new Path(indexdir + "/_logs"), true);
    if (removefile) {
        fs.delete(new Path(datadir), true);
        fs.delete(new Path(indexdir), true);
    }
}
From source file:AggregatedLogsPurger.java
License:Apache License
public boolean purge() throws IOException {
    LocalDateTime now = LocalDateTime.now();
    LocalDateTime deleteLogsOlderThanTime = now.minusDays(deleteOlderThanDays);

    // Identify which log dirs should be deleted
    FileSystem fs = rootLogDir.getFileSystem(conf);
    try {
        long totalBytes = 0;
        for (FileStatus userDir : fs.listStatus(rootLogDir)) {
            if (userDir.isDirectory()) {
                Path userDirPath = new Path(userDir.getPath(), suffix);
                System.out.println("Checking for userDir : " + userDirPath);
                for (FileStatus appDir : fs.listStatus(userDirPath)) {
                    LocalDateTime appDirDate = getAppDirDateTime(appDir.getModificationTime());
                    if (appDirDate.isBefore(deleteLogsOlderThanTime)) {
                        long size = getLengthRecursively(fs, appDir.getPath());
                        System.out.println(appDir.getPath() + ", " + appDir.getOwner() + ", "
                                + appDirDate.toString() + ", size=" + size);
                        totalBytes += size;
                        if (shouldDelete) {
                            System.out.println("Deleting " + appDir.getPath());
                            fs.delete(appDir.getPath(), true);
                        }
                    }
                }
            }
        }
        System.out.println("Savings : " + totalBytes);
    } catch (IOException e) {
        e.printStackTrace();
        return false;
    } finally {
        fs.close();
    }
    return true;
}
From source file:AggregatedLogsPurger.java
License:Apache License
private long getLengthRecursively(FileSystem fs, Path path) throws IOException {
    long size = 0;
    for (FileStatus status : fs.listStatus(path)) {
        if (status.isDirectory()) {
            // Accumulate the subtree size; the original dropped the recursive
            // call's return value, so directory contents were never counted.
            size += getLengthRecursively(fs, status.getPath());
        } else {
            size += status.getLen();
        }
    }
    return size;
}
From source file:DisplayClustering.java
License:Apache License
protected static void loadClustersWritable(Path output) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(output.toUri(), conf);
    for (FileStatus s : fs.listStatus(output, new ClustersFilter())) {
        List<Cluster> clusters = readClustersWritable(s.getPath());
        CLUSTERS.add(clusters);
    }
}
From source file:RawParascaleFileSystem.java
License:Apache License
/**
 * {@inheritDoc}
 */
@Override
public BlockLocation[] getFileBlockLocations(final FileStatus file, final long start, final long len)
        throws IOException {
    ChunkLocator newChunkLocator = null;
    if (file.getLen() < start + len) {
        throw new IOException("start+len must be less or equal than file length");
    }
    final ArrayList<BlockLocation> locations = new ArrayList<BlockLocation>();
    try {
        newChunkLocator = newChunkLocator();
        final Path makeQualified = file.getPath().makeQualified(this.getUri(), this.getWorkingDirectory());
        // sorted by offset
        final ChunkLocation[] chunkLocations = newChunkLocator.getChunkLocations(pathToFile(makeQualified),
                getVirtualFSFromPath(makeQualified, true));
        long begin = start;
        long length = len;
        for (final ChunkLocation chunkLocation : chunkLocations) {
            final ChunkInfo chunkInfo = chunkLocation.getChunkInfo();
            final StorageNodeInfo[] storageNodeInfo = chunkLocation.getStorageNodeInfo();
            if (length <= 0) {
                // stop once the requested range is fully covered
                break;
            }
            if (begin < chunkInfo.getChunkOffset()) {
                // skip if location not reached yet
                continue;
            }
            final List<String> hosts = new ArrayList<String>(0);
            for (int j = 0; j < storageNodeInfo.length; j++) {
                // select all enabled and running nodes
                if (storageNodeInfo[j].isUp() && storageNodeInfo[j].isEnabled()) {
                    hosts.add(storageNodeInfo[j].getNodeName());
                }
            }
            final long lengthInChunk = chunkInfo.getChunkLength() - (begin - chunkInfo.getChunkOffset());
            final BlockLocation blockLocation = new BlockLocation(null, hosts.toArray(new String[0]), begin,
                    lengthInChunk < length ? lengthInChunk : length);
            begin += blockLocation.getLength();
            length -= blockLocation.getLength();
            locations.add(blockLocation);
        }
        if (pLog.isDebugEnabled()) {
            pLog.debug("Fetched " + locations.size() + " chunk locations for " + makeQualified);
        }
        return locations.toArray(new BlockLocation[0]);
    } catch (final ChunkStorageException e) {
        // Parenthesize the null check: the original applied + before ==, so the
        // comparison was always false and a null locator would throw an NPE here.
        throw new IOException("can not fetch chunk locations "
                + (newChunkLocator == null ? "" : newChunkLocator.toString()), e);
    } finally {
        if (newChunkLocator != null) {
            newChunkLocator.close();
        }
    }
}
From source file:a.TestConcatExample.java
License:Apache License
@Test
public void concatIsPermissive() throws IOException, URISyntaxException {
    MiniDFSCluster cluster = null;
    final Configuration conf = WebHdfsTestUtil.createConf();
    conf.set("dfs.namenode.fs-limits.min-block-size", "1000"); // Allow tiny blocks for the test
    try {
        cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
        cluster.waitActive();
        final FileSystem webHdfs = WebHdfsTestUtil.getWebHdfsFileSystem(conf, WebHdfsFileSystem.SCHEME);
        final FileSystem dfs = cluster.getFileSystem();

        final FileSystem fs = dfs; // WebHDFS has a bug in getLocatedBlocks

        Path root = new Path("/dir");
        fs.mkdirs(root);

        short origRep = 3;
        short secondRep = (short) (origRep - 1);
        Path f1 = new Path("/dir/f1");
        long size1 = writeFile(fs, f1, /* blocksize */ 4096, origRep, 5);
        long f1NumBlocks = fs.getFileBlockLocations(f1, 0, size1).length;
        assertEquals(5, f1NumBlocks);

        Path f2 = new Path("/dir/f2");
        long size2 = writeFile(fs, f2, /* blocksize (must be a multiple of 512 for checksum) */ 4096 - 512,
                secondRep, 4);
        long f2NumBlocks = fs.getFileBlockLocations(f2, 0, size2).length;
        assertEquals(5, f2NumBlocks);

        fs.concat(f1, new Path[] { f2 });

        FileStatus[] fileStatuses = fs.listStatus(root);

        // Only one file should remain
        assertEquals(1, fileStatuses.length);
        FileStatus fileStatus = fileStatuses[0];

        // And it should be named after the first file
        assertEquals("f1", fileStatus.getPath().getName());

        // The entire file takes the replication of the first argument
        assertEquals(origRep, fileStatus.getReplication());

        // As expected, the new concatenated file is the length of both previous files
        assertEquals(size1 + size2, fileStatus.getLen());

        // And we should have the same number of blocks
        assertEquals(f1NumBlocks + f2NumBlocks,
                fs.getFileBlockLocations(fileStatus.getPath(), 0, size1 + size2).length);
    } finally {
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}
From source file:acromusashi.stream.bolt.hdfs.HdfsPreProcessor.java
License:Open Source License
/**
 * Pre-processing for HdfsSink.<br>
 * Searches the base directory for temporary files left over from a previous run
 * and renames them back to their final file names.
 *
 * @param hdfs      target file system
 * @param baseUrl   base directory URL
 * @param baseName  base file name
 * @param tmpSuffix temporary-file suffix
 */
public static void execute(FileSystem hdfs, String baseUrl, String baseName, String tmpSuffix) {
    String baseRealUrl = baseUrl;
    if (baseRealUrl.endsWith("/") == false) {
        baseRealUrl = baseRealUrl + "/";
    }

    String targetPattern = baseRealUrl + baseName + "[0-9]*" + tmpSuffix + "*";
    Path targetPathPattern = new Path(targetPattern);

    FileStatus[] targetTmpFiles = null;
    try {
        targetTmpFiles = hdfs.globStatus(targetPathPattern);
    } catch (IOException ioex) {
        logger.warn("Failed to search preprocess target files. Skip preprocess.", ioex);
        return;
    }

    if (targetTmpFiles.length == 0) {
        String logFormat = "Preprocess target files not exist. Path={0}";
        String logMessage = MessageFormat.format(logFormat, targetPattern);
        logger.info(logMessage);
        return;
    }

    if (logger.isInfoEnabled() == true) {
        printTargetPathList(targetTmpFiles);
    }

    for (FileStatus targetTmpFile : targetTmpFiles) {
        renameTmpFile(hdfs, targetTmpFile.getPath().toString(), tmpSuffix);
    }
}