Example usage for org.apache.hadoop.fs Path getName

List of usage examples for org.apache.hadoop.fs Path getName

Introduction

On this page you can find example usages of org.apache.hadoop.fs.Path#getName.

Prototype

public String getName() 

Source Link

Document

Returns the final component of this path.

Usage

From source file:boa.datagen.MapFileGen.java

License:Apache License

/**
 * Repairs the on-disk layout at {@code SEQ_FILE_PATH} so it can be read as a
 * Hadoop {@link MapFile}: a MapFile is a directory containing a "data"
 * sequence file plus an "index" file that {@code MapFile.fix} can rebuild.
 *
 * <p>Three cases are handled: the path is already a file named "data" (just
 * rebuild the index beside it); the path is some other lone sequence file
 * (wrap it in a directory named after it, rename it to "data", then fix); or
 * the path is a directory (fix the first plain file found inside it).
 *
 * @param args unused; the input location comes from the properties file
 * @throws Exception on any HDFS or MapFile failure
 */
public static void main(String[] args) throws Exception {
    if (SEQ_FILE_PATH.isEmpty()) {
        System.out.println("Missing path to sequence file. Please specify it in the properties file.");
        return;
    }
    String base = "hdfs://boa-njt/";
    Configuration conf = new Configuration();
    conf.set("fs.default.name", base);
    FileSystem fs = FileSystem.get(conf);
    try {
        Path path = new Path(SEQ_FILE_PATH);
        String name = path.getName();
        if (fs.isFile(path)) {
            if (path.getName().equals(MapFile.DATA_FILE_NAME)) {
                // Already named "data": rebuild the index next to it.
                MapFile.fix(fs, path.getParent(), Text.class, BytesWritable.class, false, conf);
            } else {
                // Wrap the lone sequence file in a directory named after it,
                // rename the file to "data", then build the index.
                Path dataFile = new Path(path.getParent(), MapFile.DATA_FILE_NAME);
                fs.rename(path, dataFile);
                Path dir = new Path(path.getParent(), name);
                fs.mkdirs(dir);
                fs.rename(dataFile, new Path(dir, dataFile.getName()));
                MapFile.fix(fs, dir, Text.class, BytesWritable.class, false, conf);
            }
        } else {
            // Directory: convert the first plain file found inside it.
            FileStatus[] files = fs.listStatus(path);
            for (FileStatus file : files) {
                path = file.getPath();
                if (fs.isFile(path)) {
                    Path dataFile = new Path(path.getParent(), MapFile.DATA_FILE_NAME);
                    fs.rename(path, dataFile);
                    MapFile.fix(fs, dataFile.getParent(), Text.class, BytesWritable.class, false, conf);
                    break;
                }
            }
        }
    } finally {
        fs.close(); // always release the FileSystem handle, even on failure
    }
}

From source file:boa.datagen.SeqSortMerge.java

License:Apache License

/**
 * Repeatedly k-way-merges the sorted sequence files found under a fixed HDFS
 * directory until only one remains. Each pass picks a set of candidate inputs,
 * streams them key-ordered into a new timestamped output directory, then
 * deletes the merged inputs.
 *
 * <p>Relies on class-level {@code conf}/{@code base} and the sibling helpers
 * {@code getCandidates}, {@code read} and {@code min}. An empty key from
 * {@code min} signals that every reader is exhausted — presumably a sentinel
 * written by {@code read}; confirm against that helper.
 *
 * @param args unused
 * @throws IOException on any HDFS read/write failure
 */
public static void main(String[] args) throws IOException {
    conf.set("fs.default.name", base);
    FileSystem fs = FileSystem.get(conf);

    String inPath = "/tmprepcache/2015-07-sorted/";
    while (true) {
        FileStatus[] files = fs.listStatus(new Path(inPath));
        if (files.length < 2)
            break; // fully merged: a single file remains
        Path path = new Path(inPath + System.currentTimeMillis());
        fs.mkdirs(path);
        SequenceFile.Writer w = SequenceFile.createWriter(fs, conf,
                new Path(inPath + path.getName() + "/part-00000"), Text.class, BytesWritable.class);
        FileStatus[] candidates = getCandidates(files);
        System.out.println("Merging " + candidates.length + " from " + files.length);
        SequenceFile.Reader[] readers = new SequenceFile.Reader[candidates.length];
        try {
            for (int i = 0; i < candidates.length; i++)
                readers[i] = new SequenceFile.Reader(fs,
                        new Path(inPath + candidates[i].getPath().getName() + "/part-00000"), conf);
            Text[] keys = new Text[candidates.length];
            BytesWritable[] values = new BytesWritable[candidates.length];
            read(readers, keys, values);
            // k-way merge: repeatedly append the record with the smallest key.
            while (true) {
                int index = min(keys);
                if (keys[index].toString().isEmpty())
                    break;
                w.append(keys[index], values[index]);
                read(readers[index], keys[index], values[index]);
            }
        } finally {
            // Close readers and writer even if the merge fails; the original
            // code leaked all of them on exception.
            for (SequenceFile.Reader reader : readers)
                if (reader != null)
                    reader.close();
            w.close();
        }
        // Remove the inputs that were just merged.
        for (FileStatus candidate : candidates)
            fs.delete(new Path(inPath + candidate.getPath().getName()), true);
    }
}

From source file:br.com.lassal.nqueens.grid.job.NQueenCounter.java

/**
 * Wires the job's input and output folders for one N-Queens counting pass.
 *
 * <p>Layout under {@code workingFolder}: {@code /nqueens/board-<size>/seed}
 * holds the initial "<size>:" seed record, {@code .../partial/run_<n>} holds
 * intermediate results, and {@code .../final} holds the last pass. If partial
 * results exist, the lexicographically greatest {@code run_<n>} directory is
 * used as input and {@code run_<n+1>} becomes the next output.
 *
 * @param queensSize    board size being solved
 * @param workingFolder base folder; {@code null} is treated as ""
 * @param isFinal       when true, output goes to the "final" folder
 * @param job           job to configure input/output formats and paths on
 * @return the resumed input path, or {@code null} when starting from the seed
 * @throws IOException on any filesystem failure
 */
private Path setWorkingFolder(int queensSize, String workingFolder, boolean isFinal, Job job)
        throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path returnPath = null;

    if (workingFolder == null) {
        workingFolder = "";
    }

    Path partialSolDir = new Path(workingFolder + "/nqueens/board-" + queensSize + "/partial/");
    Path inputPath = null;
    Path outputPath = null;
    String nextRunPath = "run_1";

    if (fs.exists(partialSolDir)) {
        RemoteIterator<LocatedFileStatus> dirsFound = fs.listLocatedStatus(partialSolDir);
        String lastRunPath = null;
        Path lastPath = null;

        // Pick the lexicographically greatest directory name = most recent run.
        while (dirsFound.hasNext()) {
            LocatedFileStatus dir = dirsFound.next();

            if (dir.isDirectory()) {
                if (lastRunPath == null || dir.getPath().getName().compareTo(lastRunPath) > 0) {
                    lastPath = dir.getPath();
                    lastRunPath = lastPath.getName();
                }
            }
        }
        if (lastRunPath != null) {
            // NOTE(review): assumes directory names look like "run_<number>";
            // anything else throws NumberFormatException/ArrayIndexOutOfBounds here.
            String[] runParts = lastRunPath.split("_");
            int lastRun = Integer.parseInt(runParts[1]);
            nextRunPath = runParts[0] + "_" + (++lastRun);
            inputPath = lastPath;
        }

    }
    if (inputPath == null) {
        // First run: create the seed file ("<size>:") if it does not exist yet.
        inputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/seed");
        if (!fs.exists(inputPath)) {
            // try-with-resources closes the seed file even if the write fails
            // (the original leaked the stream on exception).
            try (FSDataOutputStream seedFile = fs.create(inputPath, true)) {
                seedFile.writeBytes(queensSize + ":");
            }
        }
    } else {
        returnPath = inputPath;
    }
    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);

    if (isFinal) {
        outputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/final");
    } else {
        outputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/partial/" + nextRunPath);
    }

    // Output
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(TextOutputFormat.class);

    return returnPath;
}

From source file:byte_import.HexastoreBulkImport.java

License:Open Source License

/**
 * Bulk-loads the HFiles produced under the "out" directory into the HBase
 * table {@code TABLE_NAME}: collects the first row key of every HFile to use
 * as region split points, (re)creates the pre-split table, then hands off to
 * {@link LoadIncrementalHFiles}.
 *
 * @throws Exception on any HDFS/HBase failure
 */
private void loadHFiles() throws Exception {
    conf = HBaseConfiguration.create();
    HBaseAdmin hadmin = new HBaseAdmin(conf);
    Path hfofDir = new Path("out");
    FileSystem fs = hfofDir.getFileSystem(conf);
    FileStatus[] familyDirStatuses = fs.listStatus(hfofDir);
    int length = 0;
    // NOTE(review): fixed capacity — assumes fewer than 18000 HFiles; overflow
    // would throw ArrayIndexOutOfBoundsException below.
    byte[][] splits = new byte[18000][];
    for (FileStatus stat : familyDirStatuses) {
        if (!stat.isDir()) {
            continue;
        }
        Path familyDir = stat.getPath();
        // Skip _logs, etc
        if (familyDir.getName().startsWith("_"))
            continue;
        Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(familyDir));
        for (Path hfile : hfiles) {
            if (hfile.getName().startsWith("_"))
                continue;

            // Read only the first row key of each HFile; it becomes a region
            // split point so regions align with the bulk-load output.
            HFile.Reader hfr = HFile.createReader(fs, hfile, new CacheConfig(conf));
            final byte[] first;
            try {
                hfr.loadFileInfo();
                first = hfr.getFirstRowKey();
            } finally {
                hfr.close();
            }
            splits[length] = first.clone();
            length++;
        }
    }

    // Trim the oversized buffer to the populated prefix.
    byte[][] splits1 = Arrays.copyOf(splits, length);
    Arrays.sort(splits1, Bytes.BYTES_COMPARATOR);

    HTableDescriptor desc = new HTableDescriptor(TABLE_NAME);

    HColumnDescriptor family = new HColumnDescriptor("A");
    desc.addFamily(family);
    conf.setInt("zookeeper.session.timeout", 600000);
    // BUGFIX: the original deleted an existing table but never recreated it,
    // leaving LoadIncrementalHFiles to run against a missing table. Drop the
    // stale table (if any), then always create the pre-split table.
    if (hadmin.tableExists(TABLE_NAME)) {
        hadmin.disableTable(TABLE_NAME);
        hadmin.deleteTable(TABLE_NAME);
    }
    hadmin.createTable(desc, splits1);

    String[] args1 = new String[2];
    args1[0] = "out";
    args1[1] = TABLE_NAME;

    ToolRunner.run(new LoadIncrementalHFiles(HBaseConfiguration.create()), args1);

}

From source file:cascading.flow.hadoop.util.HadoopUtil.java

License:Open Source License

/**
 * Copies paths from one local path to a remote path. If syncTimes is true, both modification and access time are
 * changed to match the local 'from' path.
 * <p/>/*from   w  w  w  . j a v  a2s  . c o m*/
 * Returns a map of file-name to remote modification times if the remote time is different than the local time.
 *
 * @param config
 * @param commonPaths
 * @param syncTimes
 */
public static Map<String, Long> syncPaths(Configuration config, Map<Path, Path> commonPaths,
        boolean syncTimes) {
    if (commonPaths == null)
        return Collections.emptyMap();

    Map<String, Long> timestampMap = new HashMap<>();

    Map<Path, Path> copyPaths = getCopyPaths(config, commonPaths); // tests remote file existence or if stale

    LocalFileSystem localFS = getLocalFS(config);
    FileSystem remoteFS = getDefaultFS(config);

    for (Map.Entry<Path, Path> entry : copyPaths.entrySet()) {
        Path localPath = entry.getKey();
        Path remotePath = entry.getValue();

        try {
            LOG.info("copying from: {}, to: {}", localPath, remotePath);
            remoteFS.copyFromLocalFile(localPath, remotePath);

            if (!syncTimes) {
                timestampMap.put(remotePath.getName(),
                        remoteFS.getFileStatus(remotePath).getModificationTime());
                continue;
            }
        } catch (IOException exception) {
            throw new FlowException("unable to copy local: " + localPath + " to remote: " + remotePath,
                    exception);
        }

        FileStatus localFileStatus = null;

        try {
            // sync the modified times so we can lazily upload jars to hdfs after job is started
            // otherwise modified time will be local to hdfs
            localFileStatus = localFS.getFileStatus(localPath);
            remoteFS.setTimes(remotePath, localFileStatus.getModificationTime(), -1); // don't set the access time
        } catch (IOException exception) {
            LOG.info(
                    "unable to set local modification time on remote file: {}, 'dfs.namenode.accesstime.precision' may be set to 0 on HDFS.",
                    remotePath);

            if (localFileStatus != null)
                timestampMap.put(remotePath.getName(), localFileStatus.getModificationTime());
        }
    }

    return timestampMap;
}

From source file:cascading.flow.hadoop.util.HadoopUtil.java

License:Open Source License

/**
 * Sorts each classpath entry into {@code localPaths} and/or {@code remotePaths}.
 *
 * <p>Scheme-less entries on a non-local default filesystem are registered in
 * both maps (they must be synced to the remote staging root); entries living
 * on the local filesystem go only into {@code localPaths}; everything else is
 * assumed already remote and goes into {@code remotePaths}.
 *
 * @param config          current Hadoop configuration
 * @param classpath       raw classpath entries to resolve
 * @param resourceSubPath optional prefix applied to every map key and the remote layout
 * @param localPaths      out-param: name -&gt; qualified local path
 * @param remotePaths     out-param: name -&gt; qualified remote path
 * @throws FlowException when an entry does not exist on its filesystem
 */
public static void resolvePaths(Configuration config, Collection<String> classpath, String remoteRoot,
        String resourceSubPath, Map<String, Path> localPaths, Map<String, Path> remotePaths) {
    FileSystem defaultFS = getDefaultFS(config);
    FileSystem localFS = getLocalFS(config);

    Path remoteRootPath = new Path(remoteRoot == null ? "./.staging" : remoteRoot);

    if (resourceSubPath != null)
        remoteRootPath = new Path(remoteRootPath, resourceSubPath);

    remoteRootPath = defaultFS.makeQualified(remoteRootPath);

    boolean defaultIsLocal = defaultFS.equals(localFS);

    for (String stringPath : classpath) {
        Path path = new Path(stringPath);

        URI uri = path.toUri();

        if (uri.getScheme() == null && !defaultIsLocal) // we want to sync
        {
            Path localPath = localFS.makeQualified(path);

            if (!exists(localFS, localPath))
                throw new FlowException("path not found: " + localPath);

            String name = prefixSubPath(resourceSubPath, localPath.getName());

            localPaths.put(name, localPath);
            remotePaths.put(name, defaultFS.makeQualified(new Path(remoteRootPath, path.getName())));
        } else if (localFS.equals(getFileSystem(config, path))) {
            if (!exists(localFS, path))
                throw new FlowException("path not found: " + path);

            Path localPath = localFS.makeQualified(path);

            localPaths.put(prefixSubPath(resourceSubPath, localPath.getName()), localPath);
        } else {
            if (!exists(defaultFS, path))
                throw new FlowException("path not found: " + path);

            Path defaultPath = defaultFS.makeQualified(path);

            remotePaths.put(prefixSubPath(resourceSubPath, defaultPath.getName()), defaultPath);
        }
    }
}

/** Prepends {@code resourceSubPath} (when present) to a file name; shared by all three branches above. */
private static String prefixSubPath(String resourceSubPath, String name) {
    return resourceSubPath == null ? name : resourceSubPath + "/" + name;
}

From source file:cascading.platform.hadoop.BaseHadoopPlatform.java

License:Open Source License

/**
 * Copies {@code outputFile} from the cluster filesystem to the same relative
 * local path, replacing any existing local copy. For a directory, each child
 * is copied individually, skipping "_"-prefixed entries (temp/log dirs).
 * No-op when not running against a cluster.
 *
 * @param outputFile path to fetch, also used as the local destination
 * @throws FileNotFoundException when the path does not exist on the cluster
 * @throws IOException on any copy failure
 */
@Override
public void copyToLocal(String outputFile) throws IOException {
    if (!isUseCluster())
        return;

    Path path = new Path(safeFileName(outputFile));

    if (!fileSys.exists(path))
        throw new FileNotFoundException("data file not found: " + outputFile);

    File file = new File(outputFile);

    if (file.exists())
        file.delete();

    if (fileSys.isFile(path)) {
        // its a file, so just copy it over
        FileUtil.copy(fileSys, path, file, false, configuration);
        return;
    }

    // it's a directory
    file.mkdirs();

    FileStatus[] contents = fileSys.listStatus(path);

    for (FileStatus fileStatus : contents) {
        Path currentPath = fileStatus.getPath();

        if (currentPath.getName().startsWith("_")) // filter out temp and log dirs
            continue;

        FileUtil.copy(fileSys, currentPath, new File(file, currentPath.getName()), false, configuration);
    }
}

From source file:cascading.tap.hadoop.BaseDistCacheTap.java

License:Open Source License

/**
 * Opens this tap for reading, preferring files already present in the
 * distributed cache; falls back to the parent (HDFS) implementation in local
 * mode, when a RecordReader is supplied, or when nothing is cached.
 *
 * @param flowProcess current flow process carrying the configuration
 * @param input       optional RecordReader; when non-null, delegates to parent
 * @return a TupleEntryIterator over the cached or remote files
 * @throws IOException on any filesystem failure
 */
@Override
public TupleEntryIterator openForRead(FlowProcess<? extends Configuration> flowProcess, RecordReader input)
        throws IOException {
    // always read via Hadoop FileSystem if in standalone/local mode, or if an RecordReader is provided
    if (HadoopUtil.isLocal(flowProcess.getConfig()) || input != null) {
        LOG.info("delegating to parent");
        return super.openForRead(flowProcess, input);
    }

    Path[] cachedFiles = getLocalCacheFiles(flowProcess);

    if (cachedFiles == null || cachedFiles.length == 0)
        return super.openForRead(flowProcess, null);

    List<Path> paths = new ArrayList<>();
    List<Tap> taps = new ArrayList<>();

    if (isSimpleGlob()) {
        FileSystem fs = FileSystem.get(flowProcess.getConfig());
        FileStatus[] statuses = fs.globStatus(getHfs().getPath());

        for (FileStatus status : statuses)
            paths.add(status.getPath());
    } else {
        paths.add(getHfs().getPath());
    }

    for (Path pathToFind : paths) {
        for (Path path : cachedFiles) {
            if (path.toString().endsWith(pathToFind.getName())) {
                LOG.info("found {} in distributed cache", path);
                taps.add(new Lfs(getScheme(), path.toString()));
            }
        }
    }

    // BUGFIX: test the collected taps, not paths — paths was just populated and
    // is never empty here, so the original check could hand MultiSourceTap an
    // empty tap array when nothing was found in the distributed cache.
    if (taps.isEmpty()) // not in cache, read from HDFS
    {
        LOG.info("could not find files in local resource path. delegating to parent: {}",
                super.getIdentifier());
        return super.openForRead(flowProcess, input);
    }

    return new MultiSourceTap(taps.toArray(new Tap[taps.size()])).openForRead(flowProcess, input);
}

From source file:chaohBIM.ZipFileRecordReader.java

License:Apache License

/**
 * Initialise and open the ZIP file from the FileSystem.
 *
 * <p>Also records the split's file name (minus its ".zip" extension) in
 * {@code zipfilename} for later use.
 *
 * @param inputSplit         the FileSplit pointing at the ZIP file
 * @param taskAttemptContext context supplying the Hadoop configuration
 * @throws IOException on any open failure
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) inputSplit;
    Configuration conf = taskAttemptContext.getConfiguration();
    Path path = split.getPath();
    FileSystem fs = path.getFileSystem(conf);

    // Open the stream
    fsin = fs.open(path);
    zip = new ZipInputStream(fsin);

    // BUGFIX: strip only a trailing ".zip". The previous replaceAll(".zip", "")
    // treated '.' as a regex wildcard and replaced every occurrence, mangling
    // names such as "archive.zip.zip" or "buzzip.zip".
    String fileName = path.getName();
    zipfilename = fileName.endsWith(".zip") ? fileName.substring(0, fileName.length() - 4) : fileName;
}

From source file:cmd.download.java

License:Apache License

/**
 * Concatenates the {@code indexId2Node.dat} file of every child of {@code src}
 * whose name starts with {@code Constants.NAME_SECOND} into a single local
 * file {@code outPath/indexId2Node.dat}, in sorted child-name order.
 *
 * @param fs            source filesystem
 * @param src           directory whose matching children are merged
 * @param outPath       local directory receiving the merged file
 * @param configuration used by IOUtils for the copy buffer size
 * @throws IOException on any read/write failure
 */
private void mergeToLocalFile(FileSystem fs, Path src, String outPath, Configuration configuration)
        throws FileNotFoundException, IOException {
    FileStatus[] status = fs.listStatus(src);
    // TreeMap keeps the parts sorted by name so they concatenate in order.
    Map<String, Path> paths = new TreeMap<String, Path>();
    for (FileStatus fileStatus : status) {
        Path path = fileStatus.getPath();
        String pathName = path.getName();
        if (pathName.startsWith(Constants.NAME_SECOND)) {
            paths.put(pathName, path);
        }
    }

    File outFile = new File(outPath, Names.indexId2Node + ".dat");
    OutputStream out = new FileOutputStream(outFile);
    try {
        for (String pathName : paths.keySet()) {
            Path path = new Path(src, paths.get(pathName));
            log.debug("Concatenating {} into {}...", path.toUri(), outFile.getAbsoluteFile());
            InputStream in = fs.open(new Path(path, Names.indexId2Node + ".dat"));
            try {
                IOUtils.copyBytes(in, out, configuration, false);
            } finally {
                in.close(); // close each part even if the copy fails
            }
        }
    } finally {
        out.close(); // the original leaked the local handle on exception
    }
}