Example usage for org.apache.hadoop.fs Path getParent

List of usage examples for org.apache.hadoop.fs Path getParent

Introduction

On this page you can find example usages of org.apache.hadoop.fs.Path.getParent().

Prototype

public Path getParent() 

Document

Returns the parent of a path, or null if the path is at the root.
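
A minimal, self-contained sketch of this behavior (the class name GetParentDemo and the paths shown are illustrative only, not taken from the usages below):

import org.apache.hadoop.fs.Path;

public class GetParentDemo {
    public static void main(String[] args) {
        Path file = new Path("/user/data/part-00000.avro");

        // The parent of a nested path is its containing directory.
        System.out.println(file.getParent());             // /user/data
        System.out.println(file.getParent().getParent()); // /user

        // The root has no parent, so getParent() returns null.
        Path root = new Path("/");
        System.out.println(root.getParent());             // null
    }
}

Several of the usages below pair this call with FileSystem.mkdirs(path.getParent()) to ensure a file's target directory exists before writing to it.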

Usage

From source file: datafu.hourglass.jobs.AbstractPartitionPreservingIncrementalJob.java

License: Apache License

/**
 * Moves files from the staging path to the final output path.
 *
 * @param report report to update with output paths
 * @param sourcePath source of data to move
 * @throws IOException
 */
private void moveStagedFiles(Report report, Path sourcePath) throws IOException {
    _log.info("Following files produced in staging path:");
    for (FileStatus stat : getFileSystem().globStatus(new Path(sourcePath, "*.avro"))) {
        _log.info(String.format("* %s (%d bytes)", stat.getPath(), stat.getLen()));
    }

    FileStatus[] incrementalParts = getFileSystem().globStatus(new Path(sourcePath, "*"), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            String[] pathParts = path.getName().split("-");
            try {
                Long.parseLong(pathParts[0]);
                return true;
            } catch (NumberFormatException e) {
                return false;
            }
        }
    });

    // collect the new incremental data from the temp folder and move to subfolders
    Map<String, Path> incrementalTargetPaths = new HashMap<String, Path>();
    for (FileStatus stat : incrementalParts) {
        String[] pathParts = stat.getPath().getName().split("-");
        try {
            String timestamp = pathParts[0];

            if (!incrementalTargetPaths.containsKey(timestamp)) {
                Path parent = new Path(sourcePath, timestamp);

                if (!getFileSystem().exists(parent)) {
                    getFileSystem().mkdirs(parent);
                } else {
                    throw new RuntimeException("already exists: " + parent.toString());
                }

                incrementalTargetPaths.put(timestamp, parent);
            }

            Path parent = incrementalTargetPaths.get(timestamp);
            _log.info(String.format("Moving %s to %s", stat.getPath().getName(), parent.toString()));
            getFileSystem().rename(stat.getPath(), new Path(parent, stat.getPath().getName()));
        } catch (NumberFormatException e) {
            throw new RuntimeException(e);
        }
    }

    for (Path src : incrementalTargetPaths.values()) {
        Date srcDate;
        try {
            srcDate = PathUtils.datedPathFormat.parse(src.getName());
        } catch (ParseException e) {
            throw new RuntimeException(e);
        }
        Path target = new Path(getOutputPath(), PathUtils.nestedDatedPathFormat.format(srcDate));
        _log.info(String.format("Moving %s to %s", src.getName(), target));

        getFileSystem().mkdirs(target.getParent());

        if (!getFileSystem().rename(src, target)) {
            throw new RuntimeException("Failed to rename " + src + " to " + target);
        }

        report.outputFiles.add(new DatePath(srcDate, target));
    }
}

From source file: de.huberlin.wbi.hiway.common.Data.java

License: Apache License

private Data(Path localPath, String containerId) {
    this.localDirectory = localPath.getParent();
    this.fileName = localPath.getName();
    this.containerId = containerId;
}

From source file: de.huberlin.wbi.hiway.common.Data.java

License: Apache License

private void mkHdfsDir(Path dir) throws IOException {
    if (dir == null || hdfs.isDirectory(dir))
        return;
    mkHdfsDir(dir.getParent());
    hdfs.mkdirs(dir);
    hdfs.setPermission(dir, new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL));
}

From source file: de.l3s.concatgz.io.ImmediateOutput.java

License: Open Source License

public OutputStream stream(String base, String extension) throws IOException {
    Path path = new Path(dir, base + file + extension);
    OutputStream stream = streams.get(path.toString());
    if (stream == null) {
        fs.mkdirs(path.getParent());
        stream = fs.create(path, true, bufferSize, replication, fs.getDefaultBlockSize(path));
        streams.put(path.toString(), stream);
    }
    return stream;
}

From source file: edu.emory.bmi.medicurator.storage.HdfsStorage.java

License: Apache License

/**
 * Saves data from an InputStream to a relative path.
 * Required parent directories will be created if they do not exist.
 * @param path String path
 * @param in InputStream
 * @return boolean
 */
public boolean saveToPath(String path, InputStream in) {
    try {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(new URI(Constants.HDFS_URI), conf);
        Path dst = new Path(hdfsPath + path);

        //Create a new file and write data to it.
        hdfs.mkdirs(dst.getParent());
        FSDataOutputStream out = hdfs.create(dst, true);
        byte[] b = new byte[1024];
        int numBytes = 0;
        while ((numBytes = in.read(b)) > 0) {
            out.write(b, 0, numBytes);
        }
        // Close all the file descriptors
        out.close();
        hdfs.close();
        return true;
    } catch (Exception e) {
        System.out.println("[ERROR]  Exception occurs when HdfsStorage saveToPath (" + path + ")" + e);
    }
    return false;
}

From source file: edu.nyu.vida.data_polygamy.utils.GetMergeFiles.java

License: BSD License

public static void main(String[] args) throws IllegalArgumentException, IOException, URISyntaxException {
    String fromDirectory = args[0];
    String toEventsDirectory = args[1];
    String toOutliersDirectory = args[2];
    String metadataFile = args[3];

    // Detecting datasets.

    HashSet<String> datasets = new HashSet<String>();

    FileReader fileReader = new FileReader(metadataFile);
    BufferedReader bufferedReader = new BufferedReader(fileReader);

    String line;
    while ((line = bufferedReader.readLine()) != null) {
        String[] parts = line.split(",");
        datasets.add(parts[0]);
    }
    bufferedReader.close();

    // Downloading relationships.

    String relationshipPatternStr = "([a-zA-Z0-9]{4}\\-[a-zA-Z0-9]{4})\\-([a-zA-Z0-9]{4}\\-[a-zA-Z0-9]{4})";
    Pattern relationshipPattern = Pattern.compile(relationshipPatternStr);

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    FileSystem localFS = FileSystem.getLocal(conf);

    for (FileStatus status : fs.listStatus(new Path(fs.getHomeDirectory() + "/" + fromDirectory))) {
        if (!status.isDirectory()) {
            continue;
        }
        Path file = status.getPath();

        Matcher m = relationshipPattern.matcher(file.getName());
        if (!m.find())
            continue;

        String ds1 = m.group(1);
        String ds2 = m.group(2);

        if (!datasets.contains(ds1))
            continue;
        if (!datasets.contains(ds2))
            continue;

        for (FileStatus statusDir : fs.listStatus(file)) {
            if (!statusDir.isDirectory()) {
                continue;
            }

            Path fromPath = statusDir.getPath();
            String toPathStr;
            if (fromPath.getName().contains("events")) {
                toPathStr = toEventsDirectory + "/" + fromPath.getParent().getName() + "-" + fromPath.getName();
            } else {
                toPathStr = toOutliersDirectory + "/" + fromPath.getParent().getName() + "-"
                        + fromPath.getName();
            }
            Path toPath = new Path(toPathStr);

            System.out.println("Copying:");
            System.out.println("  From: " + fromPath.toString());
            System.out.println("  To: " + toPath.toString());

            FileUtil.copyMerge(fs, // HDFS File System
                    fromPath, // HDFS path
                    localFS, // Local File System
                    toPath, // Local Path
                    false, // Do not delete HDFS path
                    conf, // Configuration
                    null);
        }
    }
}

From source file: edu.ucsb.cs.partitioning.cosine.Partitioner.java

License: Apache License

public static FileStatus[] setFiles(FileSystem hdfs, Path inputPath) throws IOException {
    if (hdfs.isFile(inputPath))
        return hdfs.listStatus(inputPath.getParent());
    else
        return hdfs.listStatus(inputPath);
}

From source file: edu.ucsb.cs.partitioning.statistics.Collector.java

License: Apache License

public static String getNumMinMaxAvgVecLengthAvgPosting(FileSystem fs, Path inputPath, JobConf job)
        throws IOException {

    LongWritable key = new LongWritable();
    FeatureWeightArrayWritable value = new FeatureWeightArrayWritable();
    long numDocuments = 0, minDocLength = Long.MAX_VALUE, maxDocLength = 0;
    double avgDocLength = 0;
    int partitionSize; // remove

    HashMap<Long, Float> partitionfeaturesWeight = new HashMap<Long, Float>();

    Iterator<Path> pathItr = getSortedFiles(inputPath, fs);
    if (!pathItr.hasNext())
        return "0,0,0,0";

    while (pathItr.hasNext()) {
        inputPath = pathItr.next();
        SequenceFile.Reader in = new SequenceFile.Reader(fs, inputPath, job);
        partitionSize = 0;// remove
        while (in.next(key, value)) {
            partitionSize++;// remove
            numDocuments++;
            avgDocLength += value.vectorSize;
            if (minDocLength > value.vectorSize)
                minDocLength = value.vectorSize;
            if (maxDocLength < value.vectorSize)
                maxDocLength = value.vectorSize;

            for (int j = 0; j < value.vectorSize; j++) {
                FeatureWeight current = value.vector[j];
                updatePartitionBaraglia(partitionfeaturesWeight, current);
            }
        }
        System.out.println(inputPath.getName() + " has " + partitionSize + " vectors."); // remove
        partitionsWriter.append(new Text(inputPath.getName()), new LongWritable(partitionSize));
        in.close();
        writePartitionBaraglia(inputPath.getName(), partitionfeaturesWeight);
    }
    partitionsWriter.close();
    maxWeightVector.clear();
    String nFeaturesAvgPost = getNFeaturesAvgPosting(fs, inputPath.getParent(), job);
    avgDocLength = avgDocLength / numDocuments;

    return numDocuments + " , " + minDocLength + " , " + maxDocLength + " ," + avgDocLength + " ,"
            + nFeaturesAvgPost;
}

From source file: edu.umn.cs.spatialHadoop.core.RTreeGridRecordWriter.java

License: Open Source License

@Override
protected Path getFinalCellPath(int cellIndex) throws IOException {
    Path finalCellPath = super.getFinalCellPath(cellIndex);
    return new Path(finalCellPath.getParent(), finalCellPath.getName() + ".rtree");
}

From source file: edu.umn.cs.spatialHadoop.mapred.SpatialInputFormat.java

License: Open Source License

protected void listStatus(final FileSystem fs, Path dir, final List<FileStatus> result, BlockFilter filter)
        throws IOException {
    GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(fs, dir);
    if (gindex == null) {
        FileStatus[] listStatus;
        if (OperationsParams.isWildcard(dir)) {
            // Wild card
            listStatus = fs.globStatus(dir);
        } else {
            listStatus = fs.listStatus(dir, SpatialSite.NonHiddenFileFilter);
        }
        // Add all files under this directory
        for (FileStatus status : listStatus) {
            if (status.isDir()) {
                listStatus(fs, status.getPath(), result, filter);
            } else if (status.getPath().getName().toLowerCase().endsWith(".list")) {
                LineRecordReader in = new LineRecordReader(fs.open(status.getPath()), 0, status.getLen(),
                        Integer.MAX_VALUE);
                LongWritable key = in.createKey();
                Text value = in.createValue();
                while (in.next(key, value)) {
                    result.add(fs.getFileStatus(new Path(status.getPath().getParent(), value.toString())));
                }
                in.close();
            } else {
                result.add(status);
            }
        }
    } else {
        final Path indexDir = OperationsParams.isWildcard(dir) ? dir.getParent() : dir;
        // Use the global index to limit files
        filter.selectCells(gindex, new ResultCollector<Partition>() {
            @Override
            public void collect(Partition partition) {
                try {
                    Path cell_path = new Path(indexDir, partition.filename);
                    if (!fs.exists(cell_path))
                        LOG.warn("Matched file not found: " + cell_path);
                    result.add(fs.getFileStatus(cell_path));
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        });
    }
}