List of usage examples for org.apache.hadoop.fs Path getFileSystem
public FileSystem getFileSystem(Configuration conf) throws IOException
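Path.getFileSystem(Configuration) resolves the FileSystem instance that owns the path, chosen from the path's URI scheme (falling back to fs.defaultFS when the path has no scheme). Before the source-file examples below, here is a minimal usage sketch; the HDFS URI and file name are illustrative assumptions, not taken from any of the listed projects:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileSystemExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Hypothetical path; its scheme ("hdfs://...") determines which FileSystem is returned.
        Path path = new Path("hdfs://localhost:9000/tmp/example.txt");
        FileSystem fs = path.getFileSystem(conf);
        if (fs.exists(path)) {
            FileStatus status = fs.getFileStatus(path);
            System.out.println(path + " has length " + status.getLen());
        }
    }
}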
From source file:com.ibm.jaql.io.hadoop.FileOutputConfigurator.java
License:Apache License
public void setParallel(JobConf conf) throws Exception {
    registerSerializers(conf);
    // For map-reduce, multiple files can be produced, so the location is their
    // parent directory.
    Path outPath = new Path(location);
    FileSystem fs = outPath.getFileSystem(conf);
    fs.delete(outPath, true); // TODO: Jaql currently has overwrite semantics; add flag to control this
    FileOutputFormat.setOutputPath(conf, outPath);
}
From source file:com.ibm.jaql.lang.expr.io.HadoopTempExpr.java
License:Apache License
public JsonRecord eval(Context context) throws Exception {
    String filename = "jaqltemp_" + System.nanoTime(); // FIXME: figure out where this should go
    BufferedJsonRecord r = new BufferedJsonRecord();
    r.add(Adapter.TYPE_NAME, new JsonString("jaqltemp"));
    r.add(Adapter.LOCATION_NAME, new JsonString(filename));
    BufferedJsonRecord options = new BufferedJsonRecord();
    JsonSchema schema = (JsonSchema) exprs[0].eval(context);
    options.add(new JsonString("schema"), schema);
    r.add(Adapter.OPTIONS_NAME, options);
    Configuration conf = new Configuration(); // TODO: where to get this from?
    Path path = new Path(filename);
    FileSystem fs = path.getFileSystem(conf);
    context.doAtReset(new DeleteFileTask(fs, path));
    return r; // TODO: memory
}
From source file:com.ibm.jaql.lang.expr.system.LsFn.java
License:Apache License
@Override
public JsonIterator iter(final Context context) throws Exception {
    JsonString glob = (JsonString) exprs[0].eval(context);
    // Configuration conf = context.getConfiguration();
    Configuration conf = new Configuration(); // TODO: get from context, incl options
    //URI uri;
    //FileSystem fs = FileSystem.get(uri, conf);
    Path inpath = new Path(glob.toString());
    FileSystem fs = inpath.getFileSystem(conf);
    //final FileStatus[] stats = fs.listStatus(path, filter);
    final FileStatus[] stats = fs.globStatus(inpath);
    if (stats == null || stats.length == 0) {
        return JsonIterator.EMPTY;
    }
    final MutableJsonDate accessTime = new MutableJsonDate();
    final MutableJsonDate modifyTime = new MutableJsonDate();
    final MutableJsonLong length = new MutableJsonLong();
    final MutableJsonLong blockSize = new MutableJsonLong();
    final MutableJsonLong replication = new MutableJsonLong();
    final MutableJsonString path = new MutableJsonString();
    final MutableJsonString owner = new MutableJsonString();
    final MutableJsonString group = new MutableJsonString();
    final MutableJsonString permission = new MutableJsonString();
    final JsonValue[] values = new JsonValue[] { accessTime, modifyTime, length, blockSize, replication,
            path, owner, group, permission };
    final BufferedJsonRecord rec = new BufferedJsonRecord();
    rec.set(LsField.names, values, values.length, false);

    return new JsonIterator(rec) {
        int i = 0;

        @Override
        public boolean moveNext() throws Exception {
            if (i >= stats.length) {
                return false;
            }
            FileStatus stat = stats[i++];
            // fs.getUri().toString();
            long x = HadoopShim.getAccessTime(stat);
            if (x <= 0) {
                values[LsField.ACCESS_TIME.ordinal()] = null;
            } else {
                accessTime.set(x);
                values[LsField.ACCESS_TIME.ordinal()] = accessTime;
            }
            modifyTime.set(stat.getModificationTime());
            length.set(stat.getLen());
            blockSize.set(stat.getBlockSize());
            replication.set(stat.getReplication());
            path.setCopy(stat.getPath().toString());
            owner.setCopy(stat.getOwner());
            group.setCopy(stat.getGroup());
            permission.setCopy(stat.getPermission().toString());
            return true;
        }
    };
}
From source file:com.idvp.platform.hdfs.HDFSDataStream.java
License:Apache License
protected FileSystem getDfs(Configuration conf, Path dstPath) throws IOException {
    return dstPath.getFileSystem(conf);
}
From source file:com.ikanow.aleph2.analytics.hadoop.assets.UpdatedFileInputFormat.java
License:Apache License
private List<FileStatus> singleThreadedListStatus(JobContext job, Path[] dirs, PathFilter inputFilter,
        boolean recursive) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    List<IOException> errors = new ArrayList<IOException>();
    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        FileSystem fs = p.getFileSystem(job.getConfiguration());
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDirectory()) {
                    RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(globStat.getPath());
                    while (iter.hasNext()) {
                        LocatedFileStatus stat = iter.next();
                        if (inputFilter.accept(stat.getPath())) {
                            if (recursive && stat.isDirectory()) {
                                addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
                            } else {
                                result.add(stat);
                            }
                        }
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }
    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    return result;
}
From source file:com.ikanow.aleph2.analytics.hadoop.assets.UpdatedFileInputFormat.java
License:Apache License
/**
 * Generate the list of files and make them into FileSplits.
 * @param job the job context
 * @throws IOException
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
    Stopwatch sw = Stopwatch.createStarted();
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    List<FileStatus> files = listStatus(job);
    for (FileStatus file : files) {
        Path path = file.getPath();
        long length = file.getLen();
        if (length != 0) {
            BlockLocation[] blkLocations;
            if (file instanceof LocatedFileStatus) {
                blkLocations = ((LocatedFileStatus) file).getBlockLocations();
            } else {
                FileSystem fs = path.getFileSystem(job.getConfiguration());
                blkLocations = fs.getFileBlockLocations(file, 0, length);
            }
            if (isSplitable(job, path)) {
                long blockSize = file.getBlockSize();
                long splitSize = computeSplitSize(blockSize, minSize, maxSize);

                long bytesRemaining = length;
                while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                    int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                    splits.add(makeSplit(path, length - bytesRemaining, splitSize,
                            blkLocations[blkIndex].getHosts(), blkLocations[blkIndex].getCachedHosts()));
                    bytesRemaining -= splitSize;
                }

                if (bytesRemaining != 0) {
                    int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                    splits.add(makeSplit(path, length - bytesRemaining, bytesRemaining,
                            blkLocations[blkIndex].getHosts(), blkLocations[blkIndex].getCachedHosts()));
                }
            } else { // not splitable
                splits.add(makeSplit(path, 0, length, blkLocations[0].getHosts(),
                        blkLocations[0].getCachedHosts()));
            }
        } else {
            // Create empty hosts array for zero length files
            splits.add(makeSplit(path, 0, length, new String[0]));
        }
    }
    // Save the number of input files for metrics/loadgen
    job.getConfiguration().setLong(NUM_INPUT_FILES, files.size());
    sw.stop();
    if (LOG.isDebugEnabled()) {
        LOG.debug("Total # of splits generated by getSplits: " + splits.size() + ", TimeTaken: "
                + sw.elapsed(TimeUnit.MILLISECONDS));
    }
    return splits;
}
From source file:com.indeed.imhotep.builder.tsv.EasyIndexBuilderFromTSV.java
License:Apache License
public FileSystem getHDFS(Path inputFilePath) {
    try {
        return inputFilePath.getFileSystem(new org.apache.hadoop.conf.Configuration());
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }
}
From source file:com.indeed.imhotep.builder.tsv.TsvConverter.java
License:Apache License
private FileSystem getFS(Path path) {
    try {
        return path.getFileSystem(hdfsConf);
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }
}
From source file:com.inmobi.conduit.distcp.ConduitDistCp.java
License:Apache License
@Override
protected Path createInputFileListing(Job job) throws IOException {
    // get the file path where copy listing file has to be saved
    Path fileListingPath = getFileListingPath();
    Configuration config = job.getConfiguration();

    SequenceFile.Writer fileListWriter = null;
    try {
        fileListWriter = SequenceFile.createWriter(fileListingPath.getFileSystem(config), config,
                fileListingPath, Text.class, FileStatus.class, SequenceFile.CompressionType.NONE);
        for (Map.Entry<String, FileStatus> entry : fileListingMap.entrySet()) {
            FileStatus status = FileUtil.getFileStatus(entry.getValue(), buffer, in);
            fileListWriter.append(new Text(entry.getKey()), status);

            // Create a sync point after each entry. This will ensure that SequenceFile
            // Reader can work at file entry level granularity, given that SequenceFile
            // Reader reads from the starting of sync point.
            fileListWriter.sync();

            totalBytesToCopy += entry.getValue().getLen();
            totalPaths++;
        }
    } finally {
        if (fileListWriter != null) {
            fileListWriter.close();
        }
    }

    LOG.info("Number of paths considered for copy: " + totalPaths);
    LOG.info("Number of bytes considered for copy: " + totalBytesToCopy
            + " (Actual number of bytes copied depends on whether any files are "
            + "skipped or overwritten.)");

    // set distcp configurations
    config.set(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH, fileListingPath.toString());
    config.setLong(DistCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED, totalBytesToCopy);
    config.setLong(DistCpConstants.CONF_LABEL_TOTAL_NUMBER_OF_RECORDS, totalPaths);
    return fileListingPath;
}
From source file:com.inmobi.conduit.distcp.tools.CopyListing.java
License:Apache License
/**
 * Validate the final resulting path listing to see if there are any duplicate entries
 *
 * @param pathToListFile - path listing build by doBuildListing
 * @throws IOException - Any issues while checking for duplicates and throws
 * @throws DuplicateFileException - if there are duplicates
 */
protected void checkForDuplicates(Path pathToListFile) throws DuplicateFileException, IOException {
    Configuration config = getConf();
    FileSystem fs = pathToListFile.getFileSystem(config);
    Path sortedList = DistCpUtils.sortListing(fs, config, pathToListFile);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, sortedList, config);
    try {
        Text lastKey = new Text("*"); // source relative path can never hold *
        FileStatus lastFileStatus = new FileStatus();

        Text currentKey = new Text();
        while (reader.next(currentKey)) {
            if (currentKey.equals(lastKey)) {
                FileStatus currentFileStatus = new FileStatus();
                reader.getCurrentValue(currentFileStatus);
                throw new DuplicateFileException("File " + lastFileStatus.getPath() + " and "
                        + currentFileStatus.getPath() + " would cause duplicates. Aborting");
            }
            reader.getCurrentValue(lastFileStatus);
            lastKey.set(currentKey);
        }
    } finally {
        IOUtils.closeStream(reader);
    }
}