List of usage examples for org.apache.hadoop.fs.Path.getFileSystem
public FileSystem getFileSystem(Configuration conf) throws IOException
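Before the per-project examples below, here is a minimal, self-contained sketch of the pattern they all share: resolve a FileSystem from a Path and a Configuration, then perform I/O against it. The path URI and the listing logic are illustrative assumptions, not taken from any of the source files listed on this page.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileSystemExample {
    public static void main(String[] args) throws Exception {
        // Hypothetical path; the scheme (hdfs://, file://, s3a://, ...) selects the FileSystem implementation.
        Path dir = new Path("hdfs://namenode:8020/user/example/data");
        Configuration conf = new Configuration();

        // getFileSystem(conf) resolves the FileSystem for this path's scheme and authority,
        // falling back to fs.defaultFS from the Configuration when the path has no scheme.
        FileSystem fs = dir.getFileSystem(conf);

        // Use the resolved FileSystem for ordinary operations.
        if (fs.exists(dir)) {
            for (FileStatus status : fs.listStatus(dir)) {
                System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
            }
        }
    }
}

Resolving the FileSystem from the Path, rather than calling FileSystem.get(conf) directly, is what lets the examples below work uniformly across HDFS, the local filesystem, and any other scheme the cluster configuration supports.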
From source file:com.google.mr4c.sources.HDFSFileSource.java
License:Open Source License
public static HDFSFileSource create(Path dir, boolean flat) throws IOException {
    return create(dir.getFileSystem(new Configuration()), dir, flat);
}
From source file:com.google.mr4c.sources.MapFileSource.java
License:Open Source License
public MapFileSource(Path dir) throws IOException {
    this(dir.getFileSystem(new Configuration()), dir);
}
From source file:com.grantingersoll.intell.clustering.KMeansClusteringEngine.java
License:Apache License
private static Map<Integer, List<String>> readPoints(Path pointsPathDir, Configuration conf) throws IOException {
    Map<Integer, List<String>> result = new TreeMap<Integer, List<String>>();
    FileSystem fs = pointsPathDir.getFileSystem(conf);
    FileStatus[] children = fs.listStatus(pointsPathDir, new PathFilter() {
        public boolean accept(Path path) {
            String name = path.getName();
            return !(name.endsWith(".crc") || name.startsWith("_"));
        }
    });
    for (FileStatus file : children) {
        Path path = file.getPath();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        try {
            IntWritable key = reader.getKeyClass().asSubclass(IntWritable.class).newInstance();
            WeightedVectorWritable value = reader.getValueClass().asSubclass(WeightedVectorWritable.class)
                    .newInstance();
            while (reader.next(key, value)) {
                // key is the clusterId, value is a list of points
                List<String> pointList = result.get(key.get());
                if (pointList == null) {
                    pointList = new ArrayList<String>();
                    result.put(key.get(), pointList);
                }
                // We know we are dealing with named vectors, b/c we generated them from the id field
                String name = ((NamedVector) value.getVector()).getName();
                pointList.add(name);
            }
        } catch (InstantiationException e) {
            log.error("Exception", e);
        } catch (IllegalAccessException e) {
            log.error("Exception", e);
        }
    }
    return result;
}
From source file:com.gruter.hadoop.customShell.CustomShell.java
License:Apache License
/**
 * Return the {@link FileSystem} specified by src and the conf.
 * If the {@link FileSystem} supports checksums, set verifyChecksum.
 */
private FileSystem getSrcFileSystem(Path src, boolean verifyChecksum) throws IOException {
    FileSystem srcFs = src.getFileSystem(getConf());
    srcFs.setVerifyChecksum(verifyChecksum);
    return srcFs;
}
From source file:com.gruter.hadoop.customShell.CustomShell.java
License:Apache License
void text(String srcf) throws IOException {
    Path srcPattern = new Path(srcf);
    new DelayedExceptionThrowing() {
        @Override
        void process(Path p, FileSystem srcFs) throws IOException {
            if (srcFs.isDirectory(p)) {
                throw new IOException("Source must be a file.");
            }
            printToStdout(forMagic(p, srcFs));
        }
    }.globAndProcess(srcPattern, srcPattern.getFileSystem(getConf()));
}
From source file:com.gruter.hadoop.customShell.CustomShell.java
License:Apache License
/**
 * Get a listing of all files that match the file pattern <i>srcf</i>.
 * @param srcf a file pattern specifying source files
 * @param recursive whether to list files in subdirectories
 * @throws IOException
 * @see org.apache.hadoop.fs.FileSystem#globStatus(Path)
 */
private int ls(String srcf, boolean recursive) throws IOException {
    Path srcPath = new Path(srcf);
    FileSystem srcFs = srcPath.getFileSystem(this.getConf());
    FileStatus[] srcs = srcFs.globStatus(srcPath);
    if (srcs == null || srcs.length == 0) {
        throw new FileNotFoundException("Cannot access " + srcf + ": No such file or directory.");
    }
    boolean printHeader = (srcs.length == 1);
    int numOfErrors = 0;
    for (int i = 0; i < srcs.length; i++) {
        numOfErrors += ls(srcs[i], srcFs, recursive, printHeader);
    }
    return numOfErrors == 0 ? 0 : -1;
}
From source file:com.gruter.hadoop.customShell.CustomShell.java
License:Apache License
int runCmdHandler(CmdHandler handler, String[] args, int startIndex, boolean recursive) throws IOException {
    int errors = 0;
    for (int i = startIndex; i < args.length; i++) {
        Path srcPath = new Path(args[i]);
        FileSystem srcFs = srcPath.getFileSystem(getConf());
        Path[] paths = FileUtil.stat2Paths(srcFs.globStatus(srcPath), srcPath);
        // if nothing matches the given glob pattern, increment the error count
        if (paths.length == 0) {
            System.err.println(handler.getName() + ": could not get status for '" + args[i] + "'");
            errors++;
        }
        for (Path path : paths) {
            try {
                FileStatus file = srcFs.getFileStatus(path);
                if (file == null) {
                    System.err.println(handler.getName() + ": could not get status for '" + path + "'");
                    errors++;
                } else {
                    errors += runCmdHandler(handler, file, srcFs, recursive);
                }
            } catch (IOException e) {
                String msg = (e.getMessage() != null ? e.getLocalizedMessage()
                        : (e.getCause().getMessage() != null ? e.getCause().getLocalizedMessage() : "null"));
                System.err.println(
                        handler.getName() + ": could not get status for '" + path + "': " + msg.split("\n")[0]);
                errors++;
            }
        }
    }
    return (errors > 0 || handler.getErrorCode() != 0) ? 1 : 0;
}
From source file:com.hadoop.mapred.DeprecatedLzoLineRecordReader.java
License:Open Source License
DeprecatedLzoLineRecordReader(Configuration conf, FileSplit split) throws IOException {
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    FileSystem fs = file.getFileSystem(conf);
    codecFactory = new CompressionCodecFactory(conf);
    final CompressionCodec codec = codecFactory.getCodec(file);
    if (codec == null) {
        throw new IOException("No LZO codec found, cannot run.");
    }

    // Open the file and seek to the next split.
    fileIn = fs.open(file);
    // Create input stream and read the file header.
    in = new LineReader(codec.createInputStream(fileIn), conf);
    if (start != 0) {
        fileIn.seek(start);
        // Read and ignore the first line.
        in.readLine(new Text());
        start = fileIn.getPos();
    }
    pos = start;
}
From source file:com.hadoop.mapred.DeprecatedLzoTextInputFormat.java
License:Open Source License
@Override
protected FileStatus[] listStatus(JobConf conf) throws IOException {
    List<FileStatus> files = new ArrayList<FileStatus>(Arrays.asList(super.listStatus(conf)));
    boolean ignoreNonLzo = LzoInputFormatCommon.getIgnoreNonLzoProperty(conf);

    Iterator<FileStatus> it = files.iterator();
    while (it.hasNext()) {
        FileStatus fileStatus = it.next();
        Path file = fileStatus.getPath();

        if (!LzoInputFormatCommon.isLzoFile(file.toString())) {
            // Get rid of non-LZO files, unless the conf explicitly tells us to keep them.
            // However, always skip over files that end with ".lzo.index", since
            // they are not part of the input.
            if (ignoreNonLzo || LzoInputFormatCommon.isLzoIndexFile(file.toString())) {
                it.remove();
            }
        } else {
            FileSystem fs = file.getFileSystem(conf);
            LzoIndex index = LzoIndex.readIndex(fs, file);
            indexes.put(file, index);
        }
    }

    return files.toArray(new FileStatus[] {});
}
From source file:com.hadoop.mapred.DeprecatedLzoTextInputFormat.java
License:Open Source License
@Override
public InputSplit[] getSplits(JobConf conf, int numSplits) throws IOException {
    FileSplit[] splits = (FileSplit[]) super.getSplits(conf, numSplits);
    // Find new starts/ends of the filesplit that align with the LZO blocks.
    List<FileSplit> result = new ArrayList<FileSplit>();

    for (FileSplit fileSplit : splits) {
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);

        if (!LzoInputFormatCommon.isLzoFile(file.toString())) {
            // non-LZO file, keep the input split as is.
            result.add(fileSplit);
            continue;
        }

        // LZO file, try to split if the .index file was found
        LzoIndex index = indexes.get(file);
        if (index == null) {
            throw new IOException("Index not found for " + file);
        }
        if (index.isEmpty()) {
            // Empty index, keep it as is.
            result.add(fileSplit);
            continue;
        }

        long start = fileSplit.getStart();
        long end = start + fileSplit.getLength();

        long lzoStart = index.alignSliceStartToIndex(start, end);
        long lzoEnd = index.alignSliceEndToIndex(end, fs.getFileStatus(file).getLen());

        if (lzoStart != LzoIndex.NOT_FOUND && lzoEnd != LzoIndex.NOT_FOUND) {
            result.add(new FileSplit(file, lzoStart, lzoEnd - lzoStart, fileSplit.getLocations()));
        }
    }

    return result.toArray(new FileSplit[result.size()]);
}