Example usage for org.apache.hadoop.fs FileSystem open

List of usage examples for org.apache.hadoop.fs FileSystem open

Introduction

This page collects usage examples for the open method of org.apache.hadoop.fs.FileSystem.

Prototype

public FSDataInputStream open(Path f) throws IOException 

Document

Opens an FSDataInputStream at the indicated Path.
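
Before the collected examples, a minimal self-contained sketch of the typical pattern: obtain the FileSystem from the Path itself, open the stream, and close it with try-with-resources. The input path is a placeholder passed on the command line, not taken from any of the source files below.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OpenExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // placeholder path, e.g. an HDFS or local file URI supplied as args[0]
        Path path = new Path(args[0]);
        FileSystem fs = path.getFileSystem(conf);
        // open() returns an FSDataInputStream; closing it releases the underlying connection
        try (FSDataInputStream in = fs.open(path);
                BufferedReader reader = new BufferedReader(
                        new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}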

Usage

From source file:com.asakusafw.runtime.stage.temporary.TemporaryStorage.java

License:Apache License

/**
 * Opens a temporary {@link ModelInput} for the specified path.
 * @param <V> data type
 * @param conf configuration
 * @param dataType data type
 * @param path source path (must not contain wildcards)
 * @return the opened {@link ModelInput}
 * @throws IOException if failed to open input
 * @throws IllegalArgumentException if some parameters were {@code null}
 */
@SuppressWarnings("unchecked")
public static <V> ModelInput<V> openInput(Configuration conf, Class<V> dataType, Path path) throws IOException {
    if (conf == null) {
        throw new IllegalArgumentException("conf must not be null"); //$NON-NLS-1$
    }
    if (dataType == null) {
        throw new IllegalArgumentException("dataType must not be null"); //$NON-NLS-1$
    }
    if (path == null) {
        throw new IllegalArgumentException("path must not be null"); //$NON-NLS-1$
    }
    FileSystem fs = path.getFileSystem(conf);
    if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format("Opening temporary input: {0} (fs={1})", //$NON-NLS-1$
                path, fs.getUri()));
    }
    if (Writable.class.isAssignableFrom(dataType)) {
        return (ModelInput<V>) new TemporaryFileInput<>(fs.open(path), 0);
    }
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
    return (ModelInput<V>) new SequenceFileModelInput<>(reader);
}

From source file:com.asakusafw.runtime.util.cache.HadoopFileCacheRepository.java

License:Apache License

private long computeChecksum(FileSystem fs, Path file) throws IOException {
    if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format("Computing checksum: {0}", //$NON-NLS-1$
                file));
    }
    Checksum checksum = new CRC32();
    byte[] buf = byteBuffers.get();
    try (FSDataInputStream input = fs.open(file)) {
        while (true) {
            int read = input.read(buf);
            if (read < 0) {
                break;
            }
            checksum.update(buf, 0, read);
        }
    }
    return checksum.getValue();
}

From source file:com.asakusafw.runtime.util.cache.HadoopFileCacheRepository.java

License:Apache License

private boolean isCached(Path cacheFilePath, Path cacheChecksumPath, long checksum) throws IOException {
    if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format("checking remote cache: {0}", //$NON-NLS-1$
                cacheFilePath));
    }
    FileSystem fs = cacheChecksumPath.getFileSystem(configuration);
    if (fs.exists(cacheChecksumPath) == false || fs.exists(cacheFilePath) == false) {
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("remote cache is not found: {0}", //$NON-NLS-1$
                    cacheFilePath));
        }
        return false;
    } else {
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("reading remote cache checksum: {0}", //$NON-NLS-1$
                    cacheFilePath));
        }
        long other;
        try (FSDataInputStream input = fs.open(cacheChecksumPath)) {
            other = input.readLong();
        }
        return checksum == other;
    }
}

From source file:com.asakusafw.runtime.util.cache.HadoopFileCacheRepository.java

License:Apache License

private void syncFile(FileSystem sourceFs, Path sourceFile, FileSystem targetFs, Path targetFile)
        throws IOException {
    byte[] buf = byteBuffers.get();
    try (FSDataOutputStream output = targetFs.create(targetFile, false);
            FSDataInputStream input = sourceFs.open(sourceFile)) {
        while (true) {
            int read = input.read(buf);
            if (read < 0) {
                break;
            }
            output.write(buf, 0, read);
        }
    }
}

From source file:com.asakusafw.windgate.hadoopfs.ssh.AbstractSshHadoopFsMirrorTest.java

License:Apache License

private void put(FileList.Writer writer, String path, String... contents) throws IOException {
    Configuration conf = new Configuration();
    File temp = folder.newFile(path);
    FileSystem fs = FileSystem.getLocal(conf);
    try (ModelOutput<Text> output = TemporaryStorage.openOutput(conf, Text.class, new Path(temp.toURI()))) {
        for (String content : contents) {
            output.write(new Text(content));
        }
    }
    FileStatus status = fs.getFileStatus(new Path(temp.toURI()));
    try (FSDataInputStream src = fs.open(status.getPath());
            OutputStream dst = writer.openNext(status.getPath())) {
        byte[] buf = new byte[256];
        while (true) {
            int read = src.read(buf);
            if (read < 0) {
                break;
            }
            dst.write(buf, 0, read);
        }
    }
}

From source file:com.ashishpaliwal.hadoop.utils.inputformat.CsvRecordReader.java

License:Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;

    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, 2147483647);
    this.start = split.getStart();
    this.end = (this.start + split.getLength());
    Path file = split.getPath();
    this.compressionCodecs = new CompressionCodecFactory(job);
    this.codec = this.compressionCodecs.getCodec(file);

    FileSystem fs = file.getFileSystem(job);
    this.fileIn = fs.open(file);
    if (isCompressedInput()) {
        this.decompressor = CodecPool.getDecompressor(this.codec);
        if ((this.codec instanceof SplittableCompressionCodec)) {
            SplitCompressionInputStream cIn = ((SplittableCompressionCodec) this.codec).createInputStream(
                    this.fileIn, this.decompressor, this.start, this.end,
                    SplittableCompressionCodec.READ_MODE.BYBLOCK);

            this.in = new CsvLineReader(cIn, job);
            this.start = cIn.getAdjustedStart();
            this.end = cIn.getAdjustedEnd();
            this.filePosition = cIn;
        } else {
            this.in = new CsvLineReader(this.codec.createInputStream(this.fileIn, this.decompressor), job);
            this.filePosition = this.fileIn;
        }
    } else {
        this.fileIn.seek(this.start);
        this.in = new CsvLineReader(this.fileIn, job);
        this.filePosition = this.fileIn;
    }

    if (this.start != 0L) {
        this.start += this.in.readLine(new Text(), 0, maxBytesToConsume(this.start));
    }
    this.pos = this.start;
}

From source file:com.audaque.instancematch.match.GenerateSignature2.java

public static List<Tuple2<String, String>> generateSignature(final String srcFile, String seedFile, final int q,
        int hashNum, JavaSparkContext sc) {
    JavaRDD<String> seedRDD = sc.textFile(seedFile, 40);

    JavaPairRDD<String, String> seeds_hashRDD = seedRDD
            .mapPartitionsToPair(new PairFlatMapFunction<Iterator<String>, String, String>() {
                @Override
                public Iterable<Tuple2<String, String>> call(Iterator<String> seed) throws Exception {
                    List<Integer> seedList = new ArrayList<Integer>();
                    while (seed.hasNext()) {
                        seedList.add(Integer.valueOf(seed.next()));
                    }
                    int[] minHash = new int[seedList.size()];
                    for (int i = 0; i < minHash.length; i++) {
                        minHash[i] = Integer.MAX_VALUE;
                    }
                    // read the source file from HDFS
                    Configuration conf = new Configuration();
                    FileSystem fs = FileSystem.get(URI.create(srcFile), conf);
                    FSDataInputStream in = fs.open(new Path(srcFile));
                    BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"));
                    String line;
                    while ((line = br.readLine()) != null) {
                        for (int i = 0; i < seedList.size(); i++) {
                            int hash;
                            if (q < 0) {
                                hash = Hash.RSHash(line, seedList.get(i));
                            } else {
                                hash = QGramHash.RSHash(line, seedList.get(i), q);
                            }
                            if (hash < minHash[i]) {
                                minHash[i] = hash;
                            }
                        }

                    }
                    List<Tuple2<String, String>> tList = new ArrayList<Tuple2<String, String>>();
                    for (int i = 0; i < seedList.size(); i++) {
                        tList.add(new Tuple2<String, String>(String.valueOf(seedList.get(i)),
                                String.valueOf(minHash[i])));
                    }

                    return tList;
                }
            });

    return seeds_hashRDD.sortByKey().collect();
}

From source file:com.audaque.instancematch.spark.Test.java

public static void main(String[] args) throws IOException {
    String uri = "hdfs://172.16.1.101:8020/user/ALGO/match/data1/address2.txt";
    Configuration conf = new Configuration();
    //        conf.set("hadoop.job.ugi", "ALGO,audaque.algo");
    FileSystem fs = FileSystem.get(URI.create(uri), conf);
    FSDataInputStream in = fs.open(new Path(uri));
    BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"));
    String line;

    while ((line = br.readLine()) != null) {
        System.out.println(line);
    }

    //        IOUtils.copyBytes(in, System.out,4096,false);
    //        System.out.println();
}

From source file:com.awcoleman.BouncyCastleGenericCDRHadoop.RawFileRecordReader.java

License:Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    path = ((FileSplit) split).getPath();
    FileSystem fs = path.getFileSystem(conf);
    FSDataInputStream fsin = fs.open(path);
    is = decompressStream(fsin);
    asnin = new ASN1InputStream(is);
}

From source file:com.awcoleman.BouncyCastleGenericCDRHadoopWithWritable.RawFileRecordReader.java

License:Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    path = ((FileSplit) split).getPath();
    FileSystem fs = path.getFileSystem(conf);
    FSDataInputStream fsin = fs.open(path);
    is = decompressStream(fsin);
    asnin = new ASN1InputStream(is);

    recordCounter = 0;
}