List of usage examples for org.apache.hadoop.fs FileSystem open
public FSDataInputStream open(Path f) throws IOException
From source file:com.asakusafw.runtime.stage.temporary.TemporaryStorage.java
License:Apache License
/** * Opens a temporary {@link ModelInput} for the specified path. * @param <V> data type//w w w .j ava 2s. c o m * @param conf configuration * @param dataType data type * @param path source path (must not contain wildcards) * @return the opened {@link ModelInput} * @throws IOException if failed to open input * @throws IllegalArgumentException if some parameters were {@code null} */ @SuppressWarnings("unchecked") public static <V> ModelInput<V> openInput(Configuration conf, Class<V> dataType, Path path) throws IOException { if (conf == null) { throw new IllegalArgumentException("conf must not be null"); //$NON-NLS-1$ } if (dataType == null) { throw new IllegalArgumentException("dataType must not be null"); //$NON-NLS-1$ } if (path == null) { throw new IllegalArgumentException("path must not be null"); //$NON-NLS-1$ } FileSystem fs = path.getFileSystem(conf); if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("Opening temporary input: {0} (fs={1})", //$NON-NLS-1$ path, fs.getUri())); } if (Writable.class.isAssignableFrom(dataType)) { return (ModelInput<V>) new TemporaryFileInput<>(fs.open(path), 0); } SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf); return (ModelInput<V>) new SequenceFileModelInput<>(reader); }
From source file:com.asakusafw.runtime.util.cache.HadoopFileCacheRepository.java
License:Apache License
private long computeChecksum(FileSystem fs, Path file) throws IOException { if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("Computing checksum: {0}", //$NON-NLS-1$ file));// w w w. j a v a2 s .c om } Checksum checksum = new CRC32(); byte[] buf = byteBuffers.get(); try (FSDataInputStream input = fs.open(file)) { while (true) { int read = input.read(buf); if (read < 0) { break; } checksum.update(buf, 0, read); } } return checksum.getValue(); }
From source file:com.asakusafw.runtime.util.cache.HadoopFileCacheRepository.java
License:Apache License
private boolean isCached(Path cacheFilePath, Path cacheChecksumPath, long checksum) throws IOException { if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("checking remote cache: {0}", //$NON-NLS-1$ cacheFilePath));//from w w w. j a v a2s.c om } FileSystem fs = cacheChecksumPath.getFileSystem(configuration); if (fs.exists(cacheChecksumPath) == false || fs.exists(cacheFilePath) == false) { if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("remote cache is not found: {0}", //$NON-NLS-1$ cacheFilePath)); } return false; } else { if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("reading remote cache checksum: {0}", //$NON-NLS-1$ cacheFilePath)); } long other; try (FSDataInputStream input = fs.open(cacheChecksumPath)) { other = input.readLong(); } return checksum == other; } }
From source file:com.asakusafw.runtime.util.cache.HadoopFileCacheRepository.java
License:Apache License
/**
 * Copies the bytes of {@code sourceFile} into a newly created {@code targetFile}.
 *
 * <p>The target is created with the overwrite flag set to {@code false}.
 *
 * @param sourceFs file system that holds the source file
 * @param sourceFile file to read
 * @param targetFs file system that receives the copy
 * @param targetFile file to create
 * @throws IOException if the target cannot be created or the copy fails
 */
private void syncFile(FileSystem sourceFs, Path sourceFile, FileSystem targetFs, Path targetFile)
        throws IOException {
    // Scratch buffer supplied by the enclosing class (presumably per-thread; confirm at the field).
    byte[] chunk = byteBuffers.get();
    // The target is opened before the source, so a failed create never touches the source.
    try (FSDataOutputStream sink = targetFs.create(targetFile, false);
            FSDataInputStream origin = sourceFs.open(sourceFile)) {
        int count;
        while ((count = origin.read(chunk)) >= 0) {
            sink.write(chunk, 0, count);
        }
    }
}
From source file:com.asakusafw.windgate.hadoopfs.ssh.AbstractSshHadoopFsMirrorTest.java
License:Apache License
/**
 * Writes {@code contents} to a temporary file as {@link Text} records, then appends the raw bytes
 * of that file to {@code writer} as its next entry.
 *
 * @param writer the file list that receives the new entry
 * @param path name of the temporary file created in the test folder
 * @param contents the text values to store, one record each
 * @throws IOException if the temporary file cannot be written or copied
 */
private void put(FileList.Writer writer, String path, String... contents) throws IOException {
    Configuration conf = new Configuration();
    File temp = folder.newFile(path);
    FileSystem localFs = FileSystem.getLocal(conf);
    Path tempPath = new Path(temp.toURI());

    // Step 1: serialize the records into the temporary file.
    try (ModelOutput<Text> records = TemporaryStorage.openOutput(conf, Text.class, tempPath)) {
        for (int i = 0; i < contents.length; i++) {
            records.write(new Text(contents[i]));
        }
    }

    // Step 2: stream the serialized bytes into the next file-list entry.
    FileStatus status = localFs.getFileStatus(tempPath);
    try (FSDataInputStream src = localFs.open(status.getPath());
            OutputStream dst = writer.openNext(status.getPath())) {
        byte[] chunk = new byte[256];
        int count;
        while ((count = src.read(chunk)) >= 0) {
            dst.write(chunk, 0, count);
        }
    }
}
From source file:com.ashishpaliwal.hadoop.utils.inputformat.CsvRecordReader.java
License:Apache License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException { FileSplit split = (FileSplit) genericSplit; Configuration job = context.getConfiguration(); this.maxLineLength = job.getInt(MAX_LINE_LENGTH, 2147483647); this.start = split.getStart(); this.end = (this.start + split.getLength()); Path file = split.getPath();//from ww w . ja va2 s. c om this.compressionCodecs = new CompressionCodecFactory(job); this.codec = this.compressionCodecs.getCodec(file); FileSystem fs = file.getFileSystem(job); this.fileIn = fs.open(file); if (isCompressedInput()) { this.decompressor = CodecPool.getDecompressor(this.codec); if ((this.codec instanceof SplittableCompressionCodec)) { SplitCompressionInputStream cIn = ((SplittableCompressionCodec) this.codec).createInputStream( this.fileIn, this.decompressor, this.start, this.end, SplittableCompressionCodec.READ_MODE.BYBLOCK); this.in = new CsvLineReader(cIn, job); this.start = cIn.getAdjustedStart(); this.end = cIn.getAdjustedEnd(); this.filePosition = cIn; } else { this.in = new CsvLineReader(this.codec.createInputStream(this.fileIn, this.decompressor), job); this.filePosition = this.fileIn; } } else { this.fileIn.seek(this.start); this.in = new CsvLineReader(this.fileIn, job); this.filePosition = this.fileIn; } if (this.start != 0L) { this.start += this.in.readLine(new Text(), 0, maxBytesToConsume(this.start)); } this.pos = this.start; }
From source file:com.audaque.instancematch.match.GenerateSignature2.java
public static List<Tuple2<String, String>> generateSignature(final String srcFile, String seedFile, final int q, int hashNum, JavaSparkContext sc) { JavaRDD<String> seedRDD = sc.textFile(seedFile, 40); JavaPairRDD<String, String> seeds_hashRDD = seedRDD .mapPartitionsToPair(new PairFlatMapFunction<Iterator<String>, String, String>() { @Override/* w w w . jav a 2s. c o m*/ public Iterable<Tuple2<String, String>> call(Iterator<String> seed) throws Exception { List<Integer> seedList = new ArrayList<Integer>(); while (seed.hasNext()) { seedList.add(Integer.valueOf(seed.next())); } int[] minHash = new int[seedList.size()]; for (int i = 0; i < minHash.length; i++) { minHash[i] = Integer.MAX_VALUE; } //???hdfs Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(URI.create(srcFile), conf); FSDataInputStream in = fs.open(new Path(srcFile)); BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8")); String line; while ((line = br.readLine()) != null) { for (int i = 0; i < seedList.size(); i++) { int hash; if (q < 0) { hash = Hash.RSHash(line, seedList.get(i)); } else { hash = QGramHash.RSHash(line, seedList.get(i), q); } if (hash < minHash[i]) { minHash[i] = hash; } } } List<Tuple2<String, String>> tList = new ArrayList<Tuple2<String, String>>(); for (int i = 0; i < seedList.size(); i++) { tList.add(new Tuple2<String, String>(String.valueOf(seedList.get(i)), String.valueOf(minHash[i]))); } return tList; } }); return seeds_hashRDD.sortByKey().collect(); }
From source file:com.audaque.instancematch.spark.Test.java
public static void main(String[] args) throws IOException { String uri = "hdfs://172.16.1.101:8020/user/ALGO/match/data1/address2.txt"; Configuration conf = new Configuration(); // conf.set("hadoop.job.ugi", "ALGO,audaque.algo"); FileSystem fs = FileSystem.get(URI.create(uri), conf); FSDataInputStream in = fs.open(new Path(uri)); BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8")); String line;/* w w w.ja va 2 s. c o m*/ while ((line = br.readLine()) != null) { System.out.println(line); } // IOUtils.copyBytes(in, System.out,4096,false); // System.out.println(); }
From source file:com.awcoleman.BouncyCastleGenericCDRHadoop.RawFileRecordReader.java
License:Apache License
/**
 * Opens the file behind the given split and wraps it for ASN.1 decoding.
 *
 * <p>The raw stream is passed through {@code decompressStream} and the result is wrapped in an
 * {@link ASN1InputStream}. If either step fails, the raw stream is closed before the exception
 * propagates so that the file handle is not leaked.
 *
 * @param split the split to read; cast to {@link FileSplit}
 * @param context the task context that supplies the configuration
 * @throws IOException if the file cannot be opened or wrapped
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    path = ((FileSplit) split).getPath();
    FileSystem fs = path.getFileSystem(conf);
    FSDataInputStream fsin = fs.open(path);
    boolean initialized = false;
    try {
        is = decompressStream(fsin);
        asnin = new ASN1InputStream(is);
        initialized = true;
    } finally {
        if (!initialized) {
            try {
                fsin.close();
            } catch (IOException ignored) {
                // Best-effort cleanup: the failure that brought us here is the one worth reporting.
            }
        }
    }
}
From source file:com.awcoleman.BouncyCastleGenericCDRHadoopWithWritable.RawFileRecordReader.java
License:Apache License
@Override public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); path = ((FileSplit) split).getPath(); FileSystem fs = path.getFileSystem(conf); FSDataInputStream fsin = fs.open(path); is = decompressStream(fsin);// w ww .j a v a 2 s . com asnin = new ASN1InputStream(is); recordCounter = 0; }