List of usage examples for org.apache.hadoop.fs.FileSystem#getConf()
@Override
public Configuration getConf()
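Before the examples from real projects, a minimal sketch (property value is a hypothetical override) of what getConf() returns: the Configuration the FileSystem instance was initialized with, so settings applied before FileSystem.get() are visible to downstream code.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class GetConfExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.setInt("io.file.buffer.size", 8192); // hypothetical override
        FileSystem fs = FileSystem.get(conf);
        // getConf() returns the Configuration this FileSystem was created with,
        // so the override above is visible here.
        int bufferSize = fs.getConf().getInt("io.file.buffer.size", 4096);
        System.out.println("buffer size = " + bufferSize); // prints 8192
    }
}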
From source file:com.cloudera.crunch.io.avro.AvroFileReaderFactory.java
License:Open Source License
@Override
public Iterator<T> read(FileSystem fs, final Path path) {
    this.mapFn.initialize();
    try {
        FsInput fsi = new FsInput(path, fs.getConf());
        final DataFileReader<T> reader = new DataFileReader<T>(fsi, recordReader);
        return new UnmodifiableIterator<T>() {
            @Override
            public boolean hasNext() {
                return reader.hasNext();
            }

            @Override
            public T next() {
                return mapFn.map(reader.next());
            }
        };
    } catch (IOException e) {
        LOG.info("Could not read avro file at path: " + path, e);
        return Iterators.emptyIterator();
    }
}
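The essential pattern here, isolated: Avro's FsInput adapts an HDFS file to Avro's SeekableInput using the Configuration obtained from fs.getConf(). A standalone sketch (the file path is hypothetical, and a GenericDatumReader stands in for Crunch's recordReader):

import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.mapred.FsInput;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class AvroReadSketch {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        Path path = new Path("/data/events.avro"); // hypothetical path
        // FsInput needs the FileSystem's own Configuration to open the file.
        FsInput in = new FsInput(path, fs.getConf());
        DataFileReader<GenericRecord> reader =
                new DataFileReader<GenericRecord>(in, new GenericDatumReader<GenericRecord>());
        try {
            while (reader.hasNext()) {
                System.out.println(reader.next());
            }
        } finally {
            reader.close();
        }
    }
}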
From source file:com.cloudera.hoop.fs.FSAppend.java
License:Open Source License
/**
 * Executes the filesystem operation.
 *
 * @param fs filesystem instance to use.
 * @return void.
 * @throws IOException thrown if an IO error occurred.
 */
@Override
public Void execute(FileSystem fs) throws IOException {
    int bufferSize = fs.getConf().getInt("hoop.buffer.size", 4096);
    OutputStream os = fs.append(path, bufferSize);
    IOUtils.copy(is, os);
    os.close();
    return null;
}
From source file:com.cloudera.hoop.fs.FSCreate.java
License:Open Source License
/**
 * Executes the filesystem operation.
 *
 * @param fs filesystem instance to use.
 * @return The URI of the created file.
 * @throws IOException thrown if an IO error occurred.
 */
@Override
public URI execute(FileSystem fs) throws IOException {
    if (replication == -1) {
        replication = (short) fs.getConf().getInt("dfs.replication", 3);
    }
    if (blockSize == -1) {
        blockSize = fs.getConf().getInt("dfs.block.size", 67108864); // 64 MB default
    }
    FsPermission fsPermission = FSUtils.getPermission(permission);
    int bufferSize = fs.getConf().getInt("hoop.buffer.size", 4096);
    OutputStream os = fs.create(path, fsPermission, override, bufferSize, replication, blockSize, null);
    IOUtils.copy(is, os);
    os.close();
    return FSUtils.convertPathToHoop(path, HoopServer.get().getBaseUrl()).toUri();
}
From source file:com.datasalt.pangool.utils.HadoopUtils.java
License:Apache License
public static void synchronize(FileSystem fS1, Path p1, FileSystem fS2, Path p2) throws IOException {
    deleteIfExists(fS2, p2);
    FileUtil.copy(fS1, p1, fS2, p2, false, false, fS1.getConf());
}
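A hedged usage sketch (paths are hypothetical): mirroring a local staging directory into HDFS with the helper above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SynchronizeSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem local = FileSystem.getLocal(conf); // source: local filesystem
        FileSystem hdfs = FileSystem.get(conf);       // destination: default filesystem
        // FileUtil.copy inside synchronize() is driven by the source
        // filesystem's configuration (fS1.getConf()).
        HadoopUtils.synchronize(local, new Path("/tmp/staging"), hdfs, new Path("/data/staging"));
    }
}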
From source file:com.datasalt.pangool.utils.HadoopUtils.java
License:Apache License
/**
 * Reads maps of integer -> double.
 */
public static HashMap<Integer, Double> readIntDoubleMap(Path path, FileSystem fs) throws IOException {
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, fs.getConf());
    IntWritable topic = new IntWritable();
    DoubleWritable value = new DoubleWritable();
    HashMap<Integer, Double> ret = new HashMap<Integer, Double>();
    while (reader.next(topic)) {
        reader.getCurrentValue(value);
        ret.put(topic.get(), value.get());
    }
    reader.close();
    return ret;
}
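For context, a counterpart writer (not part of HadoopUtils; a sketch assuming the same IntWritable/DoubleWritable layout) that produces a file readIntDoubleMap can consume:

import java.io.IOException;
import java.util.Map;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;

public static void writeIntDoubleMap(Map<Integer, Double> map, Path path, FileSystem fs) throws IOException {
    // Mirrors the reader: the FileSystem's own Configuration is passed through.
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, fs.getConf(), path,
            IntWritable.class, DoubleWritable.class);
    try {
        for (Map.Entry<Integer, Double> entry : map.entrySet()) {
            writer.append(new IntWritable(entry.getKey()), new DoubleWritable(entry.getValue()));
        }
    } finally {
        writer.close();
    }
}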
From source file:com.datasalt.pangool.utils.HadoopUtils.java
License:Apache License
/**
 * Reads maps of integer -> integer.
 */
public static HashMap<Integer, Integer> readIntIntMap(Path path, FileSystem fs) throws IOException {
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, fs.getConf());
    IntWritable topic = new IntWritable();
    IntWritable value = new IntWritable();
    HashMap<Integer, Integer> ret = new HashMap<Integer, Integer>();
    while (reader.next(topic)) {
        reader.getCurrentValue(value);
        ret.put(topic.get(), value.get());
    }
    reader.close();
    return ret;
}
From source file:com.davidgildeh.hadoop.utils.FileUtils.java
License:Apache License
/**
 * Merges a list of input files in a directory into a single file under the
 * output path with a specified filename.
 *
 * @param inputPath The input directory containing all the input files. E.g. /input/dir/on/hdfs/
 * @param outputPath The output path to output the file. E.g. /output/dir/on/hdfs/filename
 * @throws IOException
 */
public static void mergeFiles(String inputPath, String outputPath) throws IOException {
    Path inputDir = new Path(inputPath);
    Path outputFile = new Path(outputPath);
    FileSystem fileSystem = getFileSystem(outputFile);
    checkFileExists(fileSystem, inputDir);

    // Check the input path is a directory
    if (!fileSystem.getFileStatus(inputDir).isDir()) {
        LOG.error("Path '" + inputDir.toString() + "' is not a directory.");
        throw new IOException("Path '" + inputDir.toString() + "' is not a directory.");
    }

    // Create output file
    OutputStream out = fileSystem.create(outputFile);
    try {
        FileStatus[] contents = fileSystem.listStatus(inputDir);
        // Loop through all files in the directory and merge them into one file
        for (int i = 0; i < contents.length; i++) {
            if (!contents[i].isDir()) {
                InputStream in = fileSystem.open(contents[i].getPath());
                try {
                    IOUtils.copyBytes(in, out, fileSystem.getConf(), false);
                } finally {
                    in.close();
                }
            }
        }
    } finally {
        out.close();
        fileSystem.close();
        LOG.info("Merged input files from '" + inputPath + "' to '" + outputPath + "'");
    }
}
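A hedged usage sketch (paths are hypothetical): collapsing the part files a MapReduce job left in one directory into a single output file.

// Hypothetical caller: merge part-NNNNN files into one result file.
FileUtils.mergeFiles("/jobs/output/run-42/", "/jobs/output/run-42-merged/result");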
From source file:com.fullcontact.sstable.index.SSTableIndexIndex.java
License:Apache License
/**
 * Create and write an index index based on the input Cassandra Index.db file. Read the Index.db
 * and generate chunks (splits) based on the configured chunk size.
 *
 * @param fileSystem Hadoop file system.
 * @param sstablePath SSTable Index.db.
 * @throws IOException
 */
public static void writeIndex(final FileSystem fileSystem, final Path sstablePath) throws IOException {
    final Configuration configuration = fileSystem.getConf();
    final long splitSize = configuration.getLong(HadoopSSTableConstants.HADOOP_SSTABLE_SPLIT_MB,
            HadoopSSTableConstants.DEFAULT_SPLIT_MB) * 1024 * 1024;

    final Closer closer = Closer.create();

    final Path outputPath = sstablePath.suffix(SSTABLE_INDEX_SUFFIX);
    final Path inProgressOutputPath = sstablePath.suffix(SSTABLE_INDEX_IN_PROGRESS_SUFFIX);

    boolean success = false;

    try {
        final FSDataOutputStream os = closer.register(fileSystem.create(inProgressOutputPath));

        final TLongArrayList splitOffsets = new TLongArrayList();
        long currentStart = 0;
        long currentEnd = 0;
        final IndexOffsetScanner index = new IndexOffsetScanner(sstablePath, fileSystem);

        while (index.hasNext()) {
            // NOTE: This does not give an exact size of this split in bytes but a rough estimate.
            // This should be good enough since it's only used for sorting splits by size in hadoop land.
            while (currentEnd - currentStart < splitSize && index.hasNext()) {
                currentEnd = index.next();
                splitOffsets.add(currentEnd);
            }

            // Record the split
            final long[] offsets = splitOffsets.toArray();
            os.writeLong(offsets[0]); // Start
            os.writeLong(offsets[offsets.length - 1]); // End

            // Clear the offsets
            splitOffsets.clear();

            if (index.hasNext()) {
                currentStart = index.next();
                currentEnd = currentStart;
                splitOffsets.add(currentStart);
            }
        }

        success = true;
    } finally {
        closer.close();

        if (!success) {
            fileSystem.delete(inProgressOutputPath, false);
        } else {
            fileSystem.rename(inProgressOutputPath, outputPath);
        }
    }
}
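A sketch of a hypothetical caller. Because the chunk size is read via fileSystem.getConf(), it must be set on the Configuration the FileSystem was created from; "hadoop.sstable.split.mb" is assumed to be the key behind HadoopSSTableConstants.HADOOP_SSTABLE_SPLIT_MB, and the path is hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WriteIndexSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Assumed key name for HadoopSSTableConstants.HADOOP_SSTABLE_SPLIT_MB.
        conf.setLong("hadoop.sstable.split.mb", 512);
        FileSystem fs = FileSystem.get(conf);
        // Hypothetical SSTable Index.db path.
        SSTableIndexIndex.writeIndex(fs, new Path("/cassandra/ks/cf/ks-cf-ic-1-Index.db"));
    }
}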
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.AbstractHoplog.java
License:Apache License
private void initialize(Path path, SortedOplogStatistics stats, FileSystem fs) {
    this.conf = fs.getConf();
    this.stats = stats;
    this.path = fs.makeQualified(path);
    this.hfd = new HoplogDescriptor(this.path.getName());
}
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.RWSplitIterator.java
License:Apache License
@Override
protected AbstractHoplog getHoplog(FileSystem fs, Path path) throws IOException {
    SchemaMetrics.configureGlobally(fs.getConf());
    return HFileSortedOplog.getHoplogForLoner(fs, path);
}