List of usage examples for org.apache.hadoop.fs.FileSystem.getConf()

Method signature (getConf() returns the Configuration this FileSystem was initialized with):

@Override
public Configuration getConf()
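Before the examples from real projects, here is a minimal, self-contained sketch of the pattern they all share: obtain a FileSystem, then read settings back out of its live Configuration through getConf(). The property name is a standard Hadoop key; everything else is illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class GetConfExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // getConf() returns the Configuration this FileSystem was initialized
        // with, so settings can be read back from the live instance.
        int bufferSize = fs.getConf().getInt("io.file.buffer.size", 4096);
        System.out.println("io.file.buffer.size = " + bufferSize);
        fs.close();
    }
}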
From source file:edu.bigdata.training.fileformats.compress.SequenceFileWriter.java
public static void main(String[] args) throws IOException {
    String uri = "output";
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(uri);
    IntWritable key = new IntWritable();
    Text value = new Text();
    File infile = new File("src/main/resources/input.txt");
    SequenceFile.Writer writer = null;
    try {
        // fs.getConf() supplies the live Configuration, from which the I/O
        // buffer size is read back (default 4096 bytes).
        writer = SequenceFile.createWriter(conf, Writer.file(path),
                Writer.keyClass(key.getClass()),
                Writer.valueClass(value.getClass()),
                Writer.bufferSize(fs.getConf().getInt("io.file.buffer.size", 4096)),
                Writer.replication(fs.getDefaultReplication()),
                Writer.blockSize(1073741824),
                Writer.compression(SequenceFile.CompressionType.BLOCK, new DefaultCodec()),
                Writer.progressable(null),
                Writer.metadata(new Metadata()));
        int ctr = 100;
        List<String> lines = FileUtils.readLines(infile);
        for (String line : lines) {
            key.set(ctr++);
            value.set(line);
            if (ctr < 150) {
                System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value);
            }
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
}
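A complementary reader for the file written above might look like the following sketch. It assumes the same IntWritable/Text key-value types and uses the option-based Reader API from Hadoop 2; the usual org.apache.hadoop imports are omitted.

Configuration conf = new Configuration();
try (SequenceFile.Reader reader = new SequenceFile.Reader(conf,
        SequenceFile.Reader.file(new Path("output")))) {
    IntWritable key = new IntWritable();
    Text value = new Text();
    while (reader.next(key, value)) {
        System.out.printf("%s\t%s%n", key, value);
    }
}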
From source file:edu.umd.cloud9.collection.clue.ClueWarcForwardIndex.java
License:Apache License
@Override
public void loadIndex(Path index, Path mapping, FileSystem fs) throws IOException {
    this.conf = fs.getConf();
    LOG.info("Loading forward index: " + index);

    docnoMapping.loadMapping(mapping, fs);

    FSDataInputStream in = fs.open(index);

    // Class name; throw away.
    in.readUTF();
    collectionPath = in.readUTF();

    int blocks = in.readInt();
    LOG.info(blocks + " blocks expected");

    docnos = new int[blocks];
    offsets = new int[blocks];
    fileno = new short[blocks];

    for (int i = 0; i < blocks; i++) {
        docnos[i] = in.readInt();
        offsets[i] = in.readInt();
        fileno[i] = in.readShort();

        if (i > 0 && i % 100000 == 0)
            LOG.info(i + " blocks read");
    }

    in.close();
}
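The arrays loaded here are keyed by docno, which suggests block lookup by binary search; a hedged sketch of what a lookup over the loaded arrays could look like (the actual lookup method is not shown in this snippet):

// Hypothetical lookup: binary-search the sorted docno array for the block
// holding a given document, then seek to (fileno[block], offsets[block]).
int block = Arrays.binarySearch(docnos, docno);
if (block < 0) {
    block = -block - 2; // insertion point minus one: the preceding block
}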
From source file:edu.umd.cloud9.io.ReadSequenceFile.java
License:Apache License
private static int readSequenceFile(Path path, FileSystem fs, int max) throws IOException {
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, fs.getConf());

    System.out.println("Reading " + path + "...\n");
    try {
        System.out.println("Key type: " + reader.getKeyClass().toString());
        System.out.println("Value type: " + reader.getValueClass().toString() + "\n");
    } catch (Exception e) {
        throw new RuntimeException("Error: loading key/value class");
    }

    Writable key, value;
    int n = 0;
    try {
        if (Tuple.class.isAssignableFrom(reader.getKeyClass())) {
            key = TUPLE_FACTORY.newTuple();
        } else {
            key = (Writable) reader.getKeyClass().newInstance();
        }

        if (Tuple.class.isAssignableFrom(reader.getValueClass())) {
            value = TUPLE_FACTORY.newTuple();
        } else {
            value = (Writable) reader.getValueClass().newInstance();
        }

        while (reader.next(key, value)) {
            System.out.println("Record " + n);
            System.out.println("Key: " + key + "\nValue: " + value);
            System.out.println("----------------------------------------");

            n++;
            if (n >= max)
                break;
        }
        reader.close();
        System.out.println(n + " records read.\n");
    } catch (Exception e) {
        e.printStackTrace();
    }

    return n;
}
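One note on this example: the Reader(fs, path, conf) constructor it uses is deprecated in Hadoop 2 in favor of the option-based API; an equivalent call would be roughly:

SequenceFile.Reader reader = new SequenceFile.Reader(fs.getConf(), SequenceFile.Reader.file(path));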
From source file:edu.umd.cloud9.io.SequenceFileUtils.java
License:Apache License
/**
 * Reads key-value pairs from a SequenceFile, up to a maximum number.
 *
 * @param path path to the file
 * @param fs   file system to read from
 * @param max  maximum number of key-value pairs to read
 * @return list of key-value pairs
 */
@SuppressWarnings("unchecked")
public static <K extends Writable, V extends Writable> List<PairOfWritables<K, V>> readFile(Path path,
        FileSystem fs, int max) throws IOException {
    List<PairOfWritables<K, V>> list = new ArrayList<PairOfWritables<K, V>>();

    try {
        int k = 0;
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, fs.getConf());

        K key;
        V value;
        if (Tuple.class.isAssignableFrom(reader.getKeyClass())) {
            key = (K) TUPLE_FACTORY.newTuple();
        } else {
            key = (K) reader.getKeyClass().newInstance();
        }

        if (Tuple.class.isAssignableFrom(reader.getValueClass())) {
            value = (V) TUPLE_FACTORY.newTuple();
        } else {
            value = (V) reader.getValueClass().newInstance();
        }

        while (reader.next(key, value)) {
            k++;
            list.add(new PairOfWritables<K, V>(key, value));
            if (k >= max) {
                break;
            }

            // Create new objects, because the reader reuses the key and value otherwise.
            if (Tuple.class.isAssignableFrom(reader.getKeyClass())) {
                key = (K) TUPLE_FACTORY.newTuple();
            } else {
                key = (K) reader.getKeyClass().newInstance();
            }

            if (Tuple.class.isAssignableFrom(reader.getValueClass())) {
                value = (V) TUPLE_FACTORY.newTuple();
            } else {
                value = (V) reader.getValueClass().newInstance();
            }
        }
        reader.close();
    } catch (IllegalAccessException e) {
        throw new RuntimeException("Error reading SequenceFile: " + e);
    } catch (InstantiationException e) {
        throw new RuntimeException("Error reading SequenceFile: " + e);
    }

    return list;
}
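A hedged example of calling this helper (the IntWritable/Text types and the "output" path are assumptions, not part of the Cloud9 source):

FileSystem fs = FileSystem.get(new Configuration());
List<PairOfWritables<IntWritable, Text>> pairs =
        SequenceFileUtils.readFile(new Path("output"), fs, 10);
for (PairOfWritables<IntWritable, Text> pair : pairs) {
    System.out.println(pair.getLeftElement() + "\t" + pair.getRightElement());
}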
From source file:edu.umd.cloud9.io.SequenceFileUtils.java
License:Apache License
@SuppressWarnings("unchecked") public static <K extends Writable> List<K> readKeys(Path path, FileSystem fs, int max) { List<K> list = new ArrayList<K>(); try {/*from w ww . j ava 2 s. c om*/ int k = 0; SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, fs.getConf()); K key = (K) reader.getKeyClass().newInstance(); Writable value = (Writable) reader.getValueClass().newInstance(); while (reader.next(key, value)) { k++; list.add(key); if (k >= max) { break; } key = (K) reader.getKeyClass().newInstance(); } reader.close(); } catch (Exception e) { throw new RuntimeException("Error reading SequenceFile " + path); } return list; }
From source file:edu.umd.cloud9.io.SequenceFileUtils.java
License:Apache License
@SuppressWarnings("unchecked") public static <V extends Writable> List<V> readValues(Path path, FileSystem fs, int max) { List<V> list = new ArrayList<V>(); try {/*from ww w . ja va 2 s .co m*/ int k = 0; SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, fs.getConf()); Writable key = (Writable) reader.getKeyClass().newInstance(); V value = (V) reader.getValueClass().newInstance(); while (reader.next(key, value)) { k++; list.add(value); if (k >= max) { break; } value = (V) reader.getValueClass().newInstance(); } reader.close(); } catch (Exception e) { throw new RuntimeException("Error reading SequenceFile " + path); } return list; }
From source file:edu.umd.cloud9.util.SequenceFileUtils.java
License:Apache License
public static <K extends WritableComparable, V extends Writable> SortedMap<K, V> readFileIntoMap(FileSystem fs,
        String s, int max) {
    Path path = new Path(s);
    SortedMap<K, V> list = new TreeMap<K, V>();

    try {
        int k = 0;
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, fs.getConf());

        K key = (K) reader.getKeyClass().newInstance();
        V value = (V) reader.getValueClass().newInstance();
        while (reader.next(key, value)) {
            k++;
            list.put(key, value);
            if (max != -1 && k >= max)
                break;

            key = (K) reader.getKeyClass().newInstance();
            value = (V) reader.getValueClass().newInstance();
        }
        reader.close();
    } catch (Exception e) {
        throw new RuntimeException("Exception reading file " + s);
        // e.printStackTrace();
    }

    return list;
}
From source file:edu.umd.honghongie.BooleanRetrievalCompressed.java
License:Apache License
private void initialize(String indexPath, String collectionPath, FileSystem fs) throws IOException {
    // Open the MapFile that holds the index.
    index = new MapFile.Reader(new Path(indexPath + "/part-r-00000"), fs.getConf());
    // Open the collection file that holds the raw text.
    collection = fs.open(new Path(collectionPath));
    stack = new Stack<Set<Integer>>();
}
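Once the index is open, term lookups presumably go through MapFile.Reader.get; a minimal hedged sketch (the Text/BytesWritable types are assumptions, since the snippet does not show how index is used):

Text term = new Text("retrieval");
BytesWritable postings = new BytesWritable();
// get() fills in the value stored for the given key and returns it, or null if absent.
if (index.get(term, postings) != null) {
    // ... decode the compressed postings list ...
}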
From source file:edu.umn.cs.spatialHadoop.operations.Sampler.java
License:Open Source License
private static <T extends TextSerializable> int sampleLocalByCount(Path[] files, ResultCollector<T> output,
        OperationsParams params) throws IOException {

    ArrayList<Path> data_files = new ArrayList<Path>();
    for (Path file : files) {
        FileSystem fs = file.getFileSystem(params);
        if (fs.getFileStatus(file).isDir()) {
            // Directory: process all visible data files in this directory
            FileStatus[] fileStatus = fs.listStatus(file, hiddenFileFilter);
            for (FileStatus f : fileStatus) {
                data_files.add(f.getPath());
            }
        } else {
            // File: process this file
            data_files.add(file);
        }
    }

    files = data_files.toArray(new Path[data_files.size()]);

    TextSerializable inObj1, outObj1;
    inObj1 = OperationsParams.getTextSerializable(params, "shape", new Text2());
    outObj1 = OperationsParams.getTextSerializable(params, "outshape", new Text2());

    // Make the objects final to be able to use them in the anonymous inner class
    final TextSerializable inObj = inObj1;
    final T outObj = (T) outObj1;
    ResultCollector<TextSerializable> converter = createConverter(output, inObj, outObj);

    long[] files_start_offset = new long[files.length + 1]; // Prefix sum of file sizes
    long total_length = 0;
    for (int i_file = 0; i_file < files.length; i_file++) {
        FileSystem fs = files[i_file].getFileSystem(params);
        files_start_offset[i_file] = total_length;
        total_length += fs.getFileStatus(files[i_file]).getLen();
    }
    files_start_offset[files.length] = total_length;

    // Generate offsets to read from, and make sure they are ordered to minimize
    // seeks between different HDFS blocks
    Random random = new Random(params.getLong("seed", System.currentTimeMillis()));
    long[] offsets = new long[params.getInt("count", 0)];
    for (int i = 0; i < offsets.length; i++) {
        if (total_length == 0)
            offsets[i] = 0;
        else
            offsets[i] = Math.abs(random.nextLong()) % total_length;
    }
    Arrays.sort(offsets);

    int record_i = 0; // Number of records read so far
    int records_returned = 0;

    int file_i = 0; // Index of the current file being sampled
    while (record_i < offsets.length) {
        // Skip to the file that contains the next sample
        while (offsets[record_i] > files_start_offset[file_i + 1])
            file_i++;

        long current_file_size = files_start_offset[file_i + 1] - files_start_offset[file_i];
        FileSystem fs = files[file_i].getFileSystem(params);
        ShapeLineRecordReader reader = new ShapeLineRecordReader(fs.getConf(),
                new FileSplit(files[file_i], 0, current_file_size, new String[] {}));
        Rectangle key = reader.createKey();
        Text line = reader.createValue();
        long pos = files_start_offset[file_i];

        while (record_i < offsets.length && offsets[record_i] <= files_start_offset[file_i + 1]
                && reader.next(key, line)) {
            pos += line.getLength();
            if (pos > offsets[record_i]) {
                // Passed the offset of record_i; report this element to the output
                if (converter != null) {
                    inObj.fromText(line);
                    converter.collect(inObj);
                }
                record_i++;
                records_returned++;
            }
        }
        reader.close();

        // Skip any remaining records that were supposed to be read from this file.
        // This can happen when a generated random position falls in the middle
        // of the last line.
        while (record_i < offsets.length && offsets[record_i] <= files_start_offset[file_i + 1])
            record_i++;
    }
    return records_returned;
}
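One edge case in the offset generation above is worth flagging: Math.abs(Long.MIN_VALUE) is still negative, so an (astronomically unlikely) draw of Long.MIN_VALUE would yield a negative offset. A defensive variant on Java 8+:

offsets[i] = total_length == 0 ? 0 : Math.floorMod(random.nextLong(), total_length);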
From source file:etl.cmd.test.XTestCase.java
License:Apache License
private void setUpEmbeddedHadoop2() throws Exception {
    if (dfsCluster != null && dfsCluster2 == null) {
        // Trick the dfs location for MiniDFSCluster, since it doesn't accept a location as input
        String testBuildDataSaved = System.getProperty("test.build.data", "build/test/data");
        try {
            System.setProperty("test.build.data", FilenameUtils.concat(testBuildDataSaved, "2"));
            // Only a DFS cluster is created, based upon current need
            MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(createDFSConfig());
            dfsCluster2 = builder.build();
            FileSystem fileSystem = dfsCluster2.getFileSystem();
            fileSystem.mkdirs(new Path("target/test-data"));
            fileSystem.mkdirs(new Path("/user"));
            fileSystem.mkdirs(new Path("/tmp"));
            fileSystem.setPermission(new Path("target/test-data"), FsPermission.valueOf("-rwxrwxrwx"));
            fileSystem.setPermission(new Path("/user"), FsPermission.valueOf("-rwxrwxrwx"));
            fileSystem.setPermission(new Path("/tmp"), FsPermission.valueOf("-rwxrwxrwx"));
            System.setProperty(OOZIE_TEST_NAME_NODE2, fileSystem.getConf().get("fs.defaultFS"));
        } catch (Exception ex) {
            shutdownMiniCluster2();
            throw ex;
        } finally {
            // Restore the previous value
            System.setProperty("test.build.data", testBuildDataSaved);
        }
    }
}
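The getConf() call at the end is what publishes the second cluster's generated name-node URI. A hypothetical test that wants to talk to that cluster could then reconnect from the system property, along these lines:

// Hypothetical usage elsewhere in the test suite: reconnect to the second
// mini cluster via the URI that setUpEmbeddedHadoop2() published.
Configuration conf = new Configuration();
conf.set("fs.defaultFS", System.getProperty(OOZIE_TEST_NAME_NODE2));
FileSystem fs2 = FileSystem.get(conf);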