List of usage examples for the org.apache.hadoop.io.MapFile.Reader constructor
@Deprecated public Reader(FileSystem fs, String dirName, Configuration conf) throws IOException
From source file:com.datatorrent.contrib.hdht.HadoopFilePerformanceTest.java
License:Open Source License
@Test public void testMapFileRead() throws Exception { logger.info("Reading {} with {} key/value pairs", Testfile.MAPFILE.filename(), String.format("%,d", testSize)); writeMapFile();/*from w ww . j a v a 2 s.co m*/ Text key = new Text(); Text value = new Text(); writeMapFile(); // Set amount of memory to use for buffer float bufferPercent = 0.25f; int bufferSize = (int) (ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax() * bufferPercent); MapFile.Reader reader = new MapFile.Reader(Testfile.MAPFILE.filepath(), conf, SequenceFile.Reader.bufferSize(bufferSize)); startTimer(); reader.reset(); while (reader.next(key, value)) { //logger.debug("read key:{} value:{}", key, value); } logger.info("Duration for reader.next() SEQUENTIAL keys: {}", stopTimer(Testfile.MAPFILE, "READ-SEQ")); startTimer(); reader.reset(); for (int i = 0; i < testSize; i++) { key.set(getKey(i)); reader.get(key, value); //logger.debug("{}:{}", key, value); } logger.info("Duration for reader.get(key) SEQUENTIAL keys: {}", stopTimer(Testfile.MAPFILE, "READ-SEQ-ID")); Random random = new Random(); startTimer(); for (int i = 0; i < testSize; i++) { key.set(getKey(random.nextInt(testSize))); reader.get(key, value); //logger.debug("{}:{}", key, value); } logger.info("Duration for reader.get(key) RANDOM keys: {}", stopTimer(Testfile.MAPFILE, "READ-RAND")); reader.close(); }
From source file:crunch.MaxTemperature.java
License:Apache License
/**
 * Test fixture. Invokes {@code MapFileWriteDemo.main} with {@code MAP_URI}
 * (presumably this writes the MapFile the tests read; confirm against that class),
 * opens a {@code MapFile.Reader} on {@code MAP_URI}, and instantiates the key and
 * value holders from the classes the reader reports.
 *
 * @throws IOException if the demo writer, the file system, or the reader fails
 */
@Before
public void setUp() throws IOException {
    final String[] demoArgs = { MAP_URI };
    MapFileWriteDemo.main(demoArgs);

    final Configuration configuration = new Configuration();
    final URI mapLocation = URI.create(MAP_URI);
    fs = FileSystem.get(mapLocation, configuration);
    reader = new MapFile.Reader(fs, MAP_URI, configuration);

    final Object keyHolder = ReflectionUtils.newInstance(reader.getKeyClass(), configuration);
    key = (WritableComparable<?>) keyHolder;

    final Object valueHolder = ReflectionUtils.newInstance(reader.getValueClass(), configuration);
    value = (Writable) valueHolder;
}
From source file:io.aos.hdfs.MapFileSeekTest.java
License:Apache License
/**
 * Prepares the fixture for each test: calls {@code MapFileWriteDemo.main} with
 * {@code MAP_URI}, obtains the file system for that URI, opens a
 * {@code MapFile.Reader} on it, and creates empty key/value instances of the
 * reader's declared key and value classes.
 *
 * @throws IOException if any of the file system or reader operations fail
 */
@Before
public void setUp() throws IOException {
    MapFileWriteDemo.main(new String[] { MAP_URI });

    final Configuration hadoopConf = new Configuration();
    fs = FileSystem.get(URI.create(MAP_URI), hadoopConf);
    reader = new MapFile.Reader(fs, MAP_URI, hadoopConf);

    final Class<?> keyType = reader.getKeyClass();
    key = (WritableComparable<?>) ReflectionUtils.newInstance(keyType, hadoopConf);

    final Class<?> valueType = reader.getValueClass();
    value = (Writable) ReflectionUtils.newInstance(valueType, hadoopConf);
}
From source file:org.archive.jbs.tools.Dump.java
License:Apache License
public void dump(FileSystem fs, Path inputPath, int mode) throws Exception { Configuration conf = getConf(); MapFile.Reader mapReader = null; SequenceFile.Reader seqReader = null; try {/*from ww w . j a v a2s. c om*/ mapReader = new MapFile.Reader(fs, inputPath.toString(), conf); } catch (IOException ioe) { // Hrm, try a sequence file... } if (mapReader != null) { WritableComparable key = (WritableComparable) ReflectionUtils.newInstance(mapReader.getKeyClass(), conf); Writable value = (Writable) ReflectionUtils.newInstance(mapReader.getValueClass(), conf); while (mapReader.next(key, value)) { output(key, value, mode); } } else { // Not a MapFile...try a SequenceFile. try { seqReader = new SequenceFile.Reader(fs, inputPath, conf); } catch (IOException ioe) { // Hrm, neither MapFile nor SequenceFile. throw new IOException("Cannot open file: " + inputPath); } WritableComparable key = (WritableComparable) ReflectionUtils.newInstance(seqReader.getKeyClass(), conf); Writable value = (Writable) ReflectionUtils.newInstance(seqReader.getValueClass(), conf); while (seqReader.next(key, value)) { output(key, value, mode); } } }
From source file:org.archive.nutchwax.tools.PageRanker.java
License:Apache License
/** * *///from w w w . j a va2s . co m public int run(String[] args) throws Exception { String usage = "Usage: PageRanker [OPTIONS] outputFile <linkdb|paths>\n" + "Emit PageRank values for URLs in linkDb(s). Suitable for use with\n" + "PageRank scoring filter.\n" + "\n" + "OPTIONS:\n" + " -p Use exact path as given, don't assume it's a typical\n" + " linkdb with \"current/part-nnnnn\" subdirs.\n" + " -t threshold Do not emit records with less than this many inlinks.\n" + " Default value 10."; if (args.length < 1) { System.err.println("Usage: " + usage); return -1; } boolean exactPath = false; int threshold = 10; int pos = 0; for (; pos < args.length && args[pos].charAt(0) == '-'; pos++) { if (args[pos].equals("-p")) { exactPath = true; } if (args[pos].equals("-t")) { pos++; if (args.length - pos < 1) { System.err.println("Error: missing argument to -t option"); return -1; } try { threshold = Integer.parseInt(args[pos]); } catch (NumberFormatException nfe) { System.err.println("Error: bad value for -t option: " + args[pos]); return -1; } } } Configuration conf = getConf(); FileSystem fs = FileSystem.get(conf); if (pos >= args.length) { System.err.println("Error: missing outputFile"); return -1; } Path outputPath = new Path(args[pos++]); if (fs.exists(outputPath)) { System.err.println("Erorr: outputFile already exists: " + outputPath); return -1; } if (pos >= args.length) { System.err.println("Error: missing linkdb"); return -1; } List<Path> mapfiles = new ArrayList<Path>(); // If we are using exact paths, add each one to the list. // Otherwise, assume the given path is to a linkdb and look for // <linkdbPath>/current/part-nnnnn sub-dirs. 
if (exactPath) { for (; pos < args.length; pos++) { mapfiles.add(new Path(args[pos])); } } else { for (; pos < args.length; pos++) { FileStatus[] fstats = fs.listStatus(new Path(args[pos] + "/current"), HadoopFSUtil.getPassDirectoriesFilter(fs)); mapfiles.addAll(Arrays.asList(HadoopFSUtil.getPaths(fstats))); } } System.out.println("mapfiles = " + mapfiles); PrintWriter output = new PrintWriter( new OutputStreamWriter(fs.create(outputPath).getWrappedStream(), "UTF-8")); try { for (Path p : mapfiles) { MapFile.Reader reader = new MapFile.Reader(fs, p.toString(), conf); WritableComparable key = (WritableComparable) ReflectionUtils.newInstance(reader.getKeyClass(), conf); Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf); while (reader.next(key, value)) { if (!(key instanceof Text)) continue; String toUrl = ((Text) key).toString(); // HACK: Should make this into some externally configurable regex. if (!toUrl.startsWith("http")) continue; int count = -1; if (value instanceof IntWritable) { count = ((IntWritable) value).get(); } else if (value instanceof Inlinks) { Inlinks inlinks = (Inlinks) value; count = inlinks.size(); } if (count < threshold) continue; output.println(count + " " + toUrl); } } return 0; } catch (Exception e) { LOG.fatal("PageRanker: " + StringUtils.stringifyException(e)); return -1; } finally { output.flush(); output.close(); } }
From source file:org.archive.nutchwax.tools.ParseTextCombiner.java
License:Apache License
/** * *///from w ww . j a v a2s. c o m public int run(String[] args) throws Exception { String usage = "Usage: ParseTextCombiner [-v] output input...\n"; if (args.length < 1) { System.err.println("Usage: " + usage); return 1; } if (args[0].equals("-h")) { System.err.println("Usage: " + usage); return 1; } int argStart = 0; if (args[argStart].equals("-v")) { verbose = true; argStart = 1; } if (args.length - argStart < 2) { System.err.println("Usage: " + usage); return 1; } Configuration conf = getConf(); FileSystem fs = FileSystem.get(conf); Path outputPath = new Path(args[argStart]); if (fs.exists(outputPath)) { System.err.println("ERROR: output already exists: " + outputPath); return -1; } MapFile.Reader[] readers = new MapFile.Reader[args.length - argStart - 1]; for (int pos = argStart + 1; pos < args.length; pos++) { readers[pos - argStart - 1] = new MapFile.Reader(fs, args[pos], conf); } WritableComparable[] keys = new WritableComparable[readers.length]; Writable[] values = new Writable[readers.length]; WritableComparator wc = WritableComparator.get((Class<WritableComparable>) readers[0].getKeyClass()); MapFile.Writer writer = new MapFile.Writer(conf, fs, outputPath.toString(), (Class<WritableComparable>) readers[0].getKeyClass(), readers[0].getValueClass()); int readCount = 0; int writeCount = 0; for (int i = 0; i < readers.length; i++) { WritableComparable key = (WritableComparable) ReflectionUtils.newInstance(readers[i].getKeyClass(), conf); Writable value = (Writable) ReflectionUtils.newInstance(readers[i].getValueClass(), conf); if (readers[i].next(key, value)) { keys[i] = key; values[i] = value; readCount++; if (verbose) System.out.println("read: " + i + ": " + key); } else { // Not even one key/value pair in the map. 
System.out.println("WARN: No key/value pairs in mapfile: " + args[i + argStart + 1]); try { readers[i].close(); } catch (IOException ioe) { /* Don't care */ } readers[i] = null; } } while (true) { int candidate = -1; for (int i = 0; i < keys.length; i++) { if (keys[i] == null) continue; if (candidate < 0) { candidate = i; } else if (wc.compare(keys[i], keys[candidate]) < 0) { candidate = i; } } if (candidate < 0) { if (verbose) System.out.println("Candidate < 0, all done."); break; } // Candidate is the index of the "smallest" key. // Write it out. writer.append(keys[candidate], values[candidate]); writeCount++; if (verbose) System.out.println("write: " + candidate + ": " + keys[candidate]); // Now read in a new value from the corresponding reader. if (!readers[candidate].next(keys[candidate], values[candidate])) { if (verbose) System.out.println( "No more key/value pairs in (" + candidate + "): " + args[candidate + argStart + 1]); // No more key/value pairs left in this reader. try { readers[candidate].close(); } catch (IOException ioe) { /* Don't care */ } readers[candidate] = null; keys[candidate] = null; values[candidate] = null; } else { readCount++; if (verbose) System.out.println("read: " + candidate + ": " + keys[candidate]); } } System.out.println("Total # records in : " + readCount); System.out.println("Total # records out: " + writeCount); writer.close(); return 0; }
From source file:org.hadoop.tdg.TestPseudoHadoop.java
License:Apache License
/** * sorted sequence file/*from w w w . j a v a2 s. com*/ * * @throws IOException */ @Test public void mapFileIO() throws IOException { LongWritable key = new LongWritable(); Text value = new Text(); MapFile.Writer writer = null; try { writer = new MapFile.Writer(fs.getConf(), fs, DST, key.getClass(), value.getClass()); for (int i = 0; i < 100; i++) { key.set(i); value.set(DATA[i % DATA.length]); writer.append(key, value); } } finally { IOUtils.closeStream(writer); } MapFile.Reader reader = null; try { reader = new MapFile.Reader(fs, DST, fs.getConf()); LongWritable readerKey = (LongWritable) ReflectionUtils.newInstance(reader.getKeyClass(), fs.getConf()); Text readerValue = (Text) ReflectionUtils.newInstance(reader.getValueClass(), fs.getConf()); while (reader.next(readerKey, readerValue)) { System.out.printf("%s\t%s\n", readerKey, readerValue); } } finally { IOUtils.closeStream(writer); } }