List of usage examples for org.apache.hadoop.io.SequenceFile.Reader
@Deprecated public Reader(FileSystem fs, Path file, Configuration conf) throws IOException
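This constructor is deprecated. Since Hadoop 2.x the reader is instead built from a Configuration plus Reader.Option values, with the FileSystem resolved internally; a minimal sketch of the replacement, using a placeholder path:

    Configuration conf = new Configuration();
    Path path = new Path("/tmp/data.seq"); // hypothetical path
    // The file is passed as a Reader.Option instead of a FileSystem/Path pair.
    SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path));
    try {
        // ... read records ...
    } finally {
        reader.close();
    }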
From source file:com.hadoopilluminated.examples.PiEstimator.java
License:Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    // set up job conf
    jobConf.setJobName(PiEstimator.class.getSimpleName());
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);
    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file
    jobConf.setSpeculativeExecution(false);

    // set up input/output directories
    final Path inDir = new Path(TMP_DIR, "in");
    final Path outDir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException("Tmp directory " + fs.makeQualified(TMP_DIR)
                + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file,
                    LongWritable.class, LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        return BigDecimal.valueOf(4).setScale(20)
                .multiply(BigDecimal.valueOf(numInside.get()))
                .divide(BigDecimal.valueOf(numMaps))
                .divide(BigDecimal.valueOf(numPoints));
    } finally {
        fs.delete(TMP_DIR, true);
    }
}
From source file:com.kadwa.hadoop.DistExec.java
License:Open Source License
/** Check whether the file list has duplication. */
static private void checkDuplication(FileSystem fs, Path file, Path sorted, Configuration conf)
        throws IOException {
    SequenceFile.Reader in = null;
    try {
        SequenceFile.Sorter sorter = new SequenceFile.Sorter(fs, new Text.Comparator(),
                Text.class, Text.class, conf);
        sorter.sort(file, sorted);
        in = new SequenceFile.Reader(fs, sorted, conf);

        Text prevdst = null, curdst = new Text();
        Text prevsrc = null, cursrc = new Text();
        for (; in.next(curdst, cursrc);) {
            if (prevdst != null && curdst.equals(prevdst)) {
                throw new DuplicationException("Invalid input, there are duplicated files in the sources: "
                        + prevsrc + ", " + cursrc);
            }
            prevdst = curdst;
            curdst = new Text();
            prevsrc = cursrc;
            cursrc = new Text();
        }
    } finally {
        checkAndClose(in);
    }
}
From source file:com.kylinolap.job.hadoop.cube.RowKeyDistributionCheckerMapper.java
License:Apache License
@SuppressWarnings("deprecation")
public byte[][] getSplits(Configuration conf, Path path) {
    List<byte[]> rowkeyList = new ArrayList<byte[]>();
    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(path.getFileSystem(conf), path, conf);
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
        while (reader.next(key, value)) {
            byte[] tmp = ((Text) key).copyBytes();
            if (rowkeyList.contains(tmp) == false) {
                rowkeyList.add(tmp);
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        IOUtils.closeStream(reader);
    }
    byte[][] retValue = rowkeyList.toArray(new byte[rowkeyList.size()][]);
    return retValue;
}
From source file:com.pinterest.hdfsbackup.distcp.DistCp.java
License:Apache License
static private void finalize(Configuration conf, JobConf jobconf, final Path destPath,
        String presevedAttributes) throws IOException {
    if (presevedAttributes == null) {
        return;
    }
    EnumSet<FileAttribute> preseved = FileAttribute.parse(presevedAttributes);
    if (!preseved.contains(FileAttribute.USER) && !preseved.contains(FileAttribute.GROUP)
            && !preseved.contains(FileAttribute.PERMISSION)) {
        return;
    }

    FileSystem dstfs = destPath.getFileSystem(conf);
    Path dstdirlist = new Path(jobconf.get(DST_DIR_LIST_LABEL));
    SequenceFile.Reader in = null;
    try {
        in = new SequenceFile.Reader(dstdirlist.getFileSystem(jobconf), dstdirlist, jobconf);
        Text dsttext = new Text();
        FilePair pair = new FilePair();
        for (; in.next(dsttext, pair);) {
            Path absdst = new Path(destPath, pair.output);
            updatePermissions(pair.input, dstfs.getFileStatus(absdst), preseved, dstfs);
        }
    } finally {
        checkAndClose(in);
    }
}
From source file:com.pinterest.hdfsbackup.distcp.DistCp.java
License:Apache License
/** Delete the dst files/dirs which do not exist in src */
static private void deleteNonexisting(FileSystem dstfs, FileStatus dstroot, Path dstsorted,
        FileSystem jobfs, Path jobdir, JobConf jobconf, Configuration conf) throws IOException {
    if (!dstroot.isDir()) {
        throw new IOException("dst must be a directory when option " + Options.DELETE.cmd
                + " is set, but dst (= " + dstroot.getPath() + ") is not a directory.");
    }

    // write dst lsr results
    final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr");
    final SequenceFile.Writer writer = SequenceFile.createWriter(jobfs, jobconf, dstlsr,
            Text.class, FileStatus.class, SequenceFile.CompressionType.NONE);
    try {
        // do lsr to get all file statuses in dstroot
        final Stack<FileStatus> lsrstack = new Stack<FileStatus>();
        for (lsrstack.push(dstroot); !lsrstack.isEmpty();) {
            final FileStatus status = lsrstack.pop();
            if (status.isDir()) {
                for (FileStatus child : dstfs.listStatus(status.getPath())) {
                    String relative = makeRelative(dstroot.getPath(), child.getPath());
                    writer.append(new Text(relative), child);
                    lsrstack.push(child);
                }
            }
        }
    } finally {
        checkAndClose(writer);
    }

    // sort lsr results
    final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted");
    SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs, new Text.Comparator(),
            Text.class, FileStatus.class, jobconf);
    sorter.sort(dstlsr, sortedlsr);

    // compare lsr list and dst list
    SequenceFile.Reader lsrin = null;
    SequenceFile.Reader dstin = null;
    try {
        lsrin = new SequenceFile.Reader(jobfs, sortedlsr, jobconf);
        dstin = new SequenceFile.Reader(jobfs, dstsorted, jobconf);

        // compare sorted lsr list and sorted dst list
        final Text lsrpath = new Text();
        final FileStatus lsrstatus = new FileStatus();
        final Text dstpath = new Text();
        final Text dstfrom = new Text();
        final FsShell shell = new FsShell(conf);
        final String[] shellargs = { "-rmr", null };

        boolean hasnext = dstin.next(dstpath, dstfrom);
        for (; lsrin.next(lsrpath, lsrstatus);) {
            int dst_cmp_lsr = dstpath.compareTo(lsrpath);
            for (; hasnext && dst_cmp_lsr < 0;) {
                hasnext = dstin.next(dstpath, dstfrom);
                dst_cmp_lsr = dstpath.compareTo(lsrpath);
            }

            if (dst_cmp_lsr == 0) {
                // lsrpath exists in dst, skip it
                hasnext = dstin.next(dstpath, dstfrom);
            } else {
                // lsrpath does not exist, delete it
                String s = new Path(dstroot.getPath(), lsrpath.toString()).toString();
                if (shellargs[1] == null || !isAncestorPath(shellargs[1], s)) {
                    shellargs[1] = s;
                    int r = 0;
                    try {
                        r = shell.run(shellargs);
                    } catch (Exception e) {
                        throw new IOException("Exception from shell.", e);
                    }
                    if (r != 0) {
                        throw new IOException("\"" + shellargs[0] + " " + shellargs[1]
                                + "\" returns non-zero value " + r);
                    }
                }
            }
        }
    } finally {
        checkAndClose(lsrin);
        checkAndClose(dstin);
    }
}
From source file:com.pinterest.secor.uploader.Uploader.java
License:Apache License
/**
 * This method is intended to be overridden in tests.
 */
protected SequenceFile.Reader createReader(FileSystem fileSystem, Path path, Configuration configuration)
        throws IOException {
    return new SequenceFile.Reader(fileSystem, path, configuration);
}
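Because createReader is protected, a test can subclass the uploader and return a canned reader instead of opening a real file. A hypothetical sketch (not from the Secor source; Uploader's own constructor arguments are omitted here):

    // Hypothetical test double: short-circuits file access.
    class StubUploader extends Uploader {
        private final SequenceFile.Reader stub;

        StubUploader(SequenceFile.Reader stub) {
            this.stub = stub;
        }

        @Override
        protected SequenceFile.Reader createReader(FileSystem fileSystem, Path path,
                Configuration configuration) {
            return stub; // no filesystem call is made
        }
    }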
From source file:crunch.MaxTemperature.java
License:Apache License
public static void main(String[] args) throws Exception {
    String mapUri = args[0];

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(mapUri), conf);

    Path map = new Path(mapUri);
    Path mapData = new Path(map, MapFile.DATA_FILE_NAME);

    // Get key and value types from data sequence file
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, mapData, conf);
    Class keyClass = reader.getKeyClass();
    Class valueClass = reader.getValueClass();
    reader.close();

    // Create the map file index file
    long entries = MapFile.fix(fs, map, keyClass, valueClass, false, conf);
    System.out.printf("Created MapFile %s with %d entries\n", map, entries);
}
From source file:crunch.MaxTemperature.java
License:Apache License
public static void main(String[] args) throws IOException {
    String uri = args[0];
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(uri), conf);
    Path path = new Path(uri);

    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(fs, path, conf);
        Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
        Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
        long position = reader.getPosition();
        while (reader.next(key, value)) {
            String syncSeen = reader.syncSeen() ? "*" : "";
            System.out.printf("[%s%s]\t%s\t%s\n", position, syncSeen, key, value);
            position = reader.getPosition(); // beginning of next record
        }
    } finally {
        IOUtils.closeStream(reader);
    }
}
From source file:crunch.MaxTemperature.java
License:Apache License
@Before
public void setUp() throws IOException {
    SequenceFileWriteDemo.main(new String[] { SF_URI });

    Configuration conf = new Configuration();
    fs = FileSystem.get(URI.create(SF_URI), conf);
    Path path = new Path(SF_URI);

    reader = new SequenceFile.Reader(fs, path, conf);
    key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
    value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
}
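The matching tearDown is not shown in this listing; a plausible sketch, assuming the reader opened in setUp is the only resource to release:

    @After
    public void tearDown() throws IOException {
        IOUtils.closeStream(reader); // close the reader opened in setUp
    }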
From source file:dk.aau.cs.cloudetl.io.SequenceIndexFileReader.java
License:Apache License
@Override
public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) split;
    conf = context.getConfiguration();

    Path path = fileSplit.getPath();
    FileSystem fs = path.getFileSystem(conf);

    this.in = new SequenceFile.Reader(fs, path, conf);
    this.end = fileSplit.getStart() + fileSplit.getLength();

    if (fileSplit.getStart() > in.getPosition()) {
        in.sync(fileSplit.getStart()); // sync to start
    }

    this.start = in.getPosition();
    more = start < end;
}