List of usage examples for org.apache.hadoop.io SequenceFile createWriter
@Deprecated public static Writer createWriter(Configuration conf, FSDataOutputStream out, Class keyClass, Class valClass, CompressionType compressionType, CompressionCodec codec) throws IOException
From source file:com.m6d.hive.protobuf.LongTest.java
License:Apache License
public void testWriteReadProto() throws Exception { Path p = new Path(this.ROOT_DIR, "reallybigfile2"); SequenceFile.Writer w = SequenceFile.createWriter(this.getFileSystem(), new Configuration(), p, BytesWritable.class, BytesWritable.class, SequenceFile.CompressionType.BLOCK); long startLoad = System.currentTimeMillis(); int toLoad = load; for (int i = 0; i < toLoad; i++) { Person.Builder bbuild = Person.newBuilder(); Person ed = bbuild.setEmail(randomString()).setName(randomString()).setId(randomInt()) .setHobby(Hobby.newBuilder().setName(randomString())).build(); Person bo = bbuild.setEmail(randomString()).setName(randomString()).setId(randomInt()) .setHobby(Hobby.newBuilder().setName(randomString())).build(); BytesWritable key = new BytesWritable(); BytesWritable value = new BytesWritable(); ByteArrayOutputStream s = new ByteArrayOutputStream(); ed.writeTo(s);//from w ww.j a v a 2 s. com ByteArrayOutputStream t = new ByteArrayOutputStream(); bo.writeTo(t); key.set(s.toByteArray(), 0, s.size()); value.set(t.toByteArray(), 0, t.size()); w.append(key, value); } w.close(); long start = System.currentTimeMillis(); SequenceFile.Reader r = new SequenceFile.Reader(this.getFileSystem(), p, this.createJobConf()); BytesWritable readkey = new BytesWritable(); BytesWritable readval = new BytesWritable(); while (r.next(readkey, readval)) { byte[] c = new byte[readkey.getLength()]; System.arraycopy(readkey.getBytes(), 0, c, 0, readkey.getLength()); Person.parseFrom(c); byte[] d = new byte[readval.getLength()]; System.arraycopy(readval.getBytes(), 0, d, 0, readval.getLength()); Person.parseFrom(d); } long end = System.currentTimeMillis(); System.out.println("reading proto took" + (end - start)); r.close(); }
From source file:com.m6d.hive.protobuf.LongTest.java
License:Apache License
public void testBigProto() throws Exception { Path p = new Path(this.ROOT_DIR, "reallybigfile"); SequenceFile.Writer w = SequenceFile.createWriter(this.getFileSystem(), new Configuration(), p, BytesWritable.class, BytesWritable.class, SequenceFile.CompressionType.BLOCK); long startLoad = System.currentTimeMillis(); int toLoad = load; for (int i = 0; i < toLoad; i++) { Person.Builder bbuild = Person.newBuilder(); // Person ed = bbuild.setEmail("ed@email.com").setName("ed"). // setId(i).setHobby(Hobby.newBuilder().setName("java")).build(); Person ed = bbuild.setEmail(randomString()).setName(randomString()).setId(randomInt()) .setHobby(Hobby.newBuilder().setName(randomString())).build(); // Person bo = bbuild.setEmail("bo@email.com").setName("bo"). // setId(i).setHobby(Hobby.newBuilder().setName("bball")).build(); Person bo = bbuild.setEmail(randomString()).setName(randomString()).setId(randomInt()) .setHobby(Hobby.newBuilder().setName(randomString())).build(); BytesWritable key = new BytesWritable(); BytesWritable value = new BytesWritable(); ByteArrayOutputStream s = new ByteArrayOutputStream(); ed.writeTo(s);//from w ww.ja va 2 s . 
c o m ByteArrayOutputStream t = new ByteArrayOutputStream(); bo.writeTo(t); key.set(s.toByteArray(), 0, s.size()); value.set(t.toByteArray(), 0, t.size()); w.append(key, value); } w.close(); System.out.println("len " + this.getFileSystem().getFileStatus(p).getLen()); long endLoad = System.currentTimeMillis(); System.out.println((endLoad - startLoad) + " time taken loading"); String jarFile; jarFile = KVAsVSeqFileBinaryInputFormat.class.getProtectionDomain().getCodeSource().getLocation().getFile(); client.execute("add jar " + jarFile); client.execute("set hive.aux.jars.path=file:///" + jarFile); client.execute("create table bigproto " + " ROW FORMAT SERDE '" + ProtobufDeserializer.class.getName() + "'" + " WITH SERDEPROPERTIES ('KEY_SERIALIZE_CLASS'='" + Ex.Person.class.getName() + "','VALUE_SERIALIZE_CLASS'='" + Ex.Person.class.getName() + "' )" + " STORED AS INPUTFORMAT '" + KVAsVSeqFileBinaryInputFormat.class.getName() + "'" + " OUTPUTFORMAT '" + SequenceFileOutputFormat.class.getName() + "'"); client.execute("load data local inpath '" + p.toString() + "' into table bigproto"); long startQuery = System.currentTimeMillis(); client.execute("SELECT count(1) FROM bigproto"); List<String> results = client.fetchAll(); Assert.assertEquals(toLoad + "", results.get(0)); long endQuery = System.currentTimeMillis(); System.out.println((endQuery - startQuery) + " Proto Query time taken"); client.execute("drop table bigproto"); }
From source file:com.m6d.hive.protobuf.LongTest.java
License:Apache License
public void testBigDat() throws Exception { Path p = new Path(this.ROOT_DIR, "reallybigflat"); SequenceFile.Writer w = SequenceFile.createWriter(this.getFileSystem(), new Configuration(), p, NullWritable.class, Text.class, SequenceFile.CompressionType.BLOCK); long startLoad = System.currentTimeMillis(); int toLoad = load; for (int i = 0; i < toLoad; i++) { Text t = new Text(); //t.set("ed\ted@email.com\t1\tjava\tbob\tbob@email.com\t3\tbball"); t.set(randomString() + "\t" + randomString() + "\t" + randomInt() + "\t" + randomString() + "\t" + randomString() + "\t" + randomString() + "\t" + randomInt() + "\t" + randomString()); w.append(NullWritable.get(), t); }//from www .j a v a2 s . c o m w.close(); System.out.println("len " + this.getFileSystem().getFileStatus(p).getLen()); long endLoad = System.currentTimeMillis(); System.out.println((endLoad - startLoad) + " time taken loading"); String jarFile; jarFile = KVAsVSeqFileBinaryInputFormat.class.getProtectionDomain().getCodeSource().getLocation().getFile(); client.execute("add jar " + jarFile); client.execute("set hive.aux.jars.path=file:///" + jarFile); client.execute("create table bigtext " + "(name string, email string , id int , hobby string, " + " name1 string, email1 string, id1 int , hobby1 string)" + " ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t' STORED AS SEQUENCEFILE"); client.execute("load data local inpath '" + p.toString() + "' into table bigtext"); long startQuery = System.currentTimeMillis(); //client.execute( "select distinct(name) from bigtext"); //List<String> result = client.fetchAll(); //Assert.assertEquals("edward", result); client.execute("SELECT count(1) FROM bigtext"); List<String> results = client.fetchAll(); Assert.assertEquals(toLoad + "", results.get(0)); long endQuery = System.currentTimeMillis(); System.out.println((endQuery - startQuery) + " Query time taken"); client.execute("drop table bigproto"); }
From source file:com.mozilla.bagheera.sink.SequenceFileSink.java
License:Apache License
private void initWriter() throws IOException { if (LOG.isDebugEnabled()) { LOG.debug("Thread " + Thread.currentThread().getId() + " - initWriter() called"); }//from w w w . ja va 2 s. com if (!hdfs.exists(baseDir)) { hdfs.mkdirs(baseDir); } outputPath = new Path(baseDir, new Path(UUID.randomUUID().toString())); LOG.info("Opening file handle to: " + outputPath.toString()); if (useBytesValue) { writer = SequenceFile.createWriter(hdfs, conf, outputPath, Text.class, BytesWritable.class, CompressionType.BLOCK); } else { writer = SequenceFile.createWriter(hdfs, conf, outputPath, Text.class, Text.class, CompressionType.BLOCK); } // Get time in millis at a day resolution Calendar prev = Calendar.getInstance(); prev.set(Calendar.HOUR_OF_DAY, 0); prev.set(Calendar.MINUTE, 0); prev.set(Calendar.SECOND, 0); prev.set(Calendar.MILLISECOND, 0); nextRolloverMillis = prev.getTimeInMillis() + DAY_IN_MILLIS; }
From source file:com.ning.metrics.collector.events.hadoop.writer.HadoopFileEventWriter.java
License:Apache License
private HadoopOutputChunk getChunk(final Event event, final String outputDir, final String tmpOutputDir, final Object value, final Class<?> clazz) throws IOException { if (value == null) { // Trying to write a null value triggers an NPE in SequenceFile$BlockCompressWriter.append. // Return here to avoid creating useless directories in HDFS. log.warn("Deserialized event contains no data: " + event); return null; }// w w w .jav a2s . c om HadoopOutputChunk chunk = outputChunks.get(outputDir); if (chunk == null) { final DateTime now = new DateTime(); final String filename = String.format("%s-%s", now, sessionId).replace(":", "."); Path outputPath = new Path(outputDir, filename); Path tmpOutputPath = new Path(tmpOutputDir, filename); for (int suffix = 0; fsAccess.get().exists(tmpOutputPath); suffix++) { outputPath = new Path(outputDir, String.format("%s-%d", filename, suffix)); tmpOutputPath = new Path(tmpOutputDir, String.format("%s-%d", filename, suffix)); } log.info(String.format("OutputPath (tmp): %s (%s)", outputPath.toUri().getPath(), tmpOutputPath.toUri().getPath())); final SequenceFile.Writer writer = SequenceFile.createWriter(fsAccess.get(), fsAccess.get().getConf(), tmpOutputPath, TBooleanWritable.class, clazz, SequenceFile.CompressionType.BLOCK); chunk = new HadoopOutputChunk(tmpOutputPath, outputPath, writer); outputChunks.put(outputDir, chunk); } return chunk; }
From source file:com.phantom.hadoop.examples.QuasiMonteCarlo.java
License:Apache License
/** * Run a map/reduce job for estimating Pi. * * @return the estimated value of Pi/* w ww .j a va 2 s .c om*/ */ public static BigDecimal estimatePi(int numMaps, long numPoints, Path tmpDir, Configuration conf) throws IOException, ClassNotFoundException, InterruptedException { Job job = new Job(conf); // setup job conf job.setJobName(QuasiMonteCarlo.class.getSimpleName()); job.setJarByClass(QuasiMonteCarlo.class); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputKeyClass(BooleanWritable.class); job.setOutputValueClass(LongWritable.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapperClass(QmcMapper.class); job.setReducerClass(QmcReducer.class); job.setNumReduceTasks(1); // turn off speculative execution, because DFS doesn't handle // multiple writers to the same file. job.setSpeculativeExecution(false); // setup input/output directories final Path inDir = new Path(tmpDir, "in"); final Path outDir = new Path(tmpDir, "out"); FileInputFormat.setInputPaths(job, inDir); FileOutputFormat.setOutputPath(job, outDir); final FileSystem fs = FileSystem.get(conf); if (fs.exists(tmpDir)) { throw new IOException( "Tmp directory " + fs.makeQualified(tmpDir) + " already exists. 
Please remove it first."); } if (!fs.mkdirs(inDir)) { throw new IOException("Cannot create input directory " + inDir); } try { // generate an input file for each map task for (int i = 0; i < numMaps; ++i) { final Path file = new Path(inDir, "part" + i); final LongWritable offset = new LongWritable(i * numPoints); final LongWritable size = new LongWritable(numPoints); final SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, LongWritable.class, LongWritable.class, CompressionType.NONE); try { writer.append(offset, size); } finally { writer.close(); } System.out.println("Wrote input for Map #" + i); } // start a map/reduce job System.out.println("Starting Job"); final long startTime = System.currentTimeMillis(); job.waitForCompletion(true); final double duration = (System.currentTimeMillis() - startTime) / 1000.0; System.out.println("Job Finished in " + duration + " seconds"); // read outputs Path inFile = new Path(outDir, "reduce-out"); LongWritable numInside = new LongWritable(); LongWritable numOutside = new LongWritable(); SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf); try { reader.next(numInside, numOutside); } finally { reader.close(); } // compute estimated value final BigDecimal numTotal = BigDecimal.valueOf(numMaps).multiply(BigDecimal.valueOf(numPoints)); return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get())).divide(numTotal, RoundingMode.HALF_UP); } finally { fs.delete(tmpDir, true); } }
From source file:com.pinterest.hdfsbackup.distcp.DistCp.java
License:Apache License
/** * Initialize DFSCopyFileMapper specific job-configuration. * @param conf : The dfs/mapred configuration. * @param jobConf : The handle to the jobConf object to be initialized. * @param args Arguments//from www . ja va 2 s.c o m */ private static void setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException { jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString()); //set boolean values final boolean update = args.flags.contains(Options.UPDATE); final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE); jobConf.setBoolean(Options.UPDATE.propertyname, update); jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite); jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname, args.flags.contains(Options.IGNORE_READ_FAILURES)); jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname, args.flags.contains(Options.PRESERVE_STATUS)); final String randomId = getRandomId(); JobClient jClient = new JobClient(jobConf); Path jobDirectory = new Path(jClient.getSystemDir(), NAME + "_" + randomId); jobConf.set(JOB_DIR_LABEL, jobDirectory.toString()); FileSystem dstfs = args.dst.getFileSystem(conf); boolean dstExists = dstfs.exists(args.dst); boolean dstIsDir = false; if (dstExists) { dstIsDir = dstfs.getFileStatus(args.dst).isDir(); } // default logPath Path logPath = args.log; if (logPath == null) { String filename = "_distcp_logs_" + randomId; if (!dstExists || !dstIsDir) { Path parent = args.dst.getParent(); if (!dstfs.exists(parent)) { dstfs.mkdirs(parent); } logPath = new Path(parent, filename); } else { logPath = new Path(args.dst, filename); } } FileOutputFormat.setOutputPath(jobConf, logPath); // create src list, dst list FileSystem jobfs = jobDirectory.getFileSystem(jobConf); Path srcfilelist = new Path(jobDirectory, "_distcp_src_files"); jobConf.set(SRC_LIST_LABEL, srcfilelist.toString()); SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist, LongWritable.class, FilePair.class, 
SequenceFile.CompressionType.NONE); Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files"); SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist, Text.class, Text.class, SequenceFile.CompressionType.NONE); Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs"); jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString()); SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist, Text.class, FilePair.class, SequenceFile.CompressionType.NONE); // handle the case where the destination directory doesn't exist // and we've only a single src directory OR we're updating/overwriting // the contents of the destination directory. final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite; int srcCount = 0, cnsyncf = 0, dirsyn = 0; long fileCount = 0L, byteCount = 0L, cbsyncs = 0L; try { for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) { final Path src = srcItr.next(); FileSystem srcfs = src.getFileSystem(conf); FileStatus srcfilestat = srcfs.getFileStatus(src); Path root = special && srcfilestat.isDir() ? src : src.getParent(); if (srcfilestat.isDir()) { ++srcCount; } Stack<FileStatus> pathstack = new Stack<FileStatus>(); for (pathstack.push(srcfilestat); !pathstack.empty();) { FileStatus cur = pathstack.pop(); FileStatus[] children = srcfs.listStatus(cur.getPath()); for (int i = 0; i < children.length; i++) { boolean skipfile = false; final FileStatus child = children[i]; final String dst = makeRelative(root, child.getPath()); ++srcCount; if (child.isDir()) { pathstack.push(child); } else { //skip file if the src and the dst files are the same. 
skipfile = update && sameFile(srcfs, child, dstfs, new Path(args.dst, dst)); //skip file if it exceed file limit or size limit skipfile |= fileCount == args.filelimit || byteCount + child.getLen() > args.sizelimit; if (!skipfile) { ++fileCount; byteCount += child.getLen(); if (LOG.isTraceEnabled()) { LOG.trace("adding file " + child.getPath()); } ++cnsyncf; cbsyncs += child.getLen(); if (cnsyncf > SYNC_FILE_MAX || cbsyncs > BYTES_PER_MAP) { src_writer.sync(); dst_writer.sync(); cnsyncf = 0; cbsyncs = 0L; } } } if (!skipfile) { src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()), new FilePair(child, dst)); } dst_writer.append(new Text(dst), new Text(child.getPath().toString())); } if (cur.isDir()) { String dst = makeRelative(root, cur.getPath()); dir_writer.append(new Text(dst), new FilePair(cur, dst)); if (++dirsyn > SYNC_FILE_MAX) { dirsyn = 0; dir_writer.sync(); } } } } } finally { checkAndClose(src_writer); checkAndClose(dst_writer); checkAndClose(dir_writer); } FileStatus dststatus = null; try { dststatus = dstfs.getFileStatus(args.dst); } catch (FileNotFoundException fnfe) { LOG.info(args.dst + " does not exist."); } // create dest path dir if copying > 1 file if (dststatus == null) { if (srcCount > 1 && !dstfs.mkdirs(args.dst)) { throw new IOException("Failed to create" + args.dst); } } final Path sorted = new Path(jobDirectory, "_distcp_sorted"); checkDuplication(jobfs, dstfilelist, sorted, conf); if (dststatus != null && args.flags.contains(Options.DELETE)) { deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf); } Path tmpDir = new Path( (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst, "_distcp_tmp_" + randomId); jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString()); LOG.info("srcCount=" + srcCount); jobConf.setInt(SRC_COUNT_LABEL, srcCount); jobConf.setLong(TOTAL_SIZE_LABEL, byteCount); setMapCount(byteCount, jobConf); }
From source file:com.pinterest.hdfsbackup.distcp.DistCp.java
License:Apache License
/** Delete the dst files/dirs which do not exist in src */ static private void deleteNonexisting(FileSystem dstfs, FileStatus dstroot, Path dstsorted, FileSystem jobfs, Path jobdir, JobConf jobconf, Configuration conf) throws IOException { if (!dstroot.isDir()) { throw new IOException("dst must be a directory when option " + Options.DELETE.cmd + " is set, but dst (= " + dstroot.getPath() + ") is not a directory."); }/*from w w w .j a va 2 s .c o m*/ //write dst lsr results final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr"); final SequenceFile.Writer writer = SequenceFile.createWriter(jobfs, jobconf, dstlsr, Text.class, FileStatus.class, SequenceFile.CompressionType.NONE); try { //do lsr to get all file statuses in dstroot final Stack<FileStatus> lsrstack = new Stack<FileStatus>(); for (lsrstack.push(dstroot); !lsrstack.isEmpty();) { final FileStatus status = lsrstack.pop(); if (status.isDir()) { for (FileStatus child : dstfs.listStatus(status.getPath())) { String relative = makeRelative(dstroot.getPath(), child.getPath()); writer.append(new Text(relative), child); lsrstack.push(child); } } } } finally { checkAndClose(writer); } //sort lsr results final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted"); SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs, new Text.Comparator(), Text.class, FileStatus.class, jobconf); sorter.sort(dstlsr, sortedlsr); //compare lsr list and dst list SequenceFile.Reader lsrin = null; SequenceFile.Reader dstin = null; try { lsrin = new SequenceFile.Reader(jobfs, sortedlsr, jobconf); dstin = new SequenceFile.Reader(jobfs, dstsorted, jobconf); //compare sorted lsr list and sorted dst list final Text lsrpath = new Text(); final FileStatus lsrstatus = new FileStatus(); final Text dstpath = new Text(); final Text dstfrom = new Text(); final FsShell shell = new FsShell(conf); final String[] shellargs = { "-rmr", null }; boolean hasnext = dstin.next(dstpath, dstfrom); for (; lsrin.next(lsrpath, lsrstatus);) { int 
dst_cmp_lsr = dstpath.compareTo(lsrpath); for (; hasnext && dst_cmp_lsr < 0;) { hasnext = dstin.next(dstpath, dstfrom); dst_cmp_lsr = dstpath.compareTo(lsrpath); } if (dst_cmp_lsr == 0) { //lsrpath exists in dst, skip it hasnext = dstin.next(dstpath, dstfrom); } else { //lsrpath does not exist, delete it String s = new Path(dstroot.getPath(), lsrpath.toString()).toString(); if (shellargs[1] == null || !isAncestorPath(shellargs[1], s)) { shellargs[1] = s; int r = 0; try { r = shell.run(shellargs); } catch (Exception e) { throw new IOException("Exception from shell.", e); } if (r != 0) { throw new IOException( "\"" + shellargs[0] + " " + shellargs[1] + "\" returns non-zero value " + r); } } } } } finally { checkAndClose(lsrin); checkAndClose(dstin); } }
From source file:com.ripariandata.timberwolf.writer.hive.SequenceFileMailWriter.java
License:Apache License
/**
 * Writes the given mailbox items to {@code outStream} as an uncompressed
 * {@code Text}/{@code Text} sequence file.
 *
 * @param mails the items to write
 * @throws HiveMailWriterException wrapping any {@link IOException} raised
 *         while creating, writing to, or closing the sequence-file writer
 */
@Override
public void write(final Iterable<MailboxItem> mails) {
    try {
        SequenceFile.Writer writer = SequenceFile.createWriter(new Configuration(), outStream, Text.class,
                Text.class, SequenceFile.CompressionType.NONE, null);
        try {
            write(mails, writer);
        } finally {
            // Close on failure as well; the original skipped close() whenever
            // the delegated write threw.
            writer.close();
        }
    } catch (IOException e) {
        LOG.error("There was an error writing to the Hive file.");
        throw HiveMailWriterException.log(LOG,
                new HiveMailWriterException("There was an error writing to the Hive file", e));
    }
}
From source file:com.scaleunlimited.cascading.DistCp.java
License:Apache License
/** Delete the dst files/dirs which do not exist in src */ static private void deleteNonexisting(FileSystem dstfs, FileStatus dstroot, Path dstsorted, FileSystem jobfs, Path jobdir, JobConf jobconf, Configuration conf) throws IOException { if (!dstroot.isDir()) { throw new IOException("dst must be a directory when option " + Options.DELETE.cmd + " is set, but dst (= " + dstroot.getPath() + ") is not a directory."); }// w w w . j a va2s . co m //write dst lsr results final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr"); final SequenceFile.Writer writer = SequenceFile.createWriter(jobfs, jobconf, dstlsr, Text.class, FileStatus.class, SequenceFile.CompressionType.NONE); try { //do lsr to get all file statuses in dstroot final Stack<FileStatus> lsrstack = new Stack<FileStatus>(); for (lsrstack.push(dstroot); !lsrstack.isEmpty();) { final FileStatus status = lsrstack.pop(); if (status.isDir()) { for (FileStatus child : dstfs.listStatus(status.getPath())) { String relative = makeRelative(dstroot.getPath(), child.getPath()); writer.append(new Text(relative), child); lsrstack.push(child); } } } } finally { checkAndClose(writer); } //sort lsr results final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted"); SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs, new Text.Comparator(), Text.class, FileStatus.class, jobconf); sorter.sort(dstlsr, sortedlsr); //compare lsr list and dst list SequenceFile.Reader lsrin = null; SequenceFile.Reader dstin = null; try { lsrin = new SequenceFile.Reader(jobfs, sortedlsr, jobconf); dstin = new SequenceFile.Reader(jobfs, dstsorted, jobconf); //compare sorted lsr list and sorted dst list final Text lsrpath = new Text(); final FileStatus lsrstatus = new FileStatus(); final Text dstpath = new Text(); final Text dstfrom = new Text(); final FsShell shell = new FsShell(conf); final String[] shellargs = { "-rmr", null }; boolean hasnext = dstin.next(dstpath, dstfrom); for (; lsrin.next(lsrpath, lsrstatus);) { int dst_cmp_lsr = 
dstpath.compareTo(lsrpath); for (; hasnext && dst_cmp_lsr < 0;) { hasnext = dstin.next(dstpath, dstfrom); dst_cmp_lsr = dstpath.compareTo(lsrpath); } if (dst_cmp_lsr == 0) { //lsrpath exists in dst, skip it hasnext = dstin.next(dstpath, dstfrom); } else { //lsrpath does not exist, delete it String s = new Path(dstroot.getPath(), lsrpath.toString()).toString(); if (shellargs[1] == null || !isAncestorPath(shellargs[1], s)) { shellargs[1] = s; int r = 0; try { r = shell.run(shellargs); } catch (Exception e) { throw new IOException("Exception from shell.", e); } if (r != 0) { throw new IOException( "\"" + shellargs[0] + " " + shellargs[1] + "\" returns non-zero value " + r); } } } } } finally { checkAndClose(lsrin); checkAndClose(dstin); } }