Example usage for org.apache.hadoop.io SequenceFile createWriter

Introduction

On this page you can find example usages of org.apache.hadoop.io.SequenceFile#createWriter.

Prototype

@Deprecated
public static Writer createWriter(Configuration conf, FSDataOutputStream out, Class keyClass, Class valClass,
        CompressionType compressionType, CompressionCodec codec) throws IOException 

Document

Construct the preferred type of 'raw' SequenceFile Writer.
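
Since the prototype above is deprecated, here is a minimal, self-contained sketch of how this overload can be called. The output path, key/value classes, and codec below are illustrative assumptions, not taken from the usage examples that follow.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.DefaultCodec;

public class CreateWriterSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Illustrative output location; adjust for your environment.
        FSDataOutputStream out = fs.create(new Path("/tmp/example.seq"));
        SequenceFile.Writer writer = SequenceFile.createWriter(conf, out,
                Text.class, IntWritable.class,
                SequenceFile.CompressionType.RECORD, new DefaultCodec());
        try {
            writer.append(new Text("answer"), new IntWritable(42));
        } finally {
            // Finalizes the SequenceFile; the caller-supplied stream is closed separately.
            writer.close();
            out.close();
        }
    }
}

In Hadoop 2 and later, the non-deprecated replacement is the varargs overload SequenceFile.createWriter(Configuration, Writer.Option...), using options such as Writer.file(path) or Writer.stream(out), Writer.keyClass(...), Writer.valueClass(...), and Writer.compression(...).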

Usage

From source file:com.m6d.hive.protobuf.LongTest.java

License:Apache License

public void testWriteReadProto() throws Exception {
    Path p = new Path(this.ROOT_DIR, "reallybigfile2");

    SequenceFile.Writer w = SequenceFile.createWriter(this.getFileSystem(), new Configuration(), p,
            BytesWritable.class, BytesWritable.class, SequenceFile.CompressionType.BLOCK);

    long startLoad = System.currentTimeMillis();
    int toLoad = load;
    for (int i = 0; i < toLoad; i++) {
        Person.Builder bbuild = Person.newBuilder();
        Person ed = bbuild.setEmail(randomString()).setName(randomString()).setId(randomInt())
                .setHobby(Hobby.newBuilder().setName(randomString())).build();
        Person bo = bbuild.setEmail(randomString()).setName(randomString()).setId(randomInt())
                .setHobby(Hobby.newBuilder().setName(randomString())).build();

        BytesWritable key = new BytesWritable();
        BytesWritable value = new BytesWritable();
        ByteArrayOutputStream s = new ByteArrayOutputStream();
        ed.writeTo(s);

        ByteArrayOutputStream t = new ByteArrayOutputStream();
        bo.writeTo(t);

        key.set(s.toByteArray(), 0, s.size());
        value.set(t.toByteArray(), 0, t.size());
        w.append(key, value);
    }
    w.close();

    long start = System.currentTimeMillis();
    SequenceFile.Reader r = new SequenceFile.Reader(this.getFileSystem(), p, this.createJobConf());
    BytesWritable readkey = new BytesWritable();
    BytesWritable readval = new BytesWritable();
    while (r.next(readkey, readval)) {
        byte[] c = new byte[readkey.getLength()];
        System.arraycopy(readkey.getBytes(), 0, c, 0, readkey.getLength());
        Person.parseFrom(c);

        byte[] d = new byte[readval.getLength()];
        System.arraycopy(readval.getBytes(), 0, d, 0, readval.getLength());
        Person.parseFrom(d);
    }
    long end = System.currentTimeMillis();

    System.out.println("reading proto took" + (end - start));
    r.close();
}

From source file:com.m6d.hive.protobuf.LongTest.java

License:Apache License

public void testBigProto() throws Exception {
    Path p = new Path(this.ROOT_DIR, "reallybigfile");

    SequenceFile.Writer w = SequenceFile.createWriter(this.getFileSystem(), new Configuration(), p,
            BytesWritable.class, BytesWritable.class, SequenceFile.CompressionType.BLOCK);

    long startLoad = System.currentTimeMillis();
    int toLoad = load;
    for (int i = 0; i < toLoad; i++) {
        Person.Builder bbuild = Person.newBuilder();
        //  Person ed = bbuild.setEmail("ed@email.com").setName("ed").
        //          setId(i).setHobby(Hobby.newBuilder().setName("java")).build();

        Person ed = bbuild.setEmail(randomString()).setName(randomString()).setId(randomInt())
                .setHobby(Hobby.newBuilder().setName(randomString())).build();

        // Person bo = bbuild.setEmail("bo@email.com").setName("bo").
        //        setId(i).setHobby(Hobby.newBuilder().setName("bball")).build();
        Person bo = bbuild.setEmail(randomString()).setName(randomString()).setId(randomInt())
                .setHobby(Hobby.newBuilder().setName(randomString())).build();

        BytesWritable key = new BytesWritable();
        BytesWritable value = new BytesWritable();
        ByteArrayOutputStream s = new ByteArrayOutputStream();
        ed.writeTo(s);

        ByteArrayOutputStream t = new ByteArrayOutputStream();
        bo.writeTo(t);

        key.set(s.toByteArray(), 0, s.size());
        value.set(t.toByteArray(), 0, t.size());
        w.append(key, value);
    }
    w.close();
    System.out.println("len " + this.getFileSystem().getFileStatus(p).getLen());
    long endLoad = System.currentTimeMillis();
    System.out.println((endLoad - startLoad) + " time taken loading");

    String jarFile;
    jarFile = KVAsVSeqFileBinaryInputFormat.class.getProtectionDomain().getCodeSource().getLocation().getFile();
    client.execute("add jar " + jarFile);
    client.execute("set hive.aux.jars.path=file:///" + jarFile);

    client.execute("create table  bigproto   " + " ROW FORMAT SERDE '" + ProtobufDeserializer.class.getName()
            + "'" + " WITH SERDEPROPERTIES ('KEY_SERIALIZE_CLASS'='" + Ex.Person.class.getName()
            + "','VALUE_SERIALIZE_CLASS'='" + Ex.Person.class.getName() + "'   )" + " STORED AS INPUTFORMAT '"
            + KVAsVSeqFileBinaryInputFormat.class.getName() + "'" + " OUTPUTFORMAT '"
            + SequenceFileOutputFormat.class.getName() + "'");

    client.execute("load data local inpath '" + p.toString() + "' into table bigproto");

    long startQuery = System.currentTimeMillis();
    client.execute("SELECT count(1) FROM bigproto");
    List<String> results = client.fetchAll();
    Assert.assertEquals(toLoad + "", results.get(0));
    long endQuery = System.currentTimeMillis();

    System.out.println((endQuery - startQuery) + " Proto Query time taken");
    client.execute("drop table bigproto");

}

From source file:com.m6d.hive.protobuf.LongTest.java

License:Apache License

public void testBigDat() throws Exception {
    Path p = new Path(this.ROOT_DIR, "reallybigflat");
    SequenceFile.Writer w = SequenceFile.createWriter(this.getFileSystem(), new Configuration(), p,
            NullWritable.class, Text.class, SequenceFile.CompressionType.BLOCK);

    long startLoad = System.currentTimeMillis();
    int toLoad = load;
    for (int i = 0; i < toLoad; i++) {
        Text t = new Text();
        //t.set("ed\ted@email.com\t1\tjava\tbob\tbob@email.com\t3\tbball");
        t.set(randomString() + "\t" + randomString() + "\t" + randomInt() + "\t" + randomString() + "\t"
                + randomString() + "\t" + randomString() + "\t" + randomInt() + "\t" + randomString());

        w.append(NullWritable.get(), t);
    }
    w.close();
    System.out.println("len " + this.getFileSystem().getFileStatus(p).getLen());
    long endLoad = System.currentTimeMillis();
    System.out.println((endLoad - startLoad) + " time taken loading");

    String jarFile;
    jarFile = KVAsVSeqFileBinaryInputFormat.class.getProtectionDomain().getCodeSource().getLocation().getFile();
    client.execute("add jar " + jarFile);
    client.execute("set hive.aux.jars.path=file:///" + jarFile);

    client.execute("create table  bigtext   " + "(name string, email string , id int , hobby string, "
            + " name1 string, email1 string, id1 int , hobby1 string)"
            + " ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\t' STORED AS SEQUENCEFILE");

    client.execute("load data local inpath '" + p.toString() + "' into table bigtext");

    long startQuery = System.currentTimeMillis();
    //client.execute( "select distinct(name) from bigtext");
    //List<String> result = client.fetchAll();
    //Assert.assertEquals("edward", result);
    client.execute("SELECT count(1) FROM bigtext");
    List<String> results = client.fetchAll();
    Assert.assertEquals(toLoad + "", results.get(0));
    long endQuery = System.currentTimeMillis();

    System.out.println((endQuery - startQuery) + " Query time taken");
    client.execute("drop table bigproto");

}

From source file:com.mozilla.bagheera.sink.SequenceFileSink.java

License:Apache License

private void initWriter() throws IOException {
    if (LOG.isDebugEnabled()) {
        LOG.debug("Thread " + Thread.currentThread().getId() + " - initWriter() called");
    }

    if (!hdfs.exists(baseDir)) {
        hdfs.mkdirs(baseDir);
    }

    outputPath = new Path(baseDir, new Path(UUID.randomUUID().toString()));
    LOG.info("Opening file handle to: " + outputPath.toString());

    if (useBytesValue) {
        writer = SequenceFile.createWriter(hdfs, conf, outputPath, Text.class, BytesWritable.class,
                CompressionType.BLOCK);
    } else {
        writer = SequenceFile.createWriter(hdfs, conf, outputPath, Text.class, Text.class,
                CompressionType.BLOCK);
    }

    // Get time in millis at a day resolution
    Calendar prev = Calendar.getInstance();
    prev.set(Calendar.HOUR_OF_DAY, 0);
    prev.set(Calendar.MINUTE, 0);
    prev.set(Calendar.SECOND, 0);
    prev.set(Calendar.MILLISECOND, 0);
    nextRolloverMillis = prev.getTimeInMillis() + DAY_IN_MILLIS;
}

From source file:com.ning.metrics.collector.events.hadoop.writer.HadoopFileEventWriter.java

License:Apache License

private HadoopOutputChunk getChunk(final Event event, final String outputDir, final String tmpOutputDir,
        final Object value, final Class<?> clazz) throws IOException {
    if (value == null) {
        // Trying to write a null value triggers an NPE in SequenceFile$BlockCompressWriter.append.
        // Return here to avoid creating useless directories in HDFS.
        log.warn("Deserialized event contains no data: " + event);
        return null;
    }

    HadoopOutputChunk chunk = outputChunks.get(outputDir);
    if (chunk == null) {
        final DateTime now = new DateTime();
        final String filename = String.format("%s-%s", now, sessionId).replace(":", ".");
        Path outputPath = new Path(outputDir, filename);
        Path tmpOutputPath = new Path(tmpOutputDir, filename);

        for (int suffix = 0; fsAccess.get().exists(tmpOutputPath); suffix++) {
            outputPath = new Path(outputDir, String.format("%s-%d", filename, suffix));
            tmpOutputPath = new Path(tmpOutputDir, String.format("%s-%d", filename, suffix));
        }

        log.info(String.format("OutputPath (tmp): %s (%s)", outputPath.toUri().getPath(),
                tmpOutputPath.toUri().getPath()));
        final SequenceFile.Writer writer = SequenceFile.createWriter(fsAccess.get(), fsAccess.get().getConf(),
                tmpOutputPath, TBooleanWritable.class, clazz, SequenceFile.CompressionType.BLOCK);
        chunk = new HadoopOutputChunk(tmpOutputPath, outputPath, writer);
        outputChunks.put(outputDir, chunk);
    }
    return chunk;
}

From source file:com.phantom.hadoop.examples.QuasiMonteCarlo.java

License:Apache License

/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimatePi(int numMaps, long numPoints, Path tmpDir, Configuration conf)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(conf);
    // setup job conf
    job.setJobName(QuasiMonteCarlo.class.getSimpleName());
    job.setJarByClass(QuasiMonteCarlo.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setOutputKeyClass(BooleanWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(QmcMapper.class);

    job.setReducerClass(QmcReducer.class);
    job.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    job.setSpeculativeExecution(false);

    // setup input/output directories
    final Path inDir = new Path(tmpDir, "in");
    final Path outDir = new Path(tmpDir, "out");
    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);

    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(tmpDir)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(tmpDir) + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, LongWritable.class,
                    LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        job.waitForCompletion(true);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        final BigDecimal numTotal = BigDecimal.valueOf(numMaps).multiply(BigDecimal.valueOf(numPoints));
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get())).divide(numTotal,
                RoundingMode.HALF_UP);
    } finally {
        fs.delete(tmpDir, true);
    }
}

From source file:com.pinterest.hdfsbackup.distcp.DistCp.java

License:Apache License

/**
 * Initialize DFSCopyFileMapper specific job-configuration.
 * @param conf : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args Arguments
 */
private static void setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());

    //set boolean values
    final boolean update = args.flags.contains(Options.UPDATE);
    final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE);
    jobConf.setBoolean(Options.UPDATE.propertyname, update);
    jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
    jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname,
            args.flags.contains(Options.IGNORE_READ_FAILURES));
    jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname, args.flags.contains(Options.PRESERVE_STATUS));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path jobDirectory = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

    FileSystem dstfs = args.dst.getFileSystem(conf);
    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_distcp_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
            Path parent = args.dst.getParent();
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_distcp_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist, LongWritable.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist, Text.class,
            Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist, Text.class,
            FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory OR we're updating/overwriting
    // the contents of the destination directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite;
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            Stack<FileStatus> pathstack = new Stack<FileStatus>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());
                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {
                        //skip file if the src and the dst files are the same.
                        skipfile = update && sameFile(srcfs, child, dstfs, new Path(args.dst, dst));
                        //skip file if it exceed file limit or size limit
                        skipfile |= fileCount == args.filelimit || byteCount + child.getLen() > args.sizelimit;

                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            if (LOG.isTraceEnabled()) {
                                LOG.trace("adding file " + child.getPath());
                            }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > BYTES_PER_MAP) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                                new FilePair(child, dst));
                    }

                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        LOG.info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create" + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_distcp_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    if (dststatus != null && args.flags.contains(Options.DELETE)) {
        deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf);
    }

    Path tmpDir = new Path(
            (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
            "_distcp_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());
    LOG.info("srcCount=" + srcCount);
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(byteCount, jobConf);
}

From source file:com.pinterest.hdfsbackup.distcp.DistCp.java

License:Apache License

/** Delete the dst files/dirs which do not exist in src */
static private void deleteNonexisting(FileSystem dstfs, FileStatus dstroot, Path dstsorted, FileSystem jobfs,
        Path jobdir, JobConf jobconf, Configuration conf) throws IOException {
    if (!dstroot.isDir()) {
        throw new IOException("dst must be a directory when option " + Options.DELETE.cmd
                + " is set, but dst (= " + dstroot.getPath() + ") is not a directory.");
    }

    //write dst lsr results
    final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr");
    final SequenceFile.Writer writer = SequenceFile.createWriter(jobfs, jobconf, dstlsr, Text.class,
            FileStatus.class, SequenceFile.CompressionType.NONE);
    try {
        //do lsr to get all file statuses in dstroot
        final Stack<FileStatus> lsrstack = new Stack<FileStatus>();
        for (lsrstack.push(dstroot); !lsrstack.isEmpty();) {
            final FileStatus status = lsrstack.pop();
            if (status.isDir()) {
                for (FileStatus child : dstfs.listStatus(status.getPath())) {
                    String relative = makeRelative(dstroot.getPath(), child.getPath());
                    writer.append(new Text(relative), child);
                    lsrstack.push(child);
                }
            }
        }
    } finally {
        checkAndClose(writer);
    }

    //sort lsr results
    final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted");
    SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs, new Text.Comparator(), Text.class,
            FileStatus.class, jobconf);
    sorter.sort(dstlsr, sortedlsr);

    //compare lsr list and dst list
    SequenceFile.Reader lsrin = null;
    SequenceFile.Reader dstin = null;
    try {
        lsrin = new SequenceFile.Reader(jobfs, sortedlsr, jobconf);
        dstin = new SequenceFile.Reader(jobfs, dstsorted, jobconf);

        //compare sorted lsr list and sorted dst list
        final Text lsrpath = new Text();
        final FileStatus lsrstatus = new FileStatus();
        final Text dstpath = new Text();
        final Text dstfrom = new Text();
        final FsShell shell = new FsShell(conf);
        final String[] shellargs = { "-rmr", null };

        boolean hasnext = dstin.next(dstpath, dstfrom);
        for (; lsrin.next(lsrpath, lsrstatus);) {
            int dst_cmp_lsr = dstpath.compareTo(lsrpath);
            for (; hasnext && dst_cmp_lsr < 0;) {
                hasnext = dstin.next(dstpath, dstfrom);
                dst_cmp_lsr = dstpath.compareTo(lsrpath);
            }

            if (dst_cmp_lsr == 0) {
                //lsrpath exists in dst, skip it
                hasnext = dstin.next(dstpath, dstfrom);
            } else {
                //lsrpath does not exist, delete it
                String s = new Path(dstroot.getPath(), lsrpath.toString()).toString();
                if (shellargs[1] == null || !isAncestorPath(shellargs[1], s)) {
                    shellargs[1] = s;
                    int r = 0;
                    try {
                        r = shell.run(shellargs);
                    } catch (Exception e) {
                        throw new IOException("Exception from shell.", e);
                    }
                    if (r != 0) {
                        throw new IOException(
                                "\"" + shellargs[0] + " " + shellargs[1] + "\" returns non-zero value " + r);
                    }
                }
            }
        }
    } finally {
        checkAndClose(lsrin);
        checkAndClose(dstin);
    }
}

From source file:com.ripariandata.timberwolf.writer.hive.SequenceFileMailWriter.java

License:Apache License

@Override
public void write(final Iterable<MailboxItem> mails) {
    try {
        SequenceFile.Writer writer = SequenceFile.createWriter(new Configuration(), outStream, Text.class,
                Text.class, SequenceFile.CompressionType.NONE, null);
        write(mails, writer);
        writer.close();
    } catch (IOException e) {
        LOG.error("There was an error writing to the Hive file.");
        throw HiveMailWriterException.log(LOG,
                new HiveMailWriterException("There was an error writing to the Hive file", e));
    }
}

From source file:com.scaleunlimited.cascading.DistCp.java

License:Apache License

/** Delete the dst files/dirs which do not exist in src */
static private void deleteNonexisting(FileSystem dstfs, FileStatus dstroot, Path dstsorted, FileSystem jobfs,
        Path jobdir, JobConf jobconf, Configuration conf) throws IOException {
    if (!dstroot.isDir()) {
        throw new IOException("dst must be a directory when option " + Options.DELETE.cmd
                + " is set, but dst (= " + dstroot.getPath() + ") is not a directory.");
    }

    //write dst lsr results
    final Path dstlsr = new Path(jobdir, "_distcp_dst_lsr");
    final SequenceFile.Writer writer = SequenceFile.createWriter(jobfs, jobconf, dstlsr, Text.class,
            FileStatus.class, SequenceFile.CompressionType.NONE);
    try {
        //do lsr to get all file statuses in dstroot
        final Stack<FileStatus> lsrstack = new Stack<FileStatus>();
        for (lsrstack.push(dstroot); !lsrstack.isEmpty();) {
            final FileStatus status = lsrstack.pop();
            if (status.isDir()) {
                for (FileStatus child : dstfs.listStatus(status.getPath())) {
                    String relative = makeRelative(dstroot.getPath(), child.getPath());
                    writer.append(new Text(relative), child);
                    lsrstack.push(child);
                }
            }
        }
    } finally {
        checkAndClose(writer);
    }

    //sort lsr results
    final Path sortedlsr = new Path(jobdir, "_distcp_dst_lsr_sorted");
    SequenceFile.Sorter sorter = new SequenceFile.Sorter(jobfs, new Text.Comparator(), Text.class,
            FileStatus.class, jobconf);
    sorter.sort(dstlsr, sortedlsr);

    //compare lsr list and dst list  
    SequenceFile.Reader lsrin = null;
    SequenceFile.Reader dstin = null;
    try {
        lsrin = new SequenceFile.Reader(jobfs, sortedlsr, jobconf);
        dstin = new SequenceFile.Reader(jobfs, dstsorted, jobconf);

        //compare sorted lsr list and sorted dst list
        final Text lsrpath = new Text();
        final FileStatus lsrstatus = new FileStatus();
        final Text dstpath = new Text();
        final Text dstfrom = new Text();
        final FsShell shell = new FsShell(conf);
        final String[] shellargs = { "-rmr", null };

        boolean hasnext = dstin.next(dstpath, dstfrom);
        for (; lsrin.next(lsrpath, lsrstatus);) {
            int dst_cmp_lsr = dstpath.compareTo(lsrpath);
            for (; hasnext && dst_cmp_lsr < 0;) {
                hasnext = dstin.next(dstpath, dstfrom);
                dst_cmp_lsr = dstpath.compareTo(lsrpath);
            }

            if (dst_cmp_lsr == 0) {
                //lsrpath exists in dst, skip it
                hasnext = dstin.next(dstpath, dstfrom);
            } else {
                //lsrpath does not exist, delete it
                String s = new Path(dstroot.getPath(), lsrpath.toString()).toString();
                if (shellargs[1] == null || !isAncestorPath(shellargs[1], s)) {
                    shellargs[1] = s;
                    int r = 0;
                    try {
                        r = shell.run(shellargs);
                    } catch (Exception e) {
                        throw new IOException("Exception from shell.", e);
                    }
                    if (r != 0) {
                        throw new IOException(
                                "\"" + shellargs[0] + " " + shellargs[1] + "\" returns non-zero value " + r);
                    }
                }
            }
        }
    } finally {
        checkAndClose(lsrin);
        checkAndClose(dstin);
    }
}