Example usage for org.apache.hadoop.io SequenceFile createWriter

List of usage examples for org.apache.hadoop.io SequenceFile createWriter

Introduction

On this page you can find example usage for org.apache.hadoop.io SequenceFile createWriter.

Prototype

@Deprecated
public static Writer createWriter(Configuration conf, FSDataOutputStream out, Class keyClass, Class valClass,
        CompressionType compressionType, CompressionCodec codec) throws IOException;

Document

Construct the preferred type of 'raw' SequenceFile Writer.
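
Below is a minimal sketch of this overload, assembled for this page rather than taken from any of the projects listed under Usage; the CreateWriterExample class name, the output path, and the Text/LongWritable key and value types are placeholders chosen for illustration. As the Flume TestDFSWrite example below also notes, this variant writes to an already-open stream and does not take ownership of it, so the FSDataOutputStream must be closed separately.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.DefaultCodec;

public class CreateWriterExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/tmp/example.seq"); // placeholder output path
        FSDataOutputStream out = fs.create(path);
        // This overload writes to the supplied stream but does not own it,
        // so the stream has to be closed separately after the writer.
        SequenceFile.Writer writer = SequenceFile.createWriter(conf, out, Text.class, LongWritable.class,
                SequenceFile.CompressionType.NONE, new DefaultCodec());
        try {
            writer.append(new Text("key"), new LongWritable(1L));
        } finally {
            writer.close();
            out.close(); // the writer does not close the underlying FSDataOutputStream
        }
    }
}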

Usage

From source file:com.asakusafw.runtime.stage.temporary.TemporaryStorage.java

License:Apache License

private static <V> SequenceFile.Writer newWriter(Configuration conf, FileSystem fs, Class<V> dataType,
        Path path, CompressionCodec compressionCodec) throws IOException {
    if (compressionCodec == null) {
        return SequenceFile.createWriter(fs, conf, path, NullWritable.class, dataType, CompressionType.NONE);
    } else {
        return SequenceFile.createWriter(fs, conf, path, NullWritable.class, dataType, CompressionType.BLOCK,
                compressionCodec);
    }
}

From source file:com.benchmark.mapred.PiEstimator.java

License:Apache License

/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    //setup job conf
    jobConf.setJobName(PiEstimator.class.getSimpleName());

    jobConf.setInputFormat(SequenceFileInputFormat.class);

    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);

    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    //setup input/output directories
    //final Path inDir = new Path(TMP_DIR, "in");
    final Path inDir = new Path("/home/hadoop1/tmp_dir", "in");
    System.out.println("inDir =" + inDir.toString());
    //final Path outDir = new Path(TMP_DIR, "out");
    final Path outDir = new Path("/home/hadoop1/tmp_dir", "out");
    System.out.println("outDir =" + outDir.toString());
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        //generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file, LongWritable.class,
                    LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        //start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        //read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        //compute estimated value
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(BigDecimal.valueOf(numMaps)).divide(BigDecimal.valueOf(numPoints));
    } finally {
        fs.delete(TMP_DIR, true);
    }
}

From source file:com.bigdata.diane.MiniTestDFSIO.java

License:Apache License

private static void createControlFile(FileSystem fs, int fileSize, // in MB 
        int nrFiles, Configuration fsConfig) throws InterruptedException, IOException {
    LOG.info("creating control file: " + fileSize + " mega bytes, " + nrFiles + " files");

    for (int i = 0; i < nrFiles; i++) {
        String name = getFileName(i);
        Path controlFile = new Path(CONTROL_DIR, "in_file_" + name);
        SequenceFile.Writer writer = null;
        try {
            writer = SequenceFile.createWriter(fs, fsConfig, controlFile, Text.class, LongWritable.class,
                    CompressionType.NONE);
            writer.append(new Text(name), new LongWritable(fileSize));
        } catch (Exception e) {
            throw new IOException(e.getLocalizedMessage());
        } finally {
            if (writer != null)
                writer.close();
            writer = null;
        }
    }
    LOG.info("created control files for: " + nrFiles + " files now sleep 20 seconds");
    Thread.sleep(20000);
}

From source file:com.cloudera.flume.handlers.hdfs.TestDFSWrite.java

License:Apache License

@Test
public void testWhyFail() throws IOException {

    // There was a failure case using:
    FlumeConfiguration conf = FlumeConfiguration.get();
    Path path = new Path("file:///tmp/testfile");
    FileSystem hdfs = path.getFileSystem(conf);

    // writing
    FSDataOutputStream dos = hdfs.create(path);
    hdfs.deleteOnExit(path);

    // this version's Writer has ownOutputStream=false.
    Writer writer = SequenceFile.createWriter(conf, dos, WriteableEventKey.class, WriteableEvent.class,
            SequenceFile.CompressionType.NONE, new DefaultCodec());

    Event e = new EventImpl("EVENT".getBytes());

    writer.append(new WriteableEventKey(e), new WriteableEvent(e));
    writer.sync();
    writer.close();

    dos.close(); // It is strange that I have to close the underlying
    // FSDataOutputStream.

    // WTF: nothing written by this writer!
    FileStatus stats = hdfs.getFileStatus(path);
    assertTrue(stats.getLen() > 0);
    // it should have written something but it failed.
}

From source file:com.cloudera.flume.handlers.seqfile.SequenceFileOutputFormat.java

License:Apache License

@Override
public void format(OutputStream o, Event e) throws IOException {
    if (writer == null) {
        cachedOut = o;
        FSDataOutputStream fsOut;
        if (o instanceof FSDataOutputStream) {
            fsOut = (FSDataOutputStream) o;
        } else {
            fsOut = new FSDataOutputStream(o, null);
        }
        writer = SequenceFile.createWriter(FlumeConfiguration.get(), fsOut, WriteableEventKey.class,
                WriteableEvent.class, compressionType, codec);
    }
    if (cachedOut != o) {
        // different output than last time, fail here
        throw new IOException("OutputFormat instance can only write to the same OutputStream");
    }
    writer.append(new WriteableEventKey(e), new WriteableEvent(e));
}

From source file:com.cloudera.seismic.segy.SegyLoader.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption("cwproot", true, "The path to CWPROOT on this machine");
    options.addOption("input", true, "SEG-Y files to import into Hadoop");
    options.addOption("output", true, "The path of the sequence file to write in Hadoop");

    // Parse the commandline and check for required arguments.
    CommandLine cmdLine = new PosixParser().parse(options, args, false);
    if (!cmdLine.hasOption("input") || !cmdLine.hasOption("output")) {
        System.out.println("Mising required input/output arguments");
        new HelpFormatter().printHelp("SegyLoader", options);
        System.exit(1);
    }

    String cwproot = System.getenv("CWPROOT");
    if (cmdLine.hasOption("cwproot")) {
        cwproot = cmdLine.getOptionValue("cwproot");
    }
    if (cwproot == null || cwproot.isEmpty()) {
        System.out.println("Could not determine CWPROOT value, using /usr/local/su...");
        cwproot = "/usr/local/su";
    }

    // Assume any remaining args are for segyread
    List<String> segyReadArgs = Lists.newArrayList();
    for (String arg : cmdLine.getArgs()) {
        if (arg.contains("=")) {
            segyReadArgs.add(arg);
        }
    }

    // Open the output sequence file.
    Configuration conf = getConf();
    Path outputPath = new Path(cmdLine.getOptionValue("output"));
    SequenceFile.Writer writer = SequenceFile.createWriter(FileSystem.get(conf), conf, outputPath,
            NullWritable.class, BytesWritable.class, CompressionType.BLOCK);
    int rc = 0;
    SequenceFileCallback sfc = new SequenceFileCallback(writer);
    try {
        for (String filename : cmdLine.getOptionValues("input")) {
            System.out.println("Reading input file: " + filename);
            if (filename.endsWith(".su")) {
                SUReader reader = new SUReader(new BufferedInputStream(new FileInputStream(filename)),
                        ImmutableList.<SUCallback>of(sfc));
                reader.run();
                System.out.println("Bytes read: " + reader.getBytesRead());
            } else {
                SUProcess proc = new SUProcess(cwproot, "segyread");
                for (String arg : segyReadArgs) {
                    proc.addArg(arg);
                }
                proc.addArg(String.format("tape=%s", filename));
                proc.addCallback(sfc);
                proc.start();
                rc += proc.closeAndWait();
                System.out.println("Bytes read: " + proc.getTotalBytesRead());
            }
        }
        System.out.println("Bytes written: " + sfc.getBytesWritten());
    } catch (Throwable t) {
        t.printStackTrace();
        rc = 1;
    } finally {
        writer.close();
    }
    return rc;
}

From source file:com.endgame.binarypig.util.BuildSequenceFileFromArchive.java

License:Apache License

public void load(FileSystem fs, Configuration conf, File archive, Path outputDir) throws Exception {
    Text key = new Text();
    BytesWritable val = new BytesWritable();

    SequenceFile.Writer writer = null;
    ArchiveInputStream archiveInputStream = null;

    try {
        Path sequenceName = new Path(outputDir, archive.getName() + ".seq");
        System.out.println("Writing to " + sequenceName);
        writer = SequenceFile.createWriter(fs, conf, sequenceName, Text.class, BytesWritable.class,
                CompressionType.RECORD);
        String lowerName = archive.toString().toLowerCase();

        if (lowerName.endsWith(".tar.gz") || lowerName.endsWith(".tgz")) {
            archiveInputStream = new ArchiveStreamFactory().createArchiveInputStream("tar",
                    new GZIPInputStream(new FileInputStream(archive)));
        } else if (lowerName.endsWith(".tar.bz") || lowerName.endsWith(".tar.bz2")
                || lowerName.endsWith(".tbz")) {
            FileInputStream is = new FileInputStream(archive);
            is.read(); // read 'B'
            is.read(); // read 'Z'
            archiveInputStream = new ArchiveStreamFactory().createArchiveInputStream("tar",
                    new CBZip2InputStream(is));
        } else if (lowerName.endsWith(".tar")) {
            archiveInputStream = new ArchiveStreamFactory().createArchiveInputStream("tar",
                    new FileInputStream(archive));
        } else if (lowerName.endsWith(".zip")) {
            archiveInputStream = new ArchiveStreamFactory().createArchiveInputStream("zip",
                    new FileInputStream(archive));
        } else {
            throw new RuntimeException("Can't handle archive format for: " + archive);
        }

        ArchiveEntry entry = null;
        while ((entry = archiveInputStream.getNextEntry()) != null) {
            if (!entry.isDirectory()) {
                try {
                    byte[] outputFile = IOUtils.toByteArray(archiveInputStream);
                    val.set(outputFile, 0, outputFile.length);
                    key.set(DigestUtils.md5Hex(outputFile));

                    writer.append(key, val);
                } catch (IOException e) {
                    System.err.println("Warning: archive may be truncated: " + archive);
                    // Truncated Archive
                    break;
                }
            }
        }
    } finally {
        archiveInputStream.close();
        writer.close();
    }
}

From source file:com.endgame.binarypig.util.BuildSequenceFileFromDir.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    File inDir = new File(args[0]);
    Path name = new Path(args[1]);

    Text key = new Text();
    BytesWritable val = new BytesWritable();

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, name, Text.class, BytesWritable.class,
            CompressionType.RECORD);

    for (File file : inDir.listFiles()) {
        if (!file.isFile()) {
            System.out.println("Skipping " + file + " (not a file) ...");
            continue;
        }

        byte[] bytes = FileUtils.readFileToByteArray(file);
        val.set(bytes, 0, bytes.length);
        key.set(DigestUtils.md5Hex(bytes));
        writer.append(key, val);
    }
    writer.close();

    return 0;
}

From source file:com.facebook.LinkBench.LinkBenchDriverMR.java

License:Apache License

/**
 * Set up input files for the map/reduce job
 * @param jobconf configuration of the map reduce job
 * @param nmappers number of mappers (loader or requester)
 */
private static FileSystem setupInputFiles(JobConf jobconf, int nmappers)
        throws IOException, InterruptedException {
    //setup input/output directories
    final Path indir = new Path(TMP_DIR, "in");
    final Path outdir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobconf, indir);
    FileOutputFormat.setOutputPath(jobconf, outdir);

    final FileSystem fs = FileSystem.get(jobconf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(indir)) {
        throw new IOException("Cannot create input directory " + indir);
    }

    //generate an input file for each map task
    if (USE_INPUT_FILES) {
        for (int i = 0; i < nmappers; ++i) {
            final Path file = new Path(indir, "part" + i);
            final IntWritable mapperid = new IntWritable(i);
            final IntWritable nummappers = new IntWritable(nmappers);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobconf, file, IntWritable.class,
                    IntWritable.class, CompressionType.NONE);
            try {
                writer.append(mapperid, nummappers);
            } finally {
                writer.close();
            }
            logger.info("Wrote input for Map #" + i);
        }
    }
    return fs;
}

From source file:com.github.gaoyangthu.demo.mapred.PiEstimator.java

License:Apache License

/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    //setup job conf
    jobConf.setJobName(PiEstimator.class.getSimpleName());

    jobConf.setInputFormat(SequenceFileInputFormat.class);

    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);

    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    //setup input/output directories
    final Path inDir = new Path(TMP_DIR, "in");
    final Path outDir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        //generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file, LongWritable.class,
                    LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        //start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        //read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        //compute estimated value
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(BigDecimal.valueOf(numMaps)).divide(BigDecimal.valueOf(numPoints));
    } finally {
        fs.delete(TMP_DIR, true);
    }
}