List of usage examples for org.apache.hadoop.io.SequenceFile.createWriter
@Deprecated public static Writer createWriter(Configuration conf, FSDataOutputStream out, Class keyClass, Class valClass, CompressionType compressionType, CompressionCodec codec) throws IOException
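Before the project examples, here is a minimal self-contained sketch of this overload; the path and the Text/LongWritable key/value types are illustrative assumptions, not drawn from any project below. Because this variant wraps a caller-supplied FSDataOutputStream, the writer does not own the stream, so the caller must close the stream itself after closing the writer (the Flume test further down exercises exactly this behavior).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.DefaultCodec;

public class DeprecatedCreateWriterSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/example.seq"); // assumed output path
        FileSystem fs = path.getFileSystem(conf);
        FSDataOutputStream out = fs.create(path);
        // This overload wraps a caller-supplied stream, so the writer does
        // not own it: close the writer first, then the stream.
        SequenceFile.Writer writer = SequenceFile.createWriter(conf, out,
                Text.class, LongWritable.class, CompressionType.NONE, new DefaultCodec());
        try {
            writer.append(new Text("key"), new LongWritable(42L));
        } finally {
            writer.close();
            out.close();
        }
    }
}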
From source file:com.asakusafw.runtime.stage.temporary.TemporaryStorage.java
License:Apache License
private static <V> SequenceFile.Writer newWriter(Configuration conf, FileSystem fs, Class<V> dataType, Path path,
        CompressionCodec compressionCodec) throws IOException {
    if (compressionCodec == null) {
        return SequenceFile.createWriter(fs, conf, path, NullWritable.class, dataType, CompressionType.NONE);
    } else {
        return SequenceFile.createWriter(fs, conf, path, NullWritable.class, dataType, CompressionType.BLOCK,
                compressionCodec);
    }
}
From source file:com.benchmark.mapred.PiEstimator.java
License:Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    // setup job conf
    jobConf.setJobName(PiEstimator.class.getSimpleName());

    jobConf.setInputFormat(SequenceFileInputFormat.class);

    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);

    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    // setup input/output directories
    //final Path inDir = new Path(TMP_DIR, "in");
    final Path inDir = new Path("/home/hadoop1/tmp_dir", "in");
    System.out.println("inDir =" + inDir.toString());
    //final Path outDir = new Path(TMP_DIR, "out");
    final Path outDir = new Path("/home/hadoop1/tmp_dir", "out");
    System.out.println("outDir =" + outDir.toString());
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file, LongWritable.class,
                    LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(BigDecimal.valueOf(numMaps)).divide(BigDecimal.valueOf(numPoints));
    } finally {
        fs.delete(TMP_DIR, true);
    }
}
From source file:com.bigdata.diane.MiniTestDFSIO.java
License:Apache License
private static void createControlFile(FileSystem fs, int fileSize, // in MB
        int nrFiles, Configuration fsConfig) throws InterruptedException, IOException {
    LOG.info("creating control file: " + fileSize + " mega bytes, " + nrFiles + " files");

    for (int i = 0; i < nrFiles; i++) {
        String name = getFileName(i);
        Path controlFile = new Path(CONTROL_DIR, "in_file_" + name);
        SequenceFile.Writer writer = null;
        try {
            writer = SequenceFile.createWriter(fs, fsConfig, controlFile, Text.class, LongWritable.class,
                    CompressionType.NONE);
            writer.append(new Text(name), new LongWritable(fileSize));
        } catch (Exception e) {
            throw new IOException(e.getLocalizedMessage());
        } finally {
            if (writer != null)
                writer.close();
            writer = null;
        }
    }
    LOG.info("created control files for: " + nrFiles + " files now sleep 20 seconds");
    Thread.sleep(20000);
}
From source file:com.cloudera.flume.handlers.hdfs.TestDFSWrite.java
License:Apache License
@Test
public void testWhyFail() throws IOException {
    // There was a failure case using:
    FlumeConfiguration conf = FlumeConfiguration.get();
    Path path = new Path("file:///tmp/testfile");
    FileSystem hdfs = path.getFileSystem(conf);

    // writing
    FSDataOutputStream dos = hdfs.create(path);
    hdfs.deleteOnExit(path);

    // this version's Writer has ownOutputStream=false.
    Writer writer = SequenceFile.createWriter(conf, dos, WriteableEventKey.class, WriteableEvent.class,
            SequenceFile.CompressionType.NONE, new DefaultCodec());

    Event e = new EventImpl("EVENT".getBytes());
    writer.append(new WriteableEventKey(e), new WriteableEvent(e));
    writer.sync();
    writer.close();

    // It is strange that I have to close the underlying FSDataOutputStream.
    dos.close();

    // WTF: nothing written by this writer!
    FileStatus stats = hdfs.getFileStatus(path);
    assertTrue(stats.getLen() > 0); // it should have written something but it failed.
}
From source file:com.cloudera.flume.handlers.seqfile.SequenceFileOutputFormat.java
License:Apache License
@Override
public void format(OutputStream o, Event e) throws IOException {
    if (writer == null) {
        cachedOut = o;
        FSDataOutputStream fsOut;
        if (o instanceof FSDataOutputStream) {
            fsOut = (FSDataOutputStream) o;
        } else {
            fsOut = new FSDataOutputStream(o, null);
        }
        writer = SequenceFile.createWriter(FlumeConfiguration.get(), fsOut, WriteableEventKey.class,
                WriteableEvent.class, compressionType, codec);
    }

    if (cachedOut != o) {
        // different output than last time, fail here
        throw new IOException("OutputFormat instance can only write to the same OutputStream");
    }

    writer.append(new WriteableEventKey(e), new WriteableEvent(e));
}
From source file:com.cloudera.seismic.segy.SegyLoader.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption("cwproot", true, "The path to CWPROOT on this machine");
    options.addOption("input", true, "SEG-Y files to import into Hadoop");
    options.addOption("output", true, "The path of the sequence file to write in Hadoop");

    // Parse the command line and check for required arguments.
    CommandLine cmdLine = new PosixParser().parse(options, args, false);
    if (!cmdLine.hasOption("input") || !cmdLine.hasOption("output")) {
        System.out.println("Missing required input/output arguments");
        new HelpFormatter().printHelp("SegyLoader", options);
        System.exit(1);
    }

    String cwproot = System.getenv("CWPROOT");
    if (cmdLine.hasOption("cwproot")) {
        cwproot = cmdLine.getOptionValue("cwproot");
    }
    if (cwproot == null || cwproot.isEmpty()) {
        System.out.println("Could not determine CWPROOT value, using /usr/local/su...");
        cwproot = "/usr/local/su";
    }

    // Assume any remaining args are for segyread
    List<String> segyReadArgs = Lists.newArrayList();
    for (String arg : cmdLine.getArgs()) {
        if (arg.contains("=")) {
            segyReadArgs.add(arg);
        }
    }

    // Open the output sequence file.
    Configuration conf = getConf();
    Path outputPath = new Path(cmdLine.getOptionValue("output"));
    SequenceFile.Writer writer = SequenceFile.createWriter(FileSystem.get(conf), conf, outputPath,
            NullWritable.class, BytesWritable.class, CompressionType.BLOCK);
    int rc = 0;
    SequenceFileCallback sfc = new SequenceFileCallback(writer);
    try {
        for (String filename : cmdLine.getOptionValues("input")) {
            System.out.println("Reading input file: " + filename);
            if (filename.endsWith(".su")) {
                SUReader reader = new SUReader(new BufferedInputStream(new FileInputStream(filename)),
                        ImmutableList.<SUCallback>of(sfc));
                reader.run();
                System.out.println("Bytes read: " + reader.getBytesRead());
            } else {
                SUProcess proc = new SUProcess(cwproot, "segyread");
                for (String arg : segyReadArgs) {
                    proc.addArg(arg);
                }
                proc.addArg(String.format("tape=%s", filename));
                proc.addCallback(sfc);
                proc.start();
                rc += proc.closeAndWait();
                System.out.println("Bytes read: " + proc.getTotalBytesRead());
            }
        }
        System.out.println("Bytes written: " + sfc.getBytesWritten());
    } catch (Throwable t) {
        t.printStackTrace();
        rc = 1;
    } finally {
        writer.close();
    }
    return rc;
}
From source file:com.endgame.binarypig.util.BuildSequenceFileFromArchive.java
License:Apache License
public void load(FileSystem fs, Configuration conf, File archive, Path outputDir) throws Exception {
    Text key = new Text();
    BytesWritable val = new BytesWritable();

    SequenceFile.Writer writer = null;
    ArchiveInputStream archiveInputStream = null;
    try {
        Path sequenceName = new Path(outputDir, archive.getName() + ".seq");
        System.out.println("Writing to " + sequenceName);
        writer = SequenceFile.createWriter(fs, conf, sequenceName, Text.class, BytesWritable.class,
                CompressionType.RECORD);

        String lowerName = archive.toString().toLowerCase();
        if (lowerName.endsWith(".tar.gz") || lowerName.endsWith(".tgz")) {
            archiveInputStream = new ArchiveStreamFactory().createArchiveInputStream("tar",
                    new GZIPInputStream(new FileInputStream(archive)));
        } else if (lowerName.endsWith(".tar.bz") || lowerName.endsWith(".tar.bz2") || lowerName.endsWith(".tbz")) {
            FileInputStream is = new FileInputStream(archive);
            is.read(); // read 'B'
            is.read(); // read 'Z'
            archiveInputStream = new ArchiveStreamFactory().createArchiveInputStream("tar",
                    new CBZip2InputStream(is));
        } else if (lowerName.endsWith(".tar")) {
            archiveInputStream = new ArchiveStreamFactory().createArchiveInputStream("tar",
                    new FileInputStream(archive));
        } else if (lowerName.endsWith(".zip")) {
            archiveInputStream = new ArchiveStreamFactory().createArchiveInputStream("zip",
                    new FileInputStream(archive));
        } else {
            throw new RuntimeException("Can't handle archive format for: " + archive);
        }

        ArchiveEntry entry = null;
        while ((entry = archiveInputStream.getNextEntry()) != null) {
            if (!entry.isDirectory()) {
                try {
                    byte[] outputFile = IOUtils.toByteArray(archiveInputStream);
                    val.set(outputFile, 0, outputFile.length);
                    key.set(DigestUtils.md5Hex(outputFile));
                    writer.append(key, val);
                } catch (IOException e) {
                    // Truncated archive
                    System.err.println("Warning: archive may be truncated: " + archive);
                    break;
                }
            }
        }
    } finally {
        // Null checks guard against an NPE when createWriter or the
        // archive-stream setup throws before a resource is assigned.
        if (archiveInputStream != null) {
            archiveInputStream.close();
        }
        if (writer != null) {
            writer.close();
        }
    }
}
From source file:com.endgame.binarypig.util.BuildSequenceFileFromDir.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    File inDir = new File(args[0]);
    Path name = new Path(args[1]);

    Text key = new Text();
    BytesWritable val = new BytesWritable();

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, name, Text.class, BytesWritable.class,
            CompressionType.RECORD);
    for (File file : inDir.listFiles()) {
        if (!file.isFile()) {
            System.out.println("Skipping " + file + " (not a file) ...");
            continue;
        }

        byte[] bytes = FileUtils.readFileToByteArray(file);
        val.set(bytes, 0, bytes.length);
        key.set(DigestUtils.md5Hex(bytes));
        writer.append(key, val);
    }
    writer.close();

    return 0;
}
From source file:com.facebook.LinkBench.LinkBenchDriverMR.java
License:Apache License
/**
 * setup input files for the map reduce job
 * @param jobconf configuration of the map reduce job
 * @param nmappers number of mappers (loader or requester)
 */
private static FileSystem setupInputFiles(JobConf jobconf, int nmappers)
        throws IOException, InterruptedException {
    // setup input/output directories
    final Path indir = new Path(TMP_DIR, "in");
    final Path outdir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobconf, indir);
    FileOutputFormat.setOutputPath(jobconf, outdir);

    final FileSystem fs = FileSystem.get(jobconf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(indir)) {
        throw new IOException("Cannot create input directory " + indir);
    }

    // generate an input file for each map task
    if (USE_INPUT_FILES) {
        for (int i = 0; i < nmappers; ++i) {
            final Path file = new Path(indir, "part" + i);
            final IntWritable mapperid = new IntWritable(i);
            final IntWritable nummappers = new IntWritable(nmappers);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobconf, file, IntWritable.class,
                    IntWritable.class, CompressionType.NONE);
            try {
                writer.append(mapperid, nummappers);
            } finally {
                writer.close();
            }
            logger.info("Wrote input for Map #" + i);
        }
    }
    return fs;
}
From source file:com.github.gaoyangthu.demo.mapred.PiEstimator.java
License:Apache License
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimate(int numMaps, long numPoints, JobConf jobConf) throws IOException {
    // setup job conf
    jobConf.setJobName(PiEstimator.class.getSimpleName());

    jobConf.setInputFormat(SequenceFileInputFormat.class);

    jobConf.setOutputKeyClass(BooleanWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    jobConf.setMapperClass(PiMapper.class);
    jobConf.setNumMapTasks(numMaps);

    jobConf.setReducerClass(PiReducer.class);
    jobConf.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    jobConf.setSpeculativeExecution(false);

    // setup input/output directories
    final Path inDir = new Path(TMP_DIR, "in");
    final Path outDir = new Path(TMP_DIR, "out");
    FileInputFormat.setInputPaths(jobConf, inDir);
    FileOutputFormat.setOutputPath(jobConf, outDir);

    final FileSystem fs = FileSystem.get(jobConf);
    if (fs.exists(TMP_DIR)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, jobConf, file, LongWritable.class,
                    LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        JobClient.runJob(jobConf);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, jobConf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        // compute estimated value
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get()))
                .divide(BigDecimal.valueOf(numMaps)).divide(BigDecimal.valueOf(numPoints));
    } finally {
        fs.delete(TMP_DIR, true);
    }
}
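Note: the positional createWriter overloads used throughout the examples above are deprecated in recent Hadoop releases in favor of the option-based factory method SequenceFile.createWriter(Configuration, Writer.Option...). A minimal sketch of the replacement follows; the output path, key/value classes, and codec choice are illustrative assumptions, not taken from any of the projects above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.SequenceFile.Writer;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.DefaultCodec;

public class OptionBasedWriterSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Writer options replace the long positional parameter lists above.
        Writer writer = SequenceFile.createWriter(conf,
                Writer.file(new Path("/tmp/example.seq")), // assumed output path
                Writer.keyClass(Text.class),
                Writer.valueClass(BytesWritable.class),
                Writer.compression(CompressionType.BLOCK, new DefaultCodec()));
        try {
            writer.append(new Text("key"), new BytesWritable(new byte[] { 1, 2, 3 }));
        } finally {
            writer.close();
        }
    }
}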