List of usage examples for org.apache.hadoop.fs.FileSystem.create(Path)
public FSDataOutputStream create(Path f) throws IOException
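Before the project-specific examples below, here is a minimal, self-contained sketch of the call pattern they all share: obtain a FileSystem from a Configuration, call create(Path) to get an FSDataOutputStream, write, flush, and close. The path and payload are hypothetical placeholders, not taken from any of the source files listed below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;
import java.nio.charset.StandardCharsets;

public class FileSystemCreateSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Resolves to the default filesystem (HDFS if configured, local filesystem otherwise).
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/tmp/example.txt"); // hypothetical path
        // create(Path) overwrites any existing file and returns an FSDataOutputStream.
        FSDataOutputStream out = fs.create(path);
        try {
            out.write("hello\n".getBytes(StandardCharsets.UTF_8));
            out.flush();
        } finally {
            out.close();
        }
    }
}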
From source file:com.neusoft.hbase.test.hadoop.dataload.HFileOutputFormatBase.java
License:Apache License
public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(// cellKeyValue //static <V extends Cell> RecordWriter<ImmutableBytesWritable, V> createRecordWriter() //getRecordWriter()
        final TaskAttemptContext context) throws IOException, InterruptedException {
    // Get the path of the temporary output file
    final Path outputPath = FileOutputFormat.getOutputPath(context);
    final Path outputdir = new FileOutputCommitter(outputPath, context).getWorkPath();
    final Path ignoreOutputPath = getDeleteRowKeyFile(outputPath);
    final Configuration conf = context.getConfiguration();
    final FileSystem fs = outputdir.getFileSystem(conf);

    // These configs. are from hbase-*.xml
    final long maxsize = conf.getLong(HConstants.HREGION_MAX_FILESIZE, HConstants.DEFAULT_MAX_FILE_SIZE);
    // Invented config. Add to hbase-*.xml if other than default compression.
    final String defaultCompression = conf.get("hfile.compression", Compression.Algorithm.NONE.getName());
    final boolean compactionExclude = conf.getBoolean("hbase.mapreduce.hfileoutputformat.compaction.exclude",
            false);

    // create a map from column family to the compression algorithm
    final Map<byte[], String> compressionMap = createFamilyCompressionMap(conf);
    final Map<byte[], String> bloomTypeMap = createFamilyBloomMap(conf);
    final Map<byte[], String> blockSizeMap = createFamilyBlockSizeMap(conf);

    String dataBlockEncodingStr = conf.get(DATABLOCK_ENCODING_CONF_KEY);
    final HFileDataBlockEncoder encoder;
    if (dataBlockEncodingStr == null) {
        encoder = NoOpDataBlockEncoder.INSTANCE;
    } else {
        try {
            encoder = new HFileDataBlockEncoderImpl(DataBlockEncoding.valueOf(dataBlockEncodingStr));
        } catch (IllegalArgumentException ex) {
            throw new RuntimeException("Invalid data block encoding type configured for the param "
                    + DATABLOCK_ENCODING_CONF_KEY + " : " + dataBlockEncodingStr);
        }
    }

    return new RecordWriter<ImmutableBytesWritable, KeyValue>() {// V KeyValue
        // Map of families to writers and how much has been output on the writer.
        private final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                Bytes.BYTES_COMPARATOR);
        private final FSDataOutputStream dos = fs.create(ignoreOutputPath);
        private byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
        private final byte[] now = Bytes.toBytes(System.currentTimeMillis());
        private boolean rollRequested = false;

        public void write(ImmutableBytesWritable row, KeyValue kv)// V cellKeyValue kv
                throws IOException {
            // KeyValue kv = KeyValueUtil.ensureKeyValue(cell);
            // null input == user explicitly wants to flush
            if (row == null && kv == null) {
                rollWriters();
                return;
            }

            byte[] rowKey = kv.getRow();
            long length = kv.getLength();
            byte[] family = kv.getFamily();

            if (ignore(kv)) {
                byte[] readBuf = rowKey;
                dos.write(readBuf, 0, readBuf.length);
                dos.write(Bytes.toBytes("\n"));
                return;
            }

            WriterLength wl = this.writers.get(family);

            // If this is a new column family, verify that the directory exists
            if (wl == null) {
                fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
            }

            // If any of the HFiles for the column families has reached maxsize,
            // we need to roll all the writers
            if (wl != null && wl.written + length >= maxsize) {
                this.rollRequested = true;
            }

            // This can only happen once a row is finished though
            if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                rollWriters();
            }

            // create a new HLog writer, if necessary
            if (wl == null || wl.writer == null) {
                wl = getNewWriter(family, conf);
            }

            // we now have the proper HLog writer. full steam ahead
            kv.updateLatestStamp(this.now);
            wl.writer.append(kv);
            wl.written += length;

            // Copy the row so we know when a row transition.
            this.previousRow = rowKey;
        }

        private void rollWriters() throws IOException {
            for (WriterLength wl : this.writers.values()) {
                if (wl.writer != null) {
                    LOG.info("Writer=" + wl.writer.getPath()
                            + ((wl.written == 0) ? "" : ", wrote=" + wl.written));
                    close(wl.writer);
                }
                wl.writer = null;
                wl.written = 0;
            }
            this.rollRequested = false;
        }

        /*
         * Create a new StoreFile.Writer.
         *
         * @param family
         * @return A WriterLength, containing a new StoreFile.Writer.
         * @throws IOException
         */
        private WriterLength getNewWriter(byte[] family, Configuration conf) throws IOException {
            WriterLength wl = new WriterLength();
            Path familydir = new Path(outputdir, Bytes.toString(family));
            String compression = compressionMap.get(family);
            compression = compression == null ? defaultCompression : compression;
            String bloomTypeStr = bloomTypeMap.get(family);
            BloomType bloomType = BloomType.NONE;
            if (bloomTypeStr != null) {
                bloomType = BloomType.valueOf(bloomTypeStr);
            }
            String blockSizeString = blockSizeMap.get(family);
            int blockSize = blockSizeString == null ? HConstants.DEFAULT_BLOCKSIZE
                    : Integer.parseInt(blockSizeString);
            Configuration tempConf = new Configuration(conf);
            tempConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0.0f);
            wl.writer = new StoreFile.WriterBuilder(conf, new CacheConfig(tempConf), fs)
                    .withOutputDir(familydir).withBloomType(bloomType)
                    .withComparator(KeyValue.COMPARATOR).build();
            this.writers.put(family, wl);
            return wl;
        }

        private void close(final StoreFile.Writer w) throws IOException {
            if (w != null) {
                w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY, Bytes.toBytes(System.currentTimeMillis()));
                w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                        Bytes.toBytes(context.getTaskAttemptID().toString()));
                w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                w.appendFileInfo(StoreFile.EXCLUDE_FROM_MINOR_COMPACTION_KEY,
                        Bytes.toBytes(compactionExclude));
                w.appendTrackedTimestampsToMetadata();
                w.close();
            }
        }

        public void close(TaskAttemptContext c) throws IOException, InterruptedException {
            dos.flush();
            dos.close();
            for (WriterLength wl : this.writers.values()) {
                close(wl.writer);
            }
        }
    };
}
From source file:com.ostor.dedup.hadoop.DedupHadoopObject.java
License:Open Source License
public void dumpToHDFS(FileSystem fs, Path path) throws Exception { logger.debug("Dump object - " + getName() + " to path - " + path); FSDataOutputStream fdos = fs.create(path); fdos.write(dumpToString().getBytes()); fdos.flush();/* w w w . j av a2 s .c o m*/ fdos.close(); }
From source file:com.ostor.dedup.hadoop.DedupHadoopSegment.java
License:Open Source License
public void dumpToHDFS(FileSystem fs, Path path) throws Exception { logger.debug("Dump segment - " + getId() + " to path - " + path); FSDataOutputStream fdos = fs.create(path); fdos.write(dumpMetaData().getBytes()); fdos.flush();//from w ww. jav a2s . com fdos.close(); logger.debug("Dump segment data of length - " + getData().length); FSDataOutputStream fdosData = fs .create(new Path(path.toString() + DedupSegmentStor.SERIALIZED_DATA_SUFFIX)); fdosData.write(getData()); fdosData.flush(); fdosData.close(); }
From source file:com.phantom.hadoop.examples.dancing.DistributedPentomino.java
License:Apache License
/**
 * Create the input file with all of the possible combinations of the given depth.
 *
 * @param fs    the filesystem to write into
 * @param dir   the directory to write the input file into
 * @param pent  the puzzle
 * @param depth the depth to explore when generating prefixes
 */
private static long createInputDirectory(FileSystem fs, Path dir, Pentomino pent, int depth)
        throws IOException {
    fs.mkdirs(dir);
    List<int[]> splits = pent.getSplits(depth);
    Path input = new Path(dir, "part1");
    PrintWriter file = new PrintWriter(new OutputStreamWriter(
            new BufferedOutputStream(fs.create(input), 64 * 1024), Charsets.UTF_8));
    for (int[] prefix : splits) {
        for (int i = 0; i < prefix.length; ++i) {
            if (i != 0) {
                file.print(',');
            }
            file.print(prefix[i]);
        }
        file.print('\n');
    }
    file.close();
    return fs.getFileStatus(input).getLen();
}
From source file:com.phantom.hadoop.examples.pi.Util.java
License:Apache License
/** Write results */
static void writeResults(String name, List<TaskResult> results, FileSystem fs, String dir)
        throws IOException {
    final Path outfile = new Path(dir, name + ".txt");
    Util.out.println(name + "> writing results to " + outfile);
    final PrintWriter out = new PrintWriter(
            new OutputStreamWriter(fs.create(outfile), Charsets.UTF_8), true);
    try {
        for (TaskResult r : results)
            out.println(r);
    } finally {
        out.close();
    }
}
From source file:com.phantom.hadoop.examples.terasort.TeraOutputFormat.java
License:Apache License
public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext job) throws IOException {
    Path file = getDefaultWorkFile(job, "");
    FileSystem fs = file.getFileSystem(job.getConfiguration());
    FSDataOutputStream fileOut = fs.create(file);
    return new TeraRecordWriter(fileOut, job);
}
From source file:com.philiphubbard.digraph.MRBuildVerticesTest.java
License:Open Source License
private static void setupTest(Configuration conf) throws IOException {
    FileSystem fileSystem = FileSystem.get(conf);

    Path path = new Path(testInput);
    if (fileSystem.exists(path))
        fileSystem.delete(path, true);

    ArrayList<MRVertex> vertices = new ArrayList<MRVertex>();

    MRVertex v0 = new MRVertex(0, conf);
    v0.addEdgeTo(2);
    vertices.add(v0);
    MRVertex v1 = new MRVertex(1, conf);
    v1.addEdgeTo(2);
    vertices.add(v1);
    MRVertex v2 = new MRVertex(2, conf);
    v2.addEdgeTo(3);
    vertices.add(v2);
    MRVertex v3 = new MRVertex(3, conf);
    v3.addEdgeTo(4);
    vertices.add(v3);
    MRVertex v4 = new MRVertex(4, conf);
    v4.addEdgeTo(5);
    v4.addEdgeTo(6);
    vertices.add(v4);
    MRVertex v5 = new MRVertex(5, conf);
    vertices.add(v5);
    MRVertex v6 = new MRVertex(6, conf);
    v6.addEdgeTo(7);
    vertices.add(v6);
    MRVertex v7 = new MRVertex(7, conf);
    vertices.add(v7);

    FSDataOutputStream out = fileSystem.create(path);
    for (MRVertex vertex : vertices) {
        Text text = vertex.toText(MRVertex.EdgeFormat.EDGES_TO);
        byte[] bytes = text.copyBytes();
        for (byte b : bytes)
            out.write(b);
        out.write('\n');
    }
    out.close();

    fileSystem.close();
}
From source file:com.philiphubbard.sabe.MRAssembler.java
License:Open Source License
public boolean run(Path inputPath, Path outputPath)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();

    // Job.getInstance() copies the Configuration argument, so set its properties first.
    conf.setBoolean(MRVertex.CONFIG_ALLOW_EDGE_MULTIPLES, true);
    conf.setBoolean(MRVertex.CONFIG_COMPRESS_CHAIN_MULTIPLES_MUST_MATCH, false);
    conf.setInt(MRMerVertex.CONFIG_MER_LENGTH, vertexMerLength);
    conf.setBoolean(MRBuildVertices.CONFIG_PARTITION_BRANCHES_CHAINS, true);
    conf.setInt(MRBuildVertices.CONFIG_COVERAGE, coverage);
    conf.setInt(MRCompressChains.CONFIG_TERMINATION_COUNT, 1);

    Job buildJob = Job.getInstance(conf);
    buildJob.setJobName("mrassemblerbuild");

    Path buildInputPath = inputPath;
    Path buildOutputPath = new Path("sabe.MRAssemblerTmp");

    System.out.println("sabe.MRAssembler starting vertex construction");
    MRBuildMerVertices.setupJob(buildJob, buildInputPath, buildOutputPath);
    if (!buildJob.waitForCompletion(true))
        return false;

    Path compressInputPath = new Path(buildOutputPath.toString() + "/chain");
    Path compressOutputPath = new Path(buildOutputPath.toString() + "/chainCompress");

    int iter = 0;
    boolean keepGoing = true;
    MRCompressChains.beginIteration();
    while (keepGoing) {
        Job compressJob = Job.getInstance(conf);
        compressJob.setJobName("mrassemblercompress");

        System.out.println("sabe.MRAssembler starting compression iteration " + iter);
        MRCompressMerChains.setupIterationJob(compressJob, compressInputPath, compressOutputPath);
        if (!compressJob.waitForCompletion(true))
            System.exit(1);

        iter++;
        keepGoing = MRCompressChains.continueIteration(compressJob, compressInputPath, compressOutputPath);
    }

    System.out.println("sabe.MRAssembler made " + iter + " compression iterations");

    Path branchPath = new Path(buildOutputPath.toString() + "/branch");
    Path chainPath = compressOutputPath;

    FileSystem fileSystem = FileSystem.get(conf);

    Graph graph = buildCompressedGraph(conf, fileSystem, branchPath, chainPath);
    if (graph != null) {
        ArrayList<String> result = graph.assemble();
        FSDataOutputStream out = fileSystem.create(outputPath);
        for (String seq : result) {
            out.writeBytes(seq);
            out.writeBytes("\n");
        }
    }

    fileSystem.delete(buildOutputPath, true);
    fileSystem.close();

    return true;
}
From source file:com.philiphubbard.sabe.MRAssemblerTest1.java
License:Open Source License
private static void setupTest(Configuration conf) throws IOException {
    FileSystem fileSystem = FileSystem.get(conf);

    Path path = new Path(testInput);
    if (fileSystem.exists(path))
        fileSystem.delete(path, true);

    ArrayList<Text> reads = new ArrayList<Text>();

    // Goal: AATTCGGCCTTCGGCAT
    reads.add(new Text("AATTCGGC\n"));
    reads.add(new Text("CTTCGGCAT\n"));
    reads.add(new Text("AATT\n"));
    reads.add(new Text("CGGCCTTCGGCAT\n"));
    reads.add(new Text("AATTCGGCCTTCG\n"));
    reads.add(new Text("GCAT\n"));

    FSDataOutputStream out = fileSystem.create(path);
    for (Text read : reads) {
        byte[] bytes = read.copyBytes();
        for (byte b : bytes)
            out.write(b);
    }
    out.close();

    fileSystem.close();
}
From source file:com.philiphubbard.sabe.MRAssemblerTest2.java
License:Open Source License
private static void setupTest(Configuration conf) throws IOException {
    FileSystem fileSystem = FileSystem.get(conf);

    Path path = new Path(testInput);
    if (fileSystem.exists(path))
        fileSystem.delete(path, true);

    ArrayList<Text> reads = new ArrayList<Text>();

    // The expected result:
    // CCCTTTCTGTTGACCCATCATTGTTTAGTAACCCGCGGGATGCCTGGCAGACCCGCGGGACGATCTCCTCTGACCCATCATCGAAATTCC
    // Note that it has the following pattern:
    // segment 0: CCCTTTCTGT
    // segment 1, which will be repeated: TGACCCATCA
    // segment 2: TTGTTTAGTA
    // segment 3, which will be repeated: ACCCGCGGGA
    // segment 4: TGCCTGGCAG
    // segment 3, again: ACCCGCGGGA
    // segment 5: CGATCTCCTC
    // segment 1, again: TGACCCATCA
    // segment 6: TCGAAATTCC

    reads.add(new Text("CCCTTTC\n"));
    // Error: initial T omitted.
    reads.add(new Text("GTTGACCCATCATTGTTTAGTAACCCGCGGGATGCCTGGCAGACC"));
    reads.add(new Text("CGCGGGACGAT\n"));
    // Error: final C omitted.
    reads.add(new Text("CTCCTCTGACCCATCATCGAAATTC\n"));

    reads.add(new Text("CCCTTTCTGTTGACCCAT\n"));
    // Error: final C replaced with G.
    reads.add(new Text("CATTGTTTAGTAACCCGCGGGATGCCTGGCAGACG\n"));
    reads.add(new Text("CGCGGGACGATCTCCTCTGACCCATCATCGAAATTCC\n"));

    // Error: C at index 14 replaced with A.
    reads.add(new Text("CCCTTTCTGTTGACACATCATTGTTTAGTAAC"));
    reads.add(new Text("CCGCGGGATGCC\n"));
    // Error: C at index 25 omitted.
    reads.add(new Text("TGGCAGACCCGCGGGACGATCTCCTTGACCCATCATCGAAATTCC\n"));

    reads.add(new Text("CCCTTTCTGTTGACCCATCATTGTTTAGTAACCCGCGGGATGCCTG\n"));
    // Error: G at index 10 replaced with T.
    reads.add(new Text("GCAGACCCGCTGGACGA\n"));
    reads.add(new Text("TCTCCTCTGACCCATCATCGAAATTCC\n"));

    reads.add(new Text("CCCTTTCTGTTGACCCATCATTGTTTAGTAACCCGCGGGATGC"));
    // Error: final G omitted.
    reads.add(new Text("CTGGCAGACCCGC\n"));
    reads.add(new Text("GGACGATCTCCTCT\n"));
    // Error: CG at index 10 transposed to GC
    reads.add(new Text("GACCCATCATCGAAATTCC\n"));

    FSDataOutputStream out = fileSystem.create(path);
    for (Text read : reads) {
        byte[] bytes = read.copyBytes();
        for (byte b : bytes)
            out.write(b);
    }
    out.close();

    fileSystem.close();
}