List of usage examples for org.apache.hadoop.fs FileSystem create
public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize, short replication, long blockSize) throws IOException
From source file:com.inmobi.conduit.distcp.tools.mapred.TestCopyMapper.java
License:Apache License
private static void touchFile(String path) throws Exception { FileSystem fs; DataOutputStream outputStream = null; GzipCodec gzipCodec = ReflectionUtils.newInstance(GzipCodec.class, getConfiguration()); Compressor gzipCompressor = CodecPool.getCompressor(gzipCodec); OutputStream compressedOut = null; try {/*from ww w. ja va 2s.com*/ fs = cluster.getFileSystem(); final Path qualifiedPath = new Path(path).makeQualified(fs); final long blockSize = fs.getDefaultBlockSize() * 2; outputStream = fs.create(qualifiedPath, true, 0, (short) (fs.getDefaultReplication() * 2), blockSize); compressedOut = gzipCodec.createOutputStream(outputStream, gzipCompressor); Message msg = new Message("generating test data".getBytes()); AuditUtil.attachHeaders(msg, currentTimestamp); byte[] encodeMsg = Base64.encodeBase64(msg.getData().array()); compressedOut.write(encodeMsg); compressedOut.write("\n".getBytes()); compressedOut.write(encodeMsg); compressedOut.write("\n".getBytes()); // Genearate a msg with different timestamp. Default window period is 60sec AuditUtil.attachHeaders(msg, nextMinuteTimeStamp); encodeMsg = Base64.encodeBase64(msg.getData().array()); compressedOut.write(encodeMsg); compressedOut.write("\n".getBytes()); compressedOut.flush(); compressedOut.close(); pathList.add(qualifiedPath); ++nFiles; FileStatus fileStatus = fs.getFileStatus(qualifiedPath); System.out.println(fileStatus.getBlockSize()); System.out.println(fileStatus.getReplication()); } finally { compressedOut.close(); IOUtils.cleanup(null, outputStream); CodecPool.returnCompressor(gzipCompressor); } }
From source file:com.inmobi.conduit.distcp.tools.TestDistCp.java
License:Apache License
private static void touchFile(String path) throws Exception { FileSystem fs; DataOutputStream outputStream = null; GzipCodec gzipCodec = ReflectionUtils.newInstance(GzipCodec.class, getConfigurationForCluster()); Compressor gzipCompressor = CodecPool.getCompressor(gzipCodec); OutputStream compressedOut = null; try {/*from w ww. j av a 2 s. co m*/ fs = cluster.getFileSystem(); final Path qualifiedPath = new Path(path).makeQualified(fs); final long blockSize = fs.getDefaultBlockSize() * 2; outputStream = fs.create(qualifiedPath, true, 0, (short) (fs.getDefaultReplication() * 2), blockSize); compressedOut = gzipCodec.createOutputStream(outputStream, gzipCompressor); compressedOut.write(new byte[FILE_SIZE]); compressedOut.write("\n".getBytes()); compressedOut.flush(); //outputStream.write(new byte[FILE_SIZE]); pathList.add(qualifiedPath); } finally { compressedOut.close(); IOUtils.cleanup(null, outputStream); CodecPool.returnCompressor(gzipCompressor); } }
From source file:com.mellanox.r4h.TestReadWhileWriting.java
License:Apache License
/** Test reading while writing. */ @Test/*from ww w . j a v a 2 s . com*/ public void pipeline_02_03() throws Exception { final Configuration conf = new HdfsConfiguration(); conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1); // create cluster final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4).build(); try { //change the lease limits. cluster.setLeasePeriod(SOFT_LEASE_LIMIT, HARD_LEASE_LIMIT); //wait for the cluster cluster.waitActive(); final FileSystem fs = cluster.getFileSystem(); final Path p = new Path(DIR, "file1"); final int half = BLOCK_SIZE / 2; //a. On Machine M1, Create file. Write half block of data. // Invoke DFSOutputStream.hflush() on the dfs file handle. // Do not close file yet. { final FSDataOutputStream out = fs.create(p, true, fs.getConf().getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY, 4096), (short) 3, BLOCK_SIZE); write(out, 0, half); //hflush ((DFSOutputStream) out.getWrappedStream()).hflush(); } //b. On another machine M2, open file and verify that the half-block // of data can be read successfully. checkFile(p, half, conf); MiniDFSClusterBridge.getAppendTestUtilLOG().info("leasechecker.interruptAndJoin()"); ((DistributedFileSystem) fs).dfs.getLeaseRenewer().interruptAndJoin(); //c. On M1, append another half block of data. Close file on M1. { //sleep to let the lease is expired. Thread.sleep(2 * SOFT_LEASE_LIMIT); final UserGroupInformation current = UserGroupInformation.getCurrentUser(); final UserGroupInformation ugi = UserGroupInformation .createUserForTesting(current.getShortUserName() + "x", new String[] { "supergroup" }); final DistributedFileSystem dfs = ugi.doAs(new PrivilegedExceptionAction<DistributedFileSystem>() { @Override public DistributedFileSystem run() throws Exception { return (DistributedFileSystem) FileSystem.newInstance(conf); } }); final FSDataOutputStream out = append(dfs, p); write(out, 0, half); out.close(); } //d. On M2, open file and read 1 block of data from it. Close file. checkFile(p, 2 * half, conf); } finally { cluster.shutdown(); } }
From source file:com.phantom.hadoop.examples.terasort.TeraInputFormat.java
License:Apache License
/** * Use the input splits to take samples of the input and generate sample * keys. By default reads 100,000 keys from 10 locations in the input, sorts * them and picks N-1 keys to generate N equally sized partitions. * //from w w w . j a v a2s . c om * @param job * the job to sample * @param partFile * where to write the output file to * @throws Throwable * if something goes wrong */ public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable { long t1 = System.currentTimeMillis(); Configuration conf = job.getConfiguration(); final TeraInputFormat inFormat = new TeraInputFormat(); final TextSampler sampler = new TextSampler(); int partitions = job.getNumReduceTasks(); long sampleSize = conf.getLong(SAMPLE_SIZE, 100000); final List<InputSplit> splits = inFormat.getSplits(job); long t2 = System.currentTimeMillis(); System.out.println("Computing input splits took " + (t2 - t1) + "ms"); int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size()); System.out.println("Sampling " + samples + " splits of " + splits.size()); final long recordsPerSample = sampleSize / samples; final int sampleStep = splits.size() / samples; Thread[] samplerReader = new Thread[samples]; SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group"); // take N samples from different parts of the input for (int i = 0; i < samples; ++i) { final int idx = i; samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) { { setDaemon(true); } public void run() { long records = 0; try { TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID()); RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx), context); reader.initialize(splits.get(sampleStep * idx), context); while (reader.nextKeyValue()) { sampler.addKey(new Text(reader.getCurrentKey())); records += 1; if (recordsPerSample <= records) { break; } } } catch (IOException ie) { System.err.println( "Got an exception while reading splits " + StringUtils.stringifyException(ie)); throw new RuntimeException(ie); } catch (InterruptedException e) { } } }; samplerReader[i].start(); } FileSystem outFs = partFile.getFileSystem(conf); DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10, outFs.getDefaultBlockSize(partFile)); for (int i = 0; i < samples; i++) { try { samplerReader[i].join(); if (threadGroup.getThrowable() != null) { throw threadGroup.getThrowable(); } } catch (InterruptedException e) { } } for (Text split : sampler.createPartitions(partitions)) { split.write(writer); } writer.close(); long t3 = System.currentTimeMillis(); System.out.println("Computing parititions took " + (t3 - t2) + "ms"); }
From source file:com.streamsets.pipeline.stage.origin.hdfs.cluster.ClusterHDFSSourceIT.java
License:Apache License
private static void writeFile(FileSystem fs, Path ph, int size) throws IOException { FSDataOutputStream stm = fs.create(ph, true, 4096, (short) 3, 512); for (int i = 0; i < 1; i++) { stm.write(new byte[size]); }//w w w . j a v a 2 s. co m stm.hsync(); stm.hsync(); stm.close(); }