Example usage for org.apache.hadoop.fs FileSystem create

List of usage examples for org.apache.hadoop.fs FileSystem create

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem create.

Prototype

public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize, short replication, long blockSize)
        throws IOException 

Document

Create an FSDataOutputStream at the indicated Path.
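
To make the parameters concrete before the project examples below, here is a minimal sketch of this overload. The target path, buffer size, replication factor and block size are illustrative values chosen for this page, not taken from any of the quoted sources.

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemCreateExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // f = target path, overwrite = true, bufferSize = 4 KB,
        // replication = 1, blockSize = 128 MB
        Path target = new Path("/tmp/filesystem-create-example.txt");
        try (FSDataOutputStream out = fs.create(target, true, 4096, (short) 1, 128L * 1024 * 1024)) {
            out.write("hello hdfs".getBytes(StandardCharsets.UTF_8));
        }
    }
}

The examples below pass different buffer sizes, replication factors and block sizes to exercise specific behaviour, mostly in tests.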

Usage

From source file: com.inmobi.conduit.distcp.tools.mapred.TestCopyMapper.java

License: Apache License

private static void touchFile(String path) throws Exception {
    FileSystem fs;
    DataOutputStream outputStream = null;
    GzipCodec gzipCodec = ReflectionUtils.newInstance(GzipCodec.class, getConfiguration());
    Compressor gzipCompressor = CodecPool.getCompressor(gzipCodec);
    OutputStream compressedOut = null;
    try {
        fs = cluster.getFileSystem();
        final Path qualifiedPath = new Path(path).makeQualified(fs);
        final long blockSize = fs.getDefaultBlockSize() * 2;
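        // overwrite = true, buffer-size hint of 0, twice the default replication and twice the default block size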
        outputStream = fs.create(qualifiedPath, true, 0, (short) (fs.getDefaultReplication() * 2), blockSize);
        compressedOut = gzipCodec.createOutputStream(outputStream, gzipCompressor);
        Message msg = new Message("generating test data".getBytes());
        AuditUtil.attachHeaders(msg, currentTimestamp);
        byte[] encodeMsg = Base64.encodeBase64(msg.getData().array());
        compressedOut.write(encodeMsg);
        compressedOut.write("\n".getBytes());
        compressedOut.write(encodeMsg);
        compressedOut.write("\n".getBytes());
        // Generate a msg with a different timestamp. Default window period is 60 sec.
        AuditUtil.attachHeaders(msg, nextMinuteTimeStamp);
        encodeMsg = Base64.encodeBase64(msg.getData().array());
        compressedOut.write(encodeMsg);
        compressedOut.write("\n".getBytes());
        compressedOut.flush();
        compressedOut.close();
        pathList.add(qualifiedPath);
        ++nFiles;

        FileStatus fileStatus = fs.getFileStatus(qualifiedPath);
        System.out.println(fileStatus.getBlockSize());
        System.out.println(fileStatus.getReplication());
    } finally {
        // IOUtils.cleanup tolerates nulls, so this is safe even if create() failed
        IOUtils.cleanup(null, compressedOut, outputStream);
        CodecPool.returnCompressor(gzipCompressor);
    }
}

From source file: com.inmobi.conduit.distcp.tools.TestDistCp.java

License: Apache License

private static void touchFile(String path) throws Exception {
    FileSystem fs;
    DataOutputStream outputStream = null;
    GzipCodec gzipCodec = ReflectionUtils.newInstance(GzipCodec.class, getConfigurationForCluster());
    Compressor gzipCompressor = CodecPool.getCompressor(gzipCodec);
    OutputStream compressedOut = null;
    try {
        fs = cluster.getFileSystem();
        final Path qualifiedPath = new Path(path).makeQualified(fs);
        final long blockSize = fs.getDefaultBlockSize() * 2;
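        // same overload: overwrite enabled, zero buffer-size hint, doubled replication and block size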
        outputStream = fs.create(qualifiedPath, true, 0, (short) (fs.getDefaultReplication() * 2), blockSize);
        compressedOut = gzipCodec.createOutputStream(outputStream, gzipCompressor);
        compressedOut.write(new byte[FILE_SIZE]);
        compressedOut.write("\n".getBytes());
        compressedOut.flush();
        //outputStream.write(new byte[FILE_SIZE]);
        pathList.add(qualifiedPath);
    } finally {
        IOUtils.cleanup(null, compressedOut, outputStream);
        CodecPool.returnCompressor(gzipCompressor);
    }
}

From source file: com.mellanox.r4h.TestReadWhileWriting.java

License: Apache License

/** Test reading while writing. */
@Test
public void pipeline_02_03() throws Exception {
    final Configuration conf = new HdfsConfiguration();
    conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);

    // create cluster
    final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4).build();
    try {
        //change the lease limits.
        cluster.setLeasePeriod(SOFT_LEASE_LIMIT, HARD_LEASE_LIMIT);

        //wait for the cluster
        cluster.waitActive();
        final FileSystem fs = cluster.getFileSystem();
        final Path p = new Path(DIR, "file1");
        final int half = BLOCK_SIZE / 2;

        //a. On Machine M1, Create file. Write half block of data.
        //   Invoke DFSOutputStream.hflush() on the dfs file handle.
        //   Do not close file yet.
        {
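            // overwrite = true, buffer size from io.file.buffer.size (falling back to 4096), replication 3, test block size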
            final FSDataOutputStream out = fs.create(p, true,
                    fs.getConf().getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY, 4096), (short) 3,
                    BLOCK_SIZE);
            write(out, 0, half);

            //hflush
            ((DFSOutputStream) out.getWrappedStream()).hflush();
        }

        //b. On another machine M2, open file and verify that the half-block
        //   of data can be read successfully.
        checkFile(p, half, conf);
        MiniDFSClusterBridge.getAppendTestUtilLOG().info("leasechecker.interruptAndJoin()");
        ((DistributedFileSystem) fs).dfs.getLeaseRenewer().interruptAndJoin();

        //c. On M1, append another half block of data.  Close file on M1.
        {
            //sleep to let the lease expire.
            Thread.sleep(2 * SOFT_LEASE_LIMIT);

            final UserGroupInformation current = UserGroupInformation.getCurrentUser();
            final UserGroupInformation ugi = UserGroupInformation
                    .createUserForTesting(current.getShortUserName() + "x", new String[] { "supergroup" });
            final DistributedFileSystem dfs = ugi.doAs(new PrivilegedExceptionAction<DistributedFileSystem>() {
                @Override
                public DistributedFileSystem run() throws Exception {
                    return (DistributedFileSystem) FileSystem.newInstance(conf);
                }
            });
            final FSDataOutputStream out = append(dfs, p);
            write(out, 0, half);
            out.close();
        }

        //d. On M2, open file and read 1 block of data from it. Close file.
        checkFile(p, 2 * half, conf);
    } finally {
        cluster.shutdown();
    }
}

From source file: com.phantom.hadoop.examples.terasort.TeraInputFormat.java

License: Apache License

/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 *
 * @param job
 *            the job to sample
 * @param partFile
 *            where to write the output file to
 * @throws Throwable
 *             if something goes wrong
 */
public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable {
    long t1 = System.currentTimeMillis();
    Configuration conf = job.getConfiguration();
    final TeraInputFormat inFormat = new TeraInputFormat();
    final TextSampler sampler = new TextSampler();
    int partitions = job.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
    final List<InputSplit> splits = inFormat.getSplits(job);
    long t2 = System.currentTimeMillis();
    System.out.println("Computing input splits took " + (t2 - t1) + "ms");
    int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size());
    System.out.println("Sampling " + samples + " splits of " + splits.size());
    final long recordsPerSample = sampleSize / samples;
    final int sampleStep = splits.size() / samples;
    Thread[] samplerReader = new Thread[samples];
    SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        final int idx = i;
        samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) {
            {
                setDaemon(true);
            }

            public void run() {
                long records = 0;
                try {
                    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
                            new TaskAttemptID());
                    RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx),
                            context);
                    reader.initialize(splits.get(sampleStep * idx), context);
                    while (reader.nextKeyValue()) {
                        sampler.addKey(new Text(reader.getCurrentKey()));
                        records += 1;
                        if (recordsPerSample <= records) {
                            break;
                        }
                    }
                } catch (IOException ie) {
                    System.err.println(
                            "Got an exception while reading splits " + StringUtils.stringifyException(ie));
                    throw new RuntimeException(ie);
                } catch (InterruptedException e) {

                }
            }
        };
        samplerReader[i].start();
    }
    FileSystem outFs = partFile.getFileSystem(conf);
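    // overwrite = true, 64 KB buffer, replication factor 10, the filesystem's default block size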
    DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10,
            outFs.getDefaultBlockSize(partFile));
    for (int i = 0; i < samples; i++) {
        try {
            samplerReader[i].join();
            if (threadGroup.getThrowable() != null) {
                throw threadGroup.getThrowable();
            }
        } catch (InterruptedException e) {
        }
    }
    for (Text split : sampler.createPartitions(partitions)) {
        split.write(writer);
    }
    writer.close();
    long t3 = System.currentTimeMillis();
    System.out.println("Computing parititions took " + (t3 - t2) + "ms");
}

From source file: com.streamsets.pipeline.stage.origin.hdfs.cluster.ClusterHDFSSourceIT.java

License: Apache License

private static void writeFile(FileSystem fs, Path ph, int size) throws IOException {
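    // overwrite = true, 4 KB buffer, replication 3, 512-byte block size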
    FSDataOutputStream stm = fs.create(ph, true, 4096, (short) 3, 512);
    for (int i = 0; i < 1; i++) {
        stm.write(new byte[size]);
    }
    stm.hsync();
    stm.hsync();
    stm.close();
}