Usage examples for org.apache.hadoop.fs.FileSystem.create
public FSDataOutputStream create(Path f, boolean overwrite, int bufferSize, short replication, long blockSize) throws IOException
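Before the collected examples, here is a minimal, self-contained sketch of how this overload is typically called. The class name, path, buffer size, replication factor, and block size are illustrative values chosen for the sketch, not taken from any of the sources below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        Path path = new Path("/tmp/create-example.bin"); // hypothetical target path
        boolean overwrite = true;                        // replace the file if it exists
        int bufferSize = 4096;                           // I/O buffer size in bytes
        short replication = 1;                           // number of block replicas
        long blockSize = 128L * 1024 * 1024;             // 128 MB HDFS block size

        FSDataOutputStream out = fs.create(path, overwrite, bufferSize, replication, blockSize);
        try {
            out.write(new byte[bufferSize]);             // write one buffer of zero bytes
        } finally {
            out.close();
        }
    }
}

Passing the buffer size, replication, and block size explicitly is mainly useful when those parameters matter, as in the benchmarks below that sweep them; for ordinary writes the simpler create(Path) overload uses the filesystem defaults.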
From source file:HdfsReader.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length < 1) { System.err.println("HdfsReader [FileSize i.e. 1g/10g/100g/200g]"); return 1; }/*from ww w . ja va 2s .co m*/ double fileSize; double fileSizeInMB; if (args[0].equals("1g")) { fileSize = 1073741824.0; fileSizeInMB = 1024.0; } else if (args[0].equals("10g")) { fileSize = 10737418240.0; fileSizeInMB = 10240.0; } else if (args[0].equals("100g")) { fileSize = 107374182400.0; fileSizeInMB = 102400.0; } else if (args[0].equals("200g")) { fileSize = 214748364800.0; fileSizeInMB = 204800.0; } else { throw new IllegalArgumentException("Invalid arg: " + args[0]); } String fileName = "read-" + args[0] + "-avg.txt"; File avgFile = new File(fileName); PrintWriter avgPW = new PrintWriter(avgFile); fileName = "read-" + args[0] + "-min.txt"; File minFile = new File(fileName); PrintWriter minPW = new PrintWriter(minFile); fileName = "read-" + args[0] + "-max.txt"; File maxFile = new File(fileName); PrintWriter maxPW = new PrintWriter(maxFile); int numIters = 10; int bufferSize = 4096; long blockSize[] = new long[] { 67108864, 134217728, 268435456, 536870912, 1073741824 }; short replication[] = new short[] { 1, 4 }; String hdfsFile = "/hdfs_test/" + args[0] + "/1.in"; Configuration conf = getConf(); FileSystem fs = FileSystem.get(conf); Path hdfsFilePath = new Path(hdfsFile); for (int i = 0; i < 5; i++) { // blockSize for (int j = 0; j < 2; j++) { // replication OutputStream os = fs.create(hdfsFilePath, true, bufferSize, replication[j], blockSize[i]); byte[] buf = new byte[bufferSize]; for (int m = 0; m < bufferSize; m += 4) { buf[m] = (byte) m; } double numBufPerFile = fileSize / (double) bufferSize; for (double m = 0.0; m < numBufPerFile; m++) { os.write(buf); } os.close(); long avg = 0, min = Long.MAX_VALUE, max = Long.MIN_VALUE; for (int k = 0; k < numIters; k++) { InputStream is = fs.open(hdfsFilePath); long startTime = System.currentTimeMillis(); int bytesRead = is.read(buf); while (bytesRead != -1) { bytesRead = is.read(buf); } is.close(); long endTime = System.currentTimeMillis(); long duration = (endTime - startTime); avg += duration; if (duration < min) { min = duration; } if (duration > max) { max = duration; } } // write result to output double avgBW = fileSizeInMB * 1000.0 * (double) numIters / (double) avg; avgPW.print(avgBW); avgPW.print("\t"); double minBW = fileSizeInMB * 1000.0 / (double) max; minPW.print(minBW); minPW.print("\t"); double maxBW = fileSizeInMB * 1000.0 / (double) min; maxPW.print(maxBW); maxPW.print("\t"); } avgPW.println(); minPW.println(); maxPW.println(); } avgPW.close(); minPW.close(); maxPW.close(); return 0; }
From source file:HdfsCacheReader.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length < 1) { System.err.println("HdfsReader [FileSize i.e. 1g/10g/100g/200g]"); return 1; }/*from w w w . java2 s . co m*/ double fileSize; double fileSizeInMB; if (args[0].equals("1g")) { fileSize = 1073741824.0; fileSizeInMB = 1024.0; } else if (args[0].equals("10g")) { fileSize = 10737418240.0; fileSizeInMB = 10240.0; } else if (args[0].equals("100g")) { fileSize = 107374182400.0; fileSizeInMB = 102400.0; } else if (args[0].equals("200g")) { fileSize = 214748364800.0; fileSizeInMB = 204800.0; } else { throw new IllegalArgumentException("Invalid arg: " + args[0]); } String fileName = "cacheRead-" + args[0] + "-avg.txt"; File avgFile = new File(fileName); PrintWriter avgPW = new PrintWriter(avgFile); fileName = "cacheRead-" + args[0] + "-min.txt"; File minFile = new File(fileName); PrintWriter minPW = new PrintWriter(minFile); fileName = "cacheRead-" + args[0] + "-max.txt"; File maxFile = new File(fileName); PrintWriter maxPW = new PrintWriter(maxFile); int numIters = 10; int bufferSize = 65536; long blockSize[] = new long[] { 67108864, 134217728, 268435456, 536870912, 1073741824 }; short replication[] = new short[] { 1, 4 }; String hdfsFile = "/hdfs_test/" + args[0] + "/1.in"; Configuration conf = getConf(); FileSystem fs = FileSystem.get(conf); Path hdfsFilePath = new Path(hdfsFile); for (int i = 0; i < 5; i++) { // blockSize for (int j = 0; j < 2; j++) { // replication OutputStream os = fs.create(hdfsFilePath, true, bufferSize, replication[j], blockSize[i]); byte[] buf = new byte[bufferSize]; for (int m = 0; m < bufferSize; m += 4) { buf[m] = (byte) m; } double numBufPerFile = fileSize / (double) bufferSize; for (double m = 0.0; m < numBufPerFile; m++) { os.write(buf); } os.close(); String cmdStr = "/usr/local/hadoop/bin/hdfs cacheadmin -addDirective -path " + hdfsFile + " -pool hdfs_test"; Process p = Runtime.getRuntime().exec(cmdStr); p.waitFor(); String cmdOutLine = ""; StringBuffer cmdOut = new StringBuffer(); BufferedReader cmdOutReader = new BufferedReader(new InputStreamReader(p.getInputStream())); while ((cmdOutLine = cmdOutReader.readLine()) != null) { cmdOut.append(cmdOutLine + "\n"); } // System.out.println (cmdOut.toString()); long avg = 0, min = Long.MAX_VALUE, max = Long.MIN_VALUE; for (int k = 0; k < numIters; k++) { FSDataInputStream in = fs.open(hdfsFilePath); ByteBuffer bbuf = null; ElasticByteBufferPool ebbp = new ElasticByteBufferPool(); long startTime = System.currentTimeMillis(); while ((bbuf = in.read(ebbp, bufferSize, EnumSet.of(ReadOption.SKIP_CHECKSUMS))) != null) { in.releaseBuffer(bbuf); } long endTime = System.currentTimeMillis(); in.close(); long duration = (endTime - startTime); avg += duration; if (duration < min) { min = duration; } if (duration > max) { max = duration; } } // write result to output double avgBW = fileSizeInMB * 1000.0 * (double) numIters / (double) avg; avgPW.print(avgBW); avgPW.print("\t"); double minBW = fileSizeInMB * 1000.0 / (double) max; minPW.print(minBW); minPW.print("\t"); double maxBW = fileSizeInMB * 1000.0 / (double) min; maxPW.print(maxBW); maxPW.print("\t"); cmdStr = "/usr/local/hadoop/bin/hdfs cacheadmin -removeDirectives -path " + hdfsFile; p = Runtime.getRuntime().exec(cmdStr); p.waitFor(); cmdOutLine = ""; cmdOut.setLength(0); cmdOutReader = new BufferedReader(new InputStreamReader(p.getInputStream())); while ((cmdOutLine = cmdOutReader.readLine()) != null) { cmdOut.append(cmdOutLine + "\n"); } // System.out.println (cmdOut.toString()); fs.delete(hdfsFilePath, 
true); } avgPW.println(); minPW.println(); maxPW.println(); } avgPW.close(); minPW.close(); maxPW.close(); return 0; }
From source file:SingleFileWriter.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length < 1) { System.err.println("SingleFileWriter [fileSize ie. 1g/10g/100g]"); return 1; }/*from w w w .j av a2 s . c o m*/ double fileSize = Double.parseDouble((args[0].split("g|G"))[0]) * 1024 * 1024 * 1024; String hdfsFolder = "/hdfs_test/"; String hdfsFile = hdfsFolder + args[0]; short replication = 1; boolean overWrite = true; int bufferSize = 65536; int blockSize = 536870912; double numIters = fileSize / (double) bufferSize; /* Initialize byte buffer */ ByteBuffer buf = ByteBuffer.allocate(bufferSize); buf.order(ByteOrder.nativeOrder()); for (int k = 0; k < bufferSize / Integer.SIZE; k++) { buf.putInt(k); } buf.flip(); /* Create file on HDFS */ Configuration conf = getConf(); FileSystem fs = FileSystem.get(conf); Path hdfsFilePath = new Path(hdfsFile); OutputStream os = fs.create(hdfsFilePath, overWrite, bufferSize, replication, blockSize); /* Write the content of the byte buffer to the HDFS file*/ Timer t = new Timer(); t.start(0); for (long i = 0; i < numIters; i++) { os.write(buf.array()); buf.flip(); } t.end(0); os.close(); fs.delete(hdfsFilePath, true); t.dump(); return 0; }
From source file:a.TestConcatExample.java
License:Apache License
private long writeFile(FileSystem fs, Path p, int blockSize, short replication, int numBlocks)
        throws IOException {
    int bufferSize = 4096;
    FSDataOutputStream os = fs.create(p, true, bufferSize, replication, blockSize);
    int i = 0;
    byte[] data = new byte[bufferSize];
    r.nextBytes(data);
    while (i < blockSize * numBlocks) {
        os.write(data);
        i += data.length;
    }
    os.close();
    FileStatus fileStatus = fs.getFileStatus(p);
    long f1Len = fileStatus.getLen();
    assertEquals(i, f1Len);
    return f1Len;
}
From source file:cn.uway.util.apache.parquet.hadoop.ParquetFileWriter.java
License:Apache License
/**
 * @param configuration Hadoop configuration
 * @param schema the schema of the data
 * @param file the file to write to
 * @param mode file creation mode
 * @param rowGroupSize the row group size
 * @throws IOException if the file can not be created
 */
public ParquetFileWriter(Configuration configuration, MessageType schema, Path file, Mode mode,
        long rowGroupSize, int maxPaddingSize) throws IOException {
    TypeUtil.checkValidWriteSchema(schema);
    this.schema = schema;
    FileSystem fs = file.getFileSystem(configuration);
    boolean overwriteFlag = (mode == Mode.OVERWRITE);
    if (supportsBlockSize(fs)) {
        // use the default block size, unless row group size is larger
        long dfsBlockSize = Math.max(fs.getDefaultBlockSize(file), rowGroupSize);
        this.alignment = PaddingAlignment.get(dfsBlockSize, rowGroupSize, maxPaddingSize);
        this.out = fs.create(file, overwriteFlag, DFS_BUFFER_SIZE_DEFAULT, fs.getDefaultReplication(file),
                dfsBlockSize);
    } else {
        this.alignment = NoAlignment.get(rowGroupSize);
        this.out = fs.create(file, overwriteFlag);
    }
}
From source file:cn.uway.util.apache.parquet.hadoop.ParquetFileWriter.java
License:Apache License
/**
 * FOR TESTING ONLY.
 *
 * @param configuration Hadoop configuration
 * @param schema the schema of the data
 * @param file the file to write to
 * @param rowAndBlockSize the row group size
 * @throws IOException if the file can not be created
 */
ParquetFileWriter(Configuration configuration, MessageType schema, Path file, long rowAndBlockSize,
        int maxPaddingSize) throws IOException {
    FileSystem fs = file.getFileSystem(configuration);
    this.schema = schema;
    this.alignment = PaddingAlignment.get(rowAndBlockSize, rowAndBlockSize, maxPaddingSize);
    this.out = fs.create(file, true, DFS_BUFFER_SIZE_DEFAULT, fs.getDefaultReplication(file), rowAndBlockSize);
}
From source file:co.cask.cdap.common.logging.SyncTest.java
License:Apache License
@Test
@Ignore
public void testSync() throws IOException {
    FileSystem fs = FileSystem.get(config);
    // create a file and write n bytes, then sync
    Path path = new Path("/myfile");
    FSDataOutputStream out = fs.create(path, false, 4096, (short) 2, 4096L);
    int numBytes = 5000;
    for (int i = 0; i < numBytes; i++) {
        out.write((byte) i);
    }
    out.hflush();
    // verify the file is there
    Assert.assertTrue(fs.exists(path));
    // do not verify the length of the file, hflush() does not update that
    //Assert.assertEquals(numBytes, fs.getFileStatus(path).getLen());
    // read back and verify all bytes
    FSDataInputStream in = fs.open(path);
    byte[] buffer = new byte[numBytes];
    in.readFully(buffer);
    for (int i = 0; i < numBytes; i++) {
        Assert.assertEquals((byte) i, buffer[i]);
    }
    in.close();
    // now close the writer
    out.close();
}
From source file:com.cloudera.CacheTool.java
License:Apache License
public static void createFile(FileSystem fs, Path fileName, long fileLen) throws IOException {
    int bufferLen = 1024;
    assert bufferLen > 0;
    if (!fs.mkdirs(fileName.getParent())) {
        throw new IOException("Mkdirs failed to create " + fileName.getParent().toString());
    }
    FSDataOutputStream out = null;
    try {
        out = fs.create(fileName, true,
                fs.getConf().getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY, 4096),
                (short) 1, fs.getDefaultBlockSize(fileName));
        if (fileLen > 0) {
            byte[] toWrite = new byte[bufferLen];
            Random rb = new Random(0);
            long bytesToWrite = fileLen;
            while (bytesToWrite > 0) {
                rb.nextBytes(toWrite);
                int bytesToWriteNext = (bufferLen < bytesToWrite) ? bufferLen : (int) bytesToWrite;
                out.write(toWrite, 0, bytesToWriteNext);
                bytesToWrite -= bytesToWriteNext;
            }
        }
    } finally {
        if (out != null) {
            out.close();
        }
    }
}
From source file:com.cloudera.recordservice.examples.terasort.TeraInputFormat.java
License:Apache License
/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 * @param job the job to sample
 * @param partFile where to write the output file to
 * @throws Throwable if something goes wrong
 */
public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable {
    long t1 = System.currentTimeMillis();
    Configuration conf = job.getConfiguration();
    final TeraInputFormat inFormat = new TeraInputFormat();
    final TextSampler sampler = new TextSampler();
    int partitions = job.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
    final List<InputSplit> splits = inFormat.getSplits(job);
    long t2 = System.currentTimeMillis();
    System.out.println("Computing input splits took " + (t2 - t1) + "ms");
    int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size());
    System.out.println("Sampling " + samples + " splits of " + splits.size());
    final long recordsPerSample = sampleSize / samples;
    final int sampleStep = splits.size() / samples;
    Thread[] samplerReader = new Thread[samples];
    SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        final int idx = i;
        samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) {
            {
                setDaemon(true);
            }

            @Override
            public void run() {
                long records = 0;
                try {
                    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
                            new TaskAttemptID());
                    RecordReader<Text, Text> reader =
                            inFormat.createRecordReader(splits.get(sampleStep * idx), context);
                    reader.initialize(splits.get(sampleStep * idx), context);
                    while (reader.nextKeyValue()) {
                        sampler.addKey(new Text(reader.getCurrentKey()));
                        records += 1;
                        if (recordsPerSample <= records) {
                            break;
                        }
                    }
                } catch (IOException ie) {
                    System.err.println("Got an exception while reading splits "
                            + StringUtils.stringifyException(ie));
                    throw new RuntimeException(ie);
                } catch (InterruptedException e) {
                }
            }
        };
        samplerReader[i].start();
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10,
            outFs.getDefaultBlockSize(partFile));
    for (int i = 0; i < samples; i++) {
        try {
            samplerReader[i].join();
            if (threadGroup.getThrowable() != null) {
                throw threadGroup.getThrowable();
            }
        } catch (InterruptedException e) {
        }
    }
    for (Text split : sampler.createPartitions(partitions)) {
        split.write(writer);
    }
    writer.close();
    long t3 = System.currentTimeMillis();
    System.out.println("Computing parititions took " + (t3 - t2) + "ms");
}
From source file:com.hadoop.hdfs_test.HdfsWriter.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length < 1) { System.err.println("HdfsWriter [fileSize ie. 1g/10g/40g]"); return 1; }//from w ww . j av a2 s .c om double fileSize; double fileSizeInMB; if (args[0].equals("1g")) { fileSize = 1073741824.0; fileSizeInMB = 1024.0; } else if (args[0].equals("10g")) { fileSize = 10737418240.0; fileSizeInMB = 10240.0; } else if (args[0].equals("100g")) { fileSize = 107374182400.0; fileSizeInMB = 102400.0; } else if (args[0].equals("200g")) { fileSize = 214748364800.0; fileSizeInMB = 204800.0; } else { throw new IllegalArgumentException("Invalid arg: " + args[0]); } String localFolder = "/home/hduser/projects/hdfs_test/input/"; String hdfsFolder = "/hdfs_test/"; int numFiles = 5; short replication = 1; String hdfsFile; long startTime, endTime, duration = 0; long avg = 0, min = Long.MAX_VALUE, max = Long.MIN_VALUE; String fileName = args[0] + "-avg.txt"; File avgFile = new File(fileName); PrintWriter avgPW = new PrintWriter(avgFile); fileName = args[0] + "-min.txt"; File minFile = new File(fileName); PrintWriter minPW = new PrintWriter(minFile); fileName = args[0] + "-max.txt"; File maxFile = new File(fileName); PrintWriter maxPW = new PrintWriter(maxFile); boolean overWrite = true; int bufferSize[] = new int[] { 4096, 16384, 65536, 262144 }; long blockSize[] = new long[] { 67108864, 134217728, 268435456 }; Configuration conf = getConf(); System.out.println("configured filesystem = " + conf.get(FS_PARAM_NAME)); FileSystem fs = FileSystem.get(conf); for (int i = 0; i < 4; i++) { // different buffer size for (int j = 0; j < 3; j++) { // different block size double numIters = fileSize / (double) bufferSize[i]; byte[] buf = new byte[bufferSize[i]]; for (int m = 0; m < bufferSize[i]; m += 4) { buf[m] = (byte) m; } for (int k = 1; k <= numFiles; k++) { hdfsFile = hdfsFolder + args[0] + "/" + i + ".in"; Path outputPath = new Path(hdfsFile); OutputStream os = fs.create(outputPath, overWrite, bufferSize[i], replication, blockSize[j]); startTime = System.currentTimeMillis(); for (long m = 0; m < numIters; m++) { os.write(buf); } endTime = System.currentTimeMillis(); os.close(); fs.delete(outputPath, true); duration = endTime - startTime; avg += duration; if (duration < min) { min = duration; } if (duration > max) { max = duration; } } // write result to output Double avgBW = fileSizeInMB * 1000.0 * (double) numFiles / (double) avg; avgPW.print(avgBW); avgPW.print("\t"); double minBW = fileSizeInMB * 1000.0 / (double) max; minPW.print(minBW); minPW.print("\t"); double maxBW = fileSizeInMB * 1000.0 / (double) min; maxPW.print(maxBW); maxPW.print("\t"); duration = 0; avg = 0; min = Long.MAX_VALUE; max = Long.MIN_VALUE; } avgPW.println(); minPW.println(); maxPW.println(); } //avgFile.close(); avgPW.close(); //minFile.close(); minPW.close(); //maxFile.close(); maxPW.close(); /* System.out.println ("avg: " + (fileSizeInMB*1000.0*(double)numFiles/(double)avg) + " max: " + (fileSizeInMB*1000.0)/(double)min + " min: " + (fileSizeInMB*1000.0)/(double)max); */ return 0; }