Example usage for org.apache.hadoop.fs FSDataOutputStream FSDataOutputStream

Introduction

This page lists example usages of the org.apache.hadoop.fs FSDataOutputStream(OutputStream, FileSystem.Statistics) constructor, collected from open-source projects.

Prototype

public FSDataOutputStream(OutputStream out, FileSystem.Statistics stats) 
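
The constructor wraps any java.io.OutputStream and exposes Hadoop's position-tracking stream API on top of it; when the FileSystem.Statistics argument is non-null, it is used to account for the bytes written (several examples below pass null instead). A minimal, self-contained sketch, assuming an in-memory buffer and a hypothetical "example" scheme name for the statistics:

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;

public class FSDataOutputStreamSketch {
    public static void main(String[] args) throws IOException {
        // Any OutputStream works; file system implementations pass their
        // own backend streams, as the examples below show.
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        // Hypothetical scheme name; the statistics argument may also be null.
        FileSystem.Statistics stats = new FileSystem.Statistics("example");
        try (FSDataOutputStream out = new FSDataOutputStream(buffer, stats)) {
            out.write(new byte[] { 1, 2, 3 });
            System.out.println(out.getPos()); // 3: position tracks bytes written
        }
    }
}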

Usage

From source file: alluxio.hadoop.AbstractFileSystem.java

License: Apache License

@Override
public FSDataOutputStream append(Path path, int bufferSize, Progressable progress) throws IOException {
    LOG.info("append({}, {}, {})", path, bufferSize, progress);
    if (mStatistics != null) {
        mStatistics.incrementWriteOps(1);
    }
    AlluxioURI uri = new AlluxioURI(HadoopUtils.getPathWithoutScheme(path));
    try {
        if (!sFileSystem.exists(uri)) {
            return new FSDataOutputStream(sFileSystem.createFile(uri), mStatistics);
        } else {
            throw new IOException(ExceptionMessage.FILE_ALREADY_EXISTS.getMessage(uri));
        }
    } catch (AlluxioException e) {
        throw new IOException(e);
    }
}

From source file: alluxio.hadoop.AbstractFileSystem.java

License: Apache License

/**
 * Attempts to create a file. Overwrite will not succeed if the path exists and is a folder.
 *
 * @param path path to create
 * @param permission permissions of the created file/folder
 * @param overwrite overwrite if file exists
 * @param bufferSize the size in bytes of the buffer to be used
 * @param replication under filesystem replication factor
 * @param blockSize block size in bytes
 * @param progress queryable progress
 * @return an {@link FSDataOutputStream} created at the indicated path of a file
 * @throws IOException if overwrite is not specified and the path already exists or if the path is
 *         a folder
 */
@Override
public FSDataOutputStream create(Path path, FsPermission permission, boolean overwrite, int bufferSize,
        short replication, long blockSize, Progressable progress) throws IOException {
    LOG.info("create({}, {}, {}, {}, {}, {}, {})", path, permission, overwrite, bufferSize, replication,
            blockSize, progress);
    if (mStatistics != null) {
        mStatistics.incrementWriteOps(1);
    }

    // Check whether the file already exists, and delete it if overwrite is true
    AlluxioURI uri = new AlluxioURI(HadoopUtils.getPathWithoutScheme(path));
    try {
        if (sFileSystem.exists(uri)) {
            if (!overwrite) {
                throw new IOException(ExceptionMessage.FILE_ALREADY_EXISTS.getMessage(uri));
            }
            if (sFileSystem.getStatus(uri).isFolder()) {
                throw new IOException(ExceptionMessage.FILE_CREATE_IS_DIRECTORY.getMessage(uri));
            }
            sFileSystem.delete(uri);
        }
    } catch (AlluxioException e) {
        throw new IOException(e);
    }

    // The file no longer exists at this point, so we can create it
    CreateFileOptions options = CreateFileOptions.defaults().setBlockSizeBytes(blockSize);
    try {
        FileOutStream outStream = sFileSystem.createFile(uri, options);
        return new FSDataOutputStream(outStream, mStatistics);
    } catch (AlluxioException e) {
        throw new IOException(e);
    }
}

From source file: cascading.tap.hadoop.S3HttpFileSystem.java

License: Open Source License

@Override
public FSDataOutputStream create(final Path path, FsPermission permission, boolean overwrite, int bufferSize,
        short replication, long blockSize, Progressable progress) throws IOException {
    if (!overwrite && exists(path))
        throw new IOException("file already exists: " + path);

    if (LOG.isDebugEnabled())
        LOG.debug("creating file: " + path);

    final ByteArrayOutputStream stream = new ByteArrayOutputStream();
    final DigestOutputStream digestStream = new DigestOutputStream(stream, getMD5Digest());

    // Buffer the output in memory; the actual upload to S3 happens in close()
    return new FSDataOutputStream(digestStream, null) {
        @Override
        public void close() throws IOException {
            super.close();

            S3Object object = S3Util.getObject(s3Service, s3Bucket, path, S3Util.Request.CREATE_OBJECT);

            object.setContentType("text/plain"); // todo use 'binary/octet-stream'
            object.setMd5Hash(digestStream.getMessageDigest().digest());

            // todo buffer to disk instead
            byte[] bytes = stream.toByteArray();

            object.setDataInputStream(new ByteArrayInputStream(bytes));
            object.setContentLength(bytes.length);

            if (LOG.isDebugEnabled())
                LOG.debug("putting file: " + path);

            S3Util.putObject(s3Service, s3Bucket, object);
        }
    };
}

From source file: co.cask.cdap.template.etl.batch.ETLTPFSTest.java

License: Apache License

@Test
public void testAvroSourceConversionToAvroSink() throws Exception {

    Schema eventSchema = Schema.recordOf("record", Schema.Field.of("int", Schema.of(Schema.Type.INT)));

    org.apache.avro.Schema avroSchema = new org.apache.avro.Schema.Parser().parse(eventSchema.toString());

    GenericRecord record = new GenericRecordBuilder(avroSchema).set("int", Integer.MAX_VALUE).build();

    String filesetName = "tpfs";
    DatasetAdmin datasetAdmin = addDatasetInstance("timePartitionedFileSet", filesetName,
            FileSetProperties.builder().setInputFormat(AvroKeyInputFormat.class)
                    .setOutputFormat(AvroKeyOutputFormat.class)
                    .setInputProperty("schema", avroSchema.toString())
                    .setOutputProperty("schema", avroSchema.toString()).setEnableExploreOnCreate(true)
                    .setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe")
                    .setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat")
                    .setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat")
                    .setTableProperty("avro.schema.literal", (avroSchema.toString())).build());
    DataSetManager<TimePartitionedFileSet> fileSetManager = getDataset(filesetName);
    TimePartitionedFileSet tpfs = fileSetManager.get();

    TransactionManager txService = getTxService();
    Transaction tx1 = txService.startShort(100);
    TransactionAware txTpfs = (TransactionAware) tpfs;
    txTpfs.startTx(tx1);

    long timeInMillis = System.currentTimeMillis();
    fileSetManager.get().addPartition(timeInMillis, "directory", ImmutableMap.of("key1", "value1"));
    Location location = fileSetManager.get().getPartitionByTime(timeInMillis).getLocation();
    location = location.append("file.avro");
    FSDataOutputStream outputStream = new FSDataOutputStream(location.getOutputStream(), null);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(
            new GenericDatumWriter<GenericRecord>(avroSchema));
    dataFileWriter.create(avroSchema, outputStream);
    dataFileWriter.append(record);
    dataFileWriter.flush();

    txTpfs.commitTx();
    txService.canCommit(tx1, txTpfs.getTxChanges());
    txService.commit(tx1);
    txTpfs.postTxCommit();

    String newFilesetName = filesetName + "_op";
    ETLBatchConfig etlBatchConfig = constructTPFSETLConfig(filesetName, newFilesetName, eventSchema);

    AdapterConfig newAdapterConfig = new AdapterConfig("description", TEMPLATE_ID.getId(),
            GSON.toJsonTree(etlBatchConfig));
    Id.Adapter newAdapterId = Id.Adapter.from(NAMESPACE, "sconversion1");
    AdapterManager tpfsAdapterManager = createAdapter(newAdapterId, newAdapterConfig);

    tpfsAdapterManager.start();
    tpfsAdapterManager.waitForOneRunToFinish(4, TimeUnit.MINUTES);
    tpfsAdapterManager.stop();

    DataSetManager<TimePartitionedFileSet> newFileSetManager = getDataset(newFilesetName);
    TimePartitionedFileSet newFileSet = newFileSetManager.get();

    List<GenericRecord> newRecords = readOutput(newFileSet, eventSchema);
    Assert.assertEquals(1, newRecords.size());
    Assert.assertEquals(Integer.MAX_VALUE, newRecords.get(0).get("int"));
}

From source file: co.cask.hydrator.plugin.batch.ETLTPFSTestRun.java

License: Apache License

@Test
public void testAvroSourceConversionToAvroSink() throws Exception {

    Schema eventSchema = Schema.recordOf("record", Schema.Field.of("int", Schema.of(Schema.Type.INT)));

    org.apache.avro.Schema avroSchema = new org.apache.avro.Schema.Parser().parse(eventSchema.toString());

    GenericRecord record = new GenericRecordBuilder(avroSchema).set("int", Integer.MAX_VALUE).build();

    String filesetName = "tpfs";
    addDatasetInstance(TimePartitionedFileSet.class.getName(), filesetName,
            FileSetProperties.builder().setInputFormat(AvroKeyInputFormat.class)
                    .setOutputFormat(AvroKeyOutputFormat.class)
                    .setInputProperty("schema", avroSchema.toString())
                    .setOutputProperty("schema", avroSchema.toString()).setEnableExploreOnCreate(true)
                    .setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe")
                    .setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat")
                    .setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat")
                    .setTableProperty("avro.schema.literal", (avroSchema.toString())).build());
    DataSetManager<TimePartitionedFileSet> fileSetManager = getDataset(filesetName);
    TimePartitionedFileSet tpfs = fileSetManager.get();

    TransactionManager txService = getTxService();
    Transaction tx1 = txService.startShort(100);
    TransactionAware txTpfs = (TransactionAware) tpfs;
    txTpfs.startTx(tx1);

    long timeInMillis = System.currentTimeMillis();
    fileSetManager.get().addPartition(timeInMillis, "directory", ImmutableMap.of("key1", "value1"));
    Location location = fileSetManager.get().getPartitionByTime(timeInMillis).getLocation();
    location = location.append("file.avro");
    FSDataOutputStream outputStream = new FSDataOutputStream(location.getOutputStream(), null);
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(
            new GenericDatumWriter<GenericRecord>(avroSchema));
    dataFileWriter.create(avroSchema, outputStream);
    dataFileWriter.append(record);
    dataFileWriter.flush();

    txTpfs.commitTx();
    txService.canCommit(tx1, txTpfs.getTxChanges());
    txService.commit(tx1);
    txTpfs.postTxCommit();

    String newFilesetName = filesetName + "_op";
    ETLBatchConfig etlBatchConfig = constructTPFSETLConfig(filesetName, newFilesetName, eventSchema);

    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(ETLBATCH_ARTIFACT, etlBatchConfig);
    Id.Application appId = Id.Application.from(Id.Namespace.DEFAULT, "sconversion1");
    ApplicationManager appManager = deployApplication(appId, appRequest);

    WorkflowManager workflowManager = appManager.getWorkflowManager(ETLWorkflow.NAME);
    // add a minute to the end time to make sure the newly added partition is included in the run.
    workflowManager.start(ImmutableMap.of("logical.start.time", String.valueOf(timeInMillis + 60 * 1000)));
    workflowManager.waitForFinish(4, TimeUnit.MINUTES);

    DataSetManager<TimePartitionedFileSet> newFileSetManager = getDataset(newFilesetName);
    TimePartitionedFileSet newFileSet = newFileSetManager.get();

    List<GenericRecord> newRecords = readOutput(newFileSet, eventSchema);
    Assert.assertEquals(1, newRecords.size());
    Assert.assertEquals(Integer.MAX_VALUE, newRecords.get(0).get("int"));
}

From source file: com.aliyun.fs.oss.blk.OssFileSystem.java

License: Apache License

public FSDataOutputStream append(Path file, int bufferSize, Progressable progress) throws IOException {
    this.blocksForOneTime.clear();
    INode inode = checkFile(file);
    return new FSDataOutputStream(
            new OssAppendOutputStream(getConf(), store, makeAbsolute(file), inode, getDefaultBlockSize(file),
                    progress, getConf().getInt("io.file.buffer.size", 4096), blocksForOneTime),
            statistics);
}

From source file: com.aliyun.fs.oss.blk.OssFileSystem.java

License: Apache License

/**
 * @param permission Currently ignored.
 */
@Override
public FSDataOutputStream create(Path file, FsPermission permission, boolean overwrite, int bufferSize,
        short replication, long blockSize, Progressable progress) throws IOException {
    this.blocksForOneTime.clear();
    INode inode = store.retrieveINode(makeAbsolute(file));
    if (inode != null) {
        if (overwrite) {
            delete(file);
        } else {
            throw new IOException("File already exists: " + file);
        }
    } else {
        Path parent = file.getParent();
        if (parent != null) {
            if (!mkdirs(parent)) {
                throw new IOException("Mkdirs failed to create " + parent.toString());
            }
        }
    }
    return new FSDataOutputStream(new OssOutputStream(getConf(), store, makeAbsolute(file), blockSize, progress,
            bufferSize, blocksForOneTime), statistics);
}

From source file: com.aliyun.fs.oss.nat.NativeOssFileSystem.java

License: Apache License

@Override
public FSDataOutputStream append(Path f, int bufferSize, Progressable progress) throws IOException {
    Path absolutePath = makeAbsolute(f);
    String key = pathToKey(absolutePath);
    return new FSDataOutputStream(
            new NativeOssFsOutputStream(getConf(), store, key, true, progress, bufferSize), statistics);
}

From source file: com.aliyun.fs.oss.nat.NativeOssFileSystem.java

License: Apache License

@Override
public FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite, int bufferSize,
        short replication, long blockSize, Progressable progress) throws IOException {
    FileStatus status = null;
    try {
        // get the status or throw an FNFE
        status = getFileStatus(f);

        // if the thread reaches here, there is something at the path
        if (status.isDirectory()) {
            // path references a directory: automatic error
            throw new FileAlreadyExistsException(f + " is a directory");
        }
        if (!overwrite) {
            // path references a file and overwrite is disabled
            throw new FileAlreadyExistsException(f + " already exists");
        }
        LOG.debug("Overwriting file " + f);
    } catch (FileNotFoundException e) {
        // the path does not exist yet, so it is safe to create the file
    }

    Path absolutePath = makeAbsolute(f);
    String key = pathToKey(absolutePath);
    return new FSDataOutputStream(
            new NativeOssFsOutputStream(getConf(), store, key, false, progress, bufferSize), statistics);
}

From source file: com.aliyun.odps.fs.VolumeFileSystem.java

License: Apache License

@Override
public FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite, int bufferSize,
        short replication, long blockSize, Progressable progress) throws IOException {
    Path absF = fixRelativePart(f);
    String filePath = getPathName(absF);
    if (!VolumeFSUtil.isValidName(filePath)) {
        throw new IllegalArgumentException(
                VolumeFSErrorMessageGenerator.isNotAValidODPSVolumeFSFilename(filePath));
    }
    if (VolumeFSUtil.checkPathIsJustVolume(filePath)) {
        throw new IOException(
                VolumeFSErrorMessageGenerator.theOpreationIsNotAllowed("Create file in the root path!"));
    }
    try {
        return new FSDataOutputStream(new VolumeFSOutputStream(filePath, volumeClient, permission, overwrite,
                replication, blockSize, progress), statistics);
    } catch (VolumeException e) {
        logException(e);
        throw wrapExceptions(filePath, e);
    }
}