List of usage examples for the org.apache.hadoop.fs.FSDataOutputStream constructor
public FSDataOutputStream(OutputStream out, FileSystem.Statistics stats)
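Before the project-specific examples, here is a minimal, self-contained sketch of this constructor in isolation. It is not taken from any of the projects below; the file path and the "file" scheme passed to FileSystem.Statistics are placeholders chosen for illustration.

import java.io.FileOutputStream;
import java.io.IOException;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;

public class FSDataOutputStreamExample {
  public static void main(String[] args) throws IOException {
    // Per-scheme I/O counters; a FileSystem implementation normally supplies
    // its own instance. The "file" scheme here is a placeholder.
    FileSystem.Statistics stats = new FileSystem.Statistics("file");

    // Wrap any OutputStream; FSDataOutputStream layers position tracking
    // (getPos()) and the DataOutput convenience methods on top of it.
    try (FSDataOutputStream out = new FSDataOutputStream(
        new FileOutputStream("/tmp/example.bin"), stats)) {
      out.writeUTF("hello");
      System.out.println("position after write: " + out.getPos());
    }
  }
}

Note that several of the real-world examples below pass null for the statistics argument instead; the constructor accepts that when no per-scheme counters are being tracked.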
From source file: alluxio.hadoop.AbstractFileSystem.java
License: Apache License

@Override
public FSDataOutputStream append(Path path, int bufferSize, Progressable progress) throws IOException {
  LOG.info("append({}, {}, {})", path, bufferSize, progress);
  if (mStatistics != null) {
    mStatistics.incrementWriteOps(1);
  }
  AlluxioURI uri = new AlluxioURI(HadoopUtils.getPathWithoutScheme(path));
  try {
    if (!sFileSystem.exists(uri)) {
      return new FSDataOutputStream(sFileSystem.createFile(uri), mStatistics);
    } else {
      throw new IOException(ExceptionMessage.FILE_ALREADY_EXISTS.getMessage(uri));
    }
  } catch (AlluxioException e) {
    throw new IOException(e);
  }
}
From source file: alluxio.hadoop.AbstractFileSystem.java
License: Apache License

/**
 * Attempts to create a file. Overwrite will not succeed if the path exists and is a folder.
 *
 * @param path path to create
 * @param permission permissions of the created file/folder
 * @param overwrite overwrite if file exists
 * @param bufferSize the size in bytes of the buffer to be used
 * @param replication under filesystem replication factor
 * @param blockSize block size in bytes
 * @param progress queryable progress
 * @return an {@link FSDataOutputStream} created at the indicated path of a file
 * @throws IOException if overwrite is not specified and the path already exists or if the path is
 *         a folder
 */
@Override
public FSDataOutputStream create(Path path, FsPermission permission, boolean overwrite, int bufferSize,
    short replication, long blockSize, Progressable progress) throws IOException {
  LOG.info("create({}, {}, {}, {}, {}, {}, {})", path, permission, overwrite, bufferSize, replication,
      blockSize, progress);
  if (mStatistics != null) {
    mStatistics.incrementWriteOps(1);
  }
  // Check whether the file already exists, and delete it if overwrite is true
  AlluxioURI uri = new AlluxioURI(HadoopUtils.getPathWithoutScheme(path));
  try {
    if (sFileSystem.exists(uri)) {
      if (!overwrite) {
        throw new IOException(ExceptionMessage.FILE_ALREADY_EXISTS.getMessage(uri));
      }
      if (sFileSystem.getStatus(uri).isFolder()) {
        throw new IOException(ExceptionMessage.FILE_CREATE_IS_DIRECTORY.getMessage(uri));
      }
      sFileSystem.delete(uri);
    }
  } catch (AlluxioException e) {
    throw new IOException(e);
  }
  // The file no longer exists at this point, so we can create it
  CreateFileOptions options = CreateFileOptions.defaults().setBlockSizeBytes(blockSize);
  try {
    FileOutStream outStream = sFileSystem.createFile(uri, options);
    return new FSDataOutputStream(outStream, mStatistics);
  } catch (AlluxioException e) {
    throw new IOException(e);
  }
}
From source file: cascading.tap.hadoop.S3HttpFileSystem.java
License: Open Source License

@Override
public FSDataOutputStream create(final Path path, FsPermission permission, boolean overwrite, int bufferSize,
    short replication, long blockSize, Progressable progress) throws IOException {
  if (!overwrite && exists(path))
    throw new IOException("file already exists: " + path);

  if (LOG.isDebugEnabled())
    LOG.debug("creating file: " + path);

  final ByteArrayOutputStream stream = new ByteArrayOutputStream();
  final DigestOutputStream digestStream = new DigestOutputStream(stream, getMD5Digest());

  return new FSDataOutputStream(digestStream, null) {
    @Override
    public void close() throws IOException {
      super.close();

      S3Object object = S3Util.getObject(s3Service, s3Bucket, path, S3Util.Request.CREATE_OBJECT);

      object.setContentType("text/plain"); // todo use 'binary/octet-stream'
      object.setMd5Hash(digestStream.getMessageDigest().digest());

      // todo buffer to disk instead
      byte[] bytes = stream.toByteArray();
      object.setDataInputStream(new ByteArrayInputStream(bytes));
      object.setContentLength(bytes.length);

      if (LOG.isDebugEnabled())
        LOG.debug("putting file: " + path);

      S3Util.putObject(s3Service, s3Bucket, object);
    }
  };
}
From source file: co.cask.cdap.template.etl.batch.ETLTPFSTest.java
License: Apache License

@Test
public void testAvroSourceConversionToAvroSink() throws Exception {
  Schema eventSchema = Schema.recordOf("record", Schema.Field.of("int", Schema.of(Schema.Type.INT)));
  org.apache.avro.Schema avroSchema = new org.apache.avro.Schema.Parser().parse(eventSchema.toString());
  GenericRecord record = new GenericRecordBuilder(avroSchema).set("int", Integer.MAX_VALUE).build();

  String filesetName = "tpfs";
  DatasetAdmin datasetAdmin = addDatasetInstance("timePartitionedFileSet", filesetName,
      FileSetProperties.builder().setInputFormat(AvroKeyInputFormat.class)
          .setOutputFormat(AvroKeyOutputFormat.class)
          .setInputProperty("schema", avroSchema.toString())
          .setOutputProperty("schema", avroSchema.toString()).setEnableExploreOnCreate(true)
          .setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe")
          .setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat")
          .setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat")
          .setTableProperty("avro.schema.literal", (avroSchema.toString())).build());
  DataSetManager<TimePartitionedFileSet> fileSetManager = getDataset(filesetName);
  TimePartitionedFileSet tpfs = fileSetManager.get();

  TransactionManager txService = getTxService();
  Transaction tx1 = txService.startShort(100);
  TransactionAware txTpfs = (TransactionAware) tpfs;
  txTpfs.startTx(tx1);

  long timeInMillis = System.currentTimeMillis();
  fileSetManager.get().addPartition(timeInMillis, "directory", ImmutableMap.of("key1", "value1"));
  Location location = fileSetManager.get().getPartitionByTime(timeInMillis).getLocation();
  location = location.append("file.avro");
  FSDataOutputStream outputStream = new FSDataOutputStream(location.getOutputStream(), null);
  DataFileWriter<GenericRecord> dataFileWriter =
      new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(avroSchema));
  dataFileWriter.create(avroSchema, outputStream);
  dataFileWriter.append(record);
  dataFileWriter.flush();

  txTpfs.commitTx();
  txService.canCommit(tx1, txTpfs.getTxChanges());
  txService.commit(tx1);
  txTpfs.postTxCommit();

  String newFilesetName = filesetName + "_op";
  ETLBatchConfig etlBatchConfig = constructTPFSETLConfig(filesetName, newFilesetName, eventSchema);
  AdapterConfig newAdapterConfig = new AdapterConfig("description", TEMPLATE_ID.getId(),
      GSON.toJsonTree(etlBatchConfig));
  Id.Adapter newAdapterId = Id.Adapter.from(NAMESPACE, "sconversion1");
  AdapterManager tpfsAdapterManager = createAdapter(newAdapterId, newAdapterConfig);

  tpfsAdapterManager.start();
  tpfsAdapterManager.waitForOneRunToFinish(4, TimeUnit.MINUTES);
  tpfsAdapterManager.stop();

  DataSetManager<TimePartitionedFileSet> newFileSetManager = getDataset(newFilesetName);
  TimePartitionedFileSet newFileSet = newFileSetManager.get();

  List<GenericRecord> newRecords = readOutput(newFileSet, eventSchema);
  Assert.assertEquals(1, newRecords.size());
  Assert.assertEquals(Integer.MAX_VALUE, newRecords.get(0).get("int"));
}
From source file: co.cask.hydrator.plugin.batch.ETLTPFSTestRun.java
License: Apache License

@Test
public void testAvroSourceConversionToAvroSink() throws Exception {
  Schema eventSchema = Schema.recordOf("record", Schema.Field.of("int", Schema.of(Schema.Type.INT)));
  org.apache.avro.Schema avroSchema = new org.apache.avro.Schema.Parser().parse(eventSchema.toString());
  GenericRecord record = new GenericRecordBuilder(avroSchema).set("int", Integer.MAX_VALUE).build();

  String filesetName = "tpfs";
  addDatasetInstance(TimePartitionedFileSet.class.getName(), filesetName,
      FileSetProperties.builder().setInputFormat(AvroKeyInputFormat.class)
          .setOutputFormat(AvroKeyOutputFormat.class)
          .setInputProperty("schema", avroSchema.toString())
          .setOutputProperty("schema", avroSchema.toString()).setEnableExploreOnCreate(true)
          .setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe")
          .setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat")
          .setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat")
          .setTableProperty("avro.schema.literal", (avroSchema.toString())).build());
  DataSetManager<TimePartitionedFileSet> fileSetManager = getDataset(filesetName);
  TimePartitionedFileSet tpfs = fileSetManager.get();

  TransactionManager txService = getTxService();
  Transaction tx1 = txService.startShort(100);
  TransactionAware txTpfs = (TransactionAware) tpfs;
  txTpfs.startTx(tx1);

  long timeInMillis = System.currentTimeMillis();
  fileSetManager.get().addPartition(timeInMillis, "directory", ImmutableMap.of("key1", "value1"));
  Location location = fileSetManager.get().getPartitionByTime(timeInMillis).getLocation();
  location = location.append("file.avro");
  FSDataOutputStream outputStream = new FSDataOutputStream(location.getOutputStream(), null);
  DataFileWriter<GenericRecord> dataFileWriter =
      new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(avroSchema));
  dataFileWriter.create(avroSchema, outputStream);
  dataFileWriter.append(record);
  dataFileWriter.flush();

  txTpfs.commitTx();
  txService.canCommit(tx1, txTpfs.getTxChanges());
  txService.commit(tx1);
  txTpfs.postTxCommit();

  String newFilesetName = filesetName + "_op";
  ETLBatchConfig etlBatchConfig = constructTPFSETLConfig(filesetName, newFilesetName, eventSchema);
  AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(ETLBATCH_ARTIFACT, etlBatchConfig);
  Id.Application appId = Id.Application.from(Id.Namespace.DEFAULT, "sconversion1");
  ApplicationManager appManager = deployApplication(appId, appRequest);

  WorkflowManager workflowManager = appManager.getWorkflowManager(ETLWorkflow.NAME);
  // add a minute to the end time to make sure the newly added partition is included in the run.
  workflowManager.start(ImmutableMap.of("logical.start.time", String.valueOf(timeInMillis + 60 * 1000)));
  workflowManager.waitForFinish(4, TimeUnit.MINUTES);

  DataSetManager<TimePartitionedFileSet> newFileSetManager = getDataset(newFilesetName);
  TimePartitionedFileSet newFileSet = newFileSetManager.get();

  List<GenericRecord> newRecords = readOutput(newFileSet, eventSchema);
  Assert.assertEquals(1, newRecords.size());
  Assert.assertEquals(Integer.MAX_VALUE, newRecords.get(0).get("int"));
}
From source file: com.aliyun.fs.oss.blk.OssFileSystem.java
License: Apache License

public FSDataOutputStream append(Path file, int bufferSize, Progressable progress) throws IOException {
  this.blocksForOneTime.clear();
  INode inode = checkFile(file);
  return new FSDataOutputStream(
      new OssAppendOutputStream(getConf(), store, makeAbsolute(file), inode, getDefaultBlockSize(file),
          progress, getConf().getInt("io.file.buffer.size", 4096), blocksForOneTime),
      statistics);
}
From source file: com.aliyun.fs.oss.blk.OssFileSystem.java
License: Apache License

/**
 * @param permission Currently ignored.
 */
@Override
public FSDataOutputStream create(Path file, FsPermission permission, boolean overwrite, int bufferSize,
    short replication, long blockSize, Progressable progress) throws IOException {
  this.blocksForOneTime.clear();
  INode inode = store.retrieveINode(makeAbsolute(file));
  if (inode != null) {
    if (overwrite) {
      delete(file);
    } else {
      throw new IOException("File already exists: " + file);
    }
  } else {
    Path parent = file.getParent();
    if (parent != null) {
      if (!mkdirs(parent)) {
        throw new IOException("Mkdirs failed to create " + parent.toString());
      }
    }
  }
  return new FSDataOutputStream(new OssOutputStream(getConf(), store, makeAbsolute(file), blockSize,
      progress, bufferSize, blocksForOneTime), statistics);
}
From source file: com.aliyun.fs.oss.nat.NativeOssFileSystem.java
License: Apache License

@Override
public FSDataOutputStream append(Path f, int bufferSize, Progressable progress) throws IOException {
  Path absolutePath = makeAbsolute(f);
  String key = pathToKey(absolutePath);
  return new FSDataOutputStream(
      new NativeOssFsOutputStream(getConf(), store, key, true, progress, bufferSize), statistics);
}
From source file: com.aliyun.fs.oss.nat.NativeOssFileSystem.java
License: Apache License

@Override
public FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite, int bufferSize,
    short replication, long blockSize, Progressable progress) throws IOException {
  FileStatus status = null;
  try {
    // get the status or throw an FNFE
    status = getFileStatus(f);
    // if the thread reaches here, there is something at the path
    if (status.isDirectory()) {
      // path references a directory: automatic error
      throw new FileAlreadyExistsException(f + " is a directory");
    }
    if (!overwrite) {
      // path references a file and overwrite is disabled
      throw new FileAlreadyExistsException(f + " already exists");
    }
    LOG.debug("Overwriting file " + f);
  } catch (FileNotFoundException e) {
    // this means the file is not found
  }
  Path absolutePath = makeAbsolute(f);
  String key = pathToKey(absolutePath);
  return new FSDataOutputStream(
      new NativeOssFsOutputStream(getConf(), store, key, false, progress, bufferSize), statistics);
}
From source file: com.aliyun.odps.fs.VolumeFileSystem.java
License: Apache License

@Override
public FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite, int bufferSize,
    short replication, long blockSize, Progressable progress) throws IOException {
  Path absF = fixRelativePart(f);
  String filePath = getPathName(absF);
  if (!VolumeFSUtil.isValidName(filePath)) {
    throw new IllegalArgumentException(
        VolumeFSErrorMessageGenerator.isNotAValidODPSVolumeFSFilename(filePath));
  }
  if (VolumeFSUtil.checkPathIsJustVolume(filePath)) {
    throw new IOException(
        VolumeFSErrorMessageGenerator.theOpreationIsNotAllowed("Create file in the root path!"));
  }
  try {
    return new FSDataOutputStream(new VolumeFSOutputStream(filePath, volumeClient, permission, overwrite,
        replication, blockSize, progress), statistics);
  } catch (VolumeException e) {
    logException(e);
    throw wrapExceptions(filePath, e);
  }
}
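As a caller-side counterpart to the create() implementations above, here is a minimal sketch of obtaining an FSDataOutputStream through the public FileSystem API rather than constructing one directly; the path is a placeholder, and getLocal() is used only so the sketch runs without a cluster.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateThroughFileSystemApi {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // A real deployment would call FileSystem.get(conf) against its
    // configured default filesystem instead of the local one.
    FileSystem fs = FileSystem.getLocal(conf);

    Path path = new Path("/tmp/created-by-fs.txt"); // placeholder path
    // create(path, overwrite) dispatches to a create() override of the
    // kind shown in the examples above.
    try (FSDataOutputStream out = fs.create(path, true)) {
      out.writeBytes("created through the FileSystem API\n");
    }
  }
}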