Example usage for org.apache.hadoop.fs FileContext create

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileContext.create.

Prototype

public FSDataOutputStream create(final Path f, final EnumSet<CreateFlag> createFlag, Options.CreateOpts... opts)
        throws AccessControlException, FileAlreadyExistsException, FileNotFoundException,
        ParentNotDirectoryException, UnsupportedFileSystemException, IOException 

Document

Creates or overwrites a file at the indicated path and returns an output stream for writing into the file.
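
Below is a minimal, self-contained sketch of calling this overload directly. It is not taken from the projects listed under Usage; the target path and the string written are illustrative only, and it assumes a default file system is configured (via FileContext.getFileContext()).

import java.util.EnumSet;

import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Options;
import org.apache.hadoop.fs.Path;

public class FileContextCreateExample {
    public static void main(String[] args) throws Exception {
        // FileContext for the default (configured) file system.
        final FileContext fc = FileContext.getFileContext();
        // Hypothetical target path, used only for illustration.
        final Path target = new Path("/tmp/filecontext-create-example.txt");
        // CREATE + OVERWRITE: create the file, replacing any existing one;
        // CreateOpts.createParent() also creates missing parent directories.
        try (FSDataOutputStream out = fc.create(target,
                EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE),
                Options.CreateOpts.createParent())) {
            out.writeUTF("hello, FileContext");
        }
    }
}

Passing EnumSet.of(CreateFlag.CREATE) alone instead makes the call fail with FileAlreadyExistsException when the file already exists, which several of the examples below rely on.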

Usage

From source file:com.ikanow.aleph2.aleph2_rest_utils.FileUtils.java

License:Apache License

public static void writeFile(final FileContext fileContext, final InputStream input, final String path)
        throws AccessControlException, FileAlreadyExistsException, FileNotFoundException,
        ParentNotDirectoryException, UnsupportedFileSystemException, IOException {
    final Path p = new Path(path);
    try (FSDataOutputStream outer = fileContext.create(p, EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE),
            org.apache.hadoop.fs.Options.CreateOpts.createParent())) {
        IOUtils.copyLarge(input, outer, new byte[DEFAULT_BUFFER_SIZE]);
    }
}

From source file:com.ikanow.aleph2.analytics.r.services.BeJobLauncher.java

License:Apache License

/** Cache the system and user classpaths
 * @param job
 * @param bucket
 * @param context
 * @throws IOException 
 * @throws ExecutionException 
 * @throws InterruptedException 
 * @throws IllegalArgumentException 
 */
protected static void cacheJars(final Job job, final DataBucketBean bucket, final IAnalyticsContext context)
        throws IllegalArgumentException, InterruptedException, ExecutionException, IOException {
    final FileContext fc = context.getServiceContext().getStorageService()
            .getUnderlyingPlatformDriver(FileContext.class, Optional.empty()).get();
    final String rootPath = context.getServiceContext().getStorageService().getRootPath();

    // Aleph2 libraries: need to cache them
    context.getAnalyticsContextLibraries(Optional.empty()).stream().map(f -> new File(f))
            .map(f -> Tuples._2T(f, new Path(rootPath + "/" + f.getName()))).map(Lambdas.wrap_u(f_p -> {
                final FileStatus fs = Lambdas.get(() -> {
                    //TODO (ALEPH-12): need to clear out the cache intermittently
                    try {
                        return fc.getFileStatus(f_p._2());
                    } catch (Exception e) {
                        return null;
                    }
                });
                if (null == fs) { //cache doesn't exist
                    // Local version
                    try (FSDataOutputStream outer = fc.create(f_p._2(), EnumSet.of(CreateFlag.CREATE), // ie should fail if the destination file already exists 
                            org.apache.hadoop.fs.Options.CreateOpts.createParent())) {
                        Files.copy(f_p._1(), outer.getWrappedStream());
                    } catch (FileAlreadyExistsException e) {//(carry on - the file is versioned so it can't be out of date)
                    }
                }
                return f_p._2();
            })).forEach(Lambdas.wrap_consumer_u(path -> job.addFileToClassPath(path)));

    // User libraries: this is slightly easier since one of the 2 keys
    // is the HDFS path (the other is the _id)
    context.getAnalyticsLibraries(Optional.of(bucket), bucket.analytic_thread().jobs()).get().entrySet()
            .stream().map(kv -> kv.getKey()).filter(path -> path.startsWith(rootPath))
            .forEach(Lambdas.wrap_consumer_u(path -> job.addFileToClassPath(new Path(path))));
}

From source file:com.ikanow.aleph2.analytics.spark.utils.SparkTechnologyUtils.java

License:Apache License

/** Cache the system and user classpaths and return HDFS paths
 * @param bucket
 * @param main_jar_path - my JAR path
 * @param context
 * @throws IOException 
 * @throws ExecutionException 
 * @throws InterruptedException 
 * @throws IllegalArgumentException 
 */
public static List<String> getCachedJarList(final DataBucketBean bucket, final String main_jar_path,
        final IAnalyticsContext context)
        throws IllegalArgumentException, InterruptedException, ExecutionException, IOException {
    final FileContext fc = context.getServiceContext().getStorageService()
            .getUnderlyingPlatformDriver(FileContext.class, Optional.empty()).get();
    final String root_path = context.getServiceContext().getStorageService().getRootPath();
    final String tmp_dir = System.getProperty("java.io.tmpdir");

    // Aleph2 libraries: need to cache them
    final Stream<String> context_stream = context.getAnalyticsContextLibraries(Optional.empty()).stream()
            .filter(jar -> !jar.equals(main_jar_path)) // (this is the service case, eg "/opt/aleph2-home/lib/aleph2_spark_analytic_services.jar")
            .map(Lambdas.wrap_u(f_str -> {

                final Tuple3<File, Path, FileStatus> f_p_fs = f_str.contains("core_distributed_services")
                        || f_str.contains("data_model") ? removeSparkConflictsAndCache(f_str, root_path, fc)
                                : checkCache(f_str, root_path, fc);

                if (null == f_p_fs._3()) { //cache doesn't exist
                    // Local version
                    try (FSDataOutputStream outer = fc.create(f_p_fs._2(), EnumSet.of(CreateFlag.CREATE), // ie should fail if the destination file already exists 
                            org.apache.hadoop.fs.Options.CreateOpts.createParent())) {
                        Files.copy(f_p_fs._1(), outer.getWrappedStream());
                    } catch (FileAlreadyExistsException e) {//(carry on - the file is versioned so it can't be out of date)
                    }
                    if (f_p_fs._1().getPath().startsWith(tmp_dir)) { // (delete tmp files)
                        f_p_fs._1().delete();
                    }
                }

                return f_p_fs._2();
            })).map(p -> transformFromPath(p.toString()));

    // User libraries: this is slightly easier since one of the 2 keys
    // is the HDFS path (the other is the _id)
    final Stream<String> lib_stream = context
            .getAnalyticsLibraries(Optional.of(bucket), bucket.analytic_thread().jobs()).get().entrySet()
            .stream().map(kv -> kv.getKey()).filter(jar -> !jar.equals(main_jar_path)) // (this is the uploaded case, eg "/app/aleph2/library/blah.jar")
            .filter(path -> path.startsWith(root_path)).map(s -> transformFromPath(s));

    return Stream.concat(context_stream, lib_stream).collect(Collectors.toList());
}

From source file:com.ikanow.aleph2.management_db.mongodb.services.IkanowV1SyncService_LibraryJars.java

License:Apache License

/** Copies the GridFS file identified by binary_id into the Aleph2 storage service at the given path, overwriting any existing file at that location. */
protected static void copyFile(final String binary_id, final String path, final IStorageService aleph2_fs,
        final GridFS share_fs) throws IOException {
    try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
        final GridFSDBFile file = share_fs.find(new ObjectId(binary_id));
        file.writeTo(out);
        final FileContext fs = aleph2_fs.getUnderlyingPlatformDriver(FileContext.class, Optional.empty()).get();
        final Path file_path = fs.makeQualified(new Path(path));
        try (FSDataOutputStream outer = fs.create(file_path,
                EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE),
                org.apache.hadoop.fs.Options.CreateOpts.createParent())) {
            outer.write(out.toByteArray());
        }
    }
}

From source file:com.uber.hoodie.common.file.HoodieAppendLog.java

License:Apache License

/**
 * Construct the preferred type of SequenceFile Writer.
 * @param fc The context for the specified file.
 * @param conf The configuration.
 * @param name The name of the file.
 * @param keyClass The 'key' type.
 * @param valClass The 'value' type.
 * @param compressionType The compression type.
 * @param codec The compression codec.
 * @param metadata The metadata of the file.
 * @param createFlag gives the semantics of create: overwrite, append etc.
 * @param opts file creation options; see {@link CreateOpts}.
 * @return Returns the handle to the constructed SequenceFile Writer.
 * @throws IOException
 */
public static Writer createWriter(FileContext fc, Configuration conf, Path name, Class keyClass, Class valClass,
        CompressionType compressionType, CompressionCodec codec, Metadata metadata,
        final EnumSet<CreateFlag> createFlag, CreateOpts... opts) throws IOException {
    return createWriter(conf, fc.create(name, createFlag, opts), keyClass, valClass, compressionType, codec,
            metadata).ownStream();
}

From source file:org.apache.apex.malhar.lib.utils.IOUtilsTest.java

License:Apache License

private void testCopyPartialHelper(int dataSize, int offset, long size) throws IOException {
    FileUtils.deleteQuietly(new File("target/IOUtilsTest"));
    File file = new File("target/IOUtilsTest/testCopyPartial/input");
    createDataFile(file, dataSize);

    FileContext fileContext = FileContext.getFileContext();
    DataInputStream inputStream = fileContext.open(new Path(file.getAbsolutePath()));

    Path output = new Path("target/IOUtilsTest/testCopyPartial/output");
    DataOutputStream outputStream = fileContext.create(output,
            EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE),
            Options.CreateOpts.CreateParent.createParent());

    if (offset == 0) {
        IOUtils.copyPartial(inputStream, size, outputStream);
    } else {
        IOUtils.copyPartial(inputStream, offset, size, outputStream);
    }

    outputStream.close();

    Assert.assertTrue("output exists", fileContext.util().exists(output));
    Assert.assertEquals("output size", size, fileContext.getFileStatus(output).getLen());
    //    FileUtils.deleteQuietly(new File("target/IOUtilsTest"));
}

From source file:org.elasticsearch.repositories.hdfs.HdfsBlobContainer.java

License:Apache License

@Override
public void writeBlob(String blobName, InputStream inputStream, long blobSize) throws IOException {
    if (blobExists(blobName)) {
        throw new FileAlreadyExistsException("blob [" + blobName + "] already exists, cannot overwrite");
    }
    store.execute(new Operation<Void>() {
        @Override
        public Void run(FileContext fileContext) throws IOException {
            Path blob = new Path(path, blobName);
            // we pass CREATE, which means it fails if a blob already exists.
            // NOTE: this behavior differs from FSBlobContainer, which passes TRUNCATE_EXISTING
            // that should be fixed there, no need to bring truncation into this, give the user an error.
            EnumSet<CreateFlag> flags = EnumSet.of(CreateFlag.CREATE, CreateFlag.SYNC_BLOCK);
            CreateOpts[] opts = { CreateOpts.bufferSize(bufferSize) };
            try (FSDataOutputStream stream = fileContext.create(blob, flags, opts)) {
                int bytesRead;
                byte[] buffer = new byte[bufferSize];
                while ((bytesRead = inputStream.read(buffer)) != -1) {
                    stream.write(buffer, 0, bytesRead);
                    //  For safety we also hsync each write as well, because of its docs:
                    //  SYNC_BLOCK - to force closed blocks to the disk device
                    // "In addition Syncable.hsync() should be called after each write,
                    //  if true synchronous behavior is required"
                    stream.hsync();
                }
            }
            return null;
        }
    });
}