List of usage examples for org.apache.hadoop.fs.FileContext.create
public FSDataOutputStream create(final Path f,
                                 final EnumSet<CreateFlag> createFlag,
                                 Options.CreateOpts... opts)
        throws AccessControlException, FileAlreadyExistsException, FileNotFoundException,
               ParentNotDirectoryException, UnsupportedFileSystemException, IOException
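Before the real-world samples below, here is a minimal self-contained sketch of a direct call, assuming a default Hadoop configuration is on the classpath; the destination path is illustrative:

import java.nio.charset.StandardCharsets;
import java.util.EnumSet;
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Options.CreateOpts;
import org.apache.hadoop.fs.Path;

public class FileContextCreateExample {
    public static void main(String[] args) throws Exception {
        FileContext fc = FileContext.getFileContext(); // default filesystem from the loaded config
        Path p = new Path("/tmp/example/output.txt");  // hypothetical destination
        // CREATE + OVERWRITE: create the file, replacing it if it already exists;
        // createParent() also creates any missing parent directories.
        try (FSDataOutputStream out = fc.create(p,
                EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE),
                CreateOpts.createParent())) {
            out.write("hello".getBytes(StandardCharsets.UTF_8));
        }
    }
}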
From source file:com.ikanow.aleph2.aleph2_rest_utils.FileUtils.java
License:Apache License
public static void writeFile(final FileContext fileContext, final InputStream input, final String path)
        throws AccessControlException, FileAlreadyExistsException, FileNotFoundException,
        ParentNotDirectoryException, UnsupportedFileSystemException, IOException {
    final Path p = new Path(path);
    try (FSDataOutputStream outer = fileContext.create(p,
            EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE),
            org.apache.hadoop.fs.Options.CreateOpts.createParent())) {
        IOUtils.copyLarge(input, outer, new byte[DEFAULT_BUFFER_SIZE]);
    }
}
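A hedged call-site sketch for the writeFile helper above; the FileContext, the local input file, and the destination path are all illustrative assumptions:

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import org.apache.hadoop.fs.FileContext;

static void uploadExample() throws IOException {
    FileContext fc = FileContext.getFileContext(); // default filesystem from the loaded config
    try (InputStream in = new FileInputStream("local-input.bin")) { // hypothetical local file
        FileUtils.writeFile(fc, in, "/data/uploads/input.bin");     // hypothetical destination path
    }
}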
From source file:com.ikanow.aleph2.analytics.r.services.BeJobLauncher.java
License:Apache License
/** Cache the system and user classpaths
 * @param job
 * @param bucket
 * @param context
 * @throws IOException
 * @throws ExecutionException
 * @throws InterruptedException
 * @throws IllegalArgumentException
 */
protected static void cacheJars(final Job job, final DataBucketBean bucket, final IAnalyticsContext context)
        throws IllegalArgumentException, InterruptedException, ExecutionException, IOException {
    final FileContext fc = context.getServiceContext().getStorageService()
            .getUnderlyingPlatformDriver(FileContext.class, Optional.empty()).get();
    final String rootPath = context.getServiceContext().getStorageService().getRootPath();

    // Aleph2 libraries: need to cache them
    context.getAnalyticsContextLibraries(Optional.empty()).stream().map(f -> new File(f))
            .map(f -> Tuples._2T(f, new Path(rootPath + "/" + f.getName())))
            .map(Lambdas.wrap_u(f_p -> {
                final FileStatus fs = Lambdas.get(() -> {
                    //TODO (ALEPH-12): need to clear out the cache intermittently
                    try {
                        return fc.getFileStatus(f_p._2());
                    } catch (Exception e) {
                        return null;
                    }
                });
                if (null == fs) { // cache doesn't exist
                    // Local version
                    try (FSDataOutputStream outer = fc.create(f_p._2(), EnumSet.of(CreateFlag.CREATE),
                            // ie should fail if the destination file already exists
                            org.apache.hadoop.fs.Options.CreateOpts.createParent())) {
                        Files.copy(f_p._1(), outer.getWrappedStream());
                    } catch (FileAlreadyExistsException e) {
                        // (carry on - the file is versioned so it can't be out of date)
                    }
                }
                return f_p._2();
            }))
            .forEach(Lambdas.wrap_consumer_u(path -> job.addFileToClassPath(path)));

    // User libraries: this is slightly easier since one of the 2 keys
    // is the HDFS path (the other is the _id)
    context.getAnalyticsLibraries(Optional.of(bucket), bucket.analytic_thread().jobs()).get().entrySet()
            .stream().map(kv -> kv.getKey()).filter(path -> path.startsWith(rootPath))
            .forEach(Lambdas.wrap_consumer_u(path -> job.addFileToClassPath(new Path(path))));
}
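The create-if-absent idiom at the heart of cacheJars, distilled into a standalone sketch: passing only CreateFlag.CREATE makes create fail when the destination already exists, and FileAlreadyExistsException is then treated as "already cached". Method and argument names here are illustrative:

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.EnumSet;
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Options.CreateOpts;
import org.apache.hadoop.fs.Path;

/** Copies a local file to the remote path only if nothing is cached there yet. */
static void cacheIfAbsent(FileContext fc, File local, Path remote) throws IOException {
    try (FSDataOutputStream out = fc.create(remote,
            EnumSet.of(CreateFlag.CREATE), // no OVERWRITE: fails if the file exists
            CreateOpts.createParent())) {
        Files.copy(local.toPath(), out);
    } catch (FileAlreadyExistsException e) {
        // already cached; the jars are versioned, so an existing copy can't be stale
    }
}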
From source file:com.ikanow.aleph2.analytics.spark.utils.SparkTechnologyUtils.java
License:Apache License
/** Cache the system and user classpaths and return HDFS paths
 * @param bucket
 * @param main_jar_path - the path of this module's own JAR (excluded from the returned list)
 * @param context
 * @return the HDFS paths of the cached JARs
 * @throws IOException
 * @throws ExecutionException
 * @throws InterruptedException
 * @throws IllegalArgumentException
 */
public static List<String> getCachedJarList(final DataBucketBean bucket, final String main_jar_path,
        final IAnalyticsContext context)
        throws IllegalArgumentException, InterruptedException, ExecutionException, IOException {
    final FileContext fc = context.getServiceContext().getStorageService()
            .getUnderlyingPlatformDriver(FileContext.class, Optional.empty()).get();
    final String root_path = context.getServiceContext().getStorageService().getRootPath();
    final String tmp_dir = System.getProperty("java.io.tmpdir");

    // Aleph2 libraries: need to cache them
    final Stream<String> context_stream = context.getAnalyticsContextLibraries(Optional.empty()).stream()
            .filter(jar -> !jar.equals(main_jar_path)) // (this is the service case, eg "/opt/aleph2-home/lib/aleph2_spark_analytic_services.jar")
            .map(Lambdas.wrap_u(f_str -> {
                final Tuple3<File, Path, FileStatus> f_p_fs =
                        f_str.contains("core_distributed_services") || f_str.contains("data_model")
                                ? removeSparkConflictsAndCache(f_str, root_path, fc)
                                : checkCache(f_str, root_path, fc);
                if (null == f_p_fs._3()) { // cache doesn't exist
                    // Local version
                    try (FSDataOutputStream outer = fc.create(f_p_fs._2(), EnumSet.of(CreateFlag.CREATE),
                            // ie should fail if the destination file already exists
                            org.apache.hadoop.fs.Options.CreateOpts.createParent())) {
                        Files.copy(f_p_fs._1(), outer.getWrappedStream());
                    } catch (FileAlreadyExistsException e) {
                        // (carry on - the file is versioned so it can't be out of date)
                    }
                    if (f_p_fs._1().getPath().startsWith(tmp_dir)) { // (delete tmp files)
                        f_p_fs._1().delete();
                    }
                }
                return f_p_fs._2();
            }))
            .map(p -> transformFromPath(p.toString()));

    // User libraries: this is slightly easier since one of the 2 keys
    // is the HDFS path (the other is the _id)
    final Stream<String> lib_stream = context
            .getAnalyticsLibraries(Optional.of(bucket), bucket.analytic_thread().jobs()).get().entrySet()
            .stream().map(kv -> kv.getKey())
            .filter(jar -> !jar.equals(main_jar_path)) // (this is the uploaded case, eg "/app/aleph2/library/blah.jar")
            .filter(path -> path.startsWith(root_path)).map(s -> transformFromPath(s));

    return Stream.concat(context_stream, lib_stream).collect(Collectors.toList());
}
From source file:com.ikanow.aleph2.management_db.mongodb.services.IkanowV1SyncService_LibraryJars.java
License:Apache License
protected static void copyFile(final String binary_id, final String path, final IStorageService aleph2_fs,
        final GridFS share_fs) throws IOException {
    try (ByteArrayOutputStream out = new ByteArrayOutputStream()) {
        final GridFSDBFile file = share_fs.find(new ObjectId(binary_id));
        file.writeTo(out);

        final FileContext fs = aleph2_fs.getUnderlyingPlatformDriver(FileContext.class, Optional.empty()).get();
        final Path file_path = fs.makeQualified(new Path(path));
        try (FSDataOutputStream outer = fs.create(file_path,
                EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE),
                org.apache.hadoop.fs.Options.CreateOpts.createParent())) {
            outer.write(out.toByteArray());
        }
    }
}
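The makeQualified call above resolves the raw path string against the FileContext's default filesystem and working directory, so a scheme-less path becomes a fully qualified URI. A tiny hedged illustration, with assumed paths:

import java.io.IOException;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Path;

static Path qualifyExample() throws IOException {
    FileContext fc = FileContext.getFileContext();
    // e.g. against an HDFS default filesystem, "/app/aleph2/library/example.jar" (hypothetical)
    // might become "hdfs://namenode:8020/app/aleph2/library/example.jar"
    return fc.makeQualified(new Path("/app/aleph2/library/example.jar"));
}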
From source file:com.uber.hoodie.common.file.HoodieAppendLog.java
License:Apache License
/**
 * Construct the preferred type of SequenceFile Writer.
 * @param fc The context for the specified file.
 * @param conf The configuration.
 * @param name The name of the file.
 * @param keyClass The 'key' type.
 * @param valClass The 'value' type.
 * @param compressionType The compression type.
 * @param codec The compression codec.
 * @param metadata The metadata of the file.
 * @param createFlag gives the semantics of create: overwrite, append etc.
 * @param opts file creation options; see {@link CreateOpts}.
 * @return Returns the handle to the constructed SequenceFile Writer.
 * @throws IOException
 */
public static Writer createWriter(FileContext fc, Configuration conf, Path name, Class keyClass,
        Class valClass, CompressionType compressionType, CompressionCodec codec, Metadata metadata,
        final EnumSet<CreateFlag> createFlag, CreateOpts... opts) throws IOException {
    return createWriter(conf, fc.create(name, createFlag, opts), keyClass, valClass, compressionType,
            codec, metadata).ownStream();
}
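The pattern worth noting here: the stream is opened with fc.create and handed to the writer factory, and ownStream() marks the writer as responsible for closing it. A minimal sketch of the same hand-off, using a plain java.io writer as a stand-in for the SequenceFile-style Writer; the path and flags are assumptions:

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.EnumSet;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Options.CreateOpts;
import org.apache.hadoop.fs.Path;

static void writeViaHandOff() throws IOException {
    FileContext fc = FileContext.getFileContext(new Configuration());
    // Open the raw stream ourselves, then hand it to a writer that now owns it
    try (BufferedWriter w = new BufferedWriter(new OutputStreamWriter(
            fc.create(new Path("/tmp/example/append.log"), // hypothetical path
                    EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE),
                    CreateOpts.createParent()),
            StandardCharsets.UTF_8))) {
        w.write("record\n"); // closing w also closes the underlying FSDataOutputStream
    }
}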
From source file:org.apache.apex.malhar.lib.utils.IOUtilsTest.java
License:Apache License
private void testCopyPartialHelper(int dataSize, int offset, long size) throws IOException {
    FileUtils.deleteQuietly(new File("target/IOUtilsTest"));
    File file = new File("target/IOUtilsTest/testCopyPartial/input");
    createDataFile(file, dataSize);

    FileContext fileContext = FileContext.getFileContext();
    DataInputStream inputStream = fileContext.open(new Path(file.getAbsolutePath()));

    Path output = new Path("target/IOUtilsTest/testCopyPartial/output");
    DataOutputStream outputStream = fileContext.create(output,
            EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE),
            Options.CreateOpts.CreateParent.createParent());

    if (offset == 0) {
        IOUtils.copyPartial(inputStream, size, outputStream);
    } else {
        IOUtils.copyPartial(inputStream, offset, size, outputStream);
    }
    outputStream.close();

    Assert.assertTrue("output exists", fileContext.util().exists(output));
    Assert.assertEquals("output size", size, fileContext.getFileStatus(output).getLen());
    // FileUtils.deleteQuietly(new File("target/IOUtilsTest"));
}
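Note: Options.CreateOpts.CreateParent.createParent() here resolves to the static factory CreateOpts.createParent() inherited by the CreateParent subclass, so this call is equivalent to the CreateOpts.createParent() used in the other samples.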
From source file:org.elasticsearch.repositories.hdfs.HdfsBlobContainer.java
License:Apache License
@Override
public void writeBlob(String blobName, InputStream inputStream, long blobSize) throws IOException {
    if (blobExists(blobName)) {
        throw new FileAlreadyExistsException("blob [" + blobName + "] already exists, cannot overwrite");
    }
    store.execute(new Operation<Void>() {
        @Override
        public Void run(FileContext fileContext) throws IOException {
            Path blob = new Path(path, blobName);
            // we pass CREATE, which means it fails if a blob already exists.
            // NOTE: this behavior differs from FSBlobContainer, which passes TRUNCATE_EXISTING
            // that should be fixed there, no need to bring truncation into this, give the user an error.
            EnumSet<CreateFlag> flags = EnumSet.of(CreateFlag.CREATE, CreateFlag.SYNC_BLOCK);
            CreateOpts[] opts = { CreateOpts.bufferSize(bufferSize) };
            try (FSDataOutputStream stream = fileContext.create(blob, flags, opts)) {
                int bytesRead;
                byte[] buffer = new byte[bufferSize];
                while ((bytesRead = inputStream.read(buffer)) != -1) {
                    stream.write(buffer, 0, bytesRead);
                    // For safety we also hsync each write as well, because of its docs:
                    // SYNC_BLOCK - to force closed blocks to the disk device
                    // "In addition Syncable.hsync() should be called after each write,
                    // if true synchronous behavior is required"
                    stream.hsync();
                }
            }
            return null;
        }
    });
}
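A distilled sketch of the durable-write pattern above: CREATE + SYNC_BLOCK plus an hsync() after each buffer, so data is forced to the disk device as it streams. The method name and buffer size are illustrative:

import java.io.IOException;
import java.io.InputStream;
import java.util.EnumSet;
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Options.CreateOpts;
import org.apache.hadoop.fs.Path;

/** Streams the input to dest, failing if dest exists, and syncing each buffer to disk. */
static void writeDurably(FileContext fc, Path dest, InputStream in, int bufferSize) throws IOException {
    EnumSet<CreateFlag> flags = EnumSet.of(CreateFlag.CREATE, CreateFlag.SYNC_BLOCK);
    try (FSDataOutputStream out = fc.create(dest, flags, CreateOpts.bufferSize(bufferSize))) {
        byte[] buffer = new byte[bufferSize];
        int n;
        while ((n = in.read(buffer)) != -1) {
            out.write(buffer, 0, n);
            out.hsync(); // force the written data to the disk device, per SYNC_BLOCK's contract
        }
    }
}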