List of usage examples for org.apache.hadoop.fs.FileContext.util()

Method signature: FileContext.Util util()

The util() method returns a FileContext.Util instance that bundles convenience operations (exists, copy, listStatus, ...) on top of the core FileContext API. The examples below are drawn from open-source projects.
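Before the project examples, here is a minimal self-contained sketch of util() in action. It is an orientation sample only; the demo class name and the /tmp path are illustrative assumptions, not taken from any of the sources below.

import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;

public class FileContextUtilDemo {
    public static void main(String[] args) throws Exception {
        // Default FileContext: local FS unless fs.defaultFS is configured otherwise
        FileContext fc = FileContext.getFileContext();

        Path dir = new Path("/tmp/filecontext-demo"); // illustrative path
        // FileContext.Util bundles convenience operations (exists, copy,
        // listStatus, ...) on top of the core FileContext API
        if (fc.util().exists(dir)) {
            for (FileStatus status : fc.util().listStatus(dir)) {
                System.out.println(status.getPath() + (status.isDirectory() ? " [dir]" : ""));
            }
        }
    }
}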
From source file:com.datatorrent.common.util.AsyncFSStorageAgentTest.java
License:Apache License
@Test
public void testDelete() throws IOException
{
    testLoad();
    testMeta.storageAgent.delete(1, 1);

    Path appPath = new Path(testMeta.applicationPath);
    FileContext fileContext = FileContext.getFileContext();
    Assert.assertTrue("operator 2 window 1",
            fileContext.util().exists(new Path(appPath + "/" + 2 + "/" + 1)));
    Assert.assertFalse("operator 1 window 1",
            fileContext.util().exists(new Path(appPath + "/" + 1 + "/" + 1)));
}
From source file:com.datatorrent.common.util.FSStorageAgentTest.java
License:Apache License
@Test
public void testDelete() throws IOException
{
    testLoad();
    testMeta.storageAgent.delete(1, 1);

    Path appPath = new Path(testMeta.applicationPath);
    FileContext fileContext = FileContext.getFileContext();
    Assert.assertTrue("operator 2 window 1",
            fileContext.util().exists(new Path(appPath + "/" + 2 + "/" + 1)));
    Assert.assertFalse("operator 1 window 1",
            fileContext.util().exists(new Path(appPath + "/" + 1 + "/" + 1)));
}
From source file:com.datatorrent.stram.FSRecoveryHandler.java
License:Apache License
@Override
public Object restore() throws IOException
{
    FileContext fc = FileContext.getFileContext(fs.getUri());

    // recover from wherever it was left
    if (fc.util().exists(snapshotBackupPath)) {
        LOG.warn("Incomplete checkpoint, reverting to {}", snapshotBackupPath);
        fc.rename(snapshotBackupPath, snapshotPath, Rename.OVERWRITE);

        // combine logs (w/o append, create new file)
        Path tmpLogPath = new Path(basedir, "log.combined");
        FSDataOutputStream fsOut = fc.create(tmpLogPath, EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE));
        try {
            FSDataInputStream fsIn = fc.open(logBackupPath);
            try {
                IOUtils.copy(fsIn, fsOut);
            } finally {
                fsIn.close();
            }

            fsIn = fc.open(logPath);
            try {
                IOUtils.copy(fsIn, fsOut);
            } finally {
                fsIn.close();
            }
        } finally {
            fsOut.close();
        }

        fc.rename(tmpLogPath, logPath, Rename.OVERWRITE);
        fc.delete(logBackupPath, false);
    } else {
        // we have log backup, but no checkpoint backup
        // failure between log rotation and writing checkpoint
        if (fc.util().exists(logBackupPath)) {
            LOG.warn("Found {}, did checkpointing fail?", logBackupPath);
            fc.rename(logBackupPath, logPath, Rename.OVERWRITE);
        }
    }

    if (!fc.util().exists(snapshotPath)) {
        LOG.debug("No existing checkpoint.");
        return null;
    }

    LOG.debug("Reading checkpoint {}", snapshotPath);
    InputStream is = fc.open(snapshotPath);
    // indeterministic class loading behavior
    // http://stackoverflow.com/questions/9110677/readresolve-not-working-an-instance-of-guavas-serializedform-appears
    final ClassLoader loader = Thread.currentThread().getContextClassLoader();
    ObjectInputStream ois = new ObjectInputStream(is)
    {
        @Override
        protected Class<?> resolveClass(ObjectStreamClass objectStreamClass)
                throws IOException, ClassNotFoundException
        {
            return Class.forName(objectStreamClass.getName(), true, loader);
        }
    };
    //ObjectInputStream ois = new ObjectInputStream(is);
    try {
        return ois.readObject();
    } catch (ClassNotFoundException cnfe) {
        throw new IOException("Failed to read checkpointed state", cnfe);
    } finally {
        ois.close();
    }
}
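restore() above is the read side of a snapshot/backup protocol: the writer renames the current snapshot to a backup, writes the new snapshot, then deletes the backup, so a crash at any point leaves a state restore() can recover from. A stripped-down sketch of that write pattern, with illustrative paths and payload (an inference from restore(), not taken from the DataTorrent source):

// Hypothetical writer side of the protocol restore() recovers from:
// step 1: move the current snapshot aside, step 2: write the new snapshot,
// step 3: remove the backup once the write has succeeded.
FileContext fc = FileContext.getFileContext();
Path snapshot = new Path("/recovery/snapshot");           // illustrative path
Path snapshotBackup = new Path("/recovery/snapshot.bak"); // illustrative path
byte[] serializedState = "checkpoint-state".getBytes(StandardCharsets.UTF_8); // stand-in payload

if (fc.util().exists(snapshot)) {
    fc.rename(snapshot, snapshotBackup, Options.Rename.OVERWRITE); // step 1
}
try (FSDataOutputStream out = fc.create(snapshot, EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE))) {
    out.write(serializedState);                                    // step 2
}
if (fc.util().exists(snapshotBackup)) {
    fc.delete(snapshotBackup, false);                              // step 3
}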
From source file:com.ikanow.aleph2.analytics.hadoop.services.BeJobLauncher.java
License:Open Source License
/** Cache the system and user classpaths
 * @param job
 * @param context
 * @throws IOException
 * @throws ExecutionException
 * @throws InterruptedException
 * @throws IllegalArgumentException
 */
protected static void cacheJars(final Job job, final DataBucketBean bucket, final IAnalyticsContext context)
        throws IllegalArgumentException, InterruptedException, ExecutionException, IOException {
    final FileContext fc = context.getServiceContext().getStorageService()
            .getUnderlyingPlatformDriver(FileContext.class, Optional.empty()).get();
    final String rootPath = context.getServiceContext().getStorageService().getRootPath();

    // Aleph2 libraries: need to cache them
    context.getAnalyticsContextLibraries(Optional.empty()).stream()
            .map(f -> new File(f))
            .map(f -> Tuples._2T(f, new Path(rootPath + "/" + f.getName())))
            .map(Lambdas.wrap_u(f_p -> {
                final FileStatus fs = Lambdas.get(() -> {
                    try {
                        return fc.getFileStatus(f_p._2());
                    } catch (Exception e) {
                        return null;
                    }
                });
                if (null == fs) { // cache doesn't exist
                    // Local version
                    Path srcPath = FileContext.getLocalFSFileContext()
                            .makeQualified(new Path(f_p._1().toString()));
                    fc.util().copy(srcPath, f_p._2());
                }
                return f_p._2();
            }))
            .forEach(Lambdas.wrap_consumer_u(path -> job.addFileToClassPath(path)));

    // User libraries: this is slightly easier since one of the 2 keys
    // is the HDFS path (the other is the _id)
    context.getAnalyticsLibraries(Optional.of(bucket), bucket.analytic_thread().jobs()).get().entrySet()
            .stream()
            .map(kv -> kv.getKey())
            .filter(path -> path.startsWith(rootPath))
            .forEach(Lambdas.wrap_consumer_u(path -> job.addFileToClassPath(new Path(path))));
}
From source file:com.ikanow.aleph2.analytics.services.AnalyticsContext.java
License:Apache License
@Override
public List<String> getInputPaths(final Optional<DataBucketBean> bucket, final AnalyticThreadJobBean job,
        final AnalyticThreadJobInputBean job_input) {
    final DataBucketBean my_bucket = bucket.orElseGet(() -> _mutable_state.bucket.get());

    final AuthorizationBean auth_bean = new AuthorizationBean(my_bucket.owner_id());
    final ICrudService<DataBucketBean> secured_bucket_crud = _core_management_db.readOnlyVersion()
            .getDataBucketStore().secured(_service_context, auth_bean);

    return Optional.of(job_input).filter(i -> null != i.data_service())
            .filter(i -> "batch".equalsIgnoreCase(i.data_service())
                    || DataSchemaBean.StorageSchemaBean.name.equalsIgnoreCase(i.data_service()))
            .map(Lambdas.wrap_u(i -> {
                if ("batch".equalsIgnoreCase(i.data_service())) {

                    final String[] bucket_subchannel = Lambdas.<String, String[]>wrap_u(s -> {
                        // 1) If the resource starts with "/" then must point to an intermediate batch result of an external bucket
                        // 2) If the resource is a pointer then
                        if (s.startsWith("/")) { // 1.*
                            if (s.endsWith(":")) {
                                return new String[] { s.substring(0, s.length() - 1), "" }; // (1.2a)
                            } else {
                                final String[] b_sc = s.split(":");
                                if (1 == b_sc.length) {
                                    return new String[] { my_bucket.full_name(), "" };
                                } else {
                                    return b_sc; // (1.1)
                                }
                            }
                        } else { // 2.*
                            return new String[] { my_bucket.full_name(), s };
                        }
                    }).apply(Optional.ofNullable(i.resource_name_or_id()).orElse(""));

                    final Optional<DataBucketBean> bucket_to_check = Lambdas.get(Lambdas.wrap_u(() -> {
                        if (bucket_subchannel[0] == my_bucket.full_name()) {
                            return Optional.of(my_bucket);
                        } else {
                            return secured_bucket_crud.getObjectBySpec(CrudUtils.allOf(DataBucketBean.class)
                                    .when(DataBucketBean::full_name, bucket_subchannel[0])).get();
                        }
                    }));

                    return Lambdas.get(() -> {
                        if (!bucket_subchannel[0].equals(my_bucket.full_name()) || !bucket_subchannel[1].isEmpty()) {
                            bucket_to_check.map(input_bucket -> input_bucket.analytic_thread())
                                    .flatMap(a_thread -> Optional.ofNullable(a_thread.jobs()))
                                    .flatMap(jobs -> jobs.stream()
                                            .filter(j -> bucket_subchannel[1].equals(j.name()))
                                            .filter(j -> _batch_types.contains(
                                                    Optionals.of(() -> j.output().transient_type())
                                                            .orElse(MasterEnrichmentType.none)))
                                            .filter(j -> Optionals.of(() -> j.output().is_transient()).orElse(false))
                                            .findFirst())
                                    .orElseThrow(() -> new RuntimeException(ErrorUtils.get(
                                            ErrorUtils.INPUT_PATH_NOT_A_TRANSIENT_BATCH, my_bucket.full_name(),
                                            job.name(), bucket_subchannel[0], bucket_subchannel[1])));

                            return Arrays.asList(_storage_service.getBucketRootPath() + bucket_subchannel[0]
                                    + IStorageService.TRANSIENT_DATA_SUFFIX_SECONDARY + bucket_subchannel[1]
                                    + IStorageService.PRIMARY_BUFFER_SUFFIX + "**/*");
                        } else { // This is my input directory
                            return Arrays.asList(_storage_service.getBucketRootPath() + my_bucket.full_name()
                                    + IStorageService.TO_IMPORT_DATA_SUFFIX + "*");
                        }
                    });
                } else { // storage service ... 3 options: raw, json, processed (defaults to processed)

                    if (Optional.of(true).equals(
                            Optional.ofNullable(i.config()).map(cfg -> cfg.high_granularity_filter()))) {
                        throw new RuntimeException(ErrorUtils.get(
                                ErrorUtils.HIGH_GRANULARITY_FILTER_NOT_SUPPORTED, my_bucket.full_name(),
                                job.name(), Optional.ofNullable(i.name()).orElse("(no name)")));
                    }

                    final String bucket_name = i.resource_name_or_id().split(":")[0];

                    // Check we have authentication for this bucket:
                    final boolean found_bucket = secured_bucket_crud
                            .getObjectBySpec(
                                    CrudUtils.allOf(DataBucketBean.class).when(DataBucketBean::full_name,
                                            bucket_name),
                                    Collections.emptyList(), // (don't want any part of the bucket, just whether it exists or not)
                                    true)
                            .get().isPresent();

                    if (!found_bucket) {
                        throw new RuntimeException(
                                ErrorUtils.get(ErrorUtils.BUCKET_NOT_FOUND_OR_NOT_READABLE, bucket_name));
                    }

                    final String sub_service = Patterns.match(i.resource_name_or_id()).<String>andReturn()
                            .when(s -> s.endsWith(":raw"), __ -> "raw/current/") // (input paths are always from primary)
                            .when(s -> s.endsWith(":json"), __ -> "json/current/")
                            .otherwise(__ -> "processed/current/");

                    final String base_path = _storage_service.getBucketRootPath() + bucket_name
                            + IStorageService.STORED_DATA_SUFFIX + sub_service;

                    return Optional.ofNullable(i.config())
                            .filter(cfg -> (null != cfg.time_min()) || (null != cfg.time_max()))
                            .map(cfg -> {
                                try {
                                    final FileContext fc = _storage_service
                                            .getUnderlyingPlatformDriver(FileContext.class, Optional.empty())
                                            .get();

                                    final Stream<String> paths = Arrays
                                            .stream(fc.util().listStatus(new Path(base_path)))
                                            .filter(f -> f.isDirectory())
                                            .map(f -> f.getPath().toUri().getPath()); // (remove the hdfs:// bit, which seems to be breaking with HA)

                                    return TimeSliceDirUtils
                                            .filterTimedDirectories(
                                                    TimeSliceDirUtils.annotateTimedDirectories(paths),
                                                    TimeSliceDirUtils.getQueryTimeRange(cfg, new Date()))
                                            .map(s -> s + "/*").collect(Collectors.toList());
                                } catch (Exception e) {
                                    return null; // will fall through to...
                                }
                            })
                            .orElseGet(() -> { // No time based filtering possible
                                final String suffix = "**/*";
                                return Arrays.asList(base_path + suffix);
                            });
                }
            })).orElse(Collections.emptyList());
}
From source file:com.ikanow.aleph2.core.shared.utils.DirUtils.java
License:Apache License
/** This method returns the path to the first subdirectory matching the subDirectoryName parameter, or null if not found.
 * @param fileContext
 * @param start
 * @param subDirectoryName
 * @return
 */
public static Path findOneSubdirectory(FileContext fileContext, Path start, String subDirectoryName) {
    Path p = null;
    try {
        logger.debug("findOneSubdirectory :" + start.toString());
        FileStatus[] statuss = fileContext.util().listStatus(start);
        for (int i = 0; i < statuss.length; i++) {
            FileStatus dir = statuss[i];
            logger.debug("FileStatus:" + statuss[i].getPath().toString());
            if (dir.isDirectory()) {
                if (dir.getPath().getName().contains(subDirectoryName)) {
                    logger.debug("findOneSubdirectory match:" + dir.getPath().getName());
                    return dir.getPath();
                } else {
                    p = findOneSubdirectory(fileContext, dir.getPath(), subDirectoryName);
                    if (p != null) {
                        return p;
                    }
                }
            }
        }
    } catch (Exception e) {
        logger.error("findOneSubdirectory Caught Exception", e);
    }
    return p;
}
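A short usage sketch for the method above; the FileContext, start path, and search term are illustrative assumptions, not from the original source:

// Hypothetical caller: find the first subdirectory under /app/aleph2 whose
// name contains "import"; findOneSubdirectory returns null when nothing matches.
FileContext fc = FileContext.getFileContext();
Path firstMatch = DirUtils.findOneSubdirectory(fc, new Path("/app/aleph2"), "import");
if (firstMatch != null) {
    System.out.println("First matching subdirectory: " + firstMatch);
}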
From source file:com.ikanow.aleph2.core.shared.utils.DirUtils.java
License:Apache License
/** Creates a directory in the storage service
 * @param fileContext
 * @param pathString
 */
public static void createDirectory(FileContext fileContext, String pathString) {
    if (fileContext != null && pathString != null) {
        try {
            Path dir = new Path(pathString);
            if (!fileContext.util().exists(dir)) {
                fileContext.mkdir(dir, DEFAULT_DIR_PERMS, true); // (note perm is &'d with umask)
                try {
                    fileContext.setPermission(dir, DEFAULT_DIR_PERMS);
                } catch (Exception e) {
                    // (not supported in all file systems)
                }
            }
        } catch (Exception e) {
            logger.error("createDirectory Caught Exception", e);
        }
    }
}
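A usage sketch, assuming a path on the default file system (the path itself is an illustrative assumption):

// Hypothetical caller: idempotently create a nested directory tree; the method
// checks util().exists() first, so repeated calls are harmless no-ops.
FileContext fc = FileContext.getFileContext();
DirUtils.createDirectory(fc, "/app/aleph2/data/managed_bucket/import/ready");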
From source file:com.ikanow.aleph2.core.shared.utils.DirUtils.java
License:Apache License
/** Recursively finds all subdirectories whose name contains subDirectoryName, adding either the matched directory or (if includeMatched is false) its parent to allPaths.
 * @param allPaths
 * @param fileContext
 * @param start
 * @param subDirectoryName
 * @param includeMatched
 */
public static void findAllSubdirectories(List<Path> allPaths, FileContext fileContext, Path start,
        String subDirectoryName, boolean includeMatched) {
    try {
        logger.debug("findAllSubdirectories :" + start.toString());
        FileStatus[] statuss = fileContext.util().listStatus(start);
        for (int i = 0; i < statuss.length; i++) {
            FileStatus dir = statuss[i];
            logger.debug("FileStatus:" + statuss[i].getPath().toString());
            if (dir.isDirectory()) {
                if (dir.getPath().getName().contains(subDirectoryName)) {
                    logger.debug("findAllSubdirectories match:" + dir.getPath().getName());
                    if (includeMatched) {
                        allPaths.add(dir.getPath());
                    } else {
                        allPaths.add(dir.getPath().getParent());
                    }
                } else {
                    findAllSubdirectories(allPaths, fileContext, dir.getPath(), subDirectoryName, includeMatched);
                }
            }
        }
    } catch (Exception e) {
        logger.error("findAllSubdirectories Caught Exception", e);
    }
}
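And a sketch for the recursive collector; again the paths and search term are illustrative assumptions:

// Hypothetical caller: collect every directory under /app/aleph2 whose name
// contains "ready". With includeMatched=false the parents of the matches
// would be collected instead of the matches themselves.
List<Path> matches = new ArrayList<>();
FileContext fc = FileContext.getFileContext();
DirUtils.findAllSubdirectories(matches, fc, new Path("/app/aleph2"), "ready", true);
matches.forEach(p -> System.out.println("Matched: " + p));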
From source file:com.ikanow.aleph2.core.shared.utils.JarCacheUtils.java
License:Apache License
/** Moves a shared JAR into a local spot (if required)
 * @param library_bean
 * @param fs
 * @return either a basic message bean containing an error, or the fully qualified path of the cached JAR
 */
public static <M> CompletableFuture<Validation<BasicMessageBean, String>> getCachedJar(
        final String local_cached_jar_dir, final SharedLibraryBean library_bean, final IStorageService fs,
        final String handler_for_errors, final M msg_for_errors) {
    try {
        final FileContext dfs = fs.getUnderlyingPlatformDriver(FileContext.class, Optional.empty()).get();
        final FileContext lfs = fs.getUnderlyingPlatformDriver(FileContext.class, IStorageService.LOCAL_FS).get();

        final Path cached_jar_file = lfs
                .makeQualified(new Path(local_cached_jar_dir + "/" + buildCachedJarName(library_bean)));
        final Path original_jar_file = dfs.makeQualified(new Path(library_bean.path_name()));

        final FileStatus file_status = dfs.getFileStatus(original_jar_file);
        // (this will exception out if it doesn't exist, as it should)

        try {
            final FileStatus local_file_status = lfs.getFileStatus(cached_jar_file);
            // (this will exception into case 2 if it doesn't exist)

            // 1) if the local version exists then overwrite it if the distributed copy is newer
            if (file_status.getModificationTime() > local_file_status.getModificationTime()) {
                // (it gets kinda complicated here so just invalidate the entire classloader cache..)
                // TODO (ALEPH-12): add a coverage test for this
                ClassloaderUtils.clearCache();

                lfs.util().copy(original_jar_file, cached_jar_file, false, true);
            }
        } catch (FileNotFoundException f) {
            // 2) if the local version doesn't exist then just copy the distributed file across
            // (note: don't need to do anything with the classloader cache here since the file doesn't exist so can't have a cache key)
            lfs.util().copy(original_jar_file, cached_jar_file);
        }

        return CompletableFuture.completedFuture(Validation.success(cached_jar_file.toString()));
    } catch (Throwable e) {
        return CompletableFuture.completedFuture(
                Validation.fail(SharedErrorUtils.buildErrorMessage(handler_for_errors, msg_for_errors,
                        SharedErrorUtils.getLongForm(SharedErrorUtils.SHARED_LIBRARY_NAME_NOT_FOUND, e,
                                library_bean.path_name()))));
    }
}
From source file:com.ikanow.aleph2.data_import_manager.batch_enrichment.actors.BeBucketActor.java
License:Apache License
public static List<String> launchReadyJobs(FileContext fileContext, String bucketFullName, String bucketPathStr,
        IManagementDbService managementDbService, ActorRef closingSelf) {
    List<String> jobNames = new ArrayList<String>();
    try {
        Path bucketReady = new Path(bucketPathStr + "/managed_bucket/import/ready");
        if (fileContext.util().exists(bucketReady)) {
            FileStatus[] statuss = fileContext.util().listStatus(bucketReady);
            if (statuss.length > 0) {
                logger.debug("Detected " + statuss.length + " ready files.");
                IManagementCrudService<DataBucketBean> dataBucketStore = managementDbService.getDataBucketStore();
                SingleQueryComponent<DataBucketBean> querydatBucketFullName = CrudUtils
                        .anyOf(DataBucketBean.class).when("full_name", bucketFullName);

                dataBucketStore.getObjectBySpec(querydatBucketFullName).thenAccept(odb -> {
                    if (odb.isPresent()) {
                        DataBucketBean dataBucketBean = odb.get();
                        List<EnrichmentControlMetadataBean> enrichmentConfigs = dataBucketBean
                                .batch_enrichment_configs();
                        for (EnrichmentControlMetadataBean ec : enrichmentConfigs) {
                            if (ec.enabled()) {
                                logger.info("starting batch enhancement job: " + bucketFullName + " for "
                                        + ec.name());
                                // run enhancement job
                                // TODO (ALEPH-12): this now should communicate with the enrichment actors
                                //String jobName = beJobService.runEnhancementJob(bucketFullName, bucketPathStr, ec.name());
                                String jobName = Lambdas.get(() -> null);
                                if (jobName != null) {
                                    jobNames.add(jobName);
                                    logger.info("Enrichment job for " + bucketFullName + " ec:" + ec.name()
                                            + " launched successfully, jobName = " + jobName);
                                } else {
                                    logger.error("Enrichment job for " + bucketFullName + " ec:" + ec.name()
                                            + " launch was unsuccessful");
                                }
                            } else {
                                logger.info("Skipping enrichment, not enabled: " + bucketFullName + " ec:"
                                        + ec.name());
                            }
                        }
                    } else {
                        logger.info("Skipping enrichment, no enrichment config found in db: " + bucketFullName);
                        if (closingSelf != null) {
                            closingSelf.tell(PoisonPill.getInstance(), closingSelf);
                        }
                    }
                });
            } else {
                logger.info("Skipping, no files found in ready folder: " + bucketReady);
            }
        } else {
            logger.info("Skipping, ready folder does not exist: " + bucketReady);
        }
    } catch (Exception e) {
        logger.error("launchReadyJobs caught Exception:", e);
    }
    return jobNames;
}