Example usage for org.apache.hadoop.fs FileContext util

List of usage examples for org.apache.hadoop.fs FileContext util

Introduction

On this page you can find example usage of org.apache.hadoop.fs FileContext util. The util() method returns a FileContext.Util helper whose convenience methods, such as exists(), copy(), and listStatus(), appear throughout the examples below.

Prototype

public Util util()

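Before the project examples below, here is a minimal, self-contained sketch of the util() helper in action. It only uses calls that also appear in the examples on this page (exists, listStatus) plus mkdir; the class name and the demo path are illustrative, not taken from any of the projects.

import java.io.IOException;

import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;

public class FileContextUtilSketch {
    public static void main(String[] args) throws IOException {
        // FileContext for the default file system configured on the classpath
        FileContext fc = FileContext.getFileContext();

        Path dir = new Path("/tmp/filecontext-util-demo"); // illustrative path
        if (!fc.util().exists(dir)) {
            // create the directory (and any missing parents) with default permissions
            fc.mkdir(dir, FsPermission.getDirDefault(), true);
        }

        // enumerate the directory contents through the same Util helper
        for (FileStatus status : fc.util().listStatus(dir)) {
            System.out.println(status.getPath() + (status.isDirectory() ? " (dir)" : " (file)"));
        }
    }
}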

Usage

From source file:com.datatorrent.common.util.AsyncFSStorageAgentTest.java

License:Apache License

@Test
public void testDelete() throws IOException {
    testLoad();
    testMeta.storageAgent.delete(1, 1);
    Path appPath = new Path(testMeta.applicationPath);
    FileContext fileContext = FileContext.getFileContext();
    Assert.assertTrue("operator 2 window 1", fileContext.util().exists(new Path(appPath + "/" + 2 + "/" + 1)));
    Assert.assertFalse("operator 1 window 1", fileContext.util().exists(new Path(appPath + "/" + 1 + "/" + 1)));
}

From source file:com.datatorrent.common.util.FSStorageAgentTest.java

License:Apache License

@Test
public void testDelete() throws IOException {
    testLoad();

    testMeta.storageAgent.delete(1, 1);
    Path appPath = new Path(testMeta.applicationPath);
    FileContext fileContext = FileContext.getFileContext();
    Assert.assertTrue("operator 2 window 1", fileContext.util().exists(new Path(appPath + "/" + 2 + "/" + 1)));
    Assert.assertFalse("operator 1 window 1", fileContext.util().exists(new Path(appPath + "/" + 1 + "/" + 1)));
}

From source file:com.datatorrent.stram.FSRecoveryHandler.java

License:Apache License

@Override
public Object restore() throws IOException {
    FileContext fc = FileContext.getFileContext(fs.getUri());

    // recover from wherever it was left
    if (fc.util().exists(snapshotBackupPath)) {
        LOG.warn("Incomplete checkpoint, reverting to {}", snapshotBackupPath);
        fc.rename(snapshotBackupPath, snapshotPath, Rename.OVERWRITE);

        // combine logs (w/o append, create new file)
        Path tmpLogPath = new Path(basedir, "log.combined");
        FSDataOutputStream fsOut = fc.create(tmpLogPath, EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE));
        try {
            FSDataInputStream fsIn = fc.open(logBackupPath);
            try {
                IOUtils.copy(fsIn, fsOut);
            } finally {
                fsIn.close();
            }

            fsIn = fc.open(logPath);
            try {
                IOUtils.copy(fsIn, fsOut);
            } finally {
                fsIn.close();
            }
        } finally {
            fsOut.close();
        }

        fc.rename(tmpLogPath, logPath, Rename.OVERWRITE);
        fc.delete(logBackupPath, false);
    } else {
        // we have log backup, but no checkpoint backup
        // failure between log rotation and writing checkpoint
        if (fc.util().exists(logBackupPath)) {
            LOG.warn("Found {}, did checkpointing fail?", logBackupPath);
            fc.rename(logBackupPath, logPath, Rename.OVERWRITE);
        }
    }

    if (!fc.util().exists(snapshotPath)) {
        LOG.debug("No existing checkpoint.");
        return null;
    }

    LOG.debug("Reading checkpoint {}", snapshotPath);
    InputStream is = fc.open(snapshotPath);
    // non-deterministic class loading behavior
    // http://stackoverflow.com/questions/9110677/readresolve-not-working-an-instance-of-guavas-serializedform-appears
    final ClassLoader loader = Thread.currentThread().getContextClassLoader();
    ObjectInputStream ois = new ObjectInputStream(is) {
        @Override
        protected Class<?> resolveClass(ObjectStreamClass objectStreamClass)
                throws IOException, ClassNotFoundException {
            return Class.forName(objectStreamClass.getName(), true, loader);
        }
    };
    //ObjectInputStream ois = new ObjectInputStream(is);
    try {
        return ois.readObject();
    } catch (ClassNotFoundException cnfe) {
        throw new IOException("Failed to read checkpointed state", cnfe);
    } finally {
        ois.close();
    }
}
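
The restore() method above combines util().exists() checks with rename(..., Rename.OVERWRITE) so that recovery can resume safely after an interrupted checkpoint. A condensed, hedged sketch of just that pattern (the class and path names are illustrative, this is not the full DataTorrent logic):

import java.io.IOException;

import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Options.Rename;
import org.apache.hadoop.fs.Path;

public class SnapshotRecoverySketch {
    /** Roll back to the backup snapshot if an unfinished checkpoint left one behind. */
    static void recover(FileContext fc, Path snapshotPath, Path snapshotBackupPath) throws IOException {
        if (fc.util().exists(snapshotBackupPath)) {
            // an incomplete checkpoint exists: atomically restore the previous snapshot
            fc.rename(snapshotBackupPath, snapshotPath, Rename.OVERWRITE);
        }
    }
}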

From source file:com.ikanow.aleph2.analytics.hadoop.services.BeJobLauncher.java

License:Open Source License

/** Cache the system and user classpaths
 * @param job
 * @param context
 * @throws IOException 
 * @throws ExecutionException 
 * @throws InterruptedException 
 * @throws IllegalArgumentException 
 */
protected static void cacheJars(final Job job, final DataBucketBean bucket, final IAnalyticsContext context)
        throws IllegalArgumentException, InterruptedException, ExecutionException, IOException {
    final FileContext fc = context.getServiceContext().getStorageService()
            .getUnderlyingPlatformDriver(FileContext.class, Optional.empty()).get();
    final String rootPath = context.getServiceContext().getStorageService().getRootPath();

    // Aleph2 libraries: need to cache them
    context.getAnalyticsContextLibraries(Optional.empty()).stream().map(f -> new File(f))
            .map(f -> Tuples._2T(f, new Path(rootPath + "/" + f.getName()))).map(Lambdas.wrap_u(f_p -> {
                final FileStatus fs = Lambdas.get(() -> {
                    try {
                        return fc.getFileStatus(f_p._2());
                    } catch (Exception e) {
                        return null;
                    }
                });
                if (null == fs) { //cache doesn't exist
                    // Local version
                    Path srcPath = FileContext.getLocalFSFileContext()
                            .makeQualified(new Path(f_p._1().toString()));
                    fc.util().copy(srcPath, f_p._2());
                }
                return f_p._2();
            })).forEach(Lambdas.wrap_consumer_u(path -> job.addFileToClassPath(path)));

    // User libraries: this is slightly easier since one of the 2 keys
    // is the HDFS path (the other is the _id)
    context.getAnalyticsLibraries(Optional.of(bucket), bucket.analytic_thread().jobs()).get().entrySet()
            .stream().map(kv -> kv.getKey()).filter(path -> path.startsWith(rootPath))
            .forEach(Lambdas.wrap_consumer_u(path -> job.addFileToClassPath(new Path(path))));
}
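
cacheJars() only copies a library when getFileStatus() shows no cached copy on the cluster. A minimal sketch of that copy-if-absent idiom using util().copy(), with hypothetical names (it is not the Aleph2 method itself):

import java.io.IOException;

import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Path;

public class CopyIfAbsentSketch {
    /** Copy a local file into the remote FileContext unless it is already cached there. */
    static Path cacheLocally(FileContext remoteFc, Path localFile, Path remoteFile) throws IOException {
        if (!remoteFc.util().exists(remoteFile)) {
            // qualify the source against the local file system before the cross-FS copy
            Path qualifiedSrc = FileContext.getLocalFSFileContext().makeQualified(localFile);
            remoteFc.util().copy(qualifiedSrc, remoteFile);
        }
        return remoteFile;
    }
}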

From source file:com.ikanow.aleph2.analytics.services.AnalyticsContext.java

License:Apache License

@Override
public List<String> getInputPaths(final Optional<DataBucketBean> bucket, final AnalyticThreadJobBean job,
        final AnalyticThreadJobInputBean job_input) {

    final DataBucketBean my_bucket = bucket.orElseGet(() -> _mutable_state.bucket.get());

    final AuthorizationBean auth_bean = new AuthorizationBean(my_bucket.owner_id());
    final ICrudService<DataBucketBean> secured_bucket_crud = _core_management_db.readOnlyVersion()
            .getDataBucketStore().secured(_service_context, auth_bean);

    return Optional.of(job_input).filter(i -> null != i.data_service())
            .filter(i -> "batch".equalsIgnoreCase(i.data_service())
                    || DataSchemaBean.StorageSchemaBean.name.equalsIgnoreCase(i.data_service()))
            .map(Lambdas.wrap_u(i -> {
                if ("batch".equalsIgnoreCase(i.data_service())) {
                    final String[] bucket_subchannel = Lambdas.<String, String[]>wrap_u(s -> {

                        // 1) If the resource starts with "/" then must point to an intermediate batch result of an external bucket
                        // 2) If the resource is a pointer then

                        if (s.startsWith("/")) { //1.*
                            if (s.endsWith(":")) {
                                return new String[] { s.substring(0, s.length() - 1), "" }; // (1.2a)
                            } else {
                                final String[] b_sc = s.split(":");
                                if (1 == b_sc.length) {
                                    return new String[] { my_bucket.full_name(), "" };
                                } else {
                                    return b_sc; //(1.1)
                                }
                            }
                        } else { //2.*
                            return new String[] { my_bucket.full_name(), s };
                        }
                    }).apply(Optional.ofNullable(i.resource_name_or_id()).orElse(""));

                    final Optional<DataBucketBean> bucket_to_check = Lambdas.get(Lambdas.wrap_u(() -> {
                        if (bucket_subchannel[0] == my_bucket.full_name()) {
                            return Optional.of(my_bucket);
                        } else {
                            return secured_bucket_crud.getObjectBySpec(CrudUtils.allOf(DataBucketBean.class)
                                    .when(DataBucketBean::full_name, bucket_subchannel[0])).get();
                        }
                    }));
                    return Lambdas.get(() -> {
                        if (!bucket_subchannel[0].equals(my_bucket.full_name())
                                || !bucket_subchannel[1].isEmpty()) {
                            bucket_to_check.map(input_bucket -> input_bucket.analytic_thread())
                                    .flatMap(
                                            a_thread -> Optional.ofNullable(a_thread.jobs()))
                                    .flatMap(jobs -> jobs.stream()
                                            .filter(j -> bucket_subchannel[1].equals(j.name()))
                                            .filter(j -> _batch_types
                                                    .contains(Optionals.of(() -> j.output().transient_type())
                                                            .orElse(MasterEnrichmentType.none)))
                                            .filter(j -> Optionals.of(() -> j.output().is_transient())
                                                    .orElse(false))
                                            .findFirst())
                                    .orElseThrow(() -> new RuntimeException(ErrorUtils.get(
                                            ErrorUtils.INPUT_PATH_NOT_A_TRANSIENT_BATCH, my_bucket.full_name(),
                                            job.name(), bucket_subchannel[0], bucket_subchannel[1])));

                            return Arrays.asList(_storage_service.getBucketRootPath() + bucket_subchannel[0]
                                    + IStorageService.TRANSIENT_DATA_SUFFIX_SECONDARY + bucket_subchannel[1]
                                    + IStorageService.PRIMARY_BUFFER_SUFFIX + "**/*");
                        } else { // This is my input directory
                            return Arrays.asList(_storage_service.getBucketRootPath() + my_bucket.full_name()
                                    + IStorageService.TO_IMPORT_DATA_SUFFIX + "*");
                        }
                    });
                } else { // storage service ... 3 options :raw, :json, :processed (defaults to :processed)
                    if (Optional.of(true).equals(
                            Optional.ofNullable(i.config()).map(cfg -> cfg.high_granularity_filter()))) {
                        throw new RuntimeException(ErrorUtils.get(
                                ErrorUtils.HIGH_GRANULARITY_FILTER_NOT_SUPPORTED, my_bucket.full_name(),
                                job.name(), Optional.ofNullable(i.name()).orElse("(no name)")));
                    }

                    final String bucket_name = i.resource_name_or_id().split(":")[0];

                    // Check we have authentication for this bucket:

                    final boolean found_bucket = secured_bucket_crud
                            .getObjectBySpec(
                                    CrudUtils.allOf(DataBucketBean.class).when(DataBucketBean::full_name,
                                            bucket_name),
                                    Collections.emptyList(), // (don't want any part of the bucket, just whether it exists or not)
                                    true)
                            .get().isPresent();

                    if (!found_bucket) {
                        throw new RuntimeException(
                                ErrorUtils.get(ErrorUtils.BUCKET_NOT_FOUND_OR_NOT_READABLE, bucket_name));
                    }
                    final String sub_service = Patterns.match(i.resource_name_or_id()).<String>andReturn()
                            .when(s -> s.endsWith(":raw"), __ -> "raw/current/") // (input paths are always from primary)
                            .when(s -> s.endsWith(":json"), __ -> "json/current/")
                            .otherwise(__ -> "processed/current/");

                    final String base_path = _storage_service.getBucketRootPath() + bucket_name
                            + IStorageService.STORED_DATA_SUFFIX + sub_service;
                    return Optional.ofNullable(i.config())
                            .filter(cfg -> (null != cfg.time_min()) || (null != cfg.time_max())).map(cfg -> {
                                try {
                                    final FileContext fc = _storage_service
                                            .getUnderlyingPlatformDriver(FileContext.class, Optional.empty())
                                            .get();

                                    //DEBUG
                                    //_logger.warn("Found1: " + Arrays.stream(fc.util().listStatus(new Path(base_path))).map(f -> f.getPath().toString()).collect(Collectors.joining(";")));                                                            
                                    //_logger.warn("Found2: " + TimeSliceDirUtils.annotateTimedDirectories(tmp_paths).map(t -> t.toString()).collect(Collectors.joining(";")));
                                    //_logger.warn("Found3: " + TimeSliceDirUtils.getQueryTimeRange(cfg, new Date()));

                                    final Stream<String> paths = Arrays
                                            .stream(fc.util().listStatus(new Path(base_path)))
                                            .filter(f -> f.isDirectory())
                                            .map(f -> f.getPath().toUri().getPath()) // (remove the hdfs:// bit, which seems to be breaking with HA)
                                    ;

                                    return TimeSliceDirUtils
                                            .filterTimedDirectories(
                                                    TimeSliceDirUtils.annotateTimedDirectories(paths),
                                                    TimeSliceDirUtils.getQueryTimeRange(cfg, new Date()))
                                            .map(s -> s + "/*").collect(Collectors.toList());
                                } catch (Exception e) {
                                    return null;
                                } // will fall through to...
                            }).orElseGet(() -> {
                                // No time based filtering possible
                                final String suffix = "**/*";
                                return Arrays.asList(base_path + suffix);
                            });
                }
            })).orElse(Collections.emptyList());

}
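
In the storage-service branch above, util().listStatus() enumerates the time-sliced directories before they are filtered by date range. A small sketch of just that listing-and-filtering step (the TimeSliceDirUtils post-processing is omitted and the class name is made up):

import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;

import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;

public class ListSubdirectoriesSketch {
    /** Return the URI paths of all immediate sub-directories under basePath. */
    static List<String> listSubDirectories(FileContext fc, String basePath) throws IOException {
        return Arrays.stream(fc.util().listStatus(new Path(basePath)))
                .filter(FileStatus::isDirectory)
                .map(f -> f.getPath().toUri().getPath()) // drop the scheme/authority, as in the example above
                .collect(Collectors.toList());
    }
}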

From source file:com.ikanow.aleph2.core.shared.utils.DirUtils.java

License:Apache License

/** This method returns the path to the first subdirectory matching the subDirectoryName parameter or null if not found.
* @param fileContext
* @param start
* @param subDirectoryName
* @return
*/
public static Path findOneSubdirectory(FileContext fileContext, Path start, String subDirectoryName) {
    Path p = null;
    try {
        logger.debug("findOneSubdirectory :" + start.toString());
        FileStatus[] statuss = fileContext.util().listStatus(start);
        for (int i = 0; i < statuss.length; i++) {
            FileStatus dir = statuss[i];
            logger.debug("FileStatus:" + statuss[i].getPath().toString());
            if (dir.isDirectory()) {
                if (dir.getPath().getName().contains(subDirectoryName)) {
                    logger.debug("findOneSubdirectory match:" + dir.getPath().getName());
                    return dir.getPath();
                } else {
                    p = findOneSubdirectory(fileContext, dir.getPath(), subDirectoryName);
                    if (p != null) {
                        return p;
                    }
                }
            }
        }

    } catch (Exception e) {
        logger.error("findOneSubdirectory Caught Exception", e);
    }

    return p;
}

From source file:com.ikanow.aleph2.core.shared.utils.DirUtils.java

License:Apache License

/** Creates a directory in the storage service
 * @param fileContext
 * @param pathString
 */
public static void createDirectory(FileContext fileContext, String pathString) {
    if (fileContext != null && pathString != null) {
        try {
            Path dir = new Path(pathString);
            if (!fileContext.util().exists(dir)) {
                fileContext.mkdir(dir, DEFAULT_DIR_PERMS, true); //(note perm is & with umask)
                try {
                    fileContext.setPermission(dir, DEFAULT_DIR_PERMS);
                } catch (Exception e) {
                } // (not supported in all FS)
            }
        } catch (Exception e) {
            logger.error("createFolderStructure Caught Exception", e);
        }
    }

}

From source file:com.ikanow.aleph2.core.shared.utils.DirUtils.java

License:Apache License

/**
 * @param allPaths
 * @param fileContext
 * @param start
 * @param subDirectoryName
 * @param includeMatched
 */
public static void findAllSubdirectories(List<Path> allPaths, FileContext fileContext, Path start,
        String subDirectoryName, boolean includeMatched) {
    try {
        logger.debug("findAllSubdirectories :" + start.toString());
        FileStatus[] statuss = fileContext.util().listStatus(start);
        for (int i = 0; i < statuss.length; i++) {
            FileStatus dir = statuss[i];
            logger.debug("FileStatus:" + statuss[i].getPath().toString());
            if (dir.isDirectory()) {
                if (dir.getPath().getName().contains(subDirectoryName)) {
                    logger.debug("findOneSubdirectory match:" + dir.getPath().getName());
                    if (includeMatched) {
                        allPaths.add(dir.getPath());
                    } else {
                        allPaths.add(dir.getPath().getParent());
                    }
                } else {
                    findAllSubdirectories(allPaths, fileContext, dir.getPath(), subDirectoryName,
                            includeMatched);
                }
            }
        }

    } catch (Exception e) {
        logger.error("findAllSubdirectories Caught Exception", e);
    }
}
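
A hedged usage sketch for the DirUtils helpers above, assuming the Aleph2 DirUtils class is on the classpath and using an illustrative start path:

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Path;

import com.ikanow.aleph2.core.shared.utils.DirUtils;

public class DirUtilsUsageSketch {
    public static void main(String[] args) throws Exception {
        FileContext fc = FileContext.getFileContext();
        Path start = new Path("/app/aleph2/data"); // illustrative start directory

        // first matching sub-directory only (null if none found)
        Path first = DirUtils.findOneSubdirectory(fc, start, "import");
        System.out.println("first match: " + first);

        // all matches; pass false to collect the parents of the matched directories instead
        List<Path> all = new ArrayList<>();
        DirUtils.findAllSubdirectories(all, fc, start, "import", true);
        all.forEach(p -> System.out.println("found: " + p));
    }
}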

From source file:com.ikanow.aleph2.core.shared.utils.JarCacheUtils.java

License:Apache License

/** Moves a shared JAR into a local spot (if required)
 * @param library_bean
 * @param fs
 * @return either a basic message bean containing an error, or the fully qualified path of the cached JAR
 */
public static <M> CompletableFuture<Validation<BasicMessageBean, String>> getCachedJar(
        final String local_cached_jar_dir, final SharedLibraryBean library_bean, final IStorageService fs,
        final String handler_for_errors, final M msg_for_errors) {
    try {
        final FileContext dfs = fs.getUnderlyingPlatformDriver(FileContext.class, Optional.empty()).get();
        final FileContext lfs = fs.getUnderlyingPlatformDriver(FileContext.class, IStorageService.LOCAL_FS)
                .get();

        final Path cached_jar_file = lfs
                .makeQualified(new Path(local_cached_jar_dir + "/" + buildCachedJarName(library_bean)));
        final Path original_jar_file = dfs.makeQualified(new Path(library_bean.path_name()));

        final FileStatus file_status = dfs.getFileStatus(original_jar_file); // (this will exception out if it doesn't exist, as it should)

        try {
            final FileStatus local_file_status = lfs.getFileStatus(cached_jar_file); // (this will exception in to case 2 if it doesn't exist)

            // 1) if the local version exists but is older than the distributed copy, overwrite it

            if (file_status.getModificationTime() > local_file_status.getModificationTime()) {
                // (it gets kinda complicated here so just invalidate the entire classloader cache..)
                // TODO (ALEPH-12): add a coverage test for this
                ClassloaderUtils.clearCache();

                lfs.util().copy(original_jar_file, cached_jar_file, false, true);
            }
        } catch (FileNotFoundException f) {

            // 2) if the local version doesn't exist then just copy the distributed file across
            // (note: don't need to do anything with the classloader cache here since the file doesn't exist so can't have a cache key)

            lfs.util().copy(original_jar_file, cached_jar_file);
        }
        return CompletableFuture.completedFuture(Validation.success(cached_jar_file.toString()));

    } catch (Throwable e) {
        return CompletableFuture.completedFuture(
                Validation.fail(SharedErrorUtils.buildErrorMessage(handler_for_errors, msg_for_errors,
                        SharedErrorUtils.getLongForm(SharedErrorUtils.SHARED_LIBRARY_NAME_NOT_FOUND, e,
                                library_bean.path_name()))));
    }
}
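
getCachedJar() relies on the four-argument Util.copy(src, dst, deleteSource, overwrite) overload to refresh a stale local copy. A stripped-down sketch of that refresh logic with hypothetical names (error handling and the classloader-cache invalidation are omitted):

import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Path;

public class RefreshCachedFileSketch {
    /** Copy remoteFile to localFile, overwriting only when the remote copy is newer. */
    static void refresh(FileContext remote, FileContext local, Path remoteFile, Path localFile)
            throws IOException {
        long remoteTime = remote.getFileStatus(remoteFile).getModificationTime();
        try {
            long localTime = local.getFileStatus(localFile).getModificationTime();
            if (remoteTime > localTime) {
                local.util().copy(remoteFile, localFile, false, true); // deleteSource=false, overwrite=true
            }
        } catch (FileNotFoundException e) {
            // no cached copy yet: a plain copy is enough
            local.util().copy(remoteFile, localFile);
        }
    }
}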

From source file:com.ikanow.aleph2.data_import_manager.batch_enrichment.actors.BeBucketActor.java

License:Apache License

public static List<String> launchReadyJobs(FileContext fileContext, String bucketFullName, String bucketPathStr,
        IManagementDbService managementDbService, ActorRef closingSelf) {
    List<String> jobNames = new ArrayList<String>();
    try {
        Path bucketReady = new Path(bucketPathStr + "/managed_bucket/import/ready");
        //Path bucketTmp = new Path(bucketPathStr + "/managed_bucket/import/temp");
        if (fileContext.util().exists(bucketReady)) {
            FileStatus[] statuss = fileContext.util().listStatus(bucketReady);
            if (statuss.length > 0) {
                logger.debug("Detected " + statuss.length + " ready files.");

                IManagementCrudService<DataBucketBean> dataBucketStore = managementDbService
                        .getDataBucketStore();
                SingleQueryComponent<DataBucketBean> querydatBucketFullName = CrudUtils
                        .anyOf(DataBucketBean.class).when("full_name", bucketFullName);

                dataBucketStore.getObjectBySpec(querydatBucketFullName).thenAccept(odb -> {
                    if (odb.isPresent()) {
                        DataBucketBean dataBucketBean = odb.get();
                        List<EnrichmentControlMetadataBean> enrichmentConfigs = dataBucketBean
                                .batch_enrichment_configs();
                        for (EnrichmentControlMetadataBean ec : enrichmentConfigs) {
                            if (ec.enabled()) {
                                logger.info("starting batch enhancment job: " + bucketFullName + " for "
                                        + ec.name());
                                // run enhancement job

                                //TODO (ALEPH-12): this now should communicate with the enrichment actors
                                //String jobName = beJobService.runEnhancementJob(bucketFullName, bucketPathStr, ec.name());
                                String jobName = Lambdas.get(() -> null);
                                if (jobName != null) {
                                    jobNames.add(jobName);
                                    logger.info("Enrichment job for bucket:" + bucketFullName
                                            + " ec:" + ec.name() + " launched successfully, jobName = "
                                            + jobName);
                                } else {
                                    logger.error("Enrichment job for bucket:" + bucketFullName
                                            + " ec:" + ec.name() + " launch was unsuccessful");
                                }
                            } // if enabled
                            else {
                                logger.info("Skipping Enrichment, no enrichment enabled:" + bucketFullName
                                        + " ec:" + ec.name());
                            }
                        } // for
                    } else {
                        logger.info("Skipping Enrichment, no enrichment config found in db: " + bucketFullName);
                        if (closingSelf != null) {
                            closingSelf.tell(PoisonPill.getInstance(), closingSelf);
                        }
                    }
                });
            } // status length
            else {
                logger.info("Skipping, no files found in ready folder: " + bucketReady);
            }
        } else {
            logger.info("Skipping,  ready folder does not exist: " + bucketReady);
        }

    } catch (Exception e) {
        logger.error("checkReady caught Exception:", e);
    }
    return jobNames;
}