Example usage for org.apache.commons.io FilenameUtils concat

Introduction

This page collects example usages of org.apache.commons.io FilenameUtils.concat from open-source projects.

Prototype

public static String concat(String basePath, String fullFilenameToAdd) 

Document

Concatenates a filename to a base path using normal command line style rules.
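
The concatenation is normalized on the fly: "." and ".." segments are resolved, and if the filename to add is itself absolute it replaces the base path entirely; a path that cannot be resolved (for example, too many "..") yields null. Below is a minimal standalone sketch of this behavior; the expected values follow the commons-io javadoc, with separators shown Unix-style (concat emits the platform separator).

import org.apache.commons.io.FilenameUtils;

public class ConcatDemo {
    public static void main(String[] args) {
        // Simple join: a separator is inserted only if the base lacks one
        System.out.println(FilenameUtils.concat("/foo", "bar"));        // /foo/bar
        System.out.println(FilenameUtils.concat("/foo/", "bar"));       // /foo/bar

        // ".." segments are resolved against the base
        System.out.println(FilenameUtils.concat("/foo/a/", "../bar"));  // /foo/bar

        // An absolute second argument replaces the base entirely
        System.out.println(FilenameUtils.concat("/foo", "/bar"));       // /bar

        // An unresolvable path returns null
        System.out.println(FilenameUtils.concat("/foo/", "../../bar")); // null
    }
}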

Usage

From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexProviderService.java

private void initializeIndexCopier(BundleContext bundleContext, Map<String, ?> config) throws IOException {
    if (indexCopier != null) {
        return;
    }
    String indexDirPath = PropertiesUtil.toString(config.get(PROP_LOCAL_INDEX_DIR), null);
    boolean prefetchEnabled = PropertiesUtil.toBoolean(config.get(PROP_PREFETCH_INDEX_FILES),
            PROP_PREFETCH_INDEX_FILES_DEFAULT);
    if (Strings.isNullOrEmpty(indexDirPath)) {
        String repoHome = bundleContext.getProperty(REPOSITORY_HOME);
        if (repoHome != null) {
            indexDirPath = FilenameUtils.concat(repoHome, "index");
        }
    }

    checkNotNull(indexDirPath,
            "Index directory cannot be determined as neither index "
                    + "directory path [%s] nor repository home [%s] defined",
            PROP_LOCAL_INDEX_DIR, REPOSITORY_HOME);

    if (prefetchEnabled) {
        log.info(
                "Prefetching of index files enabled. Index would be opened after copying all new files locally");
    }

    indexDir = new File(indexDirPath);
    indexCopier = new IndexCopier(getExecutorService(), indexDir, prefetchEnabled);

    oakRegs.add(registerMBean(whiteboard, CopyOnReadStatsMBean.class, indexCopier, CopyOnReadStatsMBean.TYPE,
            "IndexCopier support statistics"));

}
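
Here concat supplies the fallback location for the local index copy: when PROP_LOCAL_INDEX_DIR is not configured, the directory is derived as &lt;repository home&gt;/index from the REPOSITORY_HOME framework property.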

From source file:org.apache.jackrabbit.oak.run.osgi.OakOSGiRepositoryFactory.java

@SuppressWarnings("unchecked")
private static void processConfig(Map config) {
    String home = (String) config.get(REPOSITORY_HOME);
    checkNotNull(home, "Repository home not defined via [%s]", REPOSITORY_HOME);

    home = FilenameUtils.normalizeNoEndSeparator(home);

    String bundleDir = FilenameUtils.concat(home, "bundles");
    config.put(Constants.FRAMEWORK_STORAGE, bundleDir);

    //FIXME Pojo SR currently reads this from system property instead of Framework Property
    System.setProperty(Constants.FRAMEWORK_STORAGE, bundleDir);

    //Directory used by Felix File Install to watch for configs
    config.put("felix.fileinstall.dir", FilenameUtils.concat(home, "config"));

    //Set log level for config to INFO LogService.LOG_INFO
    config.put("felix.fileinstall.log.level", "3");

    //This ensures that configuration is registered in main thread
    //and not in a different thread
    config.put("felix.fileinstall.noInitialDelay", "true");

    config.put("repository.home", FilenameUtils.concat(home, "repository"));

}
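
Note the normalizeNoEndSeparator call before concat is used: it resolves "." and ".." segments and strips any trailing separator from the configured home, so the derived bundles, config, and repository child paths are all built against the same clean base.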

From source file:org.apache.jackrabbit.oak.run.osgi.SimpleRepositoryFactoryTest.java

private static String path(String path) {
    File file = new File(FilenameUtils.concat(getBaseDir(), "src/test/resources/" + path));
    assert file.exists() : "No file found at " + file.getAbsolutePath();
    return file.getAbsolutePath();
}

From source file:org.apache.jackrabbit.oak.spi.blob.osgi.FileBlobStoreService.java

@Activate
protected void activate(ComponentContext context, Map<String, Object> config) {
    String homeDir = lookup(context, PROP_HOME);
    if (homeDir != null) {
        log.info("Initializing the FileBlobStore with homeDir [{}]", homeDir);
    }
    BlobStore blobStore = new FileBlobStore(FilenameUtils.concat(homeDir, "datastore"));
    PropertiesUtil.populate(blobStore, config, false);
    Dictionary<String, Object> props = new Hashtable<String, Object>();
    if (context.getProperties().get(PROP_SPLIT_BLOBSTORE) != null) {
        props.put(PROP_SPLIT_BLOBSTORE, context.getProperties().get(PROP_SPLIT_BLOBSTORE));
    }
    reg = context.getBundleContext().registerService(
            new String[] { BlobStore.class.getName(), GarbageCollectableBlobStore.class.getName() }, blobStore,
            props);
}

From source file:org.apache.jackrabbit.oak.upgrade.blob.LengthCachingDataStore.java

private void initializeMappingData(String homeDir) {
    mappingFile = new File(FilenameUtils.concat(homeDir, mappingFilePath));
    if (mappingFile.exists()) {
        try {
            existingMappings = loadMappingData(mappingFile);
        } catch (FileNotFoundException e) {
            throw new RuntimeException("Failed to read mapping data from " + mappingFile, e);
        }
    } else {
        log.info("Mapping file {} not found. Would create a new one.", mappingFile);
    }
}
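
Because concat treats an absolute second argument as a complete replacement for the base path, mappingFilePath can be supplied either relative to homeDir or as an absolute path; in the relative case the mapping file is resolved under the home directory.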

From source file:org.apache.mahout.freqtermsets.AggregatorReducer.java

@Override
protected void reduce(Text key, Iterable<TopKStringPatterns> values, Context context)
        throws IOException, InterruptedException {

    // YA get data to do more than freq merge
    int myMaxHeapSize = maxHeapSize;
    Configuration conf = context.getConfiguration();
    FileSystem fs = FileSystem.get(conf); // TODO: do I need getLocal(conf)?
    String cachePath = FilenameUtils.concat(FileUtils.getTempDirectory().toURI().toString(),
            Thread.currentThread().getName() + "_" + key.hashCode() + "_patterns");
    org.apache.hadoop.io.ArrayFile.Writer cacheWr = new ArrayFile.Writer(conf, fs, cachePath,
            TopKStringPatterns.class);
    final String keyStr = key.toString();
    final OpenObjectLongHashMap<String> jointFreq = new OpenObjectLongHashMap<String>();

    TopKStringPatterns metaPatterns = new TopKStringPatterns();

    for (TopKStringPatterns value : values) {

        List<Pair<List<String>, Long>> vPatterns = value.getPatterns();
        for (int p = vPatterns.size() - 1; p >= 0; --p) {
            Pair<List<String>, Long> pattern = vPatterns.get(p);
            if (pattern == null) {
                continue; // just like their merge
            }
            for (String other : pattern.getFirst()) {
                if (other.charAt(0) == METADATA_PREFIX) {
                    // Keep metadata out of merge
                    vPatterns.remove(p);

                    // Make sure it has space to be merged
                    ++myMaxHeapSize;

                    // Store the metadata temporarily.. we will add it in the end
                    // where it can't be pruned out
                    metaPatterns.getPatterns().add(pattern);

                    // done processing metadata itemset
                    break;
                }
                if (keyStr.equals(other)) {
                    continue;
                }
                long freq = jointFreq.get(other);
                if (pattern.getSecond() > freq) {
                    freq = pattern.getSecond();
                }
                jointFreq.put(other, freq);
            }
        }

        cacheWr.append(value);
    }
    cacheWr.close();

    org.apache.hadoop.io.ArrayFile.Reader cacheRd = new ArrayFile.Reader(fs, cachePath, conf);
    // END YA get data

    TopKStringPatterns patterns = new TopKStringPatterns();
    TopKStringPatterns value = new TopKStringPatterns();
    while (cacheRd.next(value) != null) {
        context.setStatus("Aggregator Reducer: Selecting TopK patterns for: " + key);

        // YA Mutual info merge.. TODO: more metrics passed as class name of comparator
        if (sortByMutualInfo) {
            patterns = patterns.merge(value, myMaxHeapSize, new Comparator<Pair<List<String>, Long>>() {

                private double calcNormalizedMutualInfo(String[] bagOfTokens) {
                    double numer = 0;
                    double denom = 0;
                    double ft1 = fMap.get(keyStr);
                    for (int t2 = 0; t2 < bagOfTokens.length; ++t2) {
                        if (bagOfTokens[t2].equals(keyStr)) {
                            continue;
                        }
                        double ft2 = fMap.get(bagOfTokens[t2]);
                        double jf = jointFreq.get(bagOfTokens[t2]);

                        // jf should never be 0 here, so the check is skipped to save time:
                        // if (jf != 0) {
                        double jp = jf / totalNterms;

                        numer += jp * (Math.log(jf / (ft1 * ft2)) + lnTotalNTerms);

                        denom += jp * Math.log(jp);
                    }

                    double result = numer;
                    if (denom != 0) {
                        result /= -denom;
                    }
                    return result;
                }

                @Override
                public int compare(Pair<List<String>, Long> o1, Pair<List<String>, Long> o2) {
                    String[] bagOfTokens = o1.getFirst().toArray(new String[0]);

                    double mi1 = calcNormalizedMutualInfo(bagOfTokens);

                    bagOfTokens = o2.getFirst().toArray(new String[0]);

                    double mi2 = calcNormalizedMutualInfo(bagOfTokens);

                    int result = Double.compare(mi1, mi2);
                    if (result == 0) {
                        result = Double.compare(o1.getFirst().size(), o2.getFirst().size());

                        if (result == 0) {
                            result = o1.getSecond().compareTo(o2.getSecond());
                        }
                    }
                    return result;
                }
            });
            // END YA Mutual info merge
        } else {
            patterns = patterns.mergeFreq(value, myMaxHeapSize);
        }
    }

    // YA get data
    cacheRd.close();
    fs.delete(new Path(cachePath), true);

    patterns = patterns.merge(metaPatterns, myMaxHeapSize, new Comparator<Pair<List<String>, Long>>() {
        @Override
        public int compare(Pair<List<String>, Long> o1, Pair<List<String>, Long> o2) {
            // Force the metadata to be accepted
            return -1;
        }
    });
    // END YA get data

    context.write(key, patterns);
}

From source file:org.apache.mahout.freqtermsets.FPGrowthDriver.java

/**
 * Run TopK FPGrowth given the input file.
 */
@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();

    addOption("minSupport", "s",
            "(Optional) The minimum number of times a co-occurrence must be present." + " Default Value: 3",
            "3");
    addOption("maxHeapSize", "k",
            "(Optional) Maximum Heap Size k, to denote the requirement to mine top K items."
                    + " Default value: 50",
            "50");
    addOption(PFPGrowth.NUM_GROUPS, "g",
            "(Optional) Number of groups the features should be divided in the map-reduce version."
                    + " Doesn't work in sequential version Default Value:" + PFPGrowth.NUM_GROUPS_DEFAULT,
            Integer.toString(PFPGrowth.NUM_GROUPS_DEFAULT));
    // addOption("splitterPattern", "regex",
    // "Regular Expression pattern used to split given string transaction into"
    // + " itemsets. Default value splits comma separated itemsets.  Default Value:"
    // + " \"[ ,\\t]*[,|\\t][ ,\\t]*\" ", "[ ,\t]*[,|\t][ ,\t]*");
    addOption("numTreeCacheEntries", "tc", "(Optional) Number of entries in the tree cache to prevent duplicate"
            + " tree building. (Warning) a first level conditional FP-Tree might consume a lot of memory, "
            + "so keep this value small, but big enough to prevent duplicate tree building. "
            + "Default Value:5 Recommended Values: [5-10]", "5");
    // addOption("method", "method", "Method of processing: sequential|mapreduce", "mapreduce");
    // //"sequential");
    addOption("encoding", "e", "(Optional) The file encoding.  Default value: UTF-8", "UTF-8");
    // addFlag("useFPG2", "2", "Use an alternate FPG implementation");
    addOption(PFPGrowth.COUNT_IN, "cnt",
            "(Optional) In case of mapreduce, if this is set parallel counting will be skipped and counts will be read from the path specified");
    // addFlag(PFPGrowth.PSEUDO, "ps",
    // "Running on a Pseudo-Cluster (one machine). Uses hardcoded configurations for each job.");
    addOption(PFPGrowth.GROUP_FIS_IN, "gfis",
            "(Optional) In case of mapreduce, if this is set execution will start from the aggregation phase, and group dependent frequent itemsets will be read from the path specified");
    addFlag(AggregatorReducer.MUTUAL_INFO_FLAG, "mi",
            "Set to selec the top K patterns based on the Normalized Mutual Information rather than frequency of pattern");
    addOption(ParallelFPGrowthReducer.MIN_WORDS_FOR_LANG_ID, "lid",
            "The mimun length of a pattern that would be used for language identification");
    addOption(PFPGrowth.MIN_FREQ, "mf",
            "The minimum frequency of a token. Any token with less frequency will be pruned from the begining.");
    addOption(PFPGrowth.PRUNE_PCTILE, "pct",
            "The percentile of frequencies that will be considered; any token with a higher frequency will be pruned");
    //    addFlag("shift", "shift", "If set (and window must be set) it shifts the window by half");
    addFlag(TokenIterator.PARAM_REPEAT_HASHTAG, "rht",
            "If set, each hashtag is repeated, removing the # sign from the second token returned for the same hashtag");
    addOption(PFPGrowth.PARAM_INTERVAL_START, "st",
            "The start time of interval to be mined.. defaults to first known tweet time");
    addOption(PFPGrowth.PARAM_INTERVAL_END, "et",
            "The end time of interval to be mined.. defaults to long.maxvalue");
    addOption(PFPGrowth.PARAM_WINDOW_SIZE, "ws",
            "The duration of windows that will be mined.. defaults to end - start");
    addOption(PFPGrowth.PARAM_STEP_SIZE, "ss",
            "The step by which the window will be advanced.. defaults to windowSize");

    addOption(PARAM_NUM_THREADS, "j",
            "The number of PFP jobs, because in case of intervals resources are under utilized");

    // addOption(PFPGrowth.INDEX_OUT,
    // "ix",
    // "The local folder to which the frequent itemset index will be written");

    if (parseArguments(args) == null) {
        return -1;
    }

    Parameters params = new Parameters();

    if (hasOption("minSupport")) {
        String minSupportString = getOption("minSupport");
        params.set("minSupport", minSupportString);
    }
    if (hasOption("maxHeapSize")) {
        String maxHeapSizeString = getOption("maxHeapSize");
        params.set("maxHeapSize", maxHeapSizeString);
    }
    if (hasOption(PFPGrowth.NUM_GROUPS)) {
        String numGroupsString = getOption(PFPGrowth.NUM_GROUPS);
        params.set(PFPGrowth.NUM_GROUPS, numGroupsString);
    }

    if (hasOption("numTreeCacheEntries")) {
        String numTreeCacheString = getOption("numTreeCacheEntries");
        params.set("treeCacheSize", numTreeCacheString);
    }

    // if (hasOption("splitterPattern")) {
    // String patternString = getOption("splitterPattern");
    // params.set("splitPattern", patternString);
    // }

    String encoding = "UTF-8";
    if (hasOption("encoding")) {
        encoding = getOption("encoding");
    }
    params.set("encoding", encoding);

    // if (hasOption("useFPG2")) {
    // params.set(PFPGrowth.USE_FPG2, "true");
    // }

    // if (hasOption(PFPGrowth.COUNT_IN)) {
    // params.set(PFPGrowth.COUNT_IN, getOption(PFPGrowth.COUNT_IN));
    // }

    // if(hasOption(PFPGrowth.PSEUDO)){
    // params.set(PFPGrowth.PSEUDO, "true");
    // }

    // if (hasOption(PFPGrowth.GROUP_FIS_IN)) {
    // params.set(PFPGrowth.GROUP_FIS_IN, getOption(PFPGrowth.GROUP_FIS_IN));
    // }

    if (hasOption(AggregatorReducer.MUTUAL_INFO_FLAG)) {
        params.set(AggregatorReducer.MUTUAL_INFO_FLAG, "true");
    } else {
        params.set(AggregatorReducer.MUTUAL_INFO_FLAG, "false");
    }

    if (hasOption(ParallelFPGrowthReducer.MIN_WORDS_FOR_LANG_ID)) {
        params.set(ParallelFPGrowthReducer.MIN_WORDS_FOR_LANG_ID,
                getOption(ParallelFPGrowthReducer.MIN_WORDS_FOR_LANG_ID));
    }

    if (hasOption(PFPGrowth.MIN_FREQ)) {
        params.set(PFPGrowth.MIN_FREQ, getOption(PFPGrowth.MIN_FREQ));
    }

    if (hasOption(PFPGrowth.PRUNE_PCTILE)) {
        params.set(PFPGrowth.PRUNE_PCTILE, getOption(PFPGrowth.PRUNE_PCTILE));
    }

    // if (hasOption(PFPGrowth.PARAM_INTERVAL_END)) {
    params.set(PFPGrowth.PARAM_INTERVAL_END,
            getOption(PFPGrowth.PARAM_INTERVAL_END, Long.toString(Long.MAX_VALUE)));
    // }

    if (hasOption(PFPGrowth.PARAM_WINDOW_SIZE)) {
        params.set(PFPGrowth.PARAM_WINDOW_SIZE, getOption(PFPGrowth.PARAM_WINDOW_SIZE));
    }

    if (hasOption(PFPGrowth.PARAM_STEP_SIZE)) {
        params.set(PFPGrowth.PARAM_STEP_SIZE, getOption(PFPGrowth.PARAM_STEP_SIZE));
    }

    // if (hasOption(PFPGrowth.PARAM_INTERVAL_START)) {
    // params.set(PFPGrowth.PARAM_INTERVAL_START, getOption(PFPGrowth.PARAM_INTERVAL_START));
    // }

    // if (hasOption(PFPGrowth.INDEX_OUT)) {
    // params.set(PFPGrowth.INDEX_OUT, getOption(PFPGrowth.INDEX_OUT));
    // }

    if (hasOption(TokenIterator.PARAM_REPEAT_HASHTAG)) {
        params.set(TokenIterator.PARAM_REPEAT_HASHTAG, "true");
    }

    //    boolean shiftedWindow = hasOption("shift");

    Path inputDir = getInputPath();
    Path outputDir = getOutputPath();

    params.set(PFPGrowth.INPUT, inputDir.toString());
    params.set(PFPGrowth.OUTROOT, outputDir.toString());

    Configuration conf = new Configuration();
    //    HadoopUtil.delete(conf, outputDir);
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outputDir)) {
        throw new IllegalArgumentException(
                "Output path already exists.. please delete it yourself: " + outputDir);
    }

    int nThreads = Integer.parseInt(getOption(PARAM_NUM_THREADS, DEFAULT_NUM_THREADS));
    if (!PFPGrowth.runMode.equals(RunningMode.Batch) && nThreads != 1) {
        throw new UnsupportedOperationException("We use mining results from earlier windows. j must be 1");
    }
    ExecutorService exec = Executors.newFixedThreadPool(nThreads);
    Future<Void> lastFuture = null;

    String startTimeStr = getOption(PFPGrowth.PARAM_INTERVAL_START);
    // params.get(PFPGrowth.PARAM_INTERVAL_START);
    if (startTimeStr == null) {
        // FIXME: Will fail if not running locally.. like many things now
        // FileSystem fs = FileSystem.getLocal(conf);
        // startTimeStr = fs.listStatus(inputDir)[0].getPath().getName();
        File[] startFolders = FileUtils.toFile(inputDir.toUri().toURL()).listFiles();
        Arrays.sort(startFolders);
        startTimeStr = startFolders[0].getName();
    }
    long startTime = Long.parseLong(startTimeStr);
    // Long.toString(PFPGrowth.TREC2011_MIN_TIMESTAMP)));// GMT23JAN2011)));
    long endTime = Long.parseLong(params.get(PFPGrowth.PARAM_INTERVAL_END));
    // Long.toString(Long.MAX_VALUE)));
    long windowSize = Long
            .parseLong(params.get(PFPGrowth.PARAM_WINDOW_SIZE, Long.toString(endTime - startTime)));
    long stepSize = Long.parseLong(params.get(PFPGrowth.PARAM_STEP_SIZE, Long.toString(windowSize)));

    // int numJobs = 0;
    while (startTime < endTime) {
        // if(++numJobs % 100 == 0){
        // Thread.sleep(60000);
        // }
        long shift = 0;
        //      if(shiftedWindow){
        //        shift = (long)Math.floor(windowSize / 2.0f);
        //      }
        params.set(PFPGrowth.PARAM_INTERVAL_START, Long.toString(startTime + shift));

        if (hasOption(PFPGrowth.GROUP_FIS_IN)) {
            String gfisIn = getOption(PFPGrowth.GROUP_FIS_IN);
            gfisIn = FilenameUtils.concat(gfisIn, Long.toString(startTime + shift));
            gfisIn = FilenameUtils.concat(gfisIn,
                    Long.toString(Math.min(endTime, startTime + windowSize) + shift));
            params.set(PFPGrowth.GROUP_FIS_IN, gfisIn);
        }

        if (hasOption(PFPGrowth.COUNT_IN)) {
            String countIn = getOption(PFPGrowth.COUNT_IN);
            //        countIn = FilenameUtils.concat(countIn, Long.toString(startTime + shift));
            //        countIn = FilenameUtils.concat(countIn,
            //            Long.toString(Math.min(endTime, startTime + windowSize) + shift));
            params.set(PFPGrowth.COUNT_IN, countIn);
        }

        String outPathStr = FilenameUtils.concat(outputDir.toString(), Long.toString(startTime + shift));
        outPathStr = FilenameUtils.concat(outPathStr,
                Long.toString(Math.min(endTime, startTime + windowSize) + shift));
        params.set(PFPGrowth.OUTPUT, outPathStr);

        // PFPGrowth.runPFPGrowth(params);
        lastFuture = exec.submit(new PFPGrowth(params));

        //      startTime += windowSize;
        startTime += stepSize;

        //      Thread.sleep(10000);
    }

    lastFuture.get();
    exec.shutdown();

    while (!exec.isTerminated()) {
        Thread.sleep(1000);
    }

    return 0;
}
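
This driver (like the PFPGrowth examples that follow) builds its per-window paths by chaining concat calls, yielding a &lt;root&gt;/&lt;startTime&gt;/&lt;endTime&gt; layout. A small sketch with made-up timestamps, illustrative only (Unix-style separators shown):

String out = FilenameUtils.concat("/data/fpg/out", Long.toString(1296518400000L));
out = FilenameUtils.concat(out, Long.toString(1296604800000L));
// out is now "/data/fpg/out/1296518400000/1296604800000"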

From source file:org.apache.mahout.freqtermsets.PFPGrowth.java

public static int cacheFList(Parameters params, Configuration conf, String countIn, int minSupport, int minFr,
        int prunePct) throws IOException {

    long startTime = Long.parseLong(params.get(PFPGrowth.PARAM_INTERVAL_START));
    long endTime = Long.parseLong(params.get(PFPGrowth.PARAM_INTERVAL_END));
    long windowSize = Long
            .parseLong(params.get(PFPGrowth.PARAM_WINDOW_SIZE, Long.toString(endTime - startTime)));
    long stepSize = Long.parseLong(params.get(PFPGrowth.PARAM_STEP_SIZE, Long.toString(windowSize)));
    endTime = Math.min(endTime, startTime + windowSize);

    Path cachedPath = new Path(countIn, Long.toString(startTime));
    cachedPath = new Path(cachedPath, Long.toString(Math.min(endTime, startTime + windowSize)));
    cachedPath = new Path(cachedPath, F_LIST);

    FileSystem fs = FileSystem.getLocal(conf);
    int result;
    if (fs.exists(cachedPath)) {
        // assert FPSTREAM;
        result = -1;
        DistributedCache.addCacheFile(cachedPath.toUri(), conf);
    } else {
        OpenObjectLongHashMap<String> freqMap;
        if (runMode.equals(RunningMode.SlidingWin)) {
            freqMap = new OpenObjectLongHashMap<String>();
            while (startTime < endTime) {
                String stepCount = FilenameUtils.concat(countIn, Long.toString(startTime));
                stepCount = FilenameUtils.concat(stepCount,
                        Long.toString(Math.min(endTime, startTime + stepSize)));

                appendParallelCountingResults(stepCount, conf, freqMap);
                startTime += stepSize;
            }
        } else {
            countIn = FilenameUtils.concat(countIn, Long.toString(startTime));
            countIn = FilenameUtils.concat(countIn, Long.toString(Math.min(endTime, startTime + windowSize)));
            freqMap = readParallelCountingResults(countIn, conf);
        }
        List<Pair<String, Long>> flist = pruneParallelCountingResults(freqMap, minSupport, minFr, prunePct);
        saveFList(flist, // params,
                conf, cachedPath);
        result = flist.size();
    }
    return result;
}

From source file:org.apache.mahout.freqtermsets.PFPGrowth.java

/**
 * @param params
 *          params should contain input and output locations as string values; additional
 *          parameters include minSupport (3), maxHeapSize (50), and numGroups (1000)
 * @throws NoSuchAlgorithmException
 * @throws ParseException
 */
public static void runPFPGrowth(Parameters params) throws IOException, InterruptedException,
        ClassNotFoundException, NoSuchAlgorithmException, ParseException {
    Configuration conf = new Configuration();
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");

    long startTime = Long.parseLong(params.get(PFPGrowth.PARAM_INTERVAL_START));
    long endTime = Long.parseLong(params.get(PFPGrowth.PARAM_INTERVAL_END));
    long windowSize = Long
            .parseLong(params.get(PFPGrowth.PARAM_WINDOW_SIZE, Long.toString(endTime - startTime)));
    long stepSize = Long.parseLong(params.get(PFPGrowth.PARAM_STEP_SIZE, Long.toString(windowSize)));
    endTime = Math.min(endTime, startTime + windowSize);

    int minSupport = Integer.valueOf(params.get(MIN_SUPPORT, "3"));
    String countIn = params.get(COUNT_IN);
    if (countIn == null) {
        countIn = params.get(OUTROOT);
    }
    int minFr = params.getInt(MIN_FREQ, MIN_FREQ_DEFAULT);
    int prunePct = params.getInt(PRUNE_PCTILE, PRUNE_PCTILE_DEFAULT);

    if (params.get(COUNT_IN) == null) {
        startParallelCounting(params, conf);
    }

    if (params.get(GROUP_FIS_IN) == null) {
        // save feature list to dcache
        // List<Pair<String, Long>> fList = readFList(params);
        // saveFList(fList, params, conf);

        int fListSize = cacheFList(params, conf, countIn, minSupport, minFr, prunePct);

        if (runMode.equals(RunningMode.BlockUpdate)) {
            fListSize = -1;
            Path timeRoot = new Path(countIn).getParent().getParent();
            FileSystem fs = FileSystem.getLocal(conf);
            final long currStartTime = startTime;
            for (FileStatus earlierWindow : fs.listStatus(timeRoot, new PathFilter() {
                @Override
                public boolean accept(Path p) {
                    // should have used end time, but it doesn't make a difference,
                    // AS LONG AS windows don't overlap
                    return Long.parseLong(p.getName()) < currStartTime;
                }
            })) {
                // TODO: At such low frequency and support, does pruning out items with less frequency
                // than minFreq cause losing itemsets that are frequent over a longer time frame?
                cacheFList(params, conf, fs.listStatus(earlierWindow.getPath())[0].getPath().toString(),
                        minSupport, minFr, prunePct);
            }
        } else {
            // set param to control group size in MR jobs
            int numGroups = params.getInt(PFPGrowth.NUM_GROUPS, PFPGrowth.NUM_GROUPS_DEFAULT);
            int maxPerGroup = fListSize / numGroups;
            if (fListSize % numGroups != 0)
                maxPerGroup++;
            params.set(MAX_PER_GROUP, Integer.toString(maxPerGroup));
        }
        // fList = null;

        startParallelFPGrowth(params, conf);
    } else {
        cacheFList(params, conf, countIn, minSupport, minFr, prunePct);
    }
    startAggregating(params, conf);

    if (runMode.equals(RunningMode.BlockUpdate)) {
        String indexDirStr;// = params.get(INDEX_OUT);
        // if (indexDirStr == null || indexDirStr.isEmpty()) {
        indexDirStr = FilenameUtils.concat(params.get(OUTPUT), "index");
        // } else {
        // indexDirStr = FilenameUtils.concat(indexDirStr, startTime);
        // indexDirStr = FilenameUtils.concat(indexDirStr, endTime);
        // }
        File indexDir = FileUtils.toFile(new URL(indexDirStr));

        // clean up
        FileUtils.deleteQuietly(indexDir);

        Path seqPath = new Path(params.get(OUTPUT), FREQUENT_PATTERNS);
        Directory earlierIndex = null;

        Path timeRoot = new Path(params.get(OUTPUT)).getParent().getParent();
        FileSystem fs = FileSystem.getLocal(conf);

        long mostRecent = Long.MIN_VALUE;
        Path mostRecentPath = null;
        for (FileStatus earlierWindow : fs.listStatus(timeRoot)) {
            long earlierStart = Long.parseLong(earlierWindow.getPath().getName());
            // should have used end time, but it doesn't make a difference,
            // AS LONG AS windows don't overlap
            if (earlierStart < startTime && earlierStart > mostRecent) {
                mostRecentPath = earlierWindow.getPath();
                mostRecent = earlierStart;
            }
        }
        if (mostRecentPath != null) {
            mostRecentPath = fs.listStatus(mostRecentPath)[0].getPath();
            mostRecentPath = new Path(mostRecentPath, "index");
            // earlierIndex = new Directory[1];
            // FIXME: as with anything that involves lucene.. won't work except on a local machine
            earlierIndex = new MMapDirectory(FileUtils.toFile(mostRecentPath.toUri().toURL()));
        }
    }
    // FIXME: When we want to stream, we have to build the index of earlier window
    // ItemSetIndexBuilder.buildIndex(seqPath, indexDir,
    // startTime, Math.min(endTime, startTime + windowSize), earlierIndex);
}

From source file:org.apache.oozie.test.XTestCase.java

private void setUpEmbeddedHadoop2() throws Exception {
    if (dfsCluster != null && dfsCluster2 == null) {
        // Trick dfs location for MiniDFSCluster since it doesn't accept location as input
        String testBuildDataSaved = System.getProperty("test.build.data", "build/test/data");
        try {
            System.setProperty("test.build.data", FilenameUtils.concat(testBuildDataSaved, "2"));
            // Only DFS cluster is created based upon current need
            dfsCluster2 = new MiniDFSCluster(createDFSConfig(), 2, true, null);
            FileSystem fileSystem = dfsCluster2.getFileSystem();
            fileSystem.mkdirs(new Path("target/test-data"));
            fileSystem.mkdirs(new Path("/user"));
            fileSystem.mkdirs(new Path("/tmp"));
            fileSystem.setPermission(new Path("target/test-data"), FsPermission.valueOf("-rwxrwxrwx"));
            fileSystem.setPermission(new Path("/user"), FsPermission.valueOf("-rwxrwxrwx"));
            fileSystem.setPermission(new Path("/tmp"), FsPermission.valueOf("-rwxrwxrwx"));
            System.setProperty(OOZIE_TEST_NAME_NODE2, fileSystem.getConf().get("fs.default.name"));
        } catch (Exception ex) {
            shutdownMiniCluster2();
            throw ex;
        } finally {
            // Restore previous value
            System.setProperty("test.build.data", testBuildDataSaved);
        }
    }
}