List of usage examples for org.apache.commons.io FilenameUtils concat
public static String concat(String basePath, String fullFilenameToAdd)
From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexProviderService.java
private void initializeIndexCopier(BundleContext bundleContext, Map<String, ?> config) throws IOException { if (indexCopier != null) { return;//from w w w .j a v a2 s . co m } String indexDirPath = PropertiesUtil.toString(config.get(PROP_LOCAL_INDEX_DIR), null); boolean prefetchEnabled = PropertiesUtil.toBoolean(config.get(PROP_PREFETCH_INDEX_FILES), PROP_PREFETCH_INDEX_FILES_DEFAULT); if (Strings.isNullOrEmpty(indexDirPath)) { String repoHome = bundleContext.getProperty(REPOSITORY_HOME); if (repoHome != null) { indexDirPath = FilenameUtils.concat(repoHome, "index"); } } checkNotNull(indexDirPath, "Index directory cannot be determined as neither index " + "directory path [%s] nor repository home [%s] defined", PROP_LOCAL_INDEX_DIR, REPOSITORY_HOME); if (prefetchEnabled) { log.info( "Prefetching of index files enabled. Index would be opened after copying all new files locally"); } indexDir = new File(indexDirPath); indexCopier = new IndexCopier(getExecutorService(), indexDir, prefetchEnabled); oakRegs.add(registerMBean(whiteboard, CopyOnReadStatsMBean.class, indexCopier, CopyOnReadStatsMBean.TYPE, "IndexCopier support statistics")); }
From source file:org.apache.jackrabbit.oak.run.osgi.OakOSGiRepositoryFactory.java
@SuppressWarnings("unchecked") private static void processConfig(Map config) { String home = (String) config.get(REPOSITORY_HOME); checkNotNull(home, "Repository home not defined via [%s]", REPOSITORY_HOME); home = FilenameUtils.normalizeNoEndSeparator(home); String bundleDir = FilenameUtils.concat(home, "bundles"); config.put(Constants.FRAMEWORK_STORAGE, bundleDir); //FIXME Pojo SR currently reads this from system property instead of Framework Property config.put(Constants.FRAMEWORK_STORAGE, bundleDir); //Directory used by Felix File Install to watch for configs config.put("felix.fileinstall.dir", FilenameUtils.concat(home, "config")); //Set log level for config to INFO LogService.LOG_INFO config.put("felix.fileinstall.log.level", "3"); //This ensures that configuration is registered in main thread //and not in a different thread config.put("felix.fileinstall.noInitialDelay", "true"); config.put("repository.home", FilenameUtils.concat(home, "repository")); }
From source file:org.apache.jackrabbit.oak.run.osgi.SimpleRepositoryFactoryTest.java
/**
 * Resolves a test resource below {@code src/test/resources/} of the base directory
 * and returns its absolute path.
 *
 * @param path resource path relative to {@code src/test/resources/}
 * @return absolute path of the resource file
 */
private static String path(String path) {
    final String relativeLocation = "src/test/resources/" + path;
    final File resource = new File(FilenameUtils.concat(getBaseDir(), relativeLocation));
    final String absoluteLocation = resource.getAbsolutePath();
    // Only checked when assertions are enabled (-ea).
    assert resource.exists() : "No file found at " + absoluteLocation;
    return absoluteLocation;
}
From source file:org.apache.jackrabbit.oak.spi.blob.osgi.FileBlobStoreService.java
/**
 * Creates a {@code FileBlobStore} rooted at {@code <home>/datastore}, applies the
 * component configuration to it and registers it as an OSGi service under the
 * {@code BlobStore} and {@code GarbageCollectableBlobStore} names.
 *
 * @param context component context used for property lookup and service registration
 * @param config  component configuration populated onto the blob store
 */
@Activate
protected void activate(ComponentContext context, Map<String, Object> config) {
    final String homeDir = lookup(context, PROP_HOME);
    if (homeDir != null) {
        log.info("Initializing the FileBlobStore with homeDir [{}]", homeDir);
    }

    // NOTE(review): homeDir can still be null at this point and is passed to concat
    // unchanged - confirm what FileBlobStore does with the resulting path.
    final String dataStoreDir = FilenameUtils.concat(homeDir, "datastore");
    final BlobStore blobStore = new FileBlobStore(dataStoreDir);
    PropertiesUtil.populate(blobStore, config, false);

    // Forward the split-blobstore marker as a service property when it is configured.
    final Dictionary<String, Object> serviceProps = new Hashtable<String, Object>();
    final Object splitBlobStore = context.getProperties().get(PROP_SPLIT_BLOBSTORE);
    if (splitBlobStore != null) {
        serviceProps.put(PROP_SPLIT_BLOBSTORE, splitBlobStore);
    }

    final String[] serviceNames = { BlobStore.class.getName(),
            GarbageCollectableBlobStore.class.getName() };
    reg = context.getBundleContext().registerService(serviceNames, blobStore, serviceProps);
}
From source file:org.apache.jackrabbit.oak.upgrade.blob.LengthCachingDataStore.java
/**
 * Points {@code mappingFile} at {@code mappingFilePath} below the given home directory
 * and, when that file exists, loads its content into {@code existingMappings}.
 *
 * @param homeDir base directory the mapping file path is appended to
 * @throws RuntimeException if the file exists but loading reports it as not found
 */
private void initializeMappingData(String homeDir) {
    mappingFile = new File(FilenameUtils.concat(homeDir, mappingFilePath));

    if (!mappingFile.exists()) {
        log.info("Mapping file {} not found. Would create a new one.", mappingFile);
        return;
    }

    try {
        existingMappings = loadMappingData(mappingFile);
    } catch (FileNotFoundException e) {
        throw new RuntimeException("Failed to read mapping data from " + mappingFile, e);
    }
}
From source file:org.apache.mahout.freqtermsets.AggregatorReducer.java
/**
 * Merges all pattern lists received for {@code key} into a single top-K list and
 * writes it to the context.
 *
 * <p>The work is done in two passes over {@code values}:
 * <ol>
 * <li>Every value is scanned and then appended to a temporary {@code ArrayFile}. While
 * scanning, patterns containing an item that starts with {@code METADATA_PREFIX} are
 * removed from the value and set aside, and for every other item (except the key itself)
 * the largest pattern support seen so far is recorded in {@code jointFreq}.</li>
 * <li>The cached values are read back and merged one by one, either with a comparator
 * based on normalized mutual information (when {@code sortByMutualInfo} is set) or with
 * {@code mergeFreq}.</li>
 * </ol>
 * Finally the cache file is deleted and the metadata patterns are merged in with a
 * comparator that always returns -1.
 *
 * <p>NOTE(review): the cache writer and reader are closed only on the normal path; an
 * exception in either pass appears to leave them open and the cache file undeleted.
 *
 * @param key     the item whose patterns are aggregated
 * @param values  pattern lists emitted for the key; patterns holding metadata are removed
 *                from them in place
 * @param context reducer context used for configuration, status updates and output
 * @throws IOException          declared by the file system, cache and output calls
 * @throws InterruptedException declared by {@code context.write}
 */
@Override
protected void reduce(Text key, Iterable<TopKStringPatterns> values, Context context)
        throws IOException, InterruptedException {

    // YA get data to do more than freq merge
    // One extra heap slot is added per metadata pattern found below.
    int myMaxHeapSize = maxHeapSize;
    Configuration conf = context.getConfiguration();
    FileSystem fs = FileSystem.get(conf); //TODO: do I need?getLocal(conf);
    // Cache location: <temp dir URI>/<thread name>_<key hash>_patterns
    String cachePath = FilenameUtils.concat(FileUtils.getTempDirectory().toURI().toString(),
            Thread.currentThread().getName() + "_" + key.hashCode() + "_patterns");
    org.apache.hadoop.io.ArrayFile.Writer cacheWr = new ArrayFile.Writer(conf, fs, cachePath,
            TopKStringPatterns.class);
    final String keyStr = key.toString();
    // Per co-occurring item: the largest support among the patterns it appeared in.
    final OpenObjectLongHashMap<String> jointFreq = new OpenObjectLongHashMap<String>();
    TopKStringPatterns metaPatterns = new TopKStringPatterns();
    for (TopKStringPatterns value : values) {
        List<Pair<List<String>, Long>> vPatterns = value.getPatterns();
        // Walk backwards so that remove(p) does not shift the entries still to be visited.
        for (int p = vPatterns.size() - 1; p >= 0; --p) {
            Pair<List<String>, Long> pattern = vPatterns.get(p);
            if (pattern == null) {
                continue; // just like their merge
            }
            for (String other : pattern.getFirst()) {
                if (other.charAt(0) == METADATA_PREFIX) {
                    // Keep metadata out of merge
                    vPatterns.remove(p);
                    // Make sure it has space to be merged
                    ++myMaxHeapSize;
                    // Store the metadata temporarily.. we will add it in the end
                    // where it can't be pruned out
                    metaPatterns.getPatterns().add(pattern);
                    // done processing metadata itemset
                    // (items visited before the metadata item have already been counted)
                    break;
                }
                if (keyStr.equals(other)) {
                    continue;
                }
                long freq = jointFreq.get(other);
                if (pattern.getSecond() > freq) {
                    freq = pattern.getSecond();
                }
                jointFreq.put(other, freq);
            }
        }
        // The value is cached after its metadata patterns have been stripped.
        cacheWr.append(value);
    }
    cacheWr.close();
    org.apache.hadoop.io.ArrayFile.Reader cacheRd = new ArrayFile.Reader(fs, cachePath, conf);
    // END YA get data
    TopKStringPatterns patterns = new TopKStringPatterns();
    TopKStringPatterns value = new TopKStringPatterns();
    while (cacheRd.next(value) != null) {
        context.setStatus("Aggregator Reducer: Selecting TopK patterns for: " + key);
        // YA Mutual info merge.. TODO: more metrics passed as class name of comparator
        if (sortByMutualInfo) {
            patterns = patterns.merge(value, myMaxHeapSize, new Comparator<Pair<List<String>, Long>>() {
                /**
                 * Sums, over every token other than the key, jp * (ln(jf / (ft1 * ft2)) +
                 * lnTotalNTerms) into the numerator and jp * ln(jp) into the denominator,
                 * where jp = jf / totalNterms. Returns numerator / -denominator, or the
                 * numerator alone when the denominator is zero.
                 */
                private double calcNormalizedMutualInfo(String[] bagOfTokens) {
                    double numer = 0;
                    double denim = 0;
                    double ft1 = fMap.get(keyStr);
                    for (int t2 = 0; t2 < bagOfTokens.length; ++t2) {
                        if (bagOfTokens[t2].equals(keyStr)) {
                            continue;
                        }
                        double ft2 = fMap.get(bagOfTokens[t2]);
                        double jf = jointFreq.get(bagOfTokens[t2]);
                        // This check shouldn't be even plausible.. save time:
                        // if(jf != 0){
                        double jp = jf / totalNterms;
                        numer += jp * (Math.log(jf / (ft1 * ft2)) + lnTotalNTerms);
                        denim += jp * Math.log(jp);
                    }
                    double result = numer;
                    if (denim != 0) {
                        result /= -denim;
                    }
                    return result;
                }

                /**
                 * Orders by normalized mutual information, then by number of items,
                 * then by support.
                 */
                @Override
                public int compare(Pair<List<String>, Long> o1, Pair<List<String>, Long> o2) {
                    String[] bagOfTokens = o1.getFirst().toArray(new String[0]);
                    double mi1 = calcNormalizedMutualInfo(bagOfTokens);
                    bagOfTokens = o2.getFirst().toArray(new String[0]);
                    double mi2 = calcNormalizedMutualInfo(bagOfTokens);
                    int result = Double.compare(mi1, mi2);
                    if (result == 0) {
                        result = Double.compare(o1.getFirst().size(), o2.getFirst().size());
                        if (result == 0) {
                            result = o1.getSecond().compareTo(o2.getSecond());
                        }
                    }
                    return result;
                }
            });
            // END YA Mutual info merge
        } else {
            patterns = patterns.mergeFreq(value, myMaxHeapSize);
        }
    }
    // YA get data
    cacheRd.close();
    fs.delete(new Path(cachePath), true);
    patterns = patterns.merge(metaPatterns, myMaxHeapSize, new Comparator<Pair<List<String>, Long>>() {
        @Override
        public int compare(Pair<List<String>, Long> o1, Pair<List<String>, Long> o2) {
            // Force the metadata to be accepted
            // (constant result: deliberately not a consistent ordering)
            return -1;
        }
    });
    // END YA get data
    context.write(key, patterns);
}
From source file:org.apache.mahout.freqtermsets.FPGrowthDriver.java
/** * Run TopK FPGrowth given the input file, *///from www . ja va2 s . c om @Override public int run(String[] args) throws Exception { addInputOption(); addOutputOption(); addOption("minSupport", "s", "(Optional) The minimum number of times a co-occurrence must be present." + " Default Value: 3", "3"); addOption("maxHeapSize", "k", "(Optional) Maximum Heap Size k, to denote the requirement to mine top K items." + " Default value: 50", "50"); addOption(PFPGrowth.NUM_GROUPS, "g", "(Optional) Number of groups the features should be divided in the map-reduce version." + " Doesn't work in sequential version Default Value:" + PFPGrowth.NUM_GROUPS_DEFAULT, Integer.toString(PFPGrowth.NUM_GROUPS_DEFAULT)); // addOption("splitterPattern", "regex", // "Regular Expression pattern used to split given string transaction into" // + " itemsets. Default value splits comma separated itemsets. Default Value:" // + " \"[ ,\\t]*[,|\\t][ ,\\t]*\" ", "[ ,\t]*[,|\t][ ,\t]*"); addOption("numTreeCacheEntries", "tc", "(Optional) Number of entries in the tree cache to prevent duplicate" + " tree building. (Warning) a first level conditional FP-Tree might consume a lot of memory, " + "so keep this value small, but big enough to prevent duplicate tree building. " + "Default Value:5 Recommended Values: [5-10]", "5"); // addOption("method", "method", "Method of processing: sequential|mapreduce", "mapreduce"); // //"sequential"); addOption("encoding", "e", "(Optional) The file encoding. Default value: UTF-8", "UTF-8"); // addFlag("useFPG2", "2", "Use an alternate FPG implementation"); addOption(PFPGrowth.COUNT_IN, "cnt", "(Optional) In case of mapreduce, if this is set parallel counting will be skipped and counts will be read from the path specified"); // addFlag(PFPGrowth.PSEUDO, "ps", // "Running on a Pseudo-Cluster (one machine). 
Uses hardcoded configurations for each job."); addOption(PFPGrowth.GROUP_FIS_IN, "gfis", "(Optional) In case of mapreduce, if this is set execution will start from the aggregation phase, and group dependent frequent itemsets will be read from the path specified"); addFlag(AggregatorReducer.MUTUAL_INFO_FLAG, "mi", "Set to selec the top K patterns based on the Normalized Mutual Information rather than frequency of pattern"); addOption(ParallelFPGrowthReducer.MIN_WORDS_FOR_LANG_ID, "lid", "The mimun length of a pattern that would be used for language identification"); addOption(PFPGrowth.MIN_FREQ, "mf", "The minimum frequency of a token. Any token with less frequency will be pruned from the begining."); addOption(PFPGrowth.PRUNE_PCTILE, "pct", "The percentile of frequencies that will be considered; any token with a higher frequency will be pruned"); // addFlag("shift", "shift", "If set (and window must be set) it shifts the window by half"); addFlag(TokenIterator.PARAM_REPEAT_HASHTAG, "rht", "If set, each hashtag is repeated, removing the # sign from the second token returned for the same hashtag"); addOption(PFPGrowth.PARAM_INTERVAL_START, "st", "The start time of interval to be mined.. defaults to first known tweet time"); addOption(PFPGrowth.PARAM_INTERVAL_END, "et", "The end time of interval to be mined.. defaults to long.maxvalue"); addOption(PFPGrowth.PARAM_WINDOW_SIZE, "ws", "The duration of windows that will be mined.. defaults to end - start"); addOption(PFPGrowth.PARAM_STEP_SIZE, "ss", "The step by which the window will be advanced.. 
defaults to windowSize"); addOption(PARAM_NUM_THREADS, "j", "The number of PFP jobs, because in case of intervals resources are under utilized"); // addOption(PFPGrowth.INDEX_OUT, // "ix", // "The local folder to which the frequent itemset index will be written"); if (parseArguments(args) == null) { return -1; } Parameters params = new Parameters(); if (hasOption("minSupport")) { String minSupportString = getOption("minSupport"); params.set("minSupport", minSupportString); } if (hasOption("maxHeapSize")) { String maxHeapSizeString = getOption("maxHeapSize"); params.set("maxHeapSize", maxHeapSizeString); } if (hasOption(PFPGrowth.NUM_GROUPS)) { String numGroupsString = getOption(PFPGrowth.NUM_GROUPS); params.set(PFPGrowth.NUM_GROUPS, numGroupsString); } if (hasOption("numTreeCacheEntries")) { String numTreeCacheString = getOption("numTreeCacheEntries"); params.set("treeCacheSize", numTreeCacheString); } // if (hasOption("splitterPattern")) { // String patternString = getOption("splitterPattern"); // params.set("splitPattern", patternString); // } String encoding = "UTF-8"; if (hasOption("encoding")) { encoding = getOption("encoding"); } params.set("encoding", encoding); // if (hasOption("useFPG2")) { // params.set(PFPGrowth.USE_FPG2, "true"); // } // if (hasOption(PFPGrowth.COUNT_IN)) { // params.set(PFPGrowth.COUNT_IN, getOption(PFPGrowth.COUNT_IN)); // } // if(hasOption(PFPGrowth.PSEUDO)){ // params.set(PFPGrowth.PSEUDO, "true"); // } // if (hasOption(PFPGrowth.GROUP_FIS_IN)) { // params.set(PFPGrowth.GROUP_FIS_IN, getOption(PFPGrowth.GROUP_FIS_IN)); // } if (hasOption(AggregatorReducer.MUTUAL_INFO_FLAG)) { params.set(AggregatorReducer.MUTUAL_INFO_FLAG, "true"); } else { params.set(AggregatorReducer.MUTUAL_INFO_FLAG, "false"); } if (hasOption(ParallelFPGrowthReducer.MIN_WORDS_FOR_LANG_ID)) { params.set(ParallelFPGrowthReducer.MIN_WORDS_FOR_LANG_ID, getOption(ParallelFPGrowthReducer.MIN_WORDS_FOR_LANG_ID)); } if (hasOption(PFPGrowth.MIN_FREQ)) { 
params.set(PFPGrowth.MIN_FREQ, getOption(PFPGrowth.MIN_FREQ)); } if (hasOption(PFPGrowth.PRUNE_PCTILE)) { params.set(PFPGrowth.PRUNE_PCTILE, getOption(PFPGrowth.PRUNE_PCTILE)); } // if (hasOption(PFPGrowth.PARAM_INTERVAL_END)) { params.set(PFPGrowth.PARAM_INTERVAL_END, getOption(PFPGrowth.PARAM_INTERVAL_END, Long.toString(Long.MAX_VALUE))); // } if (hasOption(PFPGrowth.PARAM_WINDOW_SIZE)) { params.set(PFPGrowth.PARAM_WINDOW_SIZE, getOption(PFPGrowth.PARAM_WINDOW_SIZE)); } if (hasOption(PFPGrowth.PARAM_STEP_SIZE)) { params.set(PFPGrowth.PARAM_STEP_SIZE, getOption(PFPGrowth.PARAM_STEP_SIZE)); } // if (hasOption(PFPGrowth.PARAM_INTERVAL_START)) { // params.set(PFPGrowth.PARAM_INTERVAL_START, getOption(PFPGrowth.PARAM_INTERVAL_START)); // } // if (hasOption(PFPGrowth.INDEX_OUT)) { // params.set(PFPGrowth.INDEX_OUT, getOption(PFPGrowth.INDEX_OUT)); // } if (hasOption(TokenIterator.PARAM_REPEAT_HASHTAG)) { params.set(TokenIterator.PARAM_REPEAT_HASHTAG, "true"); } // boolean shiftedWindow = hasOption("shift"); Path inputDir = getInputPath(); Path outputDir = getOutputPath(); params.set(PFPGrowth.INPUT, inputDir.toString()); params.set(PFPGrowth.OUTROOT, outputDir.toString()); Configuration conf = new Configuration(); // HadoopUtil.delete(conf, outputDir); FileSystem fs = FileSystem.get(conf); if (fs.exists(outputDir)) { throw new IllegalArgumentException( "Output path already exists.. please delete it yourself: " + outputDir); } int nThreads = Integer.parseInt(getOption(PARAM_NUM_THREADS, DEFAULT_NUM_THREADS)); if (!PFPGrowth.runMode.equals(RunningMode.Batch) && nThreads != 1) { throw new UnsupportedOperationException("We use mining results from earlier windows. j must be 1"); } ExecutorService exec = Executors.newFixedThreadPool(nThreads); Future<Void> lastFuture = null; String startTimeStr = getOption(PFPGrowth.PARAM_INTERVAL_START); // params.get(PFPGrowth.PARAM_INTERVAL_START); if (startTimeStr == null) { // FIXME: Will fail if not running locally.. 
like many things now // FileSystem fs = FileSystem.getLocal(conf); // startTimeStr = fs.listStatus(inputDir)[0].getPath().getName(); File[] startFolders = FileUtils.toFile(inputDir.toUri().toURL()).listFiles(); Arrays.sort(startFolders); startTimeStr = startFolders[0].getName(); } long startTime = Long.parseLong(startTimeStr); // Long.toString(PFPGrowth.TREC2011_MIN_TIMESTAMP)));// GMT23JAN2011))); long endTime = Long.parseLong(params.get(PFPGrowth.PARAM_INTERVAL_END)); // Long.toString(Long.MAX_VALUE))); long windowSize = Long .parseLong(params.get(PFPGrowth.PARAM_WINDOW_SIZE, Long.toString(endTime - startTime))); long stepSize = Long.parseLong(params.get(PFPGrowth.PARAM_STEP_SIZE, Long.toString(windowSize))); // int numJobs = 0; while (startTime < endTime) { // if(++numJobs % 100 == 0){ // Thread.sleep(60000); // } long shift = 0; // if(shiftedWindow){ // shift = (long)Math.floor(windowSize / 2.0f); // } params.set(PFPGrowth.PARAM_INTERVAL_START, Long.toString(startTime + shift)); if (hasOption(PFPGrowth.GROUP_FIS_IN)) { String gfisIn = getOption(PFPGrowth.GROUP_FIS_IN); gfisIn = FilenameUtils.concat(gfisIn, Long.toString(startTime + shift)); gfisIn = FilenameUtils.concat(gfisIn, Long.toString(Math.min(endTime, startTime + windowSize) + shift)); params.set(PFPGrowth.GROUP_FIS_IN, gfisIn); } if (hasOption(PFPGrowth.COUNT_IN)) { String countIn = getOption(PFPGrowth.COUNT_IN); // countIn = FilenameUtils.concat(countIn, Long.toString(startTime + shift)); // countIn = FilenameUtils.concat(countIn, // Long.toString(Math.min(endTime, startTime + windowSize) + shift)); params.set(PFPGrowth.COUNT_IN, countIn); } String outPathStr = FilenameUtils.concat(outputDir.toString(), Long.toString(startTime + shift)); outPathStr = FilenameUtils.concat(outPathStr, Long.toString(Math.min(endTime, startTime + windowSize) + shift)); params.set(PFPGrowth.OUTPUT, outPathStr); // PFPGrowth.runPFPGrowth(params); lastFuture = exec.submit(new PFPGrowth(params)); // startTime += windowSize; 
startTime += stepSize; // Thread.sleep(10000); } lastFuture.get(); exec.shutdown(); while (!exec.isTerminated()) { Thread.sleep(1000); } return 0; }
From source file:org.apache.mahout.freqtermsets.PFPGrowth.java
public static int cacheFList(Parameters params, Configuration conf, String countIn, int minSupport, int minFr, int prunePct) throws IOException { long startTime = Long.parseLong(params.get(PFPGrowth.PARAM_INTERVAL_START)); long endTime = Long.parseLong(params.get(PFPGrowth.PARAM_INTERVAL_END)); long windowSize = Long .parseLong(params.get(PFPGrowth.PARAM_WINDOW_SIZE, Long.toString(endTime - startTime))); long stepSize = Long.parseLong(params.get(PFPGrowth.PARAM_STEP_SIZE, Long.toString(windowSize))); endTime = Math.min(endTime, startTime + windowSize); Path cachedPath = new Path(countIn, Long.toString(startTime)); cachedPath = new Path(cachedPath, Long.toString(Math.min(endTime, startTime + windowSize))); cachedPath = new Path(cachedPath, F_LIST); FileSystem fs = FileSystem.getLocal(conf); int result;// w w w . j ava 2 s . c om if (fs.exists(cachedPath)) { // assert FPSTREAM; result = -1; DistributedCache.addCacheFile(cachedPath.toUri(), conf); } else { OpenObjectLongHashMap<String> freqMap; if (runMode.equals(RunningMode.SlidingWin)) { freqMap = new OpenObjectLongHashMap<String>(); while (startTime < endTime) { String stepCount = FilenameUtils.concat(countIn, Long.toString(startTime)); stepCount = FilenameUtils.concat(stepCount, Long.toString(Math.min(endTime, startTime + stepSize))); appendParallelCountingResults(stepCount, conf, freqMap); startTime += stepSize; } } else { countIn = FilenameUtils.concat(countIn, Long.toString(startTime)); countIn = FilenameUtils.concat(countIn, Long.toString(Math.min(endTime, startTime + windowSize))); freqMap = readParallelCountingResults(countIn, conf); } List<Pair<String, Long>> flist = pruneParallelCountingResults(freqMap, minSupport, minFr, prunePct); saveFList(flist, // params, conf, cachedPath); result = flist.size(); } return result; }
From source file:org.apache.mahout.freqtermsets.PFPGrowth.java
/**
 * Runs the mining pipeline for one time window: parallel counting (unless counts are
 * supplied via {@code COUNT_IN}), feature list caching, parallel FP-Growth (unless
 * group-dependent itemsets are supplied via {@code GROUP_FIS_IN}) and aggregation.
 *
 * <p>In {@code BlockUpdate} mode the feature lists of earlier windows are cached as well,
 * the {@code index} folder below the output is deleted, and the index directory of the
 * most recent earlier window is opened.
 *
 * @param params
 *          params should contain input and output locations as a string value, the additional
 *          parameters include minSupport(3), maxHeapSize(50), numGroups(1000)
 * @throws IOException declared by the file system and job calls
 * @throws InterruptedException declared by the job calls
 * @throws ClassNotFoundException declared by the job calls
 * @throws NoSuchAlgorithmException declared by the called stages
 * @throws ParseException declared by the called stages
 */
public static void runPFPGrowth(Parameters params) throws IOException, InterruptedException,
        ClassNotFoundException, NoSuchAlgorithmException, ParseException {
    Configuration conf = new Configuration();
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");

    long startTime = Long.parseLong(params.get(PFPGrowth.PARAM_INTERVAL_START));
    long endTime = Long.parseLong(params.get(PFPGrowth.PARAM_INTERVAL_END));
    long windowSize = Long
            .parseLong(params.get(PFPGrowth.PARAM_WINDOW_SIZE, Long.toString(endTime - startTime)));
    // NOTE(review): stepSize is parsed but not used by any active code in this method.
    long stepSize = Long.parseLong(params.get(PFPGrowth.PARAM_STEP_SIZE, Long.toString(windowSize)));
    // Clamp the window end to the interval end.
    endTime = Math.min(endTime, startTime + windowSize);

    int minSupport = Integer.valueOf(params.get(MIN_SUPPORT, "3"));
    // Counts are read from COUNT_IN when given, otherwise from the output root.
    String countIn = params.get(COUNT_IN);
    if (countIn == null) {
        countIn = params.get(OUTROOT); // PUT);
    }
    int minFr = params.getInt(MIN_FREQ, MIN_FREQ_DEFAULT);
    int prunePct = params.getInt(PRUNE_PCTILE, PRUNE_PCTILE_DEFAULT);

    // Counting is skipped when counts were supplied.
    if (params.get(COUNT_IN) == null) {
        startParallelCounting(params, conf);
    }

    if (params.get(GROUP_FIS_IN) == null) {
        // save feature list to dcache
        // List<Pair<String, Long>> fList = readFList(params);
        // saveFList(fList, params, conf);
        int fListSize = cacheFList(params, conf, countIn, minSupport, minFr, prunePct);

        if (runMode.equals(RunningMode.BlockUpdate)) {
            fListSize = -1;
            // Two levels above countIn; its children are parsed as window start times below.
            Path timeRoot = new Path(countIn).getParent().getParent();
            FileSystem fs = FileSystem.getLocal(conf);
            final long currStartTime = startTime;
            // NOTE(review): accept() parses every child name as a long, so a non-numeric
            // entry under timeRoot would raise NumberFormatException.
            for (FileStatus earlierWindow : fs.listStatus(timeRoot, new PathFilter() {
                @Override
                public boolean accept(Path p) {
                    // should have used end time, but it doesn't make a difference,
                    // AS LONG AS windows don't overlap
                    return Long.parseLong(p.getName()) < currStartTime;
                }
            })) {
                // TODO: At such low frequency and support, does pruning out items with less frequency
                // than minFreq cause loosing itemsets that are frequent but through a longer time frame
                // The first child of the earlier window is used as its counting directory.
                cacheFList(params, conf, fs.listStatus(earlierWindow.getPath())[0].getPath().toString(),
                        minSupport, minFr, prunePct);
            }
        } else {
            // set param to control group size in MR jobs
            int numGroups = params.getInt(PFPGrowth.NUM_GROUPS, PFPGrowth.NUM_GROUPS_DEFAULT);
            // Ceiling division of the feature list size by the number of groups.
            int maxPerGroup = fListSize / numGroups;
            if (fListSize % numGroups != 0)
                maxPerGroup++;
            params.set(MAX_PER_GROUP, Integer.toString(maxPerGroup));
        }
        // fList = null;
        startParallelFPGrowth(params, conf);
    } else {
        cacheFList(params, conf, countIn, minSupport, minFr, prunePct);
    }

    startAggregating(params, conf);

    if (runMode.equals(RunningMode.BlockUpdate)) {
        String indexDirStr;// = params.get(INDEX_OUT);
        // if (indexDirStr == null || indexDirStr.isEmpty()) {
        indexDirStr = FilenameUtils.concat(params.get(OUTPUT), "index");
        // } else {
        // indexDirStr = FilenameUtils.concat(indexDirStr, startTime);
        // indexDirStr = FilenameUtils.concat(indexDirStr, endTime);
        // }
        // NOTE(review): the path is parsed as a URL, so OUTPUT presumably has to be a
        // file: URL here - confirm against the caller.
        File indexDir = FileUtils.toFile(new URL(indexDirStr));

        // clean up
        FileUtils.deleteQuietly(indexDir);

        // NOTE(review): seqPath is only referenced by the commented-out index build below.
        Path seqPath = new Path(params.get(OUTPUT), FREQUENT_PATTERNS);
        Directory earlierIndex = null;

        Path timeRoot = new Path(params.get(OUTPUT)).getParent().getParent();
        FileSystem fs = FileSystem.getLocal(conf);

        // Find the latest window that started before the current one.
        long mostRecent = Long.MIN_VALUE;
        Path mostRecentPath = null;
        for (FileStatus earlierWindow : fs.listStatus(timeRoot)) {
            long earlierStart = Long.parseLong(earlierWindow.getPath().getName());
            // should have used end time, but it doesn't make a difference,
            // AS LONG AS windows don't overlap
            if (earlierStart < startTime && earlierStart > mostRecent) {
                mostRecentPath = earlierWindow.getPath();
                mostRecent = earlierStart;
            }
        }
        if (mostRecentPath != null) {
            mostRecentPath = fs.listStatus(mostRecentPath)[0].getPath();
            mostRecentPath = new Path(mostRecentPath, "index");
            // earlierIndex = new Directory[1];
            // FIXME: as with anything that involves lucene.. won't work except on a local machine
            // NOTE(review): the directory opened here is neither used nor closed by any
            // active code in this method.
            earlierIndex = new MMapDirectory(FileUtils.toFile(mostRecentPath.toUri().toURL()));
        }
    }
    // FIXME: When we want to stream, we have to build the index of earlier window
    // ItemSetIndexBuilder.buildIndex(seqPath, indexDir,
    // startTime, Math.min(endTime, startTime + windowSize), earlierIndex);
}
From source file:org.apache.oozie.test.XTestCase.java
private void setUpEmbeddedHadoop2() throws Exception { if (dfsCluster != null && dfsCluster2 == null) { // Trick dfs location for MiniDFSCluster since it doesn't accept location as input) String testBuildDataSaved = System.getProperty("test.build.data", "build/test/data"); try {// ww w . j ava 2s . com System.setProperty("test.build.data", FilenameUtils.concat(testBuildDataSaved, "2")); // Only DFS cluster is created based upon current need dfsCluster2 = new MiniDFSCluster(createDFSConfig(), 2, true, null); FileSystem fileSystem = dfsCluster2.getFileSystem(); fileSystem.mkdirs(new Path("target/test-data")); fileSystem.mkdirs(new Path("/user")); fileSystem.mkdirs(new Path("/tmp")); fileSystem.setPermission(new Path("target/test-data"), FsPermission.valueOf("-rwxrwxrwx")); fileSystem.setPermission(new Path("/user"), FsPermission.valueOf("-rwxrwxrwx")); fileSystem.setPermission(new Path("/tmp"), FsPermission.valueOf("-rwxrwxrwx")); System.setProperty(OOZIE_TEST_NAME_NODE2, fileSystem.getConf().get("fs.default.name")); } catch (Exception ex) { shutdownMiniCluster2(); throw ex; } finally { // Restore previus value System.setProperty("test.build.data", testBuildDataSaved); } } }