List of usage examples for org.apache.hadoop.fs.PathFilter
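PathFilter is a single-method interface, boolean accept(Path path), passed to FileSystem.listStatus and related calls to restrict which paths are returned. Before the examples below, here is a minimal orientation sketch (the directory /tmp/output is hypothetical); since the interface has one abstract method, on Java 8+ it can be written as a lambda:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PathFilterSketch {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // List only MapReduce output files such as "part-r-00000".
        FileStatus[] parts = fs.listStatus(new Path("/tmp/output"),
                path -> path.getName().startsWith("part-"));
        for (FileStatus status : parts) {
            System.out.println(status.getPath());
        }
    }
}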
From source file:org.apache.hcatalog.hcatmix.load.HadoopLoadGenerator.java
License:Apache License
/**
 * Read results from the HDFS reduce output directory.
 *
 * @param outputDir where to read the data from; expects the files to be {@link SequenceFile}s
 * @param jobConf   the job configuration used to open the sequence files
 * @return a map of timestamp to reduce result
 * @throws IOException
 */
private SortedMap<Long, ReduceResult> readResult(Path outputDir, JobConf jobConf) throws IOException {
    SortedMap<Long, ReduceResult> timeseriesResults = new TreeMap<Long, ReduceResult>();
    FileStatus[] files = fs.listStatus(outputDir, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith("part");
        }
    });
    for (FileStatus status : files) {
        Path path = status.getPath();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, jobConf);
        LongWritable timeStamp = new LongWritable();
        ReduceResult result = new ReduceResult();
        while (reader.next(timeStamp, result)) {
            LOG.info("Timestamp: " + timeStamp);
            LOG.info("ThreadCount: " + result.getThreadCount());
            LOG.info("Stats:\n" + result.getStatistics());
            LOG.info("Errors: " + result.getNumErrors());
            timeseriesResults.put(timeStamp.get(), result);
            timeStamp = new LongWritable(); // use fresh objects for the next read
            result = new ReduceResult();
        }
        reader.close();
    }
    return timeseriesResults;
}
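Note that the SequenceFile.Reader(FileSystem, Path, Configuration) constructor used above is deprecated in Hadoop 2.x. A sketch of the same read loop, assuming Hadoop 2.x, using the Option-based constructor and try-with-resources so the reader is closed even if next() throws:

// Sketch, assuming Hadoop 2.x: open by path option and auto-close the reader.
try (SequenceFile.Reader reader = new SequenceFile.Reader(jobConf, SequenceFile.Reader.file(path))) {
    LongWritable timeStamp = new LongWritable();
    ReduceResult result = new ReduceResult();
    while (reader.next(timeStamp, result)) {
        timeseriesResults.put(timeStamp.get(), result);
        timeStamp = new LongWritable();
        result = new ReduceResult();
    }
}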
From source file:org.apache.ignite.internal.processors.hadoop.impl.HadoopTeraSortTest.java
License:Apache License
/**
 * Implements the validation phase of the sample.
 *
 * @throws Exception
 */
private void teraValidate() throws Exception {
    System.out.println("TeraValidate ===============================================================");

    getFileSystem().delete(new Path(validateOutDir), true);

    // Run the validation job:
    int res = ToolRunner.run(new Configuration(), new TeraValidate(),
            new String[] { "-Dmapreduce.framework.name=local", sortOutDir, validateOutDir });
    assertEquals(0, res);

    FileStatus[] fileStatuses = getFileSystem().listStatus(new Path(validateOutDir), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            // Typically the name is "part-r-00000":
            return path.getName().startsWith("part-r-");
        }
    });

    // TeraValidate has only 1 reducer, so there should be only 1 result file:
    assertEquals(1, fileStatuses.length);

    // The result file must contain only 1 line with the checksum, like
    // "checksum 7a27e2d0d55de", typically 23 bytes long.
    // If sorting was not correct, the result instead lists the K-V pairs that
    // are ordered incorrectly, and the output is much larger.
    long len = fileStatuses[0].getLen();
    assertTrue("TeraValidate length: " + len, len >= 16 && len <= 32);
}
From source file:org.apache.kylin.common.util.HadoopUtil.java
License:Apache License
public static Path getFilterOnlyPath(FileSystem fs, Path baseDir, final String filter) throws IOException {
    if (!fs.exists(baseDir)) {
        return null;
    }

    FileStatus[] fileStatus = fs.listStatus(baseDir, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(filter);
        }
    });

    if (fileStatus.length == 1) {
        return fileStatus[0].getPath();
    } else {
        return null;
    }
}
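A hypothetical call site for this helper, resolving the single reducer output file under a job directory (the path below is made up for illustration):

// Returns null unless exactly one file under the directory starts with "part-".
Path partFile = HadoopUtil.getFilterOnlyPath(fs, new Path("/kylin/job_output"), "part-");
if (partFile == null) {
    throw new IOException("expected exactly one matching file");
}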
From source file:org.apache.kylin.dict.global.GlobalDictHDFSStore.java
License:Apache License
private void migrateOldLayout() throws IOException {
    FileStatus[] sliceFiles = fileSystem.listStatus(basePath, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(IndexFormatV1.SLICE_PREFIX);
        }
    });
    Path indexFile = new Path(basePath, V1_INDEX_NAME);

    if (fileSystem.exists(indexFile) && sliceFiles.length > 0) { // old layout
        final long version = System.currentTimeMillis();
        Path tempDir = new Path(basePath, "tmp_" + VERSION_PREFIX + version);
        Path versionDir = getVersionDir(version);

        logger.info("Convert global dict at {} to new layout with version {}", basePath, version);

        fileSystem.mkdirs(tempDir);
        // convert to the new layout
        try {
            // copy the index and slice files to the temp dir
            FileUtil.copy(fileSystem, indexFile, fileSystem, tempDir, false, conf);
            for (FileStatus sliceFile : sliceFiles) {
                FileUtil.copy(fileSystem, sliceFile.getPath(), fileSystem, tempDir, false, conf);
            }
            // rename the temp dir to the version dir
            fileSystem.rename(tempDir, versionDir);
            // delete the index and slice files in the base dir
            fileSystem.delete(indexFile, false);
            for (FileStatus sliceFile : sliceFiles) {
                fileSystem.delete(sliceFile.getPath(), true);
            }
        } finally {
            if (fileSystem.exists(tempDir)) {
                fileSystem.delete(tempDir, true);
            }
        }
    }
}
From source file:org.apache.kylin.dict.global.GlobalDictHDFSStore.java
License:Apache License
@Override
public Long[] listAllVersions() throws IOException {
    FileStatus[] versionDirs = fileSystem.listStatus(basePath, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(VERSION_PREFIX);
        }
    });
    TreeSet<Long> versions = new TreeSet<>();
    for (FileStatus versionDir : versionDirs) {
        Path path = versionDir.getPath();
        versions.add(Long.parseLong(path.getName().substring(VERSION_PREFIX.length())));
    }
    return versions.toArray(new Long[versions.size()]);
}
From source file:org.apache.kylin.dict.global.GlobalDictHDFSStore.java
License:Apache License
@Override
public GlobalDictMetadata getMetadata(long version) throws IOException {
    Path versionDir = getVersionDir(version);
    // The prefix match picks up the index file of either format version,
    // since the V2 file name starts with the V1 name.
    FileStatus[] indexFiles = fileSystem.listStatus(versionDir, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(V1_INDEX_NAME);
        }
    });
    checkState(indexFiles.length == 1, "zero or more than one index file found: %s",
            Arrays.toString(indexFiles));

    IndexFormat format;
    String indexFile = indexFiles[0].getPath().getName();
    if (V2_INDEX_NAME.equals(indexFile)) {
        format = new IndexFormatV2(fileSystem, conf);
    } else if (V1_INDEX_NAME.equals(indexFile)) {
        format = new IndexFormatV1(fileSystem, conf);
    } else {
        throw new RuntimeException("Unknown index file: " + indexFile);
    }

    return format.readIndexFile(versionDir);
}
From source file:org.apache.kylin.engine.mr.steps.UpdateDictionaryStep.java
License:Apache License
@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeMgr = CubeManager.getInstance(context.getConfig());
    final DictionaryManager dictMgrHdfs;
    final DictionaryManager dictMgrHbase;
    final CubeInstance cube = cubeMgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment newSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    final List<CubeSegment> mergingSegments = getMergingSegments(cube);
    final String dictInfoPath = this.getParams().get(BatchConstants.ARG_DICT_PATH);
    final String metadataUrl = this.getParams().get(BatchConstants.ARG_META_URL);
    final KylinConfig kylinConfHbase = cube.getConfig();
    final KylinConfig kylinConfHdfs = KylinConfig.createInstanceFromUri(metadataUrl);

    Collections.sort(mergingSegments);

    try {
        Configuration conf = HadoopUtil.getCurrentConfiguration();
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        ResourceStore hbaseRS = ResourceStore.getStore(kylinConfHbase);
        ResourceStore hdfsRS = ResourceStore.getStore(kylinConfHdfs);
        dictMgrHdfs = DictionaryManager.getInstance(kylinConfHdfs);
        dictMgrHbase = DictionaryManager.getInstance(kylinConfHbase);

        // work on copies instead of cached objects
        CubeInstance cubeCopy = cube.latestCopyForWrite();
        CubeSegment newSegCopy = cubeCopy.getSegmentById(newSegment.getUuid());

        // update the cube segment dictionary
        FileStatus[] fileStatuss = fs.listStatus(new Path(dictInfoPath), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part");
            }
        });

        for (FileStatus fileStatus : fileStatuss) {
            Path filePath = fileStatus.getPath();

            SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);
            Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Text value = (Text) ReflectionUtils.newInstance(reader.getValueClass(), conf);

            while (reader.next(key, value)) {
                String tblCol = key.toString();
                String dictInfoResource = value.toString();

                if (StringUtils.isNotEmpty(dictInfoResource)) {
                    logger.info(dictInfoResource);
                    // put the dictionary file into the metadata store
                    DictionaryInfo dictInfoHdfs = dictMgrHdfs.getDictionaryInfo(dictInfoResource);
                    DictionaryInfo dicInfoHbase = dictMgrHbase.trySaveNewDict(dictInfoHdfs.getDictionaryObject(),
                            dictInfoHdfs);

                    if (dicInfoHbase != null) {
                        TblColRef tblColRef = cube.getDescriptor().findColumnRef(tblCol.split(":")[0],
                                tblCol.split(":")[1]);
                        newSegCopy.putDictResPath(tblColRef, dicInfoHbase.getResourcePath());
                    }
                }
            }

            IOUtils.closeStream(reader);
        }

        CubeSegment lastSeg = mergingSegments.get(mergingSegments.size() - 1);
        for (Map.Entry<String, String> entry : lastSeg.getSnapshots().entrySet()) {
            newSegCopy.putSnapshotResPath(entry.getKey(), entry.getValue());
        }

        // update statistics: put the statistics into the metadata store
        String statisticsFileName = newSegment.getStatisticsResourcePath();
        hbaseRS.putResource(statisticsFileName,
                hdfsRS.getResource(newSegment.getStatisticsResourcePath()).inputStream,
                System.currentTimeMillis());

        CubeUpdate update = new CubeUpdate(cubeCopy);
        update.setToUpdateSegs(newSegCopy);
        cubeMgr.updateCube(update);

        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to merge dictionary", e);
        return ExecuteResult.createError(e);
    }
}
From source file:org.apache.lens.driver.hive.TestHiveDriver.java
License:Apache License
/**
 * Validate a persistent result.
 *
 * @param resultSet   the result set
 * @param dataFile    the data file
 * @param outptuDir   the output dir
 * @param formatNulls whether nulls are formatted
 * @throws Exception the exception
 */
private void validatePersistentResult(LensResultSet resultSet, String dataFile, Path outptuDir,
        boolean formatNulls) throws Exception {
    assertTrue(resultSet instanceof HivePersistentResultSet,
            "resultset class: " + resultSet.getClass().getName());
    HivePersistentResultSet persistentResultSet = (HivePersistentResultSet) resultSet;
    String path = persistentResultSet.getOutputPath();
    Path actualPath = new Path(path);
    FileSystem fs = actualPath.getFileSystem(driverConf);
    assertEquals(actualPath, fs.makeQualified(outptuDir));

    List<String> actualRows = new ArrayList<String>();
    for (FileStatus stat : fs.listStatus(actualPath, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return !new File(path.toUri()).isDirectory();
        }
    })) {
        FSDataInputStream in = fs.open(stat.getPath());
        BufferedReader br = null;
        try {
            br = new BufferedReader(new InputStreamReader(in));
            String line = "";
            while ((line = br.readLine()) != null) {
                System.out.println("Actual:" + line);
                actualRows.add(line.trim());
            }
        } finally {
            if (br != null) {
                br.close();
            }
        }
    }

    BufferedReader br = null;
    List<String> expectedRows = new ArrayList<String>();
    try {
        br = new BufferedReader(new FileReader(new File(dataFile)));
        String line = "";
        while ((line = br.readLine()) != null) {
            String row = line.trim();
            if (formatNulls) {
                row += ",-NA-,";
                row += line.trim();
            }
            expectedRows.add(row);
        }
    } finally {
        if (br != null) {
            br.close();
        }
    }
    assertEquals(actualRows, expectedRows);
}
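One caveat: the filter above checks for directories via java.io.File, which only works because this test runs against the local filesystem. A sketch of an equivalent filter that stays within the Hadoop API (and would also work against HDFS, assuming Hadoop 2.x for isDirectory()), capturing the surrounding fs variable:

// Sketch: ask the FileSystem for the status instead of using java.io.File.
PathFilter filesOnly = new PathFilter() {
    @Override
    public boolean accept(Path path) {
        try {
            return !fs.getFileStatus(path).isDirectory();
        } catch (IOException e) {
            return false; // skip paths we cannot stat
        }
    }
};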
From source file:org.apache.lens.lib.query.FilePersistentFormatter.java
License:Apache License
@Override
public void addRowsFromPersistedPath(final Path persistedDir) throws IOException {
    final FileSystem persistFs = persistedDir.getFileSystem(ctx.getConf());

    FileStatus[] partFiles = persistFs.listStatus(persistedDir, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return !path.getName().startsWith("_") && !path.getName().startsWith(".");
        }
    });

    TreeMap<PartFile, FileStatus> partFileMap = new TreeMap<PartFile, FileStatus>();
    try {
        for (FileStatus file : partFiles) {
            partFileMap.put(new PartFile(file.getPath().getName()), file);
        }

        for (Map.Entry<PartFile, FileStatus> entry : partFileMap.entrySet()) {
            log.info("Processing file:{}", entry.getValue().getPath());
            BufferedReader in = null;
            try {
                // the default encoding in the Hadoop filesystem is UTF-8
                in = new BufferedReader(
                        new InputStreamReader(persistFs.open(entry.getValue().getPath()), "UTF-8"));
                String row = in.readLine();
                while (row != null) {
                    writeRow(row);
                    row = in.readLine();
                }
            } finally {
                if (in != null) {
                    in.close();
                }
            }
        }
    } catch (ParseException e) {
        throw new IOException(e);
    }
}
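The filter here skips Hadoop side files: names starting with "_" (such as the _SUCCESS marker and _logs) and hidden files starting with ".". On Java 8+, the same convention could be captured once as a reusable lambda, for example:

// Reusable filter for visible output files only (skips _SUCCESS, _logs, dot files).
PathFilter visibleOutputFiles = path -> {
    String name = path.getName();
    return !name.startsWith("_") && !name.startsWith(".");
};
FileStatus[] partFiles = persistFs.listStatus(persistedDir, visibleOutputFiles);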
From source file:org.apache.mahout.classifier.sgd.TestASFEmail.java
License:Apache License
public void run(PrintWriter output) throws IOException {
    File base = new File(inputFile);

    // contains the best model
    OnlineLogisticRegression classifier = ModelSerializer.readBinary(new FileInputStream(modelFile),
            OnlineLogisticRegression.class);

    Dictionary asfDictionary = new Dictionary();
    Configuration conf = new Configuration();
    PathFilter testFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().contains("test");
        }
    };
    SequenceFileDirIterator<Text, VectorWritable> iter = new SequenceFileDirIterator<Text, VectorWritable>(
            new Path(base.toString()), PathType.LIST, testFilter, null, true, conf);

    long numItems = 0;
    while (iter.hasNext()) {
        Pair<Text, VectorWritable> next = iter.next();
        asfDictionary.intern(next.getFirst().toString());
        numItems++;
    }
    System.out.println(numItems + " test files");

    ResultAnalyzer ra = new ResultAnalyzer(asfDictionary.values(), "DEFAULT");
    // iterate a second time; the iterator cannot be rewound, so re-create it
    iter = new SequenceFileDirIterator<Text, VectorWritable>(new Path(base.toString()), PathType.LIST,
            testFilter, null, true, conf);
    while (iter.hasNext()) {
        Pair<Text, VectorWritable> next = iter.next();
        String ng = next.getFirst().toString();
        int actual = asfDictionary.intern(ng);
        Vector result = classifier.classifyFull(next.getSecond().get());
        int cat = result.maxValueIndex();
        double score = result.maxValue();
        double ll = classifier.logLikelihood(actual, next.getSecond().get());
        ClassifierResult cr = new ClassifierResult(asfDictionary.values().get(cat), score, ll);
        ra.addInstance(asfDictionary.values().get(actual), cr);
    }
    output.println(ra);
}