Example usage for org.apache.hadoop.fs.PathFilter

Introduction

On this page you can find example usages of org.apache.hadoop.fs.PathFilter.

Prototype

PathFilter: an interface with a single method, boolean accept(Path path), which decides whether a given Path is included in a listing.
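
Because PathFilter has a single abstract method, it is usually implemented inline as an anonymous class (or, on Java 8+, a lambda) passed to FileSystem.listStatus. The following is a minimal, self-contained sketch; the directory path and the "part" prefix are illustrative only and do not come from the sources listed below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class PathFilterSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Keep only MapReduce output files such as part-r-00000 (the directory is illustrative).
        FileStatus[] parts = fs.listStatus(new Path("/tmp/job-output"), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part");
            }
        });

        for (FileStatus status : parts) {
            System.out.println(status.getPath());
        }
    }
}

On Java 8 and later the same filter can be written as a lambda, for example fs.listStatus(dir, path -> path.getName().startsWith("part")).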

Usage

From source file:org.apache.hcatalog.hcatmix.load.HadoopLoadGenerator.java

License:Apache License

/**
 * Read the results from the HDFS reduce output directory.
 * @param outputDir directory to read from; the files are expected to be {@link SequenceFile}s
 * @param jobConf   job configuration used to open the sequence file readers
 * @return the results keyed by timestamp
 * @throws IOException
 */
private SortedMap<Long, ReduceResult> readResult(Path outputDir, JobConf jobConf) throws IOException {
    SortedMap<Long, ReduceResult> timeseriesResults = new TreeMap<Long, ReduceResult>();
    FileStatus[] files = fs.listStatus(outputDir, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith("part");
        }
    });
    for (FileStatus status : files) {
        Path path = status.getPath();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, jobConf);
        LongWritable timeStamp = new LongWritable();
        ReduceResult result = new ReduceResult();
        while (reader.next(timeStamp, result)) {
            LOG.info("Timestamp: " + timeStamp);
            LOG.info("ThreadCount: " + result.getThreadCount());
            LOG.info("Stats:\n" + result.getStatistics());
            LOG.info("Errors: " + result.getNumErrors());
            timeseriesResults.put(timeStamp.get(), result);
            timeStamp = new LongWritable(); // use fresh objects for the next read so the values stored in the map are not overwritten
            result = new ReduceResult();
        }
        reader.close();
    }
    return timeseriesResults;
}

From source file:org.apache.ignite.internal.processors.hadoop.impl.HadoopTeraSortTest.java

License:Apache License

/**
 * Implements the validation phase of the sample.
 * @throws Exception
 */
private void teraValidate() throws Exception {
    System.out.println("TeraValidate ===============================================================");

    getFileSystem().delete(new Path(validateOutDir), true);

    // Run TeraValidate over the sorted output:
    int res = ToolRunner.run(new Configuration(), new TeraValidate(),
            new String[] { "-Dmapreduce.framework.name=local", sortOutDir, validateOutDir });

    assertEquals(0, res);

    FileStatus[] fileStatuses = getFileSystem().listStatus(new Path(validateOutDir), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            // Typically the name is "part-r-00000":
            return path.getName().startsWith("part-r-");
        }
    });

    // TeraValidate has only 1 reducer, so there should be only 1 result file:
    assertEquals(1, fileStatuses.length);

    // The result file must contain only one line with the checksum, e.g.
    // "checksum        7a27e2d0d55de",
    // which typically has a length of 23 bytes.
    // If the sort was not correct, the result instead contains the K-V pairs that are out of order,
    // and in that case the output is much larger.
    long len = fileStatuses[0].getLen();

    assertTrue("TeraValidate length: " + len, len >= 16 && len <= 32);
}

From source file:org.apache.kylin.common.util.HadoopUtil.java

License:Apache License

public static Path getFilterOnlyPath(FileSystem fs, Path baseDir, final String filter) throws IOException {
    if (!fs.exists(baseDir)) {
        return null;
    }

    FileStatus[] fileStatus = fs.listStatus(baseDir, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(filter);
        }
    });

    if (fileStatus.length == 1) {
        return fileStatus[0].getPath();
    } else {
        return null;
    }
}

From source file:org.apache.kylin.dict.global.GlobalDictHDFSStore.java

License:Apache License

private void migrateOldLayout() throws IOException {
    FileStatus[] sliceFiles = fileSystem.listStatus(basePath, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(IndexFormatV1.SLICE_PREFIX);
        }
    });
    Path indexFile = new Path(basePath, V1_INDEX_NAME);

    if (fileSystem.exists(indexFile) && sliceFiles.length > 0) { // old layout
        final long version = System.currentTimeMillis();
        Path tempDir = new Path(basePath, "tmp_" + VERSION_PREFIX + version);
        Path versionDir = getVersionDir(version);

        logger.info("Convert global dict at {} to new layout with version {}", basePath, version);

        fileSystem.mkdirs(tempDir);
        // convert to new layout
        try {
            // copy index and slice files to temp
            FileUtil.copy(fileSystem, indexFile, fileSystem, tempDir, false, conf);
            for (FileStatus sliceFile : sliceFiles) {
                FileUtil.copy(fileSystem, sliceFile.getPath(), fileSystem, tempDir, false, conf);
            }
            // rename
            fileSystem.rename(tempDir, versionDir);
            // delete the index and slice files in the base dir
            fileSystem.delete(indexFile, false);
            for (FileStatus sliceFile : sliceFiles) {
                fileSystem.delete(sliceFile.getPath(), true);
            }

        } finally {
            if (fileSystem.exists(tempDir)) {
                fileSystem.delete(tempDir, true);
            }
        }
    }
}

From source file:org.apache.kylin.dict.global.GlobalDictHDFSStore.java

License:Apache License

@Override
public Long[] listAllVersions() throws IOException {
    FileStatus[] versionDirs = fileSystem.listStatus(basePath, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(VERSION_PREFIX);
        }
    });
    TreeSet<Long> versions = new TreeSet<>();
    for (int i = 0; i < versionDirs.length; i++) {
        Path path = versionDirs[i].getPath();
        versions.add(Long.parseLong(path.getName().substring(VERSION_PREFIX.length())));
    }
    return versions.toArray(new Long[versions.size()]);
}

From source file:org.apache.kylin.dict.global.GlobalDictHDFSStore.java

License:Apache License

@Override
public GlobalDictMetadata getMetadata(long version) throws IOException {
    Path versionDir = getVersionDir(version);
    FileStatus[] indexFiles = fileSystem.listStatus(versionDir, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(V1_INDEX_NAME);
        }
    });
    checkState(indexFiles.length == 1, "zero or more than one index file found: %s",
            Arrays.toString(indexFiles));

    IndexFormat format;
    String indexFile = indexFiles[0].getPath().getName();
    if (V2_INDEX_NAME.equals(indexFile)) {
        format = new IndexFormatV2(fileSystem, conf);
    } else if (V1_INDEX_NAME.equals(indexFile)) {
        format = new IndexFormatV1(fileSystem, conf);
    } else {
        throw new RuntimeException("Unknown index file: " + indexFile);
    }

    return format.readIndexFile(versionDir);
}

From source file:org.apache.kylin.engine.mr.steps.UpdateDictionaryStep.java

License:Apache License

@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeMgr = CubeManager.getInstance(context.getConfig());
    final DictionaryManager dictMgrHdfs;
    final DictionaryManager dictMgrHbase;
    final CubeInstance cube = cubeMgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment newSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    final List<CubeSegment> mergingSegments = getMergingSegments(cube);
    final String dictInfoPath = this.getParams().get(BatchConstants.ARG_DICT_PATH);
    final String metadataUrl = this.getParams().get(BatchConstants.ARG_META_URL);

    final KylinConfig kylinConfHbase = cube.getConfig();
    final KylinConfig kylinConfHdfs = KylinConfig.createInstanceFromUri(metadataUrl);

    Collections.sort(mergingSegments);

    try {
        Configuration conf = HadoopUtil.getCurrentConfiguration();
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        ResourceStore hbaseRS = ResourceStore.getStore(kylinConfHbase);
        ResourceStore hdfsRS = ResourceStore.getStore(kylinConfHdfs);
        dictMgrHdfs = DictionaryManager.getInstance(kylinConfHdfs);
        dictMgrHbase = DictionaryManager.getInstance(kylinConfHbase);

        // work on copy instead of cached objects
        CubeInstance cubeCopy = cube.latestCopyForWrite();
        CubeSegment newSegCopy = cubeCopy.getSegmentById(newSegment.getUuid());

        // update cube segment dictionary

        FileStatus[] fileStatuses = fs.listStatus(new Path(dictInfoPath), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part");
            }
        });

        for (FileStatus fileStatus : fileStatuses) {
            Path filePath = fileStatus.getPath();

            SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);
            Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Text value = (Text) ReflectionUtils.newInstance(reader.getValueClass(), conf);

            while (reader.next(key, value)) {
                String tblCol = key.toString();
                String dictInfoResource = value.toString();

                if (StringUtils.isNotEmpty(dictInfoResource)) {
                    logger.info(dictInfoResource);
                    // put dictionary file to metadata store
                    DictionaryInfo dictInfoHdfs = dictMgrHdfs.getDictionaryInfo(dictInfoResource);
                    DictionaryInfo dicInfoHbase = dictMgrHbase
                            .trySaveNewDict(dictInfoHdfs.getDictionaryObject(), dictInfoHdfs);

                    if (dicInfoHbase != null) {
                        TblColRef tblColRef = cube.getDescriptor().findColumnRef(tblCol.split(":")[0],
                                tblCol.split(":")[1]);
                        newSegCopy.putDictResPath(tblColRef, dicInfoHbase.getResourcePath());
                    }
                }
            }

            IOUtils.closeStream(reader);
        }

        CubeSegment lastSeg = mergingSegments.get(mergingSegments.size() - 1);
        for (Map.Entry<String, String> entry : lastSeg.getSnapshots().entrySet()) {
            newSegCopy.putSnapshotResPath(entry.getKey(), entry.getValue());
        }

        // update statistics
        // put the statistics to metadata store
        String statisticsFileName = newSegment.getStatisticsResourcePath();
        hbaseRS.putResource(statisticsFileName,
                hdfsRS.getResource(newSegment.getStatisticsResourcePath()).inputStream,
                System.currentTimeMillis());

        CubeUpdate update = new CubeUpdate(cubeCopy);
        update.setToUpdateSegs(newSegCopy);
        cubeMgr.updateCube(update);

        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to merge dictionary", e);
        return ExecuteResult.createError(e);
    }
}

From source file:org.apache.lens.driver.hive.TestHiveDriver.java

License:Apache License

/**
 * Validate a persistent result.
 *
 * @param resultSet   the result set
 * @param dataFile    the data file
 * @param outputDir   the output directory
 * @param formatNulls whether nulls are formatted
 * @throws Exception the exception
 */
private void validatePersistentResult(LensResultSet resultSet, String dataFile, Path outputDir,
        boolean formatNulls) throws Exception {
    assertTrue(resultSet instanceof HivePersistentResultSet,
            "resultset class: " + resultSet.getClass().getName());
    HivePersistentResultSet persistentResultSet = (HivePersistentResultSet) resultSet;
    String path = persistentResultSet.getOutputPath();

    Path actualPath = new Path(path);
    FileSystem fs = actualPath.getFileSystem(driverConf);
    assertEquals(actualPath, fs.makeQualified(outputDir));
    List<String> actualRows = new ArrayList<String>();
    for (FileStatus stat : fs.listStatus(actualPath, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return !new File(path.toUri()).isDirectory();
        }
    })) {
        FSDataInputStream in = fs.open(stat.getPath());
        BufferedReader br = null;
        try {
            br = new BufferedReader(new InputStreamReader(in));
            String line = "";

            while ((line = br.readLine()) != null) {
                System.out.println("Actual:" + line);
                actualRows.add(line.trim());
            }
        } finally {
            if (br != null) {
                br.close();
            }
        }
    }

    BufferedReader br = null;
    List<String> expectedRows = new ArrayList<String>();

    try {
        br = new BufferedReader(new FileReader(new File(dataFile)));
        String line = "";
        while ((line = br.readLine()) != null) {
            String row = line.trim();
            if (formatNulls) {
                row += ",-NA-,";
                row += line.trim();
            }
            expectedRows.add(row);
        }
    } finally {
        if (br != null) {
            br.close();
        }
    }
    assertEquals(actualRows, expectedRows);
}

From source file:org.apache.lens.lib.query.FilePersistentFormatter.java

License:Apache License

@Override
public void addRowsFromPersistedPath(final Path persistedDir) throws IOException {
    final FileSystem persistFs = persistedDir.getFileSystem(ctx.getConf());

    FileStatus[] partFiles = persistFs.listStatus(persistedDir, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return !path.getName().startsWith("_") && !path.getName().startsWith(".");
        }
    });

    TreeMap<PartFile, FileStatus> partFileMap = new TreeMap<PartFile, FileStatus>();
    try {
        for (FileStatus file : partFiles) {
            partFileMap.put(new PartFile(file.getPath().getName()), file);
        }

        for (Map.Entry<PartFile, FileStatus> entry : partFileMap.entrySet()) {
            log.info("Processing file:{}", entry.getValue().getPath());
            BufferedReader in = null;
            try {
                // default encoding in hadoop filesystem is utf-8
                in = new BufferedReader(
                        new InputStreamReader(persistFs.open(entry.getValue().getPath()), "UTF-8"));
                String row = in.readLine();
                while (row != null) {
                    writeRow(row);
                    row = in.readLine();
                }
            } finally {
                if (in != null) {
                    in.close();
                }
            }
        }
    } catch (ParseException e) {
        throw new IOException(e);
    }
}

From source file:org.apache.mahout.classifier.sgd.TestASFEmail.java

License:Apache License

public void run(PrintWriter output) throws IOException {

    File base = new File(inputFile);
    //contains the best model
    OnlineLogisticRegression classifier = ModelSerializer.readBinary(new FileInputStream(modelFile),
            OnlineLogisticRegression.class);

    Dictionary asfDictionary = new Dictionary();
    Configuration conf = new Configuration();
    PathFilter testFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().contains("test");
        }
    };
    SequenceFileDirIterator<Text, VectorWritable> iter = new SequenceFileDirIterator<Text, VectorWritable>(
            new Path(base.toString()), PathType.LIST, testFilter, null, true, conf);

    long numItems = 0;
    while (iter.hasNext()) {
        Pair<Text, VectorWritable> next = iter.next();
        asfDictionary.intern(next.getFirst().toString());
        numItems++;
    }

    System.out.println(numItems + " test files");
    ResultAnalyzer ra = new ResultAnalyzer(asfDictionary.values(), "DEFAULT");
    iter = new SequenceFileDirIterator<Text, VectorWritable>(new Path(base.toString()), PathType.LIST,
            testFilter, null, true, conf);
    while (iter.hasNext()) {
        Pair<Text, VectorWritable> next = iter.next();
        String ng = next.getFirst().toString();

        int actual = asfDictionary.intern(ng);
        Vector result = classifier.classifyFull(next.getSecond().get());
        int cat = result.maxValueIndex();
        double score = result.maxValue();
        double ll = classifier.logLikelihood(actual, next.getSecond().get());
        ClassifierResult cr = new ClassifierResult(asfDictionary.values().get(cat), score, ll);
        ra.addInstance(asfDictionary.values().get(actual), cr);

    }
    output.println(ra);
}