Example usage for org.apache.hadoop.fs.PathFilter

Introduction

On this page you can find example usages of org.apache.hadoop.fs.PathFilter.

Prototype

PathFilter: an interface with a single method, boolean accept(Path path), which decides whether a given Path is included in a listing.
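
Because PathFilter has a single abstract method, it is usually implemented inline as an anonymous class (or, on Java 8+, a lambda) passed to FileSystem.listStatus. The following is a minimal, self-contained sketch; the directory path and the "part" prefix are illustrative only and do not come from the sources listed below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class PathFilterSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Keep only MapReduce output files such as part-r-00000 (the directory is illustrative).
        FileStatus[] parts = fs.listStatus(new Path("/tmp/job-output"), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part");
            }
        });

        for (FileStatus status : parts) {
            System.out.println(status.getPath());
        }
    }
}

On Java 8 and later the same filter can be written as a lambda, for example fs.listStatus(dir, path -> path.getName().startsWith("part")).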

Usage

From source file:org.apache.hcatalog.hcatmix.load.HadoopLoadGenerator.java

License:Apache License

/**
 * Read the results from the HDFS reduce output directory.
 * @param outputDir directory to read from; the files are expected to be {@link SequenceFile}s
 * @param jobConf   job configuration used to open the sequence file readers
 * @return the results keyed by timestamp
 * @throws IOException
 */
private SortedMap<Long, ReduceResult> readResult(Path outputDir, JobConf jobConf) throws IOException {
    SortedMap<Long, ReduceResult> timeseriesResults = new TreeMap<Long, ReduceResult>();
    FileStatus[] files = fs.listStatus(outputDir, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith("part");
        }
    });
    for (FileStatus status : files) {
        Path path = status.getPath();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, jobConf);
        LongWritable timeStamp = new LongWritable();
        ReduceResult result = new ReduceResult();
        while (reader.next(timeStamp, result)) {
            LOG.info("Timestamp: " + timeStamp);
            LOG.info("ThreadCount: " + result.getThreadCount());
            LOG.info("Stats:\n" + result.getStatistics());
            LOG.info("Errors: " + result.getNumErrors());
            timeseriesResults.put(timeStamp.get(), result);
            timeStamp = new LongWritable(); // use fresh objects for the next read so the values stored in the map are not overwritten
            result = new ReduceResult();
        }
        reader.close();
    }
    return timeseriesResults;
}

From source file:org.apache.ignite.internal.processors.hadoop.impl.HadoopTeraSortTest.java

License:Apache License

/**
 * Implements the validation phase of the sample.
 * @throws Exception
 */
private void teraValidate() throws Exception {
    System.out.println("TeraValidate ===============================================================");

    getFileSystem().delete(new Path(validateOutDir), true);

    // Run TeraValidate over the sorted output:
    int res = ToolRunner.run(new Configuration(), new TeraValidate(),
            new String[] { "-Dmapreduce.framework.name=local", sortOutDir, validateOutDir });

    assertEquals(0, res);

    FileStatus[] fileStatuses = getFileSystem().listStatus(new Path(validateOutDir), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            // Typically the name is "part-r-00000":
            return path.getName().startsWith("part-r-");
        }
    });

    // TeraValidate has only 1 reducer, so there should be only 1 result file:
    assertEquals(1, fileStatuses.length);

    // The result file must contain only one line with the checksum, e.g.
    // "checksum        7a27e2d0d55de",
    // which typically has a length of 23 bytes.
    // If the sort was not correct, the result instead contains the K-V pairs that are out of order,
    // and in that case the output is much larger.
    long len = fileStatuses[0].getLen();

    assertTrue("TeraValidate length: " + len, len >= 16 && len <= 32);
}

From source file:org.apache.kylin.common.util.HadoopUtil.java

License:Apache License

public static Path getFilterOnlyPath(FileSystem fs, Path baseDir, final String filter) throws IOException {
    if (!fs.exists(baseDir)) {
        return null;
    }

    FileStatus[] fileStatus = fs.listStatus(baseDir, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(filter);
        }
    });

    if (fileStatus.length == 1) {
        return fileStatus[0].getPath();
    } else {
        return null;
    }
}

From source file:org.apache.kylin.dict.global.GlobalDictHDFSStore.java

License:Apache License

private void migrateOldLayout() throws IOException {
    FileStatus[] sliceFiles = fileSystem.listStatus(basePath, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(IndexFormatV1.SLICE_PREFIX);
        }
    });
    Path indexFile = new Path(basePath, V1_INDEX_NAME);

    if (fileSystem.exists(indexFile) && sliceFiles.length > 0) { // old layout
        final long version = System.currentTimeMillis();
        Path tempDir = new Path(basePath, "tmp_" + VERSION_PREFIX + version);
        Path versionDir = getVersionDir(version);

        logger.info("Convert global dict at {} to new layout with version {}", basePath, version);

        fileSystem.mkdirs(tempDir);
        // convert to new layout
        try {
            // copy index and slice files to temp
            FileUtil.copy(fileSystem, indexFile, fileSystem, tempDir, false, conf);
            for (FileStatus sliceFile : sliceFiles) {
                FileUtil.copy(fileSystem, sliceFile.getPath(), fileSystem, tempDir, false, conf);
            }
            // rename
            fileSystem.rename(tempDir, versionDir);
            // delete the index and slice files in the base dir
            fileSystem.delete(indexFile, false);
            for (FileStatus sliceFile : sliceFiles) {
                fileSystem.delete(sliceFile.getPath(), true);
            }

        } finally {
            if (fileSystem.exists(tempDir)) {
                fileSystem.delete(tempDir, true);
            }
        }
    }
}

From source file:org.apache.kylin.dict.global.GlobalDictHDFSStore.java

License:Apache License

@Override
public Long[] listAllVersions() throws IOException {
    FileStatus[] versionDirs = fileSystem.listStatus(basePath, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(VERSION_PREFIX);
        }
    });
    TreeSet<Long> versions = new TreeSet<>();
    for (int i = 0; i < versionDirs.length; i++) {
        Path path = versionDirs[i].getPath();
        versions.add(Long.parseLong(path.getName().substring(VERSION_PREFIX.length())));
    }
    return versions.toArray(new Long[versions.size()]);
}

From source file:org.apache.kylin.dict.global.GlobalDictHDFSStore.java

License:Apache License

@Override
public GlobalDictMetadata getMetadata(long version) throws IOException {
    Path versionDir = getVersionDir(version);
    FileStatus[] indexFiles = fileSystem.listStatus(versionDir, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(V1_INDEX_NAME);
        }
    });
    checkState(indexFiles.length == 1, "zero or more than one index file found: %s",
            Arrays.toString(indexFiles));

    IndexFormat format;
    String indexFile = indexFiles[0].getPath().getName();
    if (V2_INDEX_NAME.equals(indexFile)) {
        format = new IndexFormatV2(fileSystem, conf);
    } else if (V1_INDEX_NAME.equals(indexFile)) {
        format = new IndexFormatV1(fileSystem, conf);
    } else {
        throw new RuntimeException("Unknown index file: " + indexFile);
    }

    return format.readIndexFile(versionDir);
}

From source file:org.apache.kylin.engine.mr.steps.UpdateDictionaryStep.java

License:Apache License

@Override
protected ExecuteResult doWork(ExecutableContext context) throws ExecuteException {
    final CubeManager cubeMgr = CubeManager.getInstance(context.getConfig());
    final DictionaryManager dictMgrHdfs;
    final DictionaryManager dictMgrHbase;
    final CubeInstance cube = cubeMgr.getCube(CubingExecutableUtil.getCubeName(this.getParams()));
    final CubeSegment newSegment = cube.getSegmentById(CubingExecutableUtil.getSegmentId(this.getParams()));
    final List<CubeSegment> mergingSegments = getMergingSegments(cube);
    final String dictInfoPath = this.getParams().get(BatchConstants.ARG_DICT_PATH);
    final String metadataUrl = this.getParams().get(BatchConstants.ARG_META_URL);

    final KylinConfig kylinConfHbase = cube.getConfig();
    final KylinConfig kylinConfHdfs = KylinConfig.createInstanceFromUri(metadataUrl);

    Collections.sort(mergingSegments);

    try {
        Configuration conf = HadoopUtil.getCurrentConfiguration();
        FileSystem fs = HadoopUtil.getWorkingFileSystem();
        ResourceStore hbaseRS = ResourceStore.getStore(kylinConfHbase);
        ResourceStore hdfsRS = ResourceStore.getStore(kylinConfHdfs);
        dictMgrHdfs = DictionaryManager.getInstance(kylinConfHdfs);
        dictMgrHbase = DictionaryManager.getInstance(kylinConfHbase);

        // work on copy instead of cached objects
        CubeInstance cubeCopy = cube.latestCopyForWrite();
        CubeSegment newSegCopy = cubeCopy.getSegmentById(newSegment.getUuid());

        // update cube segment dictionary

        FileStatus[] fileStatuses = fs.listStatus(new Path(dictInfoPath), new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("part");
            }
        });

        for (FileStatus fileStatus : fileStatuses) {
            Path filePath = fileStatus.getPath();

            SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);
            Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Text value = (Text) ReflectionUtils.newInstance(reader.getValueClass(), conf);

            while (reader.next(key, value)) {
                String tblCol = key.toString();
                String dictInfoResource = value.toString();

                if (StringUtils.isNotEmpty(dictInfoResource)) {
                    logger.info(dictInfoResource);
                    // put dictionary file to metadata store
                    DictionaryInfo dictInfoHdfs = dictMgrHdfs.getDictionaryInfo(dictInfoResource);
                    DictionaryInfo dicInfoHbase = dictMgrHbase
                            .trySaveNewDict(dictInfoHdfs.getDictionaryObject(), dictInfoHdfs);

                    if (dicInfoHbase != null) {
                        TblColRef tblColRef = cube.getDescriptor().findColumnRef(tblCol.split(":")[0],
                                tblCol.split(":")[1]);
                        newSegCopy.putDictResPath(tblColRef, dicInfoHbase.getResourcePath());
                    }
                }
            }

            IOUtils.closeStream(reader);
        }

        CubeSegment lastSeg = mergingSegments.get(mergingSegments.size() - 1);
        for (Map.Entry<String, String> entry : lastSeg.getSnapshots().entrySet()) {
            newSegCopy.putSnapshotResPath(entry.getKey(), entry.getValue());
        }

        // update statistics
        // put the statistics to metadata store
        String statisticsFileName = newSegment.getStatisticsResourcePath();
        hbaseRS.putResource(statisticsFileName,
                hdfsRS.getResource(newSegment.getStatisticsResourcePath()).inputStream,
                System.currentTimeMillis());

        CubeUpdate update = new CubeUpdate(cubeCopy);
        update.setToUpdateSegs(newSegCopy);
        cubeMgr.updateCube(update);

        return ExecuteResult.createSucceed();
    } catch (IOException e) {
        logger.error("fail to merge dictionary", e);
        return ExecuteResult.createError(e);
    }
}

From source file:org.apache.lens.driver.hive.TestHiveDriver.java

License:Apache License

/**
 * Validate a persistent result.
 *
 * @param resultSet   the result set
 * @param dataFile    the data file
 * @param outputDir   the output directory
 * @param formatNulls whether nulls are formatted
 * @throws Exception the exception
 */
private void validatePersistentResult(LensResultSet resultSet, String dataFile, Path outputDir,
        boolean formatNulls) throws Exception {
    assertTrue(resultSet instanceof HivePersistentResultSet,
            "resultset class: " + resultSet.getClass().getName());
    HivePersistentResultSet persistentResultSet = (HivePersistentResultSet) resultSet;
    String path = persistentResultSet.getOutputPath();

    Path actualPath = new Path(path);
    FileSystem fs = actualPath.getFileSystem(driverConf);
    assertEquals(actualPath, fs.makeQualified(outputDir));
    List<String> actualRows = new ArrayList<String>();
    for (FileStatus stat : fs.listStatus(actualPath, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return !new File(path.toUri()).isDirectory();
        }
    })) {
        FSDataInputStream in = fs.open(stat.getPath());
        BufferedReader br = null;
        try {
            br = new BufferedReader(new InputStreamReader(in));
            String line = "";

            while ((line = br.readLine()) != null) {
                System.out.println("Actual:" + line);
                actualRows.add(line.trim());
            }
        } finally {
            if (br != null) {
                br.close();
            }
        }
    }

    BufferedReader br = null;
    List<String> expectedRows = new ArrayList<String>();

    try {
        br = new BufferedReader(new FileReader(new File(dataFile)));
        String line = "";
        while ((line = br.readLine()) != null) {
            String row = line.trim();
            if (formatNulls) {
                row += ",-NA-,";
                row += line.trim();
            }
            expectedRows.add(row);
        }
    } finally {
        if (br != null) {
            br.close();
        }
    }
    assertEquals(actualRows, expectedRows);
}

From source file:org.apache.lens.lib.query.FilePersistentFormatter.java

License:Apache License

@Override
public void addRowsFromPersistedPath(final Path persistedDir) throws IOException {
    final FileSystem persistFs = persistedDir.getFileSystem(ctx.getConf());

    FileStatus[] partFiles = persistFs.listStatus(persistedDir, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return !path.getName().startsWith("_") && !path.getName().startsWith(".");
        }
    });

    TreeMap<PartFile, FileStatus> partFileMap = new TreeMap<PartFile, FileStatus>();
    try {
        for (FileStatus file : partFiles) {
            partFileMap.put(new PartFile(file.getPath().getName()), file);
        }

        for (Map.Entry<PartFile, FileStatus> entry : partFileMap.entrySet()) {
            log.info("Processing file:{}", entry.getValue().getPath());
            BufferedReader in = null;
            try {
                // default encoding in hadoop filesystem is utf-8
                in = new BufferedReader(
                        new InputStreamReader(persistFs.open(entry.getValue().getPath()), "UTF-8"));
                String row = in.readLine();
                while (row != null) {
                    writeRow(row);
                    row = in.readLine();
                }
            } finally {
                if (in != null) {
                    in.close();
                }
            }
        }
    } catch (ParseException e) {
        throw new IOException(e);
    }
}

From source file:org.apache.mahout.classifier.sgd.TestASFEmail.java

License:Apache License

public void run(PrintWriter output) throws IOException {

    File base = new File(inputFile);
    //contains the best model
    OnlineLogisticRegression classifier = ModelSerializer.readBinary(new FileInputStream(modelFile),
            OnlineLogisticRegression.class);

    Dictionary asfDictionary = new Dictionary();
    Configuration conf = new Configuration();
    PathFilter testFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().contains("test");
        }
    };
    SequenceFileDirIterator<Text, VectorWritable> iter = new SequenceFileDirIterator<Text, VectorWritable>(
            new Path(base.toString()), PathType.LIST, testFilter, null, true, conf);

    long numItems = 0;
    while (iter.hasNext()) {
        Pair<Text, VectorWritable> next = iter.next();
        asfDictionary.intern(next.getFirst().toString());
        numItems++;
    }

    System.out.println(numItems + " test files");
    ResultAnalyzer ra = new ResultAnalyzer(asfDictionary.values(), "DEFAULT");
    iter = new SequenceFileDirIterator<Text, VectorWritable>(new Path(base.toString()), PathType.LIST,
            testFilter, null, true, conf);
    while (iter.hasNext()) {
        Pair<Text, VectorWritable> next = iter.next();
        String ng = next.getFirst().toString();

        int actual = asfDictionary.intern(ng);
        Vector result = classifier.classifyFull(next.getSecond().get());
        int cat = result.maxValueIndex();
        double score = result.maxValue();
        double ll = classifier.logLikelihood(actual, next.getSecond().get());
        ClassifierResult cr = new ClassifierResult(asfDictionary.values().get(cat), score, ll);
        ra.addInstance(asfDictionary.values().get(actual), cr);

    }
    output.println(ra);
}