Example usage of org.apache.hadoop.fs.PathFilter

List of usage examples for org.apache.hadoop.fs.PathFilter

Introduction

On this page you can find example usages of org.apache.hadoop.fs.PathFilter.

Prototype

PathFilter

Source Link

Usage

From source file:org.apache.solr.update.HdfsUpdateLog.java

License:Apache License

/**
 * Lists the names of the entries directly under {@code tlogDir} whose file name
 * starts with {@code TLOG_NAME + '.'}.
 *
 * @param fs      file system to query; must not be {@code null} (checked with an assertion)
 * @param tlogDir directory to list
 * @return the matching file names (last path component only), sorted in natural
 *         {@code String} order
 * @throws RuntimeException wrapping any {@link IOException} raised while listing
 */
public static String[] getLogList(FileSystem fs, Path tlogDir) {
    assert fs != null;
    final String tlogPrefix = TLOG_NAME + '.';
    PathFilter tlogFilter = new PathFilter() {
        @Override
        public boolean accept(Path candidate) {
            return candidate.getName().startsWith(tlogPrefix);
        }
    };

    FileStatus[] matches;
    try {
        matches = fs.listStatus(tlogDir, tlogFilter);
    } catch (IOException e) {
        // FileNotFoundException is an IOException, so this single handler covers both cases.
        throw new RuntimeException(e);
    }

    String[] sortedNames = new String[matches.length];
    int next = 0;
    for (FileStatus match : matches) {
        sortedNames[next++] = match.getPath().getName();
    }
    Arrays.sort(sortedNames);
    return sortedNames;
}

From source file:org.apache.solr.update.HdfsUpdateLog.java

License:Apache License

/**
 * Lists the names of the entries directly under {@code tlogDir} whose file name
 * starts with {@code TLOG_NAME + '.'}, using this instance's {@code fs}.
 *
 * @param tlogDir directory to list
 * @return the matching file names (last path component only), in the order the
 *         file system returned them
 * @throws FileNotFoundException propagated from the file system listing
 * @throws IOException           propagated from the file system listing
 */
private String[] getLogList(Path tlogDir) throws FileNotFoundException, IOException {
    final String tlogPrefix = TLOG_NAME + '.';
    PathFilter tlogFilter = new PathFilter() {
        @Override
        public boolean accept(Path candidate) {
            return candidate.getName().startsWith(tlogPrefix);
        }
    };

    FileStatus[] matches = fs.listStatus(tlogDir, tlogFilter);
    String[] names = new String[matches.length];
    for (int i = 0; i < matches.length; i++) {
        names[i] = matches[i].getPath().getName();
    }
    return names;
}

From source file:org.apache.sqoop.avro.AvroUtil.java

License:Apache License

/**
 * Get the schema of AVRO files stored in a directory.
 *
 * <p>If {@code path} is a directory, the schema is read from the first entry
 * returned by the listing whose name does not start with {@code "_"} or
 * {@code "."}; otherwise it is read from {@code path} itself.
 *
 * @param path a file, or a directory to pick a file from
 * @param conf configuration used to resolve the file system and open the file
 * @return the schema of the chosen file, or {@code null} if {@code path} is a
 *         directory with no entry passing the name filter
 * @throws IOException if the file system cannot be accessed or the file cannot be opened
 */
public static Schema getAvroSchema(Path path, Configuration conf) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    Path fileToTest;
    if (fs.isDirectory(path)) {
        FileStatus[] fileStatuses = fs.listStatus(path, new PathFilter() {
            @Override
            public boolean accept(Path p) {
                String name = p.getName();
                return !name.startsWith("_") && !name.startsWith(".");
            }
        });
        if (fileStatuses.length == 0) {
            return null;
        }
        fileToTest = fileStatuses[0].getPath();
    } else {
        fileToTest = path;
    }

    FsInput input = new FsInput(fileToTest, conf);
    FileReader<GenericRecord> fileReader = null;
    try {
        DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
        fileReader = DataFileReader.openReader(input, reader);
        return fileReader.getSchema();
    } finally {
        if (fileReader != null) {
            // As before, closing the reader is what releases the underlying input.
            fileReader.close();
        } else {
            // openReader failed before a reader took over the stream; previously this
            // path leaked the open input.
            input.close();
        }
    }
}

From source file:org.apache.tajo.engine.function.FunctionLoader.java

License:Apache License

/**
 * Load functions that are defined by users.
 *
 * @param conf/* www.  j  a v a2  s  .  c o m*/
 * @param functionMap
 * @return
 * @throws IOException
 */
public static Map<FunctionSignature, FunctionDesc> loadUserDefinedFunctions(TajoConf conf,
        Map<FunctionSignature, FunctionDesc> functionMap) throws IOException {

    String[] codePaths = conf.getStrings(TajoConf.ConfVars.PYTHON_CODE_DIR.varname);
    if (codePaths != null) {
        FileSystem localFS = FileSystem.getLocal(conf);
        for (String codePathStr : codePaths) {
            Path codePath = new Path(codePathStr);
            List<Path> filePaths = TUtil.newList();
            if (localFS.isDirectory(codePath)) {
                for (FileStatus file : localFS.listStatus(codePath, new PathFilter() {
                    @Override
                    public boolean accept(Path path) {
                        return path.getName().endsWith(PythonScriptEngine.FILE_EXTENSION);
                    }
                })) {
                    filePaths.add(file.getPath());
                }
            } else {
                filePaths.add(codePath);
            }
            for (Path filePath : filePaths) {
                for (FunctionDesc f : PythonScriptEngine.registerFunctions(filePath.toUri(),
                        FunctionLoader.PYTHON_FUNCTION_NAMESPACE)) {
                    functionMap.put(f.getSignature(), f);
                }
            }
        }
    }
    return functionMap;
}

From source file:org.apache.tajo.engine.query.ResultSetImpl.java

License:Apache License

/**
 * Builds one {@code Fragment} per non-empty file directly under {@code tablePath}.
 *
 * <p>Entries whose name starts with {@code '.'} are ignored. The remaining files
 * are sorted with {@code FileNameComparator}; each fragment is named
 * {@code <table dir name>_<index in the sorted listing>} and spans the whole file.
 * Zero-length files are skipped but still consume an index.
 *
 * @param meta      table metadata passed to every fragment
 * @param tablePath directory holding the table's files
 * @return the fragments, in sorted file order
 * @throws IOException if the directory cannot be listed
 */
private Collection<Fragment> getFragmentsNG(TableMeta meta, Path tablePath) throws IOException {
    PathFilter notDotFile = new PathFilter() {
        @Override
        public boolean accept(Path candidate) {
            return candidate.getName().charAt(0) != '.';
        }
    };
    FileStatus[] dataFiles = fs.listStatus(tablePath, notDotFile);
    Arrays.sort(dataFiles, new FileNameComparator());

    final String tableName = tablePath.getName();
    List<Fragment> result = Lists.newArrayList();
    for (int idx = 0; idx < dataFiles.length; idx++) {
        FileStatus dataFile = dataFiles[idx];
        long length = dataFile.getLen();
        if (length != 0) {
            result.add(new Fragment(tableName + "_" + idx, dataFile.getPath(), meta, 0L, length, null));
        }
    }
    return result;
}

From source file:org.apache.tajo.jdbc.TajoResultSet.java

License:Apache License

/**
 * Builds one {@code FileFragment} per non-empty file directly under {@code tablePath}.
 *
 * <p>Entries whose name starts with {@code '.'} are ignored. Each fragment is named
 * {@code <table dir name>_<index in the sorted listing>} and spans the whole file.
 * Zero-length files are skipped but still consume an index.
 *
 * @param tablePath directory holding the table's files
 * @return an immutable list of fragments, in ascending file-name order
 * @throws IOException if the directory cannot be listed
 */
private List<Fragment> getFragments(Path tablePath) throws IOException {
    PathFilter notDotFile = new PathFilter() {
        @Override
        public boolean accept(Path candidate) {
            return candidate.getName().charAt(0) != '.';
        }
    };
    FileStatus[] dataFiles = fs.listStatus(tablePath, notDotFile);

    // The distributed sort writes a sequence of data files, each internally sorted.
    // Reading them back in ascending file-name order is what preserves the overall
    // ordering of the sort result.
    Arrays.sort(dataFiles, new FileNameComparator());

    final String tableName = tablePath.getName();
    List<Fragment> collected = Lists.newArrayList();
    for (int idx = 0; idx < dataFiles.length; idx++) {
        FileStatus dataFile = dataFiles[idx];
        long length = dataFile.getLen();
        if (length != 0) {
            collected.add(new FileFragment(tableName + "_" + idx, dataFile.getPath(), 0L, length));
        }
    }
    return ImmutableList.copyOf(collected);
}

From source file:org.apache.tez.test.TestRecovery.java

License:Apache License

/**
 * Runs {@code HashJoinExample} on the mini Tez cluster with the recovery-service
 * test hook installed, then checks both the join output and the last recovery
 * event recorded for attempt 1.
 *
 * <p>The first input holds {@code term0..term19}; the second holds only the
 * even-indexed terms, which are therefore the expected join result.
 *
 * @param shutdownCondition     condition serialized into the configuration for the
 *                              recovery hook; the last history event of attempt 1
 *                              must match it
 * @param enableAutoParallelism value for the shuffle vertex manager auto-parallel setting
 * @param generateSplitInClient whether to pass {@code -generateSplitInClient} to the example
 * @throws Exception on any file system, job or parsing failure
 */
private void testHashJoinExample(SimpleShutdownCondition shutdownCondition, boolean enableAutoParallelism,
        boolean generateSplitInClient) throws Exception {
    HashJoinExample hashJoinExample = new HashJoinExample();

    // AM configuration: allow re-attempts and plug in the recovery hook under test.
    TezConfiguration tezConf = new TezConfiguration(miniTezCluster.getConfig());
    tezConf.setInt(TezConfiguration.TEZ_AM_MAX_APP_ATTEMPTS, 4);
    tezConf.set(TezConfiguration.TEZ_AM_RECOVERY_SERVICE_CLASS,
            RecoveryServiceWithEventHandlingHook.class.getName());
    tezConf.set(RecoveryServiceWithEventHandlingHook.AM_RECOVERY_SERVICE_HOOK_CLASS,
            SimpleRecoveryEventHook.class.getName());
    tezConf.set(SimpleRecoveryEventHook.SIMPLE_SHUTDOWN_CONDITION, shutdownCondition.serialize());
    tezConf.setBoolean(ShuffleVertexManager.TEZ_SHUFFLE_VERTEX_MANAGER_ENABLE_AUTO_PARALLEL,
            enableAutoParallelism);
    tezConf.setBoolean(RecoveryService.TEZ_TEST_RECOVERY_DRAIN_EVENTS_WHEN_STOPPED, false);
    tezConf.setBoolean(TezConfiguration.TEZ_AM_STAGING_SCRATCH_DATA_AUTO_DELETE, false);
    tezConf.set(TezConfiguration.TEZ_AM_LOG_LEVEL, "INFO;org.apache.tez=DEBUG");
    hashJoinExample.setConf(tezConf);

    // Working directories on the remote file system.
    Path stagingDir = new Path("/tmp/tez-staging-dir");
    Path leftInputDir = new Path("/tmp/hashJoin/inPath1");
    Path rightInputDir = new Path("/tmp/hashJoin/inPath2");
    Path outputDir = new Path("/tmp/hashJoin/outPath");
    remoteFs.delete(outputDir, true);
    remoteFs.mkdirs(leftInputDir);
    remoteFs.mkdirs(rightInputDir);
    remoteFs.mkdirs(stagingDir);

    // Left input gets every term, right input only the even-indexed ones.
    Set<String> expectedTerms = new HashSet<String>();
    FSDataOutputStream leftOut = remoteFs.create(new Path(leftInputDir, "file"));
    FSDataOutputStream rightOut = remoteFs.create(new Path(rightInputDir, "file"));
    BufferedWriter leftWriter = new BufferedWriter(new OutputStreamWriter(leftOut));
    BufferedWriter rightWriter = new BufferedWriter(new OutputStreamWriter(rightOut));
    for (int i = 0; i < 20; i++) {
        String term = "term" + i;
        leftWriter.write(term);
        leftWriter.newLine();
        if (i % 2 != 0) {
            continue;
        }
        rightWriter.write(term);
        rightWriter.newLine();
        expectedTerms.add(term);
    }
    leftWriter.close();
    rightWriter.close();
    leftOut.close();
    rightOut.close();

    String stagingDirOption = "-D" + TezConfiguration.TEZ_AM_STAGING_DIR + "=" + stagingDir.toString();
    String[] args = generateSplitInClient
            ? new String[] { stagingDirOption, "-generateSplitInClient", leftInputDir.toString(),
                    rightInputDir.toString(), "1", outputDir.toString() }
            : new String[] { stagingDirOption, leftInputDir.toString(), rightInputDir.toString(), "1",
                    outputDir.toString() };
    assertEquals(0, hashJoinExample.run(args));

    // Exactly one visible output file is expected; names starting with "_" or "." are ignored.
    FileStatus[] outputFiles = remoteFs.listStatus(outputDir, new PathFilter() {
        @Override
        public boolean accept(Path candidate) {
            String fileName = candidate.getName();
            return !(fileName.startsWith("_") || fileName.startsWith("."));
        }
    });
    assertEquals(1, outputFiles.length);

    // Every output line must be an expected term, and every expected term must appear.
    FSDataInputStream resultStream = remoteFs.open(outputFiles[0].getPath());
    BufferedReader resultReader = new BufferedReader(new InputStreamReader(resultStream));
    for (String line = resultReader.readLine(); line != null; line = resultReader.readLine()) {
        assertTrue(expectedTerms.remove(line));
    }
    resultReader.close();
    resultStream.close();
    assertEquals(0, expectedTerms.size());

    // The first attempt must have stopped at the configured shutdown condition.
    List<HistoryEvent> attempt1Events = RecoveryParser.readRecoveryEvents(tezConf,
            hashJoinExample.getAppId(), 1);
    HistoryEvent lastEvent = attempt1Events.get(attempt1Events.size() - 1);
    assertEquals(shutdownCondition.getEvent().getEventType(), lastEvent.getEventType());
    assertTrue(shutdownCondition.match(lastEvent));
}

From source file:org.apache.tez.test.TestTezJobs.java

License:Apache License

/**
 * Runs {@code HashJoinExample} against the remote file system and checks that the
 * single visible output file contains exactly the terms present in both inputs.
 *
 * <p>The first input holds {@code term0..term19}; the second holds only the
 * even-indexed terms, which are therefore the expected join result.
 */
@Test(timeout = 60000)
public void testHashJoinExample() throws Exception {
    HashJoinExample hashJoinExample = new HashJoinExample();
    hashJoinExample.setConf(mrrTezCluster.getConfig());

    Path stagingDir = new Path("/tmp/tez-staging-dir");
    Path leftInputDir = new Path("/tmp/hashJoin/inPath1");
    Path rightInputDir = new Path("/tmp/hashJoin/inPath2");
    Path outputDir = new Path("/tmp/hashJoin/outPath");
    remoteFs.mkdirs(leftInputDir);
    remoteFs.mkdirs(rightInputDir);
    remoteFs.mkdirs(stagingDir);

    // Left input gets every term, right input only the even-indexed ones.
    Set<String> expectedTerms = new HashSet<String>();
    FSDataOutputStream leftOut = remoteFs.create(new Path(leftInputDir, "file"));
    FSDataOutputStream rightOut = remoteFs.create(new Path(rightInputDir, "file"));
    BufferedWriter leftWriter = new BufferedWriter(new OutputStreamWriter(leftOut));
    BufferedWriter rightWriter = new BufferedWriter(new OutputStreamWriter(rightOut));
    for (int i = 0; i < 20; i++) {
        String term = "term" + i;
        leftWriter.write(term);
        leftWriter.newLine();
        if (i % 2 != 0) {
            continue;
        }
        rightWriter.write(term);
        rightWriter.newLine();
        expectedTerms.add(term);
    }
    leftWriter.close();
    rightWriter.close();
    leftOut.close();
    rightOut.close();

    String stagingDirOption = "-D" + TezConfiguration.TEZ_AM_STAGING_DIR + "=" + stagingDir.toString();
    String[] args = new String[] { stagingDirOption, leftInputDir.toString(), rightInputDir.toString(), "1",
            outputDir.toString() };
    assertEquals(0, hashJoinExample.run(args));

    // Exactly one visible output file is expected; names starting with "_" or "." are ignored.
    FileStatus[] outputFiles = remoteFs.listStatus(outputDir, new PathFilter() {
        @Override
        public boolean accept(Path candidate) {
            String fileName = candidate.getName();
            return !(fileName.startsWith("_") || fileName.startsWith("."));
        }
    });
    assertEquals(1, outputFiles.length);

    // Every output line must be an expected term, and every expected term must appear.
    FSDataInputStream resultStream = remoteFs.open(outputFiles[0].getPath());
    BufferedReader resultReader = new BufferedReader(new InputStreamReader(resultStream));
    for (String line = resultReader.readLine(); line != null; line = resultReader.readLine()) {
        assertTrue(expectedTerms.remove(line));
    }
    resultReader.close();
    resultStream.close();
    assertEquals(0, expectedTerms.size());
}

From source file:org.apache.tez.test.TestTezJobs.java

License:Apache License

/**
 * Runs {@code HashJoinExample} with the {@code -local} and
 * {@code -disableSplitGrouping} arguments against the local file system, under
 * {@code TEST_ROOT_DIR}, and checks that the single visible output file contains
 * exactly the terms present in both inputs.
 *
 * <p>The first input holds {@code term0..term19}; the second holds only the
 * even-indexed terms, which are therefore the expected join result.
 */
@Test(timeout = 60000)
public void testHashJoinExampleDisableSplitGrouping() throws Exception {
    HashJoinExample hashJoinExample = new HashJoinExample();
    hashJoinExample.setConf(conf);

    Path stagingDir = new Path(TEST_ROOT_DIR + "/tmp/tez-staging-dir");
    Path leftInputDir = new Path(TEST_ROOT_DIR + "/tmp/hashJoin/inPath1");
    Path rightInputDir = new Path(TEST_ROOT_DIR + "/tmp/hashJoin/inPath2");
    Path outputDir = new Path(TEST_ROOT_DIR + "/tmp/hashJoin/outPath");
    localFs.delete(outputDir, true);
    localFs.mkdirs(leftInputDir);
    localFs.mkdirs(rightInputDir);
    localFs.mkdirs(stagingDir);

    // Left input gets every term, right input only the even-indexed ones.
    Set<String> expectedTerms = new HashSet<String>();
    FSDataOutputStream leftOut = localFs.create(new Path(leftInputDir, "file"));
    FSDataOutputStream rightOut = localFs.create(new Path(rightInputDir, "file"));
    BufferedWriter leftWriter = new BufferedWriter(new OutputStreamWriter(leftOut));
    BufferedWriter rightWriter = new BufferedWriter(new OutputStreamWriter(rightOut));
    for (int i = 0; i < 20; i++) {
        String term = "term" + i;
        leftWriter.write(term);
        leftWriter.newLine();
        if (i % 2 != 0) {
            continue;
        }
        rightWriter.write(term);
        rightWriter.newLine();
        expectedTerms.add(term);
    }
    leftWriter.close();
    rightWriter.close();
    leftOut.close();
    rightOut.close();

    String stagingDirOption = "-D" + TezConfiguration.TEZ_AM_STAGING_DIR + "=" + stagingDir.toString();
    String[] args = new String[] { stagingDirOption, "-local", "-disableSplitGrouping",
            leftInputDir.toString(), rightInputDir.toString(), "1", outputDir.toString() };
    assertEquals(0, hashJoinExample.run(args));

    // Exactly one visible output file is expected; names starting with "_" or "." are ignored.
    FileStatus[] outputFiles = localFs.listStatus(outputDir, new PathFilter() {
        @Override
        public boolean accept(Path candidate) {
            String fileName = candidate.getName();
            return !(fileName.startsWith("_") || fileName.startsWith("."));
        }
    });
    assertEquals(1, outputFiles.length);

    // Every output line must be an expected term, and every expected term must appear.
    FSDataInputStream resultStream = localFs.open(outputFiles[0].getPath());
    BufferedReader resultReader = new BufferedReader(new InputStreamReader(resultStream));
    for (String line = resultReader.readLine(); line != null; line = resultReader.readLine()) {
        assertTrue(expectedTerms.remove(line));
    }
    resultReader.close();
    resultStream.close();
    assertEquals(0, expectedTerms.size());
}

From source file:org.apache.tez.test.TestTezJobs.java

License:Apache License

/**
 * Runs {@code SortMergeJoinExample} against the remote file system and checks
 * that the single visible output file contains exactly the terms present in both
 * inputs.
 *
 * <p>The first input holds {@code term0..term19}; the second holds only the
 * even-indexed terms, which are therefore the expected join result.
 */
@Test(timeout = 60000)
public void testSortMergeJoinExample() throws Exception {
    SortMergeJoinExample sortMergeJoinExample = new SortMergeJoinExample();
    // The example is given its own copy of the cluster configuration.
    sortMergeJoinExample.setConf(new Configuration(mrrTezCluster.getConfig()));

    Path stagingDir = new Path("/tmp/tez-staging-dir");
    Path leftInputDir = new Path("/tmp/sortMerge/inPath1");
    Path rightInputDir = new Path("/tmp/sortMerge/inPath2");
    Path outputDir = new Path("/tmp/sortMerge/outPath");
    remoteFs.mkdirs(leftInputDir);
    remoteFs.mkdirs(rightInputDir);
    remoteFs.mkdirs(stagingDir);

    // Left input gets every term, right input only the even-indexed ones.
    Set<String> expectedTerms = new HashSet<String>();
    FSDataOutputStream leftOut = remoteFs.create(new Path(leftInputDir, "file"));
    FSDataOutputStream rightOut = remoteFs.create(new Path(rightInputDir, "file"));
    BufferedWriter leftWriter = new BufferedWriter(new OutputStreamWriter(leftOut));
    BufferedWriter rightWriter = new BufferedWriter(new OutputStreamWriter(rightOut));
    for (int i = 0; i < 20; i++) {
        String term = "term" + i;
        leftWriter.write(term);
        leftWriter.newLine();
        if (i % 2 != 0) {
            continue;
        }
        rightWriter.write(term);
        rightWriter.newLine();
        expectedTerms.add(term);
    }
    leftWriter.close();
    rightWriter.close();
    leftOut.close();
    rightOut.close();

    String stagingDirOption = "-D" + TezConfiguration.TEZ_AM_STAGING_DIR + "=" + stagingDir.toString();
    String[] args = new String[] { stagingDirOption, leftInputDir.toString(), rightInputDir.toString(), "1",
            outputDir.toString() };
    assertEquals(0, sortMergeJoinExample.run(args));

    // Exactly one visible output file is expected; names starting with "_" or "." are ignored.
    FileStatus[] outputFiles = remoteFs.listStatus(outputDir, new PathFilter() {
        @Override
        public boolean accept(Path candidate) {
            String fileName = candidate.getName();
            return !(fileName.startsWith("_") || fileName.startsWith("."));
        }
    });
    assertEquals(1, outputFiles.length);

    // Every output line must be an expected term, and every expected term must appear.
    FSDataInputStream resultStream = remoteFs.open(outputFiles[0].getPath());
    BufferedReader resultReader = new BufferedReader(new InputStreamReader(resultStream));
    for (String line = resultReader.readLine(); line != null; line = resultReader.readLine()) {
        assertTrue(expectedTerms.remove(line));
    }
    resultReader.close();
    resultStream.close();
    assertEquals(0, expectedTerms.size());
}