Example usage for org.apache.hadoop.fs.PathFilter

List of usage examples for org.apache.hadoop.fs.PathFilter

Introduction

On this page you can find usage examples for org.apache.hadoop.fs.PathFilter.

Prototype

PathFilter

Source Link

Usage

From source file:org.apache.nifi.processors.hadoop.ListHDFS.java

License:Apache License

/**
 * Builds a filter that accepts a path only when its final name component
 * fully matches the regular expression held in the {@code FILE_FILTER} property.
 *
 * @param context supplies the value of the {@code FILE_FILTER} property
 * @return a filter that matches path names against the compiled pattern
 */
private PathFilter createPathFilter(final ProcessContext context) {
    final String regex = context.getProperty(FILE_FILTER).getValue();
    final Pattern namePattern = Pattern.compile(regex);
    final PathFilter nameFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            final String name = path.getName();
            // matches() requires the whole name to match, not just a substring.
            return namePattern.matcher(name).matches();
        }
    };
    return nameFilter;
}

From source file:org.apache.oozie.action.hadoop.TestMapReduceActionExecutor.java

License:Apache License

/**
 * Returns the path of the first entry in {@code outputDir} whose name starts
 * with {@code "part"} (e.g. part-[m/r]-00000).
 *
 * @param outputDir directory to list
 * @param fs file system used to list {@code outputDir}
 * @return path of the first matching entry returned by the listing
 * @throws FileNotFoundException if the listing contains no entry starting with "part"
 * @throws IOException if listing the directory fails
 */
private Path getOutputFile(Path outputDir, FileSystem fs) throws FileNotFoundException, IOException {
    final FileStatus[] files = fs.listStatus(outputDir, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith("part");
        }
    });
    // Fail with a descriptive, declared exception instead of an
    // ArrayIndexOutOfBoundsException (or NPE) when nothing matched.
    if (files == null || files.length == 0) {
        throw new FileNotFoundException("No file starting with 'part' found in " + outputDir);
    }
    return files[0].getPath(); //part-[m/r]-00000
}

From source file:org.apache.oozie.action.hadoop.TestSqoopActionExecutor.java

License:Apache License

/**
 * Submits the action built from {@code getActionXmlFreeFromQuery()}, waits for
 * the launcher job to complete, and then verifies:
 * <ul>
 *   <li>the launcher succeeded and no id swap was recorded in the action data,</li>
 *   <li>the external id is unchanged and the external status is "SUCCEEDED" after {@code check},</li>
 *   <li>the action status is OK after {@code end} and Hadoop counters are present,</li>
 *   <li>the "part-" files in the Sqoop output directory hold exactly 3 lines, each containing "a",</li>
 *   <li>the action data carries a non-blank {@code LauncherMain.HADOOP_JOBS} property.</li>
 * </ul>
 *
 * @throws Exception if any step of the action life cycle or verification fails
 */
public void testSqoopActionFreeFormQuery() throws Exception {
    createDB();

    Context context = createContext(getActionXmlFreeFromQuery());
    final RunningJob launcherJob = submitAction(context);
    String launcherId = context.getAction().getExternalId();
    waitFor(120 * 1000, new Predicate() {
        public boolean evaluate() throws Exception {
            return launcherJob.isComplete();
        }
    });
    assertTrue(launcherJob.isSuccessful());
    Map<String, String> actionData = LauncherMapperHelper.getActionData(getFileSystem(), context.getActionDir(),
            context.getProtoActionConf());
    assertFalse(LauncherMapperHelper.hasIdSwap(actionData));

    SqoopActionExecutor ae = new SqoopActionExecutor();
    ae.check(context, context.getAction());
    // assertEquals reports both ids on failure, unlike assertTrue(a.equals(b)).
    assertEquals(launcherId, context.getAction().getExternalId());
    assertEquals("SUCCEEDED", context.getAction().getExternalStatus());
    assertNotNull(context.getAction().getExternalChildIDs());
    ae.end(context, context.getAction());
    assertEquals(WorkflowAction.Status.OK, context.getAction().getStatus());

    String hadoopCounters = context.getVar(MapReduceActionExecutor.HADOOP_COUNTERS);
    assertNotNull(hadoopCounters);
    assertFalse(hadoopCounters.isEmpty());

    FileSystem fs = getFileSystem();
    FileStatus[] parts = fs.listStatus(new Path(getSqoopOutputDir()), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith("part-");
        }
    });
    int count = 0;
    for (FileStatus part : parts) {
        BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(part.getPath())));
        try {
            String line = br.readLine();
            while (line != null) {
                assertTrue(line.contains("a"));
                count++;
                line = br.readLine();
            }
        } finally {
            // Close even when an assertion or read fails so the stream is not leaked.
            br.close();
        }
    }
    assertEquals(3, count);

    assertNotNull(context.getAction().getData());
    Properties outputData = new Properties();
    outputData.load(new StringReader(context.getAction().getData()));
    assertTrue(outputData.containsKey(LauncherMain.HADOOP_JOBS));
    assertTrue(outputData.getProperty(LauncherMain.HADOOP_JOBS).trim().length() > 0);
}

From source file:org.apache.oozie.service.ShareLibService.java

License:Apache License

/**
 * Purge libs./*w w  w  .  j a v  a  2s .  c o m*/
 *
 * @param fs the fs
 * @param prefix the prefix
 * @param current the current time
 * @throws IOException Signals that an I/O exception has occurred.
 */
private void purgeLibs(FileSystem fs, final String prefix, final Date current) throws IOException {
    Path executorLibBasePath = services.get(WorkflowAppService.class).getSystemLibPath();
    PathFilter directoryFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            if (path.getName().startsWith(prefix)) {
                String name = path.getName();
                String time = name.substring(prefix.length());
                Date d = null;
                try {
                    d = dateFormat.parse(time);
                } catch (ParseException e) {
                    return false;
                }
                return (current.getTime() - d.getTime()) > retentionTime;
            } else {
                return false;
            }
        }
    };
    FileStatus[] dirList = fs.listStatus(executorLibBasePath, directoryFilter);
    Arrays.sort(dirList, new Comparator<FileStatus>() {
        // sort in desc order
        @Override
        public int compare(FileStatus o1, FileStatus o2) {
            return o2.getPath().getName().compareTo(o1.getPath().getName());
        }
    });

    // Logic is to keep all share-lib between current timestamp and 7days old + 1 latest sharelib older than 7 days.
    // refer OOZIE-1761
    for (int i = 1; i < dirList.length; i++) {
        Path dirPath = dirList[i].getPath();
        fs.delete(dirPath, true);
        LOG.info("Deleted old launcher jar lib directory {0}", dirPath.getName());
    }
}

From source file:org.apache.oozie.service.ShareLibService.java

License:Apache License

/**
 * Finds the most recent timestamped lib directory under a root directory.
 * <p>
 * Entries of {@code rootDir} whose name starts with {@code prefix} are
 * inspected; the remainder of each name is parsed with {@code dateFormat},
 * and entries that fail to parse are skipped. The entry with the greatest
 * parsed date wins.
 *
 * @param rootDir the root dir
 * @param prefix the prefix
 * @return the path with the latest timestamp, or {@code rootDir} when no
 *         entry carries a parseable timestamp later than the epoch
 * @throws IOException Signals that an I/O exception has occurred.
 */
public Path getLatestLibPath(Path rootDir, final String prefix) throws IOException {
    final PathFilter prefixFilter = new PathFilter() {
        @Override
        public boolean accept(Path candidate) {
            return candidate.getName().startsWith(prefix);
        }
    };

    Path latest = null;
    Date newest = new Date(0L);
    for (FileStatus status : fs.listStatus(rootDir, prefixFilter)) {
        final String suffix = status.getPath().getName().substring(prefix.length());
        Date stamp;
        try {
            stamp = dateFormat.parse(suffix);
        } catch (ParseException e) {
            // Name does not end in a timestamp; ignore this entry.
            continue;
        }
        if (stamp.compareTo(newest) > 0) {
            latest = status.getPath();
            newest = stamp;
        }
    }
    // If there are no timestamped directories, fall back to root directory
    return latest != null ? latest : rootDir;
}

From source file:org.apache.pig.test.TestEvalPipeline2.java

License:Apache License

/**
 * Stores the cross of a four-row relation with its own {@code COUNT} under
 * a 40-byte per-reducer setting, then checks that at least two "part" files
 * were written and that the sorted result rows are as expected.
 */
@SuppressWarnings("unchecked")
@Test
public void testCrossAfterGroupAll() throws Exception {
    final String bytesPerReducerKey = "pig.exec.reducers.bytes.per.reducer";
    final String[] rows = { "1\tA", "2\tB", "3\tC", "4\tD", };

    Util.createInputFile(cluster, "table_testCrossAfterGroupAll", rows);

    try {
        pigServer.getPigContext().getProperties().setProperty(bytesPerReducerKey, "40");
        pigServer.registerQuery("A = load 'table_testCrossAfterGroupAll' as (a0:int, a1:chararray);");
        pigServer.registerQuery("B = group A all;");
        pigServer.registerQuery("C = foreach B generate COUNT(A);");
        pigServer.registerQuery("D = cross A, C;");
        final Path outputPath = FileLocalizer.getTemporaryPath(pigServer.getPigContext());
        final ExecJob storeJob = pigServer.store("D", outputPath.toString());
        final FileSystem outputFs = outputPath.getFileSystem(cluster.getConfiguration());
        final PathFilter partFilter = new PathFilter() {
            @Override
            public boolean accept(Path candidate) {
                return candidate.getName().startsWith("part");
            }
        };
        final FileStatus[] partFiles = outputFs.listStatus(outputPath, partFilter);
        // auto-parallelism is 2 in MR, 20 in Tez, so check >=2
        Assert.assertTrue(partFiles.length >= 2);

        // Collect the stored tuples and sort them so the row order is stable.
        final List<Tuple> results = new ArrayList<Tuple>();
        for (Iterator<Tuple> it = storeJob.getResults(); it.hasNext();) {
            results.add(it.next());
        }
        Collections.sort(results);

        final String[] expected = { "(1,A,4)", "(2,B,4)", "(3,C,4)", "(4,D,4)" };
        Assert.assertEquals(4, results.size());
        for (int i = 0; i < expected.length; i++) {
            Assert.assertEquals(expected[i], results.get(i).toString());
        }
    } finally {
        pigServer.getPigContext().getProperties().remove(bytesPerReducerKey);
    }
}

From source file:org.apache.pig.test.Util.java

License:Apache License

/**
 * Reads every line of a file, or of all entries in a directory whose names
 * do not start with "_", decoding the content as UTF-8.
 *
 * @param fs file system to read from
 * @param fileName file or directory to read; on Windows, backslashes are
 *        replaced with forward slashes first
 * @return all lines read, in the order the files were listed
 * @throws IOException if the path does not exist or reading fails
 */
static public String[] readOutput(FileSystem fs, String fileName) throws IOException {
    if (Util.WINDOWS) {
        fileName = fileName.replace('\\', '/');
    }
    Path path = new Path(fileName);
    if (!fs.exists(path)) {
        throw new IOException("Path " + fileName + " does not exist on the FileSystem");
    }
    FileStatus fileStatus = fs.getFileStatus(path);
    FileStatus[] files;
    if (fileStatus.isDir()) {
        files = fs.listStatus(path, new PathFilter() {
            @Override
            public boolean accept(Path p) {
                return !p.getName().startsWith("_");
            }
        });
    } else {
        files = new FileStatus[] { fileStatus };
    }
    List<String> result = new ArrayList<String>();
    for (FileStatus f : files) {
        FSDataInputStream stream = fs.open(f.getPath());
        BufferedReader br = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                result.add(line);
            }
        } finally {
            // Close even when a read fails so the underlying stream is not leaked.
            br.close();
        }
    }
    return result.toArray(new String[result.size()]);
}

From source file:org.apache.pig.test.Util.java

License:Apache License

/**
 * Creates a PathFilter that rejects any path whose file name begins with
 * an underscore and accepts every other path.
 *
 * @return PathFilter accepting only names that do not start with "_"
 */
public static PathFilter getSuccessMarkerPathFilter() {
    final PathFilter skipUnderscoreNames = new PathFilter() {
        @Override
        public boolean accept(Path candidate) {
            final String name = candidate.getName();
            return !name.startsWith("_");
        }
    };
    return skipUnderscoreNames;
}

From source file:org.apache.pig.tez.TestTezAutoParallelism.java

License:Apache License

/**
 * Stores the result of a group-by into "output1" and checks that exactly
 * one "part" file was written.
 *
 * @throws IOException if registering, storing or listing the output fails
 */
@Test
public void testGroupBy() throws IOException {
    // parallelism is 3 originally, reduce to 1
    pigServer.getPigContext().getProperties().setProperty(PigConfiguration.PIG_NO_SPLIT_COMBINATION, "true");
    pigServer.getPigContext().getProperties().setProperty(MRConfiguration.MAX_SPLIT_SIZE, "3000");
    pigServer.getPigContext().getProperties().setProperty(InputSizeReducerEstimator.BYTES_PER_REDUCER_PARAM,
            Long.toString(InputSizeReducerEstimator.DEFAULT_BYTES_PER_REDUCER));
    pigServer.registerQuery("A = load '" + INPUT_FILE1 + "' as (name:chararray, age:int);");
    pigServer.registerQuery("B = group A by name;");
    pigServer.store("B", "output1");
    FileSystem fs = cluster.getFileSystem();
    FileStatus[] files = fs.listStatus(new Path("output1"), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith("part");
        }
    });
    // Expected value goes first so a failure message reads correctly.
    assertEquals(1, files.length);
}

From source file:org.apache.pig.tez.TestTezAutoParallelism.java

License:Apache License

/**
 * Stores the result of a group (parallel 3) followed by an order-by into
 * "output2" and checks that exactly one "part" file was written.
 *
 * @throws IOException if registering, storing or listing the output fails
 */
@Test
public void testOrderbyDecreaseParallelism() throws IOException {
    // order by parallelism is 3 originally, reduce to 1
    pigServer.getPigContext().getProperties().setProperty(PigConfiguration.PIG_NO_SPLIT_COMBINATION, "true");
    pigServer.getPigContext().getProperties().setProperty(MRConfiguration.MAX_SPLIT_SIZE, "3000");
    pigServer.getPigContext().getProperties().setProperty(InputSizeReducerEstimator.BYTES_PER_REDUCER_PARAM,
            Long.toString(InputSizeReducerEstimator.DEFAULT_BYTES_PER_REDUCER));
    pigServer.registerQuery("A = load '" + INPUT_FILE1 + "' as (name:chararray, age:int);");
    pigServer.registerQuery("B = group A by name parallel 3;");
    pigServer.registerQuery("C = foreach B generate group as name, AVG(A.age) as age;");
    pigServer.registerQuery("D = order C by age;");
    pigServer.store("D", "output2");
    FileSystem fs = cluster.getFileSystem();
    FileStatus[] files = fs.listStatus(new Path("output2"), new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith("part");
        }
    });
    // Expected value goes first so a failure message reads correctly.
    assertEquals(1, files.length);
}