List of usage examples for org.apache.hadoop.fs.FileUtil.stat2Paths

public static Path[] stat2Paths(FileStatus[] stats)

stat2Paths converts an array of FileStatus objects, as returned by FileSystem.listStatus() or FileSystem.globStatus(), into the corresponding array of Path objects. If the input array is null it returns null, which is why several of the examples below check the result before iterating.
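Before the per-project examples, here is a minimal, self-contained sketch of the typical pattern. It is an illustration only: the directory and glob paths are placeholders, and it assumes a default-configured FileSystem.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class Stat2PathsDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());

        // listStatus() returns FileStatus[]; stat2Paths() reduces it to just the paths
        FileStatus[] stats = fs.listStatus(new Path("/user/demo/input")); // placeholder dir
        Path[] paths = FileUtil.stat2Paths(stats);
        for (Path p : paths) {
            System.out.println(p);
        }

        // globStatus() may return null when nothing matches, and stat2Paths(null)
        // then returns null, so guard before iterating (as the examples below do)
        Path[] globbed = FileUtil.stat2Paths(fs.globStatus(new Path("/user/demo/input/*.txt")));
        if (globbed != null) {
            for (Path p : globbed) {
                System.out.println(p);
            }
        }
    }
}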
From source file: org.apache.oozie.action.hadoop.FSLauncherURIHandler.java
License: Apache License

@Override
public boolean delete(URI uri, Configuration conf) throws LauncherException {
    boolean status = false;
    try {
        FileSystem fs = FileSystem.get(uri, conf);
        Path[] pathArr = FileUtil.stat2Paths(fs.globStatus(getNormalizedPath(uri)));
        if (pathArr != null && pathArr.length > 0) {
            int fsGlobMax = conf.getInt(LauncherMapper.CONF_OOZIE_ACTION_FS_GLOB_MAX, 1000);
            if (pathArr.length > fsGlobMax) {
                throw new LauncherException(
                        "exceeds max number (" + fsGlobMax + ") of files/dirs to delete in <prepare>");
            }
            for (Path path : pathArr) {
                if (fs.exists(path)) {
                    status = fs.delete(path, true);
                    if (status) {
                        System.out.println("Deletion of path " + path + " succeeded.");
                    } else {
                        System.out.println("Deletion of path " + path + " failed.");
                    }
                }
            }
        }
    } catch (IOException e) {
        throw new LauncherException("Deletion of path " + uri + " failed.", e);
    }
    return status;
}
From source file: org.apache.oozie.command.XLogPurgeXCommand.java
License: Apache License

private void deleteJobLogs(FileSystem fs, String hdfsDir, List<String> jobIds) {
    Path[] paths = null;
    try {
        FileStatus[] fileStatuses = fs.listStatus(new Path(hdfsDir));
        paths = FileUtil.stat2Paths(fileStatuses);
    } catch (IOException ex) {
        LOG.error("file not found " + ex.getMessage());
        return; // nothing to delete if the listing failed; also avoids an NPE below
    }
    for (Path path : paths) {
        for (String jobId : jobIds) {
            final Path p = new Path(path, jobId + ".log");
            try {
                if (fs.exists(p)) {
                    fs.delete(p, true);
                }
            } catch (IOException ex) {
                LOG.error("cannot delete job logs in hdfs",
                        new HadoopAccessorException(ErrorCode.E0902, "cannot delete file " + p));
            }
        }
    }
}
From source file: org.apache.pig.builtin.TestAvroStorage.java
License: Apache License

private void verifyResults(String outPath, String expectedOutpath, String expectedCodec) throws IOException {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    /* read in expected results */
    Set<GenericData.Record> expected = getExpected(expectedOutpath);
    /* read in output results and compare */
    Path output = new Path(outPath);
    assertTrue("Output dir does not exist!", fs.exists(output) && fs.getFileStatus(output).isDir());
    Path[] paths = FileUtil.stat2Paths(fs.listStatus(output, hiddenPathFilter));
    assertTrue("Split field dirs not found!", paths != null);
    for (Path path : paths) {
        Path[] files = FileUtil.stat2Paths(fs.listStatus(path, hiddenPathFilter));
        assertTrue("No files found for path: " + path.toUri().getPath(), files != null);
        for (Path filePath : files) {
            assertTrue("This shouldn't be a directory", fs.isFile(filePath));
            GenericDatumReader<GenericData.Record> reader = new GenericDatumReader<GenericData.Record>();
            DataFileStream<GenericData.Record> in =
                    new DataFileStream<GenericData.Record>(fs.open(filePath), reader);
            assertEquals("codec", expectedCodec, in.getMetaString("avro.codec"));
            int count = 0;
            while (in.hasNext()) {
                GenericData.Record obj = in.next();
                // use null-safe concatenation for obj in the failure message
                assertTrue("Avro result object found that's not expected: Found "
                        + (obj != null ? obj.getSchema() : "null") + ", " + obj
                        + "\nExpected " + (expected != null ? expected.toString() : "null") + "\n",
                        expected.contains(obj));
                count++;
            }
            in.close();
            assertEquals(expected.size(), count);
        }
    }
}
From source file: org.apache.pig.builtin.TestAvroStorage.java
License: Apache License

private Set<GenericData.Record> getExpected(String pathstr) throws IOException {
    Set<GenericData.Record> ret = new TreeSet<GenericData.Record>(new Comparator<GenericData.Record>() {
        @Override
        public int compare(Record o1, Record o2) {
            return o1.toString().compareTo(o2.toString());
        }
    });
    FileSystem fs = FileSystem.getLocal(new Configuration());
    /* read in the expected results */
    Path output = new Path(pathstr);
    assertTrue("Expected output does not exist!", fs.exists(output));
    Path[] paths = FileUtil.stat2Paths(fs.listStatus(output, hiddenPathFilter));
    assertTrue("Split field dirs not found!", paths != null);
    for (Path path : paths) {
        Path[] files = FileUtil.stat2Paths(fs.listStatus(path, hiddenPathFilter));
        assertTrue("No files found for path: " + path.toUri().getPath(), files != null);
        for (Path filePath : files) {
            assertTrue("This shouldn't be a directory", fs.isFile(filePath));
            GenericDatumReader<GenericData.Record> reader = new GenericDatumReader<GenericData.Record>();
            DataFileStream<GenericData.Record> in =
                    new DataFileStream<GenericData.Record>(fs.open(filePath), reader);
            while (in.hasNext()) {
                GenericData.Record obj = in.next();
                ret.add(obj);
            }
            in.close();
        }
    }
    return ret;
}
From source file: org.apache.pig.piggybank.test.storage.avro.TestAvroStorage.java
License: Apache License

private void verifyResults(String outPath, String expectedOutpath, String expectedCodec) throws IOException {
    FileSystem fs = FileSystem.getLocal(new Configuration());
    /* read in expected results */
    Set<Object> expected = getExpected(expectedOutpath);
    /* read in output results and compare */
    Path output = new Path(outPath);
    assertTrue("Output dir does not exist!", fs.exists(output) && fs.getFileStatus(output).isDir());
    Path[] paths = FileUtil.stat2Paths(fs.listStatus(output, hiddenPathFilter));
    assertTrue("Split field dirs not found!", paths != null);
    for (Path path : paths) {
        Path[] files = FileUtil.stat2Paths(fs.listStatus(path, hiddenPathFilter));
        assertTrue("No files found for path: " + path.toUri().getPath(), files != null);
        for (Path filePath : files) {
            assertTrue("This shouldn't be a directory", fs.isFile(filePath));
            GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
            DataFileStream<Object> in = new DataFileStream<Object>(fs.open(filePath), reader);
            assertEquals("codec", expectedCodec, in.getMetaString("avro.codec"));
            int count = 0;
            while (in.hasNext()) {
                Object obj = in.next();
                assertTrue("Avro result object found that's not expected: " + obj, expected.contains(obj));
                count++;
            }
            in.close();
            assertEquals(expected.size(), count);
        }
    }
}
From source file: org.apache.pig.piggybank.test.storage.avro.TestAvroStorage.java
License: Apache License

private Set<Object> getExpected(String pathstr) throws IOException {
    Set<Object> ret = new HashSet<Object>();
    FileSystem fs = FileSystem.getLocal(new Configuration());
    /* read in the expected results */
    Path output = new Path(pathstr);
    assertTrue("Expected output does not exist!", fs.exists(output));
    Path[] paths = FileUtil.stat2Paths(fs.listStatus(output, hiddenPathFilter));
    assertTrue("Split field dirs not found!", paths != null);
    for (Path path : paths) {
        Path[] files = FileUtil.stat2Paths(fs.listStatus(path, hiddenPathFilter));
        assertTrue("No files found for path: " + path.toUri().getPath(), files != null);
        for (Path filePath : files) {
            assertTrue("This shouldn't be a directory", fs.isFile(filePath));
            GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
            DataFileStream<Object> in = new DataFileStream<Object>(fs.open(filePath), reader);
            while (in.hasNext()) {
                Object obj = in.next();
                ret.add(obj);
            }
            in.close();
        }
    }
    return ret;
}
From source file: org.apache.pig.piggybank.test.storage.TestMultiStorage.java
License: Apache License

/**
 * Test if records are split into directories corresponding to split field
 * values.
 *
 * @param mode
 * @throws IOException
 */
private void verifyResults(Mode mode, String outPath) throws IOException {
    FileSystem fs = (Mode.local == mode ? FileSystem.getLocal(new Configuration()) : cluster.getFileSystem());
    Path output = new Path(outPath);
    Assert.assertTrue("Output dir does not exist!", fs.exists(output) && fs.getFileStatus(output).isDir());
    Path[] paths = FileUtil.stat2Paths(fs.listStatus(output, hiddenPathFilter));
    Assert.assertTrue("Split field dirs not found!", paths != null);
    for (Path path : paths) {
        String splitField = path.getName();
        Path[] files = FileUtil.stat2Paths(fs.listStatus(path, hiddenPathFilter));
        Assert.assertTrue("No files found for path: " + path.toUri().getPath(), files != null);
        for (Path filePath : files) {
            Assert.assertTrue("This shouldn't be a directory", fs.isFile(filePath));
            BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(filePath)));
            String line = "";
            int count = 0;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("\\t");
                Assert.assertEquals(3, fields.length); // expected value first
                Assert.assertEquals("Unexpected field value in the output record", splitField, fields[1]);
                count++;
                System.out.println("field: " + fields[1]);
            }
            reader.close();
            Assert.assertEquals(3, count);
        }
    }
}
From source file: org.apache.solr.hadoop.MorphlineBasicMiniMRTest.java
License: Apache License

@Test
public void mrRun() throws Exception {
    FileSystem fs = dfsCluster.getFileSystem();
    Path inDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/input"));
    fs.delete(inDir, true);
    String DATADIR = "/user/testing/testMapperReducer/data";
    Path dataDir = fs.makeQualified(new Path(DATADIR));
    fs.delete(dataDir, true);
    Path outDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/output"));
    fs.delete(outDir, true);

    assertTrue(fs.mkdirs(inDir));
    Path INPATH = new Path(inDir, "input.txt");
    OutputStream os = fs.create(INPATH);
    Writer wr = new OutputStreamWriter(os, "UTF-8");
    wr.write(DATADIR + "/" + inputAvroFile);
    wr.close();

    assertTrue(fs.mkdirs(dataDir));
    fs.copyFromLocalFile(new Path(DOCUMENTS_DIR, inputAvroFile), dataDir);

    JobConf jobConf = getJobConf();
    if (ENABLE_LOCAL_JOB_RUNNER) {
        // enable Hadoop LocalJobRunner; this allows running in a debugger and setting breakpoints
        jobConf.set("mapred.job.tracker", "local");
    }
    jobConf.setMaxMapAttempts(1);
    jobConf.setMaxReduceAttempts(1);
    jobConf.setJar(SEARCH_ARCHIVES_JAR);
    jobConf.setBoolean(ExtractingParams.IGNORE_TIKA_EXCEPTION, false);

    int shards = 2;
    int maxReducers = Integer.MAX_VALUE;
    if (ENABLE_LOCAL_JOB_RUNNER) {
        // the local job runner has a couple of limitations: only one reducer is supported
        // and the DistributedCache doesn't work.
        // see http://blog.cloudera.com/blog/2009/07/advice-on-qa-testing-your-mapreduce-jobs/
        maxReducers = 1;
        shards = 1;
    }

    String[] args = new String[] {
            "--morphline-file=" + RESOURCES_DIR + "/test-morphlines/solrCellDocumentTypes.conf",
            "--morphline-id=morphline1",
            "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
            "--output-dir=" + outDir.toString(),
            "--shards=" + shards,
            "--verbose",
            numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
            numRuns % 3 == 0 ? "--reducers=" + shards
                    : (numRuns % 3 == 1 ? "--reducers=-1" : "--reducers=" + Math.min(8, maxReducers)) };
    if (numRuns % 3 == 2) {
        args = concat(args, new String[] { "--fanout=2" });
    }
    if (numRuns == 0) {
        // force (slow) MapReduce-based randomization to get coverage for that as well
        args = concat(new String[] { "-D", MapReduceIndexerTool.MAIN_MEMORY_RANDOMIZATION_THRESHOLD + "=-1" },
                args);
    }
    MapReduceIndexerTool tool = createTool();
    int res = ToolRunner.run(jobConf, tool, args);
    assertEquals(0, res);
    Job job = tool.job;
    assertTrue(job.isComplete());
    assertTrue(job.isSuccessful());

    if (numRuns % 3 != 2) {
        // Only run this check if mtree merge is disabled.
        // With mtree merge enabled the BatchWriter counters aren't available anymore because
        // variable "job" now refers to the merge job rather than the indexing job
        assertEquals("Invalid counter " + SolrRecordWriter.class.getName() + "." + SolrCounters.DOCUMENTS_WRITTEN,
                count, job.getCounters()
                        .findCounter(SolrCounters.class.getName(), SolrCounters.DOCUMENTS_WRITTEN.toString())
                        .getValue());
    }

    // Check the output is as expected
    outDir = new Path(outDir, MapReduceIndexerTool.RESULTS_DIR);
    Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(outDir));
    System.out.println("outputfiles:" + Arrays.toString(outputFiles));
    TestUtils.validateSolrServerDocumentCount(MINIMR_CONF_DIR, fs, outDir, count, shards);

    // run again in --dry-run mode:
    tool = createTool();
    args = concat(args, new String[] { "--dry-run" });
    res = ToolRunner.run(jobConf, tool, args);
    assertEquals(0, res);

    numRuns++;
}
From source file: org.apache.sqoop.mapreduce.CombineFileInputFormat.java
License: Apache License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSizeNode = 0;
    long minSizeRack = 0;
    long maxSize = 0;
    Configuration conf = job.getConfiguration();

    // the values specified by setxxxSplitSize() take precedence over the
    // values that might have been specified in the config
    if (minSplitSizeNode != 0) {
        minSizeNode = minSplitSizeNode;
    } else {
        minSizeNode = conf.getLong(SPLIT_MINSIZE_PERNODE, 0);
    }
    if (minSplitSizeRack != 0) {
        minSizeRack = minSplitSizeRack;
    } else {
        minSizeRack = conf.getLong(SPLIT_MINSIZE_PERRACK, 0);
    }
    if (maxSplitSize != 0) {
        maxSize = maxSplitSize;
    } else {
        maxSize = conf.getLong("mapred.max.split.size", 0);
    }
    if (minSizeNode != 0 && maxSize != 0 && minSizeNode > maxSize) {
        throw new IOException("Minimum split size per node " + minSizeNode
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && maxSize != 0 && minSizeRack > maxSize) {
        throw new IOException("Minimum split size per rack " + minSizeRack
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && minSizeNode > minSizeRack) {
        throw new IOException("Minimum split size per node " + minSizeNode
                + " cannot be smaller than minimum split size per rack " + minSizeRack);
    }

    // all the files in the input set
    Path[] paths = FileUtil.stat2Paths(listStatus(job).toArray(new FileStatus[0]));
    List<InputSplit> splits = new ArrayList<InputSplit>();
    if (paths.length == 0) {
        return splits;
    }

    // Qualify the paths first. This is a costly operation and
    // we should do it first, otherwise we will incur doing it multiple
    // times, one time each for each pool in the next loop.
    List<Path> newpaths = new LinkedList<Path>();
    for (int i = 0; i < paths.length; i++) {
        FileSystem fs = paths[i].getFileSystem(conf);
        // the scheme and authority will be kept if the path is
        // a valid path for a non-default file system
        Path p = fs.makeQualified(paths[i]);
        newpaths.add(p);
    }
    paths = null;

    // In one single iteration, process all the paths in a single pool.
    // Processing one pool at a time ensures that a split contains paths
    // from a single pool only.
    for (MultiPathFilter onepool : pools) {
        ArrayList<Path> myPaths = new ArrayList<Path>();
        // pick one input path. If it matches all the filters in a pool,
        // add it to the output set
        for (Iterator<Path> iter = newpaths.iterator(); iter.hasNext();) {
            Path p = iter.next();
            if (onepool.accept(p)) {
                myPaths.add(p); // add it to my output set
                iter.remove();
            }
        }
        // create splits for all files in this pool.
        getMoreSplits(job, myPaths.toArray(new Path[myPaths.size()]), maxSize, minSizeNode, minSizeRack, splits);
    }

    // create splits for all files that are not in any pool.
    getMoreSplits(job, newpaths.toArray(new Path[newpaths.size()]), maxSize, minSizeNode, minSizeRack, splits);

    // free up the rackToNodes map
    rackToNodes.clear();
    return splits;
}
From source file: org.apache.tinkerpop.gremlin.hadoop.structure.hdfs.HDFSTools.java
License: Apache License

public static void decompressPath(final FileSystem fs, final String in, final String out,
        final String compressedFileSuffix, final boolean deletePrevious) throws IOException {
    final Path inPath = new Path(in);
    if (fs.isFile(inPath)) {
        HDFSTools.decompressFile(fs, in, out, deletePrevious);
    } else {
        final Path outPath = new Path(out);
        if (!fs.exists(outPath)) {
            fs.mkdirs(outPath);
        }
        for (final Path path : FileUtil.stat2Paths(fs.globStatus(new Path(in + FORWARD_ASTERISK)))) {
            if (path.getName().endsWith(compressedFileSuffix)) {
                HDFSTools.decompressFile(fs, path.toString(),
                        outPath.toString() + FORWARD_SLASH + path.getName().split("\\.")[0], deletePrevious);
            }
        }
    }
}