List of usage examples for org.apache.hadoop.fs.FileSystem#makeQualified
public Path makeQualified(Path path)
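Before the samples from real projects, a minimal sketch of what the call does may help: makeQualified resolves a relative or scheme-less Path against the FileSystem's URI and working directory, so downstream code can rely on a fully qualified scheme://authority/path form. The paths and configuration values below are illustrative, not taken from any of the projects listed.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // assumption: fs.defaultFS points at a cluster, e.g. hdfs://namenode:8020;
        // with no cluster configured this falls back to the local file system
        FileSystem fs = FileSystem.get(conf);

        // a relative path carries neither scheme nor authority
        Path relative = new Path("data/input.txt");

        // makeQualified resolves it against the file system's URI and working
        // directory, e.g. hdfs://namenode:8020/user/alice/data/input.txt
        Path qualified = fs.makeQualified(relative);
        System.out.println(qualified);
    }
}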
From source file:org.apache.oozie.action.hadoop.GitMain.java
License:Apache License
/**
 * Parse action configuration and set configuration variables.
 *
 * @param actionConf Oozie action configuration
 * @throws OozieActionConfiguratorException if any required property is missing
 */
private void parseActionConfiguration(final Configuration actionConf) throws OozieActionConfiguratorException {
    nameNode = checkAndGetTrimmed(actionConf, GitActionExecutor.NAME_NODE);
    destinationUri = checkAndGetTrimmed(actionConf, GitActionExecutor.DESTINATION_URI);
    try {
        final FileSystem fs = FileSystem.get(isValidUri(destinationUri), actionConf);
        destinationUri = fs.makeQualified(new Path(destinationUri)).toString();
    } catch (final IOException e) {
        throw new OozieActionConfiguratorException("Action Configuration does not have "
                + "a valid filesystem for URI " + GitActionExecutor.DESTINATION_URI
                + ", exception: " + e.toString());
    }
    gitUri = isValidUri(checkAndGetTrimmed(actionConf, GitActionExecutor.GIT_URI)).toString();
    gitBranch = actionConf.get(GitActionExecutor.GIT_BRANCH);
    keyPath = actionConf.get(GitActionExecutor.KEY_PATH);
}
From source file:org.apache.oozie.service.TestAuthorizationService.java
License:Apache License
private void _testAuthorizationService(boolean useDefaultGroup) throws Exception {
    init(useDefaultGroup, true);
    Reader reader = IOUtils.getResourceAsReader("wf-ext-schema-valid.xml", -1);
    Writer writer = new FileWriter(new File(getTestCaseDir(), "workflow.xml"));
    IOUtils.copyCharStream(reader, writer);

    final DagEngine engine = new DagEngine(getTestUser());
    Configuration jobConf = new XConfiguration();
    jobConf.set(OozieClient.APP_PATH, getTestCaseFileUri("workflow.xml"));
    jobConf.set(OozieClient.USER_NAME, getTestUser());
    if (useDefaultGroup) {
        jobConf.set(OozieClient.GROUP_NAME, getTestGroup());
    } else {
        jobConf.set(OozieClient.GROUP_NAME, getTestGroup() + ",foo");
    }
    jobConf.set(OozieClient.LOG_TOKEN, "t");
    jobConf.set("external-status", "ok");
    jobConf.set("signal-value", "based_on_action_status");
    final String jobId = engine.submitJob(jobConf, true);

    HadoopAccessorService has = Services.get().get(HadoopAccessorService.class);
    URI uri = getFileSystem().getUri();
    Configuration fsConf = has.createJobConf(uri.getAuthority());
    FileSystem fileSystem = has.createFileSystem(getTestUser(), uri, fsConf);

    Path path = new Path(fileSystem.getWorkingDirectory(), UUID.randomUUID().toString());
    Path fsTestDir = fileSystem.makeQualified(path);
    System.out.println(XLog.format("Setting FS testcase work dir[{0}]", fsTestDir));
    fileSystem.delete(fsTestDir, true);
    if (!fileSystem.mkdirs(path)) {
        throw new IOException(XLog.format("Could not create FS testcase dir [{0}]", fsTestDir));
    }

    String appPath = fsTestDir.toString() + "/app";
    Path jobXmlPath = new Path(appPath, "workflow.xml");
    fileSystem.create(jobXmlPath).close();
    fileSystem.setOwner(jobXmlPath, getTestUser(), getTestGroup());

    FsPermission permissions = new FsPermission(FsAction.READ_WRITE, FsAction.READ, FsAction.NONE);
    fileSystem.setPermission(jobXmlPath, permissions);

    AuthorizationService as = services.get(AuthorizationService.class);
    assertNotNull(as);
    as.authorizeForGroup(getTestUser(), getTestGroup());
    assertNotNull(as.getDefaultGroup(getTestUser()));
    as.authorizeForApp(getTestUser2(), getTestGroup(), appPath, jobConf);
    try {
        as.authorizeForApp(getTestUser3(), getTestGroup(), appPath, jobConf);
        fail();
    } catch (AuthorizationException ex) {
    }

    as.authorizeForJob(getTestUser(), jobId, false);
    as.authorizeForJob(getTestUser(), jobId, true);
    if (!useDefaultGroup) {
        as.authorizeForJob("foo", jobId, true);
    }
    try {
        as.authorizeForJob("bar", jobId, true);
        fail();
    } catch (AuthorizationException ex) {
    }
}
From source file:org.apache.oozie.util.JobUtils.java
License:Apache License
/**
 * This method provides a wrapper around the hadoop 0.20/1.x and 0.23/2.x implementations.
 * TODO: Remove the workaround when we drop support for hadoop 0.20.
 *
 * @param file Path of the file to be added
 * @param conf Configuration that contains the classpath setting
 * @param fs FileSystem with respect to which the path should be interpreted (may be null)
 * @throws IOException
 */
public static void addFileToClassPath(Path file, Configuration conf, FileSystem fs) throws IOException {
    if (fs == null) {
        Configuration defaultConf = Services.get().get(HadoopAccessorService.class)
                .createJobConf(conf.get(JavaActionExecutor.HADOOP_JOB_TRACKER));
        XConfiguration.copy(conf, defaultConf);
        // it fails with conf, therefore we pass defaultConf instead
        fs = file.getFileSystem(defaultConf);
    }
    // Hadoop 0.20/1.x
    if (Services.get().get(HadoopAccessorService.class).getCachedConf()
            .get("yarn.resourcemanager.webapp.address") == null) {
        // Duplicate hadoop 1.x code to work around MAPREDUCE-2361 in Hadoop 0.20.
        // Refer OOZIE-1806.
        String filepath = file.toUri().getPath();
        String classpath = conf.get("mapred.job.classpath.files");
        conf.set("mapred.job.classpath.files", classpath == null ? filepath
                : classpath + System.getProperty("path.separator") + filepath);
        URI uri = fs.makeQualified(file).toUri();
        DistributedCache.addCacheFile(uri, conf);
    } else {
        // Hadoop 0.23/2.x
        DistributedCache.addFileToClassPath(file, conf, fs);
    }
}
From source file:org.apache.parquet.hadoop.TestParquetFileWriter.java
License:Apache License
/**
 * {@link ParquetFileWriter#mergeFooters(Path, List)} expects a fully-qualified
 * path for the root and crashes if a relative one is provided.
 */
@Test
public void testWriteMetadataFileWithRelativeOutputPath() throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path relativeRoot = new Path("target/_test_relative");
    Path qualifiedRoot = fs.makeQualified(relativeRoot);

    ParquetMetadata mock = Mockito.mock(ParquetMetadata.class);
    FileMetaData fileMetaData = new FileMetaData(
            new MessageType("root1", new PrimitiveType(REPEATED, BINARY, "a")),
            new HashMap<String, String>(), "test");
    Mockito.when(mock.getFileMetaData()).thenReturn(fileMetaData);

    List<Footer> footers = new ArrayList<Footer>();
    Footer footer = new Footer(new Path(qualifiedRoot, "one"), mock);
    footers.add(footer);

    // This should not throw an exception
    ParquetFileWriter.writeMetadataFile(conf, relativeRoot, footers, JobSummaryLevel.ALL);
}
From source file:org.apache.parquet.hadoop.util.HadoopOutputFile.java
License:Apache License
public static HadoopOutputFile fromPath(Path path, Configuration conf) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    return new HadoopOutputFile(fs, fs.makeQualified(path), conf);
}
From source file:org.apache.phoenix.mapreduce.MultiHfileOutputFormat.java
License:Apache License
/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
 * <code>splitPoints</code>. Cleans up the partitions file after the job exits.
 */
static void configurePartitioner(Job job, Set<TableRowkeyPair> tablesStartKeys) throws IOException {
    Configuration conf = job.getConfiguration();
    // create the partitions file
    Path partitionsPath = new Path(conf.get("hadoop.tmp.dir"), "partitions_" + UUID.randomUUID());
    FileSystem fs = partitionsPath.getFileSystem(conf);
    // note: makeQualified returns a new, qualified Path; its result is discarded
    // here, so partitionsPath itself is left unqualified
    fs.makeQualified(partitionsPath);
    writePartitions(conf, partitionsPath, tablesStartKeys);
    fs.deleteOnExit(partitionsPath);

    // configure job to use it
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
}
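This example is worth a caution: Path objects are immutable, and makeQualified returns a new Path rather than modifying its argument, so the bare fs.makeQualified(partitionsPath) call above is effectively a no-op. Callers that need the qualified form must reassign, e.g. partitionsPath = fs.makeQualified(partitionsPath). The code presumably still works here because the unqualified path is later resolved against the same default file system.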
From source file:org.apache.pirk.schema.data.DataSchemaLoader.java
License:Apache License
private static DataSchema readSchemaFile(String schemaFile, FileSystem fs, boolean hdfs)
        throws IOException, PIRException {
    logger.info("Loading data schemaFile = " + schemaFile + " hdfs = " + hdfs);

    // Parse and load the schema file into a DataSchema object; place in the schemaMap
    DataSchemaLoader loader = new DataSchemaLoader();
    InputStream is;
    if (hdfs) {
        logger.info("hdfs: filePath = " + schemaFile);
        is = fs.open(fs.makeQualified(new Path(schemaFile)));
    } else {
        logger.info("localFS: inputFile = " + schemaFile);
        is = new FileInputStream(schemaFile);
    }
    try {
        return loader.loadSchema(is);
    } finally {
        is.close();
    }
}
From source file:org.apache.solr.hadoop.MorphlineBasicMiniMRTest.java
License:Apache License
@Test
public void mrRun() throws Exception {
    FileSystem fs = dfsCluster.getFileSystem();
    Path inDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/input"));
    fs.delete(inDir, true);
    String DATADIR = "/user/testing/testMapperReducer/data";
    Path dataDir = fs.makeQualified(new Path(DATADIR));
    fs.delete(dataDir, true);
    Path outDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/output"));
    fs.delete(outDir, true);

    assertTrue(fs.mkdirs(inDir));
    Path INPATH = new Path(inDir, "input.txt");
    OutputStream os = fs.create(INPATH);
    Writer wr = new OutputStreamWriter(os, "UTF-8");
    wr.write(DATADIR + "/" + inputAvroFile);
    wr.close();

    assertTrue(fs.mkdirs(dataDir));
    fs.copyFromLocalFile(new Path(DOCUMENTS_DIR, inputAvroFile), dataDir);

    JobConf jobConf = getJobConf();
    if (ENABLE_LOCAL_JOB_RUNNER) {
        // enable Hadoop LocalJobRunner; this enables running in a debugger and setting breakpoints
        jobConf.set("mapred.job.tracker", "local");
    }
    jobConf.setMaxMapAttempts(1);
    jobConf.setMaxReduceAttempts(1);
    jobConf.setJar(SEARCH_ARCHIVES_JAR);
    jobConf.setBoolean(ExtractingParams.IGNORE_TIKA_EXCEPTION, false);

    int shards = 2;
    int maxReducers = Integer.MAX_VALUE;
    if (ENABLE_LOCAL_JOB_RUNNER) {
        // local job runner has a couple of limitations: only one reducer is supported
        // and the DistributedCache doesn't work.
        // see http://blog.cloudera.com/blog/2009/07/advice-on-qa-testing-your-mapreduce-jobs/
        maxReducers = 1;
        shards = 1;
    }

    String[] args = new String[] {
            "--morphline-file=" + RESOURCES_DIR + "/test-morphlines/solrCellDocumentTypes.conf",
            "--morphline-id=morphline1", "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
            "--output-dir=" + outDir.toString(), "--shards=" + shards, "--verbose",
            numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
            numRuns % 3 == 0 ? "--reducers=" + shards
                    : (numRuns % 3 == 1 ? "--reducers=-1" : "--reducers=" + Math.min(8, maxReducers)) };
    if (numRuns % 3 == 2) {
        args = concat(args, new String[] { "--fanout=2" });
    }
    if (numRuns == 0) {
        // force (slow) MapReduce based randomization to get coverage for that as well
        args = concat(new String[] { "-D", MapReduceIndexerTool.MAIN_MEMORY_RANDOMIZATION_THRESHOLD + "=-1" },
                args);
    }
    MapReduceIndexerTool tool = createTool();
    int res = ToolRunner.run(jobConf, tool, args);
    assertEquals(0, res);
    Job job = tool.job;
    assertTrue(job.isComplete());
    assertTrue(job.isSuccessful());

    if (numRuns % 3 != 2) {
        // Only run this check if mtree merge is disabled.
        // With mtree merge enabled the BatchWriter counters aren't available anymore because
        // variable "job" now refers to the merge job rather than the indexing job
        assertEquals(
                "Invalid counter " + SolrRecordWriter.class.getName() + "." + SolrCounters.DOCUMENTS_WRITTEN,
                count,
                job.getCounters()
                        .findCounter(SolrCounters.class.getName(), SolrCounters.DOCUMENTS_WRITTEN.toString())
                        .getValue());
    }

    // Check the output is as expected
    outDir = new Path(outDir, MapReduceIndexerTool.RESULTS_DIR);
    Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(outDir));
    System.out.println("outputfiles:" + Arrays.toString(outputFiles));

    TestUtils.validateSolrServerDocumentCount(MINIMR_CONF_DIR, fs, outDir, count, shards);

    // run again with --dry-run mode:
    tool = createTool();
    args = concat(args, new String[] { "--dry-run" });
    res = ToolRunner.run(jobConf, tool, args);
    assertEquals(0, res);

    numRuns++;
}
From source file:org.apache.solr.hadoop.MorphlineGoLiveMiniMRTest.java
License:Apache License
@Override
public void doTest() throws Exception {
    waitForRecoveriesToFinish(false);

    FileSystem fs = dfsCluster.getFileSystem();
    Path inDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/input"));
    fs.delete(inDir, true);
    String DATADIR = "/user/testing/testMapperReducer/data";
    Path dataDir = fs.makeQualified(new Path(DATADIR));
    fs.delete(dataDir, true);
    Path outDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/output"));
    fs.delete(outDir, true);

    assertTrue(fs.mkdirs(inDir));
    Path INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile1);

    JobConf jobConf = getJobConf();
    // enable mapred.job.tracker = local to run in debugger and set breakpoints
    // jobConf.set("mapred.job.tracker", "local");
    jobConf.setMaxMapAttempts(1);
    jobConf.setMaxReduceAttempts(1);
    jobConf.setJar(SEARCH_ARCHIVES_JAR);
    jobConf.setBoolean(ExtractingParams.IGNORE_TIKA_EXCEPTION, false);

    MapReduceIndexerTool tool;
    int res;
    QueryResponse results;
    HttpSolrServer server = new HttpSolrServer(cloudJettys.get(0).url);

    String[] args = new String[] { "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
            "--output-dir=" + outDir.toString(), "--mappers=3",
            ++numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
            "--shard-url", cloudJettys.get(0).url, "--shard-url", cloudJettys.get(1).url,
            "--shard-url", cloudJettys.get(2).url,
            "--go-live-threads", Integer.toString(random().nextInt(15) + 1), "--verbose", "--go-live" };
    args = prependInitialArgs(args);

    if (true) {
        tool = new MapReduceIndexerTool();
        res = ToolRunner.run(jobConf, tool, args);
        assertEquals(0, res);
        assertTrue(tool.job.isComplete());
        assertTrue(tool.job.isSuccessful());
        results = server.query(new SolrQuery("*:*"));
        assertEquals(20, results.getResults().getNumFound());
    }

    fs.delete(inDir, true);
    fs.delete(outDir, true);
    fs.delete(dataDir, true);
    assertTrue(fs.mkdirs(inDir));
    INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile2);

    args = new String[] { "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
            "--output-dir=" + outDir.toString(), "--mappers=3", "--verbose", "--go-live",
            ++numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
            "--shard-url", cloudJettys.get(0).url, "--shard-url", cloudJettys.get(1).url,
            "--shard-url", cloudJettys.get(2).url,
            "--go-live-threads", Integer.toString(random().nextInt(15) + 1) };
    args = prependInitialArgs(args);

    if (true) {
        tool = new MapReduceIndexerTool();
        res = ToolRunner.run(jobConf, tool, args);
        assertEquals(0, res);
        assertTrue(tool.job.isComplete());
        assertTrue(tool.job.isSuccessful());
        results = server.query(new SolrQuery("*:*"));
        assertEquals(22, results.getResults().getNumFound());
    }

    // try using zookeeper
    String collection = "collection1";
    if (random().nextBoolean()) {
        // sometimes, use an alias
        createAlias("updatealias", "collection1");
        collection = "updatealias";
    }
    fs.delete(inDir, true);
    fs.delete(outDir, true);
    fs.delete(dataDir, true);
    INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile3);

    args = new String[] { "--output-dir=" + outDir.toString(), "--mappers=3", "--reducers=6", "--verbose",
            "--go-live", ++numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
            "--zk-host", zkServer.getZkAddress(), "--collection", collection };
    args = prependInitialArgs(args);

    if (true) {
        tool = new MapReduceIndexerTool();
        res = ToolRunner.run(jobConf, tool, args);
        assertEquals(0, res);
        assertTrue(tool.job.isComplete());
        assertTrue(tool.job.isSuccessful());
        results = server.query(new SolrQuery("*:*"));
        assertEquals(2126, results.getResults().getNumFound());
    }

    server.shutdown();

    // try using zookeeper with replication
    String replicatedCollection = "replicated_collection";
    createCollection(replicatedCollection, 2, 3, 2);
    waitForRecoveriesToFinish(false);
    cloudClient.setDefaultCollection(replicatedCollection);
    fs.delete(inDir, true);
    fs.delete(outDir, true);
    fs.delete(dataDir, true);
    assertTrue(fs.mkdirs(dataDir));
    INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile3);

    args = new String[] { "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
            "--output-dir=" + outDir.toString(), "--mappers=3", "--reducers=6", "--verbose", "--go-live",
            "--zk-host", zkServer.getZkAddress(), "--collection", replicatedCollection, dataDir.toString() };
    args = prependInitialArgs(args);

    if (true) {
        tool = new MapReduceIndexerTool();
        res = ToolRunner.run(jobConf, tool, args);
        assertEquals(0, res);
        assertTrue(tool.job.isComplete());
        assertTrue(tool.job.isSuccessful());
        results = cloudClient.query(new SolrQuery("*:*"));
        assertEquals(2104, results.getResults().getNumFound());
        checkConsistency(replicatedCollection);
    }

    // try using solr_url with replication
    cloudClient.deleteByQuery("*:*");
    cloudClient.commit();
    fs.delete(inDir, true);
    fs.delete(dataDir, true);
    assertTrue(fs.mkdirs(dataDir));
    INPATH = upAvroFile(fs, inDir, DATADIR, dataDir, inputAvroFile3);

    args = new String[] { "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
            "--output-dir=" + outDir.toString(), "--shards", "2", "--mappers=3", "--verbose", "--go-live",
            "--go-live-threads", Integer.toString(random().nextInt(15) + 1), dataDir.toString() };
    args = prependInitialArgs(args);

    List<String> argList = new ArrayList<String>();
    getShardUrlArgs(argList, replicatedCollection);
    args = concat(args, argList.toArray(new String[0]));

    if (true) {
        tool = new MapReduceIndexerTool();
        res = ToolRunner.run(jobConf, tool, args);
        assertEquals(0, res);
        assertTrue(tool.job.isComplete());
        assertTrue(tool.job.isSuccessful());
        checkConsistency(replicatedCollection);
        results = cloudClient.query(new SolrQuery("*:*"));
        assertEquals(2104, results.getResults().getNumFound());
    }
}
From source file:org.apache.sqoop.mapreduce.CombineFileInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSizeNode = 0;
    long minSizeRack = 0;
    long maxSize = 0;
    Configuration conf = job.getConfiguration();

    // the values specified by setxxxSplitSize() take precedence over the
    // values that might have been specified in the config
    if (minSplitSizeNode != 0) {
        minSizeNode = minSplitSizeNode;
    } else {
        minSizeNode = conf.getLong(SPLIT_MINSIZE_PERNODE, 0);
    }
    if (minSplitSizeRack != 0) {
        minSizeRack = minSplitSizeRack;
    } else {
        minSizeRack = conf.getLong(SPLIT_MINSIZE_PERRACK, 0);
    }
    if (maxSplitSize != 0) {
        maxSize = maxSplitSize;
    } else {
        maxSize = conf.getLong("mapred.max.split.size", 0);
    }
    if (minSizeNode != 0 && maxSize != 0 && minSizeNode > maxSize) {
        throw new IOException("Minimum split size per node " + minSizeNode
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && maxSize != 0 && minSizeRack > maxSize) {
        throw new IOException("Minimum split size per rack " + minSizeRack
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && minSizeNode > minSizeRack) {
        throw new IOException("Minimum split size per node " + minSizeNode
                + " cannot be smaller than minimum split size per rack " + minSizeRack);
    }

    // all the files in the input set
    Path[] paths = FileUtil.stat2Paths(listStatus(job).toArray(new FileStatus[0]));
    List<InputSplit> splits = new ArrayList<InputSplit>();
    if (paths.length == 0) {
        return splits;
    }

    // Convert them to qualified Paths first. This is a costly operation and
    // we should do it once up front, otherwise we would incur it multiple
    // times, once for each pool in the next loop.
    List<Path> newpaths = new LinkedList<Path>();
    for (int i = 0; i < paths.length; i++) {
        FileSystem fs = paths[i].getFileSystem(conf);
        // the scheme and authority will be kept if the path is
        // a valid path for a non-default file system
        Path p = fs.makeQualified(paths[i]);
        newpaths.add(p);
    }
    paths = null;

    // In one single iteration, process all the paths in a single pool.
    // Processing one pool at a time ensures that a split contains paths
    // from a single pool only.
    for (MultiPathFilter onepool : pools) {
        ArrayList<Path> myPaths = new ArrayList<Path>();
        // pick one input path. If it matches all the filters in a pool,
        // add it to the output set
        for (Iterator<Path> iter = newpaths.iterator(); iter.hasNext();) {
            Path p = iter.next();
            if (onepool.accept(p)) {
                myPaths.add(p); // add it to my output set
                iter.remove();
            }
        }
        // create splits for all files in this pool
        getMoreSplits(job, myPaths.toArray(new Path[myPaths.size()]), maxSize, minSizeNode, minSizeRack,
                splits);
    }

    // create splits for all files that are not in any pool
    getMoreSplits(job, newpaths.toArray(new Path[newpaths.size()]), maxSize, minSizeNode, minSizeRack, splits);

    // free up the rackToNodes map
    rackToNodes.clear();
    return splits;
}