List of usage examples for org.apache.hadoop.fs FileSystem makeQualified
public Path makeQualified(Path path)
From source file:gobblin.data.management.trash.TrashFactoryTest.java
License:Apache License
/**
 * Checks that {@link TrashFactory} hands back the trash implementation selected by the
 * supplied properties: the regular trash by default, and the simulate / test / skip-trash
 * variants when the corresponding flag is set to {@code true}.
 */
@Test
public void test() throws IOException {
  FileSystem fs = mock(FileSystem.class);

  Path home = new Path("/home/directory");
  Path trashDir = new Path(home, Trash.DEFAULT_TRASH_DIRECTORY);
  Path identifierFile = new Path(trashDir, Trash.TRASH_IDENTIFIER_FILE);

  // Stub for makeQualified: hand back the very path that was passed in.
  Answer<Path> echoArgument = new Answer<Path>() {
    @Override
    public Path answer(InvocationOnMock invocation) throws Throwable {
      return (Path) invocation.getArguments()[0];
    }
  };

  // The mocked file system pretends an initialized, empty trash directory already exists.
  when(fs.getHomeDirectory()).thenReturn(home);
  when(fs.exists(trashDir)).thenReturn(true);
  when(fs.exists(identifierFile)).thenReturn(true);
  when(fs.listStatus(trashDir)).thenReturn(new FileStatus[] {});
  when(fs.isDirectory(trashDir)).thenReturn(true);
  when(fs.mkdirs(any(Path.class))).thenReturn(true);
  when(fs.mkdirs(any(Path.class), any(FsPermission.class))).thenReturn(true);
  when(fs.createNewFile(any(Path.class))).thenReturn(true);
  when(fs.makeQualified(any(Path.class))).thenAnswer(echoArgument);

  // Base properties only: regular trash implementations.
  Properties defaults = getBaseProperties(trashDir);
  Assert.assertTrue(TrashFactory.createTrash(fs, defaults) instanceof Trash);
  Assert.assertTrue(TrashFactory.createProxiedTrash(fs, defaults) instanceof ProxiedTrash);

  // Simulate flag set.
  Properties simulate = getBaseProperties(trashDir);
  simulate.setProperty(TrashFactory.SIMULATE, Boolean.toString(true));
  Assert.assertTrue(TrashFactory.createTrash(fs, simulate) instanceof MockTrash);
  Assert.assertTrue(TrashFactory.createProxiedTrash(fs, simulate) instanceof MockTrash);

  // Trash-test flag set.
  Properties trashTest = getBaseProperties(trashDir);
  trashTest.setProperty(TrashFactory.TRASH_TEST, Boolean.toString(true));
  Assert.assertTrue(TrashFactory.createTrash(fs, trashTest) instanceof TestTrash);
  Assert.assertTrue(TrashFactory.createProxiedTrash(fs, trashTest) instanceof TestTrash);

  // Skip-trash flag set.
  Properties skipTrash = getBaseProperties(trashDir);
  skipTrash.setProperty(TrashFactory.SKIP_TRASH, Boolean.toString(true));
  Assert.assertTrue(TrashFactory.createTrash(fs, skipTrash) instanceof ImmediateDeletionTrash);
  Assert.assertTrue(TrashFactory.createProxiedTrash(fs, skipTrash) instanceof ImmediateDeletionTrash);
}
From source file:gobblin.runtime.mapreduce.GobblinWorkUnitsInputFormatTest.java
License:Apache License
/**
 * With no mapper limit configured, expects one split per work-unit file: 20 files in the
 * mocked directory listing yield 20 splits.
 */
@Test
public void testGetSplits() throws Exception {
  String scheme = GobblinWorkUnitsInputFormatTest.class.getSimpleName();
  URI baseUri = new URI(scheme + "://testGetSplits");
  Configuration configuration = new Configuration();

  Path workUnitsDir = new Path(new Path(baseUri), "/workUnits");
  FileStatus[] statuses = createFileStatuses(20, workUnitsDir);

  // Mocked file system: lists the generated statuses and "qualifies" a path by returning it as-is.
  FileSystem fs = Mockito.mock(FileSystem.class);
  Answer<Path> echoArgument = new Answer<Path>() {
    @Override
    public Path answer(InvocationOnMock invocation) throws Throwable {
      return (Path) invocation.getArguments()[0];
    }
  };
  Mockito.when(fs.listStatus(workUnitsDir)).thenReturn(statuses);
  Mockito.when(fs.makeQualified(Mockito.any(Path.class))).thenAnswer(echoArgument);

  FileSystemTestUtils.addFileSystemForTest(baseUri, configuration, fs);

  Job job = Job.getInstance(configuration);
  FileInputFormat.addInputPath(job, workUnitsDir);

  GobblinWorkUnitsInputFormat inputFormat = new GobblinWorkUnitsInputFormat();
  List<InputSplit> splits = inputFormat.getSplits(job);

  Assert.assertEquals(splits.size(), 20);
  verifyPaths(splits, statuses);
}
From source file:gobblin.runtime.mapreduce.GobblinWorkUnitsInputFormatTest.java
License:Apache License
/**
 * With the maximum number of mappers set to 6, expects the 20 work-unit files to be packed
 * into fewer than 6 splits that still cover every file.
 */
@Test
public void testGetSplitsMaxSize() throws Exception {
  String scheme = GobblinWorkUnitsInputFormatTest.class.getSimpleName();
  URI baseUri = new URI(scheme + "://testGetSplitsMaxSize");
  Configuration configuration = new Configuration();

  Path workUnitsDir = new Path(new Path(baseUri), "/workUnits");
  FileStatus[] statuses = createFileStatuses(20, workUnitsDir);

  // Mocked file system: lists the generated statuses and "qualifies" a path by returning it as-is.
  FileSystem fs = Mockito.mock(FileSystem.class);
  Answer<Path> echoArgument = new Answer<Path>() {
    @Override
    public Path answer(InvocationOnMock invocation) throws Throwable {
      return (Path) invocation.getArguments()[0];
    }
  };
  Mockito.when(fs.listStatus(workUnitsDir)).thenReturn(statuses);
  Mockito.when(fs.makeQualified(Mockito.any(Path.class))).thenAnswer(echoArgument);

  FileSystemTestUtils.addFileSystemForTest(baseUri, configuration, fs);

  Job job = Job.getInstance(configuration);
  FileInputFormat.addInputPath(job, workUnitsDir);
  GobblinWorkUnitsInputFormat.setMaxMappers(job, 6);

  GobblinWorkUnitsInputFormat inputFormat = new GobblinWorkUnitsInputFormat();
  List<InputSplit> splits = inputFormat.getSplits(job);

  Assert.assertTrue(splits.size() < 6);
  verifyPaths(splits, statuses);
}
From source file:gobblin.test.TestExtractor.java
License:Apache License
public TestExtractor(WorkUnitState workUnitState) { //super(workUnitState); Schema schema = new Schema.Parser().parse(AVRO_SCHEMA); Path sourceFile = new Path(workUnitState.getWorkunit().getProp(TestSource.SOURCE_FILE_KEY)); LOG.info("Reading from source file " + sourceFile); DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema); try {/*w w w .j av a 2 s . c o m*/ FileSystem fs = FileSystem.get( URI.create(workUnitState.getProp(ConfigurationKeys.FS_URI_KEY, ConfigurationKeys.LOCAL_FS_URI)), new Configuration()); fs.makeQualified(sourceFile); this.dataFileReader = new DataFileReader<GenericRecord>(new FsInput(sourceFile, new Configuration()), datumReader); } catch (IOException ioe) { LOG.error("Failed to read the source file " + sourceFile, ioe); } }
From source file:inflater.runner.RunInflater.java
License:MIT License
@Override public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException, ParseException { if (args.length < 2) { return -1; }//from w w w. j av a 2s . co m if (conf == null) { conf = new Configuration(); } GiraphConfiguration giraphConf = new GiraphConfiguration(getConf()); giraphConf.addResource(new Path("giraph-site.xml")); GiraphJob job = new GiraphJob(giraphConf, giraphConf.getComputationName()); Path inputLocal = new Path(args[0]); Path outputLocal = new Path(args[1]); // We copy file from local file system to HDFS FileSystem fs = FileSystem.get(giraphConf); inputHDFS = new Path(fs.getHomeDirectory(), "Giraph Source" + File.separator + inputLocal.getName() + File.separator + inputLocal.getName()); inputHDFS = fs.makeQualified(inputHDFS); outputHDFS = new Path(fs.getHomeDirectory(), "Giraph Source" + File.separator + inputLocal.getName() + File.separator + "output"); outputHDFS = fs.makeQualified(outputHDFS); fs.copyFromLocalFile(false, true, inputLocal, inputHDFS); // Delete output path because Hadoop cannot override it. if (fs.exists(outputHDFS)) fs.delete(outputHDFS, true); FileOutputFormat.setOutputPath(job.getInternalJob(), outputHDFS); GiraphFileInputFormat.addVertexInputPath(giraphConf, inputHDFS); new GiraphConfigurationValidator<>(giraphConf).validateConfiguration(); boolean result = job.run(true); if (result) { fs.copyToLocalFile(false, new Path(outputHDFS, "part-m-00000"), outputLocal); } return result ? 0 : -1; }
From source file:io.druid.indexer.JobHelper.java
License:Apache License
public static Path prependFSIfNullScheme(FileSystem fs, Path path) { if (path.toUri().getScheme() == null) { path = fs.makeQualified(path); }//w w w . j a v a 2 s . co m return path; }
From source file:it.polito.dbdmg.searum.ARM.java
License:Apache License
/**
 * Writes the header table (item / count pairs) to a sequence file named
 * {@code HEADER_TABLE} under the configured output location, replacing any previous copy,
 * and registers that file with the distributed cache.
 *
 * @param flist  item / count pairs to persist
 * @param params job parameters; the {@code OUTPUT} entry gives the parent location
 * @param conf   configuration used to resolve the file system and to register the cache file
 * @throws IOException if the file cannot be deleted, created or written
 */
public static void saveFList(Iterable<Pair<String, Long>> flist, Parameters params, Configuration conf)
    throws IOException {
  Path unqualified = new Path(params.get(OUTPUT), HEADER_TABLE);
  FileSystem fs = FileSystem.get(unqualified.toUri(), conf);
  Path flistPath = fs.makeQualified(unqualified);

  // Remove any previous copy before writing a fresh one.
  HadoopUtil.delete(conf, flistPath);

  SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, flistPath, Text.class, LongWritable.class);
  try {
    for (Pair<String, Long> entry : flist) {
      Text item = new Text(entry.getFirst());
      LongWritable count = new LongWritable(entry.getSecond());
      writer.append(item, count);
    }
  } finally {
    writer.close();
  }

  DistributedCache.addCacheFile(flistPath.toUri(), conf);
}
From source file:it.polito.dbdmg.searum.ARM.java
License:Apache License
/** * Generates the header table from the serialized string representation * /*from w w w .j a v a 2 s . c o m*/ * @return Deserialized header table */ public static List<Pair<String, Long>> readFList(Configuration conf) throws IOException { List<Pair<String, Long>> list = new ArrayList<Pair<String, Long>>(); Path[] files = DistributedCache.getLocalCacheFiles(conf); if (files == null) { throw new IOException("Cannot read Frequency list from Distributed Cache"); } if (files.length != 1) { throw new IOException("Cannot read Frequency list from Distributed Cache (" + files.length + ')'); } FileSystem fs = FileSystem.getLocal(conf); Path fListLocalPath = fs.makeQualified(files[0]); // Fallback if we are running locally. if (!fs.exists(fListLocalPath)) { URI[] filesURIs = DistributedCache.getCacheFiles(conf); if (filesURIs == null) { throw new IOException("Cannot read header table from Distributed Cache"); } if (filesURIs.length != 1) { throw new IOException("Cannot read header table from Distributed Cache (" + files.length + ')'); } fListLocalPath = new Path(filesURIs[0].getPath()); } for (Pair<Text, LongWritable> record : new SequenceFileIterable<Text, LongWritable>(fListLocalPath, true, conf)) { list.add(new Pair<String, Long>(record.getFirst().toString(), record.getSecond().get())); } return list; }
From source file:ml.shifu.guagua.hadoop.io.GuaguaOptionsParser.java
License:Apache License
/** * Modify configuration according user-specified generic options * //from w w w . j a v a 2s . c o m * @param conf * Configuration to be modified * @param line * User-specified generic options */ private void processGeneralOptions(Configuration conf, CommandLine line) throws IOException { if (line.hasOption("fs")) { FileSystem.setDefaultUri(conf, line.getOptionValue("fs")); } if (line.hasOption("jt")) { conf.set("mapred.job.tracker", line.getOptionValue("jt")); } if (line.hasOption("conf")) { String[] values = line.getOptionValues("conf"); for (String value : values) { conf.addResource(new Path(value)); } } if (line.hasOption("libjars")) { conf.set("tmpjars", validateFiles(line.getOptionValue("libjars"), conf)); // setting libjars in client classpath URL[] libjars = getLibJars(conf); if (libjars != null && libjars.length > 0) { conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader())); Thread.currentThread().setContextClassLoader( new URLClassLoader(libjars, Thread.currentThread().getContextClassLoader())); } } if (line.hasOption("files")) { conf.set("tmpfiles", validateFiles(line.getOptionValue("files"), conf)); } if (line.hasOption("archives")) { conf.set("tmparchives", validateFiles(line.getOptionValue("archives"), conf)); } if (line.hasOption('D')) { String[] property = line.getOptionValues('D'); for (String prop : property) { String[] keyval = prop.split("=", 2); if (keyval.length == 2) { conf.set(keyval[0], keyval[1]); } } } conf.setBoolean("mapred.used.genericoptionsparser", true); // tokensFile if (line.hasOption("tokenCacheFile")) { String fileName = line.getOptionValue("tokenCacheFile"); // check if the local file exists try { FileSystem localFs = FileSystem.getLocal(conf); Path p = new Path(fileName); if (!localFs.exists(p)) { throw new FileNotFoundException("File " + fileName + " does not exist."); } LOG.debug("setting conf tokensFile: {}", fileName); conf.set("mapreduce.job.credentials.json", localFs.makeQualified(p).toString()); } 
catch (IOException e) { throw new RuntimeException(e); } } }
From source file:ml.shifu.guagua.hadoop.ZooKeeperWorkerInterceptor.java
License:Apache License
@Override public void preApplication(WorkerContext<MASTER_RESULT, WORKER_RESULT> context) { String zkServers = context.getProps().getProperty(GuaguaConstants.GUAGUA_ZK_SERVERS); if (zkServers == null || zkServers.length() == 0 || !ZooKeeperUtils.checkServers(zkServers)) { this.sleepTime = NumberFormatUtils.getLong( context.getProps().getProperty(GuaguaConstants.GUAGUA_COORDINATOR_SLEEP_UNIT), WAIT_SLOT_MILLS); this.isFixedTime = Boolean.TRUE.toString().equalsIgnoreCase( context.getProps().getProperty(GuaguaConstants.GUAGUA_COORDINATOR_FIXED_SLEEP_ENABLE, GuaguaConstants.GUAGUA_COORDINATOR_FIXED_SLEEP)); String hdfsZookeeperServerFolder = getZookeeperServerFolder(context); long start = System.nanoTime(); while (true) { if (TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start) > 10 * 60 * 1000L) { throw new GuaguaRuntimeException("Cannot get zookeeper server address in 10 minutes."); }/*from www . j a v a 2 s .com*/ BufferedReader br = null; try { final FileSystem fileSystem = FileSystem.get(new Configuration()); final Path zookeeperServerPath = fileSystem.makeQualified(new Path(hdfsZookeeperServerFolder, GuaguaConstants.GUAGUA_CLUSTER_ZOOKEEPER_SERVER_FILE)); LOG.info("Embeded zookeeper server address is {}", zookeeperServerPath); new RetryCoordinatorCommand(this.isFixedTime, this.sleepTime) { @Override public boolean retryExecution() throws Exception, InterruptedException { return fileSystem.exists(zookeeperServerPath); } }.execute(); FSDataInputStream fis = fileSystem.open(zookeeperServerPath); br = new BufferedReader(new InputStreamReader(fis)); String zookeeperServer = br.readLine(); if (zookeeperServer == null || zookeeperServer.length() == 0) { LOG.warn("Cannot get zookeeper server in {} ", zookeeperServerPath.toString()); // retry continue; } // set server info to context for next intercepters. 
LOG.info("Embeded zookeeper instance is {}", zookeeperServer); context.getProps().setProperty(GuaguaConstants.GUAGUA_ZK_SERVERS, zookeeperServer); break; } catch (Throwable t) { LOG.warn(String.format("Error in get zookeeper address message: %s", t.getMessage())); continue; } finally { IOUtils.closeQuietly(br); } } } }