Usage examples for org.apache.hadoop.fs.Path#makeQualified
@Deprecated
public Path makeQualified(FileSystem fs)
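The single-argument form resolves a relative or scheme-less path against the given FileSystem's URI and working directory, returning a fully qualified Path. It is deprecated; Path.makeQualified(URI, Path) or FileSystem.makeQualified(Path) are the non-deprecated equivalents. The sketch below is illustrative only; the path and configuration are hypothetical and not drawn from the examples that follow.

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;

  public class MakeQualifiedExample {
    public static void main(String[] args) throws Exception {
      Configuration conf = new Configuration();
      FileSystem fs = FileSystem.get(conf);

      // A relative, scheme-less path (hypothetical).
      Path relative = new Path("data/input");

      // Deprecated form, as used in the examples on this page.
      Path qualifiedOld = relative.makeQualified(fs);

      // Non-deprecated equivalent on FileSystem.
      Path qualifiedNew = fs.makeQualified(relative);

      // Both print a fully qualified URI; the scheme and authority
      // depend on fs.defaultFS, e.g. hdfs://namenode:8020/user/<user>/data/input
      System.out.println(qualifiedOld);
      System.out.println(qualifiedNew);
    }
  }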
From source file:com.inmobi.databus.distcp.MirrorStreamService.java
License:Apache License
LinkedHashMap<FileStatus, Path> prepareForCommit(Path tmpOut) throws Exception {
  /* tmpOut would be like -
   * /databus/system/tmp/distcp_mirror_<srcCluster>_<destCluster>/
   * After distcp, paths inside tmpOut would be eg:
   *
   * /databus/system/distcp_mirror_ua1_uj1
   *   /databus/streams/<streamName>/2012/1/13/15/7/
   *   <hostname>-<streamName>-2012-01-16-07-21_00000.gz
   *
   * tmpStreamRoot eg:
   * /databus/system/distcp_mirror_<srcCluster>_<destCluster>/databus/streams/
   */
  Path tmpStreamRoot = new Path(tmpOut.makeQualified(getDestFs()).toString() + File.separator
      + getSrcCluster().getUnqaulifiedFinalDestDirRoot());
  LOG.debug("tmpStreamRoot [" + tmpStreamRoot + "]");

  /* tmpStreamRoot eg -
   * /databus/system/tmp/distcp_mirror_<srcCluster>_<destCluster>/databus/streams/
   *
   * Multiple streams can get mirrored from the same cluster.
   * Streams can get processed in any order, but we have to retain the order
   * of paths within a stream. */
  FileStatus[] fileStatuses = getDestFs().listStatus(tmpStreamRoot);

  // Retain the order of commitPaths
  LinkedHashMap<FileStatus, Path> commitPaths = new LinkedHashMap<FileStatus, Path>();
  if (fileStatuses != null) {
    for (FileStatus streamRoot : fileStatuses) {
      // For each stream: list the paths in order of YYYY/mm/DD/HH/MM
      LOG.debug("StreamRoot [" + streamRoot.getPath() + "] streamName ["
          + streamRoot.getPath().getName() + "]");
      List<FileStatus> streamPaths = new ArrayList<FileStatus>();
      createListing(getDestFs(), streamRoot, streamPaths);
      Collections.sort(streamPaths, new DatePathComparator());
      LOG.debug("createListing size: [" + streamPaths.size() + "]");
      createCommitPaths(commitPaths, streamPaths);
    }
  }
  return commitPaths;
}
From source file:com.inmobi.databus.local.LocalStreamServiceTest.java
License:Apache License
private void createMockForFileSystem(FileSystem fs, Cluster cluster) throws Exception {
  FileStatus[] files = createTestData(2, "/databus/data/stream", true);
  FileStatus[] stream1 = createTestData(2, "/databus/data/stream1/collector", true);
  FileStatus[] stream3 = createTestData(number_files, "/databus/data/stream1/collector1/file", true);
  FileStatus[] stream4 = createTestData(number_files, "/databus/data/stream1/collector2/file", true);
  FileStatus[] stream2 = createTestData(2, "/databus/data/stream2/collector", true);
  FileStatus[] stream5 = createTestData(number_files, "/databus/data/stream2/collector1/file", true);
  FileStatus[] stream6 = createTestData(number_files, "/databus/data/stream2/collector2/file", true);

  when(fs.getWorkingDirectory()).thenReturn(new Path("/tmp/"));
  when(fs.getUri()).thenReturn(new URI("localhost"));
  when(fs.listStatus(cluster.getDataDir())).thenReturn(files);
  when(fs.listStatus(new Path("/databus/data/stream1"))).thenReturn(stream1);
  when(fs.listStatus(new Path("/databus/data/stream1/collector1"), any(CollectorPathFilter.class)))
      .thenReturn(stream3);
  when(fs.listStatus(new Path("/databus/data/stream2"))).thenReturn(stream2);
  when(fs.listStatus(new Path("/databus/data/stream1/collector2"), any(CollectorPathFilter.class)))
      .thenReturn(stream4);
  when(fs.listStatus(new Path("/databus/data/stream2/collector1"), any(CollectorPathFilter.class)))
      .thenReturn(stream5);
  when(fs.listStatus(new Path("/databus/data/stream2/collector2"), any(CollectorPathFilter.class)))
      .thenReturn(stream6);

  Path file = mock(Path.class);
  when(file.makeQualified(any(FileSystem.class)))
      .thenReturn(new Path("/databus/data/stream1/collector1/"));
}
From source file:com.inmobi.grill.driver.hive.HiveDriver.java
License:Apache License
void addPersistentPath(QueryContext context) throws IOException {
  String hiveQuery;
  if (context.isPersistent()
      && context.getConf().getBoolean(GrillConfConstants.GRILL_ADD_INSERT_OVEWRITE, true)) {
    // Store persistent data into the user-specified location.
    // If absent, use the default home directory.
    String resultSetParentDir = context.getResultSetParentDir();
    StringBuilder builder;
    Path resultSetPath;
    if (StringUtils.isNotBlank(resultSetParentDir)) {
      resultSetPath = new Path(resultSetParentDir, context.getQueryHandle().toString());
      // Create the query
      builder = new StringBuilder("INSERT OVERWRITE DIRECTORY ");
    } else {
      // Write to /tmp/grillreports
      resultSetPath = new Path(GrillConfConstants.GRILL_RESULT_SET_PARENT_DIR_DEFAULT,
          context.getQueryHandle().toString());
      builder = new StringBuilder("INSERT OVERWRITE LOCAL DIRECTORY ");
    }
    context.setResultSetPath(
        resultSetPath.makeQualified(resultSetPath.getFileSystem(context.getConf())).toString());
    builder.append('"').append(resultSetPath).append("\" ");
    String outputDirFormat = context.getConf().get(GrillConfConstants.GRILL_OUTPUT_DIRECTORY_FORMAT);
    if (outputDirFormat != null) {
      builder.append(outputDirFormat);
    }
    builder.append(' ').append(context.getDriverQuery()).append(' ');
    hiveQuery = builder.toString();
  } else {
    hiveQuery = context.getDriverQuery();
  }
  LOG.info("Hive driver query:" + hiveQuery);
  context.setDriverQuery(hiveQuery);
}
From source file:com.lightboxtechnologies.spectrum.ExtractData.java
License:Apache License
public int run(String[] args) throws Exception {
  if (args.length != 4) {
    System.err.println("Usage: ExtractData <imageID> <friendly_name> <extents_file> <evidence file>");
    return 2;
  }

  final String imageID = args[0];
  final String friendlyName = args[1];
  final String extentsPath = args[2];
  final String image = args[3];

  Configuration conf = getConf();
  final Job job = SKJobFactory.createJobFromConf(imageID, friendlyName, "ExtractData", conf);
  job.setJarByClass(ExtractData.class);
  job.setMapperClass(ExtractDataMapper.class);
  job.setReducerClass(KeyValueSortReducer.class);
  job.setNumReduceTasks(1);

  // job ctor copies the Configuration we pass it, get the real one
  conf = job.getConfiguration();
  conf.setLong("timestamp", System.currentTimeMillis());

  job.setInputFormatClass(RawFileInputFormat.class);
  RawFileInputFormat.addInputPath(job, new Path(image));

  job.setOutputFormatClass(HFileOutputFormat.class);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(KeyValue.class);

  conf.setInt("mapreduce.job.jvm.numtasks", -1);

  final FileSystem fs = FileSystem.get(conf);
  Path hfileDir = new Path("/texaspete/ev/tmp", UUID.randomUUID().toString());
  hfileDir = hfileDir.makeQualified(fs);
  LOG.info("Hashes will be written temporarily to " + hfileDir);

  HFileOutputFormat.setOutputPath(job, hfileDir);

  final Path extp = new Path(extentsPath);
  final URI extents = extp.toUri();
  LOG.info("extents file is " + extents);

  DistributedCache.addCacheFile(extents, conf);
  conf.set("com.lbt.extentsname", extp.getName());

  // job.getConfiguration().setBoolean("mapred.task.profile", true);
  // job.getConfiguration().setBoolean("mapreduce.task.profile", true);

  HBaseTables.summon(conf, HBaseTables.HASH_TBL_B, HBaseTables.HASH_COLFAM_B);
  HBaseTables.summon(conf, HBaseTables.ENTRIES_TBL_B, HBaseTables.ENTRIES_COLFAM_B);

  final boolean result = job.waitForCompletion(true);
  if (result) {
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
    HBaseConfiguration.addHbaseResources(conf);
    loader.setConf(conf);
    LOG.info("Loading hashes into hbase");
    chmodR(fs, hfileDir);
    loader.doBulkLoad(hfileDir, new HTable(conf, HBaseTables.HASH_TBL_B));
    // result = fs.delete(hfileDir, true);
  }
  return result ? 0 : 1;
}
From source file:com.moz.fiji.mapreduce.kvstore.lib.FileStoreHelper.java
License:Apache License
/**
 * Serializes file- and DistributedCache-specific properties associated
 * with the KeyValueStore that owns this FileStoreHelper to the specified configuration.
 *
 * @param conf the configuration to populate.
 * @throws IOException if there's an error serializing the state.
 */
public void storeToConf(KeyValueStoreConfiguration conf) throws IOException {
  if (mInputPaths.isEmpty()) {
    throw new IOException("Required attribute not set: input path");
  }

  if (mUseDCache && !"local".equals(conf.get("mapreduce.jobtracker.address", ""))) {
    // If we're scheduled to use the distributed cache, and we're not in the LocalJobRunner,
    // add these files to the DistributedCache.

    // TODO(aaron): This does not handle any sort of MapperTester, etc.
    // We need a separate flag that tells this to ignore mUseDCache if we're in a test
    // environment, and just use the original input file specs.
    final String dCachePrefix = getCachePrefix();

    // Associate this randomly chosen prefix id with this KVStore implementation.
    conf.set(CONF_DCACHE_PREFIX_KEY, dCachePrefix);

    // Add the input paths to the DistributedCache and translate path names.
    int uniqueId = 0;
    // TODO: getExpandedInputPaths() should use the Configuration from conf, not our getConf().
    for (Path inputPath : getExpandedInputPaths()) {
      FileSystem fs = inputPath.getFileSystem(conf.getDelegate());
      Path absolutePath = inputPath.makeQualified(fs);
      String uriStr = absolutePath.toString() + "#" + dCachePrefix + "-" + uniqueId;
      LOG.debug("Adding to DistributedCache: " + uriStr);
      uniqueId++;
      try {
        DistributedCache.addCacheFile(new URI(uriStr), conf.getDelegate());
      } catch (URISyntaxException use) {
        throw new IOException("Could not construct URI for file: " + uriStr, use);
      }
    }

    // Ensure that symlinks are created for cached files.
    DistributedCache.createSymlink(conf.getDelegate());

    // Now save the cache prefix into the local state. We couldn't set this earlier,
    // because we wanted getExpandedInputPaths() to actually unglob things. That
    // function will behave differently if mDCachePrefix is already initialized.
    mDCachePrefix = dCachePrefix;
  } else {
    // Just put the regular HDFS paths in the Configuration.
    conf.setStrings(CONF_PATHS_KEY,
        Lists.toArray(Lists.map(mInputPaths, new Lists.ToStringFn<Path>()), String.class));
  }
}
From source file:com.moz.fiji.mapreduce.output.HFileMapReduceJobOutput.java
License:Apache License
/**
 * Configures the partitioner for generating HFiles.
 *
 * <p>Each generated HFile should fit within a region of the target table.
 * Additionally, it's optimal to have only one HFile to load into each region, since a
 * read from that region will require reading from each HFile under management (until
 * compaction happens and merges them all back into one HFile).</p>
 *
 * <p>To achieve this, we configure a TotalOrderPartitioner that will partition the
 * records output from the Mapper based on their rank in a total ordering of the
 * keys. The <code>startKeys</code> argument should contain a list of the first key in
 * each of those partitions.</p>
 *
 * @param job The job to configure.
 * @param startKeys A list of keys that will mark the boundaries between the partitions
 *     for the sorted map output records.
 * @throws IOException If there is an error.
 */
public static void configurePartitioner(Job job, List<HFileKeyValue> startKeys) throws IOException {
  FijiMRPlatformBridge.get().setTotalOrderPartitionerClass(job);

  LOG.info("Configuring " + startKeys.size() + " reduce partitions.");
  job.setNumReduceTasks(startKeys.size());

  // Write the file that the TotalOrderPartitioner reads to determine where to partition records.
  Path partitionFilePath = new Path(job.getWorkingDirectory(), "partitions_" + System.currentTimeMillis());
  LOG.info("Writing partition information to " + partitionFilePath);

  final FileSystem fs = partitionFilePath.getFileSystem(job.getConfiguration());
  partitionFilePath = partitionFilePath.makeQualified(fs);
  writePartitionFile(job.getConfiguration(), partitionFilePath, startKeys);

  // Add it to the distributed cache.
  try {
    final URI cacheUri = new URI(partitionFilePath.toString() + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    DistributedCache.addCacheFile(cacheUri, job.getConfiguration());
  } catch (URISyntaxException e) {
    throw new IOException(e);
  }
  DistributedCache.createSymlink(job.getConfiguration());
}
From source file:com.moz.fiji.schema.mapreduce.DistributedCacheJars.java
License:Apache License
/**
 * @param conf Configuration to get the FileSystem from.
 * @param jarDirectory The directory of jars to get.
 * @return A list of qualified paths to the jars in jarDirectory.
 * @throws IOException if there's a problem.
 */
public static List<String> getJarsFromDirectory(Configuration conf, File jarDirectory) throws IOException {
  if (!jarDirectory.isDirectory()) {
    throw new IOException("Attempted to add jars from non-directory: " + jarDirectory.getCanonicalPath());
  }
  List<String> allJars = new ArrayList<String>();
  FileSystem fileSystem = FileSystem.getLocal(conf);
  for (File jar : jarDirectory.listFiles()) {
    if (jar.exists() && !jar.isDirectory() && jar.getName().endsWith(".jar")) {
      Path jarPath = new Path(jar.getCanonicalPath());
      String qualifiedPath = jarPath.makeQualified(fileSystem).toString();
      allJars.add(qualifiedPath);
    }
  }
  return allJars;
}
From source file:com.phantom.hadoop.examples.Sort.java
License:Apache License
/**
 * The main driver for the sort program. Invoke this method to submit the
 * map/reduce job.
 *
 * @throws IOException When there are communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
  Configuration conf = getConf();
  JobClient client = new JobClient(conf);
  ClusterStatus cluster = client.getClusterStatus();
  int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
  String sort_reduces = conf.get(REDUCES_PER_HOST);
  if (sort_reduces != null) {
    num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
  }
  Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
  Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
  Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
  Class<? extends Writable> outputValueClass = BytesWritable.class;
  List<String> otherArgs = new ArrayList<String>();
  InputSampler.Sampler<K, V> sampler = null;
  for (int i = 0; i < args.length; ++i) {
    try {
      if ("-r".equals(args[i])) {
        num_reduces = Integer.parseInt(args[++i]);
      } else if ("-inFormat".equals(args[i])) {
        inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
      } else if ("-outFormat".equals(args[i])) {
        outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
      } else if ("-outKey".equals(args[i])) {
        outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
      } else if ("-outValue".equals(args[i])) {
        outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
      } else if ("-totalOrder".equals(args[i])) {
        double pcnt = Double.parseDouble(args[++i]);
        int numSamples = Integer.parseInt(args[++i]);
        int maxSplits = Integer.parseInt(args[++i]);
        if (0 >= maxSplits)
          maxSplits = Integer.MAX_VALUE;
        sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits);
      } else {
        otherArgs.add(args[i]);
      }
    } catch (NumberFormatException except) {
      System.out.println("ERROR: Integer expected instead of " + args[i]);
      return printUsage();
    } catch (ArrayIndexOutOfBoundsException except) {
      System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
      return printUsage(); // exits
    }
  }

  // Set user-supplied (possibly default) job configs
  job = new Job(conf);
  job.setJobName("sorter");
  job.setJarByClass(Sort.class);
  job.setMapperClass(Mapper.class);
  job.setReducerClass(Reducer.class);
  job.setNumReduceTasks(num_reduces);
  job.setInputFormatClass(inputFormatClass);
  job.setOutputFormatClass(outputFormatClass);
  job.setOutputKeyClass(outputKeyClass);
  job.setOutputValueClass(outputValueClass);

  // Make sure there are exactly 2 parameters left.
  if (otherArgs.size() != 2) {
    System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2.");
    return printUsage();
  }
  FileInputFormat.setInputPaths(job, otherArgs.get(0));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(1)));

  if (sampler != null) {
    System.out.println("Sampling input to effect total-order sort...");
    job.setPartitionerClass(TotalOrderPartitioner.class);
    Path inputDir = FileInputFormat.getInputPaths(job)[0];
    inputDir = inputDir.makeQualified(inputDir.getFileSystem(conf));
    Path partitionFile = new Path(inputDir, "_sortPartitioning");
    TotalOrderPartitioner.setPartitionFile(conf, partitionFile);
    InputSampler.<K, V>writePartitionFile(job, sampler);
    URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
    DistributedCache.addCacheFile(partitionUri, conf);
  }

  System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
      + FileInputFormat.getInputPaths(job)[0] + " into " + FileOutputFormat.getOutputPath(job)
      + " with " + num_reduces + " reduces.");
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date end_time = new Date();
  System.out.println("Job ended: " + end_time);
  System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
  return ret;
}
From source file:com.splicemachine.mrio.api.SpliceTableMapReduceUtil.java
License:Apache License
/**
 * Add the jars containing the given classes to the job's configuration
 * such that JobClient will ship them to the cluster and add them to
 * the DistributedCache.
 */
public static void addDependencyJars(Configuration conf, Class... classes) throws IOException {
  FileSystem localFs = FileSystem.getLocal(conf);
  Set<String> jars = new HashSet<String>();

  // Add jars that are already in the tmpjars variable
  jars.addAll(conf.getStringCollection("tmpjars"));

  // Add jars containing the specified classes
  for (Class clazz : classes) {
    if (clazz == null)
      continue;

    String pathStr = findOrCreateJar(clazz);
    if (pathStr == null) {
      LOG.warn("Could not find jar for class " + clazz + " in order to ship it to the cluster.");
      continue;
    }
    Path path = new Path(pathStr);
    if (!localFs.exists(path)) {
      LOG.warn("Could not validate jar file " + path + " for class " + clazz);
      continue;
    }
    jars.add(path.makeQualified(localFs).toString());
  }
  if (jars.isEmpty())
    return;

  conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[0])));
}
From source file:com.splout.db.hadoop.DeployerCMD.java
License:Apache License
@SuppressWarnings("deprecation") @Override//ww w. j a v a 2 s.c om public int run(String[] args) throws Exception { JCommander jComm = new JCommander(this); jComm.setProgramName("Tablespaces Deployer"); try { jComm.parse(args); } catch (ParameterException e) { System.out.println(e.getMessage()); jComm.usage(); return -1; } catch (Throwable t) { t.printStackTrace(); jComm.usage(); return -1; } StoreDeployerTool deployer = new StoreDeployerTool(qnode, getConf()); ArrayList<TablespaceDepSpec> deployments = new ArrayList<TablespaceDepSpec>(); if (configFile != null) { deployments = JSONSerDe.deSer(Files.toString(new File(configFile), Charset.forName("UTF-8")), new TypeReference<ArrayList<TablespaceDepSpec>>() { }); } else { Path rootPath = new Path(root); if (tablespaceName == null && tablespaces.size() == 0) { System.err.println( "Tablespace name for root folder or tablespaces contained in them is lacking. Either use tablespacename or tablespaces option."); jComm.usage(); return -1; } if (tablespaceName != null && tablespaces.size() > 0) { System.err.println( "Can't use tablespacename and tablespaces at the same time. Root is to be either a generated tablespace or a folder with multiple generated tablespaces."); jComm.usage(); return -1; } if (tablespaceName != null) { deployments.add(new TablespaceDepSpec(tablespaceName, rootPath.toString(), replicationFactor, initStatements)); } for (String tb : tablespaces) { Path tablespacePath = new Path(rootPath, tb); deployments.add( new TablespaceDepSpec(tb, tablespacePath.toString(), replicationFactor, initStatements)); } } // Checking for file existence for (TablespaceDepSpec spec : deployments) { Path tablespacePath = new Path(spec.getSourcePath()); FileSystem fs = tablespacePath.getFileSystem(getConf()); if (!fs.exists(tablespacePath)) { System.out.println("ERROR: Path [" + tablespacePath.makeQualified(fs) + "] not found."); return 1; } } deployer.deploy(deployments); return 0; }