Usage examples for org.apache.hadoop.util.StringUtils#arrayToString, which joins a String[] into a single comma-separated String.
public static String arrayToString(String[] strs)
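For reference, a minimal sketch of the behavior (the array contents here are illustrative): the method joins the elements of the array into one comma-separated String.

    String[] parts = { "alpha", "beta", "gamma" };
    String joined = org.apache.hadoop.util.StringUtils.arrayToString(parts);
    // joined is "alpha,beta,gamma"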
From source file:boostingPL.driver.AdaBoostPLDriver.java
License:Open Source License
@Override public int run(String[] args) throws Exception { int status = commandAnalysis(args); if (status != 0) { return status; }/*from w w w. j a va 2 s. c o m*/ @SuppressWarnings("deprecation") Job job = new Job(getConf()); job.setJobName("AdaBoostPL:" + runModel + " " + dataPath.toString() + " " + modelPath.toString() + " " + numLinesPerMap + " " + numIterations); job.setJarByClass(AdaBoostPLDriver.class); job.setInputFormatClass(NLineInputFormat.class); NLineInputFormat.addInputPath(job, dataPath); NLineInputFormat.setNumLinesPerSplit(job, numLinesPerMap); if (runModel.equals("train")) { job.setMapperClass(AdaBoostPLMapper.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(ClassifierWritable.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(ClassifierWritable.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setOutputPath(job, modelPath); } else { job.setMapperClass(AdaBoostPLTestMapper.class); job.setReducerClass(AdaBoostPLTestReducer.class); job.setOutputFormatClass(NullOutputFormat.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(NullWritable.class); } Configuration conf = job.getConfiguration(); conf.set("BoostingPL.boostingName", "AdaBoost"); conf.set("BoostingPL.numIterations", String.valueOf(numIterations)); conf.set("BoostingPL.modelPath", modelPath.toString()); if (metadataPath == null) { conf.set("BoostingPL.metadata", dataPath.toString() + ".metadata"); } else { conf.set("BoostingPL.metadata", metadataPath.toString()); } if (outputFolder != null) { conf.set("BoostingPL.outputFolder", outputFolder.toString()); } LOG.info(StringUtils.arrayToString(args)); return job.waitForCompletion(true) == true ? 0 : -1; }
From source file:boostingPL.driver.SAMMEPLDriver.java
License:Open Source License
@Override public int run(String[] args) throws Exception { int status = commandAnalysis(args); if (status != 0) { return status; }/*from w w w .j av a2 s .c om*/ @SuppressWarnings("deprecation") Job job = new Job(getConf()); job.setJobName("SAMMEPL:" + runModel + " " + dataPath.toString() + " " + modelPath.toString() + " " + numLinesPerMap + " " + numIterations); job.setJarByClass(SAMMEPLDriver.class); job.setInputFormatClass(NLineInputFormat.class); NLineInputFormat.addInputPath(job, dataPath); NLineInputFormat.setNumLinesPerSplit(job, numLinesPerMap); FileSystem fs = modelPath.getFileSystem(getConf()); if (fs.exists(modelPath)) { fs.delete(modelPath, true); } job.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setOutputPath(job, modelPath); if (runModel.equals("train")) { job.setMapperClass(AdaBoostPLMapper.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(ClassifierWritable.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(ClassifierWritable.class); } else { job.setMapperClass(AdaBoostPLTestMapper.class); job.setReducerClass(AdaBoostPLTestReducer.class); job.setOutputFormatClass(NullOutputFormat.class); job.setMapOutputKeyClass(LongWritable.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(NullWritable.class); } Configuration conf = job.getConfiguration(); conf.set("BoostingPL.boostingName", "SAMME"); conf.set("BoostingPL.numIterations", String.valueOf(numIterations)); conf.set("BoostingPL.modelPath", modelPath.toString()); if (metadataPath == null) { conf.set("BoostingPL.metadata", dataPath.toString() + ".metadata"); } else { conf.set("BoostingPL.metadata", metadataPath.toString()); } if (outputFolder != null) { conf.set("BoostingPL.outputFolder", outputFolder.toString()); } LOG.info(StringUtils.arrayToString(args)); return job.waitForCompletion(true) == true ? 0 : -1; }
From source file:co.cask.tigon.conf.Configuration.java
License:Apache License
/**
 * Set the array of string values for the <code>name</code> property
 * as comma delimited values.
 *
 * @param name property name.
 * @param values the values.
 */
public void setStrings(String name, String... values) {
    set(name, StringUtils.arrayToString(values));
}
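A round-trip sketch, assuming this Configuration mirrors Hadoop's Configuration#getStrings (the property name is hypothetical): values stored via setStrings can be read back as an array, since getStrings splits on the same comma delimiter.

    Configuration conf = new Configuration();
    conf.setStrings("my.app.hosts", "node1", "node2", "node3"); // stored as "node1,node2,node3"
    String[] hosts = conf.getStrings("my.app.hosts");           // {"node1", "node2", "node3"}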
From source file:com.ebay.erl.mobius.core.criterion.TupleRestrictions.java
License:Apache License
/**
 * Create a tuple criterion that only accepts tuples when the value
 * of the <code>column</code> is present in the given <code>file</code>.
 * <p>
 *
 * The file is assumed to be a single-column text file with one or more
 * lines. Each line is read into a case-insensitive set, and that set is
 * used to check whether the value of the <code>column</code> is
 * contained in it.
 *
 * @param column the name of the column whose value is tested for
 *        membership in the given <code>file</code>
 *
 * @param file a single-column, multi-line file of strings/numbers;
 *        each line is treated as a single unit.
 *
 * @return an instance of {@link TupleCriterion} that accepts only the
 *         records whose <code>column</code> value is present in the
 *         given <code>file</code>.
 *
 * @throws FileNotFoundException if the given file cannot be found.
 */
public static TupleCriterion within(final String column, File file) throws FileNotFoundException {
    final File f = TupleRestrictions.checkFileExist(file);

    return new TupleCriterion() {

        private static final long serialVersionUID = -1121221619118915652L;
        private Set<String> set;

        @Override
        public void setConf(Configuration conf) {
            try {
                if (conf.get("tmpfiles") == null || conf.get("tmpfiles").trim().length() == 0) {
                    conf.set("tmpfiles", validateFiles(f.getAbsolutePath(), conf));
                } else {
                    conf.set("tmpfiles", validateFiles(f.getAbsolutePath(), conf) + "," + conf.get("tmpfiles"));
                }
            } catch (IOException e) {
                throw new IllegalArgumentException(e);
            }
        }

        /**
         * COPIED FROM org.apache.hadoop.util.GenericOptionsParser
         */
        private String validateFiles(String files, Configuration conf) throws IOException {
            if (files == null) {
                return null;
            }
            String[] fileArr = files.split(",");
            String[] finalArr = new String[fileArr.length];
            for (int i = 0; i < fileArr.length; i++) {
                String tmp = fileArr[i];
                String finalPath;
                Path path = new Path(tmp);
                URI pathURI = path.toUri();
                FileSystem localFs = FileSystem.getLocal(conf);
                if (pathURI.getScheme() == null) {
                    // default to the local file system;
                    // check whether the file exists first
                    if (!localFs.exists(path)) {
                        throw new FileNotFoundException("File " + tmp + " does not exist.");
                    }
                    finalPath = path.makeQualified(localFs).toString();
                } else {
                    // check that the file exists in this file system; the
                    // filesystem object is recreated so the files can be
                    // copied to the file system the jobtracker runs on
                    FileSystem fs = path.getFileSystem(conf);
                    if (!fs.exists(path)) {
                        throw new FileNotFoundException("File " + tmp + " does not exist.");
                    }
                    finalPath = path.makeQualified(fs).toString();
                    try {
                        fs.close();
                    } catch (IOException e) {
                        // ignore
                    }
                }
                finalArr[i] = finalPath;
            }
            // re-join the validated, fully qualified paths into one comma-separated value
            return StringUtils.arrayToString(finalArr);
        }

        @Override
        protected boolean evaluate(Tuple tuple, Configuration configuration) {
            if (set == null) {
                set = new CaseInsensitiveTreeSet();
                BufferedReader br = null;
                try {
                    // the file was shipped via "tmpfiles", so only its name
                    // is needed in the task's working directory
                    br = new BufferedReader(new FileReader(new File(f.getName())));
                    String newLine = null;
                    while ((newLine = br.readLine()) != null) {
                        this.set.add(newLine);
                    }
                } catch (IOException e) {
                    throw new RuntimeException(e);
                } finally {
                    try {
                        if (br != null) {
                            br.close();
                        }
                    } catch (Throwable e) {
                        // ignore
                    }
                }
            }

            String value = tuple.getString(column);
            return value != null && this.set.contains(value);
        }

        @Override
        public String[] getInvolvedColumns() {
            return new String[] { column };
        }
    };
}
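A hypothetical call site for the criterion above (the column name and file are made up for illustration):

    // accept only tuples whose "country" column value appears in countries.txt
    TupleCriterion criterion = TupleRestrictions.within("country", new File("countries.txt"));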
From source file:com.github.dryangkun.hbase.tidx.hive.HBaseStorageHandler.java
License:Apache License
@Override
public void configureJobConf(TableDesc tableDesc, JobConf jobConf) {
    try {
        HBaseSerDe.configureJobConf(tableDesc, jobConf);
        /*
         * HIVE-6356
         * The following code change is only needed for hbase-0.96.0 due to HBASE-9165, and
         * will not be required once Hive bumps up its hbase version. At that time, we will
         * only need TableMapReduceUtil.addDependencyJars(jobConf) here.
         */
        if (counterClass != null) {
            TableMapReduceUtil.addDependencyJars(jobConf, HBaseStorageHandler.class,
                    TableInputFormatBase.class, counterClass);
        } else {
            TableMapReduceUtil.addDependencyJars(jobConf, HBaseStorageHandler.class,
                    TableInputFormatBase.class);
        }
        if (HiveConf.getVar(jobConf, HiveConf.ConfVars.HIVE_HBASE_SNAPSHOT_NAME) != null) {
            // There is an extra dependency on MetricsRegistry for the snapshot input format.
            TableMapReduceUtil.addDependencyJars(jobConf, MetricsRegistry.class);
        }

        // Merge the dependency jars into "tmpjars", dropping duplicates but
        // preserving insertion order.
        Set<String> merged = new LinkedHashSet<String>(jobConf.getStringCollection("tmpjars"));
        Job copy = new Job(jobConf);
        TableMapReduceUtil.addDependencyJars(copy);
        merged.addAll(copy.getConfiguration().getStringCollection("tmpjars"));
        jobConf.set("tmpjars", StringUtils.arrayToString(merged.toArray(new String[0])));

        // Get credentials using the configuration instance which has the HBase properties.
        JobConf hbaseJobConf = new JobConf(getConf());
        org.apache.hadoop.hbase.mapred.TableMapReduceUtil.initCredentials(hbaseJobConf);
        ShimLoader.getHadoopShims().mergeCredentials(jobConf, hbaseJobConf);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
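The "tmpjars" merge above is a recurring idiom; a minimal standalone sketch (the extra jar path is illustrative), assuming duplicates should be dropped while insertion order is preserved:

    // merge an extra jar into tmpjars without duplicating existing entries
    Set<String> merged = new LinkedHashSet<String>(conf.getStringCollection("tmpjars"));
    merged.add("file:/tmp/extra-dependency.jar"); // hypothetical additional jar
    conf.set("tmpjars", StringUtils.arrayToString(merged.toArray(new String[0])));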
From source file:com.intel.hadoop.hbase.dot.DotUtil.java
License:Apache License
/**
 * Prepare a column for a dot table.
 *
 * @param conf configuration
 * @param hcd HColumnDescriptor
 * @param htd HTableDescriptor
 * @param schemas cf => doc => schemaString
 * @return HColumnDescriptor
 */
public static HColumnDescriptor prepareDotColumn(Configuration conf, HColumnDescriptor hcd,
        HTableDescriptor htd, Map<byte[], Map<byte[], JSONObject>> schemas) {
    if (!isDot(htd)) {
        return hcd;
    }
    byte[] cf = hcd.getName();
    Map<byte[], JSONObject> docSchemaString = schemas.get(cf);
    String[] elements = new String[docSchemaString.keySet().size()];
    int index = 0;
    Iterator<Map.Entry<byte[], JSONObject>> iter = docSchemaString.entrySet().iterator();
    while (iter.hasNext()) {
        Map.Entry<byte[], JSONObject> entry = iter.next();
        byte[] doc = entry.getKey();
        elements[index] = Bytes.toString(doc);
        LOG.info("doc: " + elements[index]);
        hcd.setValue(DotConstants.HBASE_DOT_COLUMNFAMILY_DOC_SCHEMA_PREFIX + elements[index],
                entry.getValue().toString());
        index++;
    }
    // store all document names as one comma-separated value
    hcd.setValue(DotConstants.HBASE_DOT_COLUMNFAMILY_DOC_ELEMENT,
            StringUtils.arrayToString(elements));
    return hcd;
}
From source file:com.intel.hadoop.hbase.dot.KEY.java
License:Apache License
private void createDotTable(String tableName, Map<String, List<String>> layouts, byte[][] splits) {
    HTableDescriptor htd = new HTableDescriptor(tableName);
    for (Map.Entry<String, List<String>> cfLayout : layouts.entrySet()) {
        String family = cfLayout.getKey();
        List<String> columns = cfLayout.getValue();
        HColumnDescriptor cfdesc = new HColumnDescriptor(family);

        // group the "doc.field" column names by document
        Map<String, List<String>> docsMap = new HashMap<String, List<String>>();
        for (String q : columns) {
            int idx = q.indexOf(".");
            String doc = q.substring(0, idx);
            String field = q.substring(idx + 1);
            List<String> fieldList = docsMap.get(doc);
            if (fieldList == null) {
                fieldList = new ArrayList<String>();
                docsMap.put(doc, fieldList);
            }
            fieldList.add(field);
        }

        String[] docs = new String[docsMap.entrySet().size()];
        int index = 0;
        for (Map.Entry<String, List<String>> m : docsMap.entrySet()) {
            String docName = m.getKey();
            List<String> fields = m.getValue();
            boolean firstField = true;
            docs[index++] = docName;
            String docSchemaId = "hbase.dot.columnfamily.doc.schema." + docName;
            // build an Avro-style record schema for the document
            String docSchemaValue = " { \n" + " \"name\": \"" + docName + "\", \n"
                    + " \"type\": \"record\",\n" + " \"fields\": [\n";
            for (String field : fields) {
                if (firstField) {
                    firstField = false;
                } else {
                    docSchemaValue += ", \n";
                }
                docSchemaValue += " {\"name\": \"" + field + "\", \"type\": \"bytes\"}";
            }
            docSchemaValue += " ]}";
            LOG.info("--- " + family + ":" + docName + " = " + docSchemaValue);
            cfdesc.setValue(docSchemaId, docSchemaValue);
        }
        // store all document names as one comma-separated value
        String docElements = StringUtils.arrayToString(docs);
        cfdesc.setValue("hbase.dot.columnfamily.doc.element", docElements);
        htd.addFamily(cfdesc);
    }
    htd.setValue("hbase.dot.enable", "true");
    htd.setValue("hbase.dot.type", "ANALYTICAL");

    try {
        if (splits == null) {
            admin.createTable(htd);
        } else {
            admin.createTable(htd, splits);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:com.intel.hadoop.hbase.dot.StringRange.java
License:Apache License
private static void createDotTable(String tableName, Map<String, List<String>> layouts) {
    HTableDescriptor htd = new HTableDescriptor(tableName);
    for (Map.Entry<String, List<String>> cfLayout : layouts.entrySet()) {
        String family = cfLayout.getKey();
        List<String> columns = cfLayout.getValue();
        HColumnDescriptor cfdesc = new HColumnDescriptor(family);

        // group the "doc.field" column names by document
        Map<String, List<String>> docsMap = new HashMap<String, List<String>>();
        for (String q : columns) {
            int idx = q.indexOf(".");
            String doc = q.substring(0, idx);
            String field = q.substring(idx + 1);
            List<String> fieldList = docsMap.get(doc);
            if (fieldList == null) {
                fieldList = new ArrayList<String>();
                docsMap.put(doc, fieldList);
            }
            fieldList.add(field);
        }

        String[] docs = new String[docsMap.entrySet().size()];
        int index = 0;
        for (Map.Entry<String, List<String>> m : docsMap.entrySet()) {
            String docName = m.getKey();
            List<String> fields = m.getValue();
            boolean firstField = true;
            docs[index++] = docName;
            String docSchemaId = "hbase.dot.columnfamily.doc.schema." + docName;
            // build an Avro-style record schema for the document
            String docSchemaValue = " { \n" + " \"name\": \"" + docName + "\", \n"
                    + " \"type\": \"record\",\n" + " \"fields\": [\n";
            for (String field : fields) {
                if (firstField) {
                    firstField = false;
                } else {
                    docSchemaValue += ", \n";
                }
                docSchemaValue += " {\"name\": \"" + field + "\", \"type\": \"bytes\"}";
            }
            docSchemaValue += " ]}";
            LOG.info("--- " + family + ":" + docName + " = " + docSchemaValue);
            cfdesc.setValue(docSchemaId, docSchemaValue);
        }
        // store all document names as one comma-separated value
        String docElements = StringUtils.arrayToString(docs);
        cfdesc.setValue("hbase.dot.columnfamily.doc.element", docElements);
        htd.addFamily(cfdesc);
    }
    htd.setValue("hbase.dot.enable", "true");
    htd.setValue("hbase.dot.type", "ANALYTICAL");

    try {
        admin.createTable(htd);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:com.scaleoutsoftware.soss.hserver.hadoop.DistributedCacheManager.java
License:Apache License
/**
 * Set up the distributed cache by localizing the resources, and updating
 * the configuration with references to the localized resources.
 *
 * @param conf job configuration
 * @throws IOException
 */
public void setup(Configuration conf) throws IOException {
    // If we are not the 0th worker, wait for the 0th worker to set up the cache.
    if (InvocationWorker.getIgWorkerIndex() > 0 && InvocationWorker.getNumberOfWorkers() > 1) {
        try {
            InvocationWorker.getSynchronizationBarrier().waitForComplete(ACTION_NAME,
                    SYNCHRONIZATION_WAIT_MS, WAIT_GRANULARITY_MS);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        return;
    }

    File workDir = new File(System.getProperty("user.dir"));

    // Generate YARN local resource objects corresponding to the distributed
    // cache configuration.
    Map<String, LocalResource> localResources = new LinkedHashMap<String, LocalResource>();
    MRApps.setupDistributedCache(conf, localResources);

    // CODE CHANGE FROM ORIGINAL FILE:
    // We need to clear the resources from jar files, since they are
    // distributed through the IG.
    Iterator<Map.Entry<String, LocalResource>> iterator = localResources.entrySet().iterator();
    while (iterator.hasNext()) {
        Entry<String, LocalResource> entry = iterator.next();
        if (entry.getKey().endsWith(".jar")) {
            iterator.remove();
        }
    }

    // Generating unique numbers for FSDownload.
    AtomicLong uniqueNumberGenerator = new AtomicLong(System.currentTimeMillis());

    // Find which resources are to be put on the local classpath.
    Map<String, Path> classpaths = new HashMap<String, Path>();
    Path[] archiveClassPaths = DistributedCache.getArchiveClassPaths(conf);
    if (archiveClassPaths != null) {
        for (Path p : archiveClassPaths) {
            FileSystem remoteFS = p.getFileSystem(conf);
            p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory()));
            classpaths.put(p.toUri().getPath().toString(), p);
        }
    }
    Path[] fileClassPaths = DistributedCache.getFileClassPaths(conf);
    if (fileClassPaths != null) {
        for (Path p : fileClassPaths) {
            FileSystem remoteFS = p.getFileSystem(conf);
            p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory()));
            classpaths.put(p.toUri().getPath().toString(), p);
        }
    }

    // Localize the resources.
    LocalDirAllocator localDirAllocator = new LocalDirAllocator(MRConfig.LOCAL_DIR);
    FileContext localFSFileContext = FileContext.getLocalFSFileContext();
    UserGroupInformation ugi = UserGroupInformation.getCurrentUser();

    ExecutorService exec = null;
    try {
        ThreadFactory tf = new ThreadFactoryBuilder()
                .setNameFormat("LocalDistributedCacheManager Downloader #%d").build();
        exec = Executors.newCachedThreadPool(tf);
        Path destPath = localDirAllocator.getLocalPathForWrite(".", conf);
        Map<LocalResource, Future<Path>> resourcesToPaths = Maps.newHashMap();
        for (LocalResource resource : localResources.values()) {
            Callable<Path> download = new FSDownload(localFSFileContext, ugi, conf,
                    new Path(destPath, Long.toString(uniqueNumberGenerator.incrementAndGet())),
                    resource);
            Future<Path> future = exec.submit(download);
            resourcesToPaths.put(resource, future);
        }
        for (Entry<String, LocalResource> entry : localResources.entrySet()) {
            LocalResource resource = entry.getValue();
            Path path;
            try {
                path = resourcesToPaths.get(resource).get();
            } catch (InterruptedException e) {
                throw new IOException(e);
            } catch (ExecutionException e) {
                throw new IOException(e);
            }
            String pathString = path.toUri().toString();
            String link = entry.getKey();
            String target = new File(path.toUri()).getPath();
            symlink(workDir, target, link);

            if (resource.getType() == LocalResourceType.ARCHIVE) {
                localArchives.add(pathString);
            } else if (resource.getType() == LocalResourceType.FILE) {
                localFiles.add(pathString);
            } else if (resource.getType() == LocalResourceType.PATTERN) {
                // PATTERN is not currently used in local mode.
                throw new IllegalArgumentException("Resource type PATTERN is not "
                        + "implemented yet. " + resource.getResource());
            }
            Path resourcePath;
            try {
                resourcePath = ConverterUtils.getPathFromYarnURL(resource.getResource());
            } catch (URISyntaxException e) {
                throw new IOException(e);
            }
            LOG.info(String.format("Localized %s as %s", resourcePath, path));
            String cp = resourcePath.toUri().getPath();
            if (classpaths.keySet().contains(cp)) {
                localClasspaths.add(path.toUri().getPath().toString());
            }
        }
    } finally {
        if (exec != null) {
            exec.shutdown();
        }
    }

    // Update the configuration object with the localized data.
    if (!localArchives.isEmpty()) {
        conf.set(MRJobConfig.CACHE_LOCALARCHIVES,
                StringUtils.arrayToString(localArchives.toArray(new String[localArchives.size()])));
    }
    if (!localFiles.isEmpty()) {
        conf.set(MRJobConfig.CACHE_LOCALFILES,
                StringUtils.arrayToString(localFiles.toArray(new String[localFiles.size()])));
    }
    setupCalled = true;

    // If we are the 0th worker, signal that the action is complete.
    if (InvocationWorker.getIgWorkerIndex() == 0 && InvocationWorker.getNumberOfWorkers() > 1) {
        try {
            InvocationWorker.getSynchronizationBarrier().signalComplete(ACTION_NAME);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}
From source file:com.splicemachine.mrio.api.SpliceTableMapReduceUtil.java
License:Apache License
/**
 * Add the jars containing the given classes to the job's configuration
 * such that JobClient will ship them to the cluster and add them to
 * the DistributedCache.
 */
public static void addDependencyJars(Configuration conf, Class... classes) throws IOException {
    FileSystem localFs = FileSystem.getLocal(conf);
    Set<String> jars = new HashSet<String>();

    // Add jars that are already in the tmpjars variable.
    jars.addAll(conf.getStringCollection("tmpjars"));

    // Add jars containing the specified classes.
    for (Class clazz : classes) {
        if (clazz == null) {
            continue;
        }
        String pathStr = findOrCreateJar(clazz);
        if (pathStr == null) {
            LOG.warn("Could not find jar for class " + clazz + " in order to ship it to the cluster.");
            continue;
        }
        Path path = new Path(pathStr);
        if (!localFs.exists(path)) {
            LOG.warn("Could not validate jar file " + path + " for class " + clazz);
            continue;
        }
        jars.add(path.makeQualified(localFs).toString());
    }
    if (jars.isEmpty()) {
        return;
    }

    conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[0])));
}
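A hypothetical invocation (the mapper and writable classes are placeholders) that ships the jars containing a job's user classes before submission:

    // ship the jars that contain these classes along with the job
    SpliceTableMapReduceUtil.addDependencyJars(conf, MyMapper.class, MyCustomWritable.class);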