Example usage for org.apache.hadoop.util StringUtils arrayToString

List of usage examples for org.apache.hadoop.util StringUtils arrayToString

Introduction

On this page you can find example usage of org.apache.hadoop.util.StringUtils.arrayToString.

Prototype


public static String arrayToString(String[] strs) 

Document

Given an array of strings, return a comma-separated list of its elements.
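
As a minimal, self-contained sketch of the method (assuming only hadoop-common on the classpath; the class name ArrayToStringDemo and the use of StringUtils.getStrings as the inverse split are illustrative choices, not part of the examples below):

import org.apache.hadoop.util.StringUtils;

public class ArrayToStringDemo {
    public static void main(String[] args) {
        String[] parts = { "alpha", "beta", "gamma" };

        // Join the array into a single comma-separated string: "alpha,beta,gamma"
        String joined = StringUtils.arrayToString(parts);
        System.out.println(joined);

        // getStrings splits a comma-separated string back into an array
        String[] roundTrip = StringUtils.getStrings(joined);
        System.out.println(roundTrip.length); // prints 3
    }
}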

Usage

From source file:boostingPL.driver.AdaBoostPLDriver.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    int status = commandAnalysis(args);
    if (status != 0) {
        return status;
    }

    @SuppressWarnings("deprecation")
    Job job = new Job(getConf());
    job.setJobName("AdaBoostPL:" + runModel + " " + dataPath.toString() + " " + modelPath.toString() + " "
            + numLinesPerMap + " " + numIterations);
    job.setJarByClass(AdaBoostPLDriver.class);

    job.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.addInputPath(job, dataPath);
    NLineInputFormat.setNumLinesPerSplit(job, numLinesPerMap);

    if (runModel.equals("train")) {
        job.setMapperClass(AdaBoostPLMapper.class);

        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(ClassifierWritable.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(ClassifierWritable.class);

        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputPath(job, modelPath);
    } else {
        job.setMapperClass(AdaBoostPLTestMapper.class);
        job.setReducerClass(AdaBoostPLTestReducer.class);
        job.setOutputFormatClass(NullOutputFormat.class);

        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(NullWritable.class);
    }

    Configuration conf = job.getConfiguration();
    conf.set("BoostingPL.boostingName", "AdaBoost");
    conf.set("BoostingPL.numIterations", String.valueOf(numIterations));
    conf.set("BoostingPL.modelPath", modelPath.toString());
    if (metadataPath == null) {
        conf.set("BoostingPL.metadata", dataPath.toString() + ".metadata");
    } else {
        conf.set("BoostingPL.metadata", metadataPath.toString());
    }
    if (outputFolder != null) {
        conf.set("BoostingPL.outputFolder", outputFolder.toString());
    }

    LOG.info(StringUtils.arrayToString(args));
    return job.waitForCompletion(true) ? 0 : -1;
}

From source file:boostingPL.driver.SAMMEPLDriver.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    int status = commandAnalysis(args);
    if (status != 0) {
        return status;
    }

    @SuppressWarnings("deprecation")
    Job job = new Job(getConf());
    job.setJobName("SAMMEPL:" + runModel + " " + dataPath.toString() + " " + modelPath.toString() + " "
            + numLinesPerMap + " " + numIterations);
    job.setJarByClass(SAMMEPLDriver.class);

    job.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.addInputPath(job, dataPath);
    NLineInputFormat.setNumLinesPerSplit(job, numLinesPerMap);
    FileSystem fs = modelPath.getFileSystem(getConf());
    if (fs.exists(modelPath)) {
        fs.delete(modelPath, true);
    }
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, modelPath);

    if (runModel.equals("train")) {
        job.setMapperClass(AdaBoostPLMapper.class);

        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(ClassifierWritable.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(ClassifierWritable.class);
    } else {
        job.setMapperClass(AdaBoostPLTestMapper.class);
        job.setReducerClass(AdaBoostPLTestReducer.class);
        job.setOutputFormatClass(NullOutputFormat.class);

        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(NullWritable.class);
    }

    Configuration conf = job.getConfiguration();
    conf.set("BoostingPL.boostingName", "SAMME");
    conf.set("BoostingPL.numIterations", String.valueOf(numIterations));
    conf.set("BoostingPL.modelPath", modelPath.toString());
    if (metadataPath == null) {
        conf.set("BoostingPL.metadata", dataPath.toString() + ".metadata");
    } else {
        conf.set("BoostingPL.metadata", metadataPath.toString());
    }
    if (outputFolder != null) {
        conf.set("BoostingPL.outputFolder", outputFolder.toString());
    }

    LOG.info(StringUtils.arrayToString(args));
    return job.waitForCompletion(true) ? 0 : -1;
}

From source file:co.cask.tigon.conf.Configuration.java

License:Apache License

/**
 * Set the array of string values for the <code>name</code> property as
 * comma-delimited values.
 *
 * @param name property name.
 * @param values The values
 */
public void setStrings(String name, String... values) {
    set(name, StringUtils.arrayToString(values));
}
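
This mirrors Hadoop's own Configuration.setStrings. A hedged usage sketch against the standard org.apache.hadoop.conf.Configuration, which is assumed here to behave the same way as the Tigon class above (the property name my.app.hosts is made up):

import org.apache.hadoop.conf.Configuration;

public class SetStringsDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Stored internally as the single value "host1,host2,host3"
        conf.setStrings("my.app.hosts", "host1", "host2", "host3");
        System.out.println(conf.get("my.app.hosts"));

        // getStrings splits the comma-delimited value back into an array
        String[] hosts = conf.getStrings("my.app.hosts");
        System.out.println(hosts.length); // prints 3
    }
}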

From source file:com.ebay.erl.mobius.core.criterion.TupleRestrictions.java

License:Apache License

/**
 * Create a tuple criterion that only accepts tuples whose value for the
 * <code>column</code> is present in the given <code>file</code>.
 * <p>
 *
 * The file is assumed to be a single-column text file with one or more
 * lines.  Each line is read into a case-insensitive set, and that set is
 * used to check whether the value of the <code>column</code> is a member.
 *
 * @param column the name of the column whose value is tested for membership
 * in the given <code>file</code>
 *
 * @param file a single-column, multi-line file containing strings/numbers;
 * each line is treated as a single entry
 *
 * @return an instance of {@link TupleCriterion} that keeps only the records
 * whose <code>column</code> value is present in the given
 * <code>file</code>.
 *
 * @throws FileNotFoundException if the given file cannot be found.
 */
public static TupleCriterion within(final String column, File file) throws FileNotFoundException {
    final File f = TupleRestrictions.checkFileExist(file);

    return new TupleCriterion() {

        private static final long serialVersionUID = -1121221619118915652L;
        private Set<String> set;

        @Override
        public void setConf(Configuration conf) {
            try {
                if (conf.get("tmpfiles") == null || conf.get("tmpfiles").trim().length() == 0) {
                    conf.set("tmpfiles", validateFiles(f.getAbsolutePath(), conf));
                } else {
                    conf.set("tmpfiles", validateFiles(f.getAbsolutePath(), conf) + "," + conf.get("tmpfiles"));
                }

            } catch (IOException e) {
                throw new IllegalArgumentException(e);
            }
        }

        /**
         * COPIED FROM org.apache.hadoop.util.GenericOptionsParser
         */
        private String validateFiles(String files, Configuration conf) throws IOException {
            if (files == null)
                return null;
            String[] fileArr = files.split(",");
            String[] finalArr = new String[fileArr.length];
            for (int i = 0; i < fileArr.length; i++) {
                String tmp = fileArr[i];
                String finalPath;
                Path path = new Path(tmp);
                URI pathURI = path.toUri();
                FileSystem localFs = FileSystem.getLocal(conf);
                if (pathURI.getScheme() == null) {
                    // default to the local file system
                    // check if the file exists or not first
                    if (!localFs.exists(path)) {
                        throw new FileNotFoundException("File " + tmp + " does not exist.");
                    }
                    finalPath = path.makeQualified(localFs).toString();
                } else {
                    // check if the file exists in this file system
                    // we need to recreate this filesystem object to copy
                    // these files to the file system jobtracker is running
                    // on.
                    FileSystem fs = path.getFileSystem(conf);
                    if (!fs.exists(path)) {
                        throw new FileNotFoundException("File " + tmp + " does not exist.");
                    }
                    finalPath = path.makeQualified(fs).toString();
                    try {
                        fs.close();
                    } catch (IOException e) {
                        // ignore failures while closing the remote file system
                    }
                }
                finalArr[i] = finalPath;
            }
            return StringUtils.arrayToString(finalArr);
        }

        @Override
        protected boolean evaluate(Tuple tuple, Configuration configuration) {
            if (set == null) {
                set = new CaseInsensitiveTreeSet();
                BufferedReader br = null;
                try {
                    br = new BufferedReader(new FileReader(new File(f.getName())));
                    String newLine = null;
                    while ((newLine = br.readLine()) != null) {
                        this.set.add(newLine);
                    }
                } catch (IOException e) {
                    throw new RuntimeException(e);
                } finally {
                    try {
                        if (br != null) {
                            br.close();
                        }
                    } catch (Throwable e) {
                        // ignore failures while closing the reader
                    }
                }
            }

            String value = tuple.getString(column);
            if (value != null) {
                return this.set.contains(value);
            } else {
                return false;
            }
        }

        @Override
        public String[] getInvolvedColumns() {
            return new String[] { column };
        }
    };
}
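
A hedged call-site sketch for the method above. The column name "country" and the file countries.txt are hypothetical, and TupleCriterion is assumed to live in the same Mobius package as TupleRestrictions; wiring the criterion into an actual Mobius job is not shown:

import java.io.File;
import java.io.FileNotFoundException;

import com.ebay.erl.mobius.core.criterion.TupleCriterion;
import com.ebay.erl.mobius.core.criterion.TupleRestrictions;

public class WithinDemo {
    public static void main(String[] args) throws FileNotFoundException {
        // Keep only tuples whose "country" value appears (case-insensitively)
        // in countries.txt, one entry per line; both names are placeholders.
        TupleCriterion criterion =
                TupleRestrictions.within("country", new File("countries.txt"));

        // The criterion would then be attached to a Mobius dataset/job filter;
        // that wiring depends on the wider Mobius API and is not shown here.
    }
}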

From source file:com.github.dryangkun.hbase.tidx.hive.HBaseStorageHandler.java

License:Apache License

@Override
public void configureJobConf(TableDesc tableDesc, JobConf jobConf) {
    try {
        HBaseSerDe.configureJobConf(tableDesc, jobConf);
        /*
         * HIVE-6356
         * The following code change is only needed for hbase-0.96.0 due to HBASE-9165, and
         * will not be required once Hive bumps up its hbase version). At that time , we will
         * only need TableMapReduceUtil.addDependencyJars(jobConf) here.
         */
        if (counterClass != null) {
            TableMapReduceUtil.addDependencyJars(jobConf, HBaseStorageHandler.class, TableInputFormatBase.class,
                    counterClass);
        } else {
            TableMapReduceUtil.addDependencyJars(jobConf, HBaseStorageHandler.class,
                    TableInputFormatBase.class);
        }
        if (HiveConf.getVar(jobConf, HiveConf.ConfVars.HIVE_HBASE_SNAPSHOT_NAME) != null) {
            // There is an extra dependency on MetricsRegistry for snapshot IF.
            TableMapReduceUtil.addDependencyJars(jobConf, MetricsRegistry.class);
        }
        Set<String> merged = new LinkedHashSet<String>(jobConf.getStringCollection("tmpjars"));

        Job copy = new Job(jobConf);
        TableMapReduceUtil.addDependencyJars(copy);
        merged.addAll(copy.getConfiguration().getStringCollection("tmpjars"));
        jobConf.set("tmpjars", StringUtils.arrayToString(merged.toArray(new String[0])));

        // Get credentials using the configuration instance which has HBase properties
        JobConf hbaseJobConf = new JobConf(getConf());
        org.apache.hadoop.hbase.mapred.TableMapReduceUtil.initCredentials(hbaseJobConf);
        ShimLoader.getHadoopShims().mergeCredentials(jobConf, hbaseJobConf);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
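
The tmpjars handling above follows a common pattern: read the existing comma-separated list with getStringCollection, merge new entries in a LinkedHashSet to preserve order and drop duplicates, then write the result back with arrayToString. A stripped-down sketch of just that round trip (the extra jar path is hypothetical):

import java.util.LinkedHashSet;
import java.util.Set;

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.StringUtils;

public class TmpJarsDemo {
    public static void main(String[] args) {
        JobConf jobConf = new JobConf();

        // Read whatever is already in tmpjars as a collection of strings
        Set<String> merged = new LinkedHashSet<String>(jobConf.getStringCollection("tmpjars"));

        // Hypothetical extra dependency to ship with the job
        merged.add("file:/tmp/extra-lib.jar");

        // Write the merged set back as one comma-separated value
        jobConf.set("tmpjars", StringUtils.arrayToString(merged.toArray(new String[0])));
        System.out.println(jobConf.get("tmpjars"));
    }
}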

From source file:com.intel.hadoop.hbase.dot.DotUtil.java

License:Apache License

/**
 * Prepare a column for a dot table.
 *
 * @param conf
 * @param hcd
 *          HColumnDescriptor
 * @param htd
 *          HTableDescriptor
 * @param schemas
 *          cf => doc => schemaString
 * @return HColumnDescriptor
 */
public static HColumnDescriptor prepareDotColumn(Configuration conf, HColumnDescriptor hcd,
        HTableDescriptor htd, Map<byte[], Map<byte[], JSONObject>> schemas) {
    if (!isDot(htd))
        return hcd;

    byte[] cf = hcd.getName();
    Map<byte[], JSONObject> docSchemaString = schemas.get(cf);
    String[] elements = new String[docSchemaString.keySet().size()];
    int index = 0;
    Iterator<Map.Entry<byte[], JSONObject>> iter = docSchemaString.entrySet().iterator();
    while (iter.hasNext()) {
        Map.Entry<byte[], JSONObject> entry = iter.next();
        byte[] doc = entry.getKey();
        elements[index] = Bytes.toString(doc);
        LOG.info("doc: " + elements[index]);
        hcd.setValue(DotConstants.HBASE_DOT_COLUMNFAMILY_DOC_SCHEMA_PREFIX + elements[index],
                entry.getValue().toString());
        index++;
    }

    hcd.setValue(DotConstants.HBASE_DOT_COLUMNFAMILY_DOC_ELEMENT, StringUtils.arrayToString(elements));
    return hcd;
}

From source file:com.intel.hadoop.hbase.dot.KEY.java

License:Apache License

private void createDotTable(String tableName, Map<String, List<String>> layouts, byte[][] splits) {

    HTableDescriptor htd = new HTableDescriptor(tableName);

    for (Map.Entry<String, List<String>> cfLayout : layouts.entrySet()) {
        String family = cfLayout.getKey();
        List<String> columns = cfLayout.getValue();

        HColumnDescriptor cfdesc = new HColumnDescriptor(family);

        Map<String, List<String>> docsMap = new HashMap<String, List<String>>();

        for (String q : columns) {
            int idx = q.indexOf(".");
            String doc = q.substring(0, idx);
            String field = q.substring(idx + 1);

            List<String> fieldList = docsMap.get(doc);

            if (fieldList == null) {
                fieldList = new ArrayList<String>();
                docsMap.put(doc, fieldList);
            }

            fieldList.add(field);
        }

        String[] docs = new String[docsMap.entrySet().size()];
        int index = 0;

        for (Map.Entry<String, List<String>> m : docsMap.entrySet()) {
            String docName = m.getKey();
            List<String> fields = m.getValue();
            boolean firstField = true;

            docs[index++] = docName;

            String docSchemaId = "hbase.dot.columnfamily.doc.schema." + docName;
            String docSchemaValue = " {    \n" + " \"name\": \"" + docName + "\", \n"
                    + " \"type\": \"record\",\n" + " \"fields\": [\n";
            for (String field : fields) {
                if (firstField) {
                    firstField = false;
                } else {
                    docSchemaValue += ", \n";
                }
                docSchemaValue += " {\"name\": \"" + field + "\", \"type\": \"bytes\"}";
            }

            docSchemaValue += " ]}";
            LOG.info("--- " + family + ":" + docName + " = " + docSchemaValue);
            cfdesc.setValue(docSchemaId, docSchemaValue);
        }
        String docElements = StringUtils.arrayToString(docs);
        cfdesc.setValue("hbase.dot.columnfamily.doc.element", docElements);
        htd.addFamily(cfdesc);
    }

    htd.setValue("hbase.dot.enable", "true");
    htd.setValue("hbase.dot.type", "ANALYTICAL");

    try {
        if (splits == null) {
            admin.createTable(htd);
        } else {
            admin.createTable(htd, splits);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:com.intel.hadoop.hbase.dot.StringRange.java

License:Apache License

private static void createDotTable(String tableName, Map<String, List<String>> layouts) {

    HTableDescriptor htd = new HTableDescriptor(tableName);

    for (Map.Entry<String, List<String>> cfLayout : layouts.entrySet()) {
        String family = cfLayout.getKey();
        List<String> columns = cfLayout.getValue();

        HColumnDescriptor cfdesc = new HColumnDescriptor(family);

        Map<String, List<String>> docsMap = new HashMap<String, List<String>>();

        for (String q : columns) {
            int idx = q.indexOf(".");
            String doc = q.substring(0, idx);
            String field = q.substring(idx + 1);

            List<String> fieldList = docsMap.get(doc);

            if (fieldList == null) {
                fieldList = new ArrayList<String>();
                docsMap.put(doc, fieldList);
            }

            fieldList.add(field);
        }

        String[] docs = new String[docsMap.entrySet().size()];
        int index = 0;

        for (Map.Entry<String, List<String>> m : docsMap.entrySet()) {
            String docName = m.getKey();
            List<String> fields = m.getValue();
            boolean firstField = true;

            docs[index++] = docName;

            String docSchemaId = "hbase.dot.columnfamily.doc.schema." + docName;
            String docSchemaValue = " {    \n" + " \"name\": \"" + docName + "\", \n"
                    + " \"type\": \"record\",\n" + " \"fields\": [\n";
            for (String field : fields) {
                if (firstField) {
                    firstField = false;
                } else {
                    docSchemaValue += ", \n";
                }
                docSchemaValue += " {\"name\": \"" + field + "\", \"type\": \"bytes\"}";
            }

            docSchemaValue += " ]}";
            LOG.info("--- " + family + ":" + docName + " = " + docSchemaValue);
            cfdesc.setValue(docSchemaId, docSchemaValue);
        }
        String docElements = StringUtils.arrayToString(docs);
        cfdesc.setValue("hbase.dot.columnfamily.doc.element", docElements);
        htd.addFamily(cfdesc);
    }

    htd.setValue("hbase.dot.enable", "true");
    htd.setValue("hbase.dot.type", "ANALYTICAL");

    try {
        admin.createTable(htd);
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:com.scaleoutsoftware.soss.hserver.hadoop.DistributedCacheManager.java

License:Apache License

/**
 * Set up the distributed cache by localizing the resources, and updating
 * the configuration with references to the localized resources.
 * @param conf job configuration
 * @throws IOException
 */
public void setup(Configuration conf) throws IOException {
    //If we are not 0th worker, wait for 0th worker to set up the cache
    if (InvocationWorker.getIgWorkerIndex() > 0 && InvocationWorker.getNumberOfWorkers() > 1) {
        try {
            InvocationWorker.getSynchronizationBarrier().waitForComplete(ACTION_NAME, SYNCHRONIZATION_WAIT_MS,
                    WAIT_GRANULARITY_MS);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        return;
    }

    File workDir = new File(System.getProperty("user.dir"));

    // Generate YARN local resources objects corresponding to the distributed
    // cache configuration
    Map<String, LocalResource> localResources = new LinkedHashMap<String, LocalResource>();
    MRApps.setupDistributedCache(conf, localResources);

    //CODE CHANGE FROM ORIGINAL FILE:
    //We need to clear the resources from jar files, since they are distributed through the IG.
    //
    Iterator<Map.Entry<String, LocalResource>> iterator = localResources.entrySet().iterator();
    while (iterator.hasNext()) {
        Entry<String, LocalResource> entry = iterator.next();
        if (entry.getKey().endsWith(".jar")) {
            iterator.remove();
        }
    }

    // Generating unique numbers for FSDownload.

    AtomicLong uniqueNumberGenerator = new AtomicLong(System.currentTimeMillis());

    // Find which resources are to be put on the local classpath
    Map<String, Path> classpaths = new HashMap<String, Path>();
    Path[] archiveClassPaths = DistributedCache.getArchiveClassPaths(conf);
    if (archiveClassPaths != null) {
        for (Path p : archiveClassPaths) {
            FileSystem remoteFS = p.getFileSystem(conf);
            p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory()));
            classpaths.put(p.toUri().getPath().toString(), p);
        }
    }

    Path[] fileClassPaths = DistributedCache.getFileClassPaths(conf);
    if (fileClassPaths != null) {
        for (Path p : fileClassPaths) {
            FileSystem remoteFS = p.getFileSystem(conf);
            p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory()));
            classpaths.put(p.toUri().getPath().toString(), p);
        }
    }

    // Localize the resources
    LocalDirAllocator localDirAllocator = new LocalDirAllocator(MRConfig.LOCAL_DIR);
    FileContext localFSFileContext = FileContext.getLocalFSFileContext();
    UserGroupInformation ugi = UserGroupInformation.getCurrentUser();

    ExecutorService exec = null;
    try {
        ThreadFactory tf = new ThreadFactoryBuilder()
                .setNameFormat("LocalDistributedCacheManager Downloader #%d").build();
        exec = Executors.newCachedThreadPool(tf);
        Path destPath = localDirAllocator.getLocalPathForWrite(".", conf);
        Map<LocalResource, Future<Path>> resourcesToPaths = Maps.newHashMap();
        for (LocalResource resource : localResources.values()) {
            Callable<Path> download = new FSDownload(localFSFileContext, ugi, conf,
                    new Path(destPath, Long.toString(uniqueNumberGenerator.incrementAndGet())), resource);
            Future<Path> future = exec.submit(download);
            resourcesToPaths.put(resource, future);
        }
        for (Entry<String, LocalResource> entry : localResources.entrySet()) {
            LocalResource resource = entry.getValue();
            Path path;
            try {
                path = resourcesToPaths.get(resource).get();
            } catch (InterruptedException e) {
                throw new IOException(e);
            } catch (ExecutionException e) {
                throw new IOException(e);
            }
            String pathString = path.toUri().toString();
            String link = entry.getKey();
            String target = new File(path.toUri()).getPath();
            symlink(workDir, target, link);

            if (resource.getType() == LocalResourceType.ARCHIVE) {
                localArchives.add(pathString);
            } else if (resource.getType() == LocalResourceType.FILE) {
                localFiles.add(pathString);
            } else if (resource.getType() == LocalResourceType.PATTERN) {
                //PATTERN is not currently used in local mode
                throw new IllegalArgumentException(
                        "Resource type PATTERN is not " + "implemented yet. " + resource.getResource());
            }
            Path resourcePath;
            try {
                resourcePath = ConverterUtils.getPathFromYarnURL(resource.getResource());
            } catch (URISyntaxException e) {
                throw new IOException(e);
            }
            LOG.info(String.format("Localized %s as %s", resourcePath, path));
            String cp = resourcePath.toUri().getPath();
            if (classpaths.keySet().contains(cp)) {
                localClasspaths.add(path.toUri().getPath().toString());
            }
        }
    } finally {
        if (exec != null) {
            exec.shutdown();
        }
    }
    // Update the configuration object with localized data.
    if (!localArchives.isEmpty()) {
        conf.set(MRJobConfig.CACHE_LOCALARCHIVES,
                StringUtils.arrayToString(localArchives.toArray(new String[localArchives.size()])));
    }
    if (!localFiles.isEmpty()) {
        conf.set(MRJobConfig.CACHE_LOCALFILES,
                StringUtils.arrayToString(localFiles.toArray(new String[localFiles.size()])));
    }
    setupCalled = true;

    //If we are  0th worker, signal action complete
    if (InvocationWorker.getIgWorkerIndex() == 0 && InvocationWorker.getNumberOfWorkers() > 1) {
        try {
            InvocationWorker.getSynchronizationBarrier().signalComplete(ACTION_NAME);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

}

From source file:com.splicemachine.mrio.api.SpliceTableMapReduceUtil.java

License:Apache License

/**
 * Add the jars containing the given classes to the job's configuration
 * such that JobClient will ship them to the cluster and add them to
 * the DistributedCache.
 */
public static void addDependencyJars(Configuration conf, Class... classes) throws IOException {

    FileSystem localFs = FileSystem.getLocal(conf);

    Set<String> jars = new HashSet<String>();

    // Add jars that are already in the tmpjars variable
    jars.addAll(conf.getStringCollection("tmpjars"));

    // Add jars containing the specified classes
    for (Class clazz : classes) {
        if (clazz == null)
            continue;

        String pathStr = findOrCreateJar(clazz);
        if (pathStr == null) {
            LOG.warn("Could not find jar for class " + clazz + " in order to ship it to the cluster.");
            continue;
        }
        Path path = new Path(pathStr);
        if (!localFs.exists(path)) {
            LOG.warn("Could not validate jar file " + path + " for class " + clazz);
            continue;
        }
        jars.add(path.makeQualified(localFs).toString());
    }
    if (jars.isEmpty())
        return;

    conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[0])));
}