Example usage for org.apache.hadoop.util.StringUtils.arrayToString

Introduction

This page lists example usages of org.apache.hadoop.util.StringUtils.arrayToString.

Prototype

public static String arrayToString(String[] strs)

Document

Given an array of strings, return a comma-separated list of its elements.
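
A minimal sketch of this behavior (the demo class and input values below are illustrative, not taken from the examples that follow):

import org.apache.hadoop.util.StringUtils;

public class ArrayToStringDemo {
    public static void main(String[] args) {
        String[] paths = { "/data/in1", "/data/in2", "/data/in3" };
        // arrayToString joins the elements with commas and no spaces:
        // prints "/data/in1,/data/in2,/data/in3"
        System.out.println(StringUtils.arrayToString(paths));
    }
}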

Usage

From source file: org.apache.jena.hadoop.rdf.stats.jobs.JobFactory.java

License: Apache License

/**
 * Gets a job for computing type counts on RDF triple inputs
 *
 * @param config
 *            Configuration
 * @param inputPaths
 *            Input paths
 * @param intermediateOutputPath
 *            Path for intermediate output which will be all the type
 *            declaration triples present in the inputs
 * @param outputPath
 *            Output path
 * @return Job
 * @throws IOException
 */
public static Job[] getTripleTypeCountJobs(Configuration config, String[] inputPaths,
        String intermediateOutputPath, String outputPath) throws IOException {
    Job[] jobs = new Job[2];

    Job job = Job.getInstance(config);
    job.setJarByClass(JobFactory.class);
    job.setJobName("RDF Type Triples Extraction");

    // Map/Reduce classes
    job.getConfiguration().setStrings(RdfMapReduceConstants.FILTER_PREDICATE_URIS, RDF.type.getURI());
    job.setMapperClass(TripleFilterByPredicateUriMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(TripleWritable.class);

    // Input and Output Format
    job.setInputFormatClass(TriplesInputFormat.class);
    job.setOutputFormatClass(NTriplesOutputFormat.class);
    FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
    FileOutputFormat.setOutputPath(job, new Path(intermediateOutputPath));

    jobs[0] = job;

    // Object Node Usage count job
    job = Job.getInstance(config);
    job.setJarByClass(JobFactory.class);
    job.setJobName("RDF Triples Type Usage Count");

    // Map/Reduce classes
    job.setMapperClass(TripleObjectCountMapper.class);
    job.setMapOutputKeyClass(NodeWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setReducerClass(NodeCountReducer.class);

    // Input and Output
    job.setInputFormatClass(NTriplesInputFormat.class);
    NLineInputFormat.setNumLinesPerSplit(job, 10000); // TODO Would be
                                                      // better if this was
                                                      // intelligently
                                                      // configured
    job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
    FileInputFormat.setInputPaths(job, intermediateOutputPath);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    jobs[1] = job;

    return jobs;
}
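
The two jobs returned above form a pipeline: the second job reads the first job's intermediate output, so they must run in order. A minimal driver sketch, assuming the caller already holds the configuration and paths (this chaining code is not part of JobFactory):

Job[] jobs = JobFactory.getTripleTypeCountJobs(config, inputPaths, intermediateOutputPath, outputPath);
for (Job job : jobs) {
    // waitForCompletion(true) submits the job, reports progress, and blocks;
    // it throws IOException, InterruptedException and ClassNotFoundException.
    if (!job.waitForCompletion(true)) {
        throw new RuntimeException("Job failed: " + job.getJobName());
    }
}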

From source file: org.apache.jena.hadoop.rdf.stats.jobs.JobFactory.java

License: Apache License

/**
 * Gets a job for computing type counts on RDF quad inputs
 *
 * @param config
 *            Configuration
 * @param inputPaths
 *            Input paths
 * @param intermediateOutputPath
 *            Path for intermediate output which will be all the type
 *            declaration quads present in the inputs
 * @param outputPath
 *            Output path
 * @return Job
 * @throws IOException
 */
public static Job[] getQuadTypeCountJobs(Configuration config, String[] inputPaths,
        String intermediateOutputPath, String outputPath) throws IOException {
    Job[] jobs = new Job[2];

    Job job = Job.getInstance(config);
    job.setJarByClass(JobFactory.class);
    job.setJobName("RDF Type Quads Extraction");

    // Map/Reduce classes
    job.getConfiguration().setStrings(RdfMapReduceConstants.FILTER_PREDICATE_URIS, RDF.type.getURI());
    job.setMapperClass(QuadFilterByPredicateMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(QuadWritable.class);

    // Input and Output Format
    job.setInputFormatClass(QuadsInputFormat.class);
    job.setOutputFormatClass(NQuadsOutputFormat.class);
    FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
    FileOutputFormat.setOutputPath(job, new Path(intermediateOutputPath));

    jobs[0] = job;

    // Object Node Usage count job
    job = Job.getInstance(config);
    job.setJarByClass(JobFactory.class);
    job.setJobName("RDF Quads Type Usage Count");

    // Map/Reduce classes
    job.setMapperClass(QuadObjectCountMapper.class);
    job.setMapOutputKeyClass(NodeWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setReducerClass(NodeCountReducer.class);

    // Input and Output
    job.setInputFormatClass(NQuadsInputFormat.class);
    NLineInputFormat.setNumLinesPerSplit(job, 10000); // TODO Would be
                                                      // better if this was
                                                      // intelligently
                                                      // configured
    job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
    FileInputFormat.setInputPaths(job, intermediateOutputPath);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    jobs[1] = job;

    return jobs;
}

From source file: org.apache.jena.hadoop.rdf.stats.jobs.JobFactory.java

License: Apache License

/**
 * Gets a job for computing type counts on RDF triple and/or quad inputs
 *
 * @param config
 *            Configuration
 * @param inputPaths
 *            Input paths
 * @param intermediateOutputPath
 *            Path for intermediate output which will be all the type
 *            declaration quads present in the inputs
 * @param outputPath
 *            Output path
 * @return Job
 * @throws IOException
 */
public static Job[] getTypeCountJobs(Configuration config, String[] inputPaths, String intermediateOutputPath,
        String outputPath) throws IOException {
    Job[] jobs = new Job[2];

    Job job = Job.getInstance(config);
    job.setJarByClass(JobFactory.class);
    job.setJobName("RDF Type Extraction");

    // Map/Reduce classes
    job.getConfiguration().setStrings(RdfMapReduceConstants.FILTER_PREDICATE_URIS, RDF.type.getURI());
    job.setMapperClass(QuadFilterByPredicateMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(QuadWritable.class);

    // Input and Output Format
    job.setInputFormatClass(TriplesOrQuadsInputFormat.class);
    job.setOutputFormatClass(NQuadsOutputFormat.class);
    FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
    FileOutputFormat.setOutputPath(job, new Path(intermediateOutputPath));

    jobs[0] = job;

    // Object Node Usage count job
    job = Job.getInstance(config);
    job.setJarByClass(JobFactory.class);
    job.setJobName("RDF Type Usage Count");

    // Map/Reduce classes
    job.setMapperClass(QuadObjectCountMapper.class);
    job.setMapOutputKeyClass(NodeWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setReducerClass(NodeCountReducer.class);

    // Input and Output
    job.setInputFormatClass(NQuadsInputFormat.class);
    NLineInputFormat.setNumLinesPerSplit(job, 10000); // TODO Would be
                                                      // better if this was
                                                      // intelligently
                                                      // configured
    job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
    FileInputFormat.setInputPaths(job, intermediateOutputPath);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    jobs[1] = job;

    return jobs;
}

From source file: org.apache.jena.hadoop.rdf.stats.jobs.JobFactory.java

License: Apache License

/**
 * Gets a job for computing literal data type counts on RDF triple inputs
 *
 * @param config
 *            Configuration
 * @param inputPaths
 *            Input paths
 * @param outputPath
 *            Output path
 * @return Job
 * @throws IOException
 */
public static Job getTripleDataTypeCountJob(Configuration config, String[] inputPaths, String outputPath)
        throws IOException {
    Job job = Job.getInstance(config);
    job.setJarByClass(JobFactory.class);
    job.setJobName("RDF Triples Literal Data Type Usage Count");

    // Map/Reduce classes
    job.setMapperClass(TripleDataTypeCountMapper.class);
    job.setMapOutputKeyClass(NodeWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setReducerClass(NodeCountReducer.class);

    // Input and Output
    job.setInputFormatClass(TriplesInputFormat.class);
    job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
    FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job;
}
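
Factories like this one return a single Job, so a driver needs only one submission. A hedged sketch (variable names are illustrative):

Job job = JobFactory.getTripleDataTypeCountJob(config, inputPaths, outputPath);
// Submit and block until the job finishes; true enables progress reporting.
boolean ok = job.waitForCompletion(true);
System.exit(ok ? 0 : 1);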

From source file: org.apache.jena.hadoop.rdf.stats.jobs.JobFactory.java

License: Apache License

/**
 * Gets a job for computing literal data type counts on RDF quad inputs
 *
 * @param config
 *            Configuration
 * @param inputPaths
 *            Input paths
 * @param outputPath
 *            Output path
 * @return Job
 * @throws IOException
 */
public static Job getQuadDataTypeCountJob(Configuration config, String[] inputPaths, String outputPath)
        throws IOException {
    Job job = Job.getInstance(config);
    job.setJarByClass(JobFactory.class);
    job.setJobName("RDF Quads Literal Data Type Usage Count");

    // Map/Reduce classes
    job.setMapperClass(QuadDataTypeCountMapper.class);
    job.setMapOutputKeyClass(NodeWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setReducerClass(NodeCountReducer.class);

    // Input and Output
    job.setInputFormatClass(QuadsInputFormat.class);
    job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
    FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job;
}

From source file: org.apache.jena.hadoop.rdf.stats.jobs.JobFactory.java

License: Apache License

/**
 * Gets a job for computing literal data type counts on RDF triple and/or
 * quad inputs
 * 
 * @param config
 *            Configuration
 * @param inputPaths
 *            Input paths
 * @param outputPath
 *            Output path
 * @return Job
 * @throws IOException
 */
public static Job getDataTypeCountJob(Configuration config, String[] inputPaths, String outputPath)
        throws IOException {
    Job job = Job.getInstance(config);
    job.setJarByClass(JobFactory.class);
    job.setJobName("RDF Literal Data Type Usage Count");

    // Map/Reduce classes
    job.setMapperClass(QuadDataTypeCountMapper.class);
    job.setMapOutputKeyClass(NodeWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setReducerClass(NodeCountReducer.class);

    // Input and Output
    job.setInputFormatClass(TriplesOrQuadsInputFormat.class);
    job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
    FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job;
}

From source file: org.apache.jena.hadoop.rdf.stats.jobs.JobFactory.java

License: Apache License

/**
 * Gets a job for computing namespace counts on RDF triple inputs
 *
 * @param config
 *            Configuration
 * @param inputPaths
 *            Input paths
 * @param outputPath
 *            Output path
 * @return Job
 * @throws IOException
 */
public static Job getTripleNamespaceCountJob(Configuration config, String[] inputPaths, String outputPath)
        throws IOException {
    Job job = Job.getInstance(config);
    job.setJarByClass(JobFactory.class);
    job.setJobName("RDF Triples Namespace Usage Count");

    // Map/Reduce classes
    job.setMapperClass(TripleNamespaceCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setReducerClass(TextCountReducer.class);

    // Input and Output
    job.setInputFormatClass(TriplesInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job;
}

From source file: org.apache.jena.hadoop.rdf.stats.jobs.JobFactory.java

License: Apache License

/**
 * Gets a job for computing namespace counts on RDF quad inputs
 *
 * @param config
 *            Configuration
 * @param inputPaths
 *            Input paths
 * @param outputPath
 *            Output path
 * @return Job
 * @throws IOException
 */
public static Job getQuadNamespaceCountJob(Configuration config, String[] inputPaths, String outputPath)
        throws IOException {
    Job job = Job.getInstance(config);
    job.setJarByClass(JobFactory.class);
    job.setJobName("RDF Quads Namespace Usage Count");

    // Map/Reduce classes
    job.setMapperClass(QuadNamespaceCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setReducerClass(TextCountReducer.class);

    // Input and Output
    job.setInputFormatClass(QuadsInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job;
}

From source file: org.apache.jena.hadoop.rdf.stats.jobs.JobFactory.java

License: Apache License

/**
 * Gets a job for computing literal data type counts on RDF triple and/or
 * quad inputs//from w w w .j  ava  2  s .c  o m
 * 
 * @param config
 *            Configuration
 * @param inputPaths
 *            Input paths
 * @param outputPath
 *            Output path
 * @return Job
 * @throws IOException
 */
public static Job getNamespaceCountJob(Configuration config, String[] inputPaths, String outputPath)
        throws IOException {
    Job job = Job.getInstance(config);
    job.setJarByClass(JobFactory.class);
    job.setJobName("RDF Namespace Usage Count");

    // Map/Reduce classes
    job.setMapperClass(QuadNamespaceCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setReducerClass(TextCountReducer.class);

    // Input and Output
    job.setInputFormatClass(TriplesOrQuadsInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job;
}

From source file: org.apache.kudu.mapreduce.KuduTableMapReduceUtil.java

License: Apache License

/**
 * Add the jars containing the given classes to the job's configuration
 * such that JobClient will ship them to the cluster and add them to
 * the DistributedCache.
 */
public static void addDependencyJars(Configuration conf, Class<?>... classes) throws IOException {

    FileSystem localFs = FileSystem.getLocal(conf);
    Set<String> jars = new HashSet<String>();
    // Add jars that are already in the tmpjars variable
    jars.addAll(conf.getStringCollection("tmpjars"));

    // add jars as we find them to a map of contents jar name so that we can avoid
    // creating new jars for classes that have already been packaged.
    Map<String, String> packagedClasses = new HashMap<String, String>();

    // Add jars containing the specified classes
    for (Class<?> clazz : classes) {
        if (clazz == null) {
            continue;
        }

        Path path = findOrCreateJar(clazz, localFs, packagedClasses);
        if (path == null) {
            LOG.warn("Could not find jar for class " + clazz + " in order to ship it to the cluster.");
            continue;
        }
        if (!localFs.exists(path)) {
            LOG.warn("Could not validate jar file " + path + " for class " + clazz);
            continue;
        }
        jars.add(path.toString());
    }
    if (jars.isEmpty()) {
        return;
    }

    conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[jars.size()])));
}
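
A usage sketch, assuming a job whose mapper and reducer classes are not already on the cluster classpath (MyMapper and MyReducer are hypothetical):

// Locates (or packages on the fly) the jar for each class, then joins the
// collected paths with arrayToString into the job's "tmpjars" property.
KuduTableMapReduceUtil.addDependencyJars(job.getConfiguration(),
        MyMapper.class, MyReducer.class);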