Example usage for org.apache.hadoop.util.StringUtils.arrayToString

Introduction

This page lists example usages of org.apache.hadoop.util.StringUtils.arrayToString.

Prototype

public static String arrayToString(String[] strs)

Document

Given an array of strings, return a comma-separated list of its elements.
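
A minimal sketch of this behavior (the demo class and input values below are illustrative, not taken from the examples that follow):

import org.apache.hadoop.util.StringUtils;

public class ArrayToStringDemo {
    public static void main(String[] args) {
        String[] paths = { "/data/in1", "/data/in2", "/data/in3" };
        // arrayToString joins the elements with commas and no spaces:
        // prints "/data/in1,/data/in2,/data/in3"
        System.out.println(StringUtils.arrayToString(paths));
    }
}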

Usage

From source file: org.apache.jena.hadoop.rdf.stats.jobs.JobFactory.java

License: Apache License

/**
 * Gets a job for computing type counts on RDF triple inputs
 *
 * @param config
 *            Configuration
 * @param inputPaths
 *            Input paths
 * @param intermediateOutputPath
 *            Path for intermediate output which will be all the type
 *            declaration triples present in the inputs
 * @param outputPath
 *            Output path
 * @return Job
 * @throws IOException
 */
public static Job[] getTripleTypeCountJobs(Configuration config, String[] inputPaths,
        String intermediateOutputPath, String outputPath) throws IOException {
    Job[] jobs = new Job[2];

    Job job = Job.getInstance(config);
    job.setJarByClass(JobFactory.class);
    job.setJobName("RDF Type Triples Extraction");

    // Map/Reduce classes
    job.getConfiguration().setStrings(RdfMapReduceConstants.FILTER_PREDICATE_URIS, RDF.type.getURI());
    job.setMapperClass(TripleFilterByPredicateUriMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(TripleWritable.class);

    // Input and Output Format
    job.setInputFormatClass(TriplesInputFormat.class);
    job.setOutputFormatClass(NTriplesOutputFormat.class);
    FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
    FileOutputFormat.setOutputPath(job, new Path(intermediateOutputPath));

    jobs[0] = job;

    // Object Node Usage count job
    job = Job.getInstance(config);
    job.setJarByClass(JobFactory.class);
    job.setJobName("RDF Triples Type Usage Count");

    // Map/Reduce classes
    job.setMapperClass(TripleObjectCountMapper.class);
    job.setMapOutputKeyClass(NodeWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setReducerClass(NodeCountReducer.class);

    // Input and Output
    job.setInputFormatClass(NTriplesInputFormat.class);
    NLineInputFormat.setNumLinesPerSplit(job, 10000); // TODO Would be
                                                      // better if this was
                                                      // intelligently
                                                      // configured
    job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
    FileInputFormat.setInputPaths(job, intermediateOutputPath);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    jobs[1] = job;

    return jobs;
}
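
The two jobs returned above form a pipeline: the second job reads the first job's intermediate output, so they must run in order. A minimal driver sketch, assuming the caller already holds the configuration and paths (this chaining code is not part of JobFactory):

Job[] jobs = JobFactory.getTripleTypeCountJobs(config, inputPaths, intermediateOutputPath, outputPath);
for (Job job : jobs) {
    // waitForCompletion(true) submits the job, reports progress, and blocks;
    // it throws IOException, InterruptedException and ClassNotFoundException.
    if (!job.waitForCompletion(true)) {
        throw new RuntimeException("Job failed: " + job.getJobName());
    }
}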

From source file: org.apache.jena.hadoop.rdf.stats.jobs.JobFactory.java

License: Apache License

/**
 * Gets a job for computing type counts on RDF quad inputs
 *
 * @param config
 *            Configuration
 * @param inputPaths
 *            Input paths
 * @param intermediateOutputPath
 *            Path for intermediate output which will be all the type
 *            declaration quads present in the inputs
 * @param outputPath
 *            Output path
 * @return Job
 * @throws IOException
 */
public static Job[] getQuadTypeCountJobs(Configuration config, String[] inputPaths,
        String intermediateOutputPath, String outputPath) throws IOException {
    Job[] jobs = new Job[2];

    Job job = Job.getInstance(config);
    job.setJarByClass(JobFactory.class);
    job.setJobName("RDF Type Quads Extraction");

    // Map/Reduce classes
    job.getConfiguration().setStrings(RdfMapReduceConstants.FILTER_PREDICATE_URIS, RDF.type.getURI());
    job.setMapperClass(QuadFilterByPredicateMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(QuadWritable.class);

    // Input and Output Format
    job.setInputFormatClass(QuadsInputFormat.class);
    job.setOutputFormatClass(NQuadsOutputFormat.class);
    FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
    FileOutputFormat.setOutputPath(job, new Path(intermediateOutputPath));

    jobs[0] = job;

    // Object Node Usage count job
    job = Job.getInstance(config);
    job.setJarByClass(JobFactory.class);
    job.setJobName("RDF Quads Type Usage Count");

    // Map/Reduce classes
    job.setMapperClass(QuadObjectCountMapper.class);
    job.setMapOutputKeyClass(NodeWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setReducerClass(NodeCountReducer.class);

    // Input and Output
    job.setInputFormatClass(NQuadsInputFormat.class);
    NLineInputFormat.setNumLinesPerSplit(job, 10000); // TODO Would be
                                                      // better if this was
                                                      // intelligently
                                                      // configured
    job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
    FileInputFormat.setInputPaths(job, intermediateOutputPath);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    jobs[1] = job;

    return jobs;
}

From source file: org.apache.jena.hadoop.rdf.stats.jobs.JobFactory.java

License: Apache License

/**
 * Gets a job for computing type counts on RDF triple and/or quad inputs
 *
 * @param config
 *            Configuration
 * @param inputPaths
 *            Input paths
 * @param intermediateOutputPath
 *            Path for intermediate output which will be all the type
 *            declaration quads present in the inputs
 * @param outputPath
 *            Output path
 * @return Job
 * @throws IOException
 */
public static Job[] getTypeCountJobs(Configuration config, String[] inputPaths, String intermediateOutputPath,
        String outputPath) throws IOException {
    Job[] jobs = new Job[2];

    Job job = Job.getInstance(config);
    job.setJarByClass(JobFactory.class);
    job.setJobName("RDF Type Extraction");

    // Map/Reduce classes
    job.getConfiguration().setStrings(RdfMapReduceConstants.FILTER_PREDICATE_URIS, RDF.type.getURI());
    job.setMapperClass(QuadFilterByPredicateMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(QuadWritable.class);

    // Input and Output Format
    job.setInputFormatClass(TriplesOrQuadsInputFormat.class);
    job.setOutputFormatClass(NQuadsOutputFormat.class);
    FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
    FileOutputFormat.setOutputPath(job, new Path(intermediateOutputPath));

    jobs[0] = job;

    // Object Node Usage count job
    job = Job.getInstance(config);
    job.setJarByClass(JobFactory.class);
    job.setJobName("RDF Type Usage Count");

    // Map/Reduce classes
    job.setMapperClass(QuadObjectCountMapper.class);
    job.setMapOutputKeyClass(NodeWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setReducerClass(NodeCountReducer.class);

    // Input and Output
    job.setInputFormatClass(NQuadsInputFormat.class);
    NLineInputFormat.setNumLinesPerSplit(job, 10000); // TODO Would be
                                                      // better if this was
                                                      // intelligently
                                                      // configured
    job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
    FileInputFormat.setInputPaths(job, intermediateOutputPath);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    jobs[1] = job;

    return jobs;
}

From source file: org.apache.jena.hadoop.rdf.stats.jobs.JobFactory.java

License: Apache License

/**
 * Gets a job for computing literal data type counts on RDF triple inputs
 *
 * @param config
 *            Configuration
 * @param inputPaths
 *            Input paths
 * @param outputPath
 *            Output path
 * @return Job
 * @throws IOException
 */
public static Job getTripleDataTypeCountJob(Configuration config, String[] inputPaths, String outputPath)
        throws IOException {
    Job job = Job.getInstance(config);
    job.setJarByClass(JobFactory.class);
    job.setJobName("RDF Triples Literal Data Type Usage Count");

    // Map/Reduce classes
    job.setMapperClass(TripleDataTypeCountMapper.class);
    job.setMapOutputKeyClass(NodeWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setReducerClass(NodeCountReducer.class);

    // Input and Output
    job.setInputFormatClass(TriplesInputFormat.class);
    job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
    FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job;
}
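
Factories like this one return a single Job, so a driver needs only one submission. A hedged sketch (variable names are illustrative):

Job job = JobFactory.getTripleDataTypeCountJob(config, inputPaths, outputPath);
// Submit and block until the job finishes; true enables progress reporting.
boolean ok = job.waitForCompletion(true);
System.exit(ok ? 0 : 1);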

From source file: org.apache.jena.hadoop.rdf.stats.jobs.JobFactory.java

License: Apache License

/**
 * Gets a job for computing literal data type counts on RDF quad inputs
 *
 * @param config
 *            Configuration
 * @param inputPaths
 *            Input paths
 * @param outputPath
 *            Output path
 * @return Job
 * @throws IOException
 */
public static Job getQuadDataTypeCountJob(Configuration config, String[] inputPaths, String outputPath)
        throws IOException {
    Job job = Job.getInstance(config);
    job.setJarByClass(JobFactory.class);
    job.setJobName("RDF Quads Literal Data Type Usage Count");

    // Map/Reduce classes
    job.setMapperClass(QuadDataTypeCountMapper.class);
    job.setMapOutputKeyClass(NodeWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setReducerClass(NodeCountReducer.class);

    // Input and Output
    job.setInputFormatClass(QuadsInputFormat.class);
    job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
    FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job;
}

From source file: org.apache.jena.hadoop.rdf.stats.jobs.JobFactory.java

License: Apache License

/**
 * Gets a job for computing literal data type counts on RDF triple and/or
 * quad inputs
 * 
 * @param config
 *            Configuration
 * @param inputPaths
 *            Input paths
 * @param outputPath
 *            Output path
 * @return Job
 * @throws IOException
 */
public static Job getDataTypeCountJob(Configuration config, String[] inputPaths, String outputPath)
        throws IOException {
    Job job = Job.getInstance(config);
    job.setJarByClass(JobFactory.class);
    job.setJobName("RDF Literal Data Type Usage Count");

    // Map/Reduce classes
    job.setMapperClass(QuadDataTypeCountMapper.class);
    job.setMapOutputKeyClass(NodeWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setReducerClass(NodeCountReducer.class);

    // Input and Output
    job.setInputFormatClass(TriplesOrQuadsInputFormat.class);
    job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
    FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job;
}

From source file: org.apache.jena.hadoop.rdf.stats.jobs.JobFactory.java

License: Apache License

/**
 * Gets a job for computing namespace counts on RDF triple inputs
 *
 * @param config
 *            Configuration
 * @param inputPaths
 *            Input paths
 * @param outputPath
 *            Output path
 * @return Job
 * @throws IOException
 */
public static Job getTripleNamespaceCountJob(Configuration config, String[] inputPaths, String outputPath)
        throws IOException {
    Job job = Job.getInstance(config);
    job.setJarByClass(JobFactory.class);
    job.setJobName("RDF Triples Namespace Usage Count");

    // Map/Reduce classes
    job.setMapperClass(TripleNamespaceCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setReducerClass(TextCountReducer.class);

    // Input and Output
    job.setInputFormatClass(TriplesInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job;
}

From source file: org.apache.jena.hadoop.rdf.stats.jobs.JobFactory.java

License: Apache License

/**
 * Gets a job for computing namespace counts on RDF quad inputs
 *
 * @param config
 *            Configuration
 * @param inputPaths
 *            Input paths
 * @param outputPath
 *            Output path
 * @return Job
 * @throws IOException
 */
public static Job getQuadNamespaceCountJob(Configuration config, String[] inputPaths, String outputPath)
        throws IOException {
    Job job = Job.getInstance(config);
    job.setJarByClass(JobFactory.class);
    job.setJobName("RDF Quads Namespace Usage Count");

    // Map/Reduce classes
    job.setMapperClass(QuadNamespaceCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setReducerClass(TextCountReducer.class);

    // Input and Output
    job.setInputFormatClass(QuadsInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job;
}

From source file: org.apache.jena.hadoop.rdf.stats.jobs.JobFactory.java

License: Apache License

/**
 * Gets a job for computing literal data type counts on RDF triple and/or
 * quad inputs//from w w w .j  ava  2  s .c  o m
 * 
 * @param config
 *            Configuration
 * @param inputPaths
 *            Input paths
 * @param outputPath
 *            Output path
 * @return Job
 * @throws IOException
 */
public static Job getNamespaceCountJob(Configuration config, String[] inputPaths, String outputPath)
        throws IOException {
    Job job = Job.getInstance(config);
    job.setJarByClass(JobFactory.class);
    job.setJobName("RDF Namespace Usage Count");

    // Map/Reduce classes
    job.setMapperClass(QuadNamespaceCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setReducerClass(TextCountReducer.class);

    // Input and Output
    job.setInputFormatClass(TriplesOrQuadsInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    return job;
}

From source file: org.apache.kudu.mapreduce.KuduTableMapReduceUtil.java

License: Apache License

/**
 * Add the jars containing the given classes to the job's configuration
 * such that JobClient will ship them to the cluster and add them to
 * the DistributedCache.
 */
public static void addDependencyJars(Configuration conf, Class<?>... classes) throws IOException {

    FileSystem localFs = FileSystem.getLocal(conf);
    Set<String> jars = new HashSet<String>();
    // Add jars that are already in the tmpjars variable
    jars.addAll(conf.getStringCollection("tmpjars"));

    // add jars as we find them to a map of contents jar name so that we can avoid
    // creating new jars for classes that have already been packaged.
    Map<String, String> packagedClasses = new HashMap<String, String>();

    // Add jars containing the specified classes
    for (Class<?> clazz : classes) {
        if (clazz == null) {
            continue;
        }

        Path path = findOrCreateJar(clazz, localFs, packagedClasses);
        if (path == null) {
            LOG.warn("Could not find jar for class " + clazz + " in order to ship it to the cluster.");
            continue;
        }
        if (!localFs.exists(path)) {
            LOG.warn("Could not validate jar file " + path + " for class " + clazz);
            continue;
        }
        jars.add(path.toString());
    }
    if (jars.isEmpty()) {
        return;
    }

    conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[jars.size()])));
}
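
A usage sketch, assuming a job whose mapper and reducer classes are not already on the cluster classpath (MyMapper and MyReducer are hypothetical):

// Locates (or packages on the fly) the jar for each class, then joins the
// collected paths with arrayToString into the job's "tmpjars" property.
KuduTableMapReduceUtil.addDependencyJars(job.getConfiguration(),
        MyMapper.class, MyReducer.class);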