List of usage examples for org.apache.hadoop.util.StringUtils.arrayToString
public static String arrayToString(String[] strs)
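StringUtils.arrayToString joins the elements of the array into a single comma-separated string (for example, { "a", "b", "c" } becomes "a,b,c"). That is why the examples below pass its result to FileInputFormat.setInputPaths(Job, String), which accepts a comma-separated list of paths. A minimal sketch of the behaviour; the paths are illustrative placeholders:

    import org.apache.hadoop.util.StringUtils;

    public class ArrayToStringDemo {
        public static void main(String[] args) {
            // Illustrative placeholder paths
            String[] inputPaths = { "/data/in1", "/data/in2", "/data/in3" };

            // Prints "/data/in1,/data/in2,/data/in3"
            System.out.println(StringUtils.arrayToString(inputPaths));
        }
    }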
From source file: org.apache.jena.hadoop.rdf.stats.jobs.JobFactory.java
License: Apache License

    /**
     * Gets a job for computing type counts on RDF triple inputs
     *
     * @param config
     *            Configuration
     * @param inputPaths
     *            Input paths
     * @param intermediateOutputPath
     *            Path for intermediate output which will be all the type
     *            declaration triples present in the inputs
     * @param outputPath
     *            Output path
     * @return Job
     * @throws IOException
     */
    public static Job[] getTripleTypeCountJobs(Configuration config, String[] inputPaths,
            String intermediateOutputPath, String outputPath) throws IOException {
        Job[] jobs = new Job[2];

        Job job = Job.getInstance(config);
        job.setJarByClass(JobFactory.class);
        job.setJobName("RDF Type Triples Extraction");

        // Map/Reduce classes
        job.getConfiguration().setStrings(RdfMapReduceConstants.FILTER_PREDICATE_URIS, RDF.type.getURI());
        job.setMapperClass(TripleFilterByPredicateUriMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(TripleWritable.class);

        // Input and Output Format
        job.setInputFormatClass(TriplesInputFormat.class);
        job.setOutputFormatClass(NTriplesOutputFormat.class);
        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputPath));
        jobs[0] = job;

        // Object Node Usage count job
        job = Job.getInstance(config);
        job.setJarByClass(JobFactory.class);
        job.setJobName("RDF Triples Type Usage Count");

        // Map/Reduce classes
        job.setMapperClass(TripleObjectCountMapper.class);
        job.setMapOutputKeyClass(NodeWritable.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setReducerClass(NodeCountReducer.class);

        // Input and Output
        job.setInputFormatClass(NTriplesInputFormat.class);
        NLineInputFormat.setNumLinesPerSplit(job, 10000); // TODO Would be better if this was intelligently configured
        job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
        FileInputFormat.setInputPaths(job, intermediateOutputPath);
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        jobs[1] = job;

        return jobs;
    }
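The extraction job writes the intermediate triples that the counting job then reads, so a caller would run the returned array in order. A minimal driver sketch, assuming placeholder paths:

    // Hypothetical driver code; the paths are placeholders
    Configuration config = new Configuration();
    Job[] jobs = JobFactory.getTripleTypeCountJobs(config, new String[] { "/input/triples" },
            "/tmp/type-triples", "/output/type-counts");

    // The second job consumes the first job's output, so run them sequentially
    for (Job job : jobs) {
        if (!job.waitForCompletion(true)) {
            throw new IOException("Job failed: " + job.getJobName());
        }
    }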
From source file: org.apache.jena.hadoop.rdf.stats.jobs.JobFactory.java
License: Apache License

    /**
     * Gets a job for computing type counts on RDF quad inputs
     *
     * @param config
     *            Configuration
     * @param inputPaths
     *            Input paths
     * @param intermediateOutputPath
     *            Path for intermediate output which will be all the type
     *            declaration quads present in the inputs
     * @param outputPath
     *            Output path
     * @return Job
     * @throws IOException
     */
    public static Job[] getQuadTypeCountJobs(Configuration config, String[] inputPaths,
            String intermediateOutputPath, String outputPath) throws IOException {
        Job[] jobs = new Job[2];

        Job job = Job.getInstance(config);
        job.setJarByClass(JobFactory.class);
        job.setJobName("RDF Type Quads Extraction");

        // Map/Reduce classes
        job.getConfiguration().setStrings(RdfMapReduceConstants.FILTER_PREDICATE_URIS, RDF.type.getURI());
        job.setMapperClass(QuadFilterByPredicateMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(QuadWritable.class);

        // Input and Output Format
        job.setInputFormatClass(QuadsInputFormat.class);
        job.setOutputFormatClass(NQuadsOutputFormat.class);
        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputPath));
        jobs[0] = job;

        // Object Node Usage count job
        job = Job.getInstance(config);
        job.setJarByClass(JobFactory.class);
        job.setJobName("RDF Quads Type Usage Count");

        // Map/Reduce classes
        job.setMapperClass(QuadObjectCountMapper.class);
        job.setMapOutputKeyClass(NodeWritable.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setReducerClass(NodeCountReducer.class);

        // Input and Output
        job.setInputFormatClass(NQuadsInputFormat.class);
        NLineInputFormat.setNumLinesPerSplit(job, 10000); // TODO Would be better if this was intelligently configured
        job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
        FileInputFormat.setInputPaths(job, intermediateOutputPath);
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        jobs[1] = job;

        return jobs;
    }
From source file: org.apache.jena.hadoop.rdf.stats.jobs.JobFactory.java
License: Apache License

    /**
     * Gets a job for computing type counts on RDF triple and/or quad inputs
     *
     * @param config
     *            Configuration
     * @param inputPaths
     *            Input paths
     * @param intermediateOutputPath
     *            Path for intermediate output which will be all the type
     *            declaration quads present in the inputs
     * @param outputPath
     *            Output path
     * @return Job
     * @throws IOException
     */
    public static Job[] getTypeCountJobs(Configuration config, String[] inputPaths,
            String intermediateOutputPath, String outputPath) throws IOException {
        Job[] jobs = new Job[2];

        Job job = Job.getInstance(config);
        job.setJarByClass(JobFactory.class);
        job.setJobName("RDF Type Extraction");

        // Map/Reduce classes
        job.getConfiguration().setStrings(RdfMapReduceConstants.FILTER_PREDICATE_URIS, RDF.type.getURI());
        job.setMapperClass(QuadFilterByPredicateMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(QuadWritable.class);

        // Input and Output Format
        job.setInputFormatClass(TriplesOrQuadsInputFormat.class);
        job.setOutputFormatClass(NQuadsOutputFormat.class);
        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
        FileOutputFormat.setOutputPath(job, new Path(intermediateOutputPath));
        jobs[0] = job;

        // Object Node Usage count job
        job = Job.getInstance(config);
        job.setJarByClass(JobFactory.class);
        job.setJobName("RDF Type Usage Count");

        // Map/Reduce classes
        job.setMapperClass(QuadObjectCountMapper.class);
        job.setMapOutputKeyClass(NodeWritable.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setReducerClass(NodeCountReducer.class);

        // Input and Output
        job.setInputFormatClass(NQuadsInputFormat.class);
        NLineInputFormat.setNumLinesPerSplit(job, 10000); // TODO Would be better if this was intelligently configured
        job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
        FileInputFormat.setInputPaths(job, intermediateOutputPath);
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        jobs[1] = job;

        return jobs;
    }
From source file: org.apache.jena.hadoop.rdf.stats.jobs.JobFactory.java
License: Apache License

    /**
     * Gets a job for computing literal data type counts on RDF triple inputs
     *
     * @param config
     *            Configuration
     * @param inputPaths
     *            Input paths
     * @param outputPath
     *            Output path
     * @return Job
     * @throws IOException
     */
    public static Job getTripleDataTypeCountJob(Configuration config, String[] inputPaths, String outputPath)
            throws IOException {
        Job job = Job.getInstance(config);
        job.setJarByClass(JobFactory.class);
        job.setJobName("RDF Triples Literal Data Type Usage Count");

        // Map/Reduce classes
        job.setMapperClass(TripleDataTypeCountMapper.class);
        job.setMapOutputKeyClass(NodeWritable.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setReducerClass(NodeCountReducer.class);

        // Input and Output
        job.setInputFormatClass(TriplesInputFormat.class);
        job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        return job;
    }
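Unlike the two-job pipelines above, the single-job variants can be submitted directly. A minimal sketch with placeholder paths:

    // Hypothetical driver code; the paths are placeholders
    Job job = JobFactory.getTripleDataTypeCountJob(new Configuration(),
            new String[] { "/input/triples" }, "/output/datatype-counts");
    job.waitForCompletion(true);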
From source file: org.apache.jena.hadoop.rdf.stats.jobs.JobFactory.java
License: Apache License

    /**
     * Gets a job for computing literal data type counts on RDF quad inputs
     *
     * @param config
     *            Configuration
     * @param inputPaths
     *            Input paths
     * @param outputPath
     *            Output path
     * @return Job
     * @throws IOException
     */
    public static Job getQuadDataTypeCountJob(Configuration config, String[] inputPaths, String outputPath)
            throws IOException {
        Job job = Job.getInstance(config);
        job.setJarByClass(JobFactory.class);
        job.setJobName("RDF Quads Literal Data Type Usage Count");

        // Map/Reduce classes
        job.setMapperClass(QuadDataTypeCountMapper.class);
        job.setMapOutputKeyClass(NodeWritable.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setReducerClass(NodeCountReducer.class);

        // Input and Output
        job.setInputFormatClass(QuadsInputFormat.class);
        job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        return job;
    }
From source file: org.apache.jena.hadoop.rdf.stats.jobs.JobFactory.java
License: Apache License

    /**
     * Gets a job for computing literal data type counts on RDF triple and/or
     * quad inputs
     *
     * @param config
     *            Configuration
     * @param inputPaths
     *            Input paths
     * @param outputPath
     *            Output path
     * @return Job
     * @throws IOException
     */
    public static Job getDataTypeCountJob(Configuration config, String[] inputPaths, String outputPath)
            throws IOException {
        Job job = Job.getInstance(config);
        job.setJarByClass(JobFactory.class);
        job.setJobName("RDF Literal Data Type Usage Count");

        // Map/Reduce classes
        job.setMapperClass(QuadDataTypeCountMapper.class);
        job.setMapOutputKeyClass(NodeWritable.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setReducerClass(NodeCountReducer.class);

        // Input and Output
        job.setInputFormatClass(TriplesOrQuadsInputFormat.class);
        job.setOutputFormatClass(NTriplesNodeOutputFormat.class);
        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        return job;
    }
From source file: org.apache.jena.hadoop.rdf.stats.jobs.JobFactory.java
License: Apache License

    /**
     * Gets a job for computing namespace usage counts on RDF triple inputs
     *
     * @param config
     *            Configuration
     * @param inputPaths
     *            Input paths
     * @param outputPath
     *            Output path
     * @return Job
     * @throws IOException
     */
    public static Job getTripleNamespaceCountJob(Configuration config, String[] inputPaths, String outputPath)
            throws IOException {
        Job job = Job.getInstance(config);
        job.setJarByClass(JobFactory.class);
        job.setJobName("RDF Triples Namespace Usage Count");

        // Map/Reduce classes
        job.setMapperClass(TripleNamespaceCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setReducerClass(TextCountReducer.class);

        // Input and Output
        job.setInputFormatClass(TriplesInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        return job;
    }
From source file: org.apache.jena.hadoop.rdf.stats.jobs.JobFactory.java
License: Apache License

    /**
     * Gets a job for computing namespace usage counts on RDF quad inputs
     *
     * @param config
     *            Configuration
     * @param inputPaths
     *            Input paths
     * @param outputPath
     *            Output path
     * @return Job
     * @throws IOException
     */
    public static Job getQuadNamespaceCountJob(Configuration config, String[] inputPaths, String outputPath)
            throws IOException {
        Job job = Job.getInstance(config);
        job.setJarByClass(JobFactory.class);
        job.setJobName("RDF Quads Namespace Usage Count");

        // Map/Reduce classes
        job.setMapperClass(QuadNamespaceCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setReducerClass(TextCountReducer.class);

        // Input and Output
        job.setInputFormatClass(QuadsInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        return job;
    }
From source file: org.apache.jena.hadoop.rdf.stats.jobs.JobFactory.java
License: Apache License

    /**
     * Gets a job for computing namespace usage counts on RDF triple and/or
     * quad inputs
     *
     * @param config
     *            Configuration
     * @param inputPaths
     *            Input paths
     * @param outputPath
     *            Output path
     * @return Job
     * @throws IOException
     */
    public static Job getNamespaceCountJob(Configuration config, String[] inputPaths, String outputPath)
            throws IOException {
        Job job = Job.getInstance(config);
        job.setJarByClass(JobFactory.class);
        job.setJobName("RDF Namespace Usage Count");

        // Map/Reduce classes
        job.setMapperClass(QuadNamespaceCountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);
        job.setReducerClass(TextCountReducer.class);

        // Input and Output
        job.setInputFormatClass(TriplesOrQuadsInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.setInputPaths(job, StringUtils.arrayToString(inputPaths));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        return job;
    }
From source file: org.apache.kudu.mapreduce.KuduTableMapReduceUtil.java
License: Apache License

    /**
     * Add the jars containing the given classes to the job's configuration
     * such that JobClient will ship them to the cluster and add them to
     * the DistributedCache.
     */
    public static void addDependencyJars(Configuration conf, Class<?>... classes) throws IOException {
        FileSystem localFs = FileSystem.getLocal(conf);
        Set<String> jars = new HashSet<String>();

        // Add jars that are already in the tmpjars variable
        jars.addAll(conf.getStringCollection("tmpjars"));

        // Add jars as we find them to a map of contents jar name so that we can avoid
        // creating new jars for classes that have already been packaged.
        Map<String, String> packagedClasses = new HashMap<String, String>();

        // Add jars containing the specified classes
        for (Class<?> clazz : classes) {
            if (clazz == null) {
                continue;
            }

            Path path = findOrCreateJar(clazz, localFs, packagedClasses);
            if (path == null) {
                LOG.warn("Could not find jar for class " + clazz + " in order to ship it to the cluster.");
                continue;
            }
            if (!localFs.exists(path)) {
                LOG.warn("Could not validate jar file " + path + " for class " + clazz);
                continue;
            }
            jars.add(path.toString());
        }
        if (jars.isEmpty()) {
            return;
        }

        conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[jars.size()])));
    }
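A typical call site registers the job's dependencies before submission so that their jars land in tmpjars and get shipped to the cluster via the DistributedCache. A minimal sketch; MyMapper and MyReducer are hypothetical classes standing in for the job's real dependencies:

    // Hypothetical usage; MyMapper and MyReducer are placeholders
    Job job = Job.getInstance(conf, "kudu-example");
    KuduTableMapReduceUtil.addDependencyJars(job.getConfiguration(), MyMapper.class, MyReducer.class);
    job.waitForCompletion(true);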