Usage examples for org.apache.hadoop.util.StringUtils.unEscapeString
public static String unEscapeString(String str)
From source file:com.bianfeng.bfas.hive.io.RealtimeInputFormat2.java
License:Apache License
/** * Get the list of input {@link Path}s for the map-reduce job. * /* www. j a va 2 s .c om*/ * @param conf The configuration of the job * @return the list of input {@link Path}s for the map-reduce job. */ public static Path[] getInputPaths(JobConf conf) { String dirs = conf.get("mapred.input.dir", ""); String[] list = StringUtils.split(dirs); Path[] result = new Path[list.length]; for (int i = 0; i < list.length; i++) { result[i] = new Path(StringUtils.unEscapeString(list[i])); } return result; }
From source file:com.blm.orc.OrcInputFormat.java
License:Apache License
/**
 * Resolve the job's input directories into {@link Path} objects.
 *
 * <p>The value of {@code mapred.input.dir} is split with
 * {@code StringUtils.split}; each piece is un-escaped and turned into a
 * {@link Path}, in the order the pieces appear.
 *
 * @param conf The configuration of the job
 * @return the list of input {@link Path}s for the map-reduce job.
 * @throws IOException if {@code mapred.input.dir} is not present in {@code conf}
 */
static Path[] getInputPaths(Configuration conf) throws IOException {
    final String rawDirs = conf.get("mapred.input.dir");
    if (null == rawDirs) {
        throw new IOException("Configuration mapred.input.dir is not defined.");
    }

    final String[] escapedDirs = StringUtils.split(rawDirs);
    final Path[] paths = new Path[escapedDirs.length];
    int slot = 0;
    while (slot < escapedDirs.length) {
        paths[slot] = new Path(StringUtils.unEscapeString(escapedDirs[slot]));
        slot++;
    }
    return paths;
}
From source file:com.chinamobile.bcbsp.io.BSPFileInputFormat.java
License:Apache License
/**
 * Resolve the input directories of a bsp job into {@link Path} objects.
 *
 * <p>The setting is looked up under
 * {@code Constants.USER_BC_BSP_JOB_INPUT_DIR} with the empty string as the
 * default, split with {@code StringUtils.split}, and each piece is un-escaped
 * before it becomes a {@link Path}.
 *
 * @param job
 *        the current job BSPJob.
 * @return the list of input {@link Path}s for the bsp job.
 */
public static Path[] getInputPaths(BSPJob job) {
    final String[] escapedDirs =
        StringUtils.split(job.getConf().get(Constants.USER_BC_BSP_JOB_INPUT_DIR, ""));
    final Path[] paths = new Path[escapedDirs.length];
    int next = 0;
    for (String escapedDir : escapedDirs) {
        paths[next++] = new Path(StringUtils.unEscapeString(escapedDir));
    }
    return paths;
}
From source file:com.dinglicom.clouder.mapreduce.input.FileInputFormat.java
License:Apache License
/**
 * Resolve the job's input directories into {@link Path} objects.
 *
 * <p>The {@code INPUT_DIR} setting of the job configuration (empty string
 * when absent) is split with {@code StringUtils.split}; every piece is
 * un-escaped and wrapped in a {@link Path}, preserving order.
 *
 * @param context The job
 * @return the list of input {@link Path}s for the map-reduce job.
 */
public static Path[] getInputPaths(JobContext context) {
    final String configured = context.getConfiguration().get(INPUT_DIR, "");
    final String[] pieces = StringUtils.split(configured);
    final int count = pieces.length;
    final Path[] paths = new Path[count];
    for (int idx = 0; idx != count; ++idx) {
        final String unescaped = StringUtils.unEscapeString(pieces[idx]);
        paths[idx] = new Path(unescaped);
    }
    return paths;
}
From source file:com.ning.metrics.serialization.hadoop.SmileInputFormat.java
License:Apache License
/**
 * Resolve the job's input directories into {@link Path} objects.
 *
 * <p>Uses the {@code mapred.input.dir} property of the job configuration,
 * defaulting to the empty string, and converts each un-escaped entry
 * produced by {@code StringUtils.split} into a {@link Path}.
 *
 * @param context The job
 * @return the list of input {@link Path}s for the map-reduce job.
 */
public static Path[] getInputPaths(JobContext context) {
    final String[] entries =
        StringUtils.split(context.getConfiguration().get("mapred.input.dir", ""));
    final Path[] paths = new Path[entries.length];
    int position = 0;
    for (String entry : entries) {
        paths[position] = new Path(StringUtils.unEscapeString(entry));
        position += 1;
    }
    return paths;
}
From source file:com.vertica.hadoop.FixedSplitFileInputFormat.java
License:Apache License
/** * Get the list of input {@link Path}s for the map-reduce job. * /*from w w w . ja v a 2 s . c o m*/ * @param conf The configuration of the job * @return the list of input {@link Path}s for the map-reduce job. */ public static Path[] getInputPaths(JobConf conf) { String dirs = conf.get(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR, ""); String[] list = StringUtils.split(dirs); Path[] result = new Path[list.length]; for (int i = 0; i < list.length; i++) { result[i] = new Path(StringUtils.unEscapeString(list[i])); } return result; }
From source file:com.xiaomi.linden.hadoop.indexing.job.LindenJob.java
License:Apache License
/**
 * Configures and runs the "linden-hadoop-indexing" map-reduce job.
 *
 * <p>Every required setting (input dir, output dir, index path, schema file
 * URL and linden properties file URL) is read and validated <em>before</em>
 * any existing output or index directory is deleted and before the trash is
 * expunged, so a misconfigured run fails without having removed data.
 *
 * @param strings command-line arguments; not used by this method
 * @return 0 when the job completes successfully
 * @throws IllegalArgumentException if the input dir, output dir or index path is not configured
 * @throws IOException if the schema file or the linden properties file is not configured
 * @throws RuntimeException if the job finishes unsuccessfully
 * @throws Exception any other failure raised while preparing or running the job
 */
@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    String dir = conf.get(LindenJobConfig.INPUT_DIR, null);
    logger.info("input dir:" + dir);
    String outputDir = conf.get(LindenJobConfig.OUTPUT_DIR);
    String indexPath = conf.get(LindenJobConfig.INDEX_PATH);
    String lindenSchemaFile = conf.get(LindenJobConfig.SCHEMA_FILE_URL);
    String lindenPropertiesFile = conf.get(LindenJobConfig.LINDEN_PROPERTIES_FILE_URL);

    // Fail fast: nothing below this validation section may run with incomplete settings,
    // because the next steps delete directories and expunge the trash.
    if (dir == null) {
        throw new IllegalArgumentException("no input dir is found");
    }
    if (outputDir == null) {
        throw new IllegalArgumentException("no output dir is found");
    }
    if (indexPath == null) {
        throw new IllegalArgumentException("no index path is found");
    }
    if (lindenSchemaFile == null) {
        throw new IOException("no schema file is found");
    }
    if (lindenPropertiesFile == null) {
        throw new IOException("no linden properties file is found");
    }
    // Building the URIs here surfaces a malformed URL before any data is removed.
    URI schemaUri = new URI(lindenSchemaFile + "#lindenSchema");
    URI propertiesUri = new URI(lindenPropertiesFile + "#lindenProperties");

    Path inputPath = new Path(StringUtils.unEscapeString(dir));
    Path outputPath = new Path(outputDir);
    Path indexDir = new Path(indexPath);

    // Start from a clean slate: drop output and index data left by a previous run.
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    if (fs.exists(indexDir)) {
        fs.delete(indexDir, true);
    }

    int numShards = conf.getInt(LindenJobConfig.NUM_SHARDS, 1);
    Shard[] shards = createShards(indexPath, numShards);
    Shard.setIndexShards(conf, shards);

    //empty trash;
    (new Trash(conf)).expunge();

    Job job = Job.getInstance(conf, "linden-hadoop-indexing");
    job.setJarByClass(LindenJob.class);
    job.setMapperClass(LindenMapper.class);
    job.setCombinerClass(LindenCombiner.class);
    job.setReducerClass(LindenReducer.class);
    job.setMapOutputKeyClass(Shard.class);
    job.setMapOutputValueClass(IntermediateForm.class);
    job.setOutputKeyClass(Shard.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(IndexUpdateOutputFormat.class);
    job.setReduceSpeculativeExecution(false);
    // One reduce task per shard.
    job.setNumReduceTasks(numShards);

    logger.info("Adding schema file: " + lindenSchemaFile);
    job.addCacheFile(schemaUri);
    logger.info("Adding linden properties file: " + lindenPropertiesFile);
    job.addCacheFile(propertiesUri);

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    // Log the effective job settings.
    Path[] inputs = FileInputFormat.getInputPaths(job);
    StringBuilder buffer = new StringBuilder(inputs[0].toString());
    for (int i = 1; i < inputs.length; i++) {
        buffer.append(",");
        buffer.append(inputs[i].toString());
    }
    logger.info("mapreduce.input.dir = " + buffer.toString());
    logger.info("mapreduce.output.dir = " + FileOutputFormat.getOutputPath(job).toString());
    logger.info("mapreduce.job.num.reduce.tasks = " + job.getNumReduceTasks());
    logger.info(shards.length + " shards = " + conf.get(LindenJobConfig.INDEX_SHARDS));
    logger.info("mapreduce.input.format.class = " + job.getInputFormatClass());
    logger.info("mapreduce.output.format.class = " + job.getOutputFormatClass());
    logger.info("mapreduce.cluster.temp.dir = " + conf.get(MRJobConfig.TEMP_DIR));

    job.waitForCompletion(true);
    if (!job.isSuccessful()) {
        throw new RuntimeException("Job failed");
    }
    return 0;
}
From source file:ml.shifu.guagua.hadoop.GuaguaMRUnitDriver.java
License:Apache License
/** * Get the list of input {@link Path}s for the map-reduce job. */// w w w .j ava 2 s. c om private static Path[] getInputPaths(String inputs) { String[] list = StringUtils.split(inputs); Path[] result = new Path[list.length]; for (int i = 0; i < list.length; i++) { result[i] = new Path(StringUtils.unEscapeString(list[i])); } return result; }
From source file:ml.shifu.shifu.guagua.ShifuInputFormat.java
License:Apache License
public static Path[] getInputPaths(JobContext context) { String dirs = context.getConfiguration().get(CommonConstants.CROSS_VALIDATION_DIR, ""); LOG.info("crossValidation_dir:" + dirs); String[] list = StringUtils.split(dirs); Path[] result = new Path[list.length]; for (int i = 0; i < list.length; i++) { result[i] = new Path(StringUtils.unEscapeString(list[i])); }// w w w .j ava 2 s . co m return result; }
From source file:org.apache.giraph.io.formats.GiraphFileInputFormat.java
License:Apache License
/**
 * Resolve the vertex input directories into {@link Path} objects.
 *
 * <p>The {@code VERTEX_INPUT_DIR} setting of the job configuration (empty
 * string when absent) is split with {@code StringUtils.split}; each piece is
 * un-escaped and wrapped in a {@link Path}, preserving order.
 *
 * @param context The job
 * @return The list of input {@link Path}s
 */
public static Path[] getVertexInputPaths(JobContext context) {
    final String configured = context.getConfiguration().get(VERTEX_INPUT_DIR, "");
    final String[] escapedDirs = StringUtils.split(configured);
    final Path[] vertexPaths = new Path[escapedDirs.length];
    int cursor = 0;
    while (cursor < escapedDirs.length) {
        final String plainDir = StringUtils.unEscapeString(escapedDirs[cursor]);
        vertexPaths[cursor] = new Path(plainDir);
        cursor++;
    }
    return vertexPaths;
}