Example usage for org.apache.hadoop.util StringUtils unEscapeString

Introduction

On this page you can find example usages of org.apache.hadoop.util StringUtils unEscapeString.

Prototype

public static String unEscapeString(String str) 

Document

Unescape commas in the string using the default escape char.
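
For context, here is a minimal standalone sketch (not taken from the sources below) of the escape/split/unescape round trip; the class name and path values are illustrative:

import org.apache.hadoop.util.StringUtils;

public class UnEscapeStringDemo {
    public static void main(String[] args) {
        // Escape the comma inside the first path so it survives being
        // joined into a single comma-separated configuration value.
        String first = StringUtils.escapeString("/data/a,b"); // "/data/a\,b"
        String dirs = first + "," + "/data/c";

        // split() honors the escape character, yielding two entries.
        for (String s : StringUtils.split(dirs)) {
            // unEscapeString() strips the escape character, restoring
            // the original "/data/a,b" and "/data/c".
            System.out.println(StringUtils.unEscapeString(s));
        }
    }
}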

Usage

From source file:com.bianfeng.bfas.hive.io.RealtimeInputFormat2.java

License:Apache License

/**
 * Get the list of input {@link Path}s for the map-reduce job.
 *
 * @param conf The configuration of the job 
 * @return the list of input {@link Path}s for the map-reduce job.
 */
public static Path[] getInputPaths(JobConf conf) {
    String dirs = conf.get("mapred.input.dir", "");
    String[] list = StringUtils.split(dirs);
    Path[] result = new Path[list.length];
    for (int i = 0; i < list.length; i++) {
        result[i] = new Path(StringUtils.unEscapeString(list[i]));
    }
    return result;
}
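
The escaping counterpart is FileInputFormat.setInputPaths, which escapes commas in each path before joining them into the single "mapred.input.dir" value. A minimal sketch of that round trip (assuming Hadoop on the classpath; the paths are hypothetical):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.JobConf;

public class InputPathRoundTrip {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // setInputPaths escapes commas in each path before joining them
        // into the comma-separated "mapred.input.dir" value.
        FileInputFormat.setInputPaths(conf, new Path("/in/a,b"), new Path("/in/c"));

        // getInputPaths splits on unescaped commas and calls
        // unEscapeString on each entry, recovering both paths.
        for (Path p : FileInputFormat.getInputPaths(conf)) {
            System.out.println(p);
        }
    }
}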

From source file:com.blm.orc.OrcInputFormat.java

License:Apache License

/**
 * Get the list of input {@link Path}s for the map-reduce job.
 *
 * @param conf The configuration of the job
 * @return the list of input {@link Path}s for the map-reduce job.
 */
static Path[] getInputPaths(Configuration conf) throws IOException {
    String dirs = conf.get("mapred.input.dir");
    if (dirs == null) {
        throw new IOException("Configuration mapred.input.dir is not defined.");
    }
    String[] list = StringUtils.split(dirs);
    Path[] result = new Path[list.length];
    for (int i = 0; i < list.length; i++) {
        result[i] = new Path(StringUtils.unEscapeString(list[i]));
    }
    return result;
}

From source file:com.chinamobile.bcbsp.io.BSPFileInputFormat.java

License:Apache License

/**
 * Get the list of input {@link Path}s for the bsp job.
 *
 * @param job
 *        the current job BSPJob.
 * @return the list of input {@link Path}s for the bsp job.
 */
public static Path[] getInputPaths(BSPJob job) {
    String dirs = job.getConf().get(Constants.USER_BC_BSP_JOB_INPUT_DIR, "");
    String[] list = StringUtils.split(dirs);
    Path[] result = new Path[list.length];
    for (int i = 0; i < list.length; i++) {
        result[i] = new Path(StringUtils.unEscapeString(list[i]));
    }
    return result;
}

From source file:com.dinglicom.clouder.mapreduce.input.FileInputFormat.java

License:Apache License

/**
 * Get the list of input {@link Path}s for the map-reduce job.
 *
 * @param context The job
 * @return the list of input {@link Path}s for the map-reduce job.
 */
public static Path[] getInputPaths(JobContext context) {
    String dirs = context.getConfiguration().get(INPUT_DIR, "");
    String[] list = StringUtils.split(dirs);
    Path[] result = new Path[list.length];
    for (int i = 0; i < list.length; i++) {
        result[i] = new Path(StringUtils.unEscapeString(list[i]));
    }
    return result;
}

From source file:com.ning.metrics.serialization.hadoop.SmileInputFormat.java

License:Apache License

/**
 * Get the list of input {@link Path}s for the map-reduce job.
 *
 * @param context The job
 * @return the list of input {@link Path}s for the map-reduce job.
 */
public static Path[] getInputPaths(JobContext context) {
    String dirs = context.getConfiguration().get("mapred.input.dir", "");
    String[] list = StringUtils.split(dirs);
    Path[] result = new Path[list.length];
    for (int i = 0; i < list.length; i++) {
        result[i] = new Path(StringUtils.unEscapeString(list[i]));
    }
    return result;
}

From source file:com.vertica.hadoop.FixedSplitFileInputFormat.java

License:Apache License

/**
 * Get the list of input {@link Path}s for the map-reduce job.
 *
 * @param conf The configuration of the job 
 * @return the list of input {@link Path}s for the map-reduce job.
 */
public static Path[] getInputPaths(JobConf conf) {
    String dirs = conf.get(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR, "");
    String[] list = StringUtils.split(dirs);
    Path[] result = new Path[list.length];
    for (int i = 0; i < list.length; i++) {
        result[i] = new Path(StringUtils.unEscapeString(list[i]));
    }
    return result;
}

From source file:com.xiaomi.linden.hadoop.indexing.job.LindenJob.java

License:Apache License

@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    String dir = conf.get(LindenJobConfig.INPUT_DIR, null);
    logger.info("input dir:" + dir);
    Path inputPath = new Path(StringUtils.unEscapeString(dir));
    Path outputPath = new Path(conf.get(LindenJobConfig.OUTPUT_DIR));
    String indexPath = conf.get(LindenJobConfig.INDEX_PATH);

    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    if (fs.exists(new Path(indexPath))) {
        fs.delete(new Path(indexPath), true);
    }

    int numShards = conf.getInt(LindenJobConfig.NUM_SHARDS, 1);
    Shard[] shards = createShards(indexPath, numShards);

    Shard.setIndexShards(conf, shards);

    // Empty the trash.
    (new Trash(conf)).expunge();

    Job job = Job.getInstance(conf, "linden-hadoop-indexing");
    job.setJarByClass(LindenJob.class);
    job.setMapperClass(LindenMapper.class);
    job.setCombinerClass(LindenCombiner.class);
    job.setReducerClass(LindenReducer.class);
    job.setMapOutputKeyClass(Shard.class);
    job.setMapOutputValueClass(IntermediateForm.class);
    job.setOutputKeyClass(Shard.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(IndexUpdateOutputFormat.class);
    job.setReduceSpeculativeExecution(false);
    job.setNumReduceTasks(numShards);

    String lindenSchemaFile = conf.get(LindenJobConfig.SCHEMA_FILE_URL);
    if (lindenSchemaFile == null) {
        throw new IOException("no schema file is found");
    }
    logger.info("Adding schema file: " + lindenSchemaFile);
    job.addCacheFile(new URI(lindenSchemaFile + "#lindenSchema"));
    String lindenPropertiesFile = conf.get(LindenJobConfig.LINDEN_PROPERTIES_FILE_URL);
    if (lindenPropertiesFile == null) {
        throw new IOException("no linden properties file is found");
    }
    logger.info("Adding linden properties file: " + lindenPropertiesFile);
    job.addCacheFile(new URI(lindenPropertiesFile + "#lindenProperties"));

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    Path[] inputs = FileInputFormat.getInputPaths(job);
    StringBuilder buffer = new StringBuilder(inputs[0].toString());
    for (int i = 1; i < inputs.length; i++) {
        buffer.append(",");
        buffer.append(inputs[i].toString());
    }
    logger.info("mapreduce.input.dir = " + buffer.toString());
    logger.info("mapreduce.output.dir = " + FileOutputFormat.getOutputPath(job).toString());
    logger.info("mapreduce.job.num.reduce.tasks = " + job.getNumReduceTasks());
    logger.info(shards.length + " shards = " + conf.get(LindenJobConfig.INDEX_SHARDS));
    logger.info("mapreduce.input.format.class = " + job.getInputFormatClass());
    logger.info("mapreduce.output.format.class = " + job.getOutputFormatClass());
    logger.info("mapreduce.cluster.temp.dir = " + conf.get(MRJobConfig.TEMP_DIR));

    job.waitForCompletion(true);
    if (!job.isSuccessful()) {
        throw new RuntimeException("Job failed");
    }
    return 0;
}

From source file:ml.shifu.guagua.hadoop.GuaguaMRUnitDriver.java

License:Apache License

/**
 * Get the list of input {@link Path}s for the map-reduce job.
 */
private static Path[] getInputPaths(String inputs) {
    String[] list = StringUtils.split(inputs);
    Path[] result = new Path[list.length];
    for (int i = 0; i < list.length; i++) {
        result[i] = new Path(StringUtils.unEscapeString(list[i]));
    }
    return result;
}

From source file:ml.shifu.shifu.guagua.ShifuInputFormat.java

License:Apache License

public static Path[] getInputPaths(JobContext context) {
    String dirs = context.getConfiguration().get(CommonConstants.CROSS_VALIDATION_DIR, "");
    LOG.info("crossValidation_dir:" + dirs);
    String[] list = StringUtils.split(dirs);
    Path[] result = new Path[list.length];
    for (int i = 0; i < list.length; i++) {
        result[i] = new Path(StringUtils.unEscapeString(list[i]));
    }
    return result;
}

From source file:org.apache.giraph.io.formats.GiraphFileInputFormat.java

License:Apache License

/**
 * Get the list of vertex input {@link Path}s.
 *
 * @param context The job
 * @return The list of input {@link Path}s
 */
public static Path[] getVertexInputPaths(JobContext context) {
    String dirs = context.getConfiguration().get(VERTEX_INPUT_DIR, "");
    String[] list = StringUtils.split(dirs);
    Path[] result = new Path[list.length];
    for (int i = 0; i < list.length; i++) {
        result[i] = new Path(StringUtils.unEscapeString(list[i]));
    }
    return result;
}