Example usage for org.apache.hadoop.util StringUtils split

List of usage examples for org.apache.hadoop.util StringUtils split

Introduction

On this page you can find example usages of org.apache.hadoop.util.StringUtils.split.

Prototype

public static String[] split(String str) 

Source Link

Document

Splits a string using the default separator (a comma, with backslash as the escape character).

Usage

From source file:com.avira.couchdoop.CouchbaseArgs.java

License:Apache License

/**
 * Populates the Couchbase connection settings from a Hadoop configuration.
 *
 * <p>The URL list is only replaced when the URLs property is present in
 * {@code conf}; its value is split with {@code StringUtils.split(String)} and
 * each piece is converted with {@link URI#create(String)}. The bucket is read
 * without a default, and the password falls back to the empty string.
 *
 * @param conf the Hadoop configuration to read the properties from
 * @throws ArgsException declared by the method signature; not thrown directly
 *         by the code in this method
 */
@Override
public void loadFromHadoopConfiguration(Configuration conf) throws ArgsException {
    String rawUrls = conf.get(ARG_COUCHBASE_URLS.getPropertyName());
    if (rawUrls != null) {
        urls = new ArrayList<>();
        // Reuse the value fetched for the null check instead of a second lookup.
        for (String urlString : StringUtils.split(rawUrls)) {
            urls.add(URI.create(urlString));
        }
    }

    bucket = conf.get(ARG_COUCHBASE_BUCKET.getPropertyName());
    password = conf.get(ARG_COUCHBASE_PASSWORD.getPropertyName(), "");
}

From source file:com.bianfeng.bfas.hive.io.RealtimeInputFormat2.java

License:Apache License

/**
 * Returns the input {@link Path}s configured for the map-reduce job.
 *
 * <p>Reads the {@code mapred.input.dir} property (empty string when unset),
 * splits it with {@code StringUtils.split(String)} and un-escapes each entry
 * before wrapping it in a {@link Path}.
 *
 * @param conf The configuration of the job
 * @return the list of input {@link Path}s for the map-reduce job.
 */
public static Path[] getInputPaths(JobConf conf) {
    final String[] escapedDirs = StringUtils.split(conf.get("mapred.input.dir", ""));
    final Path[] paths = new Path[escapedDirs.length];
    int next = 0;
    for (String escapedDir : escapedDirs) {
        paths[next++] = new Path(StringUtils.unEscapeString(escapedDir));
    }
    return paths;
}

From source file:com.blm.orc.OrcInputFormat.java

License:Apache License

/**
 * Resolves the input {@link Path}s of the map-reduce job from the
 * {@code mapred.input.dir} property.
 *
 * @param conf The configuration of the job
 * @return the list of input {@link Path}s for the map-reduce job.
 * @throws IOException if {@code mapred.input.dir} is not set in {@code conf}
 */
static Path[] getInputPaths(Configuration conf) throws IOException {
    final String inputDirs = conf.get("mapred.input.dir");
    if (null == inputDirs) {
        throw new IOException("Configuration mapred.input.dir is not defined.");
    }

    final String[] entries = StringUtils.split(inputDirs);
    final Path[] paths = new Path[entries.length];
    int idx = 0;
    while (idx < entries.length) {
        // Entries are stored escaped; undo the escaping before building the Path.
        paths[idx] = new Path(StringUtils.unEscapeString(entries[idx]));
        idx++;
    }
    return paths;
}

From source file:com.chinamobile.bcbsp.io.BSPFileInputFormat.java

License:Apache License

/**
 * Get the list of input {@link Path}s for the bsp job.
 *
 * @param job/*from w ww . ja  v  a  2  s  .c  o  m*/
 *        the current job BSPJob.
 * @return the list of input {@link Path}s for the bsp job.
 */
public static Path[] getInputPaths(BSPJob job) {
    String dirs = job.getConf().get(Constants.USER_BC_BSP_JOB_INPUT_DIR, "");
    String[] list = StringUtils.split(dirs);
    Path[] result = new Path[list.length];
    for (int i = 0; i < list.length; i++) {
        result[i] = new Path(StringUtils.unEscapeString(list[i]));
    }
    return result;
}

From source file:com.dinglicom.clouder.mapreduce.input.FileInputFormat.java

License:Apache License

/**
 * Returns the input {@link Path}s configured for the map-reduce job.
 *
 * <p>The {@code INPUT_DIR} property of the job's configuration (empty string
 * when unset) is split into entries; each entry is un-escaped and wrapped in
 * a {@link Path}.
 *
 * @param context The job
 * @return the list of input {@link Path}s for the map-reduce job.
 */
public static Path[] getInputPaths(JobContext context) {
    final String[] dirNames = StringUtils.split(context.getConfiguration().get(INPUT_DIR, ""));
    final Path[] inputPaths = new Path[dirNames.length];
    int slot = 0;
    for (final String dirName : dirNames) {
        inputPaths[slot] = new Path(StringUtils.unEscapeString(dirName));
        slot++;
    }
    return inputPaths;
}

From source file:com.ebay.nest.io.sede.ColumnProjectionUtils.java

License:Apache License

/**
 * Returns an array of column ids(start from zero) which is set in the given
 * parameter <tt>conf</tt>./*from  www .java2  s  . c  om*/
 */
public static ArrayList<Integer> getReadColumnIDs(Configuration conf) {
    if (conf == null) {
        return new ArrayList<Integer>(0);
    }
    String skips = conf.get(READ_COLUMN_IDS_CONF_STR, "");
    String[] list = StringUtils.split(skips);
    ArrayList<Integer> result = new ArrayList<Integer>(list.length);
    for (String element : list) {
        // it may contain duplicates, remove duplicates
        Integer toAdd = Integer.parseInt(element);
        if (!result.contains(toAdd)) {
            result.add(toAdd);
        }
    }
    return result;
}

From source file:com.linkedin.json.JsonSequenceFileInputFormat.java

License:Apache License

/**
 * Gathers the file statuses for every directory listed in the job's
 * {@code mapred.input.dir} property.
 *
 * <p>The property value (empty string when unset) is split into entries and
 * the result of {@code getAllSubFileStatus} for each entry is appended, in
 * order, to a single list.
 *
 * @param job the job whose configuration names the input directories
 * @return the concatenated statuses for all configured directories
 * @throws IOException declared by the method signature; presumably propagated
 *         from {@code getAllSubFileStatus}
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    final String[] inputDirs = StringUtils.split(job.getConfiguration().get("mapred.input.dir", ""));
    final List<FileStatus> collected = new ArrayList<FileStatus>();
    for (final String inputDir : inputDirs) {
        collected.addAll(getAllSubFileStatus(job, new Path(inputDir)));
    }
    return collected;
}

From source file:com.ning.metrics.serialization.hadoop.SmileInputFormat.java

License:Apache License

/**
 * Returns the input {@link Path}s configured for the map-reduce job.
 *
 * <p>Reads {@code mapred.input.dir} from the job's configuration (empty
 * string when unset), splits it, and converts each un-escaped entry into a
 * {@link Path}.
 *
 * @param context The job
 * @return the list of input {@link Path}s for the map-reduce job.
 */
public static Path[] getInputPaths(JobContext context) {
    final String configuredDirs = context.getConfiguration().get("mapred.input.dir", "");
    final String[] parts = StringUtils.split(configuredDirs);
    final Path[] paths = new Path[parts.length];
    int k = 0;
    while (k < parts.length) {
        final String plain = StringUtils.unEscapeString(parts[k]);
        paths[k] = new Path(plain);
        k++;
    }
    return paths;
}

From source file:com.splicemachine.orc.input.SpliceOrcNewInputFormat.java

License:Open Source License

/**
 * Parses the column ids stored under {@code confString} in {@code conf}.
 *
 * <p>The property value (empty string when unset) is split with
 * {@code StringUtils.split(String)} and each element is parsed as a decimal
 * integer. Duplicates are kept and the configured order is preserved.
 *
 * @param confString name of the configuration property holding the id list
 * @param conf the configuration to read from
 * @return the parsed column ids, in configuration order
 * @throws NumberFormatException if any element is not a valid integer
 */
public static List<Integer> getReadColumnIDs(String confString, Configuration conf) {
    String skips = conf.get(confString, "");
    String[] list = StringUtils.split(skips);
    // Typed list: the raw ArrayList caused an unchecked conversion on return.
    List<Integer> result = new ArrayList<Integer>(list.length);
    for (String element : list) {
        result.add(Integer.valueOf(element));
    }
    return result;
}

From source file:com.telefonica.iot.tidoop.apiext.hadoop.ckan.CKANInputFormat.java

License:Open Source License

/**
 * Computes the input splits for every CKAN URL configured for the job.
 *
 * <p>Connection settings are read from the job configuration, and
 * {@code backend} is created only when it is still {@code null}. Each URL
 * listed under {@code INPUT_CKAN_URLS} is then dispatched to the
 * organization, package/dataset or resource split builder.
 *
 * @param job the job whose configuration holds the CKAN settings
 * @return the splits gathered for all configured CKAN URLs
 * @throws NumberFormatException if the configured splits length is missing
 *         or is not a valid integer
 */
@Override
public List<InputSplit> getSplits(JobContext job) {
    // get the Job configuration once and reuse it everywhere below
    Configuration conf = job.getConfiguration();

    // create a CKAN backend
    String ckanHost = conf.get(INPUT_CKAN_HOST);
    String ckanPort = conf.get(INPUT_CKAN_PORT);
    // constant-first equals: an unset SSL property means "no SSL" instead of a NullPointerException
    boolean ckanSSL = "true".equals(conf.get(INPUT_CKAN_SSL));
    String ckanAPIKey = conf.get(INPUT_CKAN_API_KEY);
    int splitsLength = Integer.parseInt(conf.get(INPUT_CKAN_SPLITS_LENGTH));
    // NOTE(review): the API key is written to the log in clear text - confirm this is acceptable
    logger.info("Getting splits, the backend is at " + (ckanSSL ? "https://" : "http://") + ckanHost + ":"
            + ckanPort + " (API key=" + ckanAPIKey + ")");

    if (backend == null) {
        backend = new CKANBackend(ckanHost, ckanPort, ckanSSL, ckanAPIKey, splitsLength);
    } // if

    // resulting splits container
    List<InputSplit> splits = new ArrayList<InputSplit>();

    // get the inputs, i.e. the list of CKAN URLs
    String input = conf.get(INPUT_CKAN_URLS, "");
    String[] ckanURLs = StringUtils.split(input);

    // iterate on the CKAN URLs, they may be related to whole organizations, packages/datasets or specific resources
    for (String ckanURL : ckanURLs) {
        if (isCKANOrg(ckanURL)) {
            logger.info("Getting splits for " + ckanURL + ", it is an organization");
            splits.addAll(getSplitsOrg(ckanURL, conf));
        } else if (isCKANPkg(ckanURL)) {
            logger.info("Getting splits for " + ckanURL + ", it is a package/dataset");
            splits.addAll(getSplitsPkg(ckanURL, conf));
        } else {
            logger.info("Getting splits for " + ckanURL + ", it is a resource");
            splits.addAll(getSplitsRes(ckanURL, conf));
        } // if else if
    } // for

    // return the splits
    logger.info("Number of total splits=" + splits.size());
    return splits;
}