Example usage for org.apache.hadoop.util.StringUtils.split

Introduction

On this page you can find example usages of org.apache.hadoop.util.StringUtils.split.

Prototype

public static String[] split(String str)

Source Link

Document

Split a string using the default separator

Usage

From source file:com.avira.couchdoop.CouchbaseArgs.java

License:Apache License

/**
 * Loads the Couchbase connection settings from a Hadoop configuration.
 *
 * <p>The URL list is replaced only when the URLs property is set; each entry produced by
 * {@code StringUtils.split} is turned into a {@link URI}. The bucket is read without a default,
 * while the password falls back to the empty string.
 *
 * @param conf the Hadoop configuration holding the Couchbase properties
 * @throws ArgsException declared by the overridden method; this body does not throw it directly
 * @throws IllegalArgumentException if one of the configured URLs is not a valid URI
 */
@Override
public void loadFromHadoopConfiguration(Configuration conf) throws ArgsException {
    String rawUrls = conf.get(ARG_COUCHBASE_URLS.getPropertyName());
    if (rawUrls != null) {
        urls = new ArrayList<>();
        // Reuse the value fetched above rather than looking the property up a second time.
        for (String urlString : StringUtils.split(rawUrls)) {
            urls.add(URI.create(urlString));
        }
    }

    bucket = conf.get(ARG_COUCHBASE_BUCKET.getPropertyName());
    password = conf.get(ARG_COUCHBASE_PASSWORD.getPropertyName(), "");
}

From source file:com.bianfeng.bfas.hive.io.RealtimeInputFormat2.java

License:Apache License

/**
 * Reads the input directories configured for the map-reduce job and returns them as
 * {@link Path}s.
 *
 * <p>The {@code mapred.input.dir} property (empty string when unset) is split with the default
 * separator, and every entry is un-escaped before being wrapped in a {@link Path}.
 *
 * @param conf The configuration of the job
 * @return one {@link Path} per configured input directory, in configuration order
 */
public static Path[] getInputPaths(JobConf conf) {
    final String[] escapedDirs = StringUtils.split(conf.get("mapred.input.dir", ""));
    final Path[] paths = new Path[escapedDirs.length];
    int next = 0;
    for (String escapedDir : escapedDirs) {
        paths[next++] = new Path(StringUtils.unEscapeString(escapedDir));
    }
    return paths;
}

From source file:com.blm.orc.OrcInputFormat.java

License:Apache License

/**
 * Resolves the input {@link Path}s of the map-reduce job from its configuration.
 *
 * <p>Unlike variants that default to an empty list, this one insists that
 * {@code mapred.input.dir} is present.
 *
 * @param conf The configuration of the job
 * @return one {@link Path} per entry of {@code mapred.input.dir}, un-escaped
 * @throws IOException if {@code mapred.input.dir} is not defined in {@code conf}
 */
static Path[] getInputPaths(Configuration conf) throws IOException {
    final String inputDirs = conf.get("mapred.input.dir");
    if (null == inputDirs) {
        throw new IOException("Configuration mapred.input.dir is not defined.");
    }

    final String[] entries = StringUtils.split(inputDirs);
    final int count = entries.length;
    final Path[] paths = new Path[count];
    int idx = 0;
    while (idx < count) {
        paths[idx] = new Path(StringUtils.unEscapeString(entries[idx]));
        idx++;
    }
    return paths;
}

From source file:com.chinamobile.bcbsp.io.BSPFileInputFormat.java

License:Apache License

/**
 * Get the list of input {@link Path}s for the bsp job.
 *
 * @param job/*from w ww . ja  v  a  2  s  .c  o  m*/
 *        the current job BSPJob.
 * @return the list of input {@link Path}s for the bsp job.
 */
public static Path[] getInputPaths(BSPJob job) {
    String dirs = job.getConf().get(Constants.USER_BC_BSP_JOB_INPUT_DIR, "");
    String[] list = StringUtils.split(dirs);
    Path[] result = new Path[list.length];
    for (int i = 0; i < list.length; i++) {
        result[i] = new Path(StringUtils.unEscapeString(list[i]));
    }
    return result;
}

From source file:com.dinglicom.clouder.mapreduce.input.FileInputFormat.java

License:Apache License

/**
 * Builds the array of input {@link Path}s registered for the map-reduce job.
 *
 * <p>The directories are taken from the {@code INPUT_DIR} property of the job configuration;
 * an unset property is treated as the empty string.
 *
 * @param context The job
 * @return the un-escaped input {@link Path}s, in the order they appear in the property
 */
public static Path[] getInputPaths(JobContext context) {
    final String[] rawEntries = StringUtils.split(context.getConfiguration().get(INPUT_DIR, ""));
    final Path[] inputPaths = new Path[rawEntries.length];
    for (int pos = 0; pos != rawEntries.length; ++pos) {
        final String unescaped = StringUtils.unEscapeString(rawEntries[pos]);
        inputPaths[pos] = new Path(unescaped);
    }
    return inputPaths;
}

From source file:com.ebay.nest.io.sede.ColumnProjectionUtils.java

License:Apache License

/**
 * Returns an array of column ids(start from zero) which is set in the given
 * parameter <tt>conf</tt>./*from  www .java2  s  . c  om*/
 */
public static ArrayList<Integer> getReadColumnIDs(Configuration conf) {
    if (conf == null) {
        return new ArrayList<Integer>(0);
    }
    String skips = conf.get(READ_COLUMN_IDS_CONF_STR, "");
    String[] list = StringUtils.split(skips);
    ArrayList<Integer> result = new ArrayList<Integer>(list.length);
    for (String element : list) {
        // it may contain duplicates, remove duplicates
        Integer toAdd = Integer.parseInt(element);
        if (!result.contains(toAdd)) {
            result.add(toAdd);
        }
    }
    return result;
}

From source file:com.linkedin.json.JsonSequenceFileInputFormat.java

License:Apache License

/**
 * Lists the file statuses found under every configured input directory.
 *
 * <p>The directories come from the {@code mapred.input.dir} property (empty string when unset);
 * each one is handed to {@code getAllSubFileStatus} and the results are concatenated in order.
 *
 * @param job the job whose configuration names the input directories
 * @return the combined statuses for all input directories
 * @throws IOException declared for the per-directory lookup
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    final String[] inputDirs = StringUtils.split(job.getConfiguration().get("mapred.input.dir", ""));

    final List<FileStatus> collected = new ArrayList<FileStatus>();
    for (String inputDir : inputDirs) {
        final Path dirPath = new Path(inputDir);
        collected.addAll(getAllSubFileStatus(job, dirPath));
    }
    return collected;
}

From source file:com.ning.metrics.serialization.hadoop.SmileInputFormat.java

License:Apache License

/**
 * Returns the input {@link Path}s declared for the map-reduce job.
 *
 * <p>Reads {@code mapred.input.dir} from the job configuration (empty string when unset),
 * splits it with the default separator and un-escapes every entry.
 *
 * @param context The job
 * @return the input {@link Path}s for the map-reduce job, in configuration order
 */
public static Path[] getInputPaths(JobContext context) {
    final String configuredDirs = context.getConfiguration().get("mapred.input.dir", "");
    final String[] pieces = StringUtils.split(configuredDirs);
    final Path[] out = new Path[pieces.length];
    int slot = 0;
    for (String piece : pieces) {
        out[slot] = new Path(StringUtils.unEscapeString(piece));
        slot += 1;
    }
    return out;
}

From source file:com.splicemachine.orc.input.SpliceOrcNewInputFormat.java

License:Open Source License

/**
 * Parses the column ids stored under the given configuration key.
 *
 * <p>The property value (empty string when unset) is split with the default separator and every
 * entry is parsed as a decimal integer. Duplicates are kept and the order is preserved.
 *
 * @param confString the name of the configuration property holding the column ids
 * @param conf the configuration to read the property from
 * @return the parsed column ids, one per entry of the property
 * @throws NumberFormatException if an entry of the property is not a parsable integer
 */
public static List<Integer> getReadColumnIDs(String confString, Configuration conf) {
    String skips = conf.get(confString, "");
    String[] list = StringUtils.split(skips);
    // Typed list instead of the raw ArrayList left behind by the decompiler.
    List<Integer> result = new ArrayList<Integer>(list.length);
    for (String element : list) {
        result.add(Integer.valueOf(element));
    }
    return result;
}

From source file:com.telefonica.iot.tidoop.apiext.hadoop.ckan.CKANInputFormat.java

License:Open Source License

/**
 * Computes the input splits for every CKAN URL configured on the job.
 *
 * <p>The CKAN backend is created on first use from the host, port, SSL flag, API key and splits
 * length found in the job configuration. Each URL listed under {@code INPUT_CKAN_URLS} is then
 * classified as an organization, a package/dataset or a resource, and the matching splits are
 * appended to the result.
 *
 * @param job the job whose configuration carries the CKAN settings and input URLs
 * @return the splits gathered for all configured CKAN URLs
 * @throws NumberFormatException if the configured splits length is missing or not an integer
 */
@Override
public List<InputSplit> getSplits(JobContext job) {
    // get the Job configuration once; every setting below is read from it
    Configuration conf = job.getConfiguration();

    // create a CKAN backend
    String ckanHost = conf.get(INPUT_CKAN_HOST);
    String ckanPort = conf.get(INPUT_CKAN_PORT);
    // constant-first comparison: an unset SSL property means "no SSL" rather than a NullPointerException
    boolean ckanSSL = "true".equals(conf.get(INPUT_CKAN_SSL));
    String ckanAPIKey = conf.get(INPUT_CKAN_API_KEY);
    int splitsLength = Integer.parseInt(conf.get(INPUT_CKAN_SPLITS_LENGTH));
    // NOTE(review): this message writes the API key to the log in clear text; consider masking it
    logger.info("Getting splits, the backend is at " + (ckanSSL ? "https://" : "http://") + ckanHost + ":"
            + ckanPort + " (API key=" + ckanAPIKey + ")");

    if (backend == null) {
        backend = new CKANBackend(ckanHost, ckanPort, ckanSSL, ckanAPIKey, splitsLength);
    } // if

    // resulting splits container
    List<InputSplit> splits = new ArrayList<InputSplit>();

    // get the inputs, i.e. the list of CKAN URLs
    String input = conf.get(INPUT_CKAN_URLS, "");
    String[] ckanURLs = StringUtils.split(input);

    // iterate on the CKAN URLs, they may be related to whole organizations, packages/datasets or specific resources
    for (String ckanURL : ckanURLs) {
        if (isCKANOrg(ckanURL)) {
            logger.info("Getting splits for " + ckanURL + ", it is an organization");
            splits.addAll(getSplitsOrg(ckanURL, conf));
        } else if (isCKANPkg(ckanURL)) {
            logger.info("Getting splits for " + ckanURL + ", it is a package/dataset");
            splits.addAll(getSplitsPkg(ckanURL, conf));
        } else {
            logger.info("Getting splits for " + ckanURL + ", it is a resource");
            splits.addAll(getSplitsRes(ckanURL, conf));
        } // if else if
    } // for

    // return the splits
    logger.info("Number of total splits=" + splits.size());
    return splits;
}