List of usage examples for org.apache.hadoop.util.StringUtils.split
public static String[] split(String str)
From source file:com.avira.couchdoop.CouchbaseArgs.java
License:Apache License
@Override public void loadFromHadoopConfiguration(Configuration conf) throws ArgsException { String rawUrls = conf.get(ARG_COUCHBASE_URLS.getPropertyName()); if (rawUrls != null) { urls = new ArrayList<>(); String[] urlStrings = StringUtils.split(conf.get(ARG_COUCHBASE_URLS.getPropertyName())); for (String urlString : urlStrings) { urls.add(URI.create(urlString)); }// ww w . j a v a2 s.c o m } bucket = conf.get(ARG_COUCHBASE_BUCKET.getPropertyName()); password = conf.get(ARG_COUCHBASE_PASSWORD.getPropertyName(), ""); }
From source file:com.bianfeng.bfas.hive.io.RealtimeInputFormat2.java
License:Apache License
/**
 * Reads the {@code mapred.input.dir} setting of the job and turns every entry
 * into a {@link Path}.
 *
 * @param conf The configuration of the job
 * @return one {@link Path} per entry of the setting; an absent setting is
 *         treated as the empty string
 */
public static Path[] getInputPaths(JobConf conf) {
    String[] escapedDirs = StringUtils.split(conf.get("mapred.input.dir", ""));
    Path[] paths = new Path[escapedDirs.length];
    int next = 0;
    for (String escapedDir : escapedDirs) {
        // Each entry is unescaped before it is wrapped in a Path.
        paths[next++] = new Path(StringUtils.unEscapeString(escapedDir));
    }
    return paths;
}
From source file:com.blm.orc.OrcInputFormat.java
License:Apache License
/** * Get the list of input {@link Path}s for the map-reduce job. * * @param conf The configuration of the job * @return the list of input {@link Path}s for the map-reduce job. */// w w w. j ava2 s .c om static Path[] getInputPaths(Configuration conf) throws IOException { String dirs = conf.get("mapred.input.dir"); if (dirs == null) { throw new IOException("Configuration mapred.input.dir is not defined."); } String[] list = StringUtils.split(dirs); Path[] result = new Path[list.length]; for (int i = 0; i < list.length; i++) { result[i] = new Path(StringUtils.unEscapeString(list[i])); } return result; }
From source file:com.chinamobile.bcbsp.io.BSPFileInputFormat.java
License:Apache License
/** * Get the list of input {@link Path}s for the bsp job. * * @param job/*from w ww . ja v a 2 s .c o m*/ * the current job BSPJob. * @return the list of input {@link Path}s for the bsp job. */ public static Path[] getInputPaths(BSPJob job) { String dirs = job.getConf().get(Constants.USER_BC_BSP_JOB_INPUT_DIR, ""); String[] list = StringUtils.split(dirs); Path[] result = new Path[list.length]; for (int i = 0; i < list.length; i++) { result[i] = new Path(StringUtils.unEscapeString(list[i])); } return result; }
From source file:com.dinglicom.clouder.mapreduce.input.FileInputFormat.java
License:Apache License
/**
 * Resolves the input {@link Path}s configured for the map-reduce job.
 *
 * @param context The job
 * @return one {@link Path} per entry of the {@code INPUT_DIR} setting; an
 *         absent setting is treated as the empty string
 */
public static Path[] getInputPaths(JobContext context) {
    final String[] entries = StringUtils.split(context.getConfiguration().get(INPUT_DIR, ""));
    final Path[] paths = new Path[entries.length];
    int slot = 0;
    for (String entry : entries) {
        // Each entry is unescaped before it is wrapped in a Path.
        paths[slot] = new Path(StringUtils.unEscapeString(entry));
        slot += 1;
    }
    return paths;
}
From source file:com.ebay.nest.io.sede.ColumnProjectionUtils.java
License:Apache License
/** * Returns an array of column ids(start from zero) which is set in the given * parameter <tt>conf</tt>./*from www .java2 s . c om*/ */ public static ArrayList<Integer> getReadColumnIDs(Configuration conf) { if (conf == null) { return new ArrayList<Integer>(0); } String skips = conf.get(READ_COLUMN_IDS_CONF_STR, ""); String[] list = StringUtils.split(skips); ArrayList<Integer> result = new ArrayList<Integer>(list.length); for (String element : list) { // it may contain duplicates, remove duplicates Integer toAdd = Integer.parseInt(element); if (!result.contains(toAdd)) { result.add(toAdd); } } return result; }
From source file:com.linkedin.json.JsonSequenceFileInputFormat.java
License:Apache License
@Override protected List<FileStatus> listStatus(JobContext job) throws IOException { String dirs = job.getConfiguration().get("mapred.input.dir", ""); String[] list = StringUtils.split(dirs); List<FileStatus> status = new ArrayList<FileStatus>(); for (int i = 0; i < list.length; i++) { status.addAll(getAllSubFileStatus(job, new Path(list[i]))); }// www .j a v a 2 s .co m return status; }
From source file:com.ning.metrics.serialization.hadoop.SmileInputFormat.java
License:Apache License
/**
 * Resolves the input {@link Path}s configured for the map-reduce job.
 *
 * @param context The job
 * @return one {@link Path} per entry of the {@code mapred.input.dir} setting;
 *         an absent setting is treated as the empty string
 */
public static Path[] getInputPaths(JobContext context) {
    String configuredDirs = context.getConfiguration().get("mapred.input.dir", "");
    String[] entries = StringUtils.split(configuredDirs);
    int count = entries.length;
    Path[] paths = new Path[count];
    int at = 0;
    while (at < count) {
        // Each entry is unescaped before it is wrapped in a Path.
        paths[at] = new Path(StringUtils.unEscapeString(entries[at]));
        at++;
    }
    return paths;
}
From source file:com.splicemachine.orc.input.SpliceOrcNewInputFormat.java
License:Open Source License
public static List<Integer> getReadColumnIDs(String confString, Configuration conf) { String skips = conf.get(confString, ""); String[] list = StringUtils.split(skips); ArrayList result = new ArrayList(list.length); String[] arr$ = list;//w ww . j a v a 2 s . c o m int len$ = list.length; for (int i$ = 0; i$ < len$; ++i$) { String element = arr$[i$]; Integer toAdd = Integer.valueOf(Integer.parseInt(element)); result.add(toAdd); } return result; }
From source file:com.telefonica.iot.tidoop.apiext.hadoop.ckan.CKANInputFormat.java
License:Open Source License
@Override public List<InputSplit> getSplits(JobContext job) { // create a CKAN backend String ckanHost = job.getConfiguration().get(INPUT_CKAN_HOST); String ckanPort = job.getConfiguration().get(INPUT_CKAN_PORT); boolean ckanSSL = job.getConfiguration().get(INPUT_CKAN_SSL).equals("true"); String ckanAPIKey = job.getConfiguration().get(INPUT_CKAN_API_KEY); int splitsLength = new Integer(job.getConfiguration().get(INPUT_CKAN_SPLITS_LENGTH)); logger.info("Getting splits, the backend is at " + (ckanSSL ? "https://" : "http://") + ckanHost + ":" + ckanPort + " (API key=" + ckanAPIKey + ")"); if (backend == null) { backend = new CKANBackend(ckanHost, ckanPort, ckanSSL, ckanAPIKey, splitsLength); } // if/* w w w. j a v a2 s . com*/ // resulting splits container List<InputSplit> splits = new ArrayList<InputSplit>(); // get the Job configuration Configuration conf = job.getConfiguration(); // get the inputs, i.e. the list of CKAN URLs String input = conf.get(INPUT_CKAN_URLS, ""); String[] ckanURLs = StringUtils.split(input); // iterate on the CKAN URLs, they may be related to whole organizations, packages/datasets or specific resources for (String ckanURL : ckanURLs) { if (isCKANOrg(ckanURL)) { logger.info("Getting splits for " + ckanURL + ", it is an organization"); splits.addAll(getSplitsOrg(ckanURL, job.getConfiguration())); } else if (isCKANPkg(ckanURL)) { logger.info("Getting splits for " + ckanURL + ", it is a package/dataset"); splits.addAll(getSplitsPkg(ckanURL, job.getConfiguration())); } else { logger.info("Getting splits for " + ckanURL + ", it is a resource"); splits.addAll(getSplitsRes(ckanURL, job.getConfiguration())); } // if else if } // for // return the splits logger.info("Number of total splits=" + splits.size()); return splits; }