Usage examples for org.apache.hadoop.io.DefaultStringifier.store
public static <K> void store(Configuration conf, K item, String keyName) throws IOException
From source file:alluxio.hadoop.ConfUtils.java
License:Apache License
/** * Stores the Alluxio {@link Configuration} to the target * Hadoop {@link org.apache.hadoop.conf.Configuration} object. * * @param target the {@link org.apache.hadoop.conf.Configuration} target *///from ww w .java2 s. com public static void storeToHadoopConfiguration(org.apache.hadoop.conf.Configuration target) { // Need to set io.serializations key to prevent NPE when trying to get SerializationFactory. target.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization," + "org.apache.hadoop.io.serializer.WritableSerialization"); Map<String, String> confProperties = Configuration.toMap(); try { DefaultStringifier.store(target, confProperties, PropertyKey.SITE_CONF_DIR.toString()); } catch (IOException ex) { LOG.error("Unable to store Alluxio configuration in Hadoop configuration", ex); throw new RuntimeException(ex); } }
From source file:com.marklogic.contentpump.DelimitedTextInputFormat.java
License:Apache License
public List<InputSplit> getSplits(JobContext job) throws IOException { boolean delimSplit = isSplitInput(job.getConfiguration()); //if delimSplit is true, size of each split is determined by //Math.max(minSize, Math.min(maxSize, blockSize)) in FileInputFormat List<InputSplit> splits = super.getSplits(job); if (!delimSplit) { return splits; }/*w w w .j ava2 s . co m*/ if (splits.size() >= SPLIT_COUNT_LIMIT) { //if #splits > 1 million, there is enough parallelism //therefore no point to split LOG.warn("Exceeding SPLIT_COUNT_LIMIT, input_split is off:" + SPLIT_COUNT_LIMIT); DefaultStringifier.store(job.getConfiguration(), false, ConfigConstants.CONF_SPLIT_INPUT); return splits; } // add header info into splits List<InputSplit> populatedSplits = new ArrayList<InputSplit>(); LOG.info(splits.size() + " DelimitedSplits generated"); Configuration conf = job.getConfiguration(); char delimiter = 0; ArrayList<Text> hlist = new ArrayList<Text>(); for (InputSplit file : splits) { FileSplit fsplit = ((FileSplit) file); Path path = fsplit.getPath(); FileSystem fs = path.getFileSystem(conf); if (fsplit.getStart() == 0) { // parse the inSplit, get the header FSDataInputStream fileIn = fs.open(path); String delimStr = conf.get(ConfigConstants.CONF_DELIMITER, ConfigConstants.DEFAULT_DELIMITER); if (delimStr.length() == 1) { delimiter = delimStr.charAt(0); } else { LOG.error("Incorrect delimitor: " + delimiter + ". 
Expects single character."); } String encoding = conf.get(MarkLogicConstants.OUTPUT_CONTENT_ENCODING, MarkLogicConstants.DEFAULT_OUTPUT_CONTENT_ENCODING); InputStreamReader instream = new InputStreamReader(fileIn, encoding); CSVParser parser = new CSVParser(instream, CSVParserFormatter.getFormat(delimiter, DelimitedTextReader.encapsulator, true, true)); Iterator<CSVRecord> it = parser.iterator(); String[] header = null; if (it.hasNext()) { CSVRecord record = (CSVRecord) it.next(); Iterator<String> recordIterator = record.iterator(); int recordSize = record.size(); header = new String[recordSize]; for (int i = 0; i < recordSize; i++) { if (recordIterator.hasNext()) { header[i] = (String) recordIterator.next(); } else { throw new IOException("Record size doesn't match the real size"); } } EncodingUtil.handleBOMUTF8(header, 0); hlist.clear(); for (String s : header) { hlist.add(new Text(s)); } } instream.close(); } DelimitedSplit ds = new DelimitedSplit(new TextArrayWritable(hlist.toArray(new Text[hlist.size()])), path, fsplit.getStart(), fsplit.getLength(), fsplit.getLocations()); populatedSplits.add(ds); } return populatedSplits; }
From source file:com.marklogic.contentpump.TransformOutputFormat.java
License:Apache License
@Override public void checkOutputSpecs(Configuration conf, ContentSource cs) throws IOException { super.checkOutputSpecs(conf, cs); // store mimetypes map into config system DefaultStringifier.store(conf, getMimetypesMap(), ConfigConstants.CONF_MIMETYPES); }
From source file:com.marklogic.mapreduce.ContentOutputFormat.java
License:Apache License
@Override public void checkOutputSpecs(Configuration conf, ContentSource cs) throws IOException { Session session = null;/*from w ww . java 2 s .c om*/ ResultSequence result = null; try { session = cs.newSession(); RequestOptions options = new RequestOptions(); options.setDefaultXQueryVersion("1.0-ml"); session.setDefaultRequestOptions(options); // clear output dir if specified String outputDir = conf.get(OUTPUT_DIRECTORY); if (outputDir != null) { outputDir = outputDir.endsWith("/") ? outputDir : outputDir + "/"; if (conf.getBoolean(OUTPUT_CLEAN_DIR, false)) { // delete directory if exists String queryText = DELETE_DIRECTORY_TEMPLATE.replace(DIRECTORY_TEMPLATE, outputDir); AdhocQuery query = session.newAdhocQuery(queryText); result = session.submitRequest(query); } else { // ensure nothing exists under output dir String queryText = CHECK_DIRECTORY_EXIST_TEMPLATE.replace(DIRECTORY_TEMPLATE, outputDir); AdhocQuery query = session.newAdhocQuery(queryText); result = session.submitRequest(query); if (result.hasNext()) { ResultItem item = result.next(); if (((XSBoolean) (item.getItem())).asBoolean()) { throw new IllegalStateException("Directory " + outputDir + " already exists"); } } else { throw new IllegalStateException("Failed to query directory content."); } } } // initialize server host name and assignment policy initialize(session); // ensure manual directory creation if (fastLoad) { LOG.info("Running in fast load mode"); // store forest-info map into config system DefaultStringifier.store(conf, queryForestInfo(cs), OUTPUT_FOREST_HOST); AdhocQuery query = session.newAdhocQuery(DIRECTORY_CREATE_QUERY); result = session.submitRequest(query); if (result.hasNext()) { ResultItem item = result.next(); String dirMode = item.asString(); if (!dirMode.equals(MANUAL_DIRECTORY_MODE)) { throw new IllegalStateException("Manual directory creation mode is required. 
" + "The current creation mode is " + dirMode + "."); } } else { throw new IllegalStateException("Failed to query directory creation mode."); } } else { TextArrayWritable hostArray; // 23798: replace hostname in forest config with // user-specified output host String outputHost = conf.get(OUTPUT_HOST); if (MODE_LOCAL.equals(conf.get(EXECUTION_MODE))) { hostArray = queryHosts(cs, initHostName, outputHost); } else { hostArray = queryHosts(cs); } DefaultStringifier.store(conf, hostArray, OUTPUT_FOREST_HOST); } // validate capabilities String[] perms = conf.getStrings(OUTPUT_PERMISSION); if (perms != null && perms.length > 0) { if (perms.length % 2 != 0) { throw new IllegalStateException("Permissions are expected to be in <role, capability> pairs."); } int i = 0; while (i + 1 < perms.length) { String roleName = perms[i++]; if (roleName == null || roleName.isEmpty()) { throw new IllegalStateException("Illegal role name: " + roleName); } String perm = perms[i].trim(); if (!perm.equalsIgnoreCase(ContentCapability.READ.toString()) && !perm.equalsIgnoreCase(ContentCapability.EXECUTE.toString()) && !perm.equalsIgnoreCase(ContentCapability.INSERT.toString()) && !perm.equalsIgnoreCase(ContentCapability.UPDATE.toString())) { throw new IllegalStateException("Illegal capability: " + perm); } i++; } } } catch (RequestException ex) { throw new IOException(ex); } finally { if (session != null) { session.close(); } if (result != null) { result.close(); } } }
From source file:com.marklogic.mapreduce.KeyValueOutputFormat.java
License:Apache License
@Override public void checkOutputSpecs(Configuration conf, ContentSource cs) throws IOException { // check for required configuration if (conf.get(OUTPUT_QUERY) == null) { throw new IllegalArgumentException(OUTPUT_QUERY + " is not specified."); }//from ww w . j a v a 2 s . co m // warn against unsupported configuration if (conf.get(BATCH_SIZE) != null) { LOG.warn("Config entry for " + "\"mapreduce.marklogic.output.batchsize\" is not " + "supported for " + this.getClass().getName() + " and will be ignored."); } String queryLanguage = conf.get(OUTPUT_QUERY_LANGUAGE); if (queryLanguage != null) { InternalUtilities.checkQueryLanguage(queryLanguage); } // store hosts into config system DefaultStringifier.store(conf, queryHosts(cs), OUTPUT_FOREST_HOST); }
From source file:com.marklogic.mapreduce.NodeOutputFormat.java
License:Apache License
@Override public void checkOutputSpecs(Configuration conf, ContentSource cs) throws IOException { // warn against unsupported configuration if (conf.get(BATCH_SIZE) != null) { LOG.warn("Config entry for " + "\"mapreduce.marklogic.output.batchsize\" is not " + "supported for " + this.getClass().getName() + " and will be ignored."); }//from w ww. ja va 2 s . c om // store hosts into config system DefaultStringifier.store(conf, queryHosts(cs), OUTPUT_FOREST_HOST); }
From source file:org.apache.gora.util.IOUtils.java
License:Apache License
/** * Stores the given object in the configuration under the given dataKey * @param obj the object to store/*from w ww .j a v a2 s.c o m*/ * @param conf the configuration to store the object into * @param dataKey the key to store the data */ public static <T> void storeToConf(T obj, Configuration conf, String dataKey) throws IOException { String classKey = dataKey + "._class"; conf.set(classKey, obj.getClass().getName()); DefaultStringifier.store(conf, obj, dataKey); }
From source file:org.apache.mahout.text.LuceneStorageConfiguration.java
License:Apache License
/**
 * Writes this object into the held {@code configuration} under {@code KEY} and hands back
 * a new {@link Configuration} constructed from it.
 *
 * @return a {@link Configuration} object with a String serialization
 * @throws IOException if serialization fails
 */
public Configuration serialize() throws IOException {
    DefaultStringifier.store(configuration, this, KEY);
    final Configuration snapshot = new Configuration(configuration);
    return snapshot;
}
From source file:org.apache.sqoop.mapreduce.hcat.SqoopHCatUtilities.java
License:Apache License
public static void configureExportInputFormat(SqoopOptions opts, Job job, ConnManager connMgr, String dbTable, Configuration config) throws IOException { LOG.info("Configuring HCatalog for export job"); SqoopHCatUtilities hCatUtils = SqoopHCatUtilities.instance(); hCatUtils.configureHCat(opts, job, connMgr, dbTable, job.getConfiguration()); job.setInputFormatClass(getInputFormatClass()); Map<String, Integer> dbColTypes = hCatUtils.getDbColumnTypes(); MapWritable columnTypesJava = new MapWritable(); for (Map.Entry<String, Integer> e : dbColTypes.entrySet()) { Text columnName = new Text(e.getKey()); Text columnText = new Text(connMgr.toJavaType(dbTable, e.getKey(), e.getValue())); columnTypesJava.put(columnName, columnText); }//from w w w .j a v a 2 s .c o m MapWritable columnTypesSql = new MapWritable(); for (Map.Entry<String, Integer> e : dbColTypes.entrySet()) { Text columnName = new Text(e.getKey()); IntWritable sqlType = new IntWritable(e.getValue()); columnTypesSql.put(columnName, sqlType); } DefaultStringifier.store(config, columnTypesJava, SqoopHCatUtilities.HCAT_DB_OUTPUT_COLTYPES_JAVA); DefaultStringifier.store(config, columnTypesSql, SqoopHCatUtilities.HCAT_DB_OUTPUT_COLTYPES_SQL); }
From source file:org.apache.sqoop.mapreduce.JdbcExportJob.java
License:Apache License
/**
 * Sets up the input format for the export job.
 *
 * <p>After recording the input file type and delegating to the superclass, an HCatalog job
 * is handed off to {@code SqoopHCatUtilities.configureExportInputFormat}. For Avro data
 * files, a column-name to Java-type map is stored in the job configuration under
 * {@code AvroExportMapper.AVRO_COLUMN_TYPES_MAP}. Any other file type needs nothing further.
 *
 * @param job the job being configured
 * @param tableName the target table name
 * @param tableClassName the table class name passed to the superclass
 * @param splitByCol the split-by column passed to the superclass
 * @throws ClassNotFoundException propagated from the superclass configuration
 * @throws IOException if configuration or storing the column types fails
 */
@Override
protected void configureInputFormat(Job job, String tableName, String tableClassName,
        String splitByCol) throws ClassNotFoundException, IOException {
    fileType = getInputFileType();
    super.configureInputFormat(job, tableName, tableClassName, splitByCol);

    if (isHCatJob) {
        SqoopHCatUtilities.configureExportInputFormat(options, job, context.getConnManager(),
                tableName, job.getConfiguration());
        return;
    }
    if (fileType != FileType.AVRO_DATA_FILE) {
        return;
    }

    LOG.debug("Configuring for Avro export");
    ConnManager manager = context.getConnManager();
    // Column types come from the table/query unless a stored procedure call is configured.
    Map<String, Integer> sqlTypesByColumn = (options.getCall() == null)
            ? manager.getColumnTypes(tableName, options.getSqlQuery())
            : manager.getColumnTypesForProcedure(options.getCall());

    MapWritable javaTypesByColumn = new MapWritable();
    for (Map.Entry<String, Integer> column : sqlTypesByColumn.entrySet()) {
        Text name = new Text(column.getKey());
        Text javaType = new Text(manager.toJavaType(tableName, column.getKey(), column.getValue()));
        javaTypesByColumn.put(name, javaType);
    }
    DefaultStringifier.store(job.getConfiguration(), javaTypesByColumn,
            AvroExportMapper.AVRO_COLUMN_TYPES_MAP);
}