Example usage for org.apache.hadoop.io DefaultStringifier store

Introduction

On this page you can find example usages of org.apache.hadoop.io DefaultStringifier.store.

Prototype

public static <K> void store(Configuration conf, K item, String keyName) throws IOException 

Document

Stores the item in the configuration with the given keyName.
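
Under the hood, the item is serialized by whichever serializer registered under io.serializations accepts its class (WritableSerialization for Writable types, JavaSerialization for plain Serializable ones), Base64-encoded, and set as an ordinary string property under keyName; the counterpart DefaultStringifier.load reads it back. A minimal round-trip sketch, assuming Hadoop's default serializer configuration (the key name "my.conf.key" is illustrative):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DefaultStringifier;
import org.apache.hadoop.io.Text;

public class StoreRoundTrip {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Text is Writable, so the default WritableSerialization handles it.
        DefaultStringifier.store(conf, new Text("hello"), "my.conf.key");
        // Load it back, naming the class the item was stored as.
        Text restored = DefaultStringifier.load(conf, "my.conf.key", Text.class);
        System.out.println(restored); // prints "hello"
    }
}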

Usage

From source file:alluxio.hadoop.ConfUtils.java

License:Apache License

/**
 * Stores the Alluxio {@link Configuration} to the target
 * Hadoop {@link org.apache.hadoop.conf.Configuration} object.
 *
 * @param target the {@link org.apache.hadoop.conf.Configuration} target
 */
public static void storeToHadoopConfiguration(org.apache.hadoop.conf.Configuration target) {
    // Need to set io.serializations key to prevent NPE when trying to get SerializationFactory.
    target.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");
    Map<String, String> confProperties = Configuration.toMap();
    try {
        DefaultStringifier.store(target, confProperties, PropertyKey.SITE_CONF_DIR.toString());
    } catch (IOException ex) {
        LOG.error("Unable to store Alluxio configuration in Hadoop configuration", ex);
        throw new RuntimeException(ex);
    }
}
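
Reading the stored properties back follows the same pattern in reverse. A hedged sketch of the retrieval (the method name loadConfProperties is illustrative, not Alluxio's actual API; it also assumes Configuration.toMap() returned a HashMap, since JavaSerialization only accepts concrete Serializable classes):

// Hypothetical counterpart: retrieve the properties stored above.
@SuppressWarnings("unchecked")
public static Map<String, String> loadConfProperties(org.apache.hadoop.conf.Configuration source)
        throws IOException {
    return DefaultStringifier.load(source, PropertyKey.SITE_CONF_DIR.toString(), HashMap.class);
}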

From source file:com.marklogic.contentpump.DelimitedTextInputFormat.java

License:Apache License

public List<InputSplit> getSplits(JobContext job) throws IOException {
    boolean delimSplit = isSplitInput(job.getConfiguration());
    //if delimSplit is true, size of each split is determined by 
    //Math.max(minSize, Math.min(maxSize, blockSize)) in FileInputFormat
    List<InputSplit> splits = super.getSplits(job);
    if (!delimSplit) {
        return splits;
    }

    if (splits.size() >= SPLIT_COUNT_LIMIT) {
        //if #splits >= SPLIT_COUNT_LIMIT (1 million), there is already
        //enough parallelism, therefore no point in splitting further
        LOG.warn("Exceeding SPLIT_COUNT_LIMIT, input_split is off:" + SPLIT_COUNT_LIMIT);
        DefaultStringifier.store(job.getConfiguration(), false, ConfigConstants.CONF_SPLIT_INPUT);
        return splits;
    }
    // add header info into splits
    List<InputSplit> populatedSplits = new ArrayList<InputSplit>();
    LOG.info(splits.size() + " DelimitedSplits generated");
    Configuration conf = job.getConfiguration();
    char delimiter = 0;
    ArrayList<Text> hlist = new ArrayList<Text>();
    for (InputSplit file : splits) {
        FileSplit fsplit = ((FileSplit) file);
        Path path = fsplit.getPath();
        FileSystem fs = path.getFileSystem(conf);

        if (fsplit.getStart() == 0) {
            // parse the inSplit, get the header
            FSDataInputStream fileIn = fs.open(path);

            String delimStr = conf.get(ConfigConstants.CONF_DELIMITER, ConfigConstants.DEFAULT_DELIMITER);
            if (delimStr.length() == 1) {
                delimiter = delimStr.charAt(0);
            } else {
                LOG.error("Incorrect delimitor: " + delimiter + ". Expects single character.");
            }
            String encoding = conf.get(MarkLogicConstants.OUTPUT_CONTENT_ENCODING,
                    MarkLogicConstants.DEFAULT_OUTPUT_CONTENT_ENCODING);
            InputStreamReader instream = new InputStreamReader(fileIn, encoding);
            CSVParser parser = new CSVParser(instream,
                    CSVParserFormatter.getFormat(delimiter, DelimitedTextReader.encapsulator, true, true));
            Iterator<CSVRecord> it = parser.iterator();

            String[] header = null;
            if (it.hasNext()) {
                CSVRecord record = (CSVRecord) it.next();
                Iterator<String> recordIterator = record.iterator();
                int recordSize = record.size();
                header = new String[recordSize];
                for (int i = 0; i < recordSize; i++) {
                    if (recordIterator.hasNext()) {
                        header[i] = (String) recordIterator.next();
                    } else {
                        throw new IOException("Record size doesn't match the real size");
                    }
                }

                EncodingUtil.handleBOMUTF8(header, 0);

                hlist.clear();
                for (String s : header) {
                    hlist.add(new Text(s));
                }
            }
            instream.close();
        }

        DelimitedSplit ds = new DelimitedSplit(new TextArrayWritable(hlist.toArray(new Text[hlist.size()])),
                path, fsplit.getStart(), fsplit.getLength(), fsplit.getLocations());
        populatedSplits.add(ds);
    }

    return populatedSplits;
}
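
The flag stored above under ConfigConstants.CONF_SPLIT_INPUT travels as a serialized Boolean, so a later consumer of the same Configuration can read it back. A hedged sketch (the helper name isDelimSplitEnabled is illustrative; it assumes JavaSerialization is registered under io.serializations, since Boolean is not a Writable):

// Illustrative retrieval of the split-input flag stored in getSplits().
private boolean isDelimSplitEnabled(Configuration conf) throws IOException {
    Boolean splitInput = DefaultStringifier.load(conf, ConfigConstants.CONF_SPLIT_INPUT, Boolean.class);
    return !Boolean.FALSE.equals(splitInput);
}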

From source file:com.marklogic.contentpump.TransformOutputFormat.java

License:Apache License

@Override
public void checkOutputSpecs(Configuration conf, ContentSource cs) throws IOException {
    super.checkOutputSpecs(conf, cs);

    // store mimetypes map into config system
    DefaultStringifier.store(conf, getMimetypesMap(), ConfigConstants.CONF_MIMETYPES);
}

From source file:com.marklogic.mapreduce.ContentOutputFormat.java

License:Apache License

@Override
public void checkOutputSpecs(Configuration conf, ContentSource cs) throws IOException {
    Session session = null;
    ResultSequence result = null;
    try {
        session = cs.newSession();
        RequestOptions options = new RequestOptions();
        options.setDefaultXQueryVersion("1.0-ml");
        session.setDefaultRequestOptions(options);

        // clear output dir if specified
        String outputDir = conf.get(OUTPUT_DIRECTORY);
        if (outputDir != null) {
            outputDir = outputDir.endsWith("/") ? outputDir : outputDir + "/";
            if (conf.getBoolean(OUTPUT_CLEAN_DIR, false)) {
                // delete directory if exists
                String queryText = DELETE_DIRECTORY_TEMPLATE.replace(DIRECTORY_TEMPLATE, outputDir);
                AdhocQuery query = session.newAdhocQuery(queryText);
                result = session.submitRequest(query);
            } else { // ensure nothing exists under output dir
                String queryText = CHECK_DIRECTORY_EXIST_TEMPLATE.replace(DIRECTORY_TEMPLATE, outputDir);
                AdhocQuery query = session.newAdhocQuery(queryText);
                result = session.submitRequest(query);
                if (result.hasNext()) {
                    ResultItem item = result.next();
                    if (((XSBoolean) (item.getItem())).asBoolean()) {
                        throw new IllegalStateException("Directory " + outputDir + " already exists");
                    }
                } else {
                    throw new IllegalStateException("Failed to query directory content.");
                }
            }
        }
        // initialize server host name and assignment policy
        initialize(session);

        // ensure manual directory creation 
        if (fastLoad) {
            LOG.info("Running in fast load mode");
            // store forest-info map into config system
            DefaultStringifier.store(conf, queryForestInfo(cs), OUTPUT_FOREST_HOST);

            AdhocQuery query = session.newAdhocQuery(DIRECTORY_CREATE_QUERY);
            result = session.submitRequest(query);
            if (result.hasNext()) {
                ResultItem item = result.next();
                String dirMode = item.asString();
                if (!dirMode.equals(MANUAL_DIRECTORY_MODE)) {
                    throw new IllegalStateException("Manual directory creation mode is required. "
                            + "The current creation mode is " + dirMode + ".");
                }
            } else {
                throw new IllegalStateException("Failed to query directory creation mode.");
            }
        } else {
            TextArrayWritable hostArray;
            // 23798: replace hostname in forest config with 
            // user-specified output host
            String outputHost = conf.get(OUTPUT_HOST);
            if (MODE_LOCAL.equals(conf.get(EXECUTION_MODE))) {
                hostArray = queryHosts(cs, initHostName, outputHost);
            } else {
                hostArray = queryHosts(cs);
            }
            DefaultStringifier.store(conf, hostArray, OUTPUT_FOREST_HOST);
        }

        // validate capabilities
        String[] perms = conf.getStrings(OUTPUT_PERMISSION);
        if (perms != null && perms.length > 0) {
            if (perms.length % 2 != 0) {
                throw new IllegalStateException("Permissions are expected to be in <role, capability> pairs.");
            }
            int i = 0;
            while (i + 1 < perms.length) {
                String roleName = perms[i++];
                if (roleName == null || roleName.isEmpty()) {
                    throw new IllegalStateException("Illegal role name: " + roleName);
                }
                String perm = perms[i].trim();
                if (!perm.equalsIgnoreCase(ContentCapability.READ.toString())
                        && !perm.equalsIgnoreCase(ContentCapability.EXECUTE.toString())
                        && !perm.equalsIgnoreCase(ContentCapability.INSERT.toString())
                        && !perm.equalsIgnoreCase(ContentCapability.UPDATE.toString())) {
                    throw new IllegalStateException("Illegal capability: " + perm);
                }
                i++;
            }
        }
    } catch (RequestException ex) {
        throw new IOException(ex);
    } finally {
        if (session != null) {
            session.close();
        }
        if (result != null) {
            result.close();
        }
    }
}

From source file:com.marklogic.mapreduce.KeyValueOutputFormat.java

License:Apache License

@Override
public void checkOutputSpecs(Configuration conf, ContentSource cs) throws IOException {
    // check for required configuration
    if (conf.get(OUTPUT_QUERY) == null) {
        throw new IllegalArgumentException(OUTPUT_QUERY + " is not specified.");
    }
    // warn against unsupported configuration
    if (conf.get(BATCH_SIZE) != null) {
        LOG.warn("Config entry for " + "\"mapreduce.marklogic.output.batchsize\" is not " + "supported for "
                + this.getClass().getName() + " and will be ignored.");
    }
    String queryLanguage = conf.get(OUTPUT_QUERY_LANGUAGE);
    if (queryLanguage != null) {
        InternalUtilities.checkQueryLanguage(queryLanguage);
    }
    // store hosts into config system
    DefaultStringifier.store(conf, queryHosts(cs), OUTPUT_FOREST_HOST);
}

From source file:com.marklogic.mapreduce.NodeOutputFormat.java

License:Apache License

@Override
public void checkOutputSpecs(Configuration conf, ContentSource cs) throws IOException {
    // warn against unsupported configuration
    if (conf.get(BATCH_SIZE) != null) {
        LOG.warn("Config entry for " + "\"mapreduce.marklogic.output.batchsize\" is not " + "supported for "
                + this.getClass().getName() + " and will be ignored.");
    }
    // store hosts into config system
    DefaultStringifier.store(conf, queryHosts(cs), OUTPUT_FOREST_HOST);
}

From source file:org.apache.gora.util.IOUtils.java

License:Apache License

/**
 * Stores the given object in the configuration under the given dataKey
 * @param obj the object to store
 * @param conf the configuration to store the object into
 * @param dataKey the key to store the data
 */
public static <T> void storeToConf(T obj, Configuration conf, String dataKey) throws IOException {
    String classKey = dataKey + "._class";
    conf.set(classKey, obj.getClass().getName());
    DefaultStringifier.store(conf, obj, dataKey);
}
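
A typed load is possible later because the class name was stored alongside the data. A sketch of the matching counterpart, reconstructed here from the storeToConf logic rather than copied from Gora's source:

/**
 * Loads an object stored by storeToConf from the configuration.
 */
@SuppressWarnings("unchecked")
public static <T> T loadFromConf(Configuration conf, String dataKey) throws IOException {
    String classKey = dataKey + "._class";
    String className = conf.get(classKey);
    try {
        Class<T> clazz = (Class<T>) Class.forName(className);
        return DefaultStringifier.load(conf, dataKey, clazz);
    } catch (ClassNotFoundException ex) {
        throw new IOException(ex);
    }
}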

From source file:org.apache.mahout.text.LuceneStorageConfiguration.java

License:Apache License

/**
 * Serializes this object in a Hadoop {@link Configuration}
 *
 * @return a {@link Configuration} object with a String serialization
 * @throws IOException if serialization fails
 */
public Configuration serialize() throws IOException {
    DefaultStringifier.store(configuration, this, KEY);

    return new Configuration(configuration);
}
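
Deserialization mirrors the call. A hedged sketch of the reverse direction (it assumes LuceneStorageConfiguration can be handled by a registered serializer, for example because it implements Writable):

// Illustrative counterpart: rebuild the object from a Configuration.
public static LuceneStorageConfiguration deserialize(Configuration conf) throws IOException {
    return DefaultStringifier.load(conf, KEY, LuceneStorageConfiguration.class);
}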

From source file:org.apache.sqoop.mapreduce.hcat.SqoopHCatUtilities.java

License:Apache License

public static void configureExportInputFormat(SqoopOptions opts, Job job, ConnManager connMgr, String dbTable,
        Configuration config) throws IOException {

    LOG.info("Configuring HCatalog for export job");
    SqoopHCatUtilities hCatUtils = SqoopHCatUtilities.instance();
    hCatUtils.configureHCat(opts, job, connMgr, dbTable, job.getConfiguration());
    job.setInputFormatClass(getInputFormatClass());
    Map<String, Integer> dbColTypes = hCatUtils.getDbColumnTypes();
    MapWritable columnTypesJava = new MapWritable();
    for (Map.Entry<String, Integer> e : dbColTypes.entrySet()) {
        Text columnName = new Text(e.getKey());
        Text columnText = new Text(connMgr.toJavaType(dbTable, e.getKey(), e.getValue()));
        columnTypesJava.put(columnName, columnText);
    }
    MapWritable columnTypesSql = new MapWritable();
    for (Map.Entry<String, Integer> e : dbColTypes.entrySet()) {
        Text columnName = new Text(e.getKey());
        IntWritable sqlType = new IntWritable(e.getValue());
        columnTypesSql.put(columnName, sqlType);
    }
    DefaultStringifier.store(config, columnTypesJava, SqoopHCatUtilities.HCAT_DB_OUTPUT_COLTYPES_JAVA);
    DefaultStringifier.store(config, columnTypesSql, SqoopHCatUtilities.HCAT_DB_OUTPUT_COLTYPES_SQL);
}

From source file:org.apache.sqoop.mapreduce.JdbcExportJob.java

License:Apache License

@Override
protected void configureInputFormat(Job job, String tableName, String tableClassName, String splitByCol)
        throws ClassNotFoundException, IOException {

    fileType = getInputFileType();

    super.configureInputFormat(job, tableName, tableClassName, splitByCol);

    if (isHCatJob) {
        SqoopHCatUtilities.configureExportInputFormat(options, job, context.getConnManager(), tableName,
                job.getConfiguration());
        return;
    } else if (fileType == FileType.AVRO_DATA_FILE) {
        LOG.debug("Configuring for Avro export");
        ConnManager connManager = context.getConnManager();
        Map<String, Integer> columnTypeInts;
        if (options.getCall() == null) {
            columnTypeInts = connManager.getColumnTypes(tableName, options.getSqlQuery());
        } else {
            columnTypeInts = connManager.getColumnTypesForProcedure(options.getCall());
        }
        MapWritable columnTypes = new MapWritable();
        for (Map.Entry<String, Integer> e : columnTypeInts.entrySet()) {
            Text columnName = new Text(e.getKey());
            Text columnText = new Text(connManager.toJavaType(tableName, e.getKey(), e.getValue()));
            columnTypes.put(columnName, columnText);
        }
        DefaultStringifier.store(job.getConfiguration(), columnTypes, AvroExportMapper.AVRO_COLUMN_TYPES_MAP);
    }

}
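
On the map side, the stored column-type maps can be recovered from the job configuration. A hedged sketch of such a retrieval in a mapper's setup() (illustrative, not Sqoop's actual mapper code):

// Illustrative mapper-side retrieval of the column types stored above.
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    MapWritable columnTypes = DefaultStringifier.load(conf,
            AvroExportMapper.AVRO_COLUMN_TYPES_MAP, MapWritable.class);
    for (Map.Entry<Writable, Writable> e : columnTypes.entrySet()) {
        // e.getKey() is the column name; e.getValue() is its Java type name
    }
}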