Example usage for org.apache.hadoop.io DefaultStringifier store

Introduction

On this page you can find example usages of org.apache.hadoop.io DefaultStringifier.store.

Prototype

public static <K> void store(Configuration conf, K item, String keyName) throws IOException 

Document

Stores the item in the configuration with the given keyName.
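
Under the hood, the item is serialized by whichever serializer registered under io.serializations accepts its class (WritableSerialization for Writable types, JavaSerialization for plain Serializable ones), Base64-encoded, and set as an ordinary string property under keyName; the counterpart DefaultStringifier.load reads it back. A minimal round-trip sketch, assuming Hadoop's default serializer configuration (the key name "my.conf.key" is illustrative):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DefaultStringifier;
import org.apache.hadoop.io.Text;

public class StoreRoundTrip {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Text is Writable, so the default WritableSerialization handles it.
        DefaultStringifier.store(conf, new Text("hello"), "my.conf.key");
        // Load it back, naming the class the item was stored as.
        Text restored = DefaultStringifier.load(conf, "my.conf.key", Text.class);
        System.out.println(restored); // prints "hello"
    }
}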

Usage

From source file:alluxio.hadoop.ConfUtils.java

License:Apache License

/**
 * Stores the Alluxio {@link Configuration} to the target
 * Hadoop {@link org.apache.hadoop.conf.Configuration} object.
 *
 * @param target the {@link org.apache.hadoop.conf.Configuration} target
 */
public static void storeToHadoopConfiguration(org.apache.hadoop.conf.Configuration target) {
    // Need to set io.serializations key to prevent NPE when trying to get SerializationFactory.
    target.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");
    Map<String, String> confProperties = Configuration.toMap();
    try {
        DefaultStringifier.store(target, confProperties, PropertyKey.SITE_CONF_DIR.toString());
    } catch (IOException ex) {
        LOG.error("Unable to store Alluxio configuration in Hadoop configuration", ex);
        throw new RuntimeException(ex);
    }
}
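
Reading the stored properties back follows the same pattern in reverse. A hedged sketch of the retrieval (the method name loadConfProperties is illustrative, not Alluxio's actual API; it also assumes Configuration.toMap() returned a HashMap, since JavaSerialization only accepts concrete Serializable classes):

// Hypothetical counterpart: retrieve the properties stored above.
@SuppressWarnings("unchecked")
public static Map<String, String> loadConfProperties(org.apache.hadoop.conf.Configuration source)
        throws IOException {
    return DefaultStringifier.load(source, PropertyKey.SITE_CONF_DIR.toString(), HashMap.class);
}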

From source file:com.marklogic.contentpump.DelimitedTextInputFormat.java

License:Apache License

public List<InputSplit> getSplits(JobContext job) throws IOException {
    boolean delimSplit = isSplitInput(job.getConfiguration());
    //if delimSplit is true, size of each split is determined by 
    //Math.max(minSize, Math.min(maxSize, blockSize)) in FileInputFormat
    List<InputSplit> splits = super.getSplits(job);
    if (!delimSplit) {
        return splits;
    }

    if (splits.size() >= SPLIT_COUNT_LIMIT) {
        //if #splits >= SPLIT_COUNT_LIMIT (1 million), there is already
        //enough parallelism, therefore no point in splitting further
        LOG.warn("Exceeding SPLIT_COUNT_LIMIT, input_split is off:" + SPLIT_COUNT_LIMIT);
        DefaultStringifier.store(job.getConfiguration(), false, ConfigConstants.CONF_SPLIT_INPUT);
        return splits;
    }
    // add header info into splits
    List<InputSplit> populatedSplits = new ArrayList<InputSplit>();
    LOG.info(splits.size() + " DelimitedSplits generated");
    Configuration conf = job.getConfiguration();
    char delimiter = 0;
    ArrayList<Text> hlist = new ArrayList<Text>();
    for (InputSplit file : splits) {
        FileSplit fsplit = ((FileSplit) file);
        Path path = fsplit.getPath();
        FileSystem fs = path.getFileSystem(conf);

        if (fsplit.getStart() == 0) {
            // parse the inSplit, get the header
            FSDataInputStream fileIn = fs.open(path);

            String delimStr = conf.get(ConfigConstants.CONF_DELIMITER, ConfigConstants.DEFAULT_DELIMITER);
            if (delimStr.length() == 1) {
                delimiter = delimStr.charAt(0);
            } else {
                LOG.error("Incorrect delimitor: " + delimiter + ". Expects single character.");
            }
            String encoding = conf.get(MarkLogicConstants.OUTPUT_CONTENT_ENCODING,
                    MarkLogicConstants.DEFAULT_OUTPUT_CONTENT_ENCODING);
            InputStreamReader instream = new InputStreamReader(fileIn, encoding);
            CSVParser parser = new CSVParser(instream,
                    CSVParserFormatter.getFormat(delimiter, DelimitedTextReader.encapsulator, true, true));
            Iterator<CSVRecord> it = parser.iterator();

            String[] header = null;
            if (it.hasNext()) {
                CSVRecord record = (CSVRecord) it.next();
                Iterator<String> recordIterator = record.iterator();
                int recordSize = record.size();
                header = new String[recordSize];
                for (int i = 0; i < recordSize; i++) {
                    if (recordIterator.hasNext()) {
                        header[i] = (String) recordIterator.next();
                    } else {
                        throw new IOException("Record size doesn't match the real size");
                    }
                }

                EncodingUtil.handleBOMUTF8(header, 0);

                hlist.clear();
                for (String s : header) {
                    hlist.add(new Text(s));
                }
            }
            instream.close();
        }

        DelimitedSplit ds = new DelimitedSplit(new TextArrayWritable(hlist.toArray(new Text[hlist.size()])),
                path, fsplit.getStart(), fsplit.getLength(), fsplit.getLocations());
        populatedSplits.add(ds);
    }

    return populatedSplits;
}
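
The flag stored above under ConfigConstants.CONF_SPLIT_INPUT travels as a serialized Boolean, so a later consumer of the same Configuration can read it back. A hedged sketch (the helper name isDelimSplitEnabled is illustrative; it assumes JavaSerialization is registered under io.serializations, since Boolean is not a Writable):

// Illustrative retrieval of the split-input flag stored in getSplits().
private boolean isDelimSplitEnabled(Configuration conf) throws IOException {
    Boolean splitInput = DefaultStringifier.load(conf, ConfigConstants.CONF_SPLIT_INPUT, Boolean.class);
    return !Boolean.FALSE.equals(splitInput);
}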

From source file:com.marklogic.contentpump.TransformOutputFormat.java

License:Apache License

@Override
public void checkOutputSpecs(Configuration conf, ContentSource cs) throws IOException {
    super.checkOutputSpecs(conf, cs);

    // store mimetypes map into config system
    DefaultStringifier.store(conf, getMimetypesMap(), ConfigConstants.CONF_MIMETYPES);
}

From source file:com.marklogic.mapreduce.ContentOutputFormat.java

License:Apache License

@Override
public void checkOutputSpecs(Configuration conf, ContentSource cs) throws IOException {
    Session session = null;
    ResultSequence result = null;
    try {
        session = cs.newSession();
        RequestOptions options = new RequestOptions();
        options.setDefaultXQueryVersion("1.0-ml");
        session.setDefaultRequestOptions(options);

        // clear output dir if specified
        String outputDir = conf.get(OUTPUT_DIRECTORY);
        if (outputDir != null) {
            outputDir = outputDir.endsWith("/") ? outputDir : outputDir + "/";
            if (conf.getBoolean(OUTPUT_CLEAN_DIR, false)) {
                // delete directory if exists
                String queryText = DELETE_DIRECTORY_TEMPLATE.replace(DIRECTORY_TEMPLATE, outputDir);
                AdhocQuery query = session.newAdhocQuery(queryText);
                result = session.submitRequest(query);
            } else { // ensure nothing exists under output dir
                String queryText = CHECK_DIRECTORY_EXIST_TEMPLATE.replace(DIRECTORY_TEMPLATE, outputDir);
                AdhocQuery query = session.newAdhocQuery(queryText);
                result = session.submitRequest(query);
                if (result.hasNext()) {
                    ResultItem item = result.next();
                    if (((XSBoolean) (item.getItem())).asBoolean()) {
                        throw new IllegalStateException("Directory " + outputDir + " already exists");
                    }
                } else {
                    throw new IllegalStateException("Failed to query directory content.");
                }
            }
        }
        // initialize server host name and assignment policy
        initialize(session);

        // ensure manual directory creation 
        if (fastLoad) {
            LOG.info("Running in fast load mode");
            // store forest-info map into config system
            DefaultStringifier.store(conf, queryForestInfo(cs), OUTPUT_FOREST_HOST);

            AdhocQuery query = session.newAdhocQuery(DIRECTORY_CREATE_QUERY);
            result = session.submitRequest(query);
            if (result.hasNext()) {
                ResultItem item = result.next();
                String dirMode = item.asString();
                if (!dirMode.equals(MANUAL_DIRECTORY_MODE)) {
                    throw new IllegalStateException("Manual directory creation mode is required. "
                            + "The current creation mode is " + dirMode + ".");
                }
            } else {
                throw new IllegalStateException("Failed to query directory creation mode.");
            }
        } else {
            TextArrayWritable hostArray;
            // 23798: replace hostname in forest config with 
            // user-specified output host
            String outputHost = conf.get(OUTPUT_HOST);
            if (MODE_LOCAL.equals(conf.get(EXECUTION_MODE))) {
                hostArray = queryHosts(cs, initHostName, outputHost);
            } else {
                hostArray = queryHosts(cs);
            }
            DefaultStringifier.store(conf, hostArray, OUTPUT_FOREST_HOST);
        }

        // validate capabilities
        String[] perms = conf.getStrings(OUTPUT_PERMISSION);
        if (perms != null && perms.length > 0) {
            if (perms.length % 2 != 0) {
                throw new IllegalStateException("Permissions are expected to be in <role, capability> pairs.");
            }
            int i = 0;
            while (i + 1 < perms.length) {
                String roleName = perms[i++];
                if (roleName == null || roleName.isEmpty()) {
                    throw new IllegalStateException("Illegal role name: " + roleName);
                }
                String perm = perms[i].trim();
                if (!perm.equalsIgnoreCase(ContentCapability.READ.toString())
                        && !perm.equalsIgnoreCase(ContentCapability.EXECUTE.toString())
                        && !perm.equalsIgnoreCase(ContentCapability.INSERT.toString())
                        && !perm.equalsIgnoreCase(ContentCapability.UPDATE.toString())) {
                    throw new IllegalStateException("Illegal capability: " + perm);
                }
                i++;
            }
        }
    } catch (RequestException ex) {
        throw new IOException(ex);
    } finally {
        if (session != null) {
            session.close();
        }
        if (result != null) {
            result.close();
        }
    }
}

From source file:com.marklogic.mapreduce.KeyValueOutputFormat.java

License:Apache License

@Override
public void checkOutputSpecs(Configuration conf, ContentSource cs) throws IOException {
    // check for required configuration
    if (conf.get(OUTPUT_QUERY) == null) {
        throw new IllegalArgumentException(OUTPUT_QUERY + " is not specified.");
    }
    // warn against unsupported configuration
    if (conf.get(BATCH_SIZE) != null) {
        LOG.warn("Config entry for " + "\"mapreduce.marklogic.output.batchsize\" is not " + "supported for "
                + this.getClass().getName() + " and will be ignored.");
    }
    String queryLanguage = conf.get(OUTPUT_QUERY_LANGUAGE);
    if (queryLanguage != null) {
        InternalUtilities.checkQueryLanguage(queryLanguage);
    }
    // store hosts into config system
    DefaultStringifier.store(conf, queryHosts(cs), OUTPUT_FOREST_HOST);
}

From source file:com.marklogic.mapreduce.NodeOutputFormat.java

License:Apache License

@Override
public void checkOutputSpecs(Configuration conf, ContentSource cs) throws IOException {
    // warn against unsupported configuration
    if (conf.get(BATCH_SIZE) != null) {
        LOG.warn("Config entry for " + "\"mapreduce.marklogic.output.batchsize\" is not " + "supported for "
                + this.getClass().getName() + " and will be ignored.");
    }
    // store hosts into config system
    DefaultStringifier.store(conf, queryHosts(cs), OUTPUT_FOREST_HOST);
}

From source file:org.apache.gora.util.IOUtils.java

License:Apache License

/**
 * Stores the given object in the configuration under the given dataKey
 * @param obj the object to store
 * @param conf the configuration to store the object into
 * @param dataKey the key to store the data
 */
public static <T> void storeToConf(T obj, Configuration conf, String dataKey) throws IOException {
    String classKey = dataKey + "._class";
    conf.set(classKey, obj.getClass().getName());
    DefaultStringifier.store(conf, obj, dataKey);
}
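
A typed load is possible later because the class name was stored alongside the data. A sketch of the matching counterpart, reconstructed here from the storeToConf logic rather than copied from Gora's source:

/**
 * Loads an object stored by storeToConf from the configuration.
 */
@SuppressWarnings("unchecked")
public static <T> T loadFromConf(Configuration conf, String dataKey) throws IOException {
    String classKey = dataKey + "._class";
    String className = conf.get(classKey);
    try {
        Class<T> clazz = (Class<T>) Class.forName(className);
        return DefaultStringifier.load(conf, dataKey, clazz);
    } catch (ClassNotFoundException ex) {
        throw new IOException(ex);
    }
}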

From source file:org.apache.mahout.text.LuceneStorageConfiguration.java

License:Apache License

/**
 * Serializes this object in a Hadoop {@link Configuration}
 *
 * @return a {@link Configuration} object with a String serialization
 * @throws IOException if serialization fails
 */
public Configuration serialize() throws IOException {
    DefaultStringifier.store(configuration, this, KEY);

    return new Configuration(configuration);
}
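
Deserialization mirrors the call. A hedged sketch of the reverse direction (it assumes LuceneStorageConfiguration can be handled by a registered serializer, for example because it implements Writable):

// Illustrative counterpart: rebuild the object from a Configuration.
public static LuceneStorageConfiguration deserialize(Configuration conf) throws IOException {
    return DefaultStringifier.load(conf, KEY, LuceneStorageConfiguration.class);
}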

From source file:org.apache.sqoop.mapreduce.hcat.SqoopHCatUtilities.java

License:Apache License

public static void configureExportInputFormat(SqoopOptions opts, Job job, ConnManager connMgr, String dbTable,
        Configuration config) throws IOException {

    LOG.info("Configuring HCatalog for export job");
    SqoopHCatUtilities hCatUtils = SqoopHCatUtilities.instance();
    hCatUtils.configureHCat(opts, job, connMgr, dbTable, job.getConfiguration());
    job.setInputFormatClass(getInputFormatClass());
    Map<String, Integer> dbColTypes = hCatUtils.getDbColumnTypes();
    MapWritable columnTypesJava = new MapWritable();
    for (Map.Entry<String, Integer> e : dbColTypes.entrySet()) {
        Text columnName = new Text(e.getKey());
        Text columnText = new Text(connMgr.toJavaType(dbTable, e.getKey(), e.getValue()));
        columnTypesJava.put(columnName, columnText);
    }
    MapWritable columnTypesSql = new MapWritable();
    for (Map.Entry<String, Integer> e : dbColTypes.entrySet()) {
        Text columnName = new Text(e.getKey());
        IntWritable sqlType = new IntWritable(e.getValue());
        columnTypesSql.put(columnName, sqlType);
    }
    DefaultStringifier.store(config, columnTypesJava, SqoopHCatUtilities.HCAT_DB_OUTPUT_COLTYPES_JAVA);
    DefaultStringifier.store(config, columnTypesSql, SqoopHCatUtilities.HCAT_DB_OUTPUT_COLTYPES_SQL);
}

From source file:org.apache.sqoop.mapreduce.JdbcExportJob.java

License:Apache License

@Override
protected void configureInputFormat(Job job, String tableName, String tableClassName, String splitByCol)
        throws ClassNotFoundException, IOException {

    fileType = getInputFileType();

    super.configureInputFormat(job, tableName, tableClassName, splitByCol);

    if (isHCatJob) {
        SqoopHCatUtilities.configureExportInputFormat(options, job, context.getConnManager(), tableName,
                job.getConfiguration());
        return;
    } else if (fileType == FileType.AVRO_DATA_FILE) {
        LOG.debug("Configuring for Avro export");
        ConnManager connManager = context.getConnManager();
        Map<String, Integer> columnTypeInts;
        if (options.getCall() == null) {
            columnTypeInts = connManager.getColumnTypes(tableName, options.getSqlQuery());
        } else {
            columnTypeInts = connManager.getColumnTypesForProcedure(options.getCall());
        }
        MapWritable columnTypes = new MapWritable();
        for (Map.Entry<String, Integer> e : columnTypeInts.entrySet()) {
            Text columnName = new Text(e.getKey());
            Text columnText = new Text(connManager.toJavaType(tableName, e.getKey(), e.getValue()));
            columnTypes.put(columnName, columnText);
        }
        DefaultStringifier.store(job.getConfiguration(), columnTypes, AvroExportMapper.AVRO_COLUMN_TYPES_MAP);
    }

}
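
On the map side, the stored column-type maps can be recovered from the job configuration. A hedged sketch of such a retrieval in a mapper's setup() (illustrative, not Sqoop's actual mapper code):

// Illustrative mapper-side retrieval of the column types stored above.
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    MapWritable columnTypes = DefaultStringifier.load(conf,
            AvroExportMapper.AVRO_COLUMN_TYPES_MAP, MapWritable.class);
    for (Map.Entry<Writable, Writable> e : columnTypes.entrySet()) {
        // e.getKey() is the column name; e.getValue() is its Java type name
    }
}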