List of usage examples for the org.apache.hadoop.io.MapWritable constructor MapWritable()
public MapWritable()
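The no-argument constructor creates an empty map of Writable keys to Writable values. Below is a minimal, hedged sketch (not taken from any of the source files listed here) of typical construction and round-trip serialization; it assumes the usual org.apache.hadoop.io and java.io imports and a surrounding method that declares throws IOException.

  MapWritable map = new MapWritable();
  map.put(new Text("count"), new IntWritable(42));
  map.put(new Text("score"), new DoubleWritable(0.5));

  // MapWritable serializes like any other Writable
  ByteArrayOutputStream bytes = new ByteArrayOutputStream();
  map.write(new DataOutputStream(bytes));

  MapWritable copy = new MapWritable();
  copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
  IntWritable count = (IntWritable) copy.get(new Text("count")); // 42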
From source file:org.apache.mahout.classifier.sequencelearning.hmm.hadoop.BaumWelchUtils.java
License:Apache License
/**
 * Encodes a particular HmmModel as a Sequence File and writes it to the specified location.
 *
 * @param model     HmmModel to be encoded
 * @param modelPath Location to store the encoded model
 * @param conf      Configuration object
 * @throws IOException
 */
protected static void writeModelToDirectory(HmmModel model, Path modelPath, Configuration conf)
    throws IOException {
  int numHidden = model.getNrOfHiddenStates();
  int numObserved = model.getNrOfOutputStates();
  Matrix emissionMatrix = model.getEmissionMatrix();
  Matrix transitionMatrix = model.getTransitionMatrix();
  Vector initialProbability = model.getInitialProbabilities();

  MapWritable initialDistributionMap = new MapWritable();
  MapWritable transitionDistributionMap = new MapWritable();
  MapWritable emissionDistributionMap = new MapWritable();

  // delete the output directory
  HadoopUtil.delete(conf, modelPath);

  // create new file to store HMM
  FileSystem fs = FileSystem.get(modelPath.toUri(), conf);
  Path outFile = new Path(modelPath, "part-randomSeed");
  boolean newFile = fs.createNewFile(outFile);

  if (newFile) {
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, outFile, Text.class, MapWritable.class);
    try {
      for (int i = 0; i < numHidden; i++) {
        IntWritable initialDistributionKey = new IntWritable(i);
        DoubleWritable initialDistributionValue = new DoubleWritable(initialProbability.get(i));
        initialDistributionMap.put(initialDistributionKey, initialDistributionValue);

        Text transitionDistributionKey = new Text("TRANSIT_" + Integer.toString(i));
        MapWritable transitionDistributionValue = new MapWritable();
        for (int j = 0; j < numHidden; j++) {
          IntWritable transitionDistributionInnerKey = new IntWritable(j);
          DoubleWritable transitionDistributionInnerValue = new DoubleWritable(transitionMatrix.get(i, j));
          transitionDistributionValue.put(transitionDistributionInnerKey, transitionDistributionInnerValue);
        }
        transitionDistributionMap.put(transitionDistributionKey, transitionDistributionValue);

        Text emissionDistributionKey = new Text("EMIT_" + Integer.toString(i));
        MapWritable emissionDistributionValue = new MapWritable();
        for (int j = 0; j < numObserved; j++) {
          IntWritable emissionDistributionInnerKey = new IntWritable(j);
          DoubleWritable emissionDistributionInnerValue = new DoubleWritable(emissionMatrix.get(i, j));
          emissionDistributionValue.put(emissionDistributionInnerKey, emissionDistributionInnerValue);
        }
        emissionDistributionMap.put(emissionDistributionKey, emissionDistributionValue);
      }

      writer.append(new Text("INITIAL"), initialDistributionMap);
      log.info("Wrote random Initial Distribution Map to {}", outFile);

      for (MapWritable.Entry<Writable, Writable> transitionEntry : transitionDistributionMap.entrySet()) {
        writer.append(transitionEntry.getKey(), transitionEntry.getValue());
      }
      log.info("Wrote random Transition Distribution Map to {}", outFile);

      for (MapWritable.Entry<Writable, Writable> emissionEntry : emissionDistributionMap.entrySet()) {
        writer.append(emissionEntry.getKey(), emissionEntry.getValue());
      }
      log.info("Wrote random Emission Distribution Map to {}", outFile);
    } finally {
      Closeables.closeQuietly(writer);
    }
  }
}
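For context, a hedged sketch (not part of the source above) of how such a file could be read back with SequenceFile.Reader; it assumes the key naming convention ("INITIAL", "TRANSIT_<i>", "EMIT_<i>") used by writeModelToDirectory and the usual Hadoop imports.

  protected static void readModelMaps(Path modelPath, Configuration conf) throws IOException {
    Path inFile = new Path(modelPath, "part-randomSeed");
    SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(inFile));
    try {
      Text key = new Text();
      MapWritable map = new MapWritable();
      while (reader.next(key, map)) {
        if ("INITIAL".equals(key.toString())) {
          // entries: IntWritable hidden state -> DoubleWritable initial probability
        } else if (key.toString().startsWith("TRANSIT_")) {
          // entries: IntWritable target state -> DoubleWritable transition probability
        } else if (key.toString().startsWith("EMIT_")) {
          // entries: IntWritable output symbol -> DoubleWritable emission probability
        }
      }
    } finally {
      reader.close();
    }
  }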
From source file:org.apache.nutch.scoring.nextpage.NextPageAnalysisScoringFilter.java
License:Apache License
private Outlink generateNextPageLink(String baseUrlString, String previousNextPageIndexString)
    throws MalformedURLException {
  if (previousNextPageIndexString == null)
    previousNextPageIndexString = "1";
  int lastNextPageIndex = Integer.valueOf(previousNextPageIndexString).intValue();
  int nextPageIndex = lastNextPageIndex + 1;

  Outlink nextPageOutlink = new Outlink("", "nextPage " + nextPageIndex);

  URL base = new URL(baseUrlString);
  String file = base.getFile();
  String toUrl = base.toString();
  if (file.equals("/")) {
    toUrl = base.toString() + "nextPage/" + nextPageIndex;
  } else {
    toUrl = base.toString().replace(file, "/nextPage/" + nextPageIndex);
  }
  nextPageOutlink.setUrl(toUrl);

  MapWritable outlinkMeta = new MapWritable();
  nextPageOutlink.setMetadata(outlinkMeta);
  outlinkMeta.put(new Text("nextPageIndex"), new IntWritable(nextPageIndex));

  return nextPageOutlink;
}
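A short, hedged usage sketch of reading that metadata back later, assuming the Outlink class in this codebase exposes a matching getMetadata() accessor:

  MapWritable meta = nextPageOutlink.getMetadata();
  IntWritable stored = (IntWritable) meta.get(new Text("nextPageIndex"));
  int nextPageIndex = (stored == null) ? 1 : stored.get();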
From source file:org.apache.nutch.util.hostdb.HostDatum.java
License:Apache License
/**
 * Returns the MapWritable metadata if it was set or read in {@link #readFields(DataInput)};
 * returns an empty map if this HostDatum was freshly created (the map is lazily instantiated).
 */
public MapWritable getMetaData() {
  if (this.metaData == null)
    this.metaData = new MapWritable();
  return this.metaData;
}
From source file:org.apache.nutch.util.hostdb.HostDatum.java
License:Apache License
@Override
public void readFields(DataInput in) throws IOException {
  byte version = in.readByte();
  if (version > CUR_VERSION) // check version
    throw new VersionMismatchException(CUR_VERSION, version);

  score = in.readFloat();
  lastCheck = new Date(in.readLong());
  homepageUrl = Text.readString(in);
  dnsFailures = in.readInt();
  connectionFailures = in.readInt();

  statCounts.put(CrawlDatum.STATUS_DB_UNFETCHED, in.readInt());
  statCounts.put(CrawlDatum.STATUS_DB_FETCHED, in.readInt());
  statCounts.put(CrawlDatum.STATUS_DB_NOTMODIFIED, in.readInt());
  statCounts.put(CrawlDatum.STATUS_DB_REDIR_PERM, in.readInt());
  statCounts.put(CrawlDatum.STATUS_DB_REDIR_TEMP, in.readInt());
  statCounts.put(CrawlDatum.STATUS_DB_GONE, in.readInt());

  metaData = new MapWritable();
  metaData.readFields(in);
}
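For symmetry, here is a hedged sketch of what the matching write(DataOutput) method could look like, inferred purely from the field order read above; it is an illustration, not the actual HostDatum source.

  @Override
  public void write(DataOutput out) throws IOException {
    out.writeByte(CUR_VERSION);
    out.writeFloat(score);
    out.writeLong(lastCheck.getTime());
    Text.writeString(out, homepageUrl);
    out.writeInt(dnsFailures);
    out.writeInt(connectionFailures);
    out.writeInt(statCounts.get(CrawlDatum.STATUS_DB_UNFETCHED));
    out.writeInt(statCounts.get(CrawlDatum.STATUS_DB_FETCHED));
    out.writeInt(statCounts.get(CrawlDatum.STATUS_DB_NOTMODIFIED));
    out.writeInt(statCounts.get(CrawlDatum.STATUS_DB_REDIR_PERM));
    out.writeInt(statCounts.get(CrawlDatum.STATUS_DB_REDIR_TEMP));
    out.writeInt(statCounts.get(CrawlDatum.STATUS_DB_GONE));
    getMetaData().write(out); // lazily instantiated, so never null
  }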
From source file:org.apache.pirk.inputformat.hadoop.json.JSONRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException {
  key = new Text();
  value = new MapWritable();
  jsonParser = new JSONParser();

  lineReader = new LineRecordReader();
  lineReader.initialize(inputSplit, context);

  queryString = context.getConfiguration().get("query", "?q=*");

  // Load the data schemas
  FileSystem fs = FileSystem.get(context.getConfiguration());
  try {
    SystemConfiguration.setProperty("data.schemas", context.getConfiguration().get("data.schemas"));
    DataSchemaLoader.initialize(true, fs);
  } catch (Exception e) {
    e.printStackTrace();
  }

  String dataSchemaName = context.getConfiguration().get("dataSchemaName");
  dataSchema = DataSchemaRegistry.get(dataSchemaName);
}
From source file:org.apache.pirk.utils.StringUtils.java
License:Apache License
/**
 * Method to take an input JSON string and output a MapWritable with arrays as JSON formatted String objects
 */
public static MapWritable jsonStringToMapWritable(String jsonString) {
  MapWritable value = new MapWritable();
  JSONParser jsonParser = new JSONParser();
  try {
    JSONObject jsonObj = (JSONObject) jsonParser.parse(jsonString);
    for (Object key : jsonObj.keySet()) {
      Text mapKey = new Text(key.toString());
      Text mapValue = new Text();
      if (jsonObj.get(key) != null) {
        mapValue.set(jsonObj.get(key).toString());
      }
      value.put(mapKey, mapValue);
    }
  } catch (ParseException e) {
    logger.warn("Could not json-decode string: " + jsonString, e);
  } catch (NumberFormatException e) {
    logger.warn("Could not parse field into number: " + jsonString, e);
  }
  return value;
}
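A brief, hedged usage sketch of the helper above (the field names are made up for illustration):

  MapWritable record = StringUtils.jsonStringToMapWritable("{\"name\":\"alice\",\"age\":30}");
  Text name = (Text) record.get(new Text("name")); // "alice"
  Text age = (Text) record.get(new Text("age"));   // "30" -- every value is stored as Text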
From source file:org.apache.pirk.utils.StringUtils.java
License:Apache License
/**
 * Method to take an input JSON string and output a MapWritable with arrays as WritableArrayWritable objects
 */
public static MapWritable jsonStringToMapWritableWithWritableArrayWritable(String jsonString, DataSchema dataSchema) {
  MapWritable value = new MapWritable();
  JSONParser jsonParser = new JSONParser();
  try {
    JSONObject jsonObj = (JSONObject) jsonParser.parse(jsonString);
    for (Object key : jsonObj.keySet()) {
      Text mapKey = new Text(key.toString());
      if (jsonObj.get(key) != null) {
        logger.debug("key = " + key.toString());
        if (dataSchema.isArrayElement((String) key)) {
          WritableArrayWritable mapValue = StringUtils
              .jsonArrayStringToWritableArrayWritable(jsonObj.get(key).toString());
          value.put(mapKey, mapValue);
        } else {
          Text mapValue = new Text(jsonObj.get(key).toString());
          value.put(mapKey, mapValue);
        }
      }
    }
  } catch (ParseException e) {
    logger.warn("Could not json-decode string: " + jsonString, e);
  } catch (NumberFormatException e) {
    logger.warn("Could not parse field into number: " + jsonString, e);
  }
  return value;
}
From source file:org.apache.pirk.utils.StringUtils.java
License:Apache License
/**
 * Method to take an input JSON string and output a MapWritable with arrays as ArrayWritable objects
 */
public static MapWritable jsonStringToMapWritableWithArrayWritable(String jsonString, DataSchema dataSchema) {
  MapWritable value = new MapWritable();
  JSONParser jsonParser = new JSONParser();
  try {
    JSONObject jsonObj = (JSONObject) jsonParser.parse(jsonString);
    for (Object key : jsonObj.keySet()) {
      Text mapKey = new Text(key.toString());
      if (jsonObj.get(key) != null) {
        logger.debug("key = " + key.toString());
        if (dataSchema.isArrayElement((String) key)) {
          ArrayWritable mapValue = StringUtils
              .jsonArrayStringtoArrayWritable(jsonObj.get(key).toString());
          value.put(mapKey, mapValue);
        } else {
          Text mapValue = new Text(jsonObj.get(key).toString());
          value.put(mapKey, mapValue);
        }
      }
    }
  } catch (ParseException e) {
    logger.warn("Could not json-decode string: " + jsonString, e);
  } catch (NumberFormatException e) {
    logger.warn("Could not parse field into number: " + jsonString, e);
  }
  return value;
}
From source file:org.apache.sqoop.mapreduce.hcat.SqoopHCatUtilities.java
License:Apache License
public static void configureExportInputFormat(SqoopOptions opts, Job job, ConnManager connMgr,
    String dbTable, Configuration config) throws IOException {
  LOG.info("Configuring HCatalog for export job");
  SqoopHCatUtilities hCatUtils = SqoopHCatUtilities.instance();
  hCatUtils.configureHCat(opts, job, connMgr, dbTable, job.getConfiguration());
  job.setInputFormatClass(getInputFormatClass());

  Map<String, Integer> dbColTypes = hCatUtils.getDbColumnTypes();

  MapWritable columnTypesJava = new MapWritable();
  for (Map.Entry<String, Integer> e : dbColTypes.entrySet()) {
    Text columnName = new Text(e.getKey());
    Text columnText = new Text(connMgr.toJavaType(dbTable, e.getKey(), e.getValue()));
    columnTypesJava.put(columnName, columnText);
  }

  MapWritable columnTypesSql = new MapWritable();
  for (Map.Entry<String, Integer> e : dbColTypes.entrySet()) {
    Text columnName = new Text(e.getKey());
    IntWritable sqlType = new IntWritable(e.getValue());
    columnTypesSql.put(columnName, sqlType);
  }

  DefaultStringifier.store(config, columnTypesJava, SqoopHCatUtilities.HCAT_DB_OUTPUT_COLTYPES_JAVA);
  DefaultStringifier.store(config, columnTypesSql, SqoopHCatUtilities.HCAT_DB_OUTPUT_COLTYPES_SQL);
}
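On the map-task side these serialized maps can presumably be restored with DefaultStringifier.load; a hedged sketch follows (the column name "id" is made up, and the constant names simply mirror the store calls above):

  Configuration conf = job.getConfiguration();
  MapWritable javaTypes = DefaultStringifier.load(conf,
      SqoopHCatUtilities.HCAT_DB_OUTPUT_COLTYPES_JAVA, MapWritable.class);
  MapWritable sqlTypes = DefaultStringifier.load(conf,
      SqoopHCatUtilities.HCAT_DB_OUTPUT_COLTYPES_SQL, MapWritable.class);

  Text javaType = (Text) javaTypes.get(new Text("id"));             // e.g. "Integer"
  IntWritable sqlType = (IntWritable) sqlTypes.get(new Text("id")); // java.sql.Types constant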
From source file:org.apache.sqoop.mapreduce.JdbcExportJob.java
License:Apache License
@Override
protected void configureInputFormat(Job job, String tableName, String tableClassName, String splitByCol)
    throws ClassNotFoundException, IOException {
  fileType = getInputFileType();

  super.configureInputFormat(job, tableName, tableClassName, splitByCol);

  if (isHCatJob) {
    SqoopHCatUtilities.configureExportInputFormat(options, job, context.getConnManager(), tableName,
        job.getConfiguration());
    return;
  } else if (fileType == FileType.AVRO_DATA_FILE) {
    LOG.debug("Configuring for Avro export");
    ConnManager connManager = context.getConnManager();
    Map<String, Integer> columnTypeInts;
    if (options.getCall() == null) {
      columnTypeInts = connManager.getColumnTypes(tableName, options.getSqlQuery());
    } else {
      columnTypeInts = connManager.getColumnTypesForProcedure(options.getCall());
    }
    MapWritable columnTypes = new MapWritable();
    for (Map.Entry<String, Integer> e : columnTypeInts.entrySet()) {
      Text columnName = new Text(e.getKey());
      Text columnText = new Text(connManager.toJavaType(tableName, e.getKey(), e.getValue()));
      columnTypes.put(columnName, columnText);
    }
    DefaultStringifier.store(job.getConfiguration(), columnTypes, AvroExportMapper.AVRO_COLUMN_TYPES_MAP);
  }
}