List of usage examples for org.apache.hadoop.io.MapWritable.put
@Override
public Writable put(Writable key, Writable value)
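Before the project-specific examples, a minimal self-contained sketch (class and key names are made up for illustration) of the put contract itself: both key and value must be Writable, and, as with any java.util.Map, put returns the value previously mapped to the key, or null if there was none.

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class MapWritablePutDemo {
    public static void main(String[] args) {
        MapWritable map = new MapWritable();

        // First put: no previous mapping for the key, so null is returned.
        Writable previous = map.put(new Text("count"), new IntWritable(1));
        System.out.println(previous); // null

        // Putting the same key again replaces the value and returns the old one.
        previous = map.put(new Text("count"), new IntWritable(2));
        System.out.println(previous);                   // 1
        System.out.println(map.get(new Text("count"))); // 2
    }
}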
From source file:org.apache.mahout.classifier.sequencelearning.hmm.hadoop.BaumWelchUtils.java
License:Apache License
/**
 * Encodes a particular HmmModel as a Sequence File and writes it to the specified location.
 *
 * @param model     HmmModel to be encoded
 * @param modelPath Location to store the encoded model
 * @param conf      Configuration object
 * @throws IOException
 */
protected static void writeModelToDirectory(HmmModel model, Path modelPath, Configuration conf)
        throws IOException {
    int numHidden = model.getNrOfHiddenStates();
    int numObserved = model.getNrOfOutputStates();
    Matrix emissionMatrix = model.getEmissionMatrix();
    Matrix transitionMatrix = model.getTransitionMatrix();
    Vector initialProbability = model.getInitialProbabilities();

    MapWritable initialDistributionMap = new MapWritable();
    MapWritable transitionDistributionMap = new MapWritable();
    MapWritable emissionDistributionMap = new MapWritable();

    // delete the output directory
    HadoopUtil.delete(conf, modelPath);

    // create new file to store HMM
    FileSystem fs = FileSystem.get(modelPath.toUri(), conf);
    Path outFile = new Path(modelPath, "part-randomSeed");
    boolean newFile = fs.createNewFile(outFile);

    if (newFile) {
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, outFile, Text.class, MapWritable.class);
        try {
            for (int i = 0; i < numHidden; i++) {
                IntWritable initialDistributionKey = new IntWritable(i);
                DoubleWritable initialDistributionValue = new DoubleWritable(initialProbability.get(i));
                initialDistributionMap.put(initialDistributionKey, initialDistributionValue);

                Text transitionDistributionKey = new Text("TRANSIT_" + Integer.toString(i));
                MapWritable transitionDistributionValue = new MapWritable();
                for (int j = 0; j < numHidden; j++) {
                    IntWritable transitionDistributionInnerKey = new IntWritable(j);
                    DoubleWritable transitionDistributionInnerValue = new DoubleWritable(transitionMatrix.get(i, j));
                    transitionDistributionValue.put(transitionDistributionInnerKey, transitionDistributionInnerValue);
                }
                transitionDistributionMap.put(transitionDistributionKey, transitionDistributionValue);

                Text emissionDistributionKey = new Text("EMIT_" + Integer.toString(i));
                MapWritable emissionDistributionValue = new MapWritable();
                for (int j = 0; j < numObserved; j++) {
                    IntWritable emissionDistributionInnerKey = new IntWritable(j);
                    DoubleWritable emissionDistributionInnerValue = new DoubleWritable(emissionMatrix.get(i, j));
                    emissionDistributionValue.put(emissionDistributionInnerKey, emissionDistributionInnerValue);
                }
                emissionDistributionMap.put(emissionDistributionKey, emissionDistributionValue);
            }

            writer.append(new Text("INITIAL"), initialDistributionMap);
            log.info("Wrote random Initial Distribution Map to {}", outFile);

            for (MapWritable.Entry<Writable, Writable> transitionEntry : transitionDistributionMap.entrySet()) {
                writer.append(transitionEntry.getKey(), transitionEntry.getValue());
            }
            log.info("Wrote random Transition Distribution Map to {}", outFile);

            for (MapWritable.Entry<Writable, Writable> emissionEntry : emissionDistributionMap.entrySet()) {
                writer.append(emissionEntry.getKey(), emissionEntry.getValue());
            }
            log.info("Wrote random Emission Distribution Map to {}", outFile);
        } finally {
            Closeables.closeQuietly(writer);
        }
    }
}
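The maps populated with put above are appended to a SequenceFile keyed by Text. For context, here is a minimal reader sketch (not part of the Mahout source; class and method names are assumptions) that walks the resulting part-randomSeed file and prints every nested entry:

import java.io.IOException;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class HmmModelReaderSketch {
    public static void dumpModel(Path modelFile, Configuration conf) throws IOException {
        FileSystem fs = FileSystem.get(modelFile.toUri(), conf);
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, modelFile, conf);
        try {
            Text key = new Text();
            MapWritable value = new MapWritable();
            // Each record is one distribution: "INITIAL", "TRANSIT_i" or "EMIT_i".
            // MapWritable.readFields clears the map before filling it, so the same
            // instance can be reused across calls to next().
            while (reader.next(key, value)) {
                for (Map.Entry<Writable, Writable> entry : value.entrySet()) {
                    System.out.println(key + "\t" + entry.getKey() + " = " + entry.getValue());
                }
            }
        } finally {
            reader.close();
        }
    }
}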
From source file:org.apache.nutch.crawl.MimeAdaptiveFetchSchedule.java
License:Apache License
public static void main(String[] args) throws Exception {
    FetchSchedule fs = new MimeAdaptiveFetchSchedule();
    fs.setConf(NutchConfiguration.create());

    // we start the time at 0, for simplicity
    long curTime = 0;
    long delta = 1000L * 3600L * 24L; // one day
    // we trigger the update of the page every 30 days
    long update = 1000L * 3600L * 24L * 30L; // 30 days

    boolean changed = true;
    long lastModified = 0;

    int miss = 0;
    int totalMiss = 0;
    int maxMiss = 0;
    int fetchCnt = 0;
    int changeCnt = 0;

    // initial fetchInterval is 30 days
    CrawlDatum p = new CrawlDatum(1, 3600 * 24 * 30, 1.0f);

    // Set a default MIME-type to test with
    org.apache.hadoop.io.MapWritable x = new org.apache.hadoop.io.MapWritable();
    x.put(HttpHeaders.WRITABLE_CONTENT_TYPE, new Text("text/html; charset=utf-8"));
    p.setMetaData(x);
    p.setFetchTime(0);

    LOG.info(p.toString());

    // let's move the timeline a couple of deltas
    for (int i = 0; i < 10000; i++) {
        if (lastModified + update < curTime) {
            //System.out.println("i=" + i + ", lastModified=" + lastModified + ", update=" + update + ", curTime=" + curTime);
            changed = true;
            changeCnt++;
            lastModified = curTime;
        }
        LOG.info(i + ". " + changed + "\twill fetch at " + (p.getFetchTime() / delta) + "\tinterval "
                + (p.getFetchInterval() / SECONDS_PER_DAY) + " days" + "\t missed " + miss);
        if (p.getFetchTime() <= curTime) {
            fetchCnt++;
            fs.setFetchSchedule(new Text("http://www.example.com"), p, p.getFetchTime(), p.getModifiedTime(),
                    curTime, lastModified,
                    changed ? FetchSchedule.STATUS_MODIFIED : FetchSchedule.STATUS_NOTMODIFIED);
            LOG.info("\tfetched & adjusted: " + "\twill fetch at " + (p.getFetchTime() / delta) + "\tinterval "
                    + (p.getFetchInterval() / SECONDS_PER_DAY) + " days");
            if (!changed)
                miss++;
            if (miss > maxMiss)
                maxMiss = miss;
            changed = false;
            totalMiss += miss;
            miss = 0;
        }
        if (changed)
            miss++;
        curTime += delta;
    }
    LOG.info("Total missed: " + totalMiss + ", max miss: " + maxMiss);
    LOG.info("Page changed " + changeCnt + " times, fetched " + fetchCnt + " times.");
}
From source file:org.apache.nutch.scoring.nextpage.NextPageAnalysisScoringFilter.java
License:Apache License
private Outlink generateNextPageLink(String baseUrlString, String previousNextPageIndexString)
        throws MalformedURLException {
    if (previousNextPageIndexString == null)
        previousNextPageIndexString = "1";
    int lastNextPageIndex = Integer.valueOf(previousNextPageIndexString).intValue();
    int nextPageIndex = lastNextPageIndex + 1;

    Outlink nextPageOutlink = new Outlink("", "nextPage " + nextPageIndex);

    URL base = new URL(baseUrlString);
    String file = base.getFile();
    String toUrl = base.toString();
    if (file.equals("/")) {
        toUrl = base.toString() + "nextPage/" + nextPageIndex;
    } else {
        toUrl = base.toString().replace(file, "/nextPage/" + nextPageIndex);
    }
    nextPageOutlink.setUrl(toUrl);

    MapWritable outlinkMeta = new MapWritable();
    nextPageOutlink.setMetadata(outlinkMeta);
    outlinkMeta.put(new Text("nextPageIndex"), new IntWritable(nextPageIndex));

    return nextPageOutlink;
}
From source file:org.apache.nutch.scoring.nextpage.NextPageAnalysisScoringFilter.java
License:Apache License
private void upgradeOutLinks(Content parentContent, Outlink outlink, boolean isNextPageLink) {
    MapWritable outlinkMeta = outlink.getMetadata();

    /*
    if (isNextPageLink && outlinkMeta.get(new Text("nextPage")) != null)
        return;
    */
    // No cookies necessary for next page, browser driven

    Metadata parentContentMeta = parentContent.getMetadata();
    if (parentContentMeta.get("Cookie") != null) {
        outlinkMeta.put(new Text("Cookie"), new Text(parentContentMeta.get("Cookie")));
        outlinkMeta.put(new Text("CookieDomain"), new Text(parentContentMeta.get("CookieDomain")));
        outlinkMeta.put(new Text("CookiePath"), new Text(parentContentMeta.get("CookiePath")));
        outlinkMeta.put(new Text("CookieExpiry"), new Text(parentContentMeta.get("CookieExpiry")));
        outlinkMeta.put(new Text("CookieSecure"), new Text(parentContentMeta.get("CookieSecure")));
    }

    // We must add a parent URL meta, so that the XPathIndexingFilter plugin can use the
    // correct scheme to cast the metas and add them to the Nutch document.
    outlinkMeta.put(new Text(PARSE_PARENT_URL), new Text(parentContent.getBaseUrl()));
}
From source file:org.apache.pirk.utils.StringUtils.java
License:Apache License
/**
 * Method to take an input JSON string and output a MapWritable with arrays as JSON-formatted String objects
 */
public static MapWritable jsonStringToMapWritable(String jsonString) {
    MapWritable value = new MapWritable();
    JSONParser jsonParser = new JSONParser();
    try {
        JSONObject jsonObj = (JSONObject) jsonParser.parse(jsonString);
        for (Object key : jsonObj.keySet()) {
            Text mapKey = new Text(key.toString());
            Text mapValue = new Text();
            if (jsonObj.get(key) != null) {
                mapValue.set(jsonObj.get(key).toString());
            }
            value.put(mapKey, mapValue);
        }
    } catch (ParseException e) {
        logger.warn("Could not json-decode string: " + jsonString, e);
    } catch (NumberFormatException e) {
        logger.warn("Could not parse field into number: " + jsonString, e);
    }
    return value;
}
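A short usage sketch for the helper above (the JSON literal is invented for illustration): every field, including numbers, ends up as a Text value in the returned map.

import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import org.apache.pirk.utils.StringUtils;

public class JsonToMapWritableDemo {
    public static void main(String[] args) {
        MapWritable record = StringUtils.jsonStringToMapWritable("{\"name\":\"alice\",\"age\":42}");
        System.out.println(record.get(new Text("name"))); // alice
        System.out.println(record.get(new Text("age")));  // 42 (stored as Text, not IntWritable)
    }
}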
From source file:org.apache.pirk.utils.StringUtils.java
License:Apache License
/**
 * Method to take an input JSON string and output a MapWritable with arrays as WritableArrayWritable objects
 */
public static MapWritable jsonStringToMapWritableWithWritableArrayWritable(String jsonString,
        DataSchema dataSchema) {
    MapWritable value = new MapWritable();
    JSONParser jsonParser = new JSONParser();
    try {
        JSONObject jsonObj = (JSONObject) jsonParser.parse(jsonString);
        for (Object key : jsonObj.keySet()) {
            Text mapKey = new Text(key.toString());
            if (jsonObj.get(key) != null) {
                logger.debug("key = " + key.toString());
                if (dataSchema.isArrayElement((String) key)) {
                    WritableArrayWritable mapValue = StringUtils
                            .jsonArrayStringToWritableArrayWritable(jsonObj.get(key).toString());
                    value.put(mapKey, mapValue);
                } else {
                    Text mapValue = new Text(jsonObj.get(key).toString());
                    value.put(mapKey, mapValue);
                }
            }
        }
    } catch (ParseException e) {
        logger.warn("Could not json-decode string: " + jsonString, e);
    } catch (NumberFormatException e) {
        logger.warn("Could not parse field into number: " + jsonString, e);
    }
    return value;
}
From source file:org.apache.pirk.utils.StringUtils.java
License:Apache License
/**
 * Method to take an input JSON string and output a MapWritable with arrays as ArrayWritable objects
 */
public static MapWritable jsonStringToMapWritableWithArrayWritable(String jsonString, DataSchema dataSchema) {
    MapWritable value = new MapWritable();
    JSONParser jsonParser = new JSONParser();
    try {
        JSONObject jsonObj = (JSONObject) jsonParser.parse(jsonString);
        for (Object key : jsonObj.keySet()) {
            Text mapKey = new Text(key.toString());
            if (jsonObj.get(key) != null) {
                logger.debug("key = " + key.toString());
                if (dataSchema.isArrayElement((String) key)) {
                    ArrayWritable mapValue = StringUtils
                            .jsonArrayStringtoArrayWritable(jsonObj.get(key).toString());
                    value.put(mapKey, mapValue);
                } else {
                    Text mapValue = new Text(jsonObj.get(key).toString());
                    value.put(mapKey, mapValue);
                }
            }
        }
    } catch (ParseException e) {
        logger.warn("Could not json-decode string: " + jsonString, e);
    } catch (NumberFormatException e) {
        logger.warn("Could not parse field into number: " + jsonString, e);
    }
    return value;
}
From source file:org.apache.sqoop.mapreduce.hcat.SqoopHCatUtilities.java
License:Apache License
public static void configureExportInputFormat(SqoopOptions opts, Job job, ConnManager connMgr, String dbTable,
        Configuration config) throws IOException {
    LOG.info("Configuring HCatalog for export job");
    SqoopHCatUtilities hCatUtils = SqoopHCatUtilities.instance();
    hCatUtils.configureHCat(opts, job, connMgr, dbTable, job.getConfiguration());
    job.setInputFormatClass(getInputFormatClass());
    Map<String, Integer> dbColTypes = hCatUtils.getDbColumnTypes();

    MapWritable columnTypesJava = new MapWritable();
    for (Map.Entry<String, Integer> e : dbColTypes.entrySet()) {
        Text columnName = new Text(e.getKey());
        Text columnText = new Text(connMgr.toJavaType(dbTable, e.getKey(), e.getValue()));
        columnTypesJava.put(columnName, columnText);
    }

    MapWritable columnTypesSql = new MapWritable();
    for (Map.Entry<String, Integer> e : dbColTypes.entrySet()) {
        Text columnName = new Text(e.getKey());
        IntWritable sqlType = new IntWritable(e.getValue());
        columnTypesSql.put(columnName, sqlType);
    }

    DefaultStringifier.store(config, columnTypesJava, SqoopHCatUtilities.HCAT_DB_OUTPUT_COLTYPES_JAVA);
    DefaultStringifier.store(config, columnTypesSql, SqoopHCatUtilities.HCAT_DB_OUTPUT_COLTYPES_SQL);
}
From source file:org.apache.sqoop.mapreduce.JdbcExportJob.java
License:Apache License
@Override
protected void configureInputFormat(Job job, String tableName, String tableClassName, String splitByCol)
        throws ClassNotFoundException, IOException {
    fileType = getInputFileType();

    super.configureInputFormat(job, tableName, tableClassName, splitByCol);

    if (isHCatJob) {
        SqoopHCatUtilities.configureExportInputFormat(options, job, context.getConnManager(), tableName,
                job.getConfiguration());
        return;
    } else if (fileType == FileType.AVRO_DATA_FILE) {
        LOG.debug("Configuring for Avro export");
        ConnManager connManager = context.getConnManager();
        Map<String, Integer> columnTypeInts;
        if (options.getCall() == null) {
            columnTypeInts = connManager.getColumnTypes(tableName, options.getSqlQuery());
        } else {
            columnTypeInts = connManager.getColumnTypesForProcedure(options.getCall());
        }
        MapWritable columnTypes = new MapWritable();
        for (Map.Entry<String, Integer> e : columnTypeInts.entrySet()) {
            Text columnName = new Text(e.getKey());
            Text columnText = new Text(connManager.toJavaType(tableName, e.getKey(), e.getValue()));
            columnTypes.put(columnName, columnText);
        }
        DefaultStringifier.store(job.getConfiguration(), columnTypes, AvroExportMapper.AVRO_COLUMN_TYPES_MAP);
    }
}
From source file:org.apache.sqoop.mapreduce.odps.HdfsOdpsImportJob.java
License:Apache License
private void configureGenericRecordExportInputFormat(Job job, String tableName) throws IOException {
    if (options.getOdpsTable() != null) {
        MapWritable columnTypes = new MapWritable();
        Map<String, OdpsType> colTypeMap = getColTypeMap();
        for (Map.Entry<String, OdpsType> e : colTypeMap.entrySet()) {
            String column = e.getKey();
            if (column != null) {
                Text columnName = new Text(column);
                Text columnType = new Text(toJavaType(e.getValue()));
                columnTypes.put(columnName, columnType);
            }
        }
        DefaultStringifier.store(job.getConfiguration(), columnTypes, AvroExportMapper.AVRO_COLUMN_TYPES_MAP);
        return;
    }

    ConnManager connManager = context.getConnManager();
    Map<String, Integer> columnTypeInts;
    if (options.getCall() == null) {
        columnTypeInts = connManager.getColumnTypes(tableName, options.getSqlQuery());
    } else {
        columnTypeInts = connManager.getColumnTypesForProcedure(options.getCall());
    }
    String[] specifiedColumns = options.getColumns();
    MapWritable columnTypes = new MapWritable();
    for (Map.Entry<String, Integer> e : columnTypeInts.entrySet()) {
        String column = e.getKey();
        column = (specifiedColumns == null) ? column : options.getColumnNameCaseInsensitive(column);
        if (column != null) {
            Text columnName = new Text(column);
            Text columnType = new Text(connManager.toJavaType(tableName, column, e.getValue()));
            columnTypes.put(columnName, columnType);
        }
    }
    DefaultStringifier.store(job.getConfiguration(), columnTypes, AvroExportMapper.AVRO_COLUMN_TYPES_MAP);
}
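In the three Sqoop-style examples above, the MapWritable built with put is serialized into the job configuration with DefaultStringifier.store. For context, a brief sketch (class and method names are illustrative) of the matching read side, where the map is recovered with DefaultStringifier.load under the same key (for example AvroExportMapper.AVRO_COLUMN_TYPES_MAP):

import java.io.IOException;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DefaultStringifier;
import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public class ColumnTypeLookup {
    // Reads back a MapWritable stored under the given configuration key and
    // prints the column-name -> type pairs that were put into it.
    public static void dumpColumnTypes(Configuration conf, String confKey) throws IOException {
        MapWritable columnTypes = DefaultStringifier.load(conf, confKey, MapWritable.class);
        for (Map.Entry<Writable, Writable> e : columnTypes.entrySet()) {
            Text columnName = (Text) e.getKey();
            System.out.println(columnName + " -> " + e.getValue());
        }
    }
}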