List of usage examples for the org.apache.hadoop.io.MapWritable constructor MapWritable()
public MapWritable()
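The no-argument constructor creates an empty map of Writable keys to Writable values. Below is a minimal, hedged sketch (not taken from any of the source files listed here) of typical construction and round-trip serialization; it assumes the usual org.apache.hadoop.io and java.io imports and a surrounding method that declares throws IOException.

  MapWritable map = new MapWritable();
  map.put(new Text("count"), new IntWritable(42));
  map.put(new Text("score"), new DoubleWritable(0.5));

  // MapWritable serializes like any other Writable
  ByteArrayOutputStream bytes = new ByteArrayOutputStream();
  map.write(new DataOutputStream(bytes));

  MapWritable copy = new MapWritable();
  copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
  IntWritable count = (IntWritable) copy.get(new Text("count")); // 42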
From source file:org.apache.mahout.classifier.sequencelearning.hmm.hadoop.BaumWelchUtils.java
License:Apache License
/**
 * Encodes a particular HmmModel as a Sequence File and writes it to the specified location.
 *
 * @param model     HmmModel to be encoded
 * @param modelPath Location to store the encoded model
 * @param conf      Configuration object
 * @throws IOException
 */
protected static void writeModelToDirectory(HmmModel model, Path modelPath, Configuration conf)
    throws IOException {
  int numHidden = model.getNrOfHiddenStates();
  int numObserved = model.getNrOfOutputStates();
  Matrix emissionMatrix = model.getEmissionMatrix();
  Matrix transitionMatrix = model.getTransitionMatrix();
  Vector initialProbability = model.getInitialProbabilities();

  MapWritable initialDistributionMap = new MapWritable();
  MapWritable transitionDistributionMap = new MapWritable();
  MapWritable emissionDistributionMap = new MapWritable();

  // delete the output directory
  HadoopUtil.delete(conf, modelPath);

  // create new file to store HMM
  FileSystem fs = FileSystem.get(modelPath.toUri(), conf);
  Path outFile = new Path(modelPath, "part-randomSeed");
  boolean newFile = fs.createNewFile(outFile);

  if (newFile) {
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, outFile, Text.class, MapWritable.class);
    try {
      for (int i = 0; i < numHidden; i++) {
        IntWritable initialDistributionKey = new IntWritable(i);
        DoubleWritable initialDistributionValue = new DoubleWritable(initialProbability.get(i));
        initialDistributionMap.put(initialDistributionKey, initialDistributionValue);

        Text transitionDistributionKey = new Text("TRANSIT_" + Integer.toString(i));
        MapWritable transitionDistributionValue = new MapWritable();
        for (int j = 0; j < numHidden; j++) {
          IntWritable transitionDistributionInnerKey = new IntWritable(j);
          DoubleWritable transitionDistributionInnerValue = new DoubleWritable(transitionMatrix.get(i, j));
          transitionDistributionValue.put(transitionDistributionInnerKey, transitionDistributionInnerValue);
        }
        transitionDistributionMap.put(transitionDistributionKey, transitionDistributionValue);

        Text emissionDistributionKey = new Text("EMIT_" + Integer.toString(i));
        MapWritable emissionDistributionValue = new MapWritable();
        for (int j = 0; j < numObserved; j++) {
          IntWritable emissionDistributionInnerKey = new IntWritable(j);
          DoubleWritable emissionDistributionInnerValue = new DoubleWritable(emissionMatrix.get(i, j));
          emissionDistributionValue.put(emissionDistributionInnerKey, emissionDistributionInnerValue);
        }
        emissionDistributionMap.put(emissionDistributionKey, emissionDistributionValue);
      }

      writer.append(new Text("INITIAL"), initialDistributionMap);
      log.info("Wrote random Initial Distribution Map to {}", outFile);

      for (MapWritable.Entry<Writable, Writable> transitionEntry : transitionDistributionMap.entrySet()) {
        writer.append(transitionEntry.getKey(), transitionEntry.getValue());
      }
      log.info("Wrote random Transition Distribution Map to {}", outFile);

      for (MapWritable.Entry<Writable, Writable> emissionEntry : emissionDistributionMap.entrySet()) {
        writer.append(emissionEntry.getKey(), emissionEntry.getValue());
      }
      log.info("Wrote random Emission Distribution Map to {}", outFile);
    } finally {
      Closeables.closeQuietly(writer);
    }
  }
}
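For context, a hedged sketch (not part of the source above) of how such a file could be read back with SequenceFile.Reader; it assumes the key naming convention ("INITIAL", "TRANSIT_<i>", "EMIT_<i>") used by writeModelToDirectory and the usual Hadoop imports.

  protected static void readModelMaps(Path modelPath, Configuration conf) throws IOException {
    Path inFile = new Path(modelPath, "part-randomSeed");
    SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(inFile));
    try {
      Text key = new Text();
      MapWritable map = new MapWritable();
      while (reader.next(key, map)) {
        if ("INITIAL".equals(key.toString())) {
          // entries: IntWritable hidden state -> DoubleWritable initial probability
        } else if (key.toString().startsWith("TRANSIT_")) {
          // entries: IntWritable target state -> DoubleWritable transition probability
        } else if (key.toString().startsWith("EMIT_")) {
          // entries: IntWritable output symbol -> DoubleWritable emission probability
        }
      }
    } finally {
      reader.close();
    }
  }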
From source file:org.apache.nutch.scoring.nextpage.NextPageAnalysisScoringFilter.java
License:Apache License
private Outlink generateNextPageLink(String baseUrlString, String previousNextPageIndexString)
    throws MalformedURLException {
  if (previousNextPageIndexString == null)
    previousNextPageIndexString = "1";
  int lastNextPageIndex = Integer.valueOf(previousNextPageIndexString).intValue();
  int nextPageIndex = lastNextPageIndex + 1;

  Outlink nextPageOutlink = new Outlink("", "nextPage " + nextPageIndex);

  URL base = new URL(baseUrlString);
  String file = base.getFile();
  String toUrl = base.toString();
  if (file.equals("/")) {
    toUrl = base.toString() + "nextPage/" + nextPageIndex;
  } else {
    toUrl = base.toString().replace(file, "/nextPage/" + nextPageIndex);
  }
  nextPageOutlink.setUrl(toUrl);

  MapWritable outlinkMeta = new MapWritable();
  nextPageOutlink.setMetadata(outlinkMeta);
  outlinkMeta.put(new Text("nextPageIndex"), new IntWritable(nextPageIndex));

  return nextPageOutlink;
}
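A short, hedged usage sketch of reading that metadata back later, assuming the Outlink class in this codebase exposes a matching getMetadata() accessor:

  MapWritable meta = nextPageOutlink.getMetadata();
  IntWritable stored = (IntWritable) meta.get(new Text("nextPageIndex"));
  int nextPageIndex = (stored == null) ? 1 : stored.get();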
From source file:org.apache.nutch.util.hostdb.HostDatum.java
License:Apache License
/**
 * Returns the MapWritable metadata if it was set or read in {@link #readFields(DataInput)};
 * returns an empty map if this HostDatum was freshly created (the map is lazily instantiated).
 */
public MapWritable getMetaData() {
  if (this.metaData == null)
    this.metaData = new MapWritable();
  return this.metaData;
}
From source file:org.apache.nutch.util.hostdb.HostDatum.java
License:Apache License
@Override
public void readFields(DataInput in) throws IOException {
  byte version = in.readByte();
  if (version > CUR_VERSION) // check version
    throw new VersionMismatchException(CUR_VERSION, version);

  score = in.readFloat();
  lastCheck = new Date(in.readLong());
  homepageUrl = Text.readString(in);
  dnsFailures = in.readInt();
  connectionFailures = in.readInt();

  statCounts.put(CrawlDatum.STATUS_DB_UNFETCHED, in.readInt());
  statCounts.put(CrawlDatum.STATUS_DB_FETCHED, in.readInt());
  statCounts.put(CrawlDatum.STATUS_DB_NOTMODIFIED, in.readInt());
  statCounts.put(CrawlDatum.STATUS_DB_REDIR_PERM, in.readInt());
  statCounts.put(CrawlDatum.STATUS_DB_REDIR_TEMP, in.readInt());
  statCounts.put(CrawlDatum.STATUS_DB_GONE, in.readInt());

  metaData = new MapWritable();
  metaData.readFields(in);
}
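For symmetry, here is a hedged sketch of what the matching write(DataOutput) method could look like, inferred purely from the field order read above; it is an illustration, not the actual HostDatum source.

  @Override
  public void write(DataOutput out) throws IOException {
    out.writeByte(CUR_VERSION);
    out.writeFloat(score);
    out.writeLong(lastCheck.getTime());
    Text.writeString(out, homepageUrl);
    out.writeInt(dnsFailures);
    out.writeInt(connectionFailures);
    out.writeInt(statCounts.get(CrawlDatum.STATUS_DB_UNFETCHED));
    out.writeInt(statCounts.get(CrawlDatum.STATUS_DB_FETCHED));
    out.writeInt(statCounts.get(CrawlDatum.STATUS_DB_NOTMODIFIED));
    out.writeInt(statCounts.get(CrawlDatum.STATUS_DB_REDIR_PERM));
    out.writeInt(statCounts.get(CrawlDatum.STATUS_DB_REDIR_TEMP));
    out.writeInt(statCounts.get(CrawlDatum.STATUS_DB_GONE));
    getMetaData().write(out); // lazily instantiated, so never null
  }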
From source file:org.apache.pirk.inputformat.hadoop.json.JSONRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException {
  key = new Text();
  value = new MapWritable();
  jsonParser = new JSONParser();

  lineReader = new LineRecordReader();
  lineReader.initialize(inputSplit, context);

  queryString = context.getConfiguration().get("query", "?q=*");

  // Load the data schemas
  FileSystem fs = FileSystem.get(context.getConfiguration());
  try {
    SystemConfiguration.setProperty("data.schemas", context.getConfiguration().get("data.schemas"));
    DataSchemaLoader.initialize(true, fs);
  } catch (Exception e) {
    e.printStackTrace();
  }

  String dataSchemaName = context.getConfiguration().get("dataSchemaName");
  dataSchema = DataSchemaRegistry.get(dataSchemaName);
}
From source file:org.apache.pirk.utils.StringUtils.java
License:Apache License
/**
 * Method to take an input JSON string and output a MapWritable with arrays as JSON formatted String objects
 */
public static MapWritable jsonStringToMapWritable(String jsonString) {
  MapWritable value = new MapWritable();
  JSONParser jsonParser = new JSONParser();
  try {
    JSONObject jsonObj = (JSONObject) jsonParser.parse(jsonString);
    for (Object key : jsonObj.keySet()) {
      Text mapKey = new Text(key.toString());
      Text mapValue = new Text();
      if (jsonObj.get(key) != null) {
        mapValue.set(jsonObj.get(key).toString());
      }
      value.put(mapKey, mapValue);
    }
  } catch (ParseException e) {
    logger.warn("Could not json-decode string: " + jsonString, e);
  } catch (NumberFormatException e) {
    logger.warn("Could not parse field into number: " + jsonString, e);
  }
  return value;
}
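A brief, hedged usage sketch of the helper above (the field names are made up for illustration):

  MapWritable record = StringUtils.jsonStringToMapWritable("{\"name\":\"alice\",\"age\":30}");
  Text name = (Text) record.get(new Text("name")); // "alice"
  Text age = (Text) record.get(new Text("age"));   // "30" -- every value is stored as Text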
From source file:org.apache.pirk.utils.StringUtils.java
License:Apache License
/**
 * Method to take an input JSON string and output a MapWritable with arrays as WritableArrayWritable objects
 */
public static MapWritable jsonStringToMapWritableWithWritableArrayWritable(String jsonString, DataSchema dataSchema) {
  MapWritable value = new MapWritable();
  JSONParser jsonParser = new JSONParser();
  try {
    JSONObject jsonObj = (JSONObject) jsonParser.parse(jsonString);
    for (Object key : jsonObj.keySet()) {
      Text mapKey = new Text(key.toString());
      if (jsonObj.get(key) != null) {
        logger.debug("key = " + key.toString());
        if (dataSchema.isArrayElement((String) key)) {
          WritableArrayWritable mapValue = StringUtils
              .jsonArrayStringToWritableArrayWritable(jsonObj.get(key).toString());
          value.put(mapKey, mapValue);
        } else {
          Text mapValue = new Text(jsonObj.get(key).toString());
          value.put(mapKey, mapValue);
        }
      }
    }
  } catch (ParseException e) {
    logger.warn("Could not json-decode string: " + jsonString, e);
  } catch (NumberFormatException e) {
    logger.warn("Could not parse field into number: " + jsonString, e);
  }
  return value;
}
From source file:org.apache.pirk.utils.StringUtils.java
License:Apache License
/**
 * Method to take an input JSON string and output a MapWritable with arrays as ArrayWritable objects
 */
public static MapWritable jsonStringToMapWritableWithArrayWritable(String jsonString, DataSchema dataSchema) {
  MapWritable value = new MapWritable();
  JSONParser jsonParser = new JSONParser();
  try {
    JSONObject jsonObj = (JSONObject) jsonParser.parse(jsonString);
    for (Object key : jsonObj.keySet()) {
      Text mapKey = new Text(key.toString());
      if (jsonObj.get(key) != null) {
        logger.debug("key = " + key.toString());
        if (dataSchema.isArrayElement((String) key)) {
          ArrayWritable mapValue = StringUtils
              .jsonArrayStringtoArrayWritable(jsonObj.get(key).toString());
          value.put(mapKey, mapValue);
        } else {
          Text mapValue = new Text(jsonObj.get(key).toString());
          value.put(mapKey, mapValue);
        }
      }
    }
  } catch (ParseException e) {
    logger.warn("Could not json-decode string: " + jsonString, e);
  } catch (NumberFormatException e) {
    logger.warn("Could not parse field into number: " + jsonString, e);
  }
  return value;
}
From source file:org.apache.sqoop.mapreduce.hcat.SqoopHCatUtilities.java
License:Apache License
public static void configureExportInputFormat(SqoopOptions opts, Job job, ConnManager connMgr,
    String dbTable, Configuration config) throws IOException {
  LOG.info("Configuring HCatalog for export job");
  SqoopHCatUtilities hCatUtils = SqoopHCatUtilities.instance();
  hCatUtils.configureHCat(opts, job, connMgr, dbTable, job.getConfiguration());
  job.setInputFormatClass(getInputFormatClass());

  Map<String, Integer> dbColTypes = hCatUtils.getDbColumnTypes();

  MapWritable columnTypesJava = new MapWritable();
  for (Map.Entry<String, Integer> e : dbColTypes.entrySet()) {
    Text columnName = new Text(e.getKey());
    Text columnText = new Text(connMgr.toJavaType(dbTable, e.getKey(), e.getValue()));
    columnTypesJava.put(columnName, columnText);
  }

  MapWritable columnTypesSql = new MapWritable();
  for (Map.Entry<String, Integer> e : dbColTypes.entrySet()) {
    Text columnName = new Text(e.getKey());
    IntWritable sqlType = new IntWritable(e.getValue());
    columnTypesSql.put(columnName, sqlType);
  }

  DefaultStringifier.store(config, columnTypesJava, SqoopHCatUtilities.HCAT_DB_OUTPUT_COLTYPES_JAVA);
  DefaultStringifier.store(config, columnTypesSql, SqoopHCatUtilities.HCAT_DB_OUTPUT_COLTYPES_SQL);
}
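On the map-task side these serialized maps can presumably be restored with DefaultStringifier.load; a hedged sketch follows (the column name "id" is made up, and the constant names simply mirror the store calls above):

  Configuration conf = job.getConfiguration();
  MapWritable javaTypes = DefaultStringifier.load(conf,
      SqoopHCatUtilities.HCAT_DB_OUTPUT_COLTYPES_JAVA, MapWritable.class);
  MapWritable sqlTypes = DefaultStringifier.load(conf,
      SqoopHCatUtilities.HCAT_DB_OUTPUT_COLTYPES_SQL, MapWritable.class);

  Text javaType = (Text) javaTypes.get(new Text("id"));             // e.g. "Integer"
  IntWritable sqlType = (IntWritable) sqlTypes.get(new Text("id")); // java.sql.Types constant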
From source file:org.apache.sqoop.mapreduce.JdbcExportJob.java
License:Apache License
@Override
protected void configureInputFormat(Job job, String tableName, String tableClassName, String splitByCol)
    throws ClassNotFoundException, IOException {
  fileType = getInputFileType();

  super.configureInputFormat(job, tableName, tableClassName, splitByCol);

  if (isHCatJob) {
    SqoopHCatUtilities.configureExportInputFormat(options, job, context.getConnManager(), tableName,
        job.getConfiguration());
    return;
  } else if (fileType == FileType.AVRO_DATA_FILE) {
    LOG.debug("Configuring for Avro export");
    ConnManager connManager = context.getConnManager();
    Map<String, Integer> columnTypeInts;
    if (options.getCall() == null) {
      columnTypeInts = connManager.getColumnTypes(tableName, options.getSqlQuery());
    } else {
      columnTypeInts = connManager.getColumnTypesForProcedure(options.getCall());
    }
    MapWritable columnTypes = new MapWritable();
    for (Map.Entry<String, Integer> e : columnTypeInts.entrySet()) {
      Text columnName = new Text(e.getKey());
      Text columnText = new Text(connManager.toJavaType(tableName, e.getKey(), e.getValue()));
      columnTypes.put(columnName, columnText);
    }
    DefaultStringifier.store(job.getConfiguration(), columnTypes, AvroExportMapper.AVRO_COLUMN_TYPES_MAP);
  }
}