Java tutorial
/**
 * (c) Copyright 2013 WibiData, Inc.
 *
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.moz.fiji.mapreduce.lib.examples;

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;

import com.moz.fiji.mapreduce.FijiTableContext;
import com.moz.fiji.mapreduce.bulkimport.FijiBulkImporter;
import com.moz.fiji.schema.EntityId;

/**
 * <p>Bulk importer that consumes (qualified path, raw article) key/value pairs and
 * writes one fiji row per article.</p>
 *
 * <p>The category of an article is the name of the folder that directly contains it.
 * The article name is that category joined to the file name with a dot, which keeps
 * names unique across categories. The value is stored verbatim as the raw article.</p>
 *
 * <p>For example, calling {@code produce()} with the key/value pair</p>
 * <code>"some/path/sci.med/12345":"This is the article text"</code>
 * <p>writes a single row with the following fields:</p>
 * <ul>
 *   <li>name: "sci.med.12345"</li>
 *   <li>category: "sci.med"</li>
 *   <li>raw_article: "This is the article text"</li>
 * </ul>
 */
public class News20BulkImporter extends FijiBulkImporter<Text, Text> {
  /** Column family that every imported cell is written to. */
  public static final String FAMILY = "info";

  /** Qualifier of the column holding the article name. */
  public static final String ARTICLE_NAME_QUALIFIER = "name";

  /** Qualifier of the column holding the article category. */
  public static final String CATEGORY_QUALIFIER = "category";

  /** Qualifier of the column holding the unprocessed article text. */
  public static final String RAW_ARTICLE_QUALIFIER = "raw_article";

  /**
   * Imports one news article into its own fiji row.
   *
   * <p>The row is keyed by the article name, which is built from the name of the
   * containing folder (the category) and the file name, separated by a dot. The name,
   * the category and the raw text are each written to a column of {@link #FAMILY}.</p>
   *
   * @param key The fully qualified path of the file being read.
   * @param value The raw article text to store.
   * @param context The context to write to.
   * @throws IOException if there is an error.
   */
  @Override
  public void produce(Text key, Text value, FijiTableContext context)
      throws IOException {
    final Path articlePath = new Path(key.toString());

    // The directory that directly contains the file names the category.
    final Path categoryDir = articlePath.getParent();
    final String category = categoryDir.getName();

    // Prefix the file name with its category to form the row's name.
    final String fileName = articlePath.getName();
    final String articleName = category + "." + fileName;

    // One row per article: name, category, then the raw text.
    final EntityId rowId = context.getEntityId(articleName);
    context.put(rowId, FAMILY, ARTICLE_NAME_QUALIFIER, articleName);
    context.put(rowId, FAMILY, CATEGORY_QUALIFIER, category);
    context.put(rowId, FAMILY, RAW_ARTICLE_QUALIFIER, value.toString());
  }
}