Example usage for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()

List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Source Link

Document

Return the configuration for the job.

Usage

From source file:com.marklogic.mapreduce.ContentOutputFormat.java

License:Apache License

/**
 * Builds the {@code ContentWriter} used to emit output for this task attempt.
 *
 * The fast-load flag is read from the job configuration under
 * {@code OUTPUT_FAST_LOAD} and stored in the {@code fastLoad} field before the
 * content sources are looked up through {@code getSourceMap}.
 *
 * @param context task attempt context that supplies the job configuration
 * @return a new {@code ContentWriter} built from the configuration, the
 *         source map, the fast-load flag and the {@code am} field
 * @throws IOException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
public RecordWriter<DocumentURI, VALUEOUT> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    // TODO: if MAPREDUCE-3377 still exists, need to re-run initialize
    final Configuration jobConf = context.getConfiguration();

    // A missing setting yields null, which Boolean.valueOf maps to false.
    final String fastLoadSetting = jobConf.get(OUTPUT_FAST_LOAD);
    fastLoad = Boolean.valueOf(fastLoadSetting);

    final Map<String, ContentSource> sources = getSourceMap(fastLoad, context);
    return new ContentWriter<VALUEOUT>(jobConf, sources, fastLoad, am);
}

From source file:com.marklogic.mapreduce.DocumentInputFormat.java

License:Apache License

/**
 * Creates a {@code DocumentReader} for the given task attempt.
 *
 * Only the job configuration taken from {@code context} is passed to the
 * reader; {@code split} is not used in this method.
 *
 * @param split the input split (unused here)
 * @param context task attempt context that supplies the job configuration
 * @return a new {@code DocumentReader} constructed with that configuration
 * @throws IOException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
public RecordReader<DocumentURI, VALUEIN> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    return new DocumentReader<VALUEIN>(context.getConfiguration());
}

From source file:com.marklogic.mapreduce.examples.BinaryReader.java

License:Apache License

/**
 * Creates a {@code BinaryWriter} for the given task attempt.
 *
 * The writer receives the output path resolved by {@code getOutputPath(context)}
 * together with the job configuration taken from {@code context}.
 *
 * @param context task attempt context used for both the output path lookup
 *        and the job configuration
 * @return a new {@code BinaryWriter}
 * @throws IOException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
public RecordWriter<DocumentURI, BytesWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    return new BinaryWriter(getOutputPath(context), context.getConfiguration());
}

From source file:com.marklogic.mapreduce.examples.ContentLoader.java

License:Apache License

/**
 * Reads the entire content of the split's file into memory as one record.
 *
 * The file path becomes the key and the raw bytes become the value. The split
 * length is recorded in {@code bytesTotal}, and {@code hasNext} reports whether
 * the content was read successfully.
 *
 * The stream is closed on every exit path, including failures while setting
 * the key or allocating the buffer.
 *
 * @param inSplit the split to read; cast to {@code FileSplit} to obtain the path
 * @param context task attempt context that supplies the job configuration
 * @throws IOException if the file cannot be opened or the stream cannot be closed
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void initialize(InputSplit inSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    bytesTotal = inSplit.getLength();
    Path file = ((FileSplit) inSplit).getPath();
    FileSystem fs = file.getFileSystem(context.getConfiguration());
    FSDataInputStream fileIn = fs.open(file);
    try {
        key.set(file.toString());
        // The buffer is sized to the split length, so the whole split is held in memory.
        byte[] buf = new byte[(int) inSplit.getLength()];
        try {
            fileIn.readFully(buf);
            value.set(buf);
            hasNext = true;
        } catch (Exception e) {
            // Best effort: a failed read is reported as "no record" rather than
            // failing the task, matching the original behavior.
            hasNext = false;
        }
    } finally {
        fileIn.close();
    }
}

From source file:com.marklogic.mapreduce.examples.LinkCountHDFS.java

License:Apache License

/**
 * Parses the split's file as XML and collects the XPath matches into {@code items}.
 *
 * The document is parsed with the thread-local DOM builder, wrapped for Saxon,
 * and evaluated against {@code PATH_EXPRESSION} with the {@code wp} prefix bound
 * to the MediaWiki export namespace.
 *
 * @param inSplit the split to read; cast to {@code FileSplit} to obtain the path
 * @param context task attempt context that supplies the job configuration
 * @throws IOException if the file cannot be read, or wrapping a {@code SAXException}
 *         raised while parsing
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void initialize(InputSplit inSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    final Path splitPath = ((FileSplit) inSplit).getPath();
    final FileSystem fileSystem = splitPath.getFileSystem(context.getConfiguration());
    final FSDataInputStream input = fileSystem.open(splitPath);
    final DocumentBuilder domBuilder = builderLocal.get();
    try {
        final Document dom = domBuilder.parse(input);
        final XdmNode wrappedDoc = saxonBuilderLocal.get().wrap(dom);

        final XPathCompiler compiler = proc.newXPathCompiler();
        compiler.declareNamespace("wp", "http://www.mediawiki.org/xml/export-0.4/");

        final XPathSelector matches = compiler.compile(PATH_EXPRESSION).load();
        matches.setContextItem(wrappedDoc);

        items = new ArrayList<XdmItem>();
        for (XdmItem match : matches) {
            items.add(match);
        }
    } catch (SAXException parseFailure) {
        parseFailure.printStackTrace();
        throw new IOException(parseFailure);
    } catch (SaxonApiException xpathFailure) {
        // Only reported, not rethrown; items keeps whatever was assigned so far.
        xpathFailure.printStackTrace();
    } finally {
        if (input != null) {
            input.close();
        }
    }
}

From source file:com.marklogic.mapreduce.examples.WikiLoader.java

License:Apache License

/**
 * Reads the wiki pages covered by this split and hands them to
 * {@code WikiModelProcessor.process}, storing the result in {@code articles}.
 *
 * Scanning starts at the split offset and skips everything before the first
 * {@code BEGIN_PAGE_TAG}. Once the split length has been consumed, the text is
 * cut after the last {@code END_PAGE_TAG} if the final chunk contains
 * {@code END_DOC_TAG} or ends with {@code END_PAGE_TAG}; otherwise reading
 * continues past the split until the next {@code END_PAGE_TAG} is found.
 *
 * If the split contains no {@code BEGIN_PAGE_TAG} at all, scanning stops at the
 * end of the split and an empty buffer is processed.
 *
 * NOTE(review): bytes are decoded per chunk with the platform default charset,
 * and tags are searched per chunk, so a tag or multi-byte character straddling
 * a chunk boundary may be missed - confirm whether this matters for the inputs.
 *
 * @param inSplit the split to read; cast to {@code FileSplit} for path and start offset
 * @param context task attempt context that supplies the job configuration
 * @throws IOException if opening, seeking, reading or closing the file fails
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void initialize(InputSplit inSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Path file = ((FileSplit) inSplit).getPath();
    FileSystem fs = file.getFileSystem(context.getConfiguration());
    FSDataInputStream fileIn = fs.open(file);
    StringBuilder pages = new StringBuilder();
    try {
        byte[] buf = new byte[BUFFER_SIZE];
        long bytesTotal = inSplit.getLength();
        long start = ((FileSplit) inSplit).getStart();
        fileIn.seek(start);
        long bytesRead = 0;
        int sindex = -1;
        // Never request more than the buffer can hold while reading ahead.
        int readAheadLength = Math.min(READ_AHEAD_SIZE, buf.length);
        while (true) {
            int length = (int) Math.min(bytesTotal - bytesRead, buf.length);
            int read = fileIn.read(buf, 0, length);
            if (read == -1) {
                System.out.println("Unexpected EOF: bytesTotal=" + bytesTotal + "bytesRead=" + bytesRead);
                break;
            }
            bytesRead += read;
            String temp = new String(buf, 0, read);
            if (sindex == -1) { // haven't found the start yet
                sindex = temp.indexOf(BEGIN_PAGE_TAG);
                if (sindex > -1) {
                    pages.append(temp.substring(sindex));
                } else if (bytesRead >= bytesTotal) {
                    // The whole split was scanned without finding a page start.
                    // Every further read would request 0 bytes and return 0,
                    // so stop here instead of looping forever.
                    break;
                }
            } else if (bytesRead < bytesTotal) { // haven't completed the split
                pages.append(temp);
            } else { // reached the end of this split
                // look for end
                int eindex = 0;
                if (temp.contains(END_DOC_TAG) || // reached the end of doc
                        temp.endsWith(END_PAGE_TAG)) {
                    eindex = temp.lastIndexOf(END_PAGE_TAG);
                    pages.append(temp.substring(0, eindex + END_PAGE_TAG.length()));
                    System.out.println("Found end of doc.");
                } else { // need to read ahead to look for end of page
                    while (true) {
                        read = fileIn.read(buf, 0, readAheadLength);
                        if (read == -1) { // no more to read
                            System.out
                                    .println("Unexpected EOF: bytesTotal=" + bytesTotal + "bytesRead=" + bytesRead);
                            System.out.println(temp);
                            break;
                        }
                        bytesRead += read;
                        // look for end
                        temp = new String(buf, 0, read);
                        eindex = temp.indexOf(END_PAGE_TAG);
                        if (eindex > -1) {
                            pages.append(temp.substring(0, eindex + END_PAGE_TAG.length()));
                            break;
                        } else {
                            pages.append(temp);
                        }
                    }
                }
                break;
            }
        }
    } finally {
        // Close the stream even when seek/read throws.
        fileIn.close();
    }
    articles = WikiModelProcessor.process(pages);
}

From source file:com.marklogic.mapreduce.examples.ZipContentLoader.java

License:Apache License

/**
 * Opens the split's file and wraps it in a {@code ZipInputStream} stored in
 * the {@code zipIn} field.
 *
 * @param inSplit the split to read; cast to {@code FileSplit} to obtain the path
 * @param context task attempt context that supplies the job configuration
 * @throws IOException if the file system cannot be resolved or the file cannot be opened
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void initialize(InputSplit inSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    final Path zipPath = ((FileSplit) inSplit).getPath();
    zipIn = new ZipInputStream(
            zipPath.getFileSystem(context.getConfiguration()).open(zipPath));
}

From source file:com.marklogic.mapreduce.ForestReader.java

License:Apache License

/**
 * Opens the forest data files for the given split and loads the filter settings.
 *
 * Three streams are opened: the split's data file (advanced by the split start
 * offset) plus the "Ordinals" and "Timestamps" files in the same directory. The
 * configured value class must be assignable to {@code ForestDocument}. Directory
 * filters that do not end in "/" are replaced by a variant that does.
 *
 * @param split the split to read; cast to {@code FileSplit}
 * @param context task attempt context that supplies the job configuration
 * @throws IOException if any of the files cannot be opened or skipped
 * @throws InterruptedException declared by the overridden method
 * @throws IllegalArgumentException if the configured value class is not a
 *         {@code ForestDocument} type
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    this.split = (FileSplit) split;
    conf = context.getConfiguration();

    final Path dataFile = this.split.getPath();
    final Path forestDir = dataFile.getParent();
    final FileSystem fileSystem = dataFile.getFileSystem(conf);

    dataIs = new BiendianDataInputStream(fileSystem.open(dataFile));
    dataIs.skipBytes(this.split.getStart());
    ordIs = new BiendianDataInputStream(fileSystem.open(new Path(forestDir, "Ordinals")));
    tsIs = new BiendianDataInputStream(fileSystem.open(new Path(forestDir, "Timestamps")));

    valueClass = conf.getClass(INPUT_VALUE_CLASS, ForestDocument.class, Writable.class);
    if (!ForestDocument.class.isAssignableFrom(valueClass)) {
        throw new IllegalArgumentException("Unsupported " + INPUT_VALUE_CLASS);
    }

    largeForestDir = new Path(forestDir.getParent(), "Large");
    colFilters = conf.getStringCollection(COLLECTION_FILTER);
    dirFilters = conf.getStringCollection(DIRECTORY_FILTER);

    // Swap every directory filter lacking a trailing slash for one that has it.
    // Replacements are collected first and appended after iteration finishes.
    final Collection<String> withSlash = new ArrayList<String>();
    final Iterator<String> cursor = dirFilters.iterator();
    while (cursor.hasNext()) {
        final String dir = cursor.next();
        if (dir.endsWith("/")) {
            continue;
        }
        final String normalized = dir + "/";
        cursor.remove();
        withSlash.add(normalized);
    }
    if (!withSlash.isEmpty()) {
        dirFilters.addAll(withSlash);
    }

    typeFilters = conf.getStringCollection(TYPE_FILTER);
}

From source file:com.marklogic.mapreduce.KeyValueInputFormat.java

License:Apache License

/**
 * Creates a {@code KeyValueReader} for the given task attempt.
 *
 * Only the job configuration taken from {@code context} is passed to the
 * reader; {@code split} is not used in this method.
 *
 * @param split the input split (unused here)
 * @param context task attempt context that supplies the job configuration
 * @return a new {@code KeyValueReader} constructed with that configuration
 * @throws IOException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
public RecordReader<KEYIN, VALUEIN> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    return new KeyValueReader<KEYIN, VALUEIN>(context.getConfiguration());
}

From source file:com.marklogic.mapreduce.KeyValueOutputFormat.java

License:Apache License

/**
 * Creates a {@code KeyValueWriter} bound to one host for this task attempt.
 *
 * The host list is obtained from {@code getHosts} using the job configuration,
 * and {@code InternalUtilities.getHost} picks the host passed to the writer.
 *
 * @param context task attempt context that supplies the job configuration
 * @return a new {@code KeyValueWriter} for the selected host
 * @throws IOException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
public RecordWriter<KEYOUT, VALUEOUT> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    final Configuration jobConf = context.getConfiguration();
    final String targetHost = InternalUtilities.getHost(getHosts(jobConf));
    return new KeyValueWriter<KEYOUT, VALUEOUT>(jobConf, targetHost);
}