Example usage for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Source Link

Document

Return the configuration for the job.

Usage

From source file:com.marklogic.mapreduce.ContentOutputFormat.java

License:Apache License

/**
 * Creates the writer used to emit this task attempt's output.
 *
 * Reads the {@code OUTPUT_FAST_LOAD} setting from the job configuration,
 * stores it in {@code fastLoad}, obtains the content-source map through
 * {@code getSourceMap} and hands everything to a new {@code ContentWriter}.
 *
 * @param context the task attempt whose configuration is consulted
 * @return a {@code ContentWriter} bound to the job configuration
 * @throws IOException declared by the overridden method; may come from {@code getSourceMap}
 * @throws InterruptedException declared by the overridden method
 */
@Override
public RecordWriter<DocumentURI, VALUEOUT> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    final Configuration jobConf = context.getConfiguration();
    // TODO: if MAPREDUCE-3377 still exists, need to re-run initialize
    final String fastLoadSetting = jobConf.get(OUTPUT_FAST_LOAD);
    // Boolean.valueOf(String) yields false for a missing (null) setting.
    fastLoad = Boolean.valueOf(fastLoadSetting);
    final Map<String, ContentSource> sources = getSourceMap(fastLoad, context);
    return new ContentWriter<VALUEOUT>(jobConf, sources, fastLoad, am);
}

From source file:com.marklogic.mapreduce.DocumentInputFormat.java

License:Apache License

/**
 * Creates a {@code DocumentReader} configured from the task attempt's
 * job configuration. The split itself is not inspected here.
 *
 * @param split the input split (unused by this method)
 * @param context the task attempt supplying the configuration
 * @return a new {@code DocumentReader}
 */
@Override
public RecordReader<DocumentURI, VALUEIN> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    RecordReader<DocumentURI, VALUEIN> reader =
            new DocumentReader<VALUEIN>(context.getConfiguration());
    return reader;
}

From source file:com.marklogic.mapreduce.examples.BinaryReader.java

License:Apache License

/**
 * Creates a {@code BinaryWriter} for this task attempt, passing it the
 * result of {@code getOutputPath(context)} and the job configuration.
 *
 * @param context the task attempt supplying the output path and configuration
 * @return a new {@code BinaryWriter}
 */
@Override
public RecordWriter<DocumentURI, BytesWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    RecordWriter<DocumentURI, BytesWritable> writer =
            new BinaryWriter(getOutputPath(context), context.getConfiguration());
    return writer;
}

From source file:com.marklogic.mapreduce.examples.ContentLoader.java

License:Apache License

/**
 * Reads the whole file behind the split into memory as a single record.
 *
 * The file path becomes the key and the file bytes become the value.
 * {@code hasNext} is set to {@code true} when the content was read
 * successfully and to {@code false} when reading failed.
 *
 * @param inSplit the split to load; cast to {@code FileSplit} to obtain the path
 * @param context the task attempt supplying the configuration
 * @throws IOException if the split is too large to buffer in a single
 *         array, or if the file cannot be opened or closed
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void initialize(InputSplit inSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    long splitLength = inSplit.getLength();
    bytesTotal = splitLength;
    // A Java array cannot hold more than Integer.MAX_VALUE bytes; the
    // unchecked (int) cast would otherwise truncate or go negative.
    if (splitLength > Integer.MAX_VALUE) {
        throw new IOException("Split of " + splitLength
                + " bytes is too large to load into a single buffer");
    }
    Path file = ((FileSplit) inSplit).getPath();
    key.set(file.toString());
    // Allocate before opening the stream so that an allocation failure
    // cannot leave an open stream behind.
    byte[] buf = new byte[(int) splitLength];
    FileSystem fs = file.getFileSystem(context.getConfiguration());
    FSDataInputStream fileIn = fs.open(file);
    try {
        fileIn.readFully(buf);
        value.set(buf);
        hasNext = true;
    } catch (Exception e) {
        // Best effort: a file that cannot be read yields no record.
        hasNext = false;
    } finally {
        fileIn.close();
    }
}

From source file:com.marklogic.mapreduce.examples.LinkCountHDFS.java

License:Apache License

/**
 * Parses the file behind the split as XML, evaluates
 * {@code PATH_EXPRESSION} against it and stores the matching items
 * in {@code items}.
 *
 * The input stream is always closed before this method returns.
 *
 * @param inSplit the split to read; cast to {@code FileSplit} to obtain the path
 * @param context the task attempt supplying the configuration
 * @throws IOException if the file cannot be opened or read, if the XML
 *         cannot be parsed, or if the XPath cannot be compiled or evaluated
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void initialize(InputSplit inSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Path file = ((FileSplit) inSplit).getPath();
    FileSystem fs = file.getFileSystem(context.getConfiguration());
    FSDataInputStream fileIn = fs.open(file);
    try {
        // Obtained inside the try so a failure here still closes the stream.
        DocumentBuilder docBuilder = builderLocal.get();
        Document document = docBuilder.parse(fileIn);
        net.sf.saxon.s9api.DocumentBuilder db = saxonBuilderLocal.get();
        XdmNode xdmDoc = db.wrap(document);
        XPathCompiler xpath = proc.newXPathCompiler();
        xpath.declareNamespace("wp", "http://www.mediawiki.org/xml/export-0.4/");
        XPathSelector selector = xpath.compile(PATH_EXPRESSION).load();
        selector.setContextItem(xdmDoc);
        // Collect locally and publish once complete, so a failure part-way
        // through never leaves a half-filled list in the field.
        ArrayList<XdmItem> matched = new ArrayList<XdmItem>();
        for (XdmItem item : selector) {
            matched.add(item);
        }
        items = matched;
    } catch (SAXException ex) {
        throw new IOException("Failed to parse XML from " + file, ex);
    } catch (SaxonApiException ex) {
        // Previously only printed, which let the reader continue without
        // any items having been assigned; report it like a parse failure.
        throw new IOException("Failed to evaluate XPath on " + file, ex);
    } finally {
        fileIn.close();
    }
}

From source file:com.marklogic.mapreduce.examples.WikiLoader.java

License:Apache License

/**
 * Reads the wiki pages covered by this split into memory and passes
 * them to {@code WikiModelProcessor.process}.
 *
 * Scanning starts at the split's start offset. Text is collected from the
 * first {@code BEGIN_PAGE_TAG} found. Once the split's byte count has been
 * consumed, reading continues past the split in {@code READ_AHEAD_SIZE}
 * chunks until an {@code END_PAGE_TAG} is seen, unless the last chunk
 * already contains {@code END_DOC_TAG} or ends with {@code END_PAGE_TAG}.
 *
 * NOTE(review): bytes are decoded chunk by chunk with the platform default
 * charset, so a multi-byte character or a tag that straddles a chunk
 * boundary is not handled. Left unchanged here.
 *
 * @param inSplit the split to read; cast to {@code FileSplit} for path and start
 * @param context the task attempt supplying the configuration
 * @throws IOException if the file cannot be opened, positioned, read or closed
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void initialize(InputSplit inSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Path file = ((FileSplit) inSplit).getPath();
    FileSystem fs = file.getFileSystem(context.getConfiguration());
    FSDataInputStream fileIn = fs.open(file);
    StringBuilder pages = new StringBuilder();
    try {
        byte[] buf = new byte[BUFFER_SIZE];
        long bytesTotal = inSplit.getLength();
        long start = ((FileSplit) inSplit).getStart();
        fileIn.seek(start);
        long bytesRead = 0;
        int sindex = -1;
        while (true) {
            int length = (int) Math.min(bytesTotal - bytesRead, buf.length);
            int read = fileIn.read(buf, 0, length);
            if (read == -1) {
                System.out.println("Unexpected EOF: bytesTotal=" + bytesTotal + "bytesRead=" + bytesRead);
                break;
            }
            bytesRead += read;
            String temp = new String(buf, 0, read);
            if (sindex == -1) { // haven't found the start yet
                sindex = temp.indexOf(BEGIN_PAGE_TAG);
                if (sindex > -1) {
                    pages.append(temp.substring(sindex));
                } else if (bytesRead >= bytesTotal) {
                    // The whole split was consumed without finding a page
                    // start. Without this exit every further read asks for
                    // zero bytes, returns 0 rather than -1, and the loop
                    // never ends.
                    break;
                }
            } else if (bytesRead < bytesTotal) { // haven't completed the split
                pages.append(temp);
            } else { // reached the end of this split
                // look for end
                int eindex = 0;
                if (temp.contains(END_DOC_TAG) || // reached the end of doc
                        temp.endsWith(END_PAGE_TAG)) {
                    eindex = temp.lastIndexOf(END_PAGE_TAG);
                    pages.append(temp.substring(0, eindex + END_PAGE_TAG.length()));
                    System.out.println("Found end of doc.");
                } else { // need to read ahead to look for end of page
                    while (true) {
                        read = fileIn.read(buf, 0, READ_AHEAD_SIZE);
                        if (read == -1) { // no more to read
                            System.out
                                    .println("Unexpected EOF: bytesTotal=" + bytesTotal + "bytesRead=" + bytesRead);
                            System.out.println(temp);
                            break;
                        }
                        bytesRead += read;
                        // look for end
                        temp = new String(buf, 0, read);
                        eindex = temp.indexOf(END_PAGE_TAG);
                        if (eindex > -1) {
                            pages.append(temp.substring(0, eindex + END_PAGE_TAG.length()));
                            break;
                        } else {
                            pages.append(temp);
                        }
                    }
                }
                break;
            }
        }
    } finally {
        // Close even when seek/read throws, so the stream is never leaked.
        fileIn.close();
    }
    articles = WikiModelProcessor.process(pages);
}

From source file:com.marklogic.mapreduce.examples.ZipContentLoader.java

License:Apache License

/**
 * Opens the file behind the split and wraps it in a {@code ZipInputStream}
 * stored in {@code zipIn}. The stream is left open for later reads.
 *
 * @param inSplit the split to read; cast to {@code FileSplit} to obtain the path
 * @param context the task attempt supplying the configuration
 * @throws IOException if the file system cannot be resolved or the file cannot be opened
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void initialize(InputSplit inSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    final FileSplit fileSplit = (FileSplit) inSplit;
    final Path zipPath = fileSplit.getPath();
    final FileSystem fileSystem = zipPath.getFileSystem(context.getConfiguration());
    zipIn = new ZipInputStream(fileSystem.open(zipPath));
}

From source file:com.marklogic.mapreduce.ForestReader.java

License:Apache License

/**
 * Prepares this reader for the given split.
 *
 * Opens the split's data file and skips to the split's start offset, opens
 * the "Ordinals" and "Timestamps" files from the same directory, validates
 * the configured value class, and loads the collection, directory and type
 * filters from the configuration. Directory filters that do not end with
 * "/" are replaced by the same value with "/" appended.
 *
 * @param split the split to read; cast to {@code FileSplit}
 * @param context the task attempt supplying the configuration
 * @throws IOException if a file cannot be opened or skipped
 * @throws InterruptedException declared by the overridden method
 * @throws IllegalArgumentException if the configured {@code INPUT_VALUE_CLASS}
 *         is not assignable to {@code ForestDocument}
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    this.split = (FileSplit) split;
    conf = context.getConfiguration();

    // Data file, positioned at the start of this split.
    final Path dataPath = this.split.getPath();
    final FileSystem fs = dataPath.getFileSystem(conf);
    dataIs = new BiendianDataInputStream(fs.open(dataPath));
    dataIs.skipBytes(this.split.getStart());

    // Companion files that live next to the data file.
    final Path forestDir = dataPath.getParent();
    ordIs = new BiendianDataInputStream(fs.open(new Path(forestDir, "Ordinals")));
    tsIs = new BiendianDataInputStream(fs.open(new Path(forestDir, "Timestamps")));

    valueClass = conf.getClass(INPUT_VALUE_CLASS, ForestDocument.class, Writable.class);
    if (!ForestDocument.class.isAssignableFrom(valueClass)) {
        throw new IllegalArgumentException("Unsupported " + INPUT_VALUE_CLASS);
    }
    largeForestDir = new Path(forestDir.getParent(), "Large");

    colFilters = conf.getStringCollection(COLLECTION_FILTER);
    dirFilters = conf.getStringCollection(DIRECTORY_FILTER);

    // Swap every directory filter lacking a trailing slash for its slashed
    // form. Replacements are appended after the scan because the collection
    // cannot be added to while it is being iterated.
    final Collection<String> slashedDirs = new ArrayList<String>();
    final Iterator<String> dirIt = dirFilters.iterator();
    while (dirIt.hasNext()) {
        final String dir = dirIt.next();
        if (dir.endsWith("/")) {
            continue;
        }
        final String slashed = dir + "/";
        dirIt.remove();
        slashedDirs.add(slashed);
    }
    if (!slashedDirs.isEmpty()) {
        dirFilters.addAll(slashedDirs);
    }

    typeFilters = conf.getStringCollection(TYPE_FILTER);
}

From source file:com.marklogic.mapreduce.KeyValueInputFormat.java

License:Apache License

/**
 * Creates a {@code KeyValueReader} configured from the task attempt's
 * job configuration. The split itself is not inspected here.
 *
 * @param split the input split (unused by this method)
 * @param context the task attempt supplying the configuration
 * @return a new {@code KeyValueReader}
 */
@Override
public RecordReader<KEYIN, VALUEIN> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    RecordReader<KEYIN, VALUEIN> reader =
            new KeyValueReader<KEYIN, VALUEIN>(context.getConfiguration());
    return reader;
}

From source file:com.marklogic.mapreduce.KeyValueOutputFormat.java

License:Apache License

/**
 * Creates a {@code KeyValueWriter} for this task attempt.
 *
 * The host list is obtained from the job configuration via {@code getHosts},
 * one host is chosen with {@code InternalUtilities.getHost}, and the writer
 * is built with the configuration and that host.
 *
 * @param context the task attempt supplying the configuration
 * @return a new {@code KeyValueWriter}
 * @throws IOException declared by the overridden method; may come from the host lookup
 * @throws InterruptedException declared by the overridden method
 */
@Override
public RecordWriter<KEYOUT, VALUEOUT> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    final Configuration jobConf = context.getConfiguration();
    final String targetHost = InternalUtilities.getHost(getHosts(jobConf));
    return new KeyValueWriter<KEYOUT, VALUEOUT>(jobConf, targetHost);
}