Usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext#getConfiguration()
/**
 * Returns the {@link Configuration} exposed by this context.
 *
 * @return the configuration object
 */
public Configuration getConfiguration();
From source file:com.marklogic.mapreduce.ContentOutputFormat.java
License:Apache License
@Override public RecordWriter<DocumentURI, VALUEOUT> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); // TODO: if MAPREDUCE-3377 still exists, need to re-run initialize fastLoad = Boolean.valueOf(conf.get(OUTPUT_FAST_LOAD)); Map<String, ContentSource> sourceMap = getSourceMap(fastLoad, context); // construct the ContentWriter return new ContentWriter<VALUEOUT>(conf, sourceMap, fastLoad, am); }
From source file:com.marklogic.mapreduce.DocumentInputFormat.java
License:Apache License
/**
 * Creates a {@link DocumentReader} configured from the task's configuration.
 *
 * @param split the input split (not consulted here)
 * @param context the task attempt context supplying the configuration
 * @return a new {@code DocumentReader}
 * @throws IOException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
public RecordReader<DocumentURI, VALUEIN> createRecordReader(InputSplit split,
        TaskAttemptContext context) throws IOException, InterruptedException {
    final DocumentReader<VALUEIN> reader =
            new DocumentReader<VALUEIN>(context.getConfiguration());
    return reader;
}
From source file:com.marklogic.mapreduce.examples.BinaryReader.java
License:Apache License
/**
 * Creates a {@link BinaryWriter} targeting this format's output path.
 *
 * @param context the task attempt context; used both to resolve the output
 *        path and to supply the configuration
 * @return a new {@code BinaryWriter}
 * @throws IOException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
public RecordWriter<DocumentURI, BytesWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    final BinaryWriter writer =
            new BinaryWriter(getOutputPath(context), context.getConfiguration());
    return writer;
}
From source file:com.marklogic.mapreduce.examples.ContentLoader.java
License:Apache License
/**
 * Reads the whole file behind the split into memory as a single record.
 *
 * <p>The key is set to the file path and the value to the file's bytes.
 * {@code hasNext} is set to {@code true} only when the file was read
 * completely; a failed read leaves the reader with no record.
 *
 * @param inSplit the split to load; must be a {@link FileSplit}
 * @param context the task attempt context supplying the configuration
 * @throws IOException if the split is too large to buffer in one array, or if
 *         the file cannot be opened or closed
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void initialize(InputSplit inSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    final long length = inSplit.getLength();
    bytesTotal = length;
    Path file = ((FileSplit) inSplit).getPath();

    // The split is buffered in one byte[]; an (int) cast of a larger length
    // would either go negative or silently truncate the data.
    if (length > Integer.MAX_VALUE) {
        throw new IOException("Split of " + length
                + " bytes is too large to load into memory: " + file);
    }

    // Do everything that can fail before opening the stream, so that nothing
    // sits between open() and the try/finally that closes it.
    byte[] buf = new byte[(int) length];
    FileSystem fs = file.getFileSystem(context.getConfiguration());
    key.set(file.toString());

    FSDataInputStream fileIn = fs.open(file);
    try {
        fileIn.readFully(buf);
        value.set(buf);
        hasNext = true;
    } catch (Exception e) {
        // Best effort (kept from the original): an unreadable file yields no record.
        hasNext = false;
    } finally {
        fileIn.close();
    }
}
From source file:com.marklogic.mapreduce.examples.LinkCountHDFS.java
License:Apache License
@Override public void initialize(InputSplit inSplit, TaskAttemptContext context) throws IOException, InterruptedException { Path file = ((FileSplit) inSplit).getPath(); FileSystem fs = file.getFileSystem(context.getConfiguration()); FSDataInputStream fileIn = fs.open(file); DocumentBuilder docBuilder = builderLocal.get(); try {//from w w w . j a v a 2 s. c o m Document document = docBuilder.parse(fileIn); net.sf.saxon.s9api.DocumentBuilder db = saxonBuilderLocal.get(); XdmNode xdmDoc = db.wrap(document); XPathCompiler xpath = proc.newXPathCompiler(); xpath.declareNamespace("wp", "http://www.mediawiki.org/xml/export-0.4/"); XPathSelector selector = xpath.compile(PATH_EXPRESSION).load(); selector.setContextItem(xdmDoc); items = new ArrayList<XdmItem>(); for (XdmItem item : selector) { items.add(item); } } catch (SAXException ex) { ex.printStackTrace(); throw new IOException(ex); } catch (SaxonApiException e) { e.printStackTrace(); } finally { if (fileIn != null) { fileIn.close(); } } }
From source file:com.marklogic.mapreduce.examples.WikiLoader.java
License:Apache License
@Override public void initialize(InputSplit inSplit, TaskAttemptContext context) throws IOException, InterruptedException { Path file = ((FileSplit) inSplit).getPath(); FileSystem fs = file.getFileSystem(context.getConfiguration()); FSDataInputStream fileIn = fs.open(file); byte[] buf = new byte[BUFFER_SIZE]; long bytesTotal = inSplit.getLength(); long start = ((FileSplit) inSplit).getStart(); fileIn.seek(start);/*from www . ja va2 s .c o m*/ long bytesRead = 0; StringBuilder pages = new StringBuilder(); int sindex = -1; while (true) { int length = (int) Math.min(bytesTotal - bytesRead, buf.length); int read = fileIn.read(buf, 0, length); if (read == -1) { System.out.println("Unexpected EOF: bytesTotal=" + bytesTotal + "bytesRead=" + bytesRead); break; } bytesRead += read; String temp = new String(new String(buf, 0, read)); if (sindex == -1) { // haven't found the start yet sindex = temp.indexOf(BEGIN_PAGE_TAG); if (sindex > -1) { pages.append(temp.substring(sindex)); } } else if (bytesRead < bytesTotal) { // haven't completed the split pages.append(temp); } else { // reached the end of this split // look for end int eindex = 0; if (temp.contains(END_DOC_TAG) || // reached the end of doc temp.endsWith(END_PAGE_TAG)) { eindex = temp.lastIndexOf(END_PAGE_TAG); pages.append(temp.substring(0, eindex + END_PAGE_TAG.length())); System.out.println("Found end of doc."); } else { // need to read ahead to look for end of page while (true) { read = fileIn.read(buf, 0, READ_AHEAD_SIZE); if (read == -1) { // no more to read System.out .println("Unexpected EOF: bytesTotal=" + bytesTotal + "bytesRead=" + bytesRead); System.out.println(temp); break; } bytesRead += read; // look for end temp = new String(buf, 0, read); eindex = temp.indexOf(END_PAGE_TAG); if (eindex > -1) { pages.append(temp.substring(0, eindex + END_PAGE_TAG.length())); break; } else { pages.append(temp); } } } break; } } fileIn.close(); articles = WikiModelProcessor.process(pages); }
From source file:com.marklogic.mapreduce.examples.ZipContentLoader.java
License:Apache License
/**
 * Opens the file behind the split and wraps it in a {@link ZipInputStream},
 * stored in {@code zipIn}.
 *
 * @param inSplit the split to read; must be a {@link FileSplit}
 * @param context the task attempt context supplying the configuration
 * @throws IOException if the file system or file cannot be opened
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void initialize(InputSplit inSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    final FileSplit fileSplit = (FileSplit) inSplit;
    final Path zipPath = fileSplit.getPath();
    final FileSystem fileSystem = zipPath.getFileSystem(context.getConfiguration());
    zipIn = new ZipInputStream(fileSystem.open(zipPath));
}
From source file:com.marklogic.mapreduce.ForestReader.java
License:Apache License
@Override public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { this.split = (FileSplit) split; conf = context.getConfiguration(); Path dataPath = this.split.getPath(); FileSystem fs = dataPath.getFileSystem(conf); dataIs = new BiendianDataInputStream(fs.open(dataPath)); dataIs.skipBytes(this.split.getStart()); Path ordPath = new Path(dataPath.getParent(), "Ordinals"); ordIs = new BiendianDataInputStream(fs.open(ordPath)); Path tsPath = new Path(dataPath.getParent(), "Timestamps"); tsIs = new BiendianDataInputStream(fs.open(tsPath)); valueClass = conf.getClass(INPUT_VALUE_CLASS, ForestDocument.class, Writable.class); if (!ForestDocument.class.isAssignableFrom(valueClass)) { throw new IllegalArgumentException("Unsupported " + INPUT_VALUE_CLASS); }//from www. ja v a 2 s. com largeForestDir = new Path(dataPath.getParent().getParent(), "Large"); colFilters = conf.getStringCollection(COLLECTION_FILTER); dirFilters = conf.getStringCollection(DIRECTORY_FILTER); Collection<String> addedDirs = null; for (Iterator<String> it = dirFilters.iterator(); it.hasNext();) { String dir = it.next(); if (!dir.endsWith("/")) { String newDir = dir + "/"; it.remove(); if (addedDirs == null) { addedDirs = new ArrayList<String>(); } addedDirs.add(newDir); } } if (addedDirs != null) { dirFilters.addAll(addedDirs); } typeFilters = conf.getStringCollection(TYPE_FILTER); }
From source file:com.marklogic.mapreduce.KeyValueInputFormat.java
License:Apache License
/**
 * Creates a {@link KeyValueReader} configured from the task's configuration.
 *
 * @param split the input split (not consulted here)
 * @param context the task attempt context supplying the configuration
 * @return a new {@code KeyValueReader}
 * @throws IOException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
public RecordReader<KEYIN, VALUEIN> createRecordReader(InputSplit split,
        TaskAttemptContext context) throws IOException, InterruptedException {
    final KeyValueReader<KEYIN, VALUEIN> reader =
            new KeyValueReader<KEYIN, VALUEIN>(context.getConfiguration());
    return reader;
}
From source file:com.marklogic.mapreduce.KeyValueOutputFormat.java
License:Apache License
/**
 * Creates a {@link KeyValueWriter} for a host chosen from the configured hosts.
 *
 * <p>The host list comes from {@code getHosts(conf)} and the single host is
 * selected by {@code InternalUtilities.getHost}.
 *
 * @param context the task attempt context supplying the configuration
 * @return a new {@code KeyValueWriter} bound to the configuration and host
 * @throws IOException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
public RecordWriter<KEYOUT, VALUEOUT> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    final Configuration jobConf = context.getConfiguration();
    final String targetHost = InternalUtilities.getHost(getHosts(jobConf));
    return new KeyValueWriter<KEYOUT, VALUEOUT>(jobConf, targetHost);
}