Usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()
public Configuration getConfiguration();
From source file:com.marklogic.contentpump.RDFInputFormat.java
License:Apache License
protected LinkedMapWritable getRoleMap(TaskAttemptContext context) throws IOException { //Restores the object from the configuration. Configuration conf = context.getConfiguration(); LinkedMapWritable fhmap = null;//from ww w. j a v a 2 s . c o m if (conf.get(ConfigConstants.CONF_ROLE_MAP) != null) { fhmap = DefaultStringifier.load(conf, ConfigConstants.CONF_ROLE_MAP, LinkedMapWritable.class); } return fhmap; }
From source file:com.marklogic.contentpump.RDFInputFormat.java
License:Apache License
protected String getServerVersion(TaskAttemptContext context) throws IOException { //Restores the object from the configuration. Configuration conf = context.getConfiguration(); Text version = DefaultStringifier.load(conf, ConfigConstants.CONF_ML_VERSION, Text.class); return version.toString(); }
From source file:com.marklogic.contentpump.RDFReader.java
License:Apache License
@Override public void initialize(InputSplit inSplit, TaskAttemptContext context) throws IOException, InterruptedException { if (version == null) throw new IOException("Server Version is null"); String majorVersion = version.substring(0, version.indexOf('.')); graphSupported = Integer.valueOf(majorVersion) >= 8; conf = context.getConfiguration(); String rdfopt = conf.get(ConfigConstants.RDF_STREAMING_MEMORY_THRESHOLD); if (rdfopt != null) { INMEMORYTHRESHOLD = Long.parseLong(rdfopt); }/*from ww w. j a v a2 s. c o m*/ rdfopt = conf.get(ConfigConstants.RDF_TRIPLES_PER_DOCUMENT); if (rdfopt != null) { MAXTRIPLESPERDOCUMENT = Integer.parseInt(rdfopt); } String fnAsColl = conf.get(ConfigConstants.CONF_OUTPUT_FILENAME_AS_COLLECTION); if (fnAsColl != null) { LOG.warn( "The -filename_as_collection has no effect with input_type RDF, use -output_collections instead."); } String[] collections = conf.getStrings(MarkLogicConstants.OUTPUT_COLLECTION); outputGraph = conf.get(MarkLogicConstants.OUTPUT_GRAPH); outputOverrideGraph = conf.get(MarkLogicConstants.OUTPUT_OVERRIDE_GRAPH); //if no defulat-graph set and output_collections is set ignoreCollectionQuad = (outputGraph == null && collections != null) || outputOverrideGraph != null; hasOutputCol = (collections != null); Class<? 
extends Writable> valueClass = RDFWritable.class; @SuppressWarnings("unchecked") VALUEIN localValue = (VALUEIN) ReflectionUtils.newInstance(valueClass, conf); value = localValue; encoding = conf.get(MarkLogicConstants.OUTPUT_CONTENT_ENCODING, DEFAULT_ENCODING); setFile(((FileSplit) inSplit).getPath()); fs = file.getFileSystem(context.getConfiguration()); FileStatus status = fs.getFileStatus(file); if (status.isDirectory()) { iterator = new FileIterator((FileSplit) inSplit, context); inSplit = iterator.next(); } try { initStream(inSplit); } catch (IOException e) { LOG.error("Invalid input: " + file.getName() + ": " + e.getMessage()); throw e; } String[] perms = conf.getStrings(MarkLogicConstants.OUTPUT_PERMISSION); if (perms != null) { defaultPerms = PermissionUtil.getPermissions(perms).toArray(new ContentPermission[perms.length >> 1]); } else { List<ContentPermission> tmp = PermissionUtil.getDefaultPermissions(conf, roleMap); if (tmp != null) defaultPerms = tmp.toArray(new ContentPermission[tmp.size()]); } if (roleMapExists) initExistingMapPerms(); }
From source file:com.marklogic.contentpump.SequenceFileReader.java
License:Apache License
@Override public void initialize(InputSplit inSplit, TaskAttemptContext context) throws IOException, InterruptedException { initConfig(context);//from w w w. j a v a 2s. co m batchSize = conf.getInt(MarkLogicConstants.BATCH_SIZE, MarkLogicConstants.DEFAULT_BATCH_SIZE); setFile(((FileSplit) inSplit).getPath()); fs = file.getFileSystem(context.getConfiguration()); FileStatus status = fs.getFileStatus(file); if (status.isDirectory()) { iterator = new FileIterator((FileSplit) inSplit, context); inSplit = iterator.next(); } initReader(inSplit); }
From source file:com.marklogic.contentpump.SingleDocumentOutputFormat.java
License:Apache License
/**
 * Creates a writer for the output path named in the job configuration.
 *
 * @param contex task attempt context that supplies the job configuration
 * @return a {@code SingleDocumentWriter} for the path stored under
 *         {@code ConfigConstants.CONF_OUTPUT_FILEPATH}
 * @throws IOException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
public RecordWriter<DocumentURI, MarkLogicDocument> getRecordWriter(TaskAttemptContext contex)
        throws IOException, InterruptedException {
    Configuration jobConf = contex.getConfiguration();
    Path outputPath = new Path(jobConf.get(ConfigConstants.CONF_OUTPUT_FILEPATH));
    return new SingleDocumentWriter(outputPath, jobConf);
}
From source file:com.marklogic.contentpump.SplitDelimitedTextReader.java
License:Apache License
/**
 * Prepares this reader for one delimited split: loads the configuration,
 * document type and delimiter settings, resolves the file system, records
 * the split's byte range and initializes the parser.
 *
 * @param inSplit the split to read; it is cast to {@code FileSplit} and
 *        to {@code DelimitedSplit}
 * @param context task attempt context that supplies the job configuration
 * @throws IOException if the file system lookup or parser initialization fails
 * @throws InterruptedException declared by the overridden method
 */
@Override
public void initialize(InputSplit inSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    initConfig(context);
    initDocType();
    initDelimConf();

    FileSplit fileSplit = (FileSplit) inSplit;
    setFile(fileSplit.getPath());
    fs = file.getFileSystem(context.getConfiguration());

    // The range covered by this split is [start, start + length).
    DelimitedSplit delimSplit = (DelimitedSplit) inSplit;
    start = delimSplit.getStart();
    end = start + delimSplit.getLength();

    initParser(inSplit);
}
From source file:com.marklogic.contentpump.TransformOutputFormat.java
License:Apache License
@Override public RecordWriter<DocumentURI, VALUEOUT> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException { Configuration conf = context.getConfiguration(); fastLoad = Boolean.valueOf(conf.get(OUTPUT_FAST_LOAD)); Map<String, ContentSource> sourceMap = getSourceMap(fastLoad, context); getMimetypesMap();/*from w w w . j a v a 2 s . c om*/ // construct the ContentWriter return new TransformWriter<VALUEOUT>(conf, sourceMap, fastLoad, am); }
From source file:com.marklogic.contentpump.utilities.FileIterator.java
License:Apache License
/**
 * Creates an iterator over the given splits and builds the path filter
 * applied to them.
 *
 * @param iterator source of the file splits to iterate over
 * @param context task attempt context that supplies the job configuration
 */
public FileIterator(Iterator<FileSplit> iterator, TaskAttemptContext context) {
    this.iterator = iterator;
    conf = context.getConfiguration();
    fileDirSplits = new LinkedList<FileSplit>();

    // The hidden-file filter is always applied; the job filter only when one exists.
    List<PathFilter> chain = new ArrayList<PathFilter>();
    chain.add(FileAndDirectoryInputFormat.hiddenFileFilter);
    PathFilter userFilter = getInputPathFilter();
    if (userFilter != null) {
        chain.add(userFilter);
    }
    inputFilter = new FileAndDirectoryInputFormat.MultiPathFilter(chain);
}
From source file:com.marklogic.contentpump.utilities.FileIterator.java
License:Apache License
/**
 * Creates an iterator seeded with a single split and builds the path filter
 * applied to it.
 *
 * @param inSplit the one split to start iterating from
 * @param context task attempt context that supplies the job configuration
 */
public FileIterator(FileSplit inSplit, TaskAttemptContext context) {
    conf = context.getConfiguration();
    fileDirSplits = new LinkedList<FileSplit>();

    // Wrap the single split in a list so it can be consumed through an iterator.
    LinkedList<FileSplit> seed = new LinkedList<FileSplit>();
    seed.add(inSplit);
    iterator = seed.iterator();

    // The hidden-file filter is always applied; the job filter only when one exists.
    List<PathFilter> chain = new ArrayList<PathFilter>();
    chain.add(FileAndDirectoryInputFormat.hiddenFileFilter);
    PathFilter userFilter = getInputPathFilter();
    if (userFilter != null) {
        chain.add(userFilter);
    }
    inputFilter = new FileAndDirectoryInputFormat.MultiPathFilter(chain);
}
From source file:com.marklogic.mapreduce.ContentOutputFormat.java
License:Apache License
protected Map<String, ContentSource> getSourceMap(boolean fastLoad, TaskAttemptContext context) throws IOException { Configuration conf = context.getConfiguration(); Map<String, ContentSource> sourceMap = new LinkedHashMap<String, ContentSource>(); if (fastLoad) { LinkedMapWritable forestStatusMap = getForestStatusMap(conf); // get host->contentSource mapping Map<String, ContentSource> hostSourceMap = new HashMap<String, ContentSource>(); for (Writable v : forestStatusMap.values()) { ForestInfo fs = (ForestInfo) v; //unupdatable forests if (fs.getUpdatable() == false) continue; if (hostSourceMap.get(fs.getHostName()) == null) { try { ContentSource cs = InternalUtilities.getOutputContentSource(conf, fs.getHostName().toString()); hostSourceMap.put(fs.getHostName(), cs); } catch (XccConfigException e) { throw new IOException(e); }/*from www .j a v a 2 s .co m*/ } } // consolidate forest->host map and host-contentSource map to // forest-contentSource map for (Writable forestId : forestStatusMap.keySet()) { String forest = ((Text) forestId).toString(); String hostName = ((ForestInfo) forestStatusMap.get(forestId)).getHostName(); ContentSource cs = hostSourceMap.get(hostName); sourceMap.put(ID_PREFIX + forest, cs); } } else { TextArrayWritable hosts = getHosts(conf); for (Writable host : hosts.get()) { String hostStr = host.toString(); try { ContentSource cs = InternalUtilities.getOutputContentSource(conf, hostStr); sourceMap.put(hostStr, cs); } catch (XccConfigException e) { throw new IOException(e); } } } return sourceMap; }