List of usage examples for the method org.apache.hadoop.io.Text.find
public int find(String what)
From source file:org.apache.accumulo.server.tabletserver.Tablet.java
License:Apache License
/** * yet another constructor - this one allows us to avoid costly lookups into the Metadata table if we already know the files we need - as at split time */// www . j a va2 s.c o m private Tablet(final TabletServer tabletServer, final Text location, final KeyExtent extent, final TabletResourceManager trm, final Configuration conf, final VolumeManager fs, final List<LogEntry> logEntries, final SortedMap<FileRef, DataFileValue> datafiles, String time, final TServerInstance lastLocation, Set<FileRef> scanFiles, long initFlushID, long initCompactID) throws IOException { Path locationPath; if (location.find(":") >= 0) { locationPath = new Path(location.toString()); } else { locationPath = fs.getFullPath(FileType.TABLE, extent.getTableId().toString() + location.toString()); } this.location = locationPath.makeQualified(fs.getFileSystemByPath(locationPath)); this.lastLocation = lastLocation; this.tabletDirectory = location.toString(); this.conf = conf; this.acuTableConf = tabletServer.getTableConfiguration(extent); this.fs = fs; this.extent = extent; this.tabletResources = trm; this.lastFlushID = initFlushID; this.lastCompactID = initCompactID; if (extent.isRootTablet()) { long rtime = Long.MIN_VALUE; for (FileRef ref : datafiles.keySet()) { Path path = ref.path(); FileSystem ns = fs.getFileSystemByPath(path); FileSKVIterator reader = FileOperations.getInstance().openReader(path.toString(), true, ns, ns.getConf(), tabletServer.getTableConfiguration(extent)); long maxTime = -1; try { while (reader.hasTop()) { maxTime = Math.max(maxTime, reader.getTopKey().getTimestamp()); reader.next(); } } finally { reader.close(); } if (maxTime > rtime) { time = TabletTime.LOGICAL_TIME_ID + "" + maxTime; rtime = maxTime; } } } this.tabletServer = tabletServer; this.logId = tabletServer.createLogId(extent); this.timer = new TabletStatsKeeper(); setupDefaultSecurityLabels(extent); tabletMemory = new TabletMemory(); tabletTime = TabletTime.getInstance(time); persistedTime = 
tabletTime.getTime(); acuTableConf.addObserver(configObserver = new ConfigurationObserver() { private void reloadConstraints() { constraintChecker.set(new ConstraintChecker(getTableConfiguration())); } @Override public void propertiesChanged() { reloadConstraints(); try { setupDefaultSecurityLabels(extent); } catch (Exception e) { log.error("Failed to reload default security labels for extent: " + extent.toString()); } } @Override public void propertyChanged(String prop) { if (prop.startsWith(Property.TABLE_CONSTRAINT_PREFIX.getKey())) reloadConstraints(); else if (prop.equals(Property.TABLE_DEFAULT_SCANTIME_VISIBILITY.getKey())) { try { log.info("Default security labels changed for extent: " + extent.toString()); setupDefaultSecurityLabels(extent); } catch (Exception e) { log.error("Failed to reload default security labels for extent: " + extent.toString()); } } } @Override public void sessionExpired() { log.debug("Session expired, no longer updating per table props..."); } }); // Force a load of any per-table properties configObserver.propertiesChanged(); tabletResources.setTablet(this, acuTableConf); if (!logEntries.isEmpty()) { log.info("Starting Write-Ahead Log recovery for " + this.extent); final long[] count = new long[2]; final CommitSession commitSession = tabletMemory.getCommitSession(); count[1] = Long.MIN_VALUE; try { Set<String> absPaths = new HashSet<String>(); for (FileRef ref : datafiles.keySet()) absPaths.add(ref.path().toString()); tabletServer.recover(this.tabletServer.getFileSystem(), this, logEntries, absPaths, new MutationReceiver() { @Override public void receive(Mutation m) { // LogReader.printMutation(m); Collection<ColumnUpdate> muts = m.getUpdates(); for (ColumnUpdate columnUpdate : muts) { if (!columnUpdate.hasTimestamp()) { // if it is not a user set timestamp, it must have been set // by the system count[1] = Math.max(count[1], columnUpdate.getTimestamp()); } } tabletMemory.mutate(commitSession, Collections.singletonList(m)); 
count[0]++; } }); if (count[1] != Long.MIN_VALUE) { tabletTime.useMaxTimeFromWALog(count[1]); } commitSession.updateMaxCommittedTime(tabletTime.getTime()); tabletMemory.updateMemoryUsageStats(); if (count[0] == 0) { MetadataTableUtil.removeUnusedWALEntries(extent, logEntries, tabletServer.getLock()); logEntries.clear(); } } catch (Throwable t) { if (acuTableConf.getBoolean(Property.TABLE_FAILURES_IGNORE)) { log.warn("Error recovering from log files: ", t); } else { throw new RuntimeException(t); } } // make some closed references that represent the recovered logs currentLogs = new HashSet<DfsLogger>(); for (LogEntry logEntry : logEntries) { for (String log : logEntry.logSet) { String[] parts = log.split("/", 2); Path file = fs.getFullPath(FileType.WAL, parts[1]); currentLogs.add(new DfsLogger(tabletServer.getServerConfig(), logEntry.server, file)); } } log.info("Write-Ahead Log recovery complete for " + this.extent + " (" + count[0] + " mutations applied, " + tabletMemory.getNumEntries() + " entries created)"); } String contextName = acuTableConf.get(Property.TABLE_CLASSPATH); if (contextName != null && !contextName.equals("")) { // initialize context classloader, instead of possibly waiting for it to initialize for a scan // TODO this could hang, causing other tablets to fail to load - ACCUMULO-1292 AccumuloVFSClassLoader.getContextManager().getClassLoader(contextName); } // do this last after tablet is completely setup because it // could cause major compaction to start datafileManager = new DatafileManager(datafiles); computeNumEntries(); datafileManager.removeFilesAfterScan(scanFiles); // look for hints of a failure on the previous tablet server if (!logEntries.isEmpty() || needsMajorCompaction(MajorCompactionReason.NORMAL)) { // look for any temp files hanging around removeOldTemporaryFiles(); } log.log(TLevel.TABLET_HIST, extent + " opened "); }
From source file:org.apache.accumulo.tserver.Tablet.java
License:Apache License
/** * yet another constructor - this one allows us to avoid costly lookups into the Metadata table if we already know the files we need - as at split time *///from w w w. j a v a2 s . co m private Tablet(final TabletServer tabletServer, final Text location, final KeyExtent extent, final TabletResourceManager trm, final Configuration conf, final VolumeManager fs, final List<LogEntry> logEntries, final SortedMap<FileRef, DataFileValue> datafiles, String time, final TServerInstance lastLocation, Set<FileRef> scanFiles, long initFlushID, long initCompactID) throws IOException { Path locationPath; if (location.find(":") >= 0) { locationPath = new Path(location.toString()); } else { locationPath = fs.getFullPath(FileType.TABLE, extent.getTableId().toString() + location.toString()); } locationPath = DirectoryDecommissioner.checkTabletDirectory(tabletServer, fs, extent, locationPath); this.location = locationPath; this.lastLocation = lastLocation; this.tabletDirectory = location.toString(); this.conf = conf; this.acuTableConf = tabletServer.getTableConfiguration(extent); this.fs = fs; this.extent = extent; this.tabletResources = trm; this.lastFlushID = initFlushID; this.lastCompactID = initCompactID; if (extent.isRootTablet()) { long rtime = Long.MIN_VALUE; for (FileRef ref : datafiles.keySet()) { Path path = ref.path(); FileSystem ns = fs.getFileSystemByPath(path); FileSKVIterator reader = FileOperations.getInstance().openReader(path.toString(), true, ns, ns.getConf(), tabletServer.getTableConfiguration(extent)); long maxTime = -1; try { while (reader.hasTop()) { maxTime = Math.max(maxTime, reader.getTopKey().getTimestamp()); reader.next(); } } finally { reader.close(); } if (maxTime > rtime) { time = TabletTime.LOGICAL_TIME_ID + "" + maxTime; rtime = maxTime; } } } if (time == null && datafiles.isEmpty() && extent.equals(RootTable.OLD_EXTENT)) { // recovery... 
old root tablet has no data, so time doesn't matter: time = TabletTime.LOGICAL_TIME_ID + "" + Long.MIN_VALUE; } this.tabletServer = tabletServer; this.logId = tabletServer.createLogId(extent); this.timer = new TabletStatsKeeper(); setupDefaultSecurityLabels(extent); tabletMemory = new TabletMemory(); tabletTime = TabletTime.getInstance(time); persistedTime = tabletTime.getTime(); acuTableConf.addObserver(configObserver = new ConfigurationObserver() { private void reloadConstraints() { constraintChecker.set(new ConstraintChecker(acuTableConf)); } @Override public void propertiesChanged() { reloadConstraints(); try { setupDefaultSecurityLabels(extent); } catch (Exception e) { log.error("Failed to reload default security labels for extent: " + extent.toString()); } } @Override public void propertyChanged(String prop) { if (prop.startsWith(Property.TABLE_CONSTRAINT_PREFIX.getKey())) reloadConstraints(); else if (prop.equals(Property.TABLE_DEFAULT_SCANTIME_VISIBILITY.getKey())) { try { log.info("Default security labels changed for extent: " + extent.toString()); setupDefaultSecurityLabels(extent); } catch (Exception e) { log.error("Failed to reload default security labels for extent: " + extent.toString()); } } } @Override public void sessionExpired() { log.debug("Session expired, no longer updating per table props..."); } }); acuTableConf.getNamespaceConfiguration().addObserver(configObserver); // Force a load of any per-table properties configObserver.propertiesChanged(); if (!logEntries.isEmpty()) { log.info("Starting Write-Ahead Log recovery for " + this.extent); final long[] count = new long[2]; final CommitSession commitSession = tabletMemory.getCommitSession(); count[1] = Long.MIN_VALUE; try { Set<String> absPaths = new HashSet<String>(); for (FileRef ref : datafiles.keySet()) absPaths.add(ref.path().toString()); tabletServer.recover(this.tabletServer.getFileSystem(), extent, acuTableConf, logEntries, absPaths, new MutationReceiver() { @Override public void 
receive(Mutation m) { // LogReader.printMutation(m); Collection<ColumnUpdate> muts = m.getUpdates(); for (ColumnUpdate columnUpdate : muts) { if (!columnUpdate.hasTimestamp()) { // if it is not a user set timestamp, it must have been set // by the system count[1] = Math.max(count[1], columnUpdate.getTimestamp()); } } tabletMemory.mutate(commitSession, Collections.singletonList(m)); count[0]++; } }); if (count[1] != Long.MIN_VALUE) { tabletTime.useMaxTimeFromWALog(count[1]); } commitSession.updateMaxCommittedTime(tabletTime.getTime()); if (count[0] == 0) { MetadataTableUtil.removeUnusedWALEntries(extent, logEntries, tabletServer.getLock()); logEntries.clear(); } } catch (Throwable t) { if (acuTableConf.getBoolean(Property.TABLE_FAILURES_IGNORE)) { log.warn("Error recovering from log files: ", t); } else { throw new RuntimeException(t); } } // make some closed references that represent the recovered logs currentLogs = new HashSet<DfsLogger>(); for (LogEntry logEntry : logEntries) { for (String log : logEntry.logSet) { currentLogs.add(new DfsLogger(tabletServer.getServerConfig(), log)); } } log.info("Write-Ahead Log recovery complete for " + this.extent + " (" + count[0] + " mutations applied, " + tabletMemory.getNumEntries() + " entries created)"); } String contextName = acuTableConf.get(Property.TABLE_CLASSPATH); if (contextName != null && !contextName.equals("")) { // initialize context classloader, instead of possibly waiting for it to initialize for a scan // TODO this could hang, causing other tablets to fail to load - ACCUMULO-1292 AccumuloVFSClassLoader.getContextManager().getClassLoader(contextName); } // do this last after tablet is completely setup because it // could cause major compaction to start datafileManager = new DatafileManager(datafiles); computeNumEntries(); datafileManager.removeFilesAfterScan(scanFiles); // look for hints of a failure on the previous tablet server if (!logEntries.isEmpty() || needsMajorCompaction(MajorCompactionReason.NORMAL)) 
{ // look for any temp files hanging around removeOldTemporaryFiles(); } log.log(TLevel.TABLET_HIST, extent + " opened"); }
From source file:org.cloudata.examples.web.TermUploadReduce.java
License:Apache License
public void reduce(WritableComparable key, Iterator<Writable> values, OutputCollector<WritableComparable, Writable> collector, Reporter reporter) throws IOException { if (exception != null) { throw exception; }/*from w w w .j av a 2 s . c om*/ Text tKey = (Text) key; int keyIndex = tKey.find("\t"); if (keyIndex < 0) { LOG.error("invalid value:" + tKey); return; } Row.Key rowKey = new Row.Key(tKey.getBytes(), 0, keyIndex); String keyStr = new String(tKey.getBytes(), keyIndex + 1, (tKey.getLength() - keyIndex - 1), "EUC-KR"); //term, ?(tf), documentId url, freq, weight //term, ?(df), df String[] valueTokens = keyStr.split("\t"); if (rowKey.getLength() < TestWebPage.MIN_TERM_LENGTH) { return; } count++; if (count % 50000 == 0) { System.out.println(new Date() + ":" + keyStr); } if (valueTokens.length == 2 && "df".equals(valueTokens[0])) { Row row = new Row(rowKey); row.addCell("df", new Cell(Cell.Key.EMPTY_KEY, valueTokens[1].getBytes())); dfUploader.put(row); } else if (valueTokens.length == 4 && "tf".equals(valueTokens[0])) { Row row = new Row(rowKey); String documentId = valueTokens[1]; String freq = valueTokens[2]; String weight = valueTokens[3]; row.addCell("tf", new Cell(new Cell.Key(documentId), freq.getBytes())); row.addCell("weight", new Cell(new Cell.Key(documentId), weight.getBytes())); byte[] documentIdBytes = documentId.getBytes(); row.addCell("i_weight", new Cell(new Cell.Key((df.format(1.0 - Double.parseDouble(weight)) + documentId).getBytes()), documentIdBytes)); weightUploader.put(row); } else { LOG.error("invalid value:" + valueTokens.length + "," + count + "," + valueTokens[1] + "," + keyStr); return; } }
From source file:org.cloudata.examples.web.WebKeyRangePartitioner.java
License:Apache License
public int getPartition(WritableComparable key, Writable value, int numPartitions) { if (confException != null) { LOG.error(confException.getMessage(), confException); return -1; }/*w w w . j a v a 2 s.co m*/ if (numPartitions != tabletInfoSet.size()) { LOG.error( "tablet count(" + tabletInfoSet.size() + ") not equals numPartitions (" + numPartitions + ")"); return -1; } if (tabletInfoSet.size() == 0) { LOG.error("tablet partition size is zero"); return -1; } int partitionNumber = 0; Text tKey = (Text) key; Row.Key rowKey; int keyIndex = tKey.find("\t"); if (keyIndex < 0) { LOG.error("invalid value:" + tKey); rowKey = Row.Key.MAX_KEY; } else { rowKey = new Row.Key(tKey.getBytes(), 0, keyIndex); } SortedSet<RowKeyItem> tailSet = tabletInfoSet.tailSet(new RowKeyItem(rowKey, 0)); RowKeyItem item = null; if (tailSet.size() > 0) { item = tailSet.first(); partitionNumber = item.index; } else { item = tabletInfoSet.last(); partitionNumber = item.index; } if (partitionNumber >= numPartitions) { LOG.info("Partition Number is : " + partitionNumber + ", numPartitions : " + numPartitions + ", Row.Key : " + key.toString()); partitionNumber = numPartitions - 1; } //LOG.info("tablet partition num:" + partitionNumber); count++; if (count % 5000 == 0) { try { System.out.println("Partitioned:" + new String(rowKey.getBytes(), "EUC-KR") + "," + new String(item.rowKey.getBytes(), "EUC-KR")); } catch (UnsupportedEncodingException e) { } } return partitionNumber; }
From source file:uk.bl.wa.hadoop.mapred.FrequencyCountingReducer.java
License:Open Source License
@Override public void reduce(Text key, Iterator<Text> iterator, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { log.warn("Reducing for key: " + key); // Use a simple set to collect only distinct results for this key: Map<String, MutableInt> matches = new HashMap<String, MutableInt>(); while (iterator.hasNext()) { String m = iterator.next().toString(); // Get or set up the counter: MutableInt value = matches.get(m); if (value == null) { value = new MutableInt(); matches.put(m, value);//from w ww . jav a 2s. c o m } // Increment the counter for this match: value.inc(); } // Loop through and collect all distinct matches: Text result = new Text(); Text outKey = key; OutputCollector<Text, Text> collector; int pos = key.find("__"); if (pos == -1) { collector = output; } else { String[] fp = key.toString().split("__"); collector = mos.getCollector(fp[0], fp[1], reporter); outKey = new Text(fp[1]); } log.info("For key: " + key + " outKey " + outKey + " outputting " + matches.size() + " unique values."); for (String match : matches.keySet()) { // This outputs the count: result.set(match + "\t" + matches.get(match).get()); // And collect: collector.collect(outKey, result); } }
From source file:uk.bl.wa.hadoop.mapred.ReservoirSamplingReducer.java
License:Open Source License
@Override public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException { Text item;/*from w w w . j a va 2 s . c om*/ long numItemsSeen = 0; Vector<Text> reservoir = new Vector<Text>(); RandomDataGenerator random = new RandomDataGenerator(); // Fix the seed so repoducible by default: random.reSeed(defaultSeed); // Iterate through all values: while (values.hasNext()) { item = values.next(); if (reservoir.size() < numSamples) { // reservoir not yet full, just append reservoir.add(item); } else { // find a sample to replace long rIndex = random.nextLong(0, numItemsSeen); if (rIndex < numSamples) { reservoir.set((int) rIndex, item); } } numItemsSeen++; } // Choose the output: Text outKey = key; OutputCollector<Text, Text> collector; int pos = key.find("__"); if (pos == -1) { collector = output; } else { String[] fp = key.toString().split("__"); collector = getCollector(fp[0], fp[1], reporter); outKey = new Text(fp[1]); } // Now output the sample: for (Text sto : reservoir) { collector.collect(outKey, sto); } }