Example usage for org.apache.hadoop.io Text find

List of usage examples for org.apache.hadoop.io Text find

Introduction

On this page you can find example usages of the find method of org.apache.hadoop.io.Text.

Prototype

public int find(String what) 

Source Link

Usage

From source file:org.apache.accumulo.server.tabletserver.Tablet.java

License:Apache License

/**
 * yet another constructor - this one allows us to avoid costly lookups into the Metadata table if we already know the files we need - as at split time
 *
 * <p>Work performed, in order: resolve the tablet directory, initialise the simple fields, (root tablet only)
 * derive the tablet time from the data files, register a configuration observer, replay any write-ahead log
 * entries into the in-memory map, initialise the context classloader if one is configured, and finally create
 * the {@code DatafileManager}.
 *
 * @param location tablet directory; used verbatim as a path when it contains ":", otherwise appended to the
 *        table id and resolved through {@code fs.getFullPath}
 * @param logEntries write-ahead log entries to replay; cleared by this constructor when replay applied no
 *        mutations
 * @param datafiles data files the tablet starts with
 * @param time encoded tablet time; replaced for the root tablet when {@code datafiles} is non-empty
 * @param scanFiles files handed to {@code datafileManager.removeFilesAfterScan}
 * @throws IOException propagated from the file system / file reader calls
 */
private Tablet(final TabletServer tabletServer, final Text location, final KeyExtent extent,
        final TabletResourceManager trm, final Configuration conf, final VolumeManager fs,
        final List<LogEntry> logEntries, final SortedMap<FileRef, DataFileValue> datafiles, String time,
        final TServerInstance lastLocation, Set<FileRef> scanFiles, long initFlushID, long initCompactID)
        throws IOException {
    // A location containing ":" is used directly as a path (presumably a full URI - confirm);
    // anything else is resolved relative to the table's directory.
    Path locationPath;
    if (location.find(":") >= 0) {
        locationPath = new Path(location.toString());
    } else {
        locationPath = fs.getFullPath(FileType.TABLE, extent.getTableId().toString() + location.toString());
    }
    this.location = locationPath.makeQualified(fs.getFileSystemByPath(locationPath));
    this.lastLocation = lastLocation;
    this.tabletDirectory = location.toString();
    this.conf = conf;
    this.acuTableConf = tabletServer.getTableConfiguration(extent);

    this.fs = fs;
    this.extent = extent;
    this.tabletResources = trm;

    this.lastFlushID = initFlushID;
    this.lastCompactID = initCompactID;

    // Root tablet: ignore the supplied time and recompute it as the largest key timestamp
    // found by scanning every data file.
    if (extent.isRootTablet()) {

        long rtime = Long.MIN_VALUE;
        for (FileRef ref : datafiles.keySet()) {
            Path path = ref.path();
            FileSystem ns = fs.getFileSystemByPath(path);
            FileSKVIterator reader = FileOperations.getInstance().openReader(path.toString(), true, ns,
                    ns.getConf(), tabletServer.getTableConfiguration(extent));
            // -1 is the value used for a file that yields no keys
            long maxTime = -1;
            try {

                while (reader.hasTop()) {
                    maxTime = Math.max(maxTime, reader.getTopKey().getTimestamp());
                    reader.next();
                }

            } finally {
                // always release the reader, even if the scan fails
                reader.close();
            }

            if (maxTime > rtime) {
                time = TabletTime.LOGICAL_TIME_ID + "" + maxTime;
                rtime = maxTime;
            }
        }
    }

    this.tabletServer = tabletServer;
    this.logId = tabletServer.createLogId(extent);

    this.timer = new TabletStatsKeeper();

    setupDefaultSecurityLabels(extent);

    tabletMemory = new TabletMemory();
    tabletTime = TabletTime.getInstance(time);
    persistedTime = tabletTime.getTime();

    // Observer that rebuilds the constraint checker and the default security labels
    // whenever the table configuration changes.
    acuTableConf.addObserver(configObserver = new ConfigurationObserver() {

        private void reloadConstraints() {
            constraintChecker.set(new ConstraintChecker(getTableConfiguration()));
        }

        @Override
        public void propertiesChanged() {
            reloadConstraints();

            try {
                setupDefaultSecurityLabels(extent);
            } catch (Exception e) {
                // NOTE(review): the caught exception is not included in the log output
                log.error("Failed to reload default security labels for extent: " + extent.toString());
            }
        }

        @Override
        public void propertyChanged(String prop) {
            if (prop.startsWith(Property.TABLE_CONSTRAINT_PREFIX.getKey()))
                reloadConstraints();
            else if (prop.equals(Property.TABLE_DEFAULT_SCANTIME_VISIBILITY.getKey())) {
                try {
                    log.info("Default security labels changed for extent: " + extent.toString());
                    setupDefaultSecurityLabels(extent);
                } catch (Exception e) {
                    // NOTE(review): the caught exception is not included in the log output
                    log.error("Failed to reload default security labels for extent: " + extent.toString());
                }
            }

        }

        @Override
        public void sessionExpired() {
            log.debug("Session expired, no longer updating per table props...");
        }

    });
    // Force a load of any per-table properties
    configObserver.propertiesChanged();

    tabletResources.setTablet(this, acuTableConf);
    if (!logEntries.isEmpty()) {
        log.info("Starting Write-Ahead Log recovery for " + this.extent);
        // count[0] = number of mutations replayed,
        // count[1] = largest system-assigned timestamp seen (Long.MIN_VALUE = none).
        // An array is used so the anonymous receiver below can update the values.
        final long[] count = new long[2];
        final CommitSession commitSession = tabletMemory.getCommitSession();
        count[1] = Long.MIN_VALUE;
        try {
            Set<String> absPaths = new HashSet<String>();
            for (FileRef ref : datafiles.keySet())
                absPaths.add(ref.path().toString());

            tabletServer.recover(this.tabletServer.getFileSystem(), this, logEntries, absPaths,
                    new MutationReceiver() {
                        @Override
                        public void receive(Mutation m) {
                            // LogReader.printMutation(m);
                            Collection<ColumnUpdate> muts = m.getUpdates();
                            for (ColumnUpdate columnUpdate : muts) {
                                if (!columnUpdate.hasTimestamp()) {
                                    // if it is not a user set timestamp, it must have been set
                                    // by the system
                                    count[1] = Math.max(count[1], columnUpdate.getTimestamp());
                                }
                            }
                            tabletMemory.mutate(commitSession, Collections.singletonList(m));
                            count[0]++;
                        }
                    });

            if (count[1] != Long.MIN_VALUE) {
                tabletTime.useMaxTimeFromWALog(count[1]);
            }
            commitSession.updateMaxCommittedTime(tabletTime.getTime());

            tabletMemory.updateMemoryUsageStats();

            // Nothing was replayed: drop the log references from the metadata and from the caller's list.
            if (count[0] == 0) {
                MetadataTableUtil.removeUnusedWALEntries(extent, logEntries, tabletServer.getLock());
                logEntries.clear();
            }

        } catch (Throwable t) {
            // Recovery failures are only tolerated when the table is configured to ignore them.
            if (acuTableConf.getBoolean(Property.TABLE_FAILURES_IGNORE)) {
                log.warn("Error recovering from log files: ", t);
            } else {
                throw new RuntimeException(t);
            }
        }
        // make some closed references that represent the recovered logs
        currentLogs = new HashSet<DfsLogger>();
        for (LogEntry logEntry : logEntries) {
            // NOTE: the loop variable "log" shadows the logger of the same name inside this loop
            for (String log : logEntry.logSet) {
                // only the part after the first "/" is resolved against the WAL directory
                String[] parts = log.split("/", 2);
                Path file = fs.getFullPath(FileType.WAL, parts[1]);
                currentLogs.add(new DfsLogger(tabletServer.getServerConfig(), logEntry.server, file));
            }
        }

        log.info("Write-Ahead Log recovery complete for " + this.extent + " (" + count[0]
                + " mutations applied, " + tabletMemory.getNumEntries() + " entries created)");
    }

    String contextName = acuTableConf.get(Property.TABLE_CLASSPATH);
    if (contextName != null && !contextName.equals("")) {
        // initialize context classloader, instead of possibly waiting for it to initialize for a scan
        // TODO this could hang, causing other tablets to fail to load - ACCUMULO-1292
        AccumuloVFSClassLoader.getContextManager().getClassLoader(contextName);
    }

    // do this last after tablet is completely setup because it
    // could cause major compaction to start
    datafileManager = new DatafileManager(datafiles);

    computeNumEntries();

    datafileManager.removeFilesAfterScan(scanFiles);

    // look for hints of a failure on the previous tablet server
    if (!logEntries.isEmpty() || needsMajorCompaction(MajorCompactionReason.NORMAL)) {
        // look for any temp files hanging around
        removeOldTemporaryFiles();
    }

    log.log(TLevel.TABLET_HIST, extent + " opened ");
}

From source file:org.apache.accumulo.tserver.Tablet.java

License:Apache License

/**
 * yet another constructor - this one allows us to avoid costly lookups into the Metadata table if we already know the files we need - as at split time
 *
 * <p>Work performed, in order: resolve and check the tablet directory, initialise the simple fields, (root
 * tablet only) derive the tablet time from the data files, register a configuration observer on both the
 * table and its namespace configuration, replay any write-ahead log entries into the in-memory map,
 * initialise the context classloader if one is configured, and finally create the {@code DatafileManager}.
 *
 * @param location tablet directory; used verbatim as a path when it contains ":", otherwise appended to the
 *        table id and resolved through {@code fs.getFullPath}
 * @param logEntries write-ahead log entries to replay; cleared by this constructor when replay applied no
 *        mutations
 * @param datafiles data files the tablet starts with
 * @param time encoded tablet time; replaced for the root tablet when {@code datafiles} is non-empty, and
 *        defaulted when it is null for the old root extent with no data files
 * @param scanFiles files handed to {@code datafileManager.removeFilesAfterScan}
 * @throws IOException propagated from the file system / file reader calls
 */
private Tablet(final TabletServer tabletServer, final Text location, final KeyExtent extent,
        final TabletResourceManager trm, final Configuration conf, final VolumeManager fs,
        final List<LogEntry> logEntries, final SortedMap<FileRef, DataFileValue> datafiles, String time,
        final TServerInstance lastLocation, Set<FileRef> scanFiles, long initFlushID, long initCompactID)
        throws IOException {
    // A location containing ":" is used directly as a path (presumably a full URI - confirm);
    // anything else is resolved relative to the table's directory.
    Path locationPath;
    if (location.find(":") >= 0) {
        locationPath = new Path(location.toString());
    } else {
        locationPath = fs.getFullPath(FileType.TABLE, extent.getTableId().toString() + location.toString());
    }

    // The checker may return a different path than the one passed in; its result is what gets stored.
    locationPath = DirectoryDecommissioner.checkTabletDirectory(tabletServer, fs, extent, locationPath);

    this.location = locationPath;
    this.lastLocation = lastLocation;
    this.tabletDirectory = location.toString();
    this.conf = conf;
    this.acuTableConf = tabletServer.getTableConfiguration(extent);

    this.fs = fs;
    this.extent = extent;
    this.tabletResources = trm;

    this.lastFlushID = initFlushID;
    this.lastCompactID = initCompactID;

    // Root tablet: ignore the supplied time and recompute it as the largest key timestamp
    // found by scanning every data file.
    if (extent.isRootTablet()) {
        long rtime = Long.MIN_VALUE;
        for (FileRef ref : datafiles.keySet()) {
            Path path = ref.path();
            FileSystem ns = fs.getFileSystemByPath(path);
            FileSKVIterator reader = FileOperations.getInstance().openReader(path.toString(), true, ns,
                    ns.getConf(), tabletServer.getTableConfiguration(extent));
            // -1 is the value used for a file that yields no keys
            long maxTime = -1;
            try {

                while (reader.hasTop()) {
                    maxTime = Math.max(maxTime, reader.getTopKey().getTimestamp());
                    reader.next();
                }

            } finally {
                // always release the reader, even if the scan fails
                reader.close();
            }

            if (maxTime > rtime) {
                time = TabletTime.LOGICAL_TIME_ID + "" + maxTime;
                rtime = maxTime;
            }
        }
    }
    if (time == null && datafiles.isEmpty() && extent.equals(RootTable.OLD_EXTENT)) {
        // recovery... old root tablet has no data, so time doesn't matter:
        time = TabletTime.LOGICAL_TIME_ID + "" + Long.MIN_VALUE;
    }

    this.tabletServer = tabletServer;
    this.logId = tabletServer.createLogId(extent);

    this.timer = new TabletStatsKeeper();

    setupDefaultSecurityLabels(extent);

    tabletMemory = new TabletMemory();
    tabletTime = TabletTime.getInstance(time);
    persistedTime = tabletTime.getTime();

    // Observer that rebuilds the constraint checker and the default security labels
    // whenever the configuration changes.
    acuTableConf.addObserver(configObserver = new ConfigurationObserver() {

        private void reloadConstraints() {
            constraintChecker.set(new ConstraintChecker(acuTableConf));
        }

        @Override
        public void propertiesChanged() {
            reloadConstraints();

            try {
                setupDefaultSecurityLabels(extent);
            } catch (Exception e) {
                // NOTE(review): the caught exception is not included in the log output
                log.error("Failed to reload default security labels for extent: " + extent.toString());
            }
        }

        @Override
        public void propertyChanged(String prop) {
            if (prop.startsWith(Property.TABLE_CONSTRAINT_PREFIX.getKey()))
                reloadConstraints();
            else if (prop.equals(Property.TABLE_DEFAULT_SCANTIME_VISIBILITY.getKey())) {
                try {
                    log.info("Default security labels changed for extent: " + extent.toString());
                    setupDefaultSecurityLabels(extent);
                } catch (Exception e) {
                    // NOTE(review): the caught exception is not included in the log output
                    log.error("Failed to reload default security labels for extent: " + extent.toString());
                }
            }

        }

        @Override
        public void sessionExpired() {
            log.debug("Session expired, no longer updating per table props...");
        }

    });

    // The same observer is also registered on the namespace-level configuration.
    acuTableConf.getNamespaceConfiguration().addObserver(configObserver);

    // Force a load of any per-table properties
    configObserver.propertiesChanged();

    if (!logEntries.isEmpty()) {
        log.info("Starting Write-Ahead Log recovery for " + this.extent);
        // count[0] = number of mutations replayed,
        // count[1] = largest system-assigned timestamp seen (Long.MIN_VALUE = none).
        // An array is used so the anonymous receiver below can update the values.
        final long[] count = new long[2];
        final CommitSession commitSession = tabletMemory.getCommitSession();
        count[1] = Long.MIN_VALUE;
        try {
            Set<String> absPaths = new HashSet<String>();
            for (FileRef ref : datafiles.keySet())
                absPaths.add(ref.path().toString());

            tabletServer.recover(this.tabletServer.getFileSystem(), extent, acuTableConf, logEntries, absPaths,
                    new MutationReceiver() {
                        @Override
                        public void receive(Mutation m) {
                            // LogReader.printMutation(m);
                            Collection<ColumnUpdate> muts = m.getUpdates();
                            for (ColumnUpdate columnUpdate : muts) {
                                if (!columnUpdate.hasTimestamp()) {
                                    // if it is not a user set timestamp, it must have been set
                                    // by the system
                                    count[1] = Math.max(count[1], columnUpdate.getTimestamp());
                                }
                            }
                            tabletMemory.mutate(commitSession, Collections.singletonList(m));
                            count[0]++;
                        }
                    });

            if (count[1] != Long.MIN_VALUE) {
                tabletTime.useMaxTimeFromWALog(count[1]);
            }
            commitSession.updateMaxCommittedTime(tabletTime.getTime());

            // Nothing was replayed: drop the log references from the metadata and from the caller's list.
            if (count[0] == 0) {
                MetadataTableUtil.removeUnusedWALEntries(extent, logEntries, tabletServer.getLock());
                logEntries.clear();
            }

        } catch (Throwable t) {
            // Recovery failures are only tolerated when the table is configured to ignore them.
            if (acuTableConf.getBoolean(Property.TABLE_FAILURES_IGNORE)) {
                log.warn("Error recovering from log files: ", t);
            } else {
                throw new RuntimeException(t);
            }
        }
        // make some closed references that represent the recovered logs
        currentLogs = new HashSet<DfsLogger>();
        for (LogEntry logEntry : logEntries) {
            // NOTE: the loop variable "log" shadows the logger of the same name inside this loop
            for (String log : logEntry.logSet) {
                currentLogs.add(new DfsLogger(tabletServer.getServerConfig(), log));
            }
        }

        log.info("Write-Ahead Log recovery complete for " + this.extent + " (" + count[0]
                + " mutations applied, " + tabletMemory.getNumEntries() + " entries created)");
    }

    String contextName = acuTableConf.get(Property.TABLE_CLASSPATH);
    if (contextName != null && !contextName.equals("")) {
        // initialize context classloader, instead of possibly waiting for it to initialize for a scan
        // TODO this could hang, causing other tablets to fail to load - ACCUMULO-1292
        AccumuloVFSClassLoader.getContextManager().getClassLoader(contextName);
    }

    // do this last after tablet is completely setup because it
    // could cause major compaction to start
    datafileManager = new DatafileManager(datafiles);

    computeNumEntries();

    datafileManager.removeFilesAfterScan(scanFiles);

    // look for hints of a failure on the previous tablet server
    if (!logEntries.isEmpty() || needsMajorCompaction(MajorCompactionReason.NORMAL)) {
        // look for any temp files hanging around
        removeOldTemporaryFiles();
    }

    log.log(TLevel.TABLET_HIST, extent + " opened");
}

From source file:org.cloudata.examples.web.TermUploadReduce.java

License:Apache License

/**
 * Uploads a single term record whose entire payload is carried in the key.
 *
 * <p>The key is split at its first tab: the bytes before it become the row key, the bytes after it are
 * decoded as EUC-KR and split on tabs into one of two layouts:
 * <ul>
 * <li>{@code "df", <value>} - stored in the "df" column via {@code dfUploader}</li>
 * <li>{@code "tf", <documentId>, <freq>, <weight>} - stored in the "tf", "weight" and "i_weight" columns via
 * {@code weightUploader}</li>
 * </ul>
 * Anything else is logged and skipped. The {@code values}, {@code collector} and {@code reporter}
 * arguments are not used.
 *
 * @param key a {@code Text} holding the tab-separated record
 * @throws IOException the previously recorded {@code exception}, if any, or a failure while decoding or
 *         uploading
 */
public void reduce(WritableComparable key, Iterator<Writable> values,
        OutputCollector<WritableComparable, Writable> collector, Reporter reporter) throws IOException {
    // Fail fast if an earlier step recorded an error.
    if (exception != null) {
        throw exception;
    }
    Text tKey = (Text) key;
    int keyIndex = tKey.find("\t");
    if (keyIndex < 0) {
        LOG.error("invalid value:" + tKey);
        return;
    }

    // Text.find returns a byte offset, so it can be used directly against the backing byte array.
    Row.Key rowKey = new Row.Key(tKey.getBytes(), 0, keyIndex);

    String keyStr = new String(tKey.getBytes(), keyIndex + 1, (tKey.getLength() - keyIndex - 1), "EUC-KR");

    // Token layouts after the row key:
    //   "tf", documentId, freq, weight
    //   "df", df value
    String[] valueTokens = keyStr.split("\t");

    if (rowKey.getLength() < TestWebPage.MIN_TERM_LENGTH) {
        return;
    }

    count++;
    if (count % 50000 == 0) {
        // periodic progress output
        System.out.println(new Date() + ":" + keyStr);
    }

    if (valueTokens.length == 2 && "df".equals(valueTokens[0])) {
        Row row = new Row(rowKey);
        row.addCell("df", new Cell(Cell.Key.EMPTY_KEY, valueTokens[1].getBytes()));
        dfUploader.put(row);
    } else if (valueTokens.length == 4 && "tf".equals(valueTokens[0])) {
        Row row = new Row(rowKey);
        String documentId = valueTokens[1];
        String freq = valueTokens[2];
        String weight = valueTokens[3];

        row.addCell("tf", new Cell(new Cell.Key(documentId), freq.getBytes()));
        row.addCell("weight", new Cell(new Cell.Key(documentId), weight.getBytes()));

        byte[] documentIdBytes = documentId.getBytes();

        // Cell key is the formatted (1.0 - weight) followed by the document id
        // (presumably so cells sort by descending weight - confirm).
        row.addCell("i_weight",
                new Cell(new Cell.Key((df.format(1.0 - Double.parseDouble(weight)) + documentId).getBytes()),
                        documentIdBytes));

        weightUploader.put(row);
    } else {
        // A record with a single token has no index 1; guard so that reporting the bad record
        // cannot itself throw ArrayIndexOutOfBoundsException.
        String secondToken = valueTokens.length > 1 ? valueTokens[1] : "";
        LOG.error("invalid value:" + valueTokens.length + "," + count + "," + secondToken + "," + keyStr);
        return;
    }
}

From source file:org.cloudata.examples.web.WebKeyRangePartitioner.java

License:Apache License

/**
 * Picks the partition for a record by locating its row key among the known tablet end keys.
 *
 * <p>The row key is the part of the {@code Text} key before its first tab. The partition is the
 * {@code index} of the first entry of {@code tabletInfoSet} that is not smaller than the row key, or of the
 * last entry when there is none. A key without a tab is logged and routed using {@code Row.Key.MAX_KEY}.
 *
 * @param key a {@code Text} whose leading, tab-terminated bytes are the row key
 * @param value unused
 * @param numPartitions number of partitions; must equal the number of tablets
 * @return the partition number, or -1 when configuration failed, the tablet count does not match
 *         {@code numPartitions}, or there are no tablets
 */
public int getPartition(WritableComparable key, Writable value, int numPartitions) {
    if (confException != null) {
        LOG.error(confException.getMessage(), confException);
        return -1;
    }

    if (numPartitions != tabletInfoSet.size()) {
        LOG.error(
                "tablet count(" + tabletInfoSet.size() + ") not equals numPartitions (" + numPartitions + ")");
        return -1;
    }

    if (tabletInfoSet.isEmpty()) {
        LOG.error("tablet partition size is zero");
        return -1;
    }

    Text tKey = (Text) key;

    Row.Key rowKey;

    int keyIndex = tKey.find("\t");
    if (keyIndex < 0) {
        LOG.error("invalid value:" + tKey);
        rowKey = Row.Key.MAX_KEY;
    } else {
        // Text.find returns a byte offset, so it can be used directly against the backing byte array.
        rowKey = new Row.Key(tKey.getBytes(), 0, keyIndex);
    }

    // isEmpty() rather than size(): counting the elements of a sorted-set view can require
    // walking the whole view, and only emptiness matters here.
    SortedSet<RowKeyItem> tailSet = tabletInfoSet.tailSet(new RowKeyItem(rowKey, 0));
    RowKeyItem item = tailSet.isEmpty() ? tabletInfoSet.last() : tailSet.first();
    int partitionNumber = item.index;

    if (partitionNumber >= numPartitions) {
        LOG.info("Partition Number is : " + partitionNumber + ", numPartitions : " + numPartitions
                + ", Row.Key : " + key.toString());
        partitionNumber = numPartitions - 1;
    }
    //LOG.info("tablet partition num:" + partitionNumber);
    count++;
    if (count % 5000 == 0) {
        // periodic progress output
        try {
            System.out.println("Partitioned:" + new String(rowKey.getBytes(), "EUC-KR") + ","
                    + new String(item.rowKey.getBytes(), "EUC-KR"));
        } catch (UnsupportedEncodingException e) {
            // Progress output is best-effort; record the problem instead of hiding it.
            LOG.warn("EUC-KR is not supported; skipping partition progress output", e);
        }
    }
    return partitionNumber;
}

From source file:uk.bl.wa.hadoop.mapred.FrequencyCountingReducer.java

License:Open Source License

/**
 * Counts how often each distinct value occurs for {@code key} and emits one
 * {@code value<TAB>count} line per distinct value.
 *
 * <p>When the key contains "__" it is split on that separator: the first two pieces select a collector
 * from {@code mos}, and the second piece alone becomes the output key. Otherwise the results go to
 * {@code output} under the original key.
 */
@Override
public void reduce(Text key, Iterator<Text> iterator, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

    log.warn("Reducing for key: " + key);

    // Tally of occurrences per distinct value string.
    Map<String, MutableInt> tally = new HashMap<String, MutableInt>();
    while (iterator.hasNext()) {
        String seen = iterator.next().toString();
        MutableInt counter = tally.get(seen);
        if (counter == null) {
            // first sighting: register a fresh counter
            counter = new MutableInt();
            tally.put(seen, counter);
        }
        counter.inc();
    }

    // Work out the destination collector and the key to emit under.
    Text line = new Text();
    Text outKey = key;
    OutputCollector<Text, Text> collector;
    if (key.find("__") != -1) {
        String[] fp = key.toString().split("__");
        collector = mos.getCollector(fp[0], fp[1], reporter);
        outKey = new Text(fp[1]);
    } else {
        collector = output;
    }
    log.info("For key: " + key + " outKey " + outKey + " outputting " + tally.size() + " unique values.");

    // Emit "<value>\t<count>" for every distinct value.
    for (Map.Entry<String, MutableInt> entry : tally.entrySet()) {
        line.set(entry.getKey() + "\t" + entry.getValue().get());
        collector.collect(outKey, line);
    }

}

From source file:uk.bl.wa.hadoop.mapred.ReservoirSamplingReducer.java

License:Open Source License

/**
 * Emits a fixed-size random sample of the values for {@code key}.
 *
 * <p>The first {@code numSamples} values fill the reservoir. Each later value draws an index in
 * {@code [0, numItemsSeen]} and replaces the reservoir slot at that index when the index is below
 * {@code numSamples}. The generator is re-seeded with {@code defaultSeed} on every call.
 *
 * <p>When the key contains "__" it is split on that separator: the first two pieces select the collector
 * via {@code getCollector}, and the second piece alone becomes the output key. Otherwise the sample goes
 * to {@code output} under the original key.
 */
@Override
public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

    long numItemsSeen = 0;
    Vector<Text> reservoir = new Vector<Text>();
    RandomDataGenerator random = new RandomDataGenerator();
    // Fix the seed so reproducible by default:
    random.reSeed(defaultSeed);

    // Iterate through all values:
    while (values.hasNext()) {
        Text item = values.next();

        // The framework reuses the value object between next() calls, so the reservoir must hold
        // copies; storing the reference would leave every slot aliasing the last value read.
        if (reservoir.size() < numSamples) {
            // reservoir not yet full, just append
            reservoir.add(new Text(item));
        } else {
            // find a sample to replace
            long rIndex = random.nextLong(0, numItemsSeen);
            if (rIndex < numSamples) {
                reservoir.set((int) rIndex, new Text(item));
            }
        }
        numItemsSeen++;
    }

    // Choose the output:
    Text outKey = key;
    OutputCollector<Text, Text> collector;
    int pos = key.find("__");
    if (pos == -1) {
        collector = output;
    } else {
        String[] fp = key.toString().split("__");
        collector = getCollector(fp[0], fp[1], reporter);
        outKey = new Text(fp[1]);
    }

    // Now output the sample:
    for (Text sto : reservoir) {
        collector.collect(outKey, sto);
    }
}