Example usage for org.apache.hadoop.io Text equals

Introduction

On this page you can find example usages of org.apache.hadoop.io.Text.equals from open-source projects.

Prototype

@Override
public boolean equals(Object o) 

Document

Returns true iff o is a Text with the same contents.
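
A minimal sketch of this contract (the class TextEqualsDemo below is illustrative, not taken from any of the projects listed): equals compares byte contents, so two independently constructed Text objects holding the same characters are equal, while a String or null never is.

import org.apache.hadoop.io.Text;

public class TextEqualsDemo {
    public static void main(String[] args) {
        Text a = new Text("category");
        Text b = new Text("category");

        System.out.println(a.equals(b));           // true: same byte contents
        System.out.println(a.equals(new Text()));  // false: different contents
        System.out.println(a.equals("category"));  // false: a String is not a Text
        System.out.println(a.equals(null));        // false: null is not a Text
    }
}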

Usage

From source file: joshelser.as2015.query.Query.java

License: Apache License

public static void main(String[] args) throws Exception {
    JCommander commander = new JCommander();
    final Opts options = new Opts();
    commander.addObject(options);

    commander.setProgramName("Query");
    try {
        commander.parse(args);
    } catch (ParameterException ex) {
        commander.usage();
        System.err.println(ex.getMessage());
        System.exit(1);
    }

    ClientConfiguration conf = ClientConfiguration.loadDefault();
    if (null != options.clientConfFile) {
        conf = new ClientConfiguration(new PropertiesConfiguration(options.clientConfFile));
    }
    conf.withInstance(options.instanceName).withZkHosts(options.zookeepers);

    ZooKeeperInstance inst = new ZooKeeperInstance(conf);
    Connector conn = inst.getConnector(options.user, new PasswordToken(options.password));

    BatchScanner bs = conn.createBatchScanner(options.table, Authorizations.EMPTY, 16);
    try {
        bs.setRanges(Collections.singleton(new Range()));
        final Text categoryText = new Text("category");
        bs.fetchColumn(categoryText, new Text("name"));
        bs.fetchColumn(new Text("review"), new Text("score"));
        bs.fetchColumn(new Text("review"), new Text("userId"));

        bs.addScanIterator(new IteratorSetting(50, "wri", WholeRowIterator.class));
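        // this Text is reused across rows to avoid a per-entry allocation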
        final Text colf = new Text();
        Map<String, List<Integer>> scoresByUser = new HashMap<>();
        for (Entry<Key, Value> entry : bs) {
            SortedMap<Key, Value> row = WholeRowIterator.decodeRow(entry.getKey(), entry.getValue());
            Iterator<Entry<Key, Value>> iter = row.entrySet().iterator();
            if (!iter.hasNext()) {
                // row was empty
                continue;
            }
            Entry<Key, Value> categoryEntry = iter.next();
            categoryEntry.getKey().getColumnFamily(colf);
            if (!colf.equals(categoryText)) {
                throw new IllegalArgumentException("Unknown!");
            }
            if (!categoryEntry.getValue().toString().equals("books")) {
                // not a book review
                continue;
            }

            if (!iter.hasNext()) {
                continue;
            }
            Entry<Key, Value> reviewScore = iter.next();
            if (!iter.hasNext()) {
                continue;
            }
            Entry<Key, Value> reviewUserId = iter.next();

            String userId = reviewUserId.getValue().toString();
            if (userId.equals("unknown")) {
                // filter out reviews with an unknown user id
                continue;
            }

            List<Integer> scores = scoresByUser.get(userId);
            if (null == scores) {
                scores = new ArrayList<>();
                scoresByUser.put(userId, scores);
            }
            scores.add(Float.valueOf(reviewScore.getValue().toString()).intValue());
        }

        for (Entry<String, List<Integer>> entry : scoresByUser.entrySet()) {
            int sum = 0;
            for (Integer val : entry.getValue()) {
                sum += val;
            }

            System.out.println(entry.getKey() + " => " + (float) sum / entry.getValue().size());
        }
    } finally {
        bs.close();
    }
}
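
Note how the loop reuses a single Text (colf) for the column family and compares it with equals against the precomputed categoryText, avoiding a new String per row.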

From source file: microbench.WordCountOnHDFSDataLocal.java

License: Apache License

public static void main(String[] args) throws IOException, InterruptedException {
    try {
        parseArgs(args);
        HashMap<String, String> conf = new HashMap<String, String>();
        initConf(conf);
        MPI_D.Init(args, MPI_D.Mode.Common, conf);

        JobConf jobConf = new JobConf(confPath);
        if (MPI_D.COMM_BIPARTITE_O != null) {
            // O communicator
            int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);
            int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O);
            if (rank == 0) {
                DataMPIUtil.printArgs(args);
            }
            System.out.println("The O task " + rank + " of " + size + " is working...");

            HadoopReader<LongWritable, Text> reader = HadoopIOUtil.getReader(jobConf, inDir,
                    TextInputFormat.class, rank, MPI_D.COMM_BIPARTITE_O);
            Text word = new Text();
            IntWritable one = new IntWritable(1);
            LongWritable khead = reader.createKey();
            Text vhead = reader.createValue();
            while (reader.next(khead, vhead)) {
                StringTokenizer itr = new StringTokenizer(vhead.toString());
                while (itr.hasMoreTokens()) {
                    word.set(itr.nextToken());
                    // send key-value
                    MPI_D.Send(word, one);
                }
            }
            reader.close();
        } else if (MPI_D.COMM_BIPARTITE_A != null) {
            // A communicator
            int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A);
            int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_A);
            System.out.println("The A task " + rank + " of " + size + " is working...");

            HadoopWriter<Text, IntWritable> outrw = HadoopIOUtil.getNewWriter(jobConf, outDir, Text.class,
                    IntWritable.class, TextOutputFormat.class, null, rank, MPI_D.COMM_BIPARTITE_A);

            Text oldKey = null;
            IntWritable valueData = new IntWritable();
            int sum = 0;
            Object[] keyValue = MPI_D.Recv();
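            // pairs arrive grouped by key, so a key change flushes the running sum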
            while (keyValue != null) {
                Text key = (Text) keyValue[0];
                IntWritable value = (IntWritable) keyValue[1];
                if (oldKey == null) {
                    oldKey = key;
                    sum = value.get();
                } else {
                    if (key.equals(oldKey)) {
                        sum += value.get();
                    } else {
                        valueData.set(sum);
                        outrw.write(oldKey, valueData);
                        oldKey = key;
                        sum = value.get();
                    }
                }
                keyValue = MPI_D.Recv();
            }
            if (oldKey != null) {
                valueData.set(sum);
                outrw.write(oldKey, valueData);
            }
            outrw.close();
        }
        MPI_D.Finalize();
    } catch (MPI_D_Exception e) {
        e.printStackTrace();
    }
}

From source file: mlbench.bayes.train.WeightSummer.java

License: Apache License

@SuppressWarnings("deprecation")
public static void main(String[] args) throws MPI_D_Exception, IOException, MPIException {
    parseArgs(args);
    HashMap<String, String> conf = new HashMap<String, String>();
    initConf(conf);
    MPI_D.Init(args, MPI_D.Mode.Common, conf);
    if (MPI_D.COMM_BIPARTITE_O != null) {

        int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_O);
        int size = MPI_D.Comm_size(MPI_D.COMM_BIPARTITE_O);
        FileSplit[] inputs = DataMPIUtil.HDFSDataLocalLocator.getTaskInputs(MPI_D.COMM_BIPARTITE_O,
                (JobConf) config, inDir, rank);
        Vector weightsPerFeature = null;
        Vector weightsPerLabel = new DenseVector(labNum);

        for (int i = 0; i < inputs.length; i++) {
            FileSplit fsplit = inputs[i];
            SequenceFileRecordReader<IntWritable, VectorWritable> kvrr = new SequenceFileRecordReader<>(config,
                    fsplit);
            IntWritable index = kvrr.createKey();
            VectorWritable value = kvrr.createValue();
            while (kvrr.next(index, value)) {
                Vector instance = value.get();
                if (weightsPerFeature == null) {
                    weightsPerFeature = new RandomAccessSparseVector(instance.size(),
                            instance.getNumNondefaultElements());
                }

                int label = index.get();
                weightsPerFeature.assign(instance, Functions.PLUS);
                weightsPerLabel.set(label, weightsPerLabel.get(label) + instance.zSum());
            }
        }
        if (weightsPerFeature != null) {
            MPI_D.Send(new Text(WEIGHTS_PER_FEATURE), new VectorWritable(weightsPerFeature));
            MPI_D.Send(new Text(WEIGHTS_PER_LABEL), new VectorWritable(weightsPerLabel));
        }
    } else if (MPI_D.COMM_BIPARTITE_A != null) {
        int rank = MPI_D.Comm_rank(MPI_D.COMM_BIPARTITE_A);
        config.set(MAPRED_OUTPUT_DIR, outDirW);
        config.set("mapred.task.id", DataMPIUtil.getHadoopTaskAttemptID().toString().toString());
        ((JobConf) config).setOutputKeyClass(Text.class);
        ((JobConf) config).setOutputValueClass(VectorWritable.class);
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(config,
                DataMPIUtil.getHadoopTaskAttemptID());
        SequenceFileOutputFormat<Text, VectorWritable> outfile = new SequenceFileOutputFormat<>();
        FileSystem fs = FileSystem.get(config);

        Path output = new Path(config.get(MAPRED_OUTPUT_DIR));
        FileOutputCommitter fcommitter = new FileOutputCommitter(output, taskContext);
        RecordWriter<Text, VectorWritable> outrw = null;
        try {
            fcommitter.setupJob(taskContext);
            outrw = outfile.getRecordWriter(fs, (JobConf) config, getOutputName(rank), null);
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("ERROR: Please set the HDFS configuration properly\n");
            System.exit(-1);
        }

        Text key = null, newKey = null;
        VectorWritable point = null, newPoint = null;
        Vector vector = null;
        Object[] vals = MPI_D.Recv();
        while (vals != null) {
            newKey = (Text) vals[0];
            newPoint = (VectorWritable) vals[1];
            // flush the accumulated vector whenever the incoming key changes
            if (key != null && !key.equals(newKey)) {
                outrw.write(key, new VectorWritable(vector));
                vector = null;
            }
            if (vector == null) {
                vector = newPoint.get();
            } else {
                vector.assign(newPoint.get(), Functions.PLUS);
            }

            key = newKey;
            point = newPoint;
            vals = MPI_D.Recv();
        }
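        // write the final accumulated group, if any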
        if (newKey != null && newPoint != null) {
            outrw.write(key, new VectorWritable(vector));
        }

        outrw.close(null);
        if (fcommitter.needsTaskCommit(taskContext)) {
            fcommitter.commitTask(taskContext);
        }

        MPI_D.COMM_BIPARTITE_A.Barrier();
        if (rank == 0) {
            Path resOut = new Path(outDir);
            NaiveBayesModel naiveBayesModel = BayesUtils.readModelFromDir(new Path(outDir), config);
            naiveBayesModel.serialize(resOut, config);
        }
    }

    MPI_D.Finalize();
}

From source file: org.apache.accumulo.core.client.admin.FindMax.java

License: Apache License

private static Text _findMax(Scanner scanner, Text start, boolean inclStart, Text end, boolean inclEnd) {

    // System.out.printf("findMax(%s, %s, %s, %s)%n", Key.toPrintableString(start.getBytes(), 0, start.getLength(), 1000), inclStart,
    // Key.toPrintableString(end.getBytes(), 0, end.getLength(), 1000), inclEnd);

    int cmp = start.compareTo(end);

    if (cmp >= 0) {
        if (inclStart && inclEnd && cmp == 0) {
            scanner.setRange(new Range(start, true, end, true));
            Iterator<Entry<Key, Value>> iter = scanner.iterator();
            if (iter.hasNext())
                return iter.next().getKey().getRow();
        }

        return null;
    }

    Text mid = findMidPoint(start, end);
    // System.out.println("mid = :"+Key.toPrintableString(mid.getBytes(), 0, mid.getLength(), 1000)+":");

    // include mid itself unless it equals start, in which case honor the caller's inclStart
    scanner.setRange(new Range(mid, mid.equals(start) ? inclStart : true, end, inclEnd));

    Iterator<Entry<Key, Value>> iter = scanner.iterator();

    if (iter.hasNext()) {
        Key next = iter.next().getKey();

        int count = 0;
        while (count < 10 && iter.hasNext()) {
            next = iter.next().getKey();
            count++;
        }

        if (!iter.hasNext())
            return next.getRow();

        Text ret = _findMax(scanner, next.followingKey(PartialKey.ROW).getRow(), true, end, inclEnd);
        if (ret == null)
            return next.getRow();
        else
            return ret;
    } else {

        return _findMax(scanner, start, inclStart, mid, mid.equals(start) ? inclStart : false);
    }
}

From source file: org.apache.accumulo.core.data.impl.KeyExtent.java

License: Apache License

private boolean equals(Text t1, Text t2) {
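    // null-safe comparison: both null compare equal, exactly one null compares unequal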
    if (t1 == null || t2 == null)
        return t1 == t2;

    return t1.equals(t2);
}

From source file: org.apache.accumulo.core.metadata.MetadataLocationObtainer.java

License: Apache License

public static TabletLocations getMetadataLocationEntries(SortedMap<Key, Value> entries) {
    Key key;
    Value val;
    Text location = null;
    Text session = null;
    Value prevRow = null;
    KeyExtent ke;

    List<TabletLocation> results = new ArrayList<>();
    ArrayList<KeyExtent> locationless = new ArrayList<>();

    Text lastRowFromKey = new Text();

    // the Text objects below are reused across loop iterations for efficiency
    Text colf = new Text();
    Text colq = new Text();

    for (Entry<Key, Value> entry : entries.entrySet()) {
        key = entry.getKey();
        val = entry.getValue();

        if (key.compareRow(lastRowFromKey) != 0) {
            prevRow = null;
            location = null;
            session = null;
            key.getRow(lastRowFromKey);
        }

        colf = key.getColumnFamily(colf);
        colq = key.getColumnQualifier(colq);

        // interpret the row id as a key extent
        if (colf.equals(TabletsSection.CurrentLocationColumnFamily.NAME)
                || colf.equals(TabletsSection.FutureLocationColumnFamily.NAME)) {
            if (location != null) {
                throw new IllegalStateException("Tablet has multiple locations : " + lastRowFromKey);
            }
            location = new Text(val.toString());
            session = new Text(colq);
        } else if (TabletsSection.TabletColumnFamily.PREV_ROW_COLUMN.equals(colf, colq)) {
            prevRow = new Value(val);
        }

        if (prevRow != null) {
            ke = new KeyExtent(key.getRow(), prevRow);
            if (location != null)
                results.add(new TabletLocation(ke, location.toString(), session.toString()));
            else
                locationless.add(ke);

            location = null;
            prevRow = null;
        }
    }

    return new TabletLocations(results, locationless);
}

From source file: org.apache.accumulo.core.metadata.schema.TabletMetadata.java

License: Apache License

public static TabletMetadata convertRow(Iterator<Entry<Key, Value>> rowIter,
        EnumSet<FetchedColumns> fetchedColumns) {
    Objects.requireNonNull(rowIter);

    TabletMetadata te = new TabletMetadata();

    Builder<String> filesBuilder = ImmutableList.builder();
    ByteSequence row = null;

    while (rowIter.hasNext()) {
        Entry<Key, Value> kv = rowIter.next();
        Key k = kv.getKey();
        Value v = kv.getValue();
        Text fam = k.getColumnFamily();

        if (row == null) {
            row = k.getRowData();
            KeyExtent ke = new KeyExtent(k.getRow(), (Text) null);
            te.endRow = ke.getEndRow();
            te.tableId = ke.getTableId();
        } else if (!row.equals(k.getRowData())) {
            throw new IllegalArgumentException(
                    "Input contains more than one row : " + row + " " + k.getRowData());
        }

        if (PREV_ROW_COLUMN.hasColumns(k)) {
            te.prevEndRow = KeyExtent.decodePrevEndRow(v);
        }

        // dispatch on the column family: data files, then current/future/last locations
        if (fam.equals(DataFileColumnFamily.NAME)) {
            filesBuilder.add(k.getColumnQualifier().toString());
        } else if (fam.equals(CurrentLocationColumnFamily.NAME)) {
            if (te.location != null) {
                throw new IllegalArgumentException(
                        "Input contains more than one location " + te.location + " " + v);
            }
            te.location = new Location(v.toString(), k.getColumnQualifierData().toString(),
                    LocationType.CURRENT);
        } else if (fam.equals(FutureLocationColumnFamily.NAME)) {
            if (te.location != null) {
                throw new IllegalArgumentException(
                        "Input contains more than one location " + te.location + " " + v);
            }
            te.location = new Location(v.toString(), k.getColumnQualifierData().toString(),
                    LocationType.FUTURE);
        } else if (fam.equals(LastLocationColumnFamily.NAME)) {
            te.last = new Location(v.toString(), k.getColumnQualifierData().toString(), LocationType.LAST);
        }
    }

    te.files = filesBuilder.build();
    te.fetchedColumns = fetchedColumns;
    return te;
}

From source file: org.apache.accumulo.core.metadata.TableMetadataServicer.java

License: Apache License

@Override
public void getTabletLocations(SortedMap<KeyExtent, String> tablets)
        throws AccumuloException, AccumuloSecurityException, TableNotFoundException {

    Scanner scanner = context.getConnector().createScanner(getServicingTableName(), Authorizations.EMPTY);

    TabletsSection.TabletColumnFamily.PREV_ROW_COLUMN.fetch(scanner);
    scanner.fetchColumnFamily(TabletsSection.CurrentLocationColumnFamily.NAME);

    // position at first entry in metadata table for given table
    scanner.setRange(TabletsSection.getRange(getServicedTableId()));

    Text colf = new Text();
    Text colq = new Text();

    KeyExtent currentKeyExtent = null;
    String location = null;
    Text row = null;
    // acquire this table's tablets from the metadata table which services it
    for (Entry<Key, Value> entry : scanner) {
        if (row != null) {
            if (!row.equals(entry.getKey().getRow())) {
                currentKeyExtent = null;
                location = null;
                row = entry.getKey().getRow();
            }
        } else {
            row = entry.getKey().getRow();
        }

        colf = entry.getKey().getColumnFamily(colf);
        colq = entry.getKey().getColumnQualifier(colq);

        if (TabletsSection.TabletColumnFamily.PREV_ROW_COLUMN.equals(colf, colq)) {
            currentKeyExtent = new KeyExtent(entry.getKey().getRow(), entry.getValue());
            tablets.put(currentKeyExtent, location);
            currentKeyExtent = null;
            location = null;
        } else if (colf.equals(TabletsSection.CurrentLocationColumnFamily.NAME)) {
            location = entry.getValue().toString();
        }

    }

    validateEntries(tablets);
}

From source file: org.apache.accumulo.core.util.MetadataTable.java

License: Apache License

public static Pair<SortedMap<KeyExtent, Text>, List<KeyExtent>> getMetadataLocationEntries(
        SortedMap<Key, Value> entries) {
    Key key;
    Value val;
    Text location = null;
    Value prevRow = null;
    KeyExtent ke;

    SortedMap<KeyExtent, Text> results = new TreeMap<KeyExtent, Text>();
    ArrayList<KeyExtent> locationless = new ArrayList<KeyExtent>();

    Text lastRowFromKey = new Text();

    // the Text objects below are reused across loop iterations for efficiency
    Text colf = new Text();
    Text colq = new Text();

    for (Entry<Key, Value> entry : entries.entrySet()) {
        key = entry.getKey();
        val = entry.getValue();

        if (key.compareRow(lastRowFromKey) != 0) {
            prevRow = null;
            location = null;
            key.getRow(lastRowFromKey);
        }

        colf = key.getColumnFamily(colf);
        colq = key.getColumnQualifier(colq);

        // interpret the row id as a key extent
        if (colf.equals(Constants.METADATA_CURRENT_LOCATION_COLUMN_FAMILY)
                || colf.equals(Constants.METADATA_FUTURE_LOCATION_COLUMN_FAMILY)) {
            if (location != null) {
                throw new IllegalStateException("Tablet has multiple locations : " + lastRowFromKey);
            }
            location = new Text(val.toString());
        } else if (Constants.METADATA_PREV_ROW_COLUMN.equals(colf, colq)) {
            prevRow = new Value(val);
        }

        if (prevRow != null) {
            ke = new KeyExtent(key.getRow(), prevRow);
            if (location != null)
                results.put(ke, location);
            else
                locationless.add(ke);

            location = null;
            prevRow = null;
        }
    }

    return new Pair<SortedMap<KeyExtent, Text>, List<KeyExtent>>(results, locationless);
}

From source file: org.apache.accumulo.core.util.MetadataTable.java

License: Apache License

public static void getEntries(Instance instance, TCredentials credentials, String table, boolean isTid,
        Map<KeyExtent, String> locations, SortedSet<KeyExtent> tablets)
        throws AccumuloException, AccumuloSecurityException, TableNotFoundException {
    String tableId = isTid ? table : Tables.getNameToIdMap(instance).get(table);

    Scanner scanner = instance
            .getConnector(credentials.getPrincipal(), CredentialHelper.extractToken(credentials))
            .createScanner(Constants.METADATA_TABLE_NAME, Constants.NO_AUTHS);

    Constants.METADATA_PREV_ROW_COLUMN.fetch(scanner);
    scanner.fetchColumnFamily(Constants.METADATA_CURRENT_LOCATION_COLUMN_FAMILY);

    // position at first entry in metadata table for given table
    KeyExtent ke = new KeyExtent(new Text(tableId), new Text(), null);
    Key startKey = new Key(ke.getMetadataEntry());
    ke = new KeyExtent(new Text(tableId), null, null);
    Key endKey = new Key(ke.getMetadataEntry()).followingKey(PartialKey.ROW);
    scanner.setRange(new Range(startKey, endKey));

    Text colf = new Text();
    Text colq = new Text();

    KeyExtent currentKeyExtent = null;
    String location = null;
    Text row = null;
    // acquire this table's METADATA table entries
    boolean haveExtent = false;
    boolean haveLocation = false;
    for (Entry<Key, Value> entry : scanner) {
        if (row != null) {
            if (!row.equals(entry.getKey().getRow())) {
                currentKeyExtent = null;
                haveExtent = false;
                haveLocation = false;
                row = entry.getKey().getRow();
            }
        } else
            row = entry.getKey().getRow();

        colf = entry.getKey().getColumnFamily(colf);
        colq = entry.getKey().getColumnQualifier(colq);

        // stop scanning metadata table when another table is reached
        if (!(new KeyExtent(entry.getKey().getRow(), (Text) null)).getTableId().toString().equals(tableId))
            break;

        if (Constants.METADATA_PREV_ROW_COLUMN.equals(colf, colq)) {
            currentKeyExtent = new KeyExtent(entry.getKey().getRow(), entry.getValue());
            tablets.add(currentKeyExtent);
            haveExtent = true;
        } else if (colf.equals(Constants.METADATA_CURRENT_LOCATION_COLUMN_FAMILY)) {
            location = entry.getValue().toString();
            haveLocation = true;
        }

        if (haveExtent && haveLocation) {
            locations.put(currentKeyExtent, location);
            haveExtent = false;
            haveLocation = false;
            currentKeyExtent = null;
        }
    }

    validateEntries(tableId, tablets);
}