Example usage for org.apache.hadoop.io Text toString

List of usage examples for org.apache.hadoop.io Text toString

Introduction

On this page you can find example usage for org.apache.hadoop.io Text toString.

Prototype

@Override
public String toString() 

Document

Convert text back to string
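
Before the project-specific examples below, here is a minimal, self-contained sketch of the method itself. It is not drawn from any of the listed projects; the class name TextToStringExample and the sample string are made up for illustration. A String wrapped in a Text is stored internally as UTF-8 bytes, and toString() decodes those bytes back into a java.lang.String.

import org.apache.hadoop.io.Text;

public class TextToStringExample {
    public static void main(String[] args) {
        // Text stores the string internally as UTF-8 encoded bytes
        Text text = new Text("hello hadoop");
        // toString() decodes those bytes back into a java.lang.String
        String decoded = text.toString();
        System.out.println(decoded); // prints "hello hadoop"
    }
}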

Usage

From source file:com.csiro.hadoop.UFORecordValidationMapper.java

@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    String line = value.toString();
    if (validate(line, context)) {
        context.write(key, value);
    }
}

From source file:com.dappervision.hbase.mapred.TypedBytesTableReducer.java

License:Apache License

@Override
public void reduce(Text key, Iterator<Text> values,
        OutputCollector<TypedBytesWritable, TypedBytesWritable> outputCollector, Reporter arg3)
        throws IOException {
    byte[] keyBytes = key.getBytes();
    TypedBytesWritable keyWritable = new TypedBytesWritable();
    TypedBytesWritable valueWritable = new TypedBytesWritable();
    keyWritable.setValue(new Buffer(keyBytes));

    //merge the column family and qualifier
    HashMap<String, HashMap<String, String>> cfMap = new HashMap<String, HashMap<String, String>>();
    while (values.hasNext()) {
        Text value = values.next();
        String strVal = value.toString();
        // The column family is separated from the qualifier/value with a colon (:)
        // The qualifier and value are separated with an equals sign (=)
        String[] cf_qual_val_parts = strVal.split(":");
        String cf = cf_qual_val_parts[0];
        String qual_val = cf_qual_val_parts[1];
        String[] qual_val_parts = qual_val.split("=");
        String qual = qual_val_parts[0];
        String val = qual_val_parts[1];

        if (cfMap.get(cf) != null) {
            HashMap<String, String> qualMap = cfMap.get(cf);
            // qualMap cannot be null here because cfMap.get(cf) was just checked above
            qualMap.put(qual, val); // a duplicate qualifier replaces the previous value; with Buffer keys we would have to handle that ourselves
        } else {
            HashMap<String, String> qualMap = new HashMap<String, String>();
            qualMap.put(qual, val);
            cfMap.put(cf, qualMap);
        }
    }

    HashMap<Buffer, HashMap<Buffer, Buffer>> bufMap = new HashMap<Buffer, HashMap<Buffer, Buffer>>();
    Set<Entry<String, HashMap<String, String>>> entrySet = cfMap.entrySet();
    for (Entry<String, HashMap<String, String>> entry : entrySet) {
        HashMap<String, String> qualValMap = entry.getValue();

        HashMap<Buffer, Buffer> qualValBufMap = new HashMap<Buffer, Buffer>();
        for (Entry<String, String> qualValEntry : qualValMap.entrySet()) {
            qualValBufMap.put(new Buffer(qualValEntry.getKey().getBytes()),
                    new Buffer(qualValEntry.getValue().getBytes()));
        }

        bufMap.put(new Buffer(entry.getKey().getBytes()), qualValBufMap);
    }
    valueWritable.setValue(bufMap);

    outputCollector.collect(keyWritable, valueWritable);
}

From source file:com.dasasian.chok.lucene.integration.LuceneClientTest.java

License:Apache License

@Test
public void testFilteredSearch() throws Exception {
    // write and deploy test index
    File filterIndex = temporaryFolder.newFolder("filterIndex");
    File filterShard = new File(filterIndex, "filterShard");
    String textFieldName = "textField";
    String filterFieldName = "filterField";
    IndexWriter indexWriter = new IndexWriter(FSDirectory.open(filterShard),
            new StandardAnalyzer(Version.LUCENE_30), true, MaxFieldLength.UNLIMITED);
    for (int i = 0; i < 100; i++) {
        Document document = new Document();
        document.add(new Field(textFieldName, "sample " + i, Store.YES, Index.NOT_ANALYZED));
        document.add(new Field(filterFieldName, "" + (i % 10), Store.YES, Index.NOT_ANALYZED));
        indexWriter.addDocument(document);
    }
    indexWriter.close(true);

    DeployClient deployClient = new DeployClient(miniCluster.createInteractionProtocol());
    IndexState indexState = deployClient.addIndex(filterIndex.getName(), filterIndex.getAbsolutePath(), 1)
            .joinDeployment();
    assertEquals(IndexState.DEPLOYED, indexState);

    // build filter for terms in set {i | (i % 10) == 3}.
    LuceneClient client = new LuceneClient(miniCluster.getZkConfiguration());
    TermQuery filterQuery = new TermQuery(new Term(filterFieldName, "3"));
    QueryWrapperFilter filter = new QueryWrapperFilter(filterQuery);
    final Query query = new QueryParser(Version.LUCENE_30, "", new KeywordAnalyzer())
            .parse(textFieldName + ":" + "sample*3");

    final Hits hits = client.search(query, new String[] { filterIndex.getName() }, 100, null, filter);
    assertNotNull(hits);
    List<Hit> hitsList = hits.getHits();
    for (final Hit hit : hitsList) {
        writeToLog(hit);
    }
    assertEquals(10, hits.size());
    assertEquals(10, hitsList.size());

    // check that returned results conform to the filter
    for (final Hit hit : hitsList) {
        MapWritable mw = client.getDetails(hit);
        Text text = (Text) mw.get(new Text("textField"));
        assertNotNull(text);
        String[] parts = text.toString().split(" ");
        assertTrue(parts.length == 2);
        int num = Integer.valueOf(parts[1]);
        assertTrue((num % 10) == 3);
    }
    client.close();
}

From source file:com.dasasian.chok.mapfile.MapFileClient.java

License:Apache License

public List<String> get(final String key, final String[] indexNames) throws ChokException {
    ClientResult<TextArrayWritable> results = chokClient.broadcastToIndices(TIMEOUT, true, GET_METHOD,
            GET_METHOD_SHARD_ARG_IDX, indexNames, new Text(key), null);
    if (results.isError()) {
        throw results.getChokException();
    }
    List<String> stringResults = new ArrayList<>();
    for (TextArrayWritable taw : results.getResults()) {
        for (Writable w : taw.array.get()) {
            Text text = (Text) w;
            stringResults.add(text.toString());
        }
    }
    return stringResults;
}

From source file:com.dasasian.chok.mapfile.MapFileServerTest.java

License:Apache License

protected String getOneResult(IMapFileServer server, String key, String[] shards) throws Exception {
    TextArrayWritable texts = server.get(new Text(key), shards);
    assertNotNull(texts);
    assertNotNull(texts.array);
    Writable[] array = texts.array.get();
    assertEquals(1, array.length);
    assertTrue(array[0] instanceof Text);
    Text text = (Text) array[0];
    return text.toString();
}

From source file:com.dataflowdeveloper.detection.URLDetector.java

License:Apache License

/**
 * UDF Evaluation
 * 
 * @param s
 *            Text passed in
 * @return Text cleaned
 */
public Text evaluate(final Text s) {
    if (s == null) {
        return null;
    }
    try {
        String cleaned = Util.detect(s.toString());
        return new Text(cleaned);
    } catch (Exception e) {
        e.printStackTrace();
    }
    return null;
}

From source file:com.datasalt.pangool.examples.gameoflife.GameOfLifeJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        failArguments("Wrong number of arguments");
        return -1;
    }
    String output = args[0];
    String input = GameOfLifeJob.class.getName() + "-prepared-input";
    delete(output);
    delete(input);

    final int gridSize = Integer.parseInt(args[1]);
    // Write the input of the job as a set of (min, max) intervals
    // Each number between (min, max) represents a possible initial configuration for Game of Life
    int parallelism = Integer.parseInt(args[2]);
    int maxCombinations = (int) Math.pow(2, gridSize * gridSize);
    int splitSize = maxCombinations / parallelism;
    FileSystem fS = FileSystem.get(conf);
    BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fS.create(new Path(input))));
    for (int i = 0; i < parallelism; i++) {
        writer.write(((i * splitSize) + 1) + "\t" + ((i + 1) * splitSize) + "\n");
    }
    writer.close();

    // Optional parameters: maxX, maxY, #iterations
    final int maxX = conf.getInt("gol.max_x", 32);
    final int maxY = conf.getInt("gol.max_y", 32);
    final int iterations = conf.getInt("gol.iterations", 1000);
    Log.info(
            "using parameters: maxX grid: " + maxX + " maxY grid: " + maxY + " max #iterations: " + iterations);

    // Define the intermediate schema: a pair of ints
    final Schema schema = new Schema("minMax", Fields.parse("min:int, max:int"));

    TupleMRBuilder job = new TupleMRBuilder(conf);
    job.addIntermediateSchema(schema);
    job.setGroupByFields("min", "max");
    job.setCustomPartitionFields("min");
    // Define the input and its associated mapper
    // The mapper will just emit the (min, max) pairs to the reduce stage
    job.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class),
            new TupleMapper<LongWritable, Text>() {

                Tuple tuple = new Tuple(schema);

                @Override
                public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
                        throws IOException, InterruptedException {
                    String[] fields = value.toString().split("\t");
                    tuple.set("min", Integer.parseInt(fields[0]));
                    tuple.set("max", Integer.parseInt(fields[1]));
                    collector.write(tuple);
                }
            });

    // Define the reducer
    // The reducer will run as many games of life as (max - min) for each interval it receives
    // It will emit the inputs of GOL that converged together with the number of iterations
    // Note that inputs that produce grid overflow are ignored (but may have longer iteration convergence)
    job.setTupleReducer(new TupleReducer<Text, NullWritable>() {

        public void reduce(ITuple group, Iterable<ITuple> tuples, TupleMRContext context, Collector collector)
                throws IOException, InterruptedException, TupleMRException {

            int min = (Integer) group.get("min"), max = (Integer) group.get("max");
            for (int i = min; i < max; i++) {
                try {
                    GameOfLife gameOfLife = new GameOfLife(gridSize, GameOfLife.longToBytes((long) i), maxX,
                            maxY, iterations);
                    while (true) {
                        gameOfLife.nextCycle();
                    }
                } catch (GameOfLifeException e) {
                    context.getHadoopContext().progress();
                    context.getHadoopContext().getCounter("stats", e.getCauseMessage() + "").increment(1);
                    if (e.getCauseMessage().equals(CauseMessage.CONVERGENCE_REACHED)) {
                        collector.write(new Text(
                                Arrays.toString(GameOfLife.longToBytes((long) i)) + "\t" + e.getIterations()),
                                NullWritable.get());
                    }
                }
            }
        };
    });

    job.setOutput(new Path(output), new HadoopOutputFormat(TextOutputFormat.class), Text.class,
            NullWritable.class);
    try {
        job.createJob().waitForCompletion(true);
    } finally {
        job.cleanUpInstanceFiles();
    }
    delete(input);
    return 0;
}

From source file:com.datasalt.pangool.flow.mapred.TextMapper.java

License:Apache License

@Override
public void map(Object keyToIgnore, Text value, TupleMRContext context, Collector collector)
        throws IOException, InterruptedException {

    op.process(value.toString(), callback);
}

From source file:com.datasalt.pangool.solr.TupleSolrOutputFormatExample.java

License:Apache License

public int run(String input, String output, Configuration conf) throws Exception {
    // Define the intermediate schema: It must match SOLR's schema.xml!
    final Schema schema = new Schema("iSchema", Fields.parse("user_id:string, message:string"));

    TupleMRBuilder job = new TupleMRBuilder(conf);
    job.addIntermediateSchema(schema);
    job.setGroupByFields("user_id");
    // Define the input and its associated mapper.
    // We'll just have a Mapper, reducer will be Identity
    job.addInput(new Path(input), new HadoopInputFormat(TextInputFormat.class),
            new TupleMapper<LongWritable, Text>() {

                Tuple tuple = new Tuple(schema);

                @Override
                public void map(LongWritable key, Text value, TupleMRContext context, Collector collector)
                        throws IOException, InterruptedException {
                    String[] fields = value.toString().split("\t");
                    String language = fields[1];
                    tuple.set("user_id", fields[0]);
                    tuple.set("message", fields[2]);
                    if (language.equals("en")) {
                        // English -> write to main output
                        collector.write(tuple);
                    } else if (language.equals("fr")) {
                        // French -> write to french index
                        collector.getNamedOutput("FR").write(tuple, NullWritable.get());
                    } else if (language.equals("es")) {
                        // Spanish -> write to spanish index
                        collector.getNamedOutput("ES").write(tuple, NullWritable.get());
                    }
                }
            });
    // Add multi-output: French index
    job.addNamedOutput("FR", new TupleSolrOutputFormat(new File("src/test/resources/solr-fr"), conf),
            ITuple.class, NullWritable.class);
    // Add multi-output: Spanish index
    job.addNamedOutput("ES", new TupleSolrOutputFormat(new File("src/test/resources/solr-es"), conf),
            ITuple.class, NullWritable.class);
    job.setTupleReducer(new IdentityTupleReducer());
    // Add multi-output: English index
    job.setOutput(new Path(output), new TupleSolrOutputFormat(new File("src/test/resources/solr-en"), conf),
            ITuple.class, NullWritable.class);
    Job hadoopJob = job.createJob();
    try {
        hadoopJob.waitForCompletion(true);
        if (!hadoopJob.isSuccessful()) {
            throw new PangoolRuntimeException("Job was not successful");
        }
    } finally {
        job.cleanUpInstanceFiles();
    }
    return 0;
}

From source file:com.datasalt.pangool.tuplemr.mapred.TestRollup.java

License:Apache License

/**
 * Checks that {@link RollupReducer} properly calls {@link TupleReducer#onOpenGroup},
 * {@link TupleReducer#onCloseGroup} and {@link TupleReducer#onGroupElements}, and checks that the elements (tuples)
 * passed are coherent. This method assumes a specific output from the {@link TupleReducer}: the output needs to be
 * Text,Text for key and value, using this format: key("OPEN depth"), value("serialized value"),
 * key("CLOSE depth"), value("serialized value"), key("ELEMENT"), value("serialized element")
 * (every element received in onElements needs to produce a record like this).
 * 
 * For instance: key("OPEN 0"), value("element1") key("OPEN 1"), value("element1") key("ELEMENT"), value("element1")
 * key("ELEMENT"), value("element2") key("CLOSE 1"), value("element2") key("CLOSE 0"), value("element2")
 */
public void checkRollupOutput(Path path, int minDepth, int maxDepth) throws IOException {
    SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.getLocal(getConf()), path, getConf());

    Text actualKey = new Text();
    Text actualValue = new Text();
    reader.next(actualKey, actualValue); // first action
    String currentKey = actualKey.toString();
    String currentValue = actualValue.toString();

    Assert.assertTrue("First output needs to be an OPEN ", currentKey.startsWith("OPEN"));
    int currentDepth = Integer.parseInt(currentKey.split(" ")[1]);
    Assert.assertEquals("First OPEN needs to match minDepth", minDepth, currentDepth);
    int lastDepth = currentDepth;
    String lastValue = currentValue;
    State lastState = State.OPEN;

    while (reader.next(actualKey, actualValue)) {
        currentKey = actualKey.toString();
        currentValue = actualValue.toString();
        if (currentKey.startsWith("OPEN")) {
            currentDepth = Integer.parseInt(currentKey.split(" ")[1]);
            Assert.assertEquals("OPEN needs to increase depth in +1 ", lastDepth + 1, currentDepth);
            Assert.assertTrue("Too many OPENs, over maxDepth ", maxDepth >= currentDepth);
            if (lastState == State.OPEN) {
                Assert.assertEquals("First element in OPEN needs to match first element in previous OPEN",
                        lastValue, currentValue);
            } else if (lastState == State.CLOSE) {
                Assert.assertNotSame(
                        "Element from new group needs to be different from last element from last group ",
                        lastValue, currentValue);
            } else {
                Assert.fail("Not allowed OPEN after ELEMENT");
            }
            lastState = State.OPEN;
            lastValue = currentValue;
            lastDepth = currentDepth;

        } else if (currentKey.startsWith("CLOSE")) {
            currentDepth = Integer.parseInt(currentKey.split(" ")[1]);
            Assert.assertNotSame("Not allowed CLOSE after OPEN , needs at least one ELEMENT in between",
                    State.OPEN, lastState);
            Assert.assertEquals("CLOSE depth needs to match previous OPEN depth", lastDepth, currentDepth);
            Assert.assertEquals("Element in CLOSE needs to match lastElement in group", lastValue,
                    currentValue);

            lastState = State.CLOSE;
            lastValue = currentValue;
            lastDepth = currentDepth - 1;

        } else if (currentKey.startsWith("ELEMENT")) {
            Assert.assertNotSame("Not allowed ELEMENT after CLOSE, needs an OPEN or ELEMENT before",
                    State.CLOSE, lastState);
            lastState = State.ELEMENT;
            lastValue = currentValue;
        }
    }

    Assert.assertEquals("File doesn't properly finishes with a CLOSE ", State.CLOSE, lastState);
    Assert.assertEquals("Last CLOSE doesn't close the minDepth ", minDepth - 1, lastDepth);
    reader.close();
}