Example usage for org.apache.lucene.analysis Token getPayload

List of usage examples for org.apache.lucene.analysis Token getPayload

Introduction

In this page you can find the example usage for org.apache.lucene.analysis Token getPayload.

Prototype

@Override
public BytesRef getPayload() 

Source Link

Usage

From source file:au.edu.unimelb.csse.analyser.JsonSentenceParserTest.java

License:Apache License

public void testLong() {
    String jsonString = "{\"n\":\"S\", \"i\":\"0_32_0_65\", \"c\":[{\"n\":\"NP\", \"i\":\"0_2_1_64\", \"c\":[{\"n\":\"NP\", \"i\":\"0_1_2_4\", \"c\":[{\"n\":\"NNP\", \"i\":\"0_1_3_1\", \"c\":[{\"n\":\"Arafat\", \"i\":\"0_1_4_0\", \"c\":[]}]}]}, {\"n\":\"NP\", \"i\":\"1_2_2_4\", \"c\":[{\"n\":\"PRP\", \"i\":\"1_2_3_3\", \"c\":[{\"n\":\"himself\", \"i\":\"1_2_4_2\", \"c\":[]}]}]}]}, {\"n\":\"VP\", \"i\":\"2_30_1_64\", \"c\":[{\"n\":\"VBD\", \"i\":\"2_3_2_61\", \"c\":[{\"n\":\"said\", \"i\":\"2_3_3_5\", \"c\":[]}]}, {\"n\":\"SBAR\", \"i\":\"3_30_2_61\", \"c\":[{\"n\":\"S\", \"i\":\"3_30_3_60\", \"c\":[{\"n\":\"NP\", \"i\":\"3_5_4_59\", \"c\":[{\"n\":\"DT\", \"i\":\"3_4_5_8\", \"c\":[{\"n\":\"the\", \"i\":\"3_4_6_6\", \"c\":[]}]}, {\"n\":\"NN\", \"i\":\"4_5_5_8\", \"c\":[{\"n\":\"award\", \"i\":\"4_5_6_7\", \"c\":[]}]}]}, {\"n\":\"VP\", \"i\":\"5_30_4_59\", \"c\":[{\"n\":\"VBD\", \"i\":\"5_6_5_58\", \"c\":[{\"n\":\"was\", \"i\":\"5_6_6_9\", \"c\":[]}]}, {\"n\":\"RB\", \"i\":\"6_7_5_58\", \"c\":[{\"n\":\"not\", \"i\":\"6_7_6_10\", \"c\":[]}]}, {\"n\":\"VP\", \"i\":\"7_30_5_58\", \"c\":[{\"n\":\"VBN\", \"i\":\"7_8_6_57\", \"c\":[{\"n\":\"granted\", \"i\":\"7_8_7_11\", \"c\":[]}]}, {\"n\":\"PP\", \"i\":\"8_30_6_57\", \"c\":[{\"n\":\"``\", \"i\":\"8_9_7_56\", \"c\":[{\"n\":\"``\", \"i\":\"8_9_8_12\", \"c\":[]}]}, {\"n\":\"TO\", \"i\":\"9_10_7_56\", \"c\":[{\"n\":\"to\", \"i\":\"9_10_8_13\", \"c\":[]}]}, {\"n\":\"NP\", \"i\":\"10_30_7_56\", \"c\":[{\"n\":\"NP\", \"i\":\"10_11_8_55\", \"c\":[{\"n\":\"NN\", \"i\":\"10_11_9_15\", \"c\":[{\"n\":\"crown\", \"i\":\"10_11_10_14\", \"c\":[]}]}]}, {\"n\":\"NP\", \"i\":\"11_26_8_55\", \"c\":[{\"n\":\"DT\", \"i\":\"11_12_9_44\", \"c\":[{\"n\":\"an\", \"i\":\"11_12_10_16\", \"c\":[]}]}, {\"n\":\"NN\", \"i\":\"12_13_9_44\", \"c\":[{\"n\":\"endeavor\", \"i\":\"12_13_10_17\", \"c\":[]}]}, {\"n\":\"SBAR\", \"i\":\"13_26_9_44\", \"c\":[{\"n\":\"IN\", \"i\":\"13_14_10_43\", \"c\":[{\"n\":\"that\", \"i\":\"13_14_11_18\", \"c\":[]}]}, 
{\"n\":\"S\", \"i\":\"14_26_10_43\", \"c\":[{\"n\":\"NP\", \"i\":\"14_15_11_42\", \"c\":[{\"n\":\"PRP\", \"i\":\"14_15_12_20\", \"c\":[{\"n\":\"we\", \"i\":\"14_15_13_19\", \"c\":[]}]}]}, {\"n\":\"VP\", \"i\":\"15_26_11_42\", \"c\":[{\"n\":\"VP\", \"i\":\"15_17_12_41\", \"c\":[{\"n\":\"VBP\", \"i\":\"15_16_13_24\", \"c\":[{\"n\":\"have\", \"i\":\"15_16_14_21\", \"c\":[]}]}, {\"n\":\"VP\", \"i\":\"16_17_13_24\", \"c\":[{\"n\":\"VBN\", \"i\":\"16_17_14_23\", \"c\":[{\"n\":\"completed\", \"i\":\"16_17_15_22\", \"c\":[]}]}]}]}, {\"n\":\"CC\", \"i\":\"17_18_12_41\", \"c\":[{\"n\":\"but\", \"i\":\"17_18_13_25\", \"c\":[]}]}, {\"n\":\"RB\", \"i\":\"18_19_12_41\", \"c\":[{\"n\":\"rather\", \"i\":\"18_19_13_26\", \"c\":[]}]}, {\"n\":\"VP\", \"i\":\"19_26_12_41\", \"c\":[{\"n\":\"TO\", \"i\":\"19_20_13_40\", \"c\":[{\"n\":\"to\", \"i\":\"19_20_14_27\", \"c\":[]}]}, {\"n\":\"VP\", \"i\":\"20_26_13_40\", \"c\":[{\"n\":\"VB\", \"i\":\"20_21_14_39\", \"c\":[{\"n\":\"encourage\", \"i\":\"20_21_15_28\", \"c\":[]}]}, {\"n\":\"S\", \"i\":\"21_26_14_39\", \"c\":[{\"n\":\"NP\", \"i\":\"21_22_15_38\", \"c\":[{\"n\":\"PRP\", \"i\":\"21_22_16_30\", \"c\":[{\"n\":\"us\", \"i\":\"21_22_17_29\", \"c\":[]}]}]}, {\"n\":\"VP\", \"i\":\"22_26_15_38\", \"c\":[{\"n\":\"TO\", \"i\":\"22_23_16_37\", \"c\":[{\"n\":\"to\", \"i\":\"22_23_17_31\", \"c\":[]}]}, {\"n\":\"VP\", \"i\":\"23_26_16_37\", \"c\":[{\"n\":\"VB\", \"i\":\"23_24_17_36\", \"c\":[{\"n\":\"continue\", \"i\":\"23_24_18_32\", \"c\":[]}]}, {\"n\":\"NP\", \"i\":\"24_26_17_36\", \"c\":[{\"n\":\"DT\", \"i\":\"24_25_18_35\", \"c\":[{\"n\":\"a\", \"i\":\"24_25_19_33\", \"c\":[]}]}, {\"n\":\"NN\", \"i\":\"25_26_18_35\", \"c\":[{\"n\":\"road\", \"i\":\"25_26_19_34\", \"c\":[]}]}]}]}]}]}]}]}]}]}]}]}, {\"n\":\"SBAR\", \"i\":\"26_30_8_55\", \"c\":[{\"n\":\"WHNP\", \"i\":\"26_27_9_54\", \"c\":[{\"n\":\"WDT\", \"i\":\"26_27_10_46\", \"c\":[{\"n\":\"which\", \"i\":\"26_27_11_45\", \"c\":[]}]}]}, {\"n\":\"S\", \"i\":\"27_30_9_54\", 
\"c\":[{\"n\":\"NP\", \"i\":\"27_28_10_53\", \"c\":[{\"n\":\"PRP\", \"i\":\"27_28_11_48\", \"c\":[{\"n\":\"we\", \"i\":\"27_28_12_47\", \"c\":[]}]}]}, {\"n\":\"VP\", \"i\":\"28_30_10_53\", \"c\":[{\"n\":\"VBP\", \"i\":\"28_29_11_52\", \"c\":[{\"n\":\"have\", \"i\":\"28_29_12_49\", \"c\":[]}]}, {\"n\":\"VP\", \"i\":\"29_30_11_52\", \"c\":[{\"n\":\"VBN\", \"i\":\"29_30_12_51\", \"c\":[{\"n\":\"started\", \"i\":\"29_30_13_50\", \"c\":[]}]}]}]}]}]}]}]}]}]}]}]}]}, {\"n\":\".\", \"i\":\"30_31_1_64\", \"c\":[{\"n\":\".\", \"i\":\"30_31_2_62\", \"c\":[]}]}, {\"n\":\"''\", \"i\":\"31_32_1_64\", \"c\":[{\"n\":\"''\", \"i\":\"31_32_2_63\", \"c\":[]}]}]}";
    JsonSentenceParser parser = new JsonSentenceParser(false);
    parser.parse(jsonString);/*from w  w w  .j  av a  2  s.  com*/
    Token token = new Token();
    parser.next(token);
    assertNotNull(token);
    assertEquals("S", token.term());
    assertEquals(0, token.getPayload().byteAt(0));
    assertEquals(32, token.getPayload().byteAt(1));
    assertEquals(0, token.getPayload().byteAt(2));
    assertEquals(65, token.getPayload().byteAt(3));

    parser.next(token);
    assertEquals("NP", token.term());
    assertEquals(0, token.getPayload().byteAt(0));
    assertEquals(2, token.getPayload().byteAt(1));
    assertEquals(1, token.getPayload().byteAt(2));
    assertEquals(64, token.getPayload().byteAt(3));

    parser.next(token);
    assertEquals("NP", token.term());
    assertEquals(0, token.getPayload().byteAt(0));
    assertEquals(1, token.getPayload().byteAt(1));
    assertEquals(2, token.getPayload().byteAt(2));
    assertEquals(4, token.getPayload().byteAt(3));

    parser.next(token);
    assertEquals("NNP", token.term());
    assertEquals(0, token.getPayload().byteAt(0));
    assertEquals(1, token.getPayload().byteAt(1));
    assertEquals(3, token.getPayload().byteAt(2));
    assertEquals(1, token.getPayload().byteAt(3));
}

From source file:au.edu.unimelb.csse.analyser.JsonSentenceParserTest.java

License:Apache License

/**
 * A node label containing an escaped double quote ({@code S"}) must survive
 * JSON parsing intact, and the root token's payload bytes must still match
 * the interval encoded in the root's "i" field (0_32_0_65).
 */
public void testSentenceContainingEscapedDoubleQuotes() {
    String json = "{\"n\":\"S\\\"\", \"i\":\"0_32_0_65\", \"c\":[{\"n\":\"NP\", \"i\":\"0_2_1_64\", \"c\":[{\"n\":\"NP\", \"i\":\"0_1_2_4\", \"c\":[{\"n\":\"NNP\", \"i\":\"0_1_3_1\", \"c\":[{\"n\":\"Arafat\", \"i\":\"0_1_4_0\", \"c\":[]}]}]}, {\"n\":\"NP\", \"i\":\"1_2_2_4\", \"c\":[{\"n\":\"PRP\", \"i\":\"1_2_3_3\", \"c\":[{\"n\":\"himself\", \"i\":\"1_2_4_2\", \"c\":[]}]}]}]}, {\"n\":\"VP\", \"i\":\"2_30_1_64\", \"c\":[{\"n\":\"VBD\", \"i\":\"2_3_2_61\", \"c\":[{\"n\":\"said\", \"i\":\"2_3_3_5\", \"c\":[]}]}, {\"n\":\"SBAR\", \"i\":\"3_30_2_61\", \"c\":[{\"n\":\"S\", \"i\":\"3_30_3_60\", \"c\":[{\"n\":\"NP\", \"i\":\"3_5_4_59\", \"c\":[{\"n\":\"DT\", \"i\":\"3_4_5_8\", \"c\":[{\"n\":\"the\", \"i\":\"3_4_6_6\", \"c\":[]}]}, {\"n\":\"NN\", \"i\":\"4_5_5_8\", \"c\":[{\"n\":\"award\", \"i\":\"4_5_6_7\", \"c\":[]}]}]}, {\"n\":\"VP\", \"i\":\"5_30_4_59\", \"c\":[{\"n\":\"VBD\", \"i\":\"5_6_5_58\", \"c\":[{\"n\":\"was\", \"i\":\"5_6_6_9\", \"c\":[]}]}, {\"n\":\"RB\", \"i\":\"6_7_5_58\", \"c\":[{\"n\":\"not\", \"i\":\"6_7_6_10\", \"c\":[]}]}, {\"n\":\"VP\", \"i\":\"7_30_5_58\", \"c\":[{\"n\":\"VBN\", \"i\":\"7_8_6_57\", \"c\":[{\"n\":\"granted\", \"i\":\"7_8_7_11\", \"c\":[]}]}, {\"n\":\"PP\", \"i\":\"8_30_6_57\", \"c\":[{\"n\":\"``\", \"i\":\"8_9_7_56\", \"c\":[{\"n\":\"``\", \"i\":\"8_9_8_12\", \"c\":[]}]}, {\"n\":\"TO\", \"i\":\"9_10_7_56\", \"c\":[{\"n\":\"to\", \"i\":\"9_10_8_13\", \"c\":[]}]}, {\"n\":\"NP\", \"i\":\"10_30_7_56\", \"c\":[{\"n\":\"NP\", \"i\":\"10_11_8_55\", \"c\":[{\"n\":\"NN\", \"i\":\"10_11_9_15\", \"c\":[{\"n\":\"crown\", \"i\":\"10_11_10_14\", \"c\":[]}]}]}, {\"n\":\"NP\", \"i\":\"11_26_8_55\", \"c\":[{\"n\":\"DT\", \"i\":\"11_12_9_44\", \"c\":[{\"n\":\"an\", \"i\":\"11_12_10_16\", \"c\":[]}]}, {\"n\":\"NN\", \"i\":\"12_13_9_44\", \"c\":[{\"n\":\"endeavor\", \"i\":\"12_13_10_17\", \"c\":[]}]}, {\"n\":\"SBAR\", \"i\":\"13_26_9_44\", \"c\":[{\"n\":\"IN\", \"i\":\"13_14_10_43\", \"c\":[{\"n\":\"that\", \"i\":\"13_14_11_18\", \"c\":[]}]}, 
{\"n\":\"S\", \"i\":\"14_26_10_43\", \"c\":[{\"n\":\"NP\", \"i\":\"14_15_11_42\", \"c\":[{\"n\":\"PRP\", \"i\":\"14_15_12_20\", \"c\":[{\"n\":\"we\", \"i\":\"14_15_13_19\", \"c\":[]}]}]}, {\"n\":\"VP\", \"i\":\"15_26_11_42\", \"c\":[{\"n\":\"VP\", \"i\":\"15_17_12_41\", \"c\":[{\"n\":\"VBP\", \"i\":\"15_16_13_24\", \"c\":[{\"n\":\"have\", \"i\":\"15_16_14_21\", \"c\":[]}]}, {\"n\":\"VP\", \"i\":\"16_17_13_24\", \"c\":[{\"n\":\"VBN\", \"i\":\"16_17_14_23\", \"c\":[{\"n\":\"completed\", \"i\":\"16_17_15_22\", \"c\":[]}]}]}]}, {\"n\":\"CC\", \"i\":\"17_18_12_41\", \"c\":[{\"n\":\"but\", \"i\":\"17_18_13_25\", \"c\":[]}]}, {\"n\":\"RB\", \"i\":\"18_19_12_41\", \"c\":[{\"n\":\"rather\", \"i\":\"18_19_13_26\", \"c\":[]}]}, {\"n\":\"VP\", \"i\":\"19_26_12_41\", \"c\":[{\"n\":\"TO\", \"i\":\"19_20_13_40\", \"c\":[{\"n\":\"to\", \"i\":\"19_20_14_27\", \"c\":[]}]}, {\"n\":\"VP\", \"i\":\"20_26_13_40\", \"c\":[{\"n\":\"VB\", \"i\":\"20_21_14_39\", \"c\":[{\"n\":\"encourage\", \"i\":\"20_21_15_28\", \"c\":[]}]}, {\"n\":\"S\", \"i\":\"21_26_14_39\", \"c\":[{\"n\":\"NP\", \"i\":\"21_22_15_38\", \"c\":[{\"n\":\"PRP\", \"i\":\"21_22_16_30\", \"c\":[{\"n\":\"us\", \"i\":\"21_22_17_29\", \"c\":[]}]}]}, {\"n\":\"VP\", \"i\":\"22_26_15_38\", \"c\":[{\"n\":\"TO\", \"i\":\"22_23_16_37\", \"c\":[{\"n\":\"to\", \"i\":\"22_23_17_31\", \"c\":[]}]}, {\"n\":\"VP\", \"i\":\"23_26_16_37\", \"c\":[{\"n\":\"VB\", \"i\":\"23_24_17_36\", \"c\":[{\"n\":\"continue\", \"i\":\"23_24_18_32\", \"c\":[]}]}, {\"n\":\"NP\", \"i\":\"24_26_17_36\", \"c\":[{\"n\":\"DT\", \"i\":\"24_25_18_35\", \"c\":[{\"n\":\"a\", \"i\":\"24_25_19_33\", \"c\":[]}]}, {\"n\":\"NN\", \"i\":\"25_26_18_35\", \"c\":[{\"n\":\"road\", \"i\":\"25_26_19_34\", \"c\":[]}]}]}]}]}]}]}]}]}]}]}]}, {\"n\":\"SBAR\", \"i\":\"26_30_8_55\", \"c\":[{\"n\":\"WHNP\", \"i\":\"26_27_9_54\", \"c\":[{\"n\":\"WDT\", \"i\":\"26_27_10_46\", \"c\":[{\"n\":\"which\", \"i\":\"26_27_11_45\", \"c\":[]}]}]}, {\"n\":\"S\", \"i\":\"27_30_9_54\", 
\"c\":[{\"n\":\"NP\", \"i\":\"27_28_10_53\", \"c\":[{\"n\":\"PRP\", \"i\":\"27_28_11_48\", \"c\":[{\"n\":\"we\", \"i\":\"27_28_12_47\", \"c\":[]}]}]}, {\"n\":\"VP\", \"i\":\"28_30_10_53\", \"c\":[{\"n\":\"VBP\", \"i\":\"28_29_11_52\", \"c\":[{\"n\":\"have\", \"i\":\"28_29_12_49\", \"c\":[]}]}, {\"n\":\"VP\", \"i\":\"29_30_11_52\", \"c\":[{\"n\":\"VBN\", \"i\":\"29_30_12_51\", \"c\":[{\"n\":\"started\", \"i\":\"29_30_13_50\", \"c\":[]}]}]}]}]}]}]}]}]}]}]}]}]}, {\"n\":\".\", \"i\":\"30_31_1_64\", \"c\":[{\"n\":\".\", \"i\":\"30_31_2_62\", \"c\":[]}]}, {\"n\":\"''\", \"i\":\"31_32_1_64\", \"c\":[{\"n\":\"''\", \"i\":\"31_32_2_63\", \"c\":[]}]}]}";
    JsonSentenceParser sentenceParser = new JsonSentenceParser(false);
    sentenceParser.parse(json);

    Token tok = new Token();
    sentenceParser.next(tok);
    assertNotNull(tok);

    // The escaped quote is part of the term, not a JSON delimiter.
    assertEquals("S\"", tok.term());
    // Root payload bytes: right=0, left=32, depth=0, parent=65.
    assertEquals(0, tok.getPayload().byteAt(0));
    assertEquals(32, tok.getPayload().byteAt(1));
    assertEquals(0, tok.getPayload().byteAt(2));
    assertEquals(65, tok.getPayload().byteAt(3));
}

From source file:au.edu.unimelb.csse.analyser.String2NodesParserTest.java

License:Apache License

/**
 * Asserts that the token's four payload bytes match the expected
 * right/left/depth/parent values, in that order.
 */
private void assertPayload(Token token, int right, int left, int depth, int parent) {
    final Payload actual = token.getPayload();
    final int[] expected = { right, left, depth, parent };
    for (int i = 0; i < expected.length; i++) {
        assertEquals(expected[i], actual.byteAt(i));
    }
}

From source file:hu.mokk.hunglish.lucene.analysis.CompoundWordTokenFilterBase.java

/**
 * Copies every attribute of the supplied {@link Token} (term text, offsets,
 * type, flags, position increment, payload) onto this stream's attribute
 * instances, so the new-API attributes mirror the legacy token.
 */
private final void setToken(final Token token) throws IOException {
    // Each setter targets a distinct attribute instance, so order is irrelevant.
    offsetAtt.setOffset(token.startOffset(), token.endOffset());
    termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength());
    posIncAtt.setPositionIncrement(token.getPositionIncrement());
    typeAtt.setType(token.type());
    flagsAtt.setFlags(token.getFlags());
    payloadAtt.setPayload(token.getPayload());
}

From source file:magoffin.matt.lucene.KeyFilter.java

License:Open Source License

/**
 * Returns the next token with its term truncated to {@code maxLength}
 * characters, or {@code null} when the underlying stream is exhausted.
 * The returned token's end offset is recomputed from the (possibly
 * truncated) key length.
 */
@Override
public Token next() throws IOException {
    final Token t = new Token();
    Token token = next(t);
    if (token == null) {
        return null;
    }
    String key = token.term();
    if (key.length() > maxLength) {
        key = key.substring(0, maxLength);
    }
    char[] keyChars = key.toCharArray();
    Token result = new Token(keyChars, 0, keyChars.length, token.startOffset(),
            token.startOffset() + keyChars.length);
    // BUG FIX: the original cloned the payload onto the intermediate token and
    // then returned a fresh Token without it, so the clone was a dead store and
    // the payload was silently dropped. Attach the defensive clone to the token
    // that is actually returned.
    Payload p = token.getPayload();
    if (p != null) {
        result.setPayload((Payload) p.clone());
    }
    return result;
}

From source file:magoffin.matt.lucene.LuceneSearchService.java

License:Open Source License

/**
 * Add a series of non-required TermQuery objects to a BooleanQuery, 
 * from tokenizing a string with the Analyzer used by the index type.
 * @param rootQuery the root boolean query
 * @param query the query to tokenize/*ww  w. java  2 s.  co m*/
 * @param field the field this query is searching
 * @param type the index type
 */
@Override
public void addTokenizedTermQuery(BooleanQuery rootQuery, String query, String field, String type) {
    StringReader reader = new StringReader(query);
    IndexData data = getIndexData(type);
    TokenStream stream = data.plugin.getAnalyzer().tokenStream(field, reader);
    try {
        Token t = new Token();
        while (true) {
            Token token = stream.next(t);
            if (token == null) {
                break;
            }
            Payload p = token.getPayload();
            if (p != null) {
                token.setPayload((Payload) p.clone());
            }
            Query q = new TermQuery(new Term(field, token.term()));
            rootQuery.add(q, Occur.SHOULD);
        }
    } catch (IOException e) {
        throw new RuntimeException("Unable to tokenize query string", e);
    }
}

From source file:magoffin.matt.lucene.LuceneSearchService.java

License:Open Source License

/**
 * Adds a series of non-required {@link FuzzyQuery} objects to a {@link BooleanQuery},
 * from tokenizing a string with the Analyzer used by the index type.
 *
 * @param rootQuery the root boolean query
 * @param query the query to tokenize
 * @param field the field this query is searching
 * @param type the index type
 */
@Override
public void addTokenizedFuzzyQuery(BooleanQuery rootQuery, String query, String field, String type) {
    StringReader reader = new StringReader(query);
    IndexData data = getIndexData(type);
    TokenStream stream = data.plugin.getAnalyzer().tokenStream(field, reader);
    try {
        // Reusable token instance for the legacy next(Token) API.
        Token t = new Token();
        Token token;
        while ((token = stream.next(t)) != null) {
            // Only the term text is read; the original's payload-clone dance was
            // a dead store and has been removed.
            rootQuery.add(new FuzzyQuery(new Term(field, token.term())), Occur.SHOULD);
        }
    } catch (IOException e) {
        throw new RuntimeException("Unable to tokenize query string", e);
    } finally {
        try {
            stream.close(); // fix: the stream was previously never closed
        } catch (IOException e) {
            // best-effort close; tokenization result is already determined
        }
    }
}

From source file:org.apache.jackrabbit.core.query.lucene.SingletonTokenStream.java

License:Apache License

/**
 * Creates a new SingletonTokenStream that will replay exactly one token,
 * capturing the given token's term text and payload.
 *
 * @param t the token to replay.
 */
public SingletonTokenStream(Token t) {
    // Field assignments are independent; capture payload first, then text.
    this.payload = t.getPayload();
    this.value = t.term();
}

From source file:org.apache.solr.analysis.BufferedTokenStream.java

License:Apache License

/**
 * Old-API emulation for backwards compatibility: copies all fields of the
 * given legacy {@link Token} onto this stream's attributes.
 *
 * @return always {@code true}, signalling that a token was written.
 */
private boolean writeToken(Token token) throws IOException {
    // Reset all attributes before copying, so stale state cannot leak through.
    clearAttributes();
    // The remaining setters target distinct attributes; order is irrelevant.
    offsetAtt.setOffset(token.startOffset(), token.endOffset());
    termAtt.copyBuffer(token.buffer(), 0, token.length());
    posIncAtt.setPositionIncrement(token.getPositionIncrement());
    typeAtt.setType(token.type());
    flagsAtt.setFlags(token.getFlags());
    payloadAtt.setPayload(token.getPayload());
    return true;
}