Example usage for org.apache.lucene.document Document getValues

List of usage examples for org.apache.lucene.document Document getValues

Introduction

On this page you can find example usages of the getValues method of org.apache.lucene.document.Document.

Prototype

public final String[] getValues(String name) 

Source Link

Document

Returns an array of values of the field specified as the method parameter.

Usage

From source file:TestWang.java

License:Open Source License

/**
 * Runs every query in {@code sampleQueries} through the given searcher and prints one
 * tab-separated result row: the prefix, mean average precision, precision at ten, error rate,
 * and the precision at ten of each of the ten categories.
 * <p>
 * A hit counts as relevant when it is not the query image itself and its numeric id falls into
 * the same block of one hundred ids as the query ({@code id / 100}).
 *
 * @param searcher the searcher under evaluation
 * @param prefix   label printed in the first column of the result row
 * @throws IOException if the index cannot be opened or read
 */
public void computeMAP(ImageSearcher searcher, String prefix) throws IOException {
    System.out.println("ComputeMap");
    // copy index to ram to be much faster ...
    IndexReader reader = IndexReader.open(new RAMDirectory(FSDirectory.open(new File(indexPath))), true);
    try {
        Pattern p = Pattern.compile("([0-9]+).jpg");
        double map = 0;
        double errorRate = 0d;
        double precision10 = 0d;
        // Per-category sums and query counts; Java already zero-initialises both arrays.
        double[] pr10cat = new double[10];
        double[] pr10cnt = new double[10];
        System.out.println("name\tmap\tp@10\terror rate");
        System.out.println("Anzahl Anfragen: " + sampleQueries.length);
        for (int i = 0; i < sampleQueries.length; i++) {
            int id = sampleQueries[i];
            // Integer division already truncates, so no Math.floor is needed.
            int category = id / 100;
            // NOTE(review): findDoc returns null when the image is not indexed; the searcher
            // is then handed a null document - confirm that cannot happen for sampleQueries.
            ImageSearchHits hits = searcher.search(findDoc(reader, id + ".jpg"), reader);
            int goodOnes = 0;
            double avgPrecision = 0d;
            double precision10temp = 0d;
            int countResults = 0;
            for (int j = 0; j < hits.length(); j++) {
                Document d = hits.doc(j);
                String hitsId = d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
                Matcher matcher = p.matcher(hitsId);
                if (matcher.find()) {
                    hitsId = matcher.group(1);
                } else {
                    fail("Did not get the number ...");
                }
                int testID = Integer.parseInt(hitsId);
                boolean isQueryItself = testID == id;
                if (!isQueryItself) {
                    countResults++;
                }
                if (!isQueryItself && category == testID / 100) {
                    goodOnes++;
                    // Only if there is a change in recall
                    avgPrecision += (double) goodOnes / (double) countResults;
                    // Presumably rank 0 is the query image itself, which makes ranks 1..10
                    // the top ten real results - TODO confirm.
                    if (j <= 10) {
                        precision10temp += 1d;
                    }
                } else if (j == 1) {
                    // error rate: the hit at index 1 was not counted as relevant
                    errorRate++;
                }
            } // end for loop iterating results.
            // Without any relevant hit the average precision stays 0; dividing by zero here
            // would yield NaN and turn the overall MAP into NaN as well.
            if (goodOnes > 0) {
                avgPrecision = avgPrecision / goodOnes;
            }
            precision10 += precision10temp / 10d;
            // precision @ 10 for each category ...
            pr10cat[category] += precision10temp / 10d;
            pr10cnt[category] += 1d;
            map += avgPrecision;
        }
        map = map / sampleQueries.length;
        errorRate = errorRate / sampleQueries.length;
        precision10 = precision10 / sampleQueries.length;
        System.out.print(prefix + "\t");
        System.out.print(String.format("%.5f ", map) + '\t');
        System.out.print(String.format("%.5f ", precision10) + '\t');
        System.out.print(String.format("%.5f ", errorRate) + '\t');
        // precision@10 per category
        for (int i = 0; i < pr10cat.length; i++) {
            double v = 0;
            if (pr10cnt[i] > 0) {
                v = pr10cat[i] / pr10cnt[i];
            }
            System.out.printf("%.5f\t", v);
        }
        System.out.println();
    } finally {
        // Release the in-memory copy of the index even when a query or assertion fails.
        reader.close();
    }
}

From source file:TestWang.java

License:Open Source License

/**
 * Walks the index from document 0 upwards and returns the first document whose identifier
 * field ends with the platform path separator followed by the given file name.
 *
 * @param reader the index to scan
 * @param file   file name to look for, without any directory part
 * @return the first matching document, or {@code null} if no document matches
 * @throws IOException if a document cannot be read from the index
 */
private Document findDoc(IndexReader reader, String file) throws IOException {
    final String wantedSuffix = File.separator + file;
    int docNumber = 0;
    while (docNumber < reader.numDocs()) {
        Document candidate = reader.document(docNumber);
        String identifier = candidate.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
        if (identifier.endsWith(wantedSuffix)) {
            return candidate;
        }
        docNumber++;
    }
    return null;
}

From source file:TestWang.java

License:Open Source License

/**
 * Looks up several files in one pass over the index.
 * <p>
 * A document matches a file name when its identifier field ends with a path separator followed
 * by that name. Both the backslash and the platform separator are accepted, so identifiers
 * written with either convention are found. If several documents match the same name, the
 * last one in index order is kept.
 *
 * @param reader the index to scan
 * @param file   file names to look for, without any directory part
 * @return an array parallel to {@code file}; an entry is {@code null} when nothing matched
 * @throws IOException if a document cannot be read from the index
 */
private Document[] findDocs(IndexReader reader, String[] file) throws IOException {
    Document[] result = new Document[file.length];
    for (int i = 0; i < reader.numDocs(); i++) {
        Document document = reader.document(i);
        String s = document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
        for (int j = 0; j < result.length; j++) {
            // Accept the backslash form as before, plus the platform separator.
            if (s.endsWith("\\" + file[j]) || s.endsWith(File.separator + file[j])) {
                result[j] = document;
            }
        }
    }
    return result;
}

From source file:KNearestNeighbourDocumentClassifier.java

License:Apache License

/**
 * Builds a More Like This query from the configured text fields of the given document,
 * restricts it to documents that carry a class field value (and to the optional base query),
 * and returns the top {@code k} hits.
 *
 * @param document the document whose field values seed the More Like This query
 * @return the top results for the combined query
 * @throws IOException If there is a low-level I/O error
 */
private TopDocs knnSearch(Document document) throws IOException {
    BooleanQuery.Builder combined = new BooleanQuery.Builder();

    for (String configuredField : textFieldNames) {
        // A configured entry may have the form "name^boost".
        String name = configuredField;
        String boostText = null;
        if (configuredField.contains("^")) {
            String[] parts = configuredField.split("\\^");
            name = parts[0];
            boostText = parts[1];
        }

        String[] contents = document.getValues(name);
        if (boostText != null) {
            mlt.setBoost(true);
            mlt.setBoostFactor(Float.parseFloat(boostText));
        }
        mlt.setAnalyzer(field2analyzer.get(name));

        for (int i = 0; i < contents.length; i++) {
            Query similar = mlt.like(name, new StringReader(contents[i]));
            combined.add(new BooleanClause(similar, BooleanClause.Occur.SHOULD));
        }
        // Switch boosting off again so it does not carry over to the next field.
        mlt.setBoost(false);
    }

    // Only documents that have some value in the class field are eligible.
    Query hasClass = new WildcardQuery(new Term(classFieldName, "*"));
    combined.add(new BooleanClause(hasClass, BooleanClause.Occur.MUST));
    if (query != null) {
        combined.add(query, BooleanClause.Occur.MUST);
    }
    return indexSearcher.search(combined.build(), k);
}

From source file:aos.lucene.analysis.synonym.WordNetSynonymEngine.java

License:Apache License

/**
 * Searches the "word" field for the given term and returns every "syn" value stored on the
 * documents that were hit.
 *
 * @param word the term to look up
 * @return the collected "syn" values in hit order; empty if nothing was found
 * @throws IOException if the search or a document load fails
 */
public String[] getSynonyms(String word) throws IOException {
    // Run the term query and let the collector gather the hits.
    AllDocCollector collector = new AllDocCollector();
    searcher.search(new TermQuery(new Term("word", word)), collector);

    List<String> synonyms = new ArrayList<String>();
    for (ScoreDoc hit : collector.getHits()) {
        // Each hit document may store several "syn" values.
        String[] stored = searcher.doc(hit.doc).getValues("syn");
        for (int i = 0; i < stored.length; i++) {
            synonyms.add(stored[i]);
        }
    }

    return synonyms.toArray(new String[synonyms.size()]);
}

From source file:aos.lucene.search.advanced.BooksLikeThis.java

License:Apache License

/**
 * Finds documents similar to the one with the given id.
 * <p>
 * Similarity is an OR of the document's "author" values (boosted by 2) and the terms of its
 * "subject" term vector; the document itself is excluded through its "isbn" value.
 *
 * @param id  index id of the reference document
 * @param max maximum number of similar documents to return; values below one yield an empty
 *            array
 * @return up to {@code max} similar documents, best match first
 * @throws IOException if the index cannot be read
 */
public Document[] docsLike(int id, int max) throws IOException {
    Document doc = reader.document(id);

    BooleanQuery authorQuery = new BooleanQuery();
    for (String author : doc.getValues("author")) {
        authorQuery.add(new TermQuery(new Term("author", author)), BooleanClause.Occur.SHOULD);
    }
    authorQuery.setBoost(2.0f);

    BooleanQuery subjectQuery = new BooleanQuery();
    TermFreqVector vector = reader.getTermFreqVector(id, "subject");
    // The vector is null when the document has no term vector for "subject";
    // in that case only the authors contribute to the similarity.
    if (vector != null) {
        for (String vecTerm : vector.getTerms()) {
            subjectQuery.add(new TermQuery(new Term("subject", vecTerm)), BooleanClause.Occur.SHOULD);
        }
    }

    BooleanQuery likeThisQuery = new BooleanQuery();
    likeThisQuery.add(authorQuery, BooleanClause.Occur.SHOULD);
    likeThisQuery.add(subjectQuery, BooleanClause.Occur.SHOULD);
    // Keep the reference document out of its own result list.
    likeThisQuery.add(new TermQuery(new Term("isbn", doc.get("isbn"))), BooleanClause.Occur.MUST_NOT);

    if (max <= 0) {
        return new Document[0];
    }

    // Ask for as many hits as the caller wants instead of a fixed 10, but never more than
    // the index can hold, so a huge max does not size the result queue accordingly.
    int limit = Math.min(max, Math.max(1, reader.maxDoc()));
    TopDocs hits = searcher.search(likeThisQuery, limit);
    int size = Math.min(max, hits.scoreDocs.length);

    Document[] docs = new Document[size];
    for (int i = 0; i < size; i++) {
        docs[i] = reader.document(hits.scoreDocs[i].doc);
    }

    return docs;
}

From source file:at.ac.univie.mminf.luceneSKOS.analysis.engine.jena.SKOSEngineImpl.java

License:Apache License

/**
 * Returns the URIs of all concepts that carry the given label as preferred, alternative or
 * hidden label. The label is lower-cased (root locale) before it is looked up.
 *
 * @param label the label to resolve
 * @return the URI field value of every matching document, without duplicates
 * @throws IOException if the search or a document load fails
 */
@Override
public Collection<String> getConcepts(String label) throws IOException {
    // convert the query to lower-case
    final String needle = label.toLowerCase(Locale.ROOT);

    AllDocCollector matches = new AllDocCollector();
    DisjunctionMaxQuery anyLabel = new DisjunctionMaxQuery(0.0f);
    String[] labelFields = { FIELD_PREF_LABEL, FIELD_ALT_LABEL, FIELD_HIDDEN_LABEL };
    for (String labelField : labelFields) {
        anyLabel.add(new TermQuery(new Term(labelField, needle)));
    }
    searcher.search(anyLabel, matches);

    Set<String> uris = new HashSet<>();
    for (Integer docId : matches.getDocs()) {
        // Only the first URI value of each hit is used.
        uris.add(searcher.doc(docId).getValues(FIELD_URI)[0]);
    }
    return uris;
}

From source file:at.ac.univie.mminf.luceneSKOS.analysis.engine.jena.SKOSEngineImpl.java

License:Apache License

/**
 * Looks up the document whose URI field equals the given concept URI and returns the values
 * it stores for the requested field.
 *
 * @param conceptURI the URI of the concept to read
 * @param field      the name of the field whose values are wanted
 * @return the field values as a list, or {@code null} (after logging a warning) when the
 *         lookup does not yield exactly one document
 * @throws IOException if the search or the document load fails
 */
private Collection<String> readConceptFieldValues(String conceptURI, String field) throws IOException {
    Query byUri = new TermQuery(new Term(FIELD_URI, conceptURI));
    ScoreDoc[] matches = searcher.search(byUri, 1).scoreDocs;
    if (matches.length == 1) {
        String[] values = searcher.doc(matches[0].doc).getValues(field);
        return Arrays.asList(values);
    }
    logger.warn("Unknown concept " + conceptURI);
    return null;
}

From source file:at.ac.univie.mminf.luceneSKOS.analysis.SKOSURIFilterTest.java

License:Apache License

/**
 * Indexes a document whose stored "subject" field holds a concept URI, searches "subject" for
 * the term "leaps", and checks that the single hit still returns the original URI as its only
 * stored value.
 * NOTE(review): presumably the analyzer behind {@code writer} expands the URI into labels
 * such as "leaps" - confirm against the test set-up.
 */
@Test
public void singleUriExpansionWithStoredField() throws CorruptIndexException, IOException {

    Document doc = new Document();
    doc.add(new Field("subject", "http://example.com/concept/1", Field.Store.YES, Field.Index.ANALYZED));

    writer.addDocument(doc);

    searcher = new IndexSearcher(IndexReader.open(writer, false));

    Query query = new TermQuery(new Term("subject", "leaps"));

    TopDocs results = searcher.search(query, 10);
    Assert.assertEquals(1, results.totalHits);

    Document indexDoc = searcher.doc(results.scoreDocs[0].doc);

    String[] fieldValues = indexDoc.getValues("subject");

    Assert.assertEquals(1, fieldValues.length);

    // Expected value first, so a failure message reports expected and actual correctly.
    Assert.assertEquals("http://example.com/concept/1", fieldValues[0]);

}

From source file:at.ac.univie.mminf.luceneSKOS.analysis.SKOSURIFilterTest.java

License:Apache License

/**
 * Indexes a document whose "subject" field holds a concept URI but is not stored, searches
 * "subject" for the term "jumps", and checks that the single hit returns no stored values
 * for that field.
 */
@Test
public void singleUriExpansionWithUnstoredField() throws CorruptIndexException, IOException {
    Document input = new Document();
    Field subject = new Field("subject", "http://example.com/concept/1", Field.Store.NO, Field.Index.ANALYZED);
    input.add(subject);
    writer.addDocument(input);

    searcher = new IndexSearcher(IndexReader.open(writer, false));

    TopDocs hits = searcher.search(new TermQuery(new Term("subject", "jumps")), 10);
    Assert.assertEquals(1, hits.totalHits);

    // The field was indexed without being stored, so nothing comes back for it.
    Document loaded = searcher.doc(hits.scoreDocs[0].doc);
    String[] storedValues = loaded.getValues("subject");
    Assert.assertEquals(0, storedValues.length);
}