List of usage examples for org.apache.lucene.document.Document#getValues
public final String[] getValues(String name)
From source file:TestWang.java
License:Open Source License
public void computeMAP(ImageSearcher searcher, String prefix) throws IOException { System.out.println("ComputeMap"); // copy index to ram to be much faster ... IndexReader reader = IndexReader.open(new RAMDirectory(FSDirectory.open(new File(indexPath))), true); Pattern p = Pattern.compile("([0-9]+).jpg"); double map = 0; double errorRate = 0d; double precision10 = 0d; double[] pr10cat = new double[10]; double[] pr10cnt = new double[10]; for (int i = 0; i < pr10cat.length; i++) { pr10cat[i] = 0d;/*from w ww .j av a 2 s . c om*/ pr10cnt[i] = 0d; } System.out.println("name\tmap\tp@10\terror rate"); System.out.println("Anzahl Anfragen: " + sampleQueries.length); for (int i = 0; i < sampleQueries.length; i++) { int id = sampleQueries[i]; String file = testExtensive + "/" + id + ".jpg"; ImageSearchHits hits = searcher.search(findDoc(reader, id + ".jpg"), reader); int goodOnes = 0; double avgPrecision = 0d; double precision10temp = 0d; int countResults = 0; // System.out.println("Anzahl Hits: " + hits.length()); for (int j = 0; j < hits.length(); j++) { Document d = hits.doc(j); String hitsId = d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; Matcher matcher = p.matcher(hitsId); if (matcher.find()) hitsId = matcher.group(1); else fail("Did not get the number ..."); int testID = Integer.parseInt(hitsId); if (testID != id) countResults++; if ((testID != id) && ((int) Math.floor(id / 100) == (int) Math.floor(testID / 100))) { goodOnes++; // Only if there is a change in recall avgPrecision += (double) goodOnes / (double) countResults; // System.out.print("x"); if (j <= 10) { precision10temp += 1d; } } else { if (j == 1) { // error rate errorRate++; } } } // end for loop iterating results. 
// if (avgPrecision<=0) { // System.out.println("avgPrecision = " + avgPrecision); // System.out.println("goodOnes = " + goodOnes); // } // assertTrue("Check if average precision is > 0", avgPrecision > 0); // assertTrue("Check if goodOnes is > 0", goodOnes > 0); avgPrecision = avgPrecision / goodOnes; precision10 += precision10temp / 10d; // precision @ 10 for each category ... pr10cat[(int) Math.floor(id / 100)] += precision10temp / 10d; pr10cnt[(int) Math.floor(id / 100)] += 1d; map += avgPrecision; } map = map / sampleQueries.length; errorRate = errorRate / sampleQueries.length; precision10 = precision10 / sampleQueries.length; System.out.print(prefix + "\t"); System.out.print(String.format("%.5f ", map) + '\t'); System.out.print(String.format("%.5f ", precision10) + '\t'); System.out.print(String.format("%.5f ", errorRate) + '\t'); // precision@10 per category for (int i = 0; i < pr10cat.length; i++) { double v = 0; if (pr10cnt[i] > 0) v = pr10cat[i] / pr10cnt[i]; // System.out.print(i + ": "); System.out.printf("%.5f\t", v); } System.out.println(); }
From source file:TestWang.java
License:Open Source License
private Document findDoc(IndexReader reader, String file) throws IOException { for (int i = 0; i < reader.numDocs(); i++) { Document document = reader.document(i); String s = document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; if (s.endsWith(File.separator + file)) { // System.out.println("s = " + s); return document; }/* ww w .ja va 2s .c o m*/ } return null; }
From source file:TestWang.java
License:Open Source License
private Document[] findDocs(IndexReader reader, String[] file) throws IOException { Document[] result = new Document[file.length]; for (int i = 0; i < reader.numDocs(); i++) { Document document = reader.document(i); String s = document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]; for (int j = 0; j < result.length; j++) { if (s.endsWith("\\" + file[j])) { // System.out.println("s = " + s); result[j] = document;/*from w ww.j a va2s. co m*/ } } } return result; }
From source file:KNearestNeighbourDocumentClassifier.java
License:Apache License
/** * Returns the top k results from a More Like This query based on the input document * * @param document the document to use for More Like This search * @return the top results for the MLT query * @throws IOException If there is a low-level I/O error *///from ww w. ja v a 2 s .c o m private TopDocs knnSearch(Document document) throws IOException { BooleanQuery.Builder mltQuery = new BooleanQuery.Builder(); for (String fieldName : textFieldNames) { String boost = null; if (fieldName.contains("^")) { String[] field2boost = fieldName.split("\\^"); fieldName = field2boost[0]; boost = field2boost[1]; } String[] fieldValues = document.getValues(fieldName); if (boost != null) { mlt.setBoost(true); mlt.setBoostFactor(Float.parseFloat(boost)); } mlt.setAnalyzer(field2analyzer.get(fieldName)); for (String fieldContent : fieldValues) { mltQuery.add(new BooleanClause(mlt.like(fieldName, new StringReader(fieldContent)), BooleanClause.Occur.SHOULD)); } mlt.setBoost(false); } Query classFieldQuery = new WildcardQuery(new Term(classFieldName, "*")); mltQuery.add(new BooleanClause(classFieldQuery, BooleanClause.Occur.MUST)); if (query != null) { mltQuery.add(query, BooleanClause.Occur.MUST); } return indexSearcher.search(mltQuery.build(), k); }
From source file:aos.lucene.analysis.synonym.WordNetSynonymEngine.java
License:Apache License
public String[] getSynonyms(String word) throws IOException { List<String> synList = new ArrayList<String>(); AllDocCollector collector = new AllDocCollector(); // #A searcher.search(new TermQuery(new Term("word", word)), collector); for (ScoreDoc hit : collector.getHits()) { // #B Document doc = searcher.doc(hit.doc); String[] values = doc.getValues("syn"); for (String syn : values) { // #C synList.add(syn);/*w w w . j a v a 2 s .c o m*/ } } return synList.toArray(new String[0]); }
From source file:aos.lucene.search.advanced.BooksLikeThis.java
License:Apache License
public Document[] docsLike(int id, int max) throws IOException { Document doc = reader.document(id); String[] authors = doc.getValues("author"); BooleanQuery authorQuery = new BooleanQuery(); // for (String author : authors) { // authorQuery.add(new TermQuery(new Term("author", author)), // BooleanClause.Occur.SHOULD); // }/*from w ww .j a v a 2 s. c o m*/ authorQuery.setBoost(2.0f); TermFreqVector vector = // reader.getTermFreqVector(id, "subject"); // BooleanQuery subjectQuery = new BooleanQuery(); // for (String vecTerm : vector.getTerms()) { // TermQuery tq = new TermQuery( // new Term("subject", vecTerm)); // subjectQuery.add(tq, BooleanClause.Occur.SHOULD); // } BooleanQuery likeThisQuery = new BooleanQuery(); // likeThisQuery.add(authorQuery, BooleanClause.Occur.SHOULD); // likeThisQuery.add(subjectQuery, BooleanClause.Occur.SHOULD); // likeThisQuery.add(new TermQuery( // new Term("isbn", doc.get("isbn"))), BooleanClause.Occur.MUST_NOT); // // LOGGER.info(" Query: " + // likeThisQuery.toString("contents")); TopDocs hits = searcher.search(likeThisQuery, 10); int size = max; if (max > hits.scoreDocs.length) size = hits.scoreDocs.length; Document[] docs = new Document[size]; for (int i = 0; i < size; i++) { docs[i] = reader.document(hits.scoreDocs[i].doc); } return docs; }
From source file:at.ac.univie.mminf.luceneSKOS.analysis.engine.jena.SKOSEngineImpl.java
License:Apache License
@Override public Collection<String> getConcepts(String label) throws IOException { Set<String> concepts = new HashSet<>(); // convert the query to lower-case String queryString = label.toLowerCase(Locale.ROOT); AllDocCollector collector = new AllDocCollector(); DisjunctionMaxQuery query = new DisjunctionMaxQuery(0.0f); query.add(new TermQuery(new Term(FIELD_PREF_LABEL, queryString))); query.add(new TermQuery(new Term(FIELD_ALT_LABEL, queryString))); query.add(new TermQuery(new Term(FIELD_HIDDEN_LABEL, queryString))); searcher.search(query, collector);/*from ww w . j a va2 s . c o m*/ for (Integer hit : collector.getDocs()) { Document doc = searcher.doc(hit); String conceptURI = doc.getValues(FIELD_URI)[0]; concepts.add(conceptURI); } return concepts; }
From source file:at.ac.univie.mminf.luceneSKOS.analysis.engine.jena.SKOSEngineImpl.java
License:Apache License
/** * Returns the values of a given field for a given concept */// ww w . jav a2 s . com private Collection<String> readConceptFieldValues(String conceptURI, String field) throws IOException { Query query = new TermQuery(new Term(FIELD_URI, conceptURI)); TopDocs docs = searcher.search(query, 1); ScoreDoc[] results = docs.scoreDocs; if (results.length != 1) { logger.warn("Unknown concept " + conceptURI); return null; } Document conceptDoc = searcher.doc(results[0].doc); return Arrays.asList(conceptDoc.getValues(field)); }
From source file:at.ac.univie.mminf.luceneSKOS.analysis.SKOSURIFilterTest.java
License:Apache License
@Test public void singleUriExpansionWithStoredField() throws CorruptIndexException, IOException { Document doc = new Document(); doc.add(new Field("subject", "http://example.com/concept/1", Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc);//from w w w .j a va 2 s . co m searcher = new IndexSearcher(IndexReader.open(writer, false)); Query query = new TermQuery(new Term("subject", "leaps")); TopDocs results = searcher.search(query, 10); Assert.assertEquals(1, results.totalHits); Document indexDoc = searcher.doc(results.scoreDocs[0].doc); String[] fieldValues = indexDoc.getValues("subject"); Assert.assertEquals(1, fieldValues.length); Assert.assertEquals(fieldValues[0], "http://example.com/concept/1"); }
From source file:at.ac.univie.mminf.luceneSKOS.analysis.SKOSURIFilterTest.java
License:Apache License
@Test public void singleUriExpansionWithUnstoredField() throws CorruptIndexException, IOException { Document doc = new Document(); doc.add(new Field("subject", "http://example.com/concept/1", Field.Store.NO, Field.Index.ANALYZED)); writer.addDocument(doc);// ww w . ja va 2 s.c o m searcher = new IndexSearcher(IndexReader.open(writer, false)); Query query = new TermQuery(new Term("subject", "jumps")); TopDocs results = searcher.search(query, 10); Assert.assertEquals(1, results.totalHits); Document indexDoc = searcher.doc(results.scoreDocs[0].doc); String[] fieldValues = indexDoc.getValues("subject"); Assert.assertEquals(0, fieldValues.length); }