List of usage examples for org.apache.lucene.search TermQuery toString
@Override public final String toString()
From source file:dk.dbc.opensearch.fedora.search.LuceneFieldIndex.java
License:Open Source License
/**
 * Returns the highest integer PID stored in the index for the given namespace.
 *
 * @param namespace value matched exactly against the PID_NAMESPACE field
 * @return the largest PID_INT value found, or 0 when no document matches
 *         (or the matching document lacks a stored PID_INT field)
 * @throws IOException on index access failure
 */
public int findHighestId(String namespace) throws IOException {
    TermQuery query = new TermQuery(new Term(PID_NAMESPACE, namespace));
    // Make sure the searcher sees the latest commits before searching.
    searchManager.maybeRefreshBlocking();
    IndexSearcher searcher = searchManager.acquire();
    try {
        log.debug("Query: {}", query.toString());
        // Sort descending on the numeric PID field; the single top hit is the maximum.
        Sort byPidDescending = new Sort(new SortField(PID_INT, SortField.Type.INT, true));
        TopFieldDocs top = searcher.search(query, 1, byPidDescending);
        if (top.scoreDocs.length > 0) {
            Document document = searcher.getIndexReader().document(top.scoreDocs[0].doc);
            IndexableField idField = document.getField(PID_INT);
            if (idField != null) {
                return idField.numericValue().intValue();
            }
        }
        return 0;
    } finally {
        // Always hand the acquired searcher back to the manager.
        searchManager.release(searcher);
    }
}
From source file:edu.harvard.iq.dvn.core.index.Indexer.java
License:Apache License
/**
 * Builds a boolean OR query over the given search terms. Multi-word values
 * become sloppy PhraseQuery clauses (words within 10 positions of each other);
 * single-word values become plain TermQuery clauses. Values that reduce to
 * zero words contribute no clause at all.
 *
 * @param orSearchTerms terms to OR together
 * @return a BooleanQuery with one SHOULD clause per usable term
 */
BooleanQuery orPhraseQuery(List<SearchTerm> orSearchTerms) {
    BooleanQuery orTerms = new BooleanQuery();
    orTerms.setMaxClauseCount(dvnMaxClauseCount);
    // Idiom fix: enhanced for-loop over the typed list replaces the original
    // raw Iterator with its unchecked (SearchTerm) cast.
    for (SearchTerm elem : orSearchTerms) {
        String[] phrase = getPhrase(elem.getValue().toLowerCase().trim());
        if (phrase.length > 1) {
            // Multi-word value: match as a sloppy phrase.
            PhraseQuery phraseQuery = new PhraseQuery();
            phraseQuery.setSlop(10);
            for (String word : phrase) {
                phraseQuery.add(new Term(elem.getFieldName(), word.toLowerCase().trim()));
            }
            orTerms.add(phraseQuery, BooleanClause.Occur.SHOULD);
        } else if (phrase.length == 1) {
            logger.fine("INDEXER: orPhraseQuery: search element value: " + phrase[0].toLowerCase().trim());
            Term t = new Term(elem.getFieldName(), phrase[0].toLowerCase().trim());
            logger.fine("INDEXER: orPhraseQuery: term value=" + t.text());
            TermQuery orQuery = new TermQuery(t);
            logger.fine("INDEXER: TermQuery orQuery (native): " + orQuery.toString());
            orTerms.add(orQuery, BooleanClause.Occur.SHOULD);
        }
        // phrase.length == 0 (blank value): deliberately contributes nothing.
    }
    return orTerms;
}
From source file:edu.harvard.iq.dvn.core.index.Indexer.java
License:Apache License
/**
 * Builds a boolean OR query over the given search terms. Multi-word values
 * become sloppy PhraseQuery clauses (slop 10); single-word values become either
 * a trailing-"*" WildcardQuery (when the field is a prefix-searchable file
 * metadata field) or a plain TermQuery. Values that reduce to zero words
 * contribute no clause.
 *
 * @param orSearchTerms terms to OR together
 * @return a BooleanQuery with one SHOULD clause per usable term
 */
BooleanQuery orPhraseOrWildcardQuery(List<SearchTerm> orSearchTerms) {
    BooleanQuery orTerms = new BooleanQuery();
    orTerms.setMaxClauseCount(dvnMaxClauseCount);
    // Idiom fix: enhanced for-loop over the typed list replaces the original
    // raw Iterator with its unchecked (SearchTerm) cast.
    for (SearchTerm elem : orSearchTerms) {
        String[] phrase = getPhrase(elem.getValue().toLowerCase().trim());
        if (phrase.length > 1) {
            // Multi-word value: match as a sloppy phrase.
            PhraseQuery phraseQuery = new PhraseQuery();
            phraseQuery.setSlop(10);
            for (String word : phrase) {
                phraseQuery.add(new Term(elem.getFieldName(), word.toLowerCase().trim()));
            }
            orTerms.add(phraseQuery, BooleanClause.Occur.SHOULD);
        } else if (phrase.length == 1) {
            logger.fine("INDEXER: wildcardQuery: search element value: " + phrase[0].toLowerCase().trim());
            if (isPrefixSearchableFileMetadataField(elem.getFieldName())) {
                // Prefix-searchable field: append "*" and use a wildcard query.
                Term t = new Term(elem.getFieldName(), phrase[0].toLowerCase().trim() + "*");
                logger.fine("INDEXER: wildcardQuery: term value=" + t.text());
                WildcardQuery wcQuery = new WildcardQuery(t);
                logger.fine("INDEXER: Term wildcardQuery (native): " + wcQuery.toString());
                orTerms.add(wcQuery, BooleanClause.Occur.SHOULD);
            } else {
                logger.fine("INDEXER: building PhraseQuery: search element value: "
                        + phrase[0].toLowerCase().trim());
                Term t = new Term(elem.getFieldName(), phrase[0].toLowerCase().trim());
                logger.fine("INDEXER: building PhraseQuery: term value=" + t.text());
                TermQuery orQuery = new TermQuery(t);
                logger.fine("INDEXER: TermQuery orQuery (native): " + orQuery.toString());
                orTerms.add(orQuery, BooleanClause.Occur.SHOULD);
            }
        }
        // phrase.length == 0 (blank value): deliberately contributes nothing.
    }
    return orTerms;
}
From source file:edu.rpi.tw.linkipedia.search.main.helper.ReadIndex.java
License:Open Source License
public static ArrayList<String> readIndexByTerm(IndexSearcher searcher, String fieldString, String termString, String filter) throws CorruptIndexException, IOException { //System.out.println(fieldString+" "+termString); TermQuery query = new TermQuery(new Term(fieldString, termString)); if (debug)// w w w. j a v a 2s. com System.out.println("your query |" + query.toString() + "|"); TopDocs topDocs = searcher.search(query, 1); if (topDocs == null) return new ArrayList<String>(); ScoreDoc[] hits = topDocs.scoreDocs; // System.out.println("matching: "+hits.length); ArrayList<String> contents = new ArrayList<String>(); for (int i = 0; i < hits.length; i++) { int docId = hits[i].doc; Document doc = searcher.doc(docId); List<IndexableField> fields = doc.getFields(); for (int j = 0; j < fields.size(); j++) { IndexableField field = fields.get(j); if (field.name().equals(filter)) { // System.out.println(filter+": ("+field.stringValue()+")"); contents.add(field.stringValue()); } // if(!field.name().equals("triple")) // System.out.println(field.name()+" "+field.stringValue()+" "+filter); /* if(field.name().equals("label")||field.name().equals("boost")||field.stringValue().startsWith(filter+"|")){ System.out.println(field.name()+": "+field.stringValue()); } */ } } return contents; /* Term term = new Term(fieldString, termString); TermDocs docs = reader.termDocs(term); if(docs.next()){ int docId = docs.doc(); Document doc = reader.document(docId); return doc; } */ }
From source file:edu.rpi.tw.linkipedia.search.main.helper.ReadIndex.java
License:Open Source License
public static void printIndexByTerm(IndexSearcher searcher, String fieldString, String termString, String filter) throws CorruptIndexException, IOException { TermQuery query = new TermQuery(new Term(fieldString, termString)); System.out.println("your query " + query.toString()); TopDocs topDocs = searcher.search(query, 1); ScoreDoc[] hits = topDocs.scoreDocs; //System.out.println("matching: "+hits.length); //ArrayList<String> contents = new ArrayList<String>(); for (int i = 0; i < hits.length; i++) { int docId = hits[i].doc; Document doc = searcher.doc(docId); List<IndexableField> fields = doc.getFields(); for (int j = 0; j < fields.size(); j++) { IndexableField field = fields.get(j); if (field.name().equals(filter)) { System.out.println(filter + ": (" + field.stringValue() + ")"); //contents.add(field.stringValue()); }/*w w w . ja v a2s . co m*/ //System.out.println(field.name()+" "+field.stringValue()); /* if(field.name().equals("label")||field.name().equals("boost")||field.stringValue().startsWith(filter+"|")){ System.out.println(field.name()+": "+field.stringValue()); } */ } } //return contents; /* Term term = new Term(fieldString, termString); TermDocs docs = reader.termDocs(term); if(docs.next()){ int docId = docs.doc(); Document doc = reader.document(docId); return doc; } */ }
From source file:io.anserini.index.UpdateIndex.java
License:Apache License
// NOTE(review): this scraped listing collapses the method onto a few very long
// physical lines, with scraper junk ("//w w w...") injected mid-line; because a
// "//" comments out the remainder of its physical line, the text below cannot
// compile as rendered here. It is therefore kept byte-identical rather than
// reflowed -- confirm against the original io.anserini.index.UpdateIndex source.
//
// What the visible code does: parses CLI options (index path required; optional
// deletes file and max id); reads a local "PittsburghUserTimeline" JSON status
// corpus into a userId -> concatenated-tweet-text map (per-status parse errors
// silently swallowed); reads user ids from a local "userID" file; opens the
// index for reading and appending; then for each city builds a query matching
// either a longitude/latitude box (+/- 0.05 degrees) or a "place" name/alias,
// searches it, and for every hit whose user id appears in both the id list and
// the text map, adds a new document carrying a "userBackground" field and the
// user's concatenated "timeline" text, committing after each add. Large blocks
// of alternative update/delete logic remain commented out.
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(new Option(HELP_OPTION, "show help")); options.addOption(new Option(OPTIMIZE_OPTION, "merge indexes into a single segment")); options.addOption(new Option(STORE_TERM_VECTORS_OPTION, "store term vectors")); options.addOption(//w w w.ja v a2 s.c o m OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION)); options.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("file with deleted tweetids") .create(DELETES_OPTION)); options.addOption(OptionBuilder.withArgName("id").hasArg().withDescription("max id").create(MAX_ID_OPTION)); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (cmdline.hasOption(HELP_OPTION) || !cmdline.hasOption(INDEX_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(UpdateIndex.class.getName(), options); System.exit(-1); } String indexPath = cmdline.getOptionValue(INDEX_OPTION); final FieldType textOptions = new FieldType(); textOptions.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); textOptions.setStored(true); textOptions.setTokenized(true); textOptions.setStoreTermVectors(true); LOG.info("index: " + indexPath); File file = new File("PittsburghUserTimeline"); if (!file.exists()) { System.err.println("Error: " + file + " does not exist!"); System.exit(-1); } final StatusStream stream = new JsonStatusCorpusReader(file); Status status; String s; HashMap<Long, String> hm = new HashMap<Long, String>(); try { while ((s = stream.nextRaw()) != null) { try { status = DataObjectFactory.createStatus(s); if (status.getText() == null) { continue; } hm.put(status.getUser().getId(), hm.get(status.getUser().getId()) + 
status.getText().replaceAll("[\\r\\n]+", " ")); } catch (Exception e) { } } } catch (Exception e) { e.printStackTrace(); } finally { stream.close(); } ArrayList<String> userIDList = new ArrayList<String>(); try (BufferedReader br = new BufferedReader(new FileReader(new File("userID")))) { String line; while ((line = br.readLine()) != null) { userIDList.add(line.replaceAll("[\\r\\n]+", "")); // process the line. } } try { reader = DirectoryReader .open(FSDirectory.open(new File(cmdline.getOptionValue(INDEX_OPTION)).toPath())); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } final Directory dir = new SimpleFSDirectory(Paths.get(cmdline.getOptionValue(INDEX_OPTION))); final IndexWriterConfig config = new IndexWriterConfig(ANALYZER); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); final IndexWriter writer = new IndexWriter(dir, config); IndexSearcher searcher = new IndexSearcher(reader); System.out.println("The total number of docs indexed " + searcher.collectionStatistics(TweetStreamReader.StatusField.TEXT.name).docCount()); for (int city = 0; city < cityName.length; city++) { // Pittsburgh's coordinate -79.976389, 40.439722 Query q_long = NumericRangeQuery.newDoubleRange(TweetStreamReader.StatusField.LONGITUDE.name, new Double(longitude[city] - 0.05), new Double(longitude[city] + 0.05), true, true); Query q_lat = NumericRangeQuery.newDoubleRange(TweetStreamReader.StatusField.LATITUDE.name, new Double(latitude[city] - 0.05), new Double(latitude[city] + 0.05), true, true); BooleanQuery bqCityName = new BooleanQuery(); Term t = new Term("place", cityName[city]); TermQuery query = new TermQuery(t); bqCityName.add(query, BooleanClause.Occur.SHOULD); System.out.println(query.toString()); for (int i = 0; i < cityNameAlias[city].length; i++) { t = new Term("place", cityNameAlias[city][i]); query = new TermQuery(t); bqCityName.add(query, BooleanClause.Occur.SHOULD); System.out.println(query.toString()); } BooleanQuery bq = 
new BooleanQuery(); BooleanQuery finalQuery = new BooleanQuery(); // either a coordinate match bq.add(q_long, BooleanClause.Occur.MUST); bq.add(q_lat, BooleanClause.Occur.MUST); finalQuery.add(bq, BooleanClause.Occur.SHOULD); // or a place city name match finalQuery.add(bqCityName, BooleanClause.Occur.SHOULD); TotalHitCountCollector totalHitCollector = new TotalHitCountCollector(); // Query hasFieldQuery = new ConstantScoreQuery(new // FieldValueFilter("timeline")); // // searcher.search(hasFieldQuery, totalHitCollector); // // if (totalHitCollector.getTotalHits() > 0) { // TopScoreDocCollector collector = // TopScoreDocCollector.create(Math.max(0, // totalHitCollector.getTotalHits())); // searcher.search(finalQuery, collector); // ScoreDoc[] hits = collector.topDocs().scoreDocs; // // // HashMap<String, Integer> hasHit = new HashMap<String, Integer>(); // int dupcount = 0; // for (int i = 0; i < hits.length; ++i) { // int docId = hits[i].doc; // Document d; // // d = searcher.doc(docId); // // System.out.println(d.getFields()); // } // } // totalHitCollector = new TotalHitCountCollector(); searcher.search(finalQuery, totalHitCollector); if (totalHitCollector.getTotalHits() > 0) { TopScoreDocCollector collector = TopScoreDocCollector .create(Math.max(0, totalHitCollector.getTotalHits())); searcher.search(finalQuery, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; System.out.println("City " + cityName[city] + " " + collector.getTotalHits() + " hits."); HashMap<String, Integer> hasHit = new HashMap<String, Integer>(); int dupcount = 0; for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d; d = searcher.doc(docId); if (userIDList.contains(d.get(IndexTweets.StatusField.USER_ID.name)) && hm.containsKey(Long.parseLong(d.get(IndexTweets.StatusField.USER_ID.name)))) { // System.out.println("Has timeline field?" 
+ (d.get("timeline") != null)); // System.out.println(reader.getDocCount("timeline")); // d.add(new Field("timeline", hm.get(Long.parseLong(d.get(IndexTweets.StatusField.USER_ID.name))), // textOptions)); System.out.println("Found a user hit"); BytesRefBuilder brb = new BytesRefBuilder(); NumericUtils.longToPrefixCodedBytes(Long.parseLong(d.get(IndexTweets.StatusField.ID.name)), 0, brb); Term term = new Term(IndexTweets.StatusField.ID.name, brb.get()); // System.out.println(reader.getDocCount("timeline")); Document d_new = new Document(); // for (IndexableField field : d.getFields()) { // d_new.add(field); // } // System.out.println(d_new.getFields()); d_new.add(new StringField("userBackground", d.get(IndexTweets.StatusField.USER_ID.name), Store.YES)); d_new.add(new Field("timeline", hm.get(Long.parseLong(d.get(IndexTweets.StatusField.USER_ID.name))), textOptions)); // System.out.println(d_new.get()); writer.addDocument(d_new); writer.commit(); // t = new Term("label", "why"); // TermQuery tqnew = new TermQuery(t); // // totalHitCollector = new TotalHitCountCollector(); // // searcher.search(tqnew, totalHitCollector); // // if (totalHitCollector.getTotalHits() > 0) { // collector = TopScoreDocCollector.create(Math.max(0, totalHitCollector.getTotalHits())); // searcher.search(tqnew, collector); // hits = collector.topDocs().scoreDocs; // // System.out.println("City " + cityName[city] + " " + collector.getTotalHits() + " hits."); // // for (int k = 0; k < hits.length; k++) { // docId = hits[k].doc; // d = searcher.doc(docId); // System.out.println(d.get(IndexTweets.StatusField.ID.name)); // System.out.println(d.get(IndexTweets.StatusField.PLACE.name)); // } // } // writer.deleteDocuments(term); // writer.commit(); // writer.addDocument(d); // writer.commit(); // System.out.println(reader.getDocCount("timeline")); // writer.updateDocument(term, d); // writer.commit(); } } } } reader.close(); writer.close(); }
From source file:org.codelibs.fess.helper.QueryHelper.java
License:Apache License
// Converts a parsed Lucene TermQuery into an Elasticsearch QueryBuilder, with
// special handling for: trailing-"*" terms (rewritten as prefix queries when
// the config flag is on and the text is longer than one char), the default
// field, the virtual "sort" field (registers field sort builders on the
// context and returns null), inurl:/default-url and site: fields, and
// registered search fields (exact term query for not-analyzed fields,
// match-phrase otherwise); any other field falls back to searching the whole
// TermQuery text against the default field.
// NOTE(review): the "if (sortOrder == null)" branch appears unreachable -- the
// ternary above it always assigns SortOrder.DESC or SortOrder.ASC, never null;
// the "Invalid sort order" error can therefore never be raised. Confirm before
// relying on that validation.
// NOTE(review): the listing collapses this method onto two physical lines
// (breaking mid-ternary); the code below is kept byte-identical to the scrape.
protected QueryBuilder convertTermQuery(final QueryContext context, final TermQuery termQuery, final float boost) { final String field = getSearchField(context, termQuery.getTerm().field()); final String text = termQuery.getTerm().text(); final FessConfig fessConfig = ComponentUtil.getFessConfig(); if (fessConfig.getQueryReplaceTermWithPrefixQueryAsBoolean() && text.length() > 1 && text.endsWith("*")) { return convertPrefixQuery(context, new PrefixQuery(new Term(field, text.substring(0, text.length() - 1))), boost); } else if (Constants.DEFAULT_FIELD.equals(field)) { context.addFieldLog(field, text); context.addHighlightedQuery(text); return buildDefaultQueryBuilder((f, b) -> buildMatchPhraseQuery(f, text).boost(b * boost)); } else if ("sort".equals(field)) { split(text, ",").of(stream -> stream.filter(StringUtil::isNotBlank).forEach(t -> { final String[] values = t.split("\\."); if (values.length > 2) { throw new InvalidQueryException(messages -> messages .addErrorsInvalidQuerySortValue(UserMessages.GLOBAL_PROPERTY_KEY, text), "Invalid sort field: " + termQuery); } final String sortField = values[0]; if (!isSortField(sortField)) { throw new InvalidQueryException( messages -> messages.addErrorsInvalidQueryUnsupportedSortField( UserMessages.GLOBAL_PROPERTY_KEY, sortField), "Unsupported sort field: " + termQuery); } SortOrder sortOrder; if (values.length == 2) { sortOrder = SortOrder.DESC.toString().equalsIgnoreCase(values[1]) ? 
SortOrder.DESC : SortOrder.ASC; if (sortOrder == null) { throw new InvalidQueryException( messages -> messages.addErrorsInvalidQueryUnsupportedSortOrder( UserMessages.GLOBAL_PROPERTY_KEY, values[1]), "Invalid sort order: " + termQuery); } } else { sortOrder = SortOrder.ASC; } context.addSorts(createFieldSortBuilder(sortField, sortOrder)); })); return null; } else if (INURL_FIELD.equals(field) || (StringUtil.equals(field, context.getDefaultField()) && fessConfig.getIndexFieldUrl().equals(context.getDefaultField()))) { return QueryBuilders.wildcardQuery(fessConfig.getIndexFieldUrl(), "*" + text + "*").boost(boost); } else if (SITE_FIELD.equals(field)) { return convertSiteQuery(context, text, boost); } else if (isSearchField(field)) { context.addFieldLog(field, text); context.addHighlightedQuery(text); if (notAnalyzedFieldSet.contains(field)) { return QueryBuilders.termQuery(field, text).boost(boost); } else { return buildMatchPhraseQuery(field, text).boost(boost); } } else { final String origQuery = termQuery.toString(); context.addFieldLog(Constants.DEFAULT_FIELD, origQuery); context.addHighlightedQuery(origQuery); return buildDefaultQueryBuilder((f, b) -> buildMatchPhraseQuery(f, origQuery).boost(b * boost)); } }
From source file:org.segrada.service.repository.orientdb.OrientDbFileRepository.java
License:Apache License
@Override public List<IFile> findBySearchTerm(String term, int maximum, boolean returnWithoutTerm) { List<IFile> hits = new ArrayList<>(); // empty search term and returnWithoutTerm false if (!returnWithoutTerm && (term == null || term.equals(""))) return hits; initDb();//from w ww . jav a2 s .c o m // search for term List<ODocument> result; if (term != null && !term.isEmpty()) { // create query term for lucene full text search StringBuilder sb = new StringBuilder(); boolean first = true; for (String termPart : term.toLowerCase().split("\\s+")) { termPart = QueryParserUtil.escape(termPart); TermQuery termQuery1 = new TermQuery(new Term("title", termPart + "*")); TermQuery termQuery2 = new TermQuery(new Term("filename", termPart + "*")); if (first) first = false; else sb.append(" AND "); sb.append('(').append(termQuery1.toString()).append(" OR ").append(termQuery2.toString()) .append(')'); } // execute query OSQLSynchQuery<ODocument> query = new OSQLSynchQuery<>( "select * from File where " + createSearchTermFullText(term) + " LIMIT " + maximum); result = db.command(query).execute(); } else { // no term, just find top X entries // execute query OSQLSynchQuery<ODocument> query = new OSQLSynchQuery<>( "select * from File " + getDefaultOrder(true) + " LIMIT " + maximum); result = db.command(query).execute(); } // browse entities for (ODocument document : result) { hits.add(convertToEntity(document)); } return hits; }
From source file:org.segrada.service.repository.orientdb.OrientDbPictogramRepository.java
License:Apache License
@Override public List<IPictogram> findBySearchTerm(String term, int maximum, boolean returnWithoutTerm) { List<IPictogram> hits = new ArrayList<>(); // empty search term and returnWithoutTerm false if (!returnWithoutTerm && (term == null || term.equals(""))) return hits; initDb();/*from ww w . ja va 2s. c om*/ // search for term List<ODocument> result; if (term != null && !term.isEmpty()) { // create query term for lucene full text search StringBuilder sb = new StringBuilder(); boolean first = true; for (String termPart : term.toLowerCase().split("\\s+")) { termPart = QueryParserUtil.escape(termPart); TermQuery termQuery = new TermQuery(new Term("title", termPart + "*")); if (first) first = false; else sb.append(" AND "); sb.append(termQuery.toString()); } // execute query OSQLSynchQuery<ODocument> query = new OSQLSynchQuery<>( "select * from Pictogram where title LUCENE ? LIMIT " + maximum); result = db.command(query).execute(sb.toString()); } else { // no term, just find top X entries // execute query OSQLSynchQuery<ODocument> query = new OSQLSynchQuery<>("select * from Pictogram LIMIT " + maximum); result = db.command(query).execute(); } // browse entities and populate list for (ODocument document : result) hits.add(convertToEntity(document)); return hits; }
From source file:org.segrada.service.repository.orientdb.OrientDbTagRepository.java
License:Apache License
@Override public List<ITag> findBySearchTerm(String term, int maximum, boolean returnWithoutTerm) { List<ITag> hits = new ArrayList<>(); // empty search term and returnWithoutTerm false if (!returnWithoutTerm && (term == null || term.equals(""))) return hits; initDb();/*w w w . j a va2s.c om*/ // search for term List<ODocument> result; if (term != null && !term.isEmpty()) { // create query term for lucene full text search StringBuilder sb = new StringBuilder(); boolean first = true; for (String termPart : term.toLowerCase().split("\\s+")) { termPart = QueryParserUtil.escape(termPart); TermQuery termQuery = new TermQuery(new Term("title", termPart + "*")); if (first) first = false; else sb.append(" AND "); sb.append(termQuery.toString()); } // execute query OSQLSynchQuery<ODocument> query = new OSQLSynchQuery<>( "select * from Tag where title LUCENE ? LIMIT " + maximum); result = db.command(query).execute(sb.toString()); } else { // no term, just find top X entries // execute query OSQLSynchQuery<ODocument> query = new OSQLSynchQuery<>( "select * from Tag " + getDefaultOrder(true) + " LIMIT " + maximum); result = db.command(query).execute(); } // browse entities for (ODocument document : result) { hits.add(convertToEntity(document)); } return hits; }