Usage examples for org.apache.lucene.search.IndexSearcher.doc(int)
public Document doc(int docID) throws IOException
Sugar for .getIndexReader().document(docID)
From source file:insa.luyten.SearchCISI.java
License:Apache License
/** * This demonstrates a typical paging search scenario, where the search engine presents * pages of size n to the user. The user can then go to the next page if interested in * the next hits.// w w w .j ava2 s . c om * * When the query is executed for the first time, then only enough results are collected * to fill 5 result pages. If the user wants to page beyond this limit, then the query * is executed another time and all hits are collected. * */ private void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, String field, int hitsPerPage, boolean raw, boolean interactive) throws IOException, NewQueryException { // Collect enough docs to show 5 pages TopDocs results = searcher.search(query, 5 * hitsPerPage); ScoreDoc[] hits = results.scoreDocs; int numTotalHits = results.totalHits; System.out.println(numTotalHits + " total matching documents"); int start = 0; int end = Math.min(numTotalHits, hitsPerPage); while (true) { if (end > hits.length) { System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits + " total matching documents collected."); System.out.println("Collect more (y/n) ?"); String line = in.readLine(); if (line.length() == 0 || line.charAt(0) == 'n') { break; } hits = searcher.search(query, numTotalHits).scoreDocs; } end = Math.min(hits.length, start + hitsPerPage); for (int i = start; i < end; i++) { if (raw) { // output raw format System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score); continue; } Document doc = searcher.doc(hits[i].doc); String index = doc.get("index"); if (index != null) { System.out.println((i + 1) + ". 
" + index); String title = doc.get("title"); if (title != null && (field.equals("content")) || field.equals("title")) { System.out.println(" Title: " + doc.get("title")); } if (field.equals("authors")) { System.out.println(" Authors: " + doc.get(field)); } if (field.equals("references")) { System.out.println(" References: " + doc.get(field)); } } else { System.out.println((i + 1) + ". " + "No path for this document"); } } if (!interactive || end == 0) { break; } if (numTotalHits >= end) { boolean quit = false; while (true) { System.out.print("Press "); if (start - hitsPerPage >= 0) { System.out.print("(p)revious page, "); } if (start + hitsPerPage < numTotalHits) { System.out.print("(n)ext page, "); } System.out.println("(q)uit or enter number to jump to a page."); String line = in.readLine(); //quit if no enter or q if (line.length() == 0 || line.charAt(0) == 'q') { quit = true; break; } //just p if (line.length() == 1 && line.charAt(0) == 'p') { start = Math.max(0, start - hitsPerPage); break; } //just n else if (line.length() == 1 && line.charAt(0) == 'n') { if (start + hitsPerPage < numTotalHits) { start += hitsPerPage; } break; } else { //test if it's a word or a number int page = 0; try { page = Integer.parseInt(line); } //FD: if not a number, maybe a new query ? catch (NumberFormatException e) { throw new NewQueryException(line); } if ((page - 1) * hitsPerPage < numTotalHits) { start = (page - 1) * hitsPerPage; break; } else { System.out.println("No such page"); } } } if (quit) break; end = Math.min(numTotalHits, start + hitsPerPage); } } }
From source file:intelligentWebAlgorithms.examples.search.MySearcher.java
License:Apache License
public SearchResult[] search(String query, int numberOfMatches) { SearchResult[] docResults = null; IndexSearcher is = null; Directory dir = null;/*from w w w . j a v a2 s. com*/ try { dir = FSDirectory.open(indexFile); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } DirectoryReader dirReader = null; try { dirReader = DirectoryReader.open(dir); is = new IndexSearcher(dirReader); } catch (IOException ioX) { System.out.println("ERROR: " + ioX.getMessage()); } StandardQueryParser queryParserHelper = new StandardQueryParser(); Query q = null; try { q = queryParserHelper.parse(query, LuceneIndexBuilder.INDEX_FIELD_CONTENT); } catch (QueryNodeException e) { e.printStackTrace(); } TopDocs hits = null; try { hits = is.search(q, numberOfMatches); docResults = new SearchResult[hits.scoreDocs.length]; for (int i = 0; i < hits.scoreDocs.length; i++) { Document hitDoc = is.doc(hits.scoreDocs[i].doc); docResults[i] = new SearchResult(hitDoc.get("docid"), hitDoc.get("doctype"), hitDoc.get("title"), hitDoc.get("url"), hits.scoreDocs[i].score); } dirReader.close(); dir.close(); } catch (IOException ioX) { System.out.println("ERROR: " + ioX.getMessage()); } catch (Exception e) { e.printStackTrace(); } String header = "Search results using Lucene index scores:"; boolean showTitle = true; printResults(header, "Query: " + query, docResults, showTitle); return docResults; }
From source file:interfaz.resultados.java
public void setTD_and_searcher(TopDocs td, IndexSearcher is) throws IOException { this.td = td; this.is = is; String result = ""; for (ScoreDoc sd : td.scoreDocs) { Document d = is.doc(sd.doc); result += "Titulo: " + d.get("Title") + "\n\tAutor: " + d.get("Authors") + "\n\tAo: " + d.get("Year") + "\n"; //System.out.println(sd.score+"year: "+d.get("Year")+" "+"Libro: "+d.get("Title")); }// w w w . j a v a2 s . co m if (result.equals("")) result = "No hay resultados"; resultados.setText(result); }
From source file:interfaz.resultados.java
public void setTD_searcher_taxoReader_facetsCollector(TopDocs td, IndexSearcher is, TaxonomyReader tr, FacetsCollector fc) throws IOException { this.td = td; this.is = is; this.tr = tr; this.fc = fc; String result = ""; for (ScoreDoc sd : td.scoreDocs) { Document d = is.doc(sd.doc); //int score=ScoreDoc.score; result += "Titulo: " + d.get("Title") + "\n\tAutor: " + d.get("Authors") + "\n\tAo: " + d.get("Year") + "\n"; //System.out.println(sd.score+"year: "+d.get("Year")+" "+"Libro: "+d.get("Title")); }//from w ww .j av a 2 s .c o m if (result.equals("")) result = "No hay resultados"; resultados.setText(result); /*String[] categorias = {"Authors", "Author keywords", "Index keywords"}; FacetResult[] dimensiones = new FacetResult[3]; FacetsCollector[] fcs = {new FacetsCollector(), new FacetsCollector(), new FacetsCollector(), new FacetsCollector()}; TopDocs[] tds = {null, null, null, null}; Facets[] facets = {null, null, null, null};*/ }
From source file:invertedindex.LineNumberSearcher.java
public ArrayList<String> search(String keyword, String filePath) throws IOException { String indexLocation = getLineIndexLocation(); // System.out.println("Inside LINE search method"); try {/*ww w . j a va 2 s. c o m*/ IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation))); IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector collector = TopScoreDocCollector.create(topDocs, true); String queryContent = keyword; queryContent = "\"" + queryContent + "\""; // queryContent = "*" + queryContent + "*"; String queryFilePath = filePath; // System.out.println("FIELPATH "+queryFilePath); queryFilePath = "\"" + queryFilePath + "\""; // queryFilePath = "*" + queryFilePath + "*"; QueryParser queryParserContent = new QueryParser(Version.LUCENE_47, "contents", analyzer); QueryParser queryParserFilePath = new QueryParser(Version.LUCENE_47, "path", analyzer); queryParserContent.setAllowLeadingWildcard(true); //queryParserFileName.setAllowLeadingWildcard(true); // Query q = queryParser.parse(query); Query qContent = queryParserContent.parse(queryContent); Query qFileName = queryParserFilePath.parse(queryFilePath); // System.out.println("FIELPATH "+qFileName); BooleanQuery q = new BooleanQuery(); q.add(qContent, Occur.MUST); // MUST implies that the keyword must occur. q.add(qFileName, Occur.MUST); searcher.search(q, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; // System.out.println("Found " + hits.length + " hits."); lineNumbersList = new ArrayList<>(); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = searcher.doc(docId); // System.out.println((i + 1) + ". 
" + d.get("filename") // + " score=" + hits[i].score); // System.out.println("Line Number is "+d.get("lineNumber")); // System.out.println("Content is "+d.get("contents")); // String filePath = d.get("path"); lineNumbersList.add(d.get("lineNumber")); } reader.close(); return lineNumbersList; } catch (Exception e) { System.out.println("Error searching in line number search " + indexLocation + " : " + e.getMessage()); } return lineNumbersList; }
From source file:invertedindex.ReadingIndex.java
public Map<String, Set<Integer>> printingIndex() throws IOException { try {// www . j av a 2 s.c o m MatchAllDocsQuery query = new MatchAllDocsQuery(); IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(getIndexLocation()))); IndexSearcher searcher = new IndexSearcher(reader); TopDocs hits = searcher.search(query, Integer.MAX_VALUE); Map<String, Set<Integer>> invertedIndex = new HashMap<>(); if (null == hits.scoreDocs || hits.scoreDocs.length <= 0) { System.out.println("No Hits Found with MatchAllDocsQuery"); return null; } for (ScoreDoc hit : hits.scoreDocs) { Document doc = searcher.doc(hit.doc); List<IndexableField> allFields = doc.getFields(); for (IndexableField field : allFields) { //Single document inverted index Terms terms = searcher.getIndexReader().getTermVector(hit.doc, field.name()); if (terms != null) { TermsEnum termsEnum = terms.iterator(null); while (termsEnum.next() != null) { if (invertedIndex.containsKey(termsEnum.term().utf8ToString())) { Set<Integer> existingDocs = invertedIndex.get(termsEnum.term().utf8ToString()); existingDocs.add(hit.doc); // existingDocs.add(Integer.parseInt((searcher.doc(hit.doc).get("lineNumber")))); invertedIndex.put(termsEnum.term().utf8ToString(), existingDocs); } else { Set<Integer> docs = new TreeSet<>(); docs.add(hit.doc); // docs.add(Integer.parseInt((searcher.doc(hit.doc).get("lineNumber")))); invertedIndex.put(termsEnum.term().utf8ToString(), docs); } } } } } //System.out.println("Printing Inverted Index:"); //invertedIndex.forEach((key , value) -> {System.out.println(key+":"+value); //}); return invertedIndex; } catch (Exception e) { return null; } }
From source file:invertedindex.SearchIndex.java
public ArrayList<SearchResults> search(String keyword) throws IOException { String indexLocation = this.getIndexLocation(); // System.out.println("Inside search method"); // indexLocation = ""; // BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); // while (true) { try {//from ww w.java2s . co m IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation))); IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector collector = TopScoreDocCollector.create(topDocs, true); String query = keyword; query = "\"" + query + "\""; Query q = new QueryParser(Version.LUCENE_47, "contents", analyzer).parse(query); SimpleFragListBuilder fragListBuilder = new SimpleFragListBuilder(); ScoreOrderFragmentsBuilder fragBuilder = new ScoreOrderFragmentsBuilder(); FastVectorHighlighter fvh = new FastVectorHighlighter(FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT, FastVectorHighlighter.DEFAULT_FIELD_MATCH, fragListBuilder, fragBuilder); fvh = new FastVectorHighlighter(FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT, FastVectorHighlighter.DEFAULT_FIELD_MATCH, fragListBuilder, fragBuilder); // System.out.println(q); // searcher.search(q, collector); // searcher.search(q, null,topDocs); ScoreDoc[] hits = collector.topDocs().scoreDocs; // 4. 
display results System.out.println("Found " + hits.length + " hits."); totalHits = hits.length; searchResulsAL = new ArrayList<>(); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; FieldQuery fq = fvh.getFieldQuery(q); // System.out.println("fq "+fq); String[] fragments = fvh.getBestFragments(fq, searcher.getIndexReader(), docId, "contents", 50, 10); //String[] lineFragments = fvh.getBestFragments(fq, searcher.getIndexReader(), docId, "contents", 18,10); Document d = searcher.doc(docId); String filePath = d.get("path"); for (int j = 0; j < fragments.length; j++) { // System.out.println("FRAGMENT iS "+fragments[j]); // int k=0; // for(k=0;k<lineFragments.length;k++){ // fragments[j].getSc String temp = Jsoup.parse(fragments[j]).text(); // LineNumberSearcher lns = new LineNumberSearcher(); //lineNumbersList = new ArrayList<>(); lineNumberArrayList = new ArrayList<>(); lineNumber = "null"; boolean g = Pattern.compile("\\n").matcher(fragments[j]).find(); if (!g) { // System.out.println("NO G"); lineNumbersList = lns.search(temp, filePath); // for(String s : lineNumbersList){ // System.out.println("s is "+s); // } // if(lineNumbersList.get(0).isEmpty()){ // lineNumber = "Not Found"; // }else { if (!lineNumbersList.isEmpty()) { // System.out.println("in line number"); lineNumber = lineNumbersList.get(0); } // } } //here is the tried code for enter space /* else{ System.out.println("YES G"); String lines[] = fragments[j].split("\\r?\\n"); // ArrayList<String> newLines = new ArrayList<>(); ArrayList<String> newLines = new ArrayList<>(Arrays.asList(lines)); System.out.println("Here 3"); int special = 0; for(String line : newLines){ if(Pattern.compile("^$").matcher(line).find()){ newLines.remove(line); special++; } } System.out.println("Here 4"); // List<String> list = Arrays.asList(lines); // if(list.contains(temp)){ // // } // for(String line: newLines){ // System.out.println("LINE IS "+line); // } if(newLines.size()==1){ // System.out.println("Yes G but 
NOT G"); lineNumbersList = lns.search(temp,filePath); if(!lineNumberArrayList.isEmpty()){ lineNumber = lineNumbersList.get(0); } System.out.println("Here 1"); }else{ System.out.println("Here 2"); ArrayList<String> a0 = lns.search(Jsoup.parse(newLines.get(0)).text(),filePath); ArrayList<String> a1 = lns.search(Jsoup.parse(newLines.get(1)).text(),filePath); int k,l; outerloop: for(k=0;k<a0.size();k++){ for(l=0;l<a1.size();l++){ int secondline = Integer.parseInt(a1.get(l)); // System.out.println("second line is"+ secondline); int firstline = Integer.parseInt(a0.get(k)); // System.out.println("first line is"+firstline); int diff = secondline - firstline; // System.out.println("DIFFERENCE IS "+diff); // System.out.println("Special IS "+special); if(diff == special+1){ insideLoopFlag = true; // System.out.println("K IS "+k); // System.out.println("IN BREAK "); break outerloop; } } // System.out.println("K IS "+k); } // System.out.println("OUT OF FOR LOOP"); // System.out.println("K IS "+k); if(insideLoopFlag==true){ lineNumber = String.valueOf(a0.get(k)); } // System.out.println("LINE NUMBER IS "+lineNumber); } } */ // } fragments[j] = fragments[j].replaceAll("\\n", " "); // System.out.println("\t\t" + fragments[j] + "..."); fragments[j] = fragments[j] + "...."; if (!(lineNumber.equals("null"))) { // System.out.println("in line number"); fragments[j] = fragments[j] + " at Line " + lineNumber; } } //Setting Results SearchResults sr = new SearchResults(); sr.setFilename(d.get("filename")); sr.setScore(hits[i].score); sr.setFragments(fragments); sr.setPath(filePath); sr.setContentType(d.get("contentType")); // sr.setLineNumber(lineNumber); searchResulsAL.add(sr); // } // writer.close(); reader.close(); } catch (Exception e) { System.out.println("Error searching in search index " + e + " : " + e.getMessage()); // break; } // } return searchResulsAL; }
From source file:invertedindex.SearchIndex.java
public ArrayList<String> autoComplete(String keyword) throws IOException { IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(this.getIndexLocation()))); IndexSearcher searcher = new IndexSearcher(reader); ArrayList<String> al = new ArrayList<>(); TopDocs sd = searcher.search(new PrefixQuery(new Term("contents", keyword)), null, 5); for (ScoreDoc d : sd.scoreDocs) { // System.out.println("SCORE DOC "+ searcher.doc(d.doc).get("contents")); String s = searcher.doc(d.doc).get("contents"); // System.out.println("STRING S "+s); String regex = keyword + "\\w+"; // String regex = keyword+"\\s*(\\w+)"; Pattern p = Pattern.compile(regex); Matcher m = p.matcher(s); while (m.find()) { al.add(m.group());/*from w ww. j a va2s . c o m*/ } // for(String ss : al){ // System.out.println("AUTO COMPLETE ARRAY IS "+ss); // } Set<String> hs = new HashSet<>(); hs.addAll(al); al.clear(); al.addAll(hs); } return al; }
From source file:invertedindex.SearchIndex.java
public ArrayList<SearchResults> multipleSearch(String keyword1, String keyword2, String radio) throws IOException { String indexLocation = this.getIndexLocation(); try {/* w ww.j a va2s . c o m*/ IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation))); IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector collector = TopScoreDocCollector.create(topDocs, true); String query1 = keyword1; String query2 = keyword2; query1 = "\"" + query1 + "\""; query2 = "\"" + query2 + "\""; Query q1 = new QueryParser(Version.LUCENE_47, "contents", analyzer).parse(query1); Query q2 = new QueryParser(Version.LUCENE_47, "contents", analyzer).parse(query2); BooleanQuery apiQuery = new BooleanQuery(); if (radio.equalsIgnoreCase("and")) { apiQuery.add(q1, BooleanClause.Occur.MUST); apiQuery.add(q2, BooleanClause.Occur.MUST); } else if (radio.equalsIgnoreCase("or")) { apiQuery.add(q1, BooleanClause.Occur.SHOULD); apiQuery.add(q2, BooleanClause.Occur.SHOULD); } else if (radio.equalsIgnoreCase("not")) { apiQuery.add(q1, BooleanClause.Occur.MUST); apiQuery.add(q2, BooleanClause.Occur.MUST_NOT); } SimpleFragListBuilder fragListBuilder = new SimpleFragListBuilder(); ScoreOrderFragmentsBuilder fragBuilder = new ScoreOrderFragmentsBuilder(); FastVectorHighlighter fvh = new FastVectorHighlighter(FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT, FastVectorHighlighter.DEFAULT_FIELD_MATCH, fragListBuilder, fragBuilder); fvh = new FastVectorHighlighter(FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT, FastVectorHighlighter.DEFAULT_FIELD_MATCH, fragListBuilder, fragBuilder); searcher.search(apiQuery, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; System.out.println("Found " + hits.length + " hits."); totalHits = hits.length; searchResulsAL = new ArrayList<>(); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; FieldQuery fq = fvh.getFieldQuery(apiQuery); // String[] fragments = fvh.getBestFragments(fq, searcher.getIndexReader(), 
docId, "contents", 50, 10); Document d = searcher.doc(docId); // String filePath = d.get("path"); for (int j = 0; j < fragments.length; j++) { String temp = Jsoup.parse(fragments[j]).text(); // LineNumberSearcher lns = new LineNumberSearcher(); //lineNumbersList = new ArrayList<>(); lineNumber = "null"; lineNumberArrayList = new ArrayList<>(); boolean g = Pattern.compile("\\n").matcher(fragments[j]).find(); if (!g) { // System.out.println("NO G g"); lineNumbersList = lns.search(temp, filePath); // for(String s : lineNumbersList){ // System.out.println("s is "+s); // } // if (!lineNumbersList.isEmpty()) { // System.out.println("in line number"); lineNumber = lineNumbersList.get(0); } } fragments[j] = fragments[j].replaceAll("\\n", " "); // System.out.println("\t\t" + fragments[j] + "..."); fragments[j] = fragments[j] + " ...."; if (!(lineNumber.equals("null"))) { // System.out.println("in line number"); fragments[j] = fragments[j] + " at Line " + lineNumber; } } SearchResults sr = new SearchResults(); sr.setFilename(d.get("filename")); sr.setScore(hits[i].score); sr.setFragments(fragments); sr.setPath(filePath); sr.setContentType(d.get("contentType")); searchResulsAL.add(sr); } reader.close(); } catch (Exception e) { System.out.println("Error searching in search index " + e + " : " + e.getMessage()); } return searchResulsAL; }
From source file:io.anserini.index.UpdateIndex.java
License:Apache License
@SuppressWarnings("static-access") public static void main(String[] args) throws Exception { Options options = new Options(); options.addOption(new Option(HELP_OPTION, "show help")); options.addOption(new Option(OPTIMIZE_OPTION, "merge indexes into a single segment")); options.addOption(new Option(STORE_TERM_VECTORS_OPTION, "store term vectors")); options.addOption(//from ww w . j a v a 2 s . co m OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION)); options.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("file with deleted tweetids") .create(DELETES_OPTION)); options.addOption(OptionBuilder.withArgName("id").hasArg().withDescription("max id").create(MAX_ID_OPTION)); CommandLine cmdline = null; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); System.exit(-1); } if (cmdline.hasOption(HELP_OPTION) || !cmdline.hasOption(INDEX_OPTION)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(UpdateIndex.class.getName(), options); System.exit(-1); } String indexPath = cmdline.getOptionValue(INDEX_OPTION); final FieldType textOptions = new FieldType(); textOptions.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); textOptions.setStored(true); textOptions.setTokenized(true); textOptions.setStoreTermVectors(true); LOG.info("index: " + indexPath); File file = new File("PittsburghUserTimeline"); if (!file.exists()) { System.err.println("Error: " + file + " does not exist!"); System.exit(-1); } final StatusStream stream = new JsonStatusCorpusReader(file); Status status; String s; HashMap<Long, String> hm = new HashMap<Long, String>(); try { while ((s = stream.nextRaw()) != null) { try { status = DataObjectFactory.createStatus(s); if (status.getText() == null) { continue; } hm.put(status.getUser().getId(), hm.get(status.getUser().getId()) + 
status.getText().replaceAll("[\\r\\n]+", " ")); } catch (Exception e) { } } } catch (Exception e) { e.printStackTrace(); } finally { stream.close(); } ArrayList<String> userIDList = new ArrayList<String>(); try (BufferedReader br = new BufferedReader(new FileReader(new File("userID")))) { String line; while ((line = br.readLine()) != null) { userIDList.add(line.replaceAll("[\\r\\n]+", "")); // process the line. } } try { reader = DirectoryReader .open(FSDirectory.open(new File(cmdline.getOptionValue(INDEX_OPTION)).toPath())); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } final Directory dir = new SimpleFSDirectory(Paths.get(cmdline.getOptionValue(INDEX_OPTION))); final IndexWriterConfig config = new IndexWriterConfig(ANALYZER); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); final IndexWriter writer = new IndexWriter(dir, config); IndexSearcher searcher = new IndexSearcher(reader); System.out.println("The total number of docs indexed " + searcher.collectionStatistics(TweetStreamReader.StatusField.TEXT.name).docCount()); for (int city = 0; city < cityName.length; city++) { // Pittsburgh's coordinate -79.976389, 40.439722 Query q_long = NumericRangeQuery.newDoubleRange(TweetStreamReader.StatusField.LONGITUDE.name, new Double(longitude[city] - 0.05), new Double(longitude[city] + 0.05), true, true); Query q_lat = NumericRangeQuery.newDoubleRange(TweetStreamReader.StatusField.LATITUDE.name, new Double(latitude[city] - 0.05), new Double(latitude[city] + 0.05), true, true); BooleanQuery bqCityName = new BooleanQuery(); Term t = new Term("place", cityName[city]); TermQuery query = new TermQuery(t); bqCityName.add(query, BooleanClause.Occur.SHOULD); System.out.println(query.toString()); for (int i = 0; i < cityNameAlias[city].length; i++) { t = new Term("place", cityNameAlias[city][i]); query = new TermQuery(t); bqCityName.add(query, BooleanClause.Occur.SHOULD); System.out.println(query.toString()); } BooleanQuery bq = 
new BooleanQuery(); BooleanQuery finalQuery = new BooleanQuery(); // either a coordinate match bq.add(q_long, BooleanClause.Occur.MUST); bq.add(q_lat, BooleanClause.Occur.MUST); finalQuery.add(bq, BooleanClause.Occur.SHOULD); // or a place city name match finalQuery.add(bqCityName, BooleanClause.Occur.SHOULD); TotalHitCountCollector totalHitCollector = new TotalHitCountCollector(); // Query hasFieldQuery = new ConstantScoreQuery(new // FieldValueFilter("timeline")); // // searcher.search(hasFieldQuery, totalHitCollector); // // if (totalHitCollector.getTotalHits() > 0) { // TopScoreDocCollector collector = // TopScoreDocCollector.create(Math.max(0, // totalHitCollector.getTotalHits())); // searcher.search(finalQuery, collector); // ScoreDoc[] hits = collector.topDocs().scoreDocs; // // // HashMap<String, Integer> hasHit = new HashMap<String, Integer>(); // int dupcount = 0; // for (int i = 0; i < hits.length; ++i) { // int docId = hits[i].doc; // Document d; // // d = searcher.doc(docId); // // System.out.println(d.getFields()); // } // } // totalHitCollector = new TotalHitCountCollector(); searcher.search(finalQuery, totalHitCollector); if (totalHitCollector.getTotalHits() > 0) { TopScoreDocCollector collector = TopScoreDocCollector .create(Math.max(0, totalHitCollector.getTotalHits())); searcher.search(finalQuery, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; System.out.println("City " + cityName[city] + " " + collector.getTotalHits() + " hits."); HashMap<String, Integer> hasHit = new HashMap<String, Integer>(); int dupcount = 0; for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d; d = searcher.doc(docId); if (userIDList.contains(d.get(IndexTweets.StatusField.USER_ID.name)) && hm.containsKey(Long.parseLong(d.get(IndexTweets.StatusField.USER_ID.name)))) { // System.out.println("Has timeline field?" 
+ (d.get("timeline") != null)); // System.out.println(reader.getDocCount("timeline")); // d.add(new Field("timeline", hm.get(Long.parseLong(d.get(IndexTweets.StatusField.USER_ID.name))), // textOptions)); System.out.println("Found a user hit"); BytesRefBuilder brb = new BytesRefBuilder(); NumericUtils.longToPrefixCodedBytes(Long.parseLong(d.get(IndexTweets.StatusField.ID.name)), 0, brb); Term term = new Term(IndexTweets.StatusField.ID.name, brb.get()); // System.out.println(reader.getDocCount("timeline")); Document d_new = new Document(); // for (IndexableField field : d.getFields()) { // d_new.add(field); // } // System.out.println(d_new.getFields()); d_new.add(new StringField("userBackground", d.get(IndexTweets.StatusField.USER_ID.name), Store.YES)); d_new.add(new Field("timeline", hm.get(Long.parseLong(d.get(IndexTweets.StatusField.USER_ID.name))), textOptions)); // System.out.println(d_new.get()); writer.addDocument(d_new); writer.commit(); // t = new Term("label", "why"); // TermQuery tqnew = new TermQuery(t); // // totalHitCollector = new TotalHitCountCollector(); // // searcher.search(tqnew, totalHitCollector); // // if (totalHitCollector.getTotalHits() > 0) { // collector = TopScoreDocCollector.create(Math.max(0, totalHitCollector.getTotalHits())); // searcher.search(tqnew, collector); // hits = collector.topDocs().scoreDocs; // // System.out.println("City " + cityName[city] + " " + collector.getTotalHits() + " hits."); // // for (int k = 0; k < hits.length; k++) { // docId = hits[k].doc; // d = searcher.doc(docId); // System.out.println(d.get(IndexTweets.StatusField.ID.name)); // System.out.println(d.get(IndexTweets.StatusField.PLACE.name)); // } // } // writer.deleteDocuments(term); // writer.commit(); // writer.addDocument(d); // writer.commit(); // System.out.println(reader.getDocCount("timeline")); // writer.updateDocument(term, d); // writer.commit(); } } } } reader.close(); writer.close(); }