Example usage for org.apache.lucene.search IndexSearcher doc

List of usage examples for org.apache.lucene.search IndexSearcher doc

Introduction

On this page you can find example usage for org.apache.lucene.search IndexSearcher doc.

Prototype

public Document doc(int docID) throws IOException 

Document

Sugar for .getIndexReader().document(docID)
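
The typical pattern is to run a search, then loop over TopDocs.scoreDocs and load the stored fields of each hit by its document id, exactly as the usage examples below do. A minimal, self-contained sketch of that pattern (assuming a Lucene 5.x-8.x style API, an existing index under /tmp/lucene-index, and a stored "contents" field; these names are illustrative assumptions, not taken from the examples):

import java.nio.file.Paths;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;

public class IndexSearcherDocExample {
    public static void main(String[] args) throws Exception {
        // open the index and wrap the reader in a searcher (path is an assumption)
        try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/tmp/lucene-index")))) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Query query = new TermQuery(new Term("contents", "lucene"));
            TopDocs hits = searcher.search(query, 10);
            for (ScoreDoc hit : hits.scoreDocs) {
                // doc(int) is sugar for getIndexReader().document(docID)
                Document d = searcher.doc(hit.doc);
                System.out.println(hit.score + " " + d.get("contents"));
            }
        }
    }
}

The examples that follow use older Lucene APIs (IndexReader.open, IndexSearcher.close), but the doc(int) call itself is used the same way in each of them.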

Usage

From source file: edu.ucla.sspace.lra.LatentRelationalAnalysis.java

License: Open Source License

private static float searchPhrase(File indexDir, String A, String B) throws Exception {
    Directory fsDir = FSDirectory.getDirectory(indexDir);
    IndexSearcher searcher = new IndexSearcher(fsDir);

    long start = new Date().getTime();
    QueryParser parser = new QueryParser("contents", new StandardAnalyzer());
    //System.err.println("searching for: '\"" + A + " " + B + "\"~"+MAX_PHRASE+"'");
    parser.setPhraseSlop(MAX_PHRASE);
    String my_phrase = "\"" + A + " " + B + "\"";
    Query query = parser.parse(my_phrase);
    //System.err.println("total hits: " + results.totalHits);

    //set similarity to use only the frequencies
    //score is based on frequency of phrase only
    searcher.setSimilarity(new Similarity() {
        public static final long serialVersionUID = 1L;

        public float coord(int overlap, int maxOverlap) {
            return 1;
        }

        public float queryNorm(float sumOfSquaredWeights) {
            return 1;
        }

        public float tf(float freq) {
            return freq;
        }

        public float idf(int docFreq, int numDocs) {
            return 1;
        }

        public float lengthNorm(String fieldName, int numTokens) {
            return 1;
        }

        public float sloppyFreq(int distance) {
            return 1;
        }
    });
    TopDocs results = searcher.search(query, 10);

    ScoreDoc[] hits = results.scoreDocs;
    float total_score = 0;
    //add up the scores
    for (ScoreDoc hit : hits) {
        Document doc = searcher.doc(hit.doc);
        //System.err.printf("%5.3f %sn\n",
        //   hit.score, doc.get("contents"));
        total_score += hit.score;
    }

    long end = new Date().getTime();
    searcher.close();

    return total_score;
}

From source file: edu.ur.ir.groupspace.service.DefaultGroupWorkspaceSearchService.java

License: Apache License

public SearchResults<GroupWorkspace> search(File indexFolder, String query, int offset, int numResults) {
    SearchResults<GroupWorkspace> searchResults = new SearchResults<GroupWorkspace>();
    searchResults.setOriginalQuery(query);
    query = SearchHelper.prepareMainSearchString(query, true);
    ArrayList<GroupWorkspace> groupWorkspaces = new ArrayList<GroupWorkspace>();
    if (log.isDebugEnabled()) {
        log.debug(
                "User search results executing query " + query + " on index " + indexFolder.getAbsolutePath());
    }

    IndexSearcher searcher = null;
    IndexReader reader = null;
    try {
        FSDirectory directory = FSDirectory.open(indexFolder);
        reader = IndexReader.open(directory, true);
        searcher = new IndexSearcher(reader);
        QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_35, fields, analyzer);
        parser.setDefaultOperator(QueryParser.AND_OPERATOR);

        Query luceneQuery = parser.parse(query);
        TopDocs hits = searcher.search(luceneQuery, 1000);
        searchResults.setTotalHits(hits.totalHits);

        int position = offset;
        int addedResults = 0;
        while (hits.totalHits > position && (addedResults < numResults)) {
            if (log.isDebugEnabled()) {
                log.debug(" adding document at position " + position);

            }

            Document d = searcher.doc(hits.scoreDocs[position].doc);
            Long groupWorkspaceId = NumericUtils.prefixCodedToLong(d.get(DefaultGroupWorkspaceIndexService.ID));
            log.debug("group workspace id = " + groupWorkspaceId);
            GroupWorkspace groupWorkspace = groupWorkspaceService.get(groupWorkspaceId, false);
            groupWorkspaces.add(groupWorkspace);
            addedResults += 1;
            position += 1;
        }
    } catch (Exception e) {
        log.error(e);
    } finally {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                log.error("the searcher could not be closed", e);
            }
        }
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                log.error("the reader could not be closed", e);
            }
        }
    }
    searchResults.setObjects(groupWorkspaces);
    return searchResults;
}

From source file: edu.ur.ir.institution.service.DefaultInstitutionalCollectionSearchService.java

License: Apache License

/**
 * Returns search results for institutional collections.
 *
 * @param institutionalCollectionIndexFolder - folder for the institutional collections
 * @param query - query to execute
 * @param offset - offset to start at
 * @param numResults - number of results.
 * 
 * @return - set of institutional collections found for the query.
 */
public SearchResults<InstitutionalCollection> search(File institutionalCollectionIndexFolder, String query,
        int offset, int numResults) {
    SearchResults<InstitutionalCollection> searchResults = new SearchResults<InstitutionalCollection>();
    searchResults.setOriginalQuery(query);
    query = SearchHelper.prepareMainSearchString(query, true);
    ArrayList<InstitutionalCollection> collections = new ArrayList<InstitutionalCollection>();
    if (log.isDebugEnabled()) {
        log.debug("User search results executing query " + query + " on index "
                + institutionalCollectionIndexFolder.getAbsolutePath());
    }

    IndexSearcher searcher = null;
    IndexReader reader = null;
    try {
        FSDirectory directory = FSDirectory.open(institutionalCollectionIndexFolder);
        reader = IndexReader.open(directory, true);
        searcher = new IndexSearcher(reader);

        QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_35, fields, analyzer);
        parser.setDefaultOperator(QueryParser.AND_OPERATOR);

        Query luceneQuery = parser.parse(query);
        TopDocs hits = searcher.search(luceneQuery, 1000);

        searchResults.setTotalHits(hits.totalHits);

        int position = offset;
        int addedResults = 0;
        while (hits.totalHits > position && (addedResults < numResults)) {
            if (log.isDebugEnabled()) {
                log.debug(" adding document at position " + position);

            }

            Document d = searcher.doc(hits.scoreDocs[position].doc);

            Long collectionId = NumericUtils
                    .prefixCodedToLong(d.get(DefaultInstitutionalCollectionIndexService.ID));
            if (log.isDebugEnabled()) {
                log.debug("collection id = " + collectionId);
            }

            InstitutionalCollection collection = institutionalCollectionService.getCollection(collectionId,
                    false);
            collections.add(collection);
            addedResults += 1;
            position += 1;
        }
    } catch (Exception e) {
        log.error(e);
    } finally {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                log.error("the searcher could not be closed", e);
            }
        }
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                log.error("the reader could not be closed", e);
            }
        }
    }
    searchResults.setObjects(collections);
    return searchResults;
}

From source file: edu.ur.ir.person.service.DefaultNameAuthoritySearchService.java

License: Apache License

/**
 *  Execute the search
 * @see edu.ur.ir.person.NameAuthoritySearchService#search(Repository, String, int, int)
 */
public SearchResults<PersonNameAuthority> search(File nameAuthorityIndex, String query, int offset,
        int numResults) {

    SearchResults<PersonNameAuthority> nameSearchResults = new SearchResults<PersonNameAuthority>();
    nameSearchResults.setOriginalQuery(query);
    query = SearchHelper.prepareMainSearchString(query, false);
    List<PersonNameAuthority> personNameAuthorities = new LinkedList<PersonNameAuthority>();

    if (log.isDebugEnabled()) {
        log.debug("Name search results executing query " + query);
    }

    // If the name index folder does not exist
    // then just return empty results
    if (nameAuthorityIndex == null) {
        return nameSearchResults;
    }

    String indexFolder = nameAuthorityIndex.getAbsolutePath();

    IndexSearcher searcher = null;
    IndexReader reader = null;
    try {
        FSDirectory directory = FSDirectory.open(new File(indexFolder));
        reader = IndexReader.open(directory, true);
        searcher = new IndexSearcher(reader);

        QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_35, fields, analyzer);
        parser.setDefaultOperator(QueryParser.AND_OPERATOR);

        Query luceneQuery = parser.parse(query);
        TopDocs hits = searcher.search(luceneQuery, 1000);
        nameSearchResults.setTotalHits(hits.totalHits);

        log.debug(" No. of hits = " + hits.totalHits + " offset=" + offset + "  numResults=" + numResults);
        int position = offset;
        int addedResults = 0;
        while (hits.totalHits > position && (addedResults < numResults)) {
            if (log.isDebugEnabled()) {
                log.debug(" adding document at position " + position);
            }
            Document d = searcher.doc(hits.scoreDocs[position].doc);

            personNameAuthorities.add(getPersonNameAuthority(d));
            position += 1;
            addedResults += 1;
        }
    } catch (Exception e) {
        log.error(e);
    } finally {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                log.error("the searcher could not be closed", e);
            }
        }
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                log.error("the reader could not be closed", e);
            }
        }
    }
    nameSearchResults.setObjects(personNameAuthorities);

    return nameSearchResults;
}

From source file: edu.ur.ir.researcher.service.DefaultResearcherSearchService.java

License: Apache License

/**
 * This determines the possible facets for each of the categories - for example, the possible authors
 * for the display.  Counts do not matter at this stage; they become important later on.
 *
 * @param hits
 * @param numberOfHitsToProcess
 * @return
 * @throws CorruptIndexException
 * @throws IOException
 */
private HashMap<String, HashMap<String, FacetResult>> generateFacetSearches(TopDocs hits,
        int numberOfHitsToProcess, int numberOfResultsToCollect, IndexSearcher searcher)
        throws CorruptIndexException, IOException {

    HashMap<String, HashMap<String, FacetResult>> facets = new HashMap<String, HashMap<String, FacetResult>>();
    HashMap<String, FacetResult> departmentsMap = new HashMap<String, FacetResult>();
    HashMap<String, FacetResult> fieldsMap = new HashMap<String, FacetResult>();
    HashMap<String, FacetResult> keywordsMap = new HashMap<String, FacetResult>();

    facets.put(DEPARTMENT_MAP, departmentsMap);
    facets.put(FIELD_MAP, fieldsMap);
    facets.put(KEYWORD_MAP, keywordsMap);

    int length = hits.totalHits;

    if (length <= numberOfHitsToProcess) {
        numberOfHitsToProcess = length;
    }

    for (int index = 0; index < numberOfHitsToProcess; index++) {
        Document doc = searcher.doc(hits.scoreDocs[index].doc);

        String departments = doc.get(DefaultResearcherIndexService.DEPARTMENT);

        String fields = doc.get(DefaultResearcherIndexService.FIELD);

        String keywords = doc.get(DefaultResearcherIndexService.KEY_WORDS);

        if (fields != null) {
            fields = fields.trim();
        }

        if (departments != null && departmentsMap.size() < numberOfResultsToCollect) {
            StringTokenizer tokenizer = new StringTokenizer(departments,
                    DefaultResearcherIndexService.SEPERATOR);
            while (tokenizer.hasMoreElements() && departmentsMap.size() < numberOfResultsToCollect) {
                String department = tokenizer.nextToken().trim();
                FacetResult f = departmentsMap.get(department);
                if (f == null) {
                    f = new FacetResult(1l, DefaultResearcherIndexService.DEPARTMENT, department);
                    departmentsMap.put(department, f);
                }
            }
        }

        if (fields != null && fieldsMap.size() < numberOfResultsToCollect) {
            StringTokenizer tokenizer = new StringTokenizer(fields, DefaultResearcherIndexService.SEPERATOR);
            while (tokenizer.hasMoreElements() && fieldsMap.size() < numberOfResultsToCollect) {
                String field = tokenizer.nextToken().trim();
                FacetResult f = fieldsMap.get(field);
                if (f == null) {
                    f = new FacetResult(1l, DefaultResearcherIndexService.FIELD, field);
                    fieldsMap.put(field, f);
                }
            }
        }

        if (keywords != null && keywordsMap.size() < numberOfResultsToCollect) {
            StringTokenizer tokenizer = new StringTokenizer(keywords, DefaultResearcherIndexService.SEPERATOR);
            while (tokenizer.hasMoreElements() && keywordsMap.size() < numberOfResultsToCollect) {
                String keyword = tokenizer.nextToken().trim();
                FacetResult f = keywordsMap.get(keyword);
                if (f == null) {
                    f = new FacetResult(1l, DefaultResearcherIndexService.KEY_WORDS, keyword);
                    keywordsMap.put(keyword, f);
                }
            }
        }
    }
    return facets;
}

From source file: edu.ur.ir.researcher.service.DefaultResearcherSearchService.java

License: Apache License

/** 
 * Process the possible facets and determine the number of hits for each facet across the main query.
 *
 * @param possibleFacets - possible facets to show to the user
 * @param reader - lucene reader
 * @param mainQueryBits - bitset from the main query
 * @param facetResults - set of facet results
 * @param hits - number of hits
 * @param numberOfIdsToCollect - number of ids to collect and show to user
 * @param mainQueryString - main query 
 * 
 * @return - search helper
 * @throws ParseException
 * @throws IOException
 */
private FacetSearchHelper processPossibleFacets(HashMap<String, HashMap<String, FacetResult>> possibleFacets,
        IndexReader reader, DocIdSet mainQueryBits, HashMap<String, Collection<FacetResult>> facetResults,
        TopDocs hits, int numberOfIdsToCollect, int idsToCollectStartPosition, int numberOfFacetsToShow,
        String mainQueryString, IndexSearcher searcher) throws ParseException, IOException {
    FacetResultHitComparator facetResultHitComparator = new FacetResultHitComparator();
    // get the authors and create a facet for each author
    // determine the number of hits the author has in the main query
    HashMap<String, FacetResult> departmentFacetMap = possibleFacets.get(DEPARTMENT_MAP);
    LinkedList<FacetResult> departmentFacets = new LinkedList<FacetResult>();
    departmentFacets.addAll(departmentFacetMap.values());
    processFacetCategory(departmentFacets, reader, mainQueryBits);
    Collections.sort(departmentFacets, facetResultHitComparator);

    // final holder of facets
    LinkedList<FacetResult> finalDepartmentFacets;

    if (departmentFacets.size() < numberOfFacetsToShow) {
        finalDepartmentFacets = departmentFacets;
    } else {
        finalDepartmentFacets = new LinkedList<FacetResult>();
        for (int index = 0; index < numberOfFacetsToShow; index++) {
            finalDepartmentFacets.add(departmentFacets.get(index));
        }
    }

    facetResults.put(DEPARTMENT_MAP, finalDepartmentFacets);

    // get the subjects and create a facet for each subject
    // determine the number of hits the subject has in the main query
    HashMap<String, FacetResult> keywordFacetMap = possibleFacets.get(KEYWORD_MAP);
    LinkedList<FacetResult> keywordFacets = new LinkedList<FacetResult>();
    keywordFacets.addAll(keywordFacetMap.values());
    processFacetCategory(keywordFacets, reader, mainQueryBits);
    Collections.sort(keywordFacets, facetResultHitComparator);

    // final holder of facets
    LinkedList<FacetResult> finalKeywordFacets;

    if (keywordFacets.size() < numberOfFacetsToShow) {
        finalKeywordFacets = keywordFacets;
    } else {
        finalKeywordFacets = new LinkedList<FacetResult>();
        for (int index = 0; index < numberOfFacetsToShow; index++) {
            finalKeywordFacets.add(keywordFacets.get(index));
        }
    }

    facetResults.put(KEYWORD_MAP, finalKeywordFacets);

    // get the language and create a facet for each language
    // determine the number of hits the language has in the main query
    HashMap<String, FacetResult> fieldFacetMap = possibleFacets.get(FIELD_MAP);
    LinkedList<FacetResult> fieldFacets = new LinkedList<FacetResult>();
    fieldFacets.addAll(fieldFacetMap.values());
    processFacetCategory(fieldFacets, reader, mainQueryBits);
    Collections.sort(fieldFacets, facetResultHitComparator);

    // final holder of facets
    LinkedList<FacetResult> finalFieldFacets;

    if (fieldFacets.size() < numberOfFacetsToShow) {
        finalFieldFacets = fieldFacets;
    } else {
        finalFieldFacets = new LinkedList<FacetResult>();
        for (int index = 0; index < numberOfFacetsToShow; index++) {
            finalFieldFacets.add(fieldFacets.get(index));
        }
    }

    facetResults.put(FIELD_MAP, finalFieldFacets);

    HashSet<Long> ids = new HashSet<Long>();

    // end position of ids to collect will be start position plus the number to collect
    int endPosition = idsToCollectStartPosition + numberOfIdsToCollect;

    // make sure that the end position is set up correctly
    if (hits.totalHits < endPosition) {
        endPosition = hits.totalHits;
    }

    for (int index = idsToCollectStartPosition; index < endPosition; index++) {
        Document doc = searcher.doc(hits.scoreDocs[index].doc);
        ids.add(NumericUtils.prefixCodedToLong(doc.get(DefaultResearcherIndexService.ID)));
    }
    FacetSearchHelper helper = new FacetSearchHelper(ids, hits.totalHits, facetResults, mainQueryString);
    return helper;
}

From source file: edu.ur.ir.user.service.DefaultUserGroupSearchService.java

License: Apache License

/**
 * Returns search results for the user groups.
 *
 * @param userGroupIndexFolder - location of the index folder
 * @param query - query to execute
 * @param offset - offset to start at
 * @param numResults - number of results.
 * 
 * @return - set of user groups found for the query.
 */
public SearchResults<IrUserGroup> search(File userGroupIndexFolder, String query, int offset, int numResults) {
    SearchResults<IrUserGroup> searchResults = new SearchResults<IrUserGroup>();
    searchResults.setOriginalQuery(query);
    query = SearchHelper.prepareMainSearchString(query, true);
    ArrayList<IrUserGroup> userGroups = new ArrayList<IrUserGroup>();
    if (log.isDebugEnabled()) {
        log.debug("User search results executing query " + query + " on index "
                + userGroupIndexFolder.getAbsolutePath());
    }

    IndexSearcher searcher = null;
    IndexReader reader = null;
    try {
        FSDirectory directory = FSDirectory.open(userGroupIndexFolder);
        reader = IndexReader.open(directory, true);
        searcher = new IndexSearcher(reader);

        QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_35, fields, analyzer);
        parser.setDefaultOperator(QueryParser.AND_OPERATOR);

        Query luceneQuery = parser.parse(query);
        TopDocs hits = searcher.search(luceneQuery, 1000);

        searchResults.setTotalHits(hits.totalHits);

        int position = offset;
        int addedResults = 0;
        while (hits.totalHits > position && (addedResults < numResults)) {
            if (log.isDebugEnabled()) {
                log.debug(" adding document at position " + position);

            }

            Document d = searcher.doc(hits.scoreDocs[position].doc);

            Long userGroupId = NumericUtils.prefixCodedToLong(d.get(DefaultUserGroupIndexService.ID));
            if (log.isDebugEnabled()) {
                log.debug("user group id = " + userGroupId);
            }

            IrUserGroup userGroup = userGroupService.get(userGroupId, false);
            userGroups.add(userGroup);
            addedResults += 1;
            position += 1;
        }
    } catch (Exception e) {
        log.error(e);
    } finally {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                log.error("the searcher could not be closed", e);
            }
        }
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                log.error("the reader could not be closed", e);
            }
        }
    }
    searchResults.setObjects(userGroups);
    return searchResults;
}

From source file: edu.ur.ir.user.service.DefaultUserSearchService.java

License: Apache License

public SearchResults<IrUser> search(File userIndexFolder, String query, int offset, int numResults) {
    SearchResults<IrUser> searchResults = new SearchResults<IrUser>();
    searchResults.setOriginalQuery(query);
    query = SearchHelper.prepareMainSearchString(query, true);
    ArrayList<IrUser> users = new ArrayList<IrUser>();
    if (log.isDebugEnabled()) {
        log.debug("User search results executing query " + query + " on index "
                + userIndexFolder.getAbsolutePath());
    }

    String indexFolder = userIndexFolder.getAbsolutePath();
    IndexSearcher searcher = null;
    IndexReader reader = null;
    try {
        FSDirectory directory = FSDirectory.open(new File(indexFolder));
        reader = IndexReader.open(directory, true);
        searcher = new IndexSearcher(reader);
        QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_35, fields, analyzer);
        parser.setDefaultOperator(QueryParser.AND_OPERATOR);

        Query luceneQuery = parser.parse(query);
        TopDocs hits = searcher.search(luceneQuery, 1000);
        searchResults.setTotalHits(hits.totalHits);

        int position = offset;
        int addedResults = 0;
        while (hits.totalHits > position && (addedResults < numResults)) {
            if (log.isDebugEnabled()) {
                log.debug(" adding document at position " + position);

            }

            Document d = searcher.doc(hits.scoreDocs[position].doc);
            Long userId = NumericUtils.prefixCodedToLong(d.get(DefaultUserIndexService.USER_ID));
            log.debug("user id = " + userId);
            IrUser user = userService.getUser(userId, false);
            users.add(user);
            addedResults += 1;
            position += 1;
        }
    } catch (Exception e) {
        log.error(e);
    } finally {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                log.error("the searcher could not be closed", e);
            }
        }
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                log.error("the reader could not be closed", e);
            }
        }
    }
    searchResults.setObjects(users);
    return searchResults;
}

From source file: edu.ur.ir.user.service.DefaultUserWorkspaceSearchService.java

License: Apache License

/**
 *  Execute the search
 * @see edu.ur.ir.user.UserWorkspaceSearchService#search(java.io.File, java.lang.String, int, int)
 */
public SearchResults<FileSystem> search(File personalIndexFolder, String query, int offset, int numResults) {

    SearchResults<FileSystem> searchResults = new SearchResults<FileSystem>();
    searchResults.setOriginalQuery(query);

    query = SearchHelper.prepareMainSearchString(query, true);
    ArrayList<FileSystem> fileSystemObjects = new ArrayList<FileSystem>();
    if (log.isDebugEnabled()) {
        log.debug("User search results executing query " + query + " on index "
                + personalIndexFolder.getAbsolutePath());
    }

    String indexFolder = personalIndexFolder.getAbsolutePath();
    IndexSearcher searcher = null;
    IndexReader reader = null;
    try {
        FSDirectory directory = FSDirectory.open(new File(indexFolder));
        reader = IndexReader.open(directory, true);
        searcher = new IndexSearcher(reader);
        QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_35, fields, analyzer);
        parser.setDefaultOperator(QueryParser.AND_OPERATOR);

        Query luceneQuery = parser.parse(query);
        TopDocs hits = searcher.search(luceneQuery, 1000);
        searchResults.setTotalHits(hits.totalHits);

        int position = offset;
        int addedResults = 0;
        while (hits.totalHits > position && (addedResults < numResults)) {
            if (log.isDebugEnabled()) {
                log.debug(" adding document at position " + position);
            }
            Document d = searcher.doc(hits.scoreDocs[position].doc);
            fileSystemObjects.add(getFileSystemObject(d));
            addedResults += 1;
            position += 1;
        }
    } catch (Exception e) {
        log.error(e);
    } finally {
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                log.error("the searcher could not be closed", e);
            }
        }
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                log.error("the reader could not be closed", e);
            }
        }
    }
    searchResults.setObjects(fileSystemObjects);

    return searchResults;
}

From source file: edu.usc.ir.geo.gazetteer.GeoNameResolver.java

License: Apache License

private HashMap<String, List<Location>> resolveEntities(List<String> locationNames, int count,
        IndexReader reader) throws IOException {
    if (locationNames.size() >= 200)
        hitsPerPage = 5; // avoid heavy computation
    IndexSearcher searcher = new IndexSearcher(reader);
    Query q = null;

    HashMap<String, List<Location>> allCandidates = new HashMap<String, List<Location>>();

    for (String name : locationNames) {

        if (!allCandidates.containsKey(name)) {
            try {
                //query is wrapped in additional quotes (") to avoid query tokenization on space
                q = new MultiFieldQueryParser(new String[] { FIELD_NAME_NAME, FIELD_NAME_ALTERNATE_NAMES },
                        analyzer).parse(String.format("\"%s\"", name));

                //sort descending on population
                SortField populationSort = new SortedNumericSortField(FIELD_NAME_POPULATION,
                        SortField.Type.LONG, true);

                Sort sort = new Sort(populationSort);
                //Fetch 3 times desired values, these will be sorted on code and only desired number will be kept
                ScoreDoc[] hits = searcher.search(q, hitsPerPage * 3, sort).scoreDocs;

                List<Location> topHits = new ArrayList<Location>();

                for (int i = 0; i < hits.length; ++i) {
                    Location tmpLocObj = new Location();

                    int docId = hits[i].doc;
                    Document d;
                    try {
                        d = searcher.doc(docId);
                        tmpLocObj.setName(d.get(FIELD_NAME_NAME));
                        tmpLocObj.setLongitude(d.get(FIELD_NAME_LONGITUDE));
                        tmpLocObj.setLatitude(d.get(FIELD_NAME_LATITUDE));
                        //If alternate names are empty put name as actual name
                        //This covers missing data and equals weight for later computation
                        if (d.get(FIELD_NAME_ALTERNATE_NAMES).isEmpty()) {
                            tmpLocObj.setAlternateNames(d.get(FIELD_NAME_NAME));
                        } else {
                            tmpLocObj.setAlternateNames(d.get(FIELD_NAME_ALTERNATE_NAMES));
                        }
                        tmpLocObj.setCountryCode(d.get(FIELD_NAME_COUNTRY_CODE));
                        tmpLocObj.setAdmin1Code(d.get(FIELD_NAME_ADMIN1_CODE));
                        tmpLocObj.setAdmin2Code(d.get(FIELD_NAME_ADMIN2_CODE));
                        tmpLocObj.setFeatureCode(d.get(FIELD_NAME_FEATURE_CODE));

                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                    topHits.add(tmpLocObj);
                }
                //Picking hitsPerPage number of locations from feature code sorted list 
                allCandidates.put(name, pickTopSortedByCode(topHits, hitsPerPage));
            } catch (org.apache.lucene.queryparser.classic.ParseException e) {
                e.printStackTrace();
            }
        }
    }

    HashMap<String, List<Location>> resolvedEntities = new HashMap<String, List<Location>>();
    pickBestCandidates(resolvedEntities, allCandidates, count);
    return resolvedEntities;
}