List of usage examples for org.apache.lucene.index IndexReader close
@Override public final synchronized void close() throws IOException
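Every example below follows the same lifecycle: open a reader, use it, close it when done. As a minimal standalone sketch of that pattern (Lucene 4.x API assumed; the index path is a placeholder):

import java.io.File;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class CloseReaderExample {
    public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.open(new File("/path/to/index")); // placeholder path
        IndexReader reader = DirectoryReader.open(dir);
        try {
            // Use the reader while it is open.
            System.out.println("docs in index: " + reader.numDocs());
        } finally {
            // close() releases index files and cached resources;
            // the reader must not be used afterwards.
            reader.close();
            dir.close();
        }
    }
}

Since IndexReader implements Closeable, a try-with-resources block is an equivalent and more concise alternative to the explicit finally.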
From source file:engine.easy.search.RelevanceFeedBackUtil.java
License:Apache License
/**
 * Performs the thumbs-down action and generates a new query from the top
 * highest-frequency terms. It also decreases the boost of the relevant
 * documents so that they rank lower in search results for similar terms.
 */
public static Query performPesduoRelevance(Result[] results) {
    Query q = null;
    try {
        final Map<String, Integer> frequencyMap = new HashMap<String, Integer>();
        Map<Integer, Document> documentMap = new HashMap<Integer, Document>();
        List<String> termsList = new ArrayList<String>();

        Directory indexDir = FSDirectory.open(new File(AppConstants.INDEX_DIR_PATH));
        IndexReader indexReader = IndexReader.open(indexDir);
        EasySearchIndexReader esiReader = new EasySearchIndexReader(indexReader);

        float boost = 0F;
        for (Result result : results) {
            TermFreqVector tfv = indexReader.getTermFreqVector(result.id, AppConstants.CONTENT_FIELD);
            Document doc = indexReader.document(result.id);
            boost += doc.getBoost() + AppConstants.THUMBS_UP;
            System.out.print("DOC : " + result.id + " Field : " + tfv.getField() + "\n");

            for (int i = 0; i < tfv.getTermFrequencies().length; i++) {
                if (!termsList.contains(tfv.getTerms()[i]))
                    termsList.add(tfv.getTerms()[i]);
                frequencyMap.put(tfv.getTerms()[i], tfv.getTermFrequencies()[i]);
            }
        }

        // Close the index reader.
        indexReader.close();

        // Boost the visibility of these terms so the matching documents
        // rank higher for the corresponding search terms.
        q = computeTopTermQuery(termsList, frequencyMap, AppConstants.TOP_DOCUMENTS);
        q.setBoost(boost);
        System.out.print("Query boost : " + boost);
    } catch (Exception e) {
        System.out.println("Exception: performPesduoRelevance " + e.toString());
    }
    return q;
}
From source file:engine.easy.search.RelevanceFeedBackUtil.java
License:Apache License
/**
 * Computes a term-frequency map for the overall index at the specified
 * location, builds a Boolean OR query out of the "most frequent" terms in
 * the index, and returns it. "Most frequent" means the terms whose
 * frequencies are greater than or equal to topTermCutoff * the frequency
 * of the top term, where topTermCutoff is a number between 0 and 1.
 *
 * @param ramdir the directory where the index is created.
 * @param numOf  the number of top terms to select (unused here;
 *               AppConstants.TOP_DOCUMENTS is passed instead).
 * @return a Boolean OR query.
 * @throws Exception if one is thrown.
 */
private static Query computeTopTermQueryFromDataCollection(Directory ramdir, int numOf) throws Exception {
    final Map<String, Integer> frequencyMap = new HashMap<String, Integer>();
    List<String> termlist = new ArrayList<String>();

    IndexReader reader = IndexReader.open(ramdir);
    TermEnum terms = reader.terms();
    while (terms.next()) {
        Term term = terms.term();
        String termText = term.text();
        int frequency = reader.docFreq(term);
        frequencyMap.put(termText, frequency);
        termlist.add(termText);
    }
    reader.close();

    return computeTopTermQuery(termlist, frequencyMap, AppConstants.TOP_DOCUMENTS);
}
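The computeTopTermQuery helper called throughout this file is not shown on this page. A hypothetical sketch of what it might look like, inferred from the cutoff rule the Javadoc above describes (the topTermCutoff value and the use of AppConstants.CONTENT_FIELD are assumptions, not the project's actual implementation):

// Hypothetical sketch of computeTopTermQuery, inferred from the Javadoc above.
// The cutoff constant and the field name are assumptions.
private static Query computeTopTermQuery(List<String> terms,
        final Map<String, Integer> frequencyMap, int numTerms) {
    final float topTermCutoff = 0.3F; // assumed value between 0 and 1

    // Sort terms by descending frequency.
    List<String> sorted = new ArrayList<String>(terms);
    Collections.sort(sorted, new Comparator<String>() {
        public int compare(String a, String b) {
            return frequencyMap.get(b) - frequencyMap.get(a);
        }
    });

    // OR together every term whose frequency is at least
    // topTermCutoff * the top term's frequency, up to numTerms terms.
    BooleanQuery query = new BooleanQuery();
    int topFreq = sorted.isEmpty() ? 0 : frequencyMap.get(sorted.get(0));
    for (int i = 0; i < sorted.size() && i < numTerms; i++) {
        String term = sorted.get(i);
        if (frequencyMap.get(term) < topTermCutoff * topFreq) {
            break;
        }
        query.add(new TermQuery(new Term(AppConstants.CONTENT_FIELD, term)),
                BooleanClause.Occur.SHOULD);
    }
    return query;
}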
From source file:engine.easy.search.RelevanceFeedBackUtil.java
License:Apache License
public static Query performUpAndDown(Map<Integer, Float> docMap) throws IOException {
    Query q = null;
    try {
        final Map<String, Integer> frequencyMap = new HashMap<String, Integer>();
        Map<Integer, Document> documentMap = new HashMap<Integer, Document>();
        List<String> termsList = new ArrayList<String>();

        Directory indexDir = FSDirectory.open(new File(AppConstants.INDEX_DIR_PATH));
        IndexReader indexReader = IndexReader.open(indexDir);
        EasySearchIndexReader esiReader = new EasySearchIndexReader(indexReader);

        for (Integer docId : docMap.keySet()) {
            TermFreqVector tfv = indexReader.getTermFreqVector(docId, AppConstants.CONTENT_FIELD);
            Document doc = indexReader.document(docId);
            System.out.print("DOC : " + docId + " Field : " + tfv.getField() + "\n");

            for (int i = 0; i < tfv.getTermFrequencies().length; i++) {
                if (!termsList.contains(tfv.getTerms()[i]))
                    termsList.add(tfv.getTerms()[i]);
                System.out.println("TERM : " + tfv.getTerms()[i] + " FREQ : " + tfv.getTermFrequencies()[i]);
                frequencyMap.put(tfv.getTerms()[i], tfv.getTermFrequencies()[i]);
            }

            // Keep the document under its doc id.
            documentMap.put(docId, doc);
        }

        // Close the index reader.
        indexReader.close();

        // Boost the visibility of these terms so the matching documents
        // rank higher for the corresponding search terms.
        q = computeTopTermQuery(termsList, frequencyMap, AppConstants.TOP_DOCUMENTS);
        q.setBoost(AppConstants.BOOST);
    } catch (Exception e) {
        System.out.println("Exception: performUpAndDown " + e.toString());
    }
    return q;
}
From source file:es.pode.indexador.negocio.servicios.busqueda.SrvBuscadorServiceImpl.java
License:Open Source License
/**
 * Looks up a random ODE (learning object) inside the repository.
 * @return DocVO Details of an indexed ODE.
 */
protected DocVO handleObtenerODERandom() throws Exception {
    List listaIndices = (List) this.getIndiceDao().loadAll(getIndiceDao().TRANSFORM_INDICEVO);
    if (listaIndices.size() == 0) // No indices to list, return nothing
        return null;

    Random random = new Random(Calendar.getInstance().getTimeInMillis());
    Document doc = null;
    boolean noCero = true;
    int intRandom = random.nextInt();
    int i = 0;
    int reintentosInt = 10;

    // intRandom may be 0; to avoid that, try at most 10 times.
    for (i = 0; i < reintentosInt && intRandom != 0 && noCero; i++) {
        // Pick an index at random from all the indices in the repository.
        int idiomaSeleciconado = (intRandom < 0 ? (intRandom * (-1)) : intRandom) % listaIndices.size();
        IndiceVO indice = (IndiceVO) listaIndices.get(idiomaSeleciconado);

        // Open the index and check the number of indexed documents.
        Directory directorioIndiceSimple = null;
        directorioIndiceSimple = this.getIndexByLanguage(indice.getIdentificador());
        IndexReader indiceLectura = IndexReader.open(directorioIndiceSimple);
        int numeroDocumentos = indiceLectura.numDocs();
        logger.debug("The index contains " + numeroDocumentos + " documents");

        // Select the document to extract.
        if (numeroDocumentos > 0) {
            intRandom = random.nextInt();
            noCero = false;
            int documentoSeleccionado = (intRandom < 0 ? (intRandom * (-1)) : intRandom) % numeroDocumentos;
            logger.info("Returned document [" + documentoSeleccionado + "] out of [" + numeroDocumentos
                    + "] total indexed documents.");
            doc = indiceLectura.document(documentoSeleccionado);
        }
        indiceLectura.close();
    }

    if (i == reintentosInt && noCero) {
        logger.info("No valid random value was found in [" + reintentosInt + "] attempts");
    }

    if (doc != null)
        return getVOFromLucene(doc, new DocVO(), 0);
    else
        return null;
}
From source file:es.pode.indexador.negocio.servicios.indexado.SrvIndexadorServiceImpl.java
License:Open Source License
/**
 * Builds the repository that provides suggested words.
 * @param directorioIndiceSimple Directory object with information about the index repository directory
 * @param directorioIndiceSpell  Directory object with information about the suggested-words repository directory
 * @throws IOException
 * @throws Exception
 */
private synchronized void spellCheckerAdd(Directory directorioIndiceSimple, Directory directorioIndiceSpell)
        throws IOException, Exception {
    if (logger.isDebugEnabled())
        logger.debug("Checking the spellchecker directory = " + directorioIndiceSpell
                + " and the plain one = " + directorioIndiceSimple);

    if (IndexReader.indexExists(directorioIndiceSimple)) {
        if (logger.isDebugEnabled())
            logger.debug("The simple index " + directorioIndiceSimple + " exists; opening it for reading.");
        IndexReader indexReader = IndexReader.open(directorioIndiceSimple);

        String field = props.getProperty("campo_titulo");
        if (logger.isDebugEnabled())
            logger.debug("Creating a dictionary for the field = " + field);
        Dictionary dictionary = new LuceneDictionary(indexReader, field);

        if (logger.isDebugEnabled())
            logger.debug("Creating the spellchecker[" + directorioIndiceSpell + "]");
        SpellChecker spellChecker = new SpellChecker(directorioIndiceSpell);

        if (logger.isDebugEnabled())
            logger.debug("Indexing the dictionary from [" + directorioIndiceSimple + "] into the spell index ["
                    + directorioIndiceSpell + "]");
        spellChecker.indexDictionary(dictionary);

        field = props.getProperty("campo_descripcion");
        if (logger.isDebugEnabled())
            logger.debug("Creating a dictionary for the field = " + field);
        dictionary = new LuceneDictionary(indexReader, field);
        spellChecker.indexDictionary(dictionary);

        indexReader.close();
        directorioIndiceSpell.close();
    } else {
        logger.error("The index does not exist in the directory[" + directorioIndiceSimple + "]");
        throw new Exception("The index does not exist in the directory = " + directorioIndiceSimple);
    }
}
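Once the spell index is built as above, it would typically be queried with SpellChecker.suggestSimilar. A minimal lookup sketch (the directory path and the misspelled input word are placeholders):

// Minimal lookup sketch: open the spell index built above and ask for
// suggestions. The path and the input word are placeholders.
Directory spellDir = FSDirectory.open(new File("/path/to/spellIndex"));
SpellChecker checker = new SpellChecker(spellDir);
String[] suggestions = checker.suggestSimilar("documnet", 5); // up to 5 candidates
for (String s : suggestions) {
    System.out.println(s);
}
checker.close();
spellDir.close();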
From source file:es.unizar.iaaa.crawler.butler.index.SearchFiles.java
License:Apache License
/**
 * Simple command-line based search demo.
 */
public ArrayList<SearchResult> search(String dir, String queryS) throws Exception {
    String index = dir + "index";
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new EnglishAnalyzer();

    // For each information need, preprocess the text and run a query.
    String line = queryS;
    line = line.trim();

    // Strip punctuation as pre-filtering.
    line = line.replace(".", " ");
    line = line.replace(",", " ");
    line = line.replace("(", " ");
    line = line.replace(")", " ");
    line = line.replace(";", " ");
    line = line.replace(":", " ");
    line = line.replace("-", " ");

    // Remove stop words and expand the query semantically.
    Query queryMultiple = null;
    BooleanQuery query = new BooleanQuery(); // value from the query box

    // Generic query over all the indexed fields.
    if (!line.equals("")) {
        String[] fields = { "content" }; // something about this should appear in the title
        BooleanClause.Occur[] flags = { BooleanClause.Occur.SHOULD };
        queryMultiple = MultiFieldQueryParser.parse(line, fields, flags, analyzer);
        query.add(queryMultiple, BooleanClause.Occur.SHOULD);
    }

    ArrayList<SearchResult> result = doPagingSearch(dir, searcher, query);
    reader.close();
    return result;
}
From source file:eu.eexcess.federatedrecommender.decomposer.PseudoRelevanceSourcesDecomposer.java
License:Open Source License
@Override
public SecureUserProfile decompose(SecureUserProfileEvaluation inputSecureUserProfile) {
    FederatedRecommenderCore fCore = null;
    try {
        fCore = FederatedRecommenderCore.getInstance(null);
    } catch (FederatedRecommenderException e) {
        logger.log(Level.SEVERE, "Error getting FederatedRecommenderCore; it was perhaps not initialized correctly", e);
    }

    Set<String> keywords = new HashSet<String>();
    for (ContextKeyword cKeyword : inputSecureUserProfile.contextKeywords) {
        keywords.add(cKeyword.text);
    }

    // Temporarily swap in the query-expansion source partners for this query.
    List<PartnerBadge> tmpPartnerList = new ArrayList<PartnerBadge>();
    for (PartnerBadge partnerBadge : inputSecureUserProfile.partnerList) {
        tmpPartnerList.add(partnerBadge);
    }
    inputSecureUserProfile.partnerList = inputSecureUserProfile.queryExpansionSourcePartner;
    PartnersFederatedRecommendations pFR = fCore.getPartnersRecommendations(inputSecureUserProfile);
    inputSecureUserProfile.partnerList = tmpPartnerList;

    Directory directory = new RAMDirectory();
    Analyzer analyzer = new StopAnalyzer(Version.LUCENE_48);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_48, analyzer);
    IndexWriter writer = null;
    try {
        // Index the title and description of every partner result.
        writer = new IndexWriter(directory, config);
        for (ResultList resultLists : pFR.getResults().values()) {
            for (Result result : resultLists.results) {
                addDoc(writer, result.description);
                addDoc(writer, result.title);
            }
        }
        writer.close();

        IndexReader reader = DirectoryReader.open(directory);
        TermStats[] tStats = null;
        try {
            tStats = HighFreqTerms.getHighFreqTerms(reader, 20, "content", new DocFreqComparator());
        } catch (Exception e) {
            logger.log(Level.SEVERE, "Could not open HighFreqTerms", e);
        } finally {
            reader.close();
        }

        // Add the high-frequency terms as expansion keywords.
        if (tStats != null) {
            for (TermStats termStats : tStats) {
                String utf8String = termStats.termtext.utf8ToString();
                if (utf8String.length() > 4)
                    if (!checkHighFreqTermsQuery(utf8String.toLowerCase(), keywords))
                        if (keywords.add(utf8String.toLowerCase())) {
                            inputSecureUserProfile.contextKeywords.add(new ContextKeyword(utf8String,
                                    termStats.docFreq / 100.0, ExpansionType.EXPANSION));
                        }
            }
        } else
            logger.log(Level.SEVERE, "TermStats was null!");
    } catch (IOException e) {
        logger.log(Level.SEVERE, "There was an error writing/reading the index", e);
    }

    logger.log(Level.INFO, "Source Expansion: " + keywords.toString() + " Partners: "
            + inputSecureUserProfile.queryExpansionSourcePartner);
    return inputSecureUserProfile;
}
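The addDoc helper used above is not part of the listing. A plausible minimal version, assuming it simply indexes the given text into the "content" field that HighFreqTerms later inspects:

// Hypothetical helper, inferred from the calls above: index one string as a
// document with a single "content" field.
private static void addDoc(IndexWriter writer, String text) throws IOException {
    if (text == null) {
        return;
    }
    Document doc = new Document();
    doc.add(new TextField("content", text, Field.Store.YES));
    writer.addDocument(doc);
}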
From source file:eu.eexcess.federatedrecommender.evaluation.schloett.SchloettQueryExtraction.java
License:Open Source License
private static List<Interest> getKeyWordsFromHistoryLinks(
        HashMap<String, LinkedHashMap<String, Object>> hashMap, Object taskId) {
    Directory dir = new RAMDirectory();
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriter writer = null;

    if (hashMap != null)
        for (String keyset : hashMap.keySet()) {
            LinkedHashMap<String, Object> linkedHashMap = hashMap.get(keyset);
            if (linkedHashMap != null && linkedHashMap.get("task_id").equals(taskId)) {
                Object urlObject = linkedHashMap.get("url");
                if (urlObject == null || urlObject.toString().contains("http://de.wikipedia.org/wiki"))
                    continue;

                // Check whether this URL has already been indexed.
                IndexReader reader = null;
                try {
                    reader = DirectoryReader.open(dir);
                } catch (IOException e4) {
                    // The index may not exist yet; ignore.
                }
                IndexSearcher searcher = null;
                if (reader != null)
                    searcher = new IndexSearcher(reader);
                TopDocs docs = null;
                if (searcher != null) {
                    try {
                        docs = searcher.search(new TermQuery(new Term("url", urlObject.toString())), 1);
                    } catch (IOException e4) {
                        e4.printStackTrace();
                    }
                }
                // Done checking for duplicates; close the reader.
                if (reader != null) {
                    try {
                        reader.close();
                    } catch (IOException e3) {
                        e3.printStackTrace();
                    }
                }
                if (docs != null && docs.totalHits > 0)
                    continue; // URL already indexed

                URL url = null;
                try {
                    url = new URL(urlObject.toString());
                } catch (MalformedURLException e3) {
                    e3.printStackTrace();
                }

                InputStream input = null;
                if (url != null) {
                    try {
                        input = url.openStream();
                    } catch (IOException e2) {
                        // Ignore unreachable URLs.
                    }
                    if (input != null) {
                        // Parse the page with Tika.
                        LinkContentHandler linkHandler = new LinkContentHandler();
                        BodyContentHandler textHandler = new BodyContentHandler(10 * 1024 * 1024);
                        ToHTMLContentHandler toHTMLHandler = new ToHTMLContentHandler();
                        TeeContentHandler teeHandler = new TeeContentHandler(linkHandler, textHandler, toHTMLHandler);
                        Metadata metadata = new Metadata();
                        ParseContext parseContext = new ParseContext();
                        HtmlParser parser = new HtmlParser();
                        try {
                            parser.parse(input, teeHandler, metadata, parseContext);
                        } catch (IOException | SAXException | TikaException e1) {
                            System.out.println(urlObject.toString());
                            e1.printStackTrace();
                        }

                        // Keep only the nouns (NN/NNS) that are not blacklisted.
                        String string = textHandler.toString();
                        String docString = " ";
                        String tagged = tagger.tagString(string.toLowerCase());
                        Pattern pattern = Pattern.compile("\\s\\w+(_NN|_NNS)");
                        Matcher matcher = pattern.matcher(tagged);
                        while (matcher.find()) {
                            if (!blackList.contains(matcher.group().replaceAll("_NN|_NNS", "")))
                                docString += matcher.group().replaceAll("_NN|_NNS", " ") + " ";
                        }

                        // Index the extracted nouns together with the URL.
                        Document doc = new Document();
                        doc.add(new TextField("content", docString, Store.YES));
                        doc.add(new StringField("url", urlObject.toString(), Store.YES));
                        try {
                            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_0, analyzer);
                            writer = new IndexWriter(dir, config);
                            writer.addDocument(doc);
                            writer.close();
                            input.close();
                        } catch (IOException e) {
                            e.printStackTrace();
                        }
                    }
                }
            }
        }

    // Extract the 30 most frequent terms from everything indexed above.
    IndexReader reader = null;
    try {
        reader = DirectoryReader.open(dir);
    } catch (Exception e1) {
        System.out.println(e1);
    }
    TermStats[] tStats = null;
    if (reader != null)
        try {
            tStats = HighFreqTerms.getHighFreqTerms(reader, 30, "content", new DocFreqComparator());
        } catch (Exception e) {
            System.out.println(e);
        } finally {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }

    List<Interest> keywordList = new ArrayList<Interest>();
    System.out.println("Extraction: ");
    if (tStats != null) {
        for (TermStats termStats : tStats) {
            String utf8String = termStats.termtext.utf8ToString();
            if (!blackList.contains(utf8String.toLowerCase())) {
                System.out.print("\"" + utf8String.toLowerCase() + "\",");
                keywordList.add(new Interest(utf8String.toLowerCase()));
            }
        }
    }
    System.out.println();
    return keywordList;
}
From source file:eu.eexcess.sourceselection.redde.dbsampling.DBSampler.java
License:Apache License
/**
 * Estimates the size of the general database using sample-resample and the
 * search term "term".
 *
 * @param term one-term search term for the general and sampled index
 * @return the estimated database size of the general index
 * @throws ParseException
 * @throws IOException
 * @throws IllegalArgumentException
 *             if an index (base or sampled) contains no documents
 */
private double resample(String term) throws ParseException, IOException, IllegalArgumentException {
    Query query = new QueryParser(Settings.IndexFields.IndexTextField, new EnglishAnalyzer()).parse(term);
    outIndexWriter.commit();

    if (inIndexReader.numDocs() <= 0) {
        throw new IllegalArgumentException("failed to resample using empty index [inIndexReader]");
    } else if (outIndexWriter.numDocs() <= 0) {
        throw new IllegalArgumentException("failed to resample using empty index [outIndexWriter]");
    }

    double estimation = 0;
    IndexReader sampleIndexReader = null;
    try {
        // Get the total hits for the term in the sample index.
        sampleIndexReader = DirectoryReader.open(outIndexWriter, true);
        IndexSearcher sampleIndexSearcher = new IndexSearcher(sampleIndexReader);
        TopDocs sampleSearchDocs = sampleIndexSearcher.search(query, sampleIndexReader.numDocs());

        // Get the total hits for the term in the general index.
        IndexSearcher generalIndexSearcher = new IndexSearcher(inIndexReader);
        TopDocs generalSearchDocs = generalIndexSearcher.search(query, inIndexReader.numDocs());

        estimation = estimationCalculator(generalSearchDocs.totalHits, sampleSearchDocs.totalHits,
                sampleIndexReader.numDocs(), true);
    } finally {
        if (sampleIndexReader != null) {
            sampleIndexReader.close();
        }
    }
    return estimation;
}
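The estimationCalculator helper is not shown here. Under the sample-resample method, the full database size is estimated by scaling the sample size by the ratio of the term's document frequencies in the two indexes. A hypothetical sketch (the signature is taken from the call above; the formula is the standard sample-resample estimator, not necessarily this project's exact implementation):

// Hypothetical sketch of estimationCalculator, based on the standard
// sample-resample estimator: df_sample / N_sample ~ df_full / N_full,
// so N_full ~ df_full * N_sample / df_sample.
private double estimationCalculator(int generalHits, int sampleHits, int sampleSize, boolean verbose) {
    if (sampleHits <= 0) {
        return 0.0; // the term never occurs in the sample; no estimate possible
    }
    double estimate = ((double) generalHits * sampleSize) / sampleHits;
    if (verbose) {
        System.out.println("estimated database size: " + estimate);
    }
    return estimate;
}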
From source file:eyeskyhigh.lucene.demo.DeleteFiles.java
License:Apache License
/** Deletes documents that contain the given term from an index. */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.DeleteFiles <unique_term>";
    if (args.length == 0) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }
    try {
        Directory directory = FSDirectory.getDirectory("index");
        IndexReader reader = IndexReader.open(directory);

        Term term = new Term("path", args[0]);
        int deleted = reader.deleteDocuments(term);
        System.out.println("deleted " + deleted + " documents containing " + term);

        // Documents can also be deleted by their internal id:
        /*
        for (int i = 0; i < reader.maxDoc(); i++) {
            System.out.println("Deleting document with id " + i);
            reader.delete(i);
        }
        */

        reader.close();
        directory.close();
    } catch (Exception e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}