Example usage for org.apache.lucene.document Document getField

Introduction

This page lists example usages of org.apache.lucene.document.Document#getField, collected from open-source projects.

Prototype

public final IndexableField getField(String name) 

Source Link

Document

Returns a field with the given name if any exist in this document, or null. If multiple fields exist with this name, this method returns the first one added.
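
Before the project examples, a minimal sketch of the usual null-safe pattern around getField; the field name "title" and the surrounding searcher are placeholders, not taken from any example below.

import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.IndexSearcher;

public static String readTitle(IndexSearcher searcher, int docId) throws IOException {
    Document doc = searcher.doc(docId); // load the stored document
    IndexableField field = doc.getField("title"); // null if the document has no "title" field
    // stringValue() can itself be null, e.g. for purely binary or numeric fields
    return field == null ? null : field.stringValue();
}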

Usage

From source file:net.semanticmetadata.lire.searchers.GenericFastImageSearcher.java

License:Open Source License

public ImageSearchHits search(Document doc, IndexReader reader) throws IOException {
    SimpleImageSearchHits searchHits = null;
    //        try {
    LireFeature lireFeature = extractorItem.getFeatureInstance();

    if (doc.getField(fieldName).binaryValue() != null && doc.getField(fieldName).binaryValue().length > 0)
        lireFeature.setByteArrayRepresentation(doc.getField(fieldName).binaryValue().bytes,
                doc.getField(fieldName).binaryValue().offset, doc.getField(fieldName).binaryValue().length);
    double maxDistance = findSimilar(reader, lireFeature);

    if (!useSimilarityScore) {
        searchHits = new SimpleImageSearchHits(this.docs, maxDistance);
    } else {
        searchHits = new SimpleImageSearchHits(this.docs, maxDistance, useSimilarityScore);
    }
    //        } catch (InstantiationException e) {
    //            logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage());
    //        } catch (IllegalAccessException e) {
    //            logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage());
    //        }
    return searchHits;
}
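
The method above calls doc.getField(fieldName) five times for the same field; each call scans the document's field list, so fetching the binary value once is cheaper and easier to read. An equivalent sketch (BytesRef is org.apache.lucene.util.BytesRef; a null check on getField's result would additionally guard against documents that lack the field):

BytesRef value = doc.getField(fieldName).binaryValue();
if (value != null && value.length > 0) {
    lireFeature.setByteArrayRepresentation(value.bytes, value.offset, value.length);
}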

From source file:net.sf.jclal.util.dataset.LuceneIndexToWekaDataSet.java

License:Open Source License

/**
 * Converts a Lucene index into a Weka (ARFF) file for classification. The
 * class attribute of the Weka file is nominal, so the classifiers will
 * work with a nominal class.
 *
 * @param wekaFileName Path of the Weka file.
 * @param indexFile Path of the Lucene-based index. The indexed documents
 * must have fields called "class" and "content". WARNING: the fields must
 * not contain any punctuation signs.
 *
 * @return Weka instances. The instances are sparse, as is usual for
 * text data.
 *
 * @throws FileNotFoundException If the file does not exist.
 * @throws IOException If an error happens while writing the file.
 */
public Instances convertLuceneToWekaClassification(String wekaFileName, String indexFile)
        throws FileNotFoundException, IOException {
    File nuevo = new File(wekaFileName);

    if (!verify(nuevo)) {
        return null;
    }

    FileUtil.writeFile(nuevo, "@RELATION " + nuevo.getName() + doubleLine);

    IndexSearcher searcher = new IndexSearcher(indexFile);

    IndexReader reader = searcher.getIndexReader();

    int total = reader.maxDoc();

    HashMap<String, Integer> terms = new HashMap<String, Integer>(total * 2);
    Set<String> labels = new HashSet<String>(total * 2);

    int i;
    for (int l = 0; l < total; l++) {
        if (!reader.isDeleted(l)) {
            TermFreqVector vector = reader.getTermFreqVector(l, content);

            Document doc = reader.document(l);

            String current = doc.getField(classF).stringValue();

            labels.add(current);

            if (vector != null) {
                String listosI[] = vector.getTerms();
                for (i = 0; i < listosI.length; i++) {
                    if (!terms.containsKey(listosI[i])) {
                        terms.put(listosI[i], terms.size());
                    }

                }
            }
        }
    }

    String[] labelReady = new String[labels.size()];
    int posLabel = 0;
    for (String string : labels) {
        labelReady[posLabel] = string;
        posLabel++;
    }

    Container[] terminos = convertir(terms);
    Arrays.sort(terminos);

    for (int j = 0; j < terminos.length; j++) {
        FileUtil.writeFile(nuevo, "@ATTRIBUTE " + (int) terminos[j].getKey() + " NUMERIC" + "\n");
    }

    FileUtil.writeFile(nuevo, "@ATTRIBUTE class {");
    for (int j = 0; j < labelReady.length - 1; j++) {
        FileUtil.writeFile(nuevo, labelReady[j] + ",");
    }
    FileUtil.writeFile(nuevo, labelReady[labelReady.length - 1] + "}" + doubleLine);

    FileUtil.writeFile(nuevo, "@DATA\n");

    for (int pos = 0; pos < searcher.maxDoc(); pos++) {

        if (!reader.isDeleted(pos)) {

            TermFreqVector vector = reader.getTermFreqVector(pos, content);

            if (vector != null) {
                int[] origen = vector.getTermFrequencies();
                String[] termsI = vector.getTerms();

                int[] positions = new int[origen.length];

                for (int k = 0; k < origen.length; k++) {
                    positions[k] = terms.get(termsI[k]);
                }

                Container[] escribir = convertir(positions, origen);
                Arrays.sort(escribir);

                FileUtil.writeFile(nuevo, "{");
                for (int j = 0; j < escribir.length; j++) {
                    FileUtil.writeFile(nuevo, (int) escribir[j].getKey() + " " + escribir[j].getValue() + ",");
                }

                FileUtil.writeFile(nuevo,
                        terms.size() + " " + searcher.doc(pos).getField(classF).stringValue() + "}\n");
            }

        }
    }

    //close files
    closeReaders(searcher, reader);

    //Test if the weka file works
    Instances test = testWekaFile(wekaFileName);

    return test;
}
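
A hypothetical call of the converter (the paths and the no-argument constructor are assumptions, not shown in the source):

LuceneIndexToWekaDataSet converter = new LuceneIndexToWekaDataSet();
Instances data = converter.convertLuceneToWekaClassification("corpus.arff", "/path/to/lucene-index");
if (data != null) { // null means verify(...) rejected the output file
    data.setClassIndex(data.numAttributes() - 1); // the nominal "class" attribute is written last
}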

From source file:net.sf.jclal.util.dataset.LuceneIndexToWekaDataSet.java

License:Open Source License

/**
 * Converts a Lucene index into a Weka (ARFF) file for regression. The
 * class attribute of the Weka file is real, so the classifiers will work
 * with a numeric (real) class.
 *
 * @param wekaFileName Path of the Weka file.
 * @param indexFile Path of the Lucene-based index. The indexed documents
 * must have fields called "class" and "content". WARNING: the fields must
 * not contain any punctuation signs.
 *
 * @return Weka instances. The instances are sparse, as is usual for
 * text data.
 *
 * @throws FileNotFoundException If the file does not exist.
 * @throws IOException If an error happens while writing the file.
 */
public Instances convertLuceneToWekaRegression(String wekaFileName, String indexFile)
        throws FileNotFoundException, IOException {
    File nuevo = new File(wekaFileName);

    if (!verify(nuevo)) {
        return null;
    }

    FileUtil.writeFile(nuevo, "@RELATION " + nuevo.getName() + doubleLine);

    IndexSearcher searcher = new IndexSearcher(indexFile);

    IndexReader reader = searcher.getIndexReader();

    int total = reader.maxDoc();

    HashMap<String, Integer> terms = new HashMap<String, Integer>(total * 2);
    HashMap<String, Integer> labels = new HashMap<String, Integer>(total * 2);

    int i;
    for (int l = 0; l < total; l++) {
        if (!reader.isDeleted(l)) {
            TermFreqVector vector = reader.getTermFreqVector(l, content);

            Document doc = reader.document(l);

            String current = doc.getField(classF).stringValue();

            if (!labels.containsKey(current)) {
                labels.put(current, labels.size());
            }

            if (vector != null) {
                String listosI[] = vector.getTerms();
                for (i = 0; i < listosI.length; i++) {
                    if (!terms.containsKey(listosI[i])) {
                        terms.put(listosI[i], terms.size());
                    }

                }
            }
        }
    }

    Container[] terminos = convertir(terms);
    Arrays.sort(terminos);

    for (int j = 0; j < terminos.length; j++) {
        FileUtil.writeFile(nuevo, "@ATTRIBUTE " + (int) terminos[j].getKey() + " NUMERIC" + "\n");
    }

    FileUtil.writeFile(nuevo, "@ATTRIBUTE class REAL [0.0,");

    FileUtil.writeFile(nuevo, (labels.size() - 1) + ".0]" + doubleLine);

    FileUtil.writeFile(nuevo, "@DATA\n");

    for (int pos = 0; pos < searcher.maxDoc(); pos++) {

        if (!reader.isDeleted(pos)) {

            TermFreqVector vector = reader.getTermFreqVector(pos, content);

            if (vector != null) {
                int[] origen = vector.getTermFrequencies();
                String[] termsI = vector.getTerms();

                int[] positions = new int[origen.length];

                for (int k = 0; k < origen.length; k++) {
                    positions[k] = terms.get(termsI[k]);
                }

                Container[] escribir = convertir(positions, origen);
                Arrays.sort(escribir);

                FileUtil.writeFile(nuevo, "{");
                for (int j = 0; j < escribir.length; j++) {
                    FileUtil.writeFile(nuevo, (int) escribir[j].getKey() + " " + escribir[j].getValue() + ",");
                }

                FileUtil.writeFile(nuevo, terms.size() + " "
                        + labels.get(searcher.doc(pos).getField(classF).stringValue()) + ".0}\n");
            }

        }
    }

    //close files
    closeReaders(searcher, reader);

    //Test if the weka file works
    Instances test = testWekaFile(wekaFileName);

    return test;
}

From source file:net.sf.zekr.engine.search.lucene.QuranTextSearcher.java

@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    BitSet bits = new BitSet(reader.maxDoc());
    for (int i = 0; i < reader.maxDoc(); i++) {
        Document doc = reader.document(i);
        IQuranLocation loc = new QuranLocation(doc.getField("location").stringValue());
        if (searchScope.includes(loc)) {
            bits.set(i);
        }
    }
    return new DocIdBitSet(bits);
}
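
This getDocIdSet method overrides Filter.getDocIdSet from the pre-5.0 Lucene filter API; it sets a bit for every document whose stored "location" field falls inside the search scope. A hypothetical application (the filter's class name and the page size are placeholders):

Filter scopeFilter = new QuranLocationFilter(searchScope); // hypothetical name for the enclosing class
TopDocs hits = searcher.search(query, scopeFilter, 20); // restrict hits to the selected scope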

From source file:net.skyatlas.icd.test.RAMDirectoryDemo.java

public static void main(String[] args) throws IOException {
    long startTime = System.currentTimeMillis();
    System.err.println("*************************** RAMDirectory demo ****************************");
    RAMDirectory directory = new RAMDirectory();

    Version matchVersion = Version.LUCENE_48;

    Analyzer analyzer = new StandardAnalyzer(matchVersion);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, analyzer);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(directory, iwc);

    Document doc = new Document();
    doc.add(new Field("name", "Chenghui", Field.Store.YES, Field.Index.ANALYZED));
    doc.add(new Field("sex", "", Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("dosometing", "I am learning lucene ", Field.Store.YES, Field.Index.ANALYZED));

    writer.addDocument(doc);
    writer.close();

    IndexReader reader = DirectoryReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(reader);

    Query query = new TermQuery(new Term("dosometing", "lucene"));

    TopDocs rs = searcher.search(query, null, 10);
    long endTime = System.currentTimeMillis();
    System.out.println("Search took " + (endTime - startTime) + " ms, total hits: " + rs.totalHits);

    for (int i = 0; i < rs.scoreDocs.length; i++) {
        // rs.scoreDocs[i].doc is the internal document id, starting from 0
        Document firstHit = searcher.doc(rs.scoreDocs[i].doc);
        System.out.println("name:" + firstHit.getField("name").stringValue());
        System.out.println("sex:" + firstHit.getField("sex").stringValue());
        System.out.println("dosomething:" + firstHit.getField("dosometing").stringValue());
    }
    reader.close();
    directory.close();
    System.out.println("*****************?**********************");
}

From source file:net.skyatlas.icd.util.MemoryGrid.java

public void searchDiagName(String diagName) throws IOException {
    IndexReader reader = DirectoryReader.open(this.directory);
    IndexSearcher searcher = new IndexSearcher(reader);

    Query query = new TermQuery(new Term("pathDesc", diagName));

    TopDocs ts = searcher.search(query, null, 100);
    long endTime = System.currentTimeMillis();

    for (int i = 0; i < ts.scoreDocs.length; i++) {
        // ts.scoreDocs[i].doc is the internal document id, starting from 0
        Document firstHit = searcher.doc(ts.scoreDocs[i].doc);
        System.out.println("guid:" + firstHit.getField("guid").stringValue());
        //            System.out.println("sex:" + firstHit.getField("sex").stringValue());  
        System.out.println("pathDesc:" + firstHit.getField("pathDesc").stringValue());
    }
    reader.close();
    directory.close();
    System.out.println("*****************?**********************");
}

From source file:net.sourceforge.subsonic.service.LuceneSearchService.java

License:Open Source License

public SearchResult search(SearchCriteria criteria, IndexType indexType) {
    SearchResult result = new SearchResult();
    List<MusicFile> musicFiles = new ArrayList<MusicFile>();
    int offset = criteria.getOffset();
    int count = criteria.getCount();
    result.setOffset(offset);
    result.setMusicFiles(musicFiles);

    IndexReader reader = null;
    try {
        reader = createIndexReader(indexType);
        Searcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new SubsonicAnalyzer();

        MultiFieldQueryParser queryParser = new MultiFieldQueryParser(LUCENE_VERSION, indexType.getFields(),
                analyzer, indexType.getBoosts());
        Query query = queryParser.parse(criteria.getQuery());

        TopDocs topDocs = searcher.search(query, null, offset + count);
        result.setTotalHits(topDocs.totalHits);

        int start = Math.min(offset, topDocs.totalHits);
        int end = Math.min(start + count, topDocs.totalHits);
        for (int i = start; i < end; i++) {
            Document doc = searcher.doc(topDocs.scoreDocs[i].doc);
            musicFiles.add(musicFileService.getMusicFile(doc.getField(FIELD_PATH).stringValue()));
        }

    } catch (Throwable x) {
        LOG.error("Failed to execute Lucene search.", x);
    } finally {
        FileUtil.closeQuietly(reader);
    }
    return result;
}

From source file:net.strong.weblucene.index.SAXIndexer.java

License:Apache License

/**
 * Maps an original document to Lucene index fields.
 *
 * @param origDocument original Lucene Document
 *
 * @return a new Document with the original fields parsed into index fields
 */
private Document mapDoc(Document origDocument) {
    //new Lucene Document
    Document newDoc = new Document();

    try {
        //Enumeration fieldEnum = origDocument.fields();
        List fieldEnum = origDocument.getFields();

        //while (fieldEnum.hasMoreElements()) {
        while (fieldEnum != null && fieldEnum.size() > 0) {
            //Lucene Document Field
            //Field fld = (Field) fieldEnum.nextElement();
            Field fld = (Field) fieldEnum.remove(0);

            //index map field with fields name list: 'field1,field2,field5....'
            if (fld.isIndexed()) {
                String indexName = fld.name();
                StringBuffer indexValue = new StringBuffer();

                //split field list with ","
                String fieldList = fld.stringValue();
                StringTokenizer st = new StringTokenizer(fieldList, ",");

                while (st.hasMoreTokens()) {
                    //add indexValue with mapped field value
                    String mapFieldName = st.nextToken();

                    Field mapField = origDocument.getField(mapFieldName);
                    String mapValue = null;

                    if (mapField != null) {
                        mapValue = mapField.stringValue();
                    }

                    //add text field value to indexing field
                    if (mapValue != null) {
                        indexValue.append(mapValue);

                        //add space between fields avoid "field1field2"
                        indexValue.append(" ");
                    }
                }

                if (indexValue.length() > 0) {
                    Field newIndex = new Field(indexName, indexValue.toString(), Field.Store.NO,
                            Field.Index.TOKENIZED, Field.TermVector.YES);
                    newDoc.add(newIndex);
                }
            } else { //add a common field
                newDoc.add(fld);
            }
        }
    } catch (Exception e) {
        logger.error(e.toString());

        return null;
    }

    return newDoc;
}

From source file:NewsIR_search.Indexer.java

void indexFile(File collFile) throws Exception {

    Document doc;

    String docType = prop.getProperty("docType");

    if (docType.equalsIgnoreCase("trec")) {
        try {
            TrecDocIterator docElts = new TrecDocIterator(collFile);

            Document docElt;

            while (docElts.hasNext()) {
                docElt = docElts.next();

                if (docElt == null) {
                    System.out.println("docElt null");
                    break;
                }

                String tmp;
                int i = 0;
                tmp = prop.getProperty("index_fields", "null");
                if (!tmp.contentEquals("null")) {
                    for (i = 0; i < tmp.split(",").length; i++) {
                        // System.out.println(index_fields[i]+" ---> "+docElt.getField(index_fields[i]).stringValue());
                        INDEX_FIELDS[i] = docElt.getField(index_fields[i]).stringValue();
                    }
                }

                String DOCNOElt = docElt.getField("DOCNO").stringValue();
                FIELD_ID = DOCNOElt;
                String TEXTElt = docElt.getField("TEXT").stringValue();

                doc = constructDoc(TEXTElt);

                indexWriter.addDocument(doc);
                System.out.println(DOCNOElt);
                docIndexedCounter++;

            }
        } catch (FileNotFoundException ex) {
            System.err.println("Error: '" + collFile.getAbsolutePath() + "' not found");
            ex.printStackTrace();
        } catch (IOException ex) {
            System.err.println("Error: IOException on reading '" + collFile.getAbsolutePath() + "'");
            ex.printStackTrace();
        }
    }
}
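
As the prototype at the top of this page notes, getField returns null when a document lacks the requested field, so calls like docElt.getField("DOCNO").stringValue() throw a NullPointerException on malformed input. A defensive helper along these lines could be used instead (hypothetical, not part of the source):

private static String fieldValueOrEmpty(Document doc, String name) {
    IndexableField field = doc.getField(name); // null if the field is absent
    return field == null ? "" : field.stringValue();
}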

From source file:nl.knaw.huygens.timbuctoo.lucene.demoTwo.SearchFiles.java

License:Apache License

/**
 * This demonstrates a typical paging search scenario, where the search
 * engine presents pages of size n to the user. The user can then go to the
 * next page if interested in the next hits.
 * When the query is executed for the first time, then only enough results
 * are collected to fill 5 result pages. If the user wants to page beyond
 * this limit, then the query is executed another time and all hits are
 * collected.
 * 
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive) throws IOException {

    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            if (line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }

            hits = searcher.search(query, numTotalHits).scoreDocs;
        }

        end = Math.min(hits.length, start + hitsPerPage);

        for (int i = start; i < end; i++) {
            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }
            Document doc = searcher.doc(hits[i].doc);
            // System.out.println(doc.getField("content"));
            // IndexableField content = doc.getField("content");
            // System.out.println(content.stringValue());
            // String[] values = doc.getValues("indexed");
            // for (int j = 0; j < values.length; j++) {
            // System.out.println(j + " : " + values[j]);
            // }
            String path = doc.get("path");
            if (path != null) {
                System.out.println((i + 1) + ". " + path);
                System.out.println(doc.get("author"));
                System.out.println(doc.get("title"));
                System.out.println(doc.get("date"));
                System.out.println(doc.get("content").substring(0, 60));
                // for (IndexableField field : doc.getFields()) {
                // System.out.println("field: " + field);
                // }
                if (doc.getField("indexed") != null) {
                    System.out.println("   indexed: " + doc.getField("indexed"));
                }
            } else {
                System.out.println((i + 1) + ". " + "No path for this document");
            }
            System.out.flush();

        }

        if (!interactive || end == 0) {
            break;
        }

        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");

                String line = in.readLine();
                if (line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    int page = Integer.parseInt(line);
                    if ((page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit)
                break;
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}