List of usage examples for org.apache.lucene.document Document getField
public final IndexableField getField(String name)
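Before the project examples below, here is a minimal, self-contained sketch of the call (the field names "id", "title" and "missing" are illustrative assumptions, not taken from any of the projects): getField returns the first IndexableField stored under the given name, or null when the document has no field with that name.

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexableField;

public class GetFieldSketch {
    public static void main(String[] args) {
        // Build a document with two stored fields (field names are assumptions for this sketch).
        Document doc = new Document();
        doc.add(new StringField("id", "42", Field.Store.YES));
        doc.add(new TextField("title", "Lucene in Action", Field.Store.YES));

        // getField returns the first IndexableField with the given name...
        IndexableField title = doc.getField("title");
        System.out.println(title.stringValue()); // prints "Lucene in Action"

        // ...and null when no field with that name exists.
        System.out.println(doc.getField("missing")); // prints "null"
    }
}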
From source file:net.semanticmetadata.lire.searchers.GenericFastImageSearcher.java
License:Open Source License
public ImageSearchHits search(Document doc, IndexReader reader) throws IOException {
    SimpleImageSearchHits searchHits = null;
    // try {
    LireFeature lireFeature = extractorItem.getFeatureInstance();
    if (doc.getField(fieldName).binaryValue() != null && doc.getField(fieldName).binaryValue().length > 0)
        lireFeature.setByteArrayRepresentation(doc.getField(fieldName).binaryValue().bytes,
                doc.getField(fieldName).binaryValue().offset, doc.getField(fieldName).binaryValue().length);
    double maxDistance = findSimilar(reader, lireFeature);
    if (!useSimilarityScore) {
        searchHits = new SimpleImageSearchHits(this.docs, maxDistance);
    } else {
        searchHits = new SimpleImageSearchHits(this.docs, maxDistance, useSimilarityScore);
    }
    // } catch (InstantiationException e) {
    //     logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage());
    // } catch (IllegalAccessException e) {
    //     logger.log(Level.SEVERE, "Error instantiating class for generic image searcher: " + e.getMessage());
    // }
    return searchHits;
}
From source file:net.sf.jclal.util.dataset.LuceneIndexToWekaDataSet.java
License:Open Source License
/**
 * Converts a Lucene index file to a Weka file for classification. The
 * Weka file class is nominal, so the classifiers will work with a nominal
 * class.
 *
 * @param wekaFileName Path of the Weka file.
 * @param indexFile Path of the Lucene-based index file. The indexed documents
 * must have fields called "class" and "content". WARNING: The fields must
 * not contain any punctuation sign.
 *
 * @return Weka instances. The instances are sparse since they hold
 * text information.
 *
 * @throws FileNotFoundException If the file does not exist.
 * @throws IOException If an error happens while writing the file.
 */
public Instances convertLuceneToWekaClassification(String wekaFileName, String indexFile)
        throws FileNotFoundException, IOException {
    File nuevo = new File(wekaFileName);
    if (!verify(nuevo)) {
        return null;
    }
    FileUtil.writeFile(nuevo, "@RELATION " + nuevo.getName() + doubleLine);

    IndexSearcher searcher = new IndexSearcher(indexFile);
    IndexReader reader = searcher.getIndexReader();

    int total = reader.maxDoc();
    HashMap<String, Integer> terms = new HashMap<String, Integer>(total * 2);
    Set<String> labels = new HashSet<String>(total * 2);
    int i;
    for (int l = 0; l < total; l++) {
        if (!reader.isDeleted(l)) {
            TermFreqVector vector = reader.getTermFreqVector(l, content);
            Document doc = reader.document(l);
            String current = doc.getField(classF).stringValue();
            labels.add(current);
            if (vector != null) {
                String listosI[] = vector.getTerms();
                for (i = 0; i < listosI.length; i++) {
                    if (!terms.containsKey(listosI[i])) {
                        terms.put(listosI[i], terms.size());
                    }
                }
            }
        }
    }

    String[] labelReady = new String[labels.size()];
    int posLabel = 0;
    for (String string : labels) {
        labelReady[posLabel] = string;
        posLabel++;
    }

    Container[] terminos = convertir(terms);
    Arrays.sort(terminos);

    for (int j = 0; j < terminos.length; j++) {
        FileUtil.writeFile(nuevo, "@ATTRIBUTE " + (int) terminos[j].getKey() + " NUMERIC" + "\n");
    }
    FileUtil.writeFile(nuevo, "@ATTRIBUTE class {");
    for (int j = 0; j < labelReady.length - 1; j++) {
        FileUtil.writeFile(nuevo, labelReady[j] + ",");
    }
    FileUtil.writeFile(nuevo, labelReady[labelReady.length - 1] + "}" + doubleLine);

    FileUtil.writeFile(nuevo, "@DATA\n");

    for (int pos = 0; pos < searcher.maxDoc(); pos++) {
        if (!reader.isDeleted(pos)) {
            TermFreqVector vector = reader.getTermFreqVector(pos, content);
            if (vector != null) {
                int[] origen = vector.getTermFrequencies();
                String[] termsI = vector.getTerms();
                int[] positions = new int[origen.length];
                for (int k = 0; k < origen.length; k++) {
                    positions[k] = terms.get(termsI[k]);
                }
                Container[] escribir = convertir(positions, origen);
                Arrays.sort(escribir);
                FileUtil.writeFile(nuevo, "{");
                for (int j = 0; j < escribir.length; j++) {
                    FileUtil.writeFile(nuevo, (int) escribir[j].getKey() + " " + escribir[j].getValue() + ",");
                }
                FileUtil.writeFile(nuevo,
                        terms.size() + " " + searcher.doc(pos).getField(classF).stringValue() + "}\n");
            }
        }
    }

    // close files
    closeReaders(searcher, reader);

    // Test whether the Weka file works
    Instances test = testWekaFile(wekaFileName);

    return test;
}
From source file:net.sf.jclal.util.dataset.LuceneIndexToWekaDataSet.java
License:Open Source License
/**
 * Converts a Lucene index file to a Weka file for regression. The
 * Weka file class is real-valued, so the classifiers used will work with a
 * numeric real class.
 *
 * @param wekaFileName Path of the Weka file.
 * @param indexFile Path of the Lucene-based index file. The indexed documents
 * must have fields called "class" and "content". WARNING: The fields must
 * not contain any punctuation sign.
 *
 * @return Weka instances. The instances are sparse since they hold
 * text information.
 *
 * @throws FileNotFoundException If the file does not exist.
 * @throws IOException If an error happens while writing the file.
 */
public Instances convertLuceneToWekaRegression(String wekaFileName, String indexFile)
        throws FileNotFoundException, IOException {
    File nuevo = new File(wekaFileName);
    if (!verify(nuevo)) {
        return null;
    }
    FileUtil.writeFile(nuevo, "@RELATION " + nuevo.getName() + doubleLine);

    IndexSearcher searcher = new IndexSearcher(indexFile);
    IndexReader reader = searcher.getIndexReader();

    int total = reader.maxDoc();
    HashMap<String, Integer> terms = new HashMap<String, Integer>(total * 2);
    HashMap<String, Integer> labels = new HashMap<String, Integer>(total * 2);
    int i;
    for (int l = 0; l < total; l++) {
        if (!reader.isDeleted(l)) {
            TermFreqVector vector = reader.getTermFreqVector(l, content);
            Document doc = reader.document(l);
            String current = doc.getField(classF).stringValue();
            if (!labels.containsKey(current)) {
                labels.put(current, labels.size());
            }
            if (vector != null) {
                String listosI[] = vector.getTerms();
                for (i = 0; i < listosI.length; i++) {
                    if (!terms.containsKey(listosI[i])) {
                        terms.put(listosI[i], terms.size());
                    }
                }
            }
        }
    }

    Container[] terminos = convertir(terms);
    Arrays.sort(terminos);

    for (int j = 0; j < terminos.length; j++) {
        FileUtil.writeFile(nuevo, "@ATTRIBUTE " + (int) terminos[j].getKey() + " NUMERIC" + "\n");
    }
    FileUtil.writeFile(nuevo, "@ATTRIBUTE class REAL [0.0,");
    FileUtil.writeFile(nuevo, (labels.size() - 1) + ".0]" + doubleLine);

    FileUtil.writeFile(nuevo, "@DATA\n");

    for (int pos = 0; pos < searcher.maxDoc(); pos++) {
        if (!reader.isDeleted(pos)) {
            TermFreqVector vector = reader.getTermFreqVector(pos, content);
            if (vector != null) {
                int[] origen = vector.getTermFrequencies();
                String[] termsI = vector.getTerms();
                int[] positions = new int[origen.length];
                for (int k = 0; k < origen.length; k++) {
                    positions[k] = terms.get(termsI[k]);
                }
                Container[] escribir = convertir(positions, origen);
                Arrays.sort(escribir);
                FileUtil.writeFile(nuevo, "{");
                for (int j = 0; j < escribir.length; j++) {
                    FileUtil.writeFile(nuevo, (int) escribir[j].getKey() + " " + escribir[j].getValue() + ",");
                }
                FileUtil.writeFile(nuevo, terms.size() + " "
                        + labels.get(searcher.doc(pos).getField(classF).stringValue()) + ".0}\n");
            }
        }
    }

    // close files
    closeReaders(searcher, reader);

    // Test whether the Weka file works
    Instances test = testWekaFile(wekaFileName);

    return test;
}
From source file:net.sf.zekr.engine.search.lucene.QuranTextSearcher.java
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    BitSet bits = new BitSet(reader.maxDoc());
    for (int i = 0; i < reader.maxDoc(); i++) {
        Document doc = reader.document(i);
        IQuranLocation loc = new QuranLocation(doc.getField("location").stringValue());
        if (searchScope.includes(loc)) {
            bits.set(i);
        }
    }
    return new DocIdBitSet(bits);
}
From source file:net.skyatlas.icd.test.RAMDirectoryDemo.java
public static void main(String[] args) throws IOException {
    long startTime = System.currentTimeMillis();
    System.err.println("*************************** ****************************");
    RAMDirectory directory = new RAMDirectory();
    Version matchVersion = Version.LUCENE_48;
    Analyzer analyzer = new StandardAnalyzer(matchVersion);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, analyzer);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(directory, iwc);

    Document doc = new Document();
    doc.add(new Field("name", "Chenghui", Field.Store.YES, Field.Index.ANALYZED));
    doc.add(new Field("sex", "", Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("dosometing", "I am learning lucene ", Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.close();

    IndexReader reader = DirectoryReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(reader);
    Query query = new TermQuery(new Term("dosometing", "lucene"));
    TopDocs rs = searcher.search(query, null, 10);
    long endTime = System.currentTimeMillis();
    System.out.println("" + (endTime - startTime) + "" + rs.totalHits + "?");
    for (int i = 0; i < rs.scoreDocs.length; i++) {
        // rs.scoreDocs[i].doc is the internal document id (starting at 0)
        Document firstHit = searcher.doc(rs.scoreDocs[i].doc);
        System.out.println("name:" + firstHit.getField("name").stringValue());
        System.out.println("sex:" + firstHit.getField("sex").stringValue());
        System.out.println("dosomething:" + firstHit.getField("dosometing").stringValue());
    }
    reader.close();
    directory.close();
    System.out.println("*****************?**********************");
}
From source file:net.skyatlas.icd.util.MemoryGrid.java
public void searchDiagName(String diagName) throws IOException {
    IndexReader reader = DirectoryReader.open(this.directory);
    IndexSearcher searcher = new IndexSearcher(reader);
    Query query = new TermQuery(new Term("pathDesc", diagName));
    TopDocs ts = searcher.search(query, null, 100);
    long endTime = System.currentTimeMillis();
    for (int i = 0; i < ts.scoreDocs.length; i++) {
        // ts.scoreDocs[i].doc is the internal document id (starting at 0)
        Document firstHit = searcher.doc(ts.scoreDocs[i].doc);
        System.out.println("guid:" + firstHit.getField("guid").stringValue());
        // System.out.println("sex:" + firstHit.getField("sex").stringValue());
        System.out.println("pathDesc:" + firstHit.getField("pathDesc").stringValue());
    }
    reader.close();
    directory.close();
    System.out.println("*****************?**********************");
}
From source file:net.sourceforge.subsonic.service.LuceneSearchService.java
License:Open Source License
public SearchResult search(SearchCriteria criteria, IndexType indexType) {
    SearchResult result = new SearchResult();
    List<MusicFile> musicFiles = new ArrayList<MusicFile>();
    int offset = criteria.getOffset();
    int count = criteria.getCount();
    result.setOffset(offset);
    result.setMusicFiles(musicFiles);

    IndexReader reader = null;
    try {
        reader = createIndexReader(indexType);
        Searcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new SubsonicAnalyzer();

        MultiFieldQueryParser queryParser = new MultiFieldQueryParser(LUCENE_VERSION, indexType.getFields(),
                analyzer, indexType.getBoosts());
        Query query = queryParser.parse(criteria.getQuery());

        TopDocs topDocs = searcher.search(query, null, offset + count);
        result.setTotalHits(topDocs.totalHits);

        int start = Math.min(offset, topDocs.totalHits);
        int end = Math.min(start + count, topDocs.totalHits);
        for (int i = start; i < end; i++) {
            Document doc = searcher.doc(topDocs.scoreDocs[i].doc);
            musicFiles.add(musicFileService.getMusicFile(doc.getField(FIELD_PATH).stringValue()));
        }
    } catch (Throwable x) {
        LOG.error("Failed to execute Lucene search.", x);
    } finally {
        FileUtil.closeQuietly(reader);
    }
    return result;
}
From source file:net.strong.weblucene.index.SAXIndexer.java
License:Apache License
/**
 * Maps an original document to Lucene index fields.
 *
 * @param origDocument original Lucene Document
 *
 * @return Document: parses the original and builds the index fields
 */
private Document mapDoc(Document origDocument) {
    // new Lucene Document
    Document newDoc = new Document();
    try {
        //Enumeration fieldEnum = origDocument.fields();
        List fieldEnum = origDocument.getFields();
        //while (fieldEnum.hasMoreElements()) {
        while (fieldEnum != null && fieldEnum.size() > 0) {
            // Lucene Document Field
            //Field fld = (Field) fieldEnum.nextElement();
            Field fld = (Field) fieldEnum.remove(0);
            // index map field whose value is a field name list: 'field1,field2,field5....'
            if (fld.isIndexed()) {
                String indexName = fld.name();
                StringBuffer indexValue = new StringBuffer();
                // split the field list on ","
                String fieldList = fld.stringValue();
                StringTokenizer st = new StringTokenizer(fieldList, ",");
                while (st.hasMoreTokens()) {
                    // append the mapped field's value to indexValue
                    String mapFieldName = st.nextToken();
                    Field mapField = origDocument.getField(mapFieldName);
                    String mapValue = null;
                    if (mapField != null) {
                        mapValue = mapField.stringValue();
                    }
                    // add the text field value to the indexing field
                    if (mapValue != null) {
                        indexValue.append(mapValue);
                        // add a space between fields to avoid "field1field2"
                        indexValue.append(" ");
                    }
                }
                if (indexValue.length() > 0) {
                    Field newIndex = new Field(indexName, indexValue.toString(), Field.Store.NO,
                            Field.Index.TOKENIZED, Field.TermVector.YES);
                    newDoc.add(newIndex);
                }
            } else {
                // add a common field
                newDoc.add(fld);
            }
        }
    } catch (Exception e) {
        logger.error(e.toString());
        return null;
    }
    return newDoc;
}
From source file:NewsIR_search.Indexer.java
void indexFile(File collFile) throws Exception {
    Document doc;
    String docType = prop.getProperty("docType");
    if (docType.equalsIgnoreCase("trec")) {
        try {
            TrecDocIterator docElts = new TrecDocIterator(collFile);
            Document docElt;
            while (docElts.hasNext()) {
                docElt = docElts.next();
                if (docElt == null) {
                    System.out.println("docElt null");
                    break;
                }
                String tmp;
                int i = 0;
                tmp = prop.getProperty("index_fields", "null");
                if (!tmp.contentEquals("null")) {
                    for (i = 0; i < tmp.split(",").length; i++) {
                        // System.out.println(index_fields[i] + " ---> " + docElt.getField(index_fields[i]).stringValue());
                        INDEX_FIELDS[i] = docElt.getField(index_fields[i]).stringValue();
                    }
                }
                String DOCNOElt = docElt.getField("DOCNO").stringValue();
                FIELD_ID = DOCNOElt;
                String TEXTElt = docElt.getField("TEXT").stringValue();
                doc = constructDoc(TEXTElt);
                indexWriter.addDocument(doc);
                System.out.println(DOCNOElt);
                docIndexedCounter++;
            }
        } catch (FileNotFoundException ex) {
            System.err.println("Error: '" + collFile.getAbsolutePath() + "' not found");
            ex.printStackTrace();
        } catch (IOException ex) {
            System.err.println("Error: IOException on reading '" + collFile.getAbsolutePath() + "'");
            ex.printStackTrace();
        }
    }
}
From source file:nl.knaw.huygens.timbuctoo.lucene.demoTwo.SearchFiles.java
License:Apache License
/**
 * This demonstrates a typical paging search scenario, where the search
 * engine presents pages of size n to the user. The user can then go to the
 * next page if interested in the next hits.
 *
 * When the query is executed for the first time, then only enough results
 * are collected to fill 5 result pages. If the user wants to page beyond
 * this limit, then the query is executed another time and all hits are
 * collected.
 */
public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage,
        boolean raw, boolean interactive) throws IOException {

    // Collect enough docs to show 5 pages
    TopDocs results = searcher.search(query, 5 * hitsPerPage);
    ScoreDoc[] hits = results.scoreDocs;

    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int start = 0;
    int end = Math.min(numTotalHits, hitsPerPage);

    while (true) {
        if (end > hits.length) {
            System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                    + " total matching documents collected.");
            System.out.println("Collect more (y/n) ?");
            String line = in.readLine();
            if (line.length() == 0 || line.charAt(0) == 'n') {
                break;
            }
            hits = searcher.search(query, numTotalHits).scoreDocs;
        }

        end = Math.min(hits.length, start + hitsPerPage);

        for (int i = start; i < end; i++) {
            if (raw) { // output raw format
                System.out.println("doc=" + hits[i].doc + " score=" + hits[i].score);
                continue;
            }
            Document doc = searcher.doc(hits[i].doc);
            // System.out.println(doc.getField("content"));
            // IndexableField content = doc.getField("content");
            // System.out.println(content.stringValue());
            // String[] values = doc.getValues("indexed");
            // for (int j = 0; j < values.length; j++) {
            //     System.out.println(j + " : " + values[j]);
            // }
            String path = doc.get("path");
            if (path != null) {
                System.out.println((i + 1) + ". " + path);
                System.out.println(doc.get("author"));
                System.out.println(doc.get("title"));
                System.out.println(doc.get("date"));
                System.out.println(doc.get("content").substring(0, 60));
                // for (IndexableField field : doc.getFields()) {
                //     System.out.println("field: " + field);
                // }
                if (doc.getField("indexed") != null) {
                    System.out.println(" indexed: " + doc.getField("indexed"));
                }
            } else {
                System.out.println((i + 1) + ". " + "No path for this document");
            }
            System.out.flush();
        }

        if (!interactive || end == 0) {
            break;
        }

        if (numTotalHits >= end) {
            boolean quit = false;
            while (true) {
                System.out.print("Press ");
                if (start - hitsPerPage >= 0) {
                    System.out.print("(p)revious page, ");
                }
                if (start + hitsPerPage < numTotalHits) {
                    System.out.print("(n)ext page, ");
                }
                System.out.println("(q)uit or enter number to jump to a page.");

                String line = in.readLine();
                if (line.length() == 0 || line.charAt(0) == 'q') {
                    quit = true;
                    break;
                }
                if (line.charAt(0) == 'p') {
                    start = Math.max(0, start - hitsPerPage);
                    break;
                } else if (line.charAt(0) == 'n') {
                    if (start + hitsPerPage < numTotalHits) {
                        start += hitsPerPage;
                    }
                    break;
                } else {
                    int page = Integer.parseInt(line);
                    if ((page - 1) * hitsPerPage < numTotalHits) {
                        start = (page - 1) * hitsPerPage;
                        break;
                    } else {
                        System.out.println("No such page");
                    }
                }
            }
            if (quit)
                break;
            end = Math.min(numTotalHits, start + hitsPerPage);
        }
    }
}