List of usage examples for org.apache.lucene.document.StringField#stringValue()
@Override
public String stringValue()
From source file:edu.cmu.lti.huiying.ir.rangedsearch.TableIndexer.java
License:Apache License
public void indexExplodedXml(IndexWriter writer, File file) throws IOException { if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); if (files != null) { for (int i = 0; i < files.length; i++) { indexExplodedXml(writer, new File(file, files[i])); }//from w ww .ja va 2 s . c o m } } else { FileInputStream fis = new FileInputStream(file); try { NumericFeatureGenerator nfg = new NumericFeatureGenerator(); if (this.xmlreader == null) { this.xmlreader = new XmlStAXReader(); } Article a = xmlreader.readArticleFromXml(file.getAbsolutePath()); for (Table t : a.tables) { for (Group g : t.groups) { for (Column col : g.columns) { // index columns Document coldoc = new Document(); ArrayList<Double> cfv = nfg.getFeatureVector(col.content); if (cfv.get(0) != null) { DoubleField intratio = new DoubleField("intratio", cfv.get(0), Field.Store.NO); coldoc.add(intratio); } if (cfv.get(1) != null) { DoubleField floatratio = new DoubleField("floatratio", cfv.get(1), Field.Store.NO); coldoc.add(floatratio); } if (cfv.get(3) != null) { DoubleField mean = new DoubleField("mean", cfv.get(3), Field.Store.NO); coldoc.add(mean); } if (cfv.get(4) != null) { DoubleField std = new DoubleField("std", cfv.get(4), Field.Store.NO); coldoc.add(std); } if (cfv.get(6) != null) { DoubleField min = new DoubleField("min", cfv.get(6), Field.Store.NO); coldoc.add(min); } if (cfv.get(7) != null) { DoubleField max = new DoubleField("max", cfv.get(7), Field.Store.NO); coldoc.add(max); } if (cfv.get(8) != null) { DoubleField acc = new DoubleField("acc", cfv.get(8), Field.Store.NO); coldoc.add(acc); } if (cfv.get(11) != null) { DoubleField colmag = new DoubleField("colmag", cfv.get(11), Field.Store.NO); coldoc.add(colmag); } StringField wholegroup = new StringField("wholegroup", g.toString(), Field.Store.YES); if (wholegroup.stringValue().getBytes().length > 32760) { wholegroup.setStringValue("Table too large..."); System.err.println( "table too large:" + 
wholegroup.stringValue().getBytes().length); } String headers = ""; if (col.headers != null) { for (Header hdr : col.headers) { headers += hdr.text.toLowerCase() + " "; } } TextField header = new TextField("headerkeywords", headers.trim(), Field.Store.NO); coldoc.add(header); coldoc.add(wholegroup); StringField fname = new StringField("filename", file.getAbsolutePath(), Field.Store.YES); coldoc.add(fname); StringField type = new StringField("type", "column", Field.Store.YES); coldoc.add(type); IntField bstart = new IntField("bytestart", col.content.get(0).byteStart, Field.Store.YES); IntField bend = new IntField("byteend", col.content.get(col.content.size() - 1).byteEnd, Field.Store.YES); String content = ""; for (edu.cmu.lti.huiying.domainclasses.Field f : col.content) content += f.text + "|"; StringField colcontent = new StringField("colcontent", content.substring(0, content.length() - 1), Field.Store.YES); coldoc.add(colcontent); coldoc.add(bstart); coldoc.add(bend); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { writer.addDocument(coldoc); totalDocAdded++; } else { writer.updateDocument(new Term("path", file.getPath()), coldoc); } for (edu.cmu.lti.huiying.domainclasses.Field f : col.content) { // Index single cell Document celldoc = new Document(); ArrayList<Double> fv = nfg.field2Features(f); if (fv.get(0) == 1 || fv.get(0) == 2) { try { DoubleField df = new DoubleField("value", fv.get(1), Field.Store.YES); celldoc.add(df); StringField textf = new StringField("text", f.text, Field.Store.YES); celldoc.add(textf); if (fv.get(2) != null & fv.get(2) != Double.NaN) { DoubleField errf = new DoubleField("error", fv.get(2), Field.Store.NO); celldoc.add(errf); } if (fv.get(5) != Double.NaN) { DoubleField magf = new DoubleField("cellmag", fv.get(5), Field.Store.NO); celldoc.add(magf); } if (fv.get(4) != null) { DoubleField pvalue = new DoubleField("cellpvalue", fv.get(4), Field.Store.NO); celldoc.add(pvalue); } StringField sf = new StringField("filename", 
file.getAbsolutePath(), Field.Store.YES); celldoc.add(sf); StringField ctype = new StringField("type", "cell", Field.Store.YES); celldoc.add(ctype); //StringField cwholegroup=new StringField("wholegroup", g.toString(), Field.Store.YES); //celldoc.add(cwholegroup); IntField cbstart = new IntField("bytestart", f.byteStart, Field.Store.YES); IntField cbend = new IntField("byteend", f.byteEnd, Field.Store.YES); celldoc.add(cbstart); celldoc.add(cbend); } catch (NullPointerException e) { e.printStackTrace(); System.out.println(f.text); } if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { writer.addDocument(celldoc); totalDocAdded++; } else { writer.updateDocument(new Term("path", file.getPath()), celldoc); } } } } } } } finally { fis.close(); } } } }
From source file:org.apache.blur.mapreduce.lib.v2.DocumentWritable.java
License:Apache License
/**
 * Serializes a {@link StringField} to {@code out}.
 *
 * <p>Writes a boolean flag ({@code true} when the field type equals
 * {@code StringField.TYPE_STORED}, {@code false} when it equals
 * {@code StringField.TYPE_NOT_STORED}) followed by the field's string value
 * via {@code writeString}.
 *
 * @param out         destination of the serialized form
 * @param stringField the field to serialize
 * @throws IOException if the field uses any other field type, or if writing fails
 */
private void writeStringField(DataOutput out, StringField stringField) throws IOException {
    final FieldType type = stringField.fieldType();
    final boolean stored = type.equals(StringField.TYPE_STORED);
    // Reject anything that is neither of the two default StringField types
    // before a single byte is written.
    if (!stored && !type.equals(StringField.TYPE_NOT_STORED)) {
        throw new IOException("Non default FieldTypes for StringField not supported.");
    }
    out.writeBoolean(stored);
    writeString(out, stringField.stringValue());
}