Example usage for org.apache.lucene.util RamUsageEstimator NUM_BYTES_OBJECT_REF

List of usage examples for org.apache.lucene.util RamUsageEstimator NUM_BYTES_OBJECT_REF

Introduction

On this page you can find example usages of org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF.

Prototype

int NUM_BYTES_OBJECT_REF

To view the source code for org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF, click the Source Link.

Document

Number of bytes this JVM uses to represent an object reference.

Usage

From source file:com.rocana.lucene.codec.v1.RocanaIntersectTermsEnum.java

License:Apache License

/**
 * Returns the frame held at position {@code ord} of {@code stack}.
 * <p>
 * When {@code ord} lies beyond the current end of the stack, a larger array is
 * allocated (sized via {@code ArrayUtil.oversize}), the existing frames are copied
 * over, and every additional slot is filled with a new frame created for that
 * slot's index. The field is only replaced once all new frames exist.
 *
 * @param ord position of the requested frame
 * @return the frame stored at {@code ord}
 * @throws IOException declared by the original signature; the frame constructor
 *         is the only call here that could raise it
 */
private RocanaIntersectTermsEnumFrame getFrame(int ord) throws IOException {
    final int oldSize = stack.length;
    if (ord >= oldSize) {
        final int newSize = ArrayUtil.oversize(1 + ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
        final RocanaIntersectTermsEnumFrame[] grown = new RocanaIntersectTermsEnumFrame[newSize];
        System.arraycopy(stack, 0, grown, 0, oldSize);
        int slot = oldSize;
        while (slot < newSize) {
            grown[slot] = new RocanaIntersectTermsEnumFrame(this, slot);
            slot++;
        }
        stack = grown;
    }
    final RocanaIntersectTermsEnumFrame frame = stack[ord];
    // Each frame is expected to remember the slot it was created for.
    assert frame.ord == ord;
    return frame;
}

From source file:com.rocana.lucene.codec.v1.RocanaIntersectTermsEnum.java

License:Apache License

/**
 * Returns the arc held at position {@code ord} of {@code arcs}.
 * <p>
 * When {@code ord} lies beyond the current end of the array, a larger array is
 * allocated (sized via {@code ArrayUtil.oversize}), the existing arcs are copied
 * over, and every additional slot is filled with a new, empty arc.
 *
 * @param ord position of the requested arc
 * @return the arc stored at {@code ord}
 */
private FST.Arc<BytesRef> getArc(int ord) {
    final int oldSize = arcs.length;
    if (ord >= oldSize) {
        final int newSize = ArrayUtil.oversize(1 + ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
        // Generic arrays cannot be created directly, hence the raw-typed allocation.
        @SuppressWarnings({ "rawtypes", "unchecked" })
        final FST.Arc<BytesRef>[] grown = new FST.Arc[newSize];
        System.arraycopy(arcs, 0, grown, 0, oldSize);
        int slot = oldSize;
        while (slot < newSize) {
            grown[slot] = new FST.Arc<>();
            slot++;
        }
        arcs = grown;
    }
    return arcs[ord];
}

From source file:com.rocana.lucene.codec.v1.RocanaSegmentTermsEnum.java

License:Apache License

/**
 * Returns the frame held at position {@code ord} of {@code stack}.
 * <p>
 * When {@code ord} lies beyond the current end of the stack, a larger array is
 * allocated (sized via {@code ArrayUtil.oversize}), the existing frames are copied
 * over, and every additional slot is filled with a new frame created for that
 * slot's index. The field is only replaced once all new frames exist.
 *
 * @param ord position of the requested frame
 * @return the frame stored at {@code ord}
 * @throws IOException declared by the original signature; the frame constructor
 *         is the only call here that could raise it
 */
private RocanaSegmentTermsEnumFrame getFrame(int ord) throws IOException {
    final int oldSize = stack.length;
    if (ord >= oldSize) {
        final int newSize = ArrayUtil.oversize(1 + ord, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
        final RocanaSegmentTermsEnumFrame[] grown = new RocanaSegmentTermsEnumFrame[newSize];
        System.arraycopy(stack, 0, grown, 0, oldSize);
        int slot = oldSize;
        while (slot < newSize) {
            grown[slot] = new RocanaSegmentTermsEnumFrame(this, slot);
            slot++;
        }
        stack = grown;
    }
    final RocanaSegmentTermsEnumFrame frame = stack[ord];
    // Each frame is expected to remember the slot it was created for.
    assert frame.ord == ord;
    return frame;
}

From source file:elhuyar.bilakit.Dictionary.java

License:Apache License

/**
   * Reads the dictionary file through the provided InputStreams, building up the words map
   */*  ww w.  ja va2s  .co  m*/
   * @param dictionaries InputStreams to read the dictionary file through
   * @param decoder CharsetDecoder used to decode the contents of the file
   * @throws IOException Can be thrown while reading from the file
   */
  private void readDictionaryFiles(List<InputStream> dictionaries, CharsetDecoder decoder, Builder<IntsRef> words)
          throws IOException {
      BytesRefBuilder flagsScratch = new BytesRefBuilder();
      IntsRefBuilder scratchInts = new IntsRefBuilder();

      StringBuilder sb = new StringBuilder();

      File unsorted = File.createTempFile("unsorted", "dat", tempDir);
      ByteSequencesWriter writer = new ByteSequencesWriter(unsorted);
      boolean success = false;
      try {
          for (InputStream dictionary : dictionaries) {
              BufferedReader lines = new BufferedReader(new InputStreamReader(dictionary, decoder));
              String line = lines.readLine(); // first line is number of entries (approximately, sometimes)

              while ((line = lines.readLine()) != null) {
                  // wild and unpredictable code comment rules
                  if (line.isEmpty() || line.charAt(0) == '/' || line.charAt(0) == '#'
                          || line.charAt(0) == '\t') {
                      continue;
                  }
                  line = unescapeEntry(line);
                  // if we havent seen any stem exceptions, try to parse one
                  if (hasStemExceptions == false) {
                      int morphStart = line.indexOf(MORPH_SEPARATOR);
                      if (morphStart >= 0 && morphStart < line.length()) {
                          hasStemExceptions = parseStemException(line.substring(morphStart + 1)) != null;
                      }
                  }
                  if (needsInputCleaning) {
                      int flagSep = line.indexOf(FLAG_SEPARATOR);
                      if (flagSep == -1) {
                          flagSep = line.indexOf(MORPH_SEPARATOR);
                      }
                      if (flagSep == -1) {
                          CharSequence cleansed = cleanInput(line, sb);
                          writer.write(cleansed.toString().getBytes(StandardCharsets.UTF_8));
                      } else {
                          String text = line.substring(0, flagSep);
                          CharSequence cleansed = cleanInput(text, sb);
                          if (cleansed != sb) {
                              sb.setLength(0);
                              sb.append(cleansed);
                          }
                          sb.append(line.substring(flagSep));
                          writer.write(sb.toString().getBytes(StandardCharsets.UTF_8));
                      }
                  } else {
                      writer.write(line.getBytes(StandardCharsets.UTF_8));
                  }
              }
          }
          success = true;
      } finally {
          if (success) {
              IOUtils.close(writer);
          } else {
              IOUtils.closeWhileHandlingException(writer);
          }
      }
      File sorted = File.createTempFile("sorted", "dat", tempDir);

      OfflineSorter sorter = new OfflineSorter(new Comparator<BytesRef>() {
          BytesRef scratch1 = new BytesRef();
          BytesRef scratch2 = new BytesRef();

          @Override
          public int compare(BytesRef o1, BytesRef o2) {
              scratch1.bytes = o1.bytes;
              scratch1.offset = o1.offset;
              scratch1.length = o1.length;

              for (int i = scratch1.length - 1; i >= 0; i--) {
                  if (scratch1.bytes[scratch1.offset + i] == FLAG_SEPARATOR
                          || scratch1.bytes[scratch1.offset + i] == MORPH_SEPARATOR) {
                      scratch1.length = i;
                      break;
                  }
              }

              scratch2.bytes = o2.bytes;
              scratch2.offset = o2.offset;
              scratch2.length = o2.length;

              for (int i = scratch2.length - 1; i >= 0; i--) {
                  if (scratch2.bytes[scratch2.offset + i] == FLAG_SEPARATOR
                          || scratch2.bytes[scratch2.offset + i] == MORPH_SEPARATOR) {
                      scratch2.length = i;
                      break;
                  }
              }

              int cmp = scratch1.compareTo(scratch2);
              if (cmp == 0) {
                  // tie break on whole row
                  return o1.compareTo(o2);
              } else {
                  return cmp;
              }
          }
      });
      sorter.sort(unsorted, sorted);
      unsorted.delete();

      ByteSequencesReader reader = new ByteSequencesReader(sorted);
      BytesRefBuilder scratchLine = new BytesRefBuilder();

      // TODO: the flags themselves can be double-chars (long) or also numeric
      // either way the trick is to encode them as char... but they must be parsed differently

      String currentEntry = null;
      IntsRefBuilder currentOrds = new IntsRefBuilder();

      String line;
      while (reader.read(scratchLine)) {
          line = scratchLine.get().utf8ToString();
          String entry;
          char wordForm[];
          int end;

          int flagSep = line.indexOf(FLAG_SEPARATOR);
          if (flagSep == -1) {
              wordForm = NOFLAGS;
              end = line.indexOf(MORPH_SEPARATOR);
              entry = line.substring(0, end);
          } else {
              end = line.indexOf(MORPH_SEPARATOR);
              String flagPart = line.substring(flagSep + 1, end);
              if (aliasCount > 0) {
                  flagPart = getAliasValue(Integer.parseInt(flagPart));
              }

              wordForm = flagParsingStrategy.parseFlags(flagPart);
              Arrays.sort(wordForm);
              entry = line.substring(0, flagSep);
          }
          // we possibly have morphological data
          int stemExceptionID = 0;
          if (hasStemExceptions && end + 1 < line.length()) {
              String stemException = parseStemException(line.substring(end + 1));
              if (stemException != null) {
                  if (stemExceptionCount == stemExceptions.length) {
                      int newSize = ArrayUtil.oversize(stemExceptionCount + 1,
                              RamUsageEstimator.NUM_BYTES_OBJECT_REF);
                      stemExceptions = Arrays.copyOf(stemExceptions, newSize);
                  }
                  stemExceptionID = stemExceptionCount + 1; // we use '0' to indicate no exception for the form
                  stemExceptions[stemExceptionCount++] = stemException;
              }
          }

          int cmp = currentEntry == null ? 1 : entry.compareTo(currentEntry);
          if (cmp < 0) {
              throw new IllegalArgumentException("out of order: " + entry + " < " + currentEntry);
          } else {
              encodeFlags(flagsScratch, wordForm);
              int ord = flagLookup.add(flagsScratch.get());
              if (ord < 0) {
                  // already exists in our hash
                  ord = (-ord) - 1;
              }
              // finalize current entry, and switch "current" if necessary
              if (cmp > 0 && currentEntry != null) {
                  Util.toUTF32(currentEntry, scratchInts);
                  words.add(scratchInts.get(), currentOrds.get());
              }
              // swap current
              if (cmp > 0 || currentEntry == null) {
                  currentEntry = entry;
                  currentOrds = new IntsRefBuilder(); // must be this way
              }
              if (hasStemExceptions) {
                  currentOrds.append(ord);
                  currentOrds.append(stemExceptionID);
              } else {
                  currentOrds.append(ord);
              }
          }
      }

      // finalize last entry
      Util.toUTF32(currentEntry, scratchInts);
      words.add(scratchInts.get(), currentOrds.get());

      reader.close();
      sorted.delete();
  }

From source file:hunspell_stemmer.Dictionary.java

License:Apache License

/**
   * Reads the dictionary file through the provided InputStreams, building up the words map
   *//from  w ww  .  ja  v  a2 s .  c  om
   * @param dictionaries InputStreams to read the dictionary file through
   * @param decoder CharsetDecoder used to decode the contents of the file
   * @throws IOException Can be thrown while reading from the file
   */
  private void readDictionaryFiles(List<InputStream> dictionaries, CharsetDecoder decoder, Builder<IntsRef> words)
          throws IOException {
      BytesRefBuilder flagsScratch = new BytesRefBuilder();
      IntsRefBuilder scratchInts = new IntsRefBuilder();

      StringBuilder sb = new StringBuilder();

      Path unsorted = Files.createTempFile(tempDir, "unsorted", "dat");
      try (ByteSequencesWriter writer = new ByteSequencesWriter(unsorted)) {
          for (InputStream dictionary : dictionaries) {
              BufferedReader lines = new BufferedReader(new InputStreamReader(dictionary, decoder));
              String line = lines.readLine(); // first line is number of entries (approximately, sometimes)

              while ((line = lines.readLine()) != null) {
                  // wild and unpredictable code comment rules
                  if (line.isEmpty() || line.charAt(0) == '/' || line.charAt(0) == '#'
                          || line.charAt(0) == '\t') {
                      continue;
                  }
                  line = unescapeEntry(line);
                  // if we havent seen any stem exceptions, try to parse one
                  if (hasStemExceptions == false) {
                      int morphStart = line.indexOf(MORPH_SEPARATOR);
                      if (morphStart >= 0 && morphStart < line.length()) {
                          hasStemExceptions = parseStemException(line.substring(morphStart + 1)) != null;
                      }
                  }
                  if (needsInputCleaning) {
                      int flagSep = line.indexOf(FLAG_SEPARATOR);
                      if (flagSep == -1) {
                          flagSep = line.indexOf(MORPH_SEPARATOR);
                      }
                      if (flagSep == -1) {
                          CharSequence cleansed = cleanInput(line, sb);
                          writer.write(cleansed.toString().getBytes(StandardCharsets.UTF_8));
                      } else {
                          String text = line.substring(0, flagSep);
                          CharSequence cleansed = cleanInput(text, sb);
                          if (cleansed != sb) {
                              sb.setLength(0);
                              sb.append(cleansed);
                          }
                          sb.append(line.substring(flagSep));
                          writer.write(sb.toString().getBytes(StandardCharsets.UTF_8));
                      }
                  } else {
                      writer.write(line.getBytes(StandardCharsets.UTF_8));
                  }
              }
          }
      }
      Path sorted = Files.createTempFile(tempDir, "sorted", "dat");

      OfflineSorter sorter = new OfflineSorter(new Comparator<BytesRef>() {
          BytesRef scratch1 = new BytesRef();
          BytesRef scratch2 = new BytesRef();

          @Override
          public int compare(BytesRef o1, BytesRef o2) {
              scratch1.bytes = o1.bytes;
              scratch1.offset = o1.offset;
              scratch1.length = o1.length;

              for (int i = scratch1.length - 1; i >= 0; i--) {
                  if (scratch1.bytes[scratch1.offset + i] == FLAG_SEPARATOR
                          || scratch1.bytes[scratch1.offset + i] == MORPH_SEPARATOR) {
                      scratch1.length = i;
                      break;
                  }
              }

              scratch2.bytes = o2.bytes;
              scratch2.offset = o2.offset;
              scratch2.length = o2.length;

              for (int i = scratch2.length - 1; i >= 0; i--) {
                  if (scratch2.bytes[scratch2.offset + i] == FLAG_SEPARATOR
                          || scratch2.bytes[scratch2.offset + i] == MORPH_SEPARATOR) {
                      scratch2.length = i;
                      break;
                  }
              }

              int cmp = scratch1.compareTo(scratch2);
              if (cmp == 0) {
                  // tie break on whole row
                  return o1.compareTo(o2);
              } else {
                  return cmp;
              }
          }
      });
      boolean success = false;
      try {
          sorter.sort(unsorted, sorted);
          success = true;
      } finally {
          if (success) {
              Files.delete(unsorted);
          } else {
              IOUtils.deleteFilesIgnoringExceptions(unsorted);
          }
      }

      boolean success2 = false;
      ByteSequencesReader reader = new ByteSequencesReader(sorted);
      try {
          BytesRefBuilder scratchLine = new BytesRefBuilder();

          // TODO: the flags themselves can be double-chars (long) or also numeric
          // either way the trick is to encode them as char... but they must be parsed differently

          String currentEntry = null;
          IntsRefBuilder currentOrds = new IntsRefBuilder();

          String line;
          while (reader.read(scratchLine)) {
              line = scratchLine.get().utf8ToString();
              String entry;
              char wordForm[];
              int end;

              int flagSep = line.indexOf(FLAG_SEPARATOR);
              if (flagSep == -1) {
                  wordForm = NOFLAGS;
                  end = line.indexOf(MORPH_SEPARATOR);
                  entry = line.substring(0, end);
              } else {
                  end = line.indexOf(MORPH_SEPARATOR);
                  String flagPart = line.substring(flagSep + 1, end);
                  if (aliasCount > 0) {
                      flagPart = getAliasValue(Integer.parseInt(flagPart));
                  }

                  wordForm = flagParsingStrategy.parseFlags(flagPart);
                  Arrays.sort(wordForm);
                  entry = line.substring(0, flagSep);
              }
              // we possibly have morphological data
              int stemExceptionID = 0;
              if (hasStemExceptions && end + 1 < line.length()) {
                  String stemException = parseStemException(line.substring(end + 1));
                  if (stemException != null) {
                      if (stemExceptionCount == stemExceptions.length) {
                          int newSize = ArrayUtil.oversize(stemExceptionCount + 1,
                                  RamUsageEstimator.NUM_BYTES_OBJECT_REF);
                          stemExceptions = Arrays.copyOf(stemExceptions, newSize);
                      }
                      stemExceptionID = stemExceptionCount + 1; // we use '0' to indicate no exception for the form
                      stemExceptions[stemExceptionCount++] = stemException;
                  }
              }

              int cmp = currentEntry == null ? 1 : entry.compareTo(currentEntry);
              if (cmp < 0) {
                  throw new IllegalArgumentException("out of order: " + entry + " < " + currentEntry);
              } else {
                  encodeFlags(flagsScratch, wordForm);
                  int ord = flagLookup.add(flagsScratch.get());
                  if (ord < 0) {
                      // already exists in our hash
                      ord = (-ord) - 1;
                  }
                  // finalize current entry, and switch "current" if necessary
                  if (cmp > 0 && currentEntry != null) {
                      Util.toUTF32(currentEntry, scratchInts);
                      words.add(scratchInts.get(), currentOrds.get());
                  }
                  // swap current
                  if (cmp > 0 || currentEntry == null) {
                      currentEntry = entry;
                      currentOrds = new IntsRefBuilder(); // must be this way
                  }
                  if (hasStemExceptions) {
                      currentOrds.append(ord);
                      currentOrds.append(stemExceptionID);
                  } else {
                      currentOrds.append(ord);
                  }
              }
          }

          // finalize last entry
          Util.toUTF32(currentEntry, scratchInts);
          words.add(scratchInts.get(), currentOrds.get());
          success2 = true;
      } finally {
          IOUtils.closeWhileHandlingException(reader);
          if (success2) {
              Files.delete(sorted);
          } else {
              IOUtils.deleteFilesIgnoringExceptions(sorted);
          }
      }
  }

From source file:org.apache.solr.search.FilteredDocIdSet.java

License:Apache License

/**
 * Reports the memory attributed to this set: the size of one object reference
 * plus whatever the wrapped inner set reports for itself.
 */
@Override
public long ramBytesUsed() {
    final long referenceBytes = RamUsageEstimator.NUM_BYTES_OBJECT_REF;
    return referenceBytes + _innerSet.ramBytesUsed();
}

From source file:org.codelibs.elasticsearch.common.bytes.CompositeBytesReference.java

License:Apache License

/**
 * Creates a composite over the given references, recording for each one the
 * offset at which it starts within the combined sequence and accumulating a
 * RAM usage estimate.
 *
 * @param references the parts to combine, in order; neither the array nor any element may be null
 * @throws NullPointerException if {@code references} itself is null
 * @throws IllegalArgumentException if an element is null, or if the combined
 *         length of all references does not fit in an {@code int}
 */
public CompositeBytesReference(BytesReference... references) {
    this.references = Objects.requireNonNull(references, "references must not be null");
    this.offsets = new int[references.length];
    long ramBytesUsed = 0;
    int offset = 0;
    for (int i = 0; i < references.length; i++) {
        BytesReference reference = references[i];
        if (reference == null) {
            throw new IllegalArgumentException("references must not be null");
        }
        offsets[i] = offset; // we use the offsets to seek into the right BytesReference for random access and slicing
        // Accumulate in long so an overflow is detected instead of silently
        // wrapping into negative offsets and a negative total length.
        final long nextOffset = (long) offset + reference.length();
        if (nextOffset > Integer.MAX_VALUE) {
            throw new IllegalArgumentException(
                    "combined length of references exceeds Integer.MAX_VALUE: " + nextOffset);
        }
        offset = (int) nextOffset;
        ramBytesUsed += reference.ramBytesUsed();
    }
    // Widen before multiplying so the estimate cannot overflow int for very large arrays.
    this.ramBytesUsed = ramBytesUsed
            + ((long) Integer.BYTES * offsets.length + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER) // offsets
            + ((long) references.length * RamUsageEstimator.NUM_BYTES_OBJECT_REF
                    + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER) // references
            + Integer.BYTES // length
            + Long.BYTES; // ramBytesUsed
    length = offset;
}

From source file:org.codelibs.elasticsearch.common.util.BigArrays.java

License:Apache License

/**
 * Ensures the given array holds at least <code>minSize</code> elements.
 * <p>
 * An array that is already large enough is returned unchanged. Otherwise a new size is
 * computed with <code>overSize</code> and the array is passed to <code>resize</code>,
 * whose result is returned.
 *
 * @param array the array to grow
 * @param minSize the minimum number of elements required
 * @return <code>array</code> itself, or the result of resizing it
 */
public <T> ObjectArray<T> grow(ObjectArray<T> array, long minSize) {
    if (minSize > array.size()) {
        final long grownSize = overSize(minSize, OBJECT_PAGE_SIZE, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
        return resize(array, grownSize);
    }
    return array;
}

From source file:org.codelibs.elasticsearch.common.util.BigByteArray.java

License:Apache License

/**
 * Change the size of this array. Content between indexes <code>0</code> and
 * <code>min(size(), newSize)</code> will be preserved.
 * <p>
 * The page table is enlarged when it has too few slots, missing pages below the
 * required count are allocated, and pages at or beyond it are cleared and released.
 *
 * @param newSize the new size of the array
 */
@Override
public void resize(long newSize) {
    final int requiredPages = numPages(newSize);
    if (pages.length < requiredPages) {
        final int capacity = ArrayUtil.oversize(requiredPages, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
        pages = Arrays.copyOf(pages, capacity);
    }
    // Walk down from the last required page, allocating until an existing page is met.
    int page = requiredPages - 1;
    while (page >= 0 && pages[page] == null) {
        pages[page] = newBytePage(page);
        page--;
    }
    // Walk up from the first surplus page, dropping pages until an empty slot is met.
    page = requiredPages;
    while (page < pages.length && pages[page] != null) {
        pages[page] = null;
        releasePage(page);
        page++;
    }
    this.size = newSize;
}

From source file:org.codelibs.elasticsearch.common.util.BigDoubleArray.java

License:Apache License

/**
 * Change the size of this array. Content between indexes <code>0</code> and
 * <code>min(size(), newSize)</code> will be preserved.
 * <p>
 * The page table is enlarged when it has too few slots, missing pages below the
 * required count are allocated, and pages at or beyond it are cleared and released.
 *
 * @param newSize the new size of the array
 */
@Override
public void resize(long newSize) {
    final int requiredPages = numPages(newSize);
    if (pages.length < requiredPages) {
        final int capacity = ArrayUtil.oversize(requiredPages, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
        pages = Arrays.copyOf(pages, capacity);
    }
    // Walk down from the last required page, allocating until an existing page is met.
    int page = requiredPages - 1;
    while (page >= 0 && pages[page] == null) {
        pages[page] = newLongPage(page);
        page--;
    }
    // Walk up from the first surplus page, dropping pages until an empty slot is met.
    page = requiredPages;
    while (page < pages.length && pages[page] != null) {
        pages[page] = null;
        releasePage(page);
        page++;
    }
    this.size = newSize;
}