Lucene++ - a full-featured, C++ search engine
API Documentation


 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
DocumentsWriter.h
Go to the documentation of this file.
1 // Copyright (c) 2009-2011 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
6 
7 #ifndef DOCUMENTSWRITER_H
8 #define DOCUMENTSWRITER_H
9 
10 #include "ByteBlockPool.h"
11 #include "RAMFile.h"
12 
13 namespace Lucene
14 {
55  { // Body of class DocumentsWriter (name taken from the destructor below); the class-head line is not present in this listing
56  public:
58  virtual ~DocumentsWriter();
59  // NOTE(review): the embedded line numbers skip values throughout this class, so some declarations are not shown here
61 
62  protected:
63  String docStoreSegment; // Current doc-store segment we are writing
64  int32_t docStoreOffset; // Current starting doc-store offset of current segment
65 
66  int32_t nextDocID; // Next docID to be added
67  int32_t numDocsInRAM; // # docs buffered in RAM
68 
70  static const int32_t MAX_THREAD_STATE; // Declaration only; no value appears in this listing
72  MapThreadDocumentsWriterThreadState threadBindings; // NOTE(review): presumably maps a thread to its per-thread state - confirm against the typedef
73 
74  int32_t pauseThreads; // Non-zero when we need all threads to pause (eg to flush)
75  bool aborting; // True if an abort is pending
76 
78 
81 
84 
87 
89  int64_t ramBufferSize; // NOTE(review): integer here, while set/getRAMBufferSizeMB() below use double; presumably bytes - confirm
92 
94  int64_t freeTrigger; // NOTE(review): freeTrigger/freeLevel are presumably RAM thresholds consulted by balanceRAM() - confirm
95  int64_t freeLevel;
96 
98  int32_t maxBufferedDocs; // NOTE(review): presumably the field behind set/getMaxBufferedDocs() - confirm
99 
102 
103  bool closed; // NOTE(review): presumably set by close() - confirm
104 
108 
111 
112  public: // Static constants below are declarations only; no values appear in this listing
114  static const int32_t OBJECT_HEADER_BYTES;
115  static const int32_t POINTER_NUM_BYTE;
116  static const int32_t INT_NUM_BYTE;
117  static const int32_t CHAR_NUM_BYTE;
118 
124  static const int32_t BYTES_PER_DEL_TERM;
125 
128  static const int32_t BYTES_PER_DEL_DOCID;
129 
133  static const int32_t BYTES_PER_DEL_QUERY;
134 
136  static const int32_t BYTE_BLOCK_SHIFT;
137  static const int32_t BYTE_BLOCK_SIZE;
138  static const int32_t BYTE_BLOCK_MASK;
139  static const int32_t BYTE_BLOCK_NOT_MASK;
140 
142  static const int32_t CHAR_BLOCK_SHIFT;
143  static const int32_t CHAR_BLOCK_SIZE;
144  static const int32_t CHAR_BLOCK_MASK;
145 
146  static const int32_t MAX_TERM_LENGTH;
147 
149  static const int32_t INT_BLOCK_SHIFT;
150  static const int32_t INT_BLOCK_SIZE;
151  static const int32_t INT_BLOCK_MASK;
152 
153  static const int32_t PER_DOC_BLOCK_SIZE;
154 
155  INTERNAL: // NOTE(review): not a standard C++ access specifier; presumably a project macro - confirm its expansion
159  String segment; // Current segment we are working on
160 
161  int32_t numDocsInStore; // # docs written to doc stores
162 
163  bool flushPending; // True when a thread has decided to flush
164  bool bufferIsFull; // True when it's time to write segment
165 
167  int32_t maxFieldLength;
169 
171 
174 
177 
180 
181  int64_t numBytesAlloc; // NOTE(review): presumably updated by bytesAllocated() - confirm
182  int64_t numBytesUsed; // NOTE(review): presumably updated by bytesUsed() - confirm
183 
184  // used only by assert
186 
187  public:
188  virtual void initialize();
189 
192 
194 
195  void updateFlushedDocCount(int32_t n);
196  int32_t getFlushedDocCount();
197  void setFlushedDocCount(int32_t n);
198 
200  bool hasProx();
201 
204 
205  void setMaxFieldLength(int32_t maxFieldLength); // Parameter shares its name with the data member declared at line 167
207 
209  void setRAMBufferSizeMB(double mb);
210  double getRAMBufferSizeMB();
211 
213  void setMaxBufferedDocs(int32_t count);
214  int32_t getMaxBufferedDocs();
215 
217  String getSegment();
218 
220  int32_t getNumDocsInRAM();
221 
223  String getDocStoreSegment();
224 
226  int32_t getDocStoreOffset();
227 
230  String closeDocStore();
231 
233 
234  void message(const String& message);
235 
239 
240  void addOpenFile(const String& name);
241  void removeOpenFile(const String& name);
242 
243  void setAborting();
244 
247  void abort();
248 
250  bool pauseAllThreads();
251  void resumeAllThreads();
252 
253  bool anyChanges();
254 
255  void initFlushState(bool onlyDocStore);
256 
258  int32_t flush(bool _closeDocStore);
259 
261 
263  void createCompoundFile(const String& segment);
264 
267  bool setFlushPending();
268  void clearFlushPending();
269 
270  void pushDeletes();
271 
272  void close();
273 
274  void initSegmentName(bool onlyDocStore);
275 
280 
282  bool addDocument(DocumentPtr doc, AnalyzerPtr analyzer);
283 
284  bool updateDocument(TermPtr t, DocumentPtr doc, AnalyzerPtr analyzer);
285  bool updateDocument(DocumentPtr doc, AnalyzerPtr analyzer, TermPtr delTerm); // Same three argument types as the overload on the previous line, in a different order
286 
287  int32_t getNumBufferedDeleteTerms(); // for testing
288  MapTermNum getBufferedDeleteTerms(); // for testing
289 
291  void remapDeletes(SegmentInfosPtr infos, Collection< Collection<int32_t> > docMaps, Collection<int32_t> delCounts, OneMergePtr merge, int32_t mergeDocCount);
292 
294  bool bufferDeleteTerm(TermPtr term);
296  bool bufferDeleteQuery(QueryPtr query);
297  bool deletesFull();
298  bool doApplyDeletes();
299 
301  int32_t getMaxBufferedDeleteTerms();
302 
303  bool hasDeletes();
304  bool applyDeletes(SegmentInfosPtr infos); // Public overload; a protected (IndexReaderPtr, int32_t) overload is declared at line 344
305  bool doBalanceRAM();
306 
307  void waitForWaitQueue();
308 
309  int64_t getRAMUsed();
310 
311  IntArray getIntBlock(bool trackAllocations);
312  void bytesAllocated(int64_t numBytes);
313  void bytesUsed(int64_t numBytes);
314  void recycleIntBlocks(Collection<IntArray> blocks, int32_t start, int32_t end);
315 
316  CharArray getCharBlock();
317  void recycleCharBlocks(Collection<CharArray> blocks, int32_t numBlocks);
318 
319  String toMB(int64_t v);
320 
329  void balanceRAM();
330 
331  protected:
333  void doAfterFlush();
334 
335  bool allThreadsIdle();
336 
338 
339  bool timeToFlushDeletes();
340 
341  // used only by assert
342  bool checkDeleteTerm(TermPtr term);
343 
344  bool applyDeletes(IndexReaderPtr reader, int32_t docIDStart); // Protected overload of the public applyDeletes(SegmentInfosPtr) at line 304
345  void addDeleteTerm(TermPtr term, int32_t docCount);
346 
348  void addDeleteDocID(int32_t docID);
349  void addDeleteQuery(QueryPtr query, int32_t docID);
350 
352  void finishDocument(DocumentsWriterThreadStatePtr perThread, DocWriterPtr docWriter);
353 
354  friend class WaitQueue; // Grants WaitQueue access to this class's non-public members
355  };
356 
357  class DocState : public LuceneObject // State object exposing public maxFieldLength and docID fields
358  {
359  public:
360  DocState();
361  virtual ~DocState();
362 
364 
365  public: // The embedded numbering skips 366-367, 369-370 and 372-373, so further fields may be missing from this listing
368  int32_t maxFieldLength; // NOTE(review): presumably mirrors DocumentsWriter::maxFieldLength - confirm
371  int32_t docID; // NOTE(review): presumably the ID of the document being processed - confirm
374 
375  public:
377  virtual bool testPoint(const String& name); // Virtual, so a subclass can override it; NOTE(review): looks like a test hook - confirm
378 
379  void clear(); // NOTE(review): presumably resets this state between documents - confirm against the implementation
380  };
381 
383  class PerDocBuffer : public RAMFile // RAMFile subclass constructed from a DocumentsWriterPtr
384  {
385  public:
386  PerDocBuffer(DocumentsWriterPtr docWriter);
387  virtual ~PerDocBuffer();
388 
390 
391  protected: // Nothing is shown in this section; the embedded numbering skips 392, so a member may be missing from this listing
393 
394  public:
396  void recycle(); // NOTE(review): presumably hands this buffer's blocks back to the DocumentsWriter - confirm
397 
398  protected:
400  virtual ByteArray newBuffer(int32_t size); // NOTE(review): presumably overrides a RAMFile virtual; RAMFile's declaration is not visible here - confirm
401  };
402 
405  class DocWriter : public LuceneObject // Abstract base: finish(), abort() and sizeInBytes() are pure virtual
406  {
407  public:
408  DocWriter();
409  virtual ~DocWriter();
410 
412 
413  public:
415  int32_t docID; // NOTE(review): presumably the ID of the document this writer belongs to - confirm
416 
417  public: // Subclasses must implement the three pure virtual functions below
418  virtual void finish() = 0;
419  virtual void abort() = 0;
420  virtual int64_t sizeInBytes() = 0;
421 
422  virtual void setNext(DocWriterPtr next); // Not pure, so an implementation exists elsewhere; NOTE(review): presumably links writers into a chain - confirm
423  };
424 
428  { // Body of class IndexingChain (name taken from the destructor below); the class-head line is not present in this listing
429  public:
430  virtual ~IndexingChain();
431 
433 
434  public:
435  virtual DocConsumerPtr getChain(DocumentsWriterPtr documentsWriter) = 0; // Pure virtual, so this class is abstract
436  };
437 
453  { // Body of class DefaultIndexingChain (name taken from the destructor below); the class-head line is not present in this listing
454  public:
455  virtual ~DefaultIndexingChain();
456 
458 
459  public:
460  virtual DocConsumerPtr getChain(DocumentsWriterPtr documentsWriter); // Same signature as IndexingChain::getChain but not pure; NOTE(review): presumably overrides it - base class not visible here
461  };
462 
463  class SkipDocWriter : public DocWriter // Concrete DocWriter: declares all three functions that DocWriter leaves pure virtual
464  {
465  public:
466  virtual ~SkipDocWriter();
467 
469 
470  public: // Non-pure declarations matching DocWriter::finish(), abort() and sizeInBytes()
471  virtual void finish();
472  virtual void abort();
473  virtual int64_t sizeInBytes();
474  };
475 
476  class WaitQueue : public LuceneObject // Constructed from a DocumentsWriterPtr; DocumentsWriter declares this class as a friend
477  {
478  public:
479  WaitQueue(DocumentsWriterPtr docWriter);
480  virtual ~WaitQueue();
481 
483 
484  protected: // Nothing is shown in this section; the embedded numbering skips 485, so a member may be missing from this listing
486 
487  public: // The embedded numbering skips 488, so a field may be missing before nextWriteDocID
489  int32_t nextWriteDocID; // NOTE(review): presumably the docID that must be written next - confirm
490  int32_t nextWriteLoc; // NOTE(review): presumably an index into the queue's storage - confirm
491  int32_t numWaiting; // NOTE(review): presumably the count of queued DocWriters - confirm
492  int64_t waitingBytes; // NOTE(review): presumably the summed sizeInBytes() of queued DocWriters - confirm
493 
494  public:
495  void reset();
496  bool doResume();
497  bool doPause();
498  void abort();
499  bool add(DocWriterPtr doc);
500 
501  protected:
502  void writeDocument(DocWriterPtr doc); // Protected helper; NOTE(review): presumably called from add() - confirm against the implementation
503  };
504 
506  { // Body of class ByteBlockAllocator (name taken from the constructor below); the class-head line is not present in this listing
507  public:
508  ByteBlockAllocator(DocumentsWriterPtr docWriter, int32_t blockSize);
509  virtual ~ByteBlockAllocator();
510 
512 
513  protected: // Nothing is shown in this section; the embedded numbering skips 514, so a member may be missing from this listing
515 
516  public:
517  int32_t blockSize; // Shares its name with the constructor's second parameter
519 
520  public:
522  virtual ByteArray getByteBlock(bool trackAllocations); // NOTE(review): presumably returns a block of blockSize bytes - confirm against the implementation
523 
525  virtual void recycleByteBlocks(Collection<ByteArray> blocks, int32_t start, int32_t end); // Overload taking a start/end pair; NOTE(review): whether end is inclusive is not visible here
526  virtual void recycleByteBlocks(Collection<ByteArray> blocks); // Overload taking the collection alone
527  };
528 }
529 
530 #endif

clucene.sourceforge.net