Lucene++ - a full-featured, c++ search engine
API Documentation


 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Pages
StandardTokenizer.h
Go to the documentation of this file.
1 // Copyright (c) 2009-2011 Alan Wright. All rights reserved.
3 // Distributable under the terms of either the Apache License (Version 2.0)
4 // or the GNU Lesser General Public License.
6 
7 #ifndef STANDARDTOKENIZER_H
8 #define STANDARDTOKENIZER_H
9 
10 #include "Tokenizer.h"
11 
12 namespace Lucene
13 {
// Declaration of StandardTokenizer, a Tokenizer subclass exported through LPPAPI.
// Only declarations are visible in these lines; no member is defined inline here.
// NOTE(review): the leading numbers are line numbers carried over from the generated
// source listing. Gaps in that numbering (e.g. 36 -> 40, 58 -> 63) indicate lines the
// listing omitted, so this view of the class may be incomplete - check the real header.
34  class LPPAPI StandardTokenizer : public Tokenizer
35  {
36  public:
40 
43 
46 
47  virtual ~StandardTokenizer();
48 
50 
51  protected:
54 
    // Presumably the value read and written by getMaxTokenLength() / setMaxTokenLength()
    // below - confirm in the implementation file.
56  int32_t maxTokenLength;
57 
58  // this tokenizer generates three attributes: offset, positionIncrement and type
63 
64  public:
    // Integer constants, one per token category. Their values are assigned outside
    // this header.
    // NOTE(review): these look like indexes into TOKEN_TYPES() - confirm against the
    // definitions before relying on that.
65  static const int32_t ALPHANUM;
66  static const int32_t APOSTROPHE;
67  static const int32_t ACRONYM;
68  static const int32_t COMPANY;
69  static const int32_t EMAIL;
70  static const int32_t HOST;
71  static const int32_t NUM;
72  static const int32_t CJ;
73 
    // NOTE(review): the _DEP suffix suggests a deprecated variant of ACRONYM - confirm.
75  static const int32_t ACRONYM_DEP;
76 
    // Returns a collection of strings by value.
    // NOTE(review): presumably the token type names matching the constants above - confirm.
78  static const Collection<String> TOKEN_TYPES();
79 
80  protected:
    // Initialization helper taking the input reader and the Lucene version to match.
    // NOTE(review): presumably called by the constructors, which are not visible in
    // this listing - confirm.
81  void init(ReaderPtr input, LuceneVersion::Version matchVersion);
82 
83  public:
    // Setter taking the new length as int32_t; pairs by name with getMaxTokenLength().
85  void setMaxTokenLength(int32_t length);
86 
    // Getter returning an int32_t; pairs by name with setMaxTokenLength().
88  int32_t getMaxTokenLength();
89 
    // The three virtual members below are presumably overrides of the Tokenizer /
    // token stream interface; the base class is not visible here - TODO confirm.
    // incrementToken() reports its outcome as a bool.
91  virtual bool incrementToken();
92 
93  virtual void end();
94 
    // Takes a reader; by its name, re-targets the tokenizer at new input - confirm.
95  virtual void reset(ReaderPtr input);
96 
    // Boolean getter; pairs by name with setReplaceInvalidAcronym().
99  bool isReplaceInvalidAcronym();
100 
    // Boolean setter; pairs by name with isReplaceInvalidAcronym().
103  void setReplaceInvalidAcronym(bool replaceInvalidAcronym);
104  };
105 }
106 
107 #endif

clucene.sourceforge.net