Lucene++ - a full-featured, c++ search engine
API Documentation
Main Page
Related Pages
Namespaces
Data Structures
Files
File List
Globals
All
Data Structures
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Macros
Pages
include
StandardTokenizer.h
Go to the documentation of this file.
1
// Copyright (c) 2009-2011 Alan Wright. All rights reserved.
3
// Distributable under the terms of either the Apache License (Version 2.0)
4
// or the GNU Lesser General Public License.
6
7
#ifndef STANDARDTOKENIZER_H
8
#define STANDARDTOKENIZER_H
9
10
#include "
Tokenizer.h
"
11
12
namespace
Lucene
13
{
34
/// Tokenizer subclass that delegates scanning to a StandardTokenizerImpl instance
/// (see the `scanner` member) and labels tokens with one of the integer token type
/// constants declared below.
///
/// NOTE(review): only declarations are visible in this header; the behavioural notes
/// in the comments below that are marked "presumably" must be confirmed against the
/// implementation file.
class LPPAPI StandardTokenizer : public Tokenizer
{
public:
    /// Constructs a tokenizer over the given reader.
    /// @param matchVersion Library version whose behaviour should be matched.
    /// @param input Reader supplying the text to tokenize.
    StandardTokenizer(LuceneVersion::Version matchVersion, ReaderPtr input);

    /// Constructs a tokenizer over the given reader, using a supplied attribute source.
    /// @param matchVersion Library version whose behaviour should be matched.
    /// @param source Attribute source passed in by the caller.
    /// @param input Reader supplying the text to tokenize.
    StandardTokenizer(LuceneVersion::Version matchVersion, AttributeSourcePtr source, ReaderPtr input);

    /// Constructs a tokenizer over the given reader, using a supplied attribute factory.
    /// @param matchVersion Library version whose behaviour should be matched.
    /// @param factory Attribute factory passed in by the caller.
    /// @param input Reader supplying the text to tokenize.
    StandardTokenizer(LuceneVersion::Version matchVersion, AttributeFactoryPtr factory, ReaderPtr input);

    virtual ~StandardTokenizer();

    LUCENE_CLASS(StandardTokenizer);

protected:
    /// Scanner implementation used by this tokenizer.
    StandardTokenizerImplPtr scanner;

    /// Flag exposed through isReplaceInvalidAcronym() / setReplaceInvalidAcronym().
    bool replaceInvalidAcronym;

    /// Limit exposed through getMaxTokenLength() / setMaxTokenLength().
    int32_t maxTokenLength;

    // This tokenizer holds four attributes: term, offset, positionIncrement and type.
    TermAttributePtr termAtt;
    OffsetAttributePtr offsetAtt;
    PositionIncrementAttributePtr posIncrAtt;
    TypeAttributePtr typeAtt;

public:
    // Integer token type identifiers; their values are defined out of line.
    static const int32_t ALPHANUM;
    static const int32_t APOSTROPHE;
    static const int32_t ACRONYM;
    static const int32_t COMPANY;
    static const int32_t EMAIL;
    static const int32_t HOST;
    static const int32_t NUM;
    static const int32_t CJ;

    /// NOTE(review): the _DEP suffix suggests a deprecated acronym token type kept for
    /// backward compatibility - confirm against the implementation.
    static const int32_t ACRONYM_DEP;

    /// Returns a collection of token type strings.
    /// NOTE(review): presumably indexed by the integer token type constants above - confirm.
    static const Collection<String> TOKEN_TYPES();

protected:
    /// Shared initialisation helper taking the input reader and the version to match.
    /// NOTE(review): presumably invoked by each constructor - confirm.
    void init(ReaderPtr input, LuceneVersion::Version matchVersion);

public:
    /// Sets the maximum token length.
    /// NOTE(review): how tokens exceeding this length are treated is not visible here.
    void setMaxTokenLength(int32_t length);

    /// Returns the maximum token length.
    int32_t getMaxTokenLength();

    /// Advances to the next token.
    /// NOTE(review): presumably returns false once the input is exhausted - confirm.
    virtual bool incrementToken();

    /// End-of-stream hook declared virtual here.
    /// NOTE(review): presumably overrides the Tokenizer base - confirm.
    virtual void end();

    /// Resets the tokenizer to read from a new input reader.
    virtual void reset(ReaderPtr input);

    /// Returns the replaceInvalidAcronym setting.
    bool isReplaceInvalidAcronym();

    /// Sets the replaceInvalidAcronym setting.
    /// @param replaceInvalidAcronym New value of the flag.
    void setReplaceInvalidAcronym(bool replaceInvalidAcronym);
};
105
}
106
107
#endif
clucene.sourceforge.net