presage
0.8.8
Main Page
Classes
Files
File List
File Members
src
lib
core
tokenizer
tokenizer.h
Go to the documentation of this file.
1
2
/******************************************************
 *  Presage, an extensible predictive text entry system
 *  ---------------------------------------------------
 *
 *  Copyright (C) 2008  Matteo Vescovi <matteo.vescovi@yahoo.co.uk>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
                                                                             *
                                                                **********(*)*/
23
24
25
#ifndef PRESAGE_TOKENIZER
26
#define PRESAGE_TOKENIZER
27
28
#ifdef HAVE_CONFIG_H
29
#include "config.h"
30
#endif
31
32
#include <iostream>
33
#include <istream>
34
#include <string>
35
#include <assert.h>
36
64
/**
 * Abstract base class for tokenizers that read from a std::istream.
 *
 * The class holds a reference to the stream (it does not own it), three
 * stream offsets (offbeg, offend, offset) and the sets of characters
 * treated as blankspaces and separators.  Token extraction itself is left
 * to subclasses through the pure virtual functions below.
 *
 * The constructor, destructor, accessors and the isBlankspace/isSeparator
 * predicates are only declared here; their definitions are not part of
 * this header.
 */
class Tokenizer {
public:
    /**
     * @param stream       input stream to tokenize; stored by reference, so
     *                     it must outlive this object
     * @param blankspaces  characters to treat as blankspaces
     * @param separators   characters to treat as separators
     */
    Tokenizer(std::istream& stream,
              const std::string blankspaces,
              const std::string separators);
    virtual ~Tokenizer();

    /** Number of tokens; exact counting semantics are defined by subclasses. */
    virtual int countTokens() = 0;

    /** Whether another token can be obtained from nextToken(). */
    virtual bool hasMoreTokens() const = 0;

    /** Returns the next token; traversal direction is defined by subclasses. */
    virtual std::string nextToken() = 0;

    /**
     * Progress indicator.
     * NOTE(review): presumably a fraction of the stream consumed so far --
     * confirm the range against the concrete tokenizers.
     */
    virtual double progress() const = 0;

    /** Sets the blankspace character set. */
    void blankspaceChars(const std::string);
    /** Returns the blankspace character set. */
    std::string blankspaceChars() const;

    /** Sets the separator character set. */
    void separatorChars(const std::string);
    /** Returns the separator character set. */
    std::string separatorChars() const;

    /** Enables or disables lowercase mode. */
    void lowercaseMode(const bool);
    /** Returns whether lowercase mode is enabled. */
    bool lowercaseMode() const;

    /**
     * Returns the characters found at stream offsets [offbeg, offend) as a
     * string, leaving the stream's get position and error state as they
     * were on entry.
     *
     * Every offset is visited individually with seekg() + peek().  Reading
     * stops early if an offset yields no character, so the end-of-file
     * sentinel returned by peek() is never stored in the result.
     */
    std::string streamToString() const {
        typedef std::istream::traits_type traits;

        // Snapshot the error state, then clear it: tellg() and seekg() do
        // nothing useful on a stream whose failbit is set, which would make
        // the saved position -1 and the first read start at the wrong place.
        const std::ios::iostate statebackup = stream.rdstate();
        stream.clear();
        const std::streamoff offbackup = stream.tellg();

        std::string str;
        for (std::streamoff curroff = offbeg; curroff < offend; ++curroff) {
            stream.clear();
            stream.seekg(curroff);

            // peek() returns an int_type so that EOF can be told apart from
            // every valid character; check it before narrowing to char.
            const traits::int_type next = stream.peek();
            if (traits::eq_int_type(next, traits::eof())) {
                break;
            }
            str.push_back(traits::to_char_type(next));
        }

        // Put the get position back (when it could be determined at all),
        // then reinstate exactly the error bits the caller had.
        stream.clear();
        if (offbackup != std::streamoff(-1)) {
            stream.seekg(offbackup);
        }
        stream.clear(statebackup);
        return str;
    }

protected:
    /**
     * Scope guard around a stream's get position.
     *
     * On construction it records the stream's error state and get position
     * and then seeks to the requested offset.  On destruction it seeks back
     * to the recorded position and re-applies the recorded state bits.
     */
    class StreamGuard {
    public:
        /**
         * @param so  stream to guard; held by reference
         * @param of  offset to seek to for the lifetime of the guard
         */
        StreamGuard(std::istream& so, const std::streamoff& of)
            : guardedStream(so) {
            // Record state before tellg(), which may itself set failbit.
            currstate = guardedStream.rdstate();
            curroff = guardedStream.tellg();
            guardedStream.seekg(of);
        }

        ~StreamGuard() {
            guardedStream.seekg(curroff);
            // NOTE(review): setstate() only adds bits; error bits raised
            // while the guard was active are not cleared here.  Left as-is
            // because subclasses defined elsewhere may rely on it.
            guardedStream.setstate(currstate);
        }

    private:
        // Non-copyable: a copy would restore the same stream twice.
        // Declared but intentionally not defined.
        StreamGuard(const StreamGuard&);
        StreamGuard& operator=(const StreamGuard&);

        std::istream& guardedStream;  // stream being guarded (not owned)
        std::ios::iostate currstate;  // error state at construction
        std::streamoff curroff;       // get position at construction
    };

    std::istream& stream;      // stream being tokenized (not owned)
    std::ios::iostate sstate;  // a stream error state; set outside this header
    std::streamoff offbeg;     // first offset read by streamToString()
    std::streamoff offend;     // one-past-last offset read by streamToString()
    std::streamoff offset;     // NOTE(review): presumably the current
                               // tokenizing position -- confirm in subclasses

    /** Whether @p character is classed as a blankspace. */
    bool isBlankspace(const int character) const;
    /** Whether @p character is classed as a separator. */
    bool isSeparator(const int character) const;

private:
    std::string blankspaces;  // characters treated as blankspaces
    std::string separators;   // characters treated as separators

    bool lowercase;  // lowercase mode flag
};
159
160
#endif // PRESAGE_TOKENIZER
Generated on Thu Nov 7 2013 14:39:06 for presage by
1.8.4