librcsb-core-wrapper  1.000
ISTable.h
Go to the documentation of this file.
1 //$$FILE$$
2 //$$VERSION$$
3 //$$DATE$$
4 //$$LICENSE$$
5 
6 
14 #ifndef ISTABLE_H
15 #define ISTABLE_H
16 
17 
18 #include <float.h>
19 
20 #include <string>
21 #include <vector>
22 #include <map>
23 
24 #include <rcsb/mapped_vector.h>
25 #include <rcsb/mapped_vector.C>
26 #include <rcsb/GenString.h>
27 #include <rcsb/ITTable.h>
28 #include <rcsb/Serializer.h>
29 
30 
// tIndex: an ordered multimap from a string key to an unsigned int, using
// StringLess as the key ordering. Being a multimap, one key may map to
// several values.
// NOTE(review): the mapped value is presumably a row index - confirm against
// the code that populates the index.
31 typedef std::multimap<std::string, unsigned int, StringLess> tIndex;
32 
33 
// ISTable: a named table whose cells are std::string values, addressed by an
// unsigned row index and a column name. The declarations below cover column
// and row manipulation, single-cell access, searching, index/key management
// and (de)serialization through a Serializer.
// NOTE(review): this text is a source listing from which several original
// lines are absent (enum headers, a few declarations). Places where a line is
// visibly missing are marked with a NOTE(review) comment.
53 class ISTable
54 {
55  public:
57
60
    // NOTE(review): the line that opens this enumeration is missing from the
    // listing, as are the enumerators that followed the two comments below.
    // Given the eTableDiff return type of operator== further down, this is
    // presumably "enum eTableDiff" - TODO confirm against the real header.
62  {
63  eNONE = 0,
71  // Used only in block diff to indicate missing table in first block
73  // Used only in block diff to indicate extra table in first block
75  };
76
78
85
    // Comparison kinds accepted by the Search() overloads. Only compiled when
    // VLAD_SECOND_ITTABLE is defined; some enumerators are missing from this
    // listing.
    // NOTE(review): Search() below uses eSearchType unconditionally, so when
    // the macro is undefined the name has to come from somewhere else -
    // confirm.
86 #ifdef VLAD_SECOND_ITTABLE
87  enum eSearchType
88  {
89  eEQUAL = 0,
90  eLESS_THAN,
94  };
95 #endif
96
98
101
    // Search direction accepted by Search() and FindDuplicateRows(). Guarded
    // by the same macro as eSearchType above.
102 #ifdef VLAD_SECOND_ITTABLE
103  enum eSearchDir
104  {
105  eFORWARD = 0,
106  eBACKWARD
107  };
108 #endif
109
    // Raw datatype codes. They are shifted left by four bits further down to
    // form the DT_STRING / DT_INTEGER flag values.
110  static const unsigned char DT_STRING_VAL = 1;
111  static const unsigned char DT_INTEGER_VAL = 2;
112  // static const unsigned char DT_DOUBLE_VAL = 3;
113
    // Flag values of the kind passed to SetFlags(). The case and whitespace
    // options occupy bits 0 and 1; the datatype occupies the high four bits.
114  // Sets string comparison case sensitive
115  static const unsigned char CASE_SENSE = 0x00;
116  // Sets string comparison case insensitive
117  static const unsigned char CASE_INSENSE = 0x01;
118  // Sets string comparison to be sensitive to whitespace
119  static const unsigned char W_SPACE_SENSE = 0x00;
120  // Sets string comparison to ignore repeating whitespace.
121  // Also ignores leading and trailing whitespace
122  static const unsigned char W_SPACE_INSENSE = 0x02;
123  // string datatype
124  static const unsigned char DT_STRING = DT_STRING_VAL << 4;
125  // integer datatype
126  static const unsigned char DT_INTEGER = DT_INTEGER_VAL << 4;
127  // VLAD FEATURE NOT WORKING double is not working, maybe integer. check it // double datatype
128  // static const unsigned char DT_DOUBLE = DT_DOUBLE_VAL << 4;
129
    // Constructors. Each takes a Char::eCompareType that defaults to
    // Char::eCASE_SENSITIVE; presumably it ends up in _colCaseSense - confirm
    // in the implementation file.
147  ISTable(const Char::eCompareType colCaseSense = Char::eCASE_SENSITIVE);
148
    // NOTE(review): the first line of this constructor declaration is missing
    // from the listing; only its trailing parameter default survives.
170  colCaseSense = Char::eCASE_SENSITIVE);
171
189  ISTable(const std::string& name,
190  const Char::eCompareType colCaseSense = Char::eCASE_SENSITIVE);
191
212  ISTable(const std::string& name, eOrientation orient,
213  const Char::eCompareType colCaseSense = Char::eCASE_SENSITIVE);
214
    // Copy construction, destruction and copy assignment are all
    // user-declared.
231  ISTable(const ISTable& inTable);
232
246  ~ISTable();
247
263  ISTable& operator=(const ISTable& inTable);
264
    // Note the return type: an eTableDiff code rather than bool, and the
    // argument is taken by non-const reference.
286  eTableDiff operator==(ISTable& inTable);
287
    // Table name accessors. GetName() is defined inline after the class and
    // returns _name.
301  inline const std::string& GetName() const;
302
316  void SetName(const std::string& name);
317
    // ---- Column operations ----
    // GetNumColumns() is defined inline after the class.
331  inline unsigned int GetNumColumns() const;
332
346  const std::vector<std::string>& GetColumnNames() const;
347
362  bool IsColumnPresent(const std::string& colName);
363
    // col defaults to an empty vector.
392  void AddColumn(const std::string& colName,
393  const std::vector<std::string>& col = std::vector<std::string>());
394
    // afColName names an existing column used to position the new one
    // (presumably "insert after" - confirm); col defaults to an empty vector.
437  void InsertColumn(const std::string& colName,
438  const std::string& afColName, const std::vector<std::string>& col =
439  std::vector<std::string>());
440
469  void FillColumn(const std::string& colName,
470  const std::vector<std::string>& col);
471
    // GetColumn overloads: the result is written to the output parameter col.
    // The variants select a whole column, a row range, or an explicit list of
    // row indices.
490  void GetColumn(std::vector<std::string>& col, const std::string& colName);
491
523  void GetColumn(std::vector<std::string>& col, const std::string& colName,
524  const unsigned int fromRowIndex, unsigned int toRowIndex);
525
549  void GetColumn(std::vector<std::string>& col, const std::string& colName,
550  const std::vector<unsigned int>& rowIndex);
551
574  void RenameColumn(const std::string& oldColName,
575  const std::string& newColName);
576
593  void ClearColumn(const std::string& colName);
594
611  void DeleteColumn(const std::string& colName);
612
    // ---- Row operations ----
    // GetNumRows() is defined inline after the class and returns _numRows.
626  inline unsigned int GetNumRows() const;
627
    // AddRow/InsertRow return an unsigned int and accept an optional row of
    // values (default: empty vector).
664  unsigned int AddRow(const std::vector<std::string>& row =
665  std::vector<std::string>());
666
713  unsigned int InsertRow(const unsigned int atRowIndex,
714  const std::vector<std::string>& row = std::vector<std::string>());
715
740  void FillRow(const unsigned int rowIndex,
741  const std::vector<std::string>& row);
742
    // Two GetRow forms: one fills an output vector and can be limited by
    // column names (both default to an empty string), the other returns a
    // const reference to a row.
782  void GetRow(std::vector<std::string>& row, const unsigned int rowIndex,
783  const std::string& fromColName = std::string(),
784  const std::string& toColName = std::string());
785
802  const std::vector<std::string>& GetRow(const unsigned int rowIndex);
803
819  void ClearRow(const unsigned int rowIndex);
820
839  void DeleteRow(const unsigned int rowIndex);
840
858  void DeleteRows(const std::vector<unsigned int>& rows);
859
    // Defined inline after the class as GetNumRows() - 1 (unsigned
    // arithmetic, so it wraps when the table has no rows).
873  inline unsigned int GetLastRowIndex();
874
    // ---- Cell access ----
898  void UpdateCell(const unsigned int rowIndex, const std::string& colName,
899  const std::string& value);
900
    // Read-only cell access by row index and column name.
922  const std::string& operator()(const unsigned int rowIndex,
923  const std::string& colName) const;
924
    // flags is an unsigned char; the public CASE_*, W_SPACE_* and DT_*
    // constants above are presumably the values meant to be combined here -
    // confirm.
949  void SetFlags(const std::string& colName, const unsigned char flags);
950
968  unsigned char GetDataType(const std::string& colName);
969
    // ---- Searching ----
    // indexName defaults to an empty string in the overloads that accept it.
998  unsigned int FindFirst(const std::vector<std::string>& targets,
999  const std::vector<std::string>& colNames,
1000  const std::string& indexName = std::string());
1001
    // Single-column search; matching row indices are written to res.
    // Defaults: start at row 0, eFORWARD, eEQUAL.
1025  void Search(std::vector<unsigned int>& res, const std::string& target,
1026  const std::string& colName, const unsigned int fromRowIndex = 0,
1027  const eSearchDir searchDir = eFORWARD,
1028  const eSearchType searchType = eEQUAL);
1029
    // Multi-column search with the same defaults plus an optional index name.
1059  void Search(std::vector<unsigned int>& res,
1060  const std::vector<std::string>& targets,
1061  const std::vector<std::string>& colNames,
1062  const unsigned int fromRowIndex = 0,
1063  const eSearchDir searchDir = eFORWARD,
1064  const eSearchType searchType = eEQUAL,
1065  const std::string& indexName = std::string());
1066
    // Results are reported through duplRows as pairs of unsigned ints.
1098  void FindDuplicateRows(std::vector<std::pair<unsigned int,
1099  unsigned int> >& duplRows, const std::vector<std::string>& colNames,
1100  const bool keepDuplRows, const eSearchDir searchDir = eFORWARD);
1101
    // Inline accessors for _colCaseSense and _modified; see the definitions
    // after the class.
1116  inline Char::eCompareType GetColCaseSense() const;
1117
1121  inline void SetModified(const bool modified);
1122
1126  inline bool GetModified();
1127
    // ---- Serialization ----
    // The Serializer is passed and stored (_ser) as a raw pointer.
    // NOTE(review): ownership of the Serializer is not stated in this header -
    // confirm.
1131  void SetSerializer(Serializer* ser);
1132
1136  int WriteObject(Serializer* ser, int& size);
1137
1141  int GetObject(UInt32 index, Serializer* ser);
1142
1146  void Read(unsigned int indexInFile);
1147
1151  int Write();
1152
    // NOTE(review): Merge returns a raw ISTable*; who deletes it is not
    // stated here - confirm.
1156  // typeOfMerge is 0 for overwrite, 1 for overlap
1157  static ISTable* Merge(ISTable& firstTable, ISTable& secondTable,
1158  unsigned int typeOfMerge = 0);
1159
1163  bool PrintDiff(ISTable& inTable);
1164
    // ---- Index and key management ----
    // IndexExists() and GetNumIndices() are defined inline after the class.
1168  inline bool IndexExists(const std::string& indexName);
1169
    // unique is an unsigned int defaulting to 0.
1173  void CreateIndex(const std::string& indexName,
1174  const std::vector<std::string>& colNames,
1175  const unsigned int unique = 0);
1176
1180  void UpdateIndex(const std::string& indexName, const unsigned int rowIndex);
1181
1185  void RebuildIndex(const std::string& indexName);
1186
1190  void RebuildIndices();
1191
1195  void DeleteIndex(const std::string& indexName);
1196
1200  inline unsigned int GetNumIndices();
1201
1205  void CreateKey(const std::vector<std::string>& colNames);
1206
1210  void DeleteKey();
1211
    // Static helpers over two vectors of unsigned ints; the result is written
    // to ret.
1215  static void SetUnion(const std::vector<unsigned int>& a,
1216  const std::vector<unsigned int>& b, std::vector<unsigned int>& ret);
1217
1221  static void SetIntersect(const std::vector<unsigned int>& a,
1222  const std::vector<unsigned int>& b, std::vector<unsigned int>& ret);
1223
    // Column indices are written to colIndices for the given column names.
1227  void GetColumnsIndices(std::vector<unsigned int>& colIndices,
1228  const std::vector<std::string>& colNames);
1229
    // GetColumn variant that additionally takes an index name.
1233  void GetColumn(std::vector<std::string>& col, const std::string& colName,
1234  const std::string& indexName);
1235
1236  private:
    // NOTE(review): presumably the row capacity of each ITTable held in
    // _ittables - confirm in the implementation.
1237  static const unsigned int MAX_NUM_ITTABLE_ROWS = 1000;
1238
1239  // number of digit DBL_MIN_10_EXP, letter e is not included in size
1240  static const unsigned int EXPONENT = 4;
    // DBL_DIG comes from <float.h>, included at the top of this header.
1241  static const unsigned int MAX_PRECISION = DBL_DIG;
1242  //???DBL_MANT_DIG;
1243  static const unsigned int MANTISSA = MAX_PRECISION + 2;
1244  static const unsigned int INT_LIMIT = 11;
1245
    // Masks over the flag byte: DT_MASK (15 << 4) covers the high four bits
    // where DT_STRING / DT_INTEGER live; SC_MASK and WS_MASK equal the
    // CASE_INSENSE and W_SPACE_INSENSE bit values respectively.
1246  // datatype mask
1247  static const unsigned char DT_MASK = 15 << 4;
1248  // string comparison sensitivity mask
1249  static const unsigned char SC_MASK = 0x01;
1250  // white space sensitivity mask
1251  static const unsigned char WS_MASK = 0x02;
1252  static const unsigned char LAST_DT_VALUE = 3;
1253  static const unsigned int DEFAULT_PRECISION = MAX_PRECISION;
    // DEFAULT_OPTIONS and _version are declared without initializers here, so
    // their definitions must live outside the class.
1254  static const unsigned char DEFAULT_OPTIONS;
1255
1256  static const std::string _version;
1257
    // ---- Data members ----
1258  std::string _name;
1259
1260  std::vector<ITTable> _ittables;
1261
1262  ITTable::eOrientation _orient;
1263
1264  Char::eCompareType _colCaseSense;
1265
    // NOTE(review): a member declaration appears to be missing from the
    // listing here. GetNumColumns() below reads _colNames, which is not
    // declared anywhere in the visible text.
1267
    // Vectors of per-column settings (presumably one entry per column -
    // confirm).
1268  std::vector<unsigned int> _precision;
1269  std::vector<unsigned char> _compare_opts;
1270
    // Index bookkeeping. GetNumIndices() reports _indexNames.size().
    // NOTE(review): the three vectors are presumably parallel, one entry per
    // index - confirm.
1271  std::vector<std::string> _indexNames;
1272  std::vector<std::vector<unsigned int> > _listsOfColumns;
1273  std::vector<unsigned int> _unique;
1274
1275  Serializer* _ser;
1276
1277  bool _modified; // Indicates whether table has been modified
1278
1279  unsigned int _numRows;
1280
    // Declared mutable, so const member functions may write to them
    // (presumably GetRowLocation() / CacheRowLocation() below - confirm).
1281  mutable unsigned int _rowIndexCache;
1282  mutable std::pair<unsigned int, unsigned int> _rowLocCache;
1283
    // ---- Private helpers ----
    // Several of these mirror the public API but take column indices instead
    // of column names.
1284  void InsertColumn(const std::string& colName, const unsigned int atColIndex,
1285  const std::vector<std::string>& col = std::vector<std::string>());
1286  void CreateColumn(const std::string& colName, const unsigned int atColIndex,
1287  const std::vector<std::string>& col = std::vector<std::string>());
    // Note the argument order (cell, colIndex, rowIndex) differs from the
    // public UpdateCell(rowIndex, colName, value).
1288  int UpdateCell(const std::string& cell, const unsigned int colIndex,
1289  const unsigned int rowIndex);
1290  const std::string& operator()(const unsigned int rowIndex,
1291  const unsigned int colIndex) const;
1292  int SetFlags(const unsigned char newOpts, const unsigned int colIndex);
1293  void FindDuplicateRows(const std::vector<unsigned int>& colIndices,
1294  std::vector<std::pair<unsigned int, unsigned int> >& duplRows,
1295  const unsigned int keep, const eSearchDir searchDir = eFORWARD);
1296  void VerifyColumnsIndices(const std::vector<unsigned int>& colIndices);
1297  bool AreListsOfColumnsValid(const std::vector<unsigned int>& colIndices);
1298  void CreateIndex(const std::string& indexName,
1299  const std::vector<unsigned int>& colIndices,
1300  const unsigned int unique = 0);
1301  void CreateKey(const std::vector<unsigned int>& colIndices);
1302  unsigned int FindFirst(const std::vector<std::string>& targets,
1303  const std::vector<unsigned int>& colIndices,
1304  const std::string& indexName = std::string());
1305  void Search(std::vector<unsigned int>& res,
1306  const std::vector<std::string>& targets,
1307  const std::vector<unsigned int>& colIndices,
1308  const unsigned int fromRowIndex = 0,
1309  const eSearchDir searchDir = eFORWARD,
1310  const eSearchType searchType = eEQUAL,
1311  const std::string& indexName = std::string());
1312
1313  void Init();
1314  void Clear();
1315
    // NOTE(review): the line carrying this function's return type is missing
    // from the listing.
1317  GetCompareType(const std::vector<unsigned int>& colIndices);
1318
    // Helpers that return a std::string by value for a cell, a converted
    // value, or a set of columns of one row.
1319  std::string CellValue(const unsigned int colIndex,
1320  const unsigned int rowIndex);
1321  std::string ConvertString(const std::string& value,
1322  const unsigned int colIndex);
1323  std::string MultiStringsValue(const std::vector<std::string>& values,
1324  const std::vector<unsigned int>& colIndices);
1325  std::string SubRowValue(const std::vector<unsigned int>& colIndices,
1326  const unsigned int rowIndex);
1327  std::string AggregateRow(const std::vector<unsigned int>& colIndices,
1328  const unsigned int rowIndex);
1329
    // Defined inline after the class: appends the text and then a single
    // space to 'to'.
1330  inline void AppendToAndDelimit(std::string& to,
1331  const std::string& appending);
1332
1333  void ValidateOptions(unsigned int colIndex);
1334
    // Index helpers addressed by position (indexIndex) rather than by name.
1335  std::string CreateInternalIndexName(const unsigned int indexIndex);
1336  void UpdateIndex(const unsigned int indexIndex,
1337  const unsigned int rowIndex);
1338  void RebuildIndex(const unsigned int indexIndex);
1339  void ClearIndex(const unsigned int indexIndex);
1340  void DeleteIndex(const unsigned int indexIndex);
1341
    // FindIndex returns int; IndexExists() treats a result of -1 as "no such
    // index".
1342  int FindIndex(const std::string& indexName);
1343  int FindIndex(const std::vector<unsigned int>& colIndices);
1344
1345  void UpdateIndices(const unsigned int rowIndex);
1346  void ClearIndices();
1347
1348  bool IsColumnInIndex(const unsigned int indexIndex,
1349  const unsigned int colIndex);
1350
1351  int FindKeyIndex();
1352
1353  void UpdateColListOnColInsert(const unsigned int colIndex);
1354  void UpdateColListOnColDelete(const unsigned int colIndex);
1355  void UpdateColListOnCellUpdate(const unsigned int rowIndex,
1356  const unsigned int colIndex);
1357
1358  unsigned int FindFirst(const std::vector<std::string>& targets,
1359  const std::vector<unsigned int>& colIndices,
1360  const unsigned int indexIndex);
1361
    // Versioned (de)serialization entry points: one writer (V9) and one
    // reader per listed format version.
    // NOTE(review): presumably WriteObject()/GetObject() dispatch to these -
    // confirm.
1362  int WriteObjectV9(Serializer*, int& size);
1363
1364  int GetObjectV9(UInt32 index, Serializer*);
1365  int GetObjectV8(UInt32 index, Serializer*);
1366  int GetObjectV7(UInt32 index, Serializer*);
1367  int GetObjectV6(UInt32 index, Serializer*);
1368  int GetObjectV3(UInt32 index, Serializer*);
1369  int GetObjectV2(UInt32 index, Serializer*);
1370  int GetObjectV1(UInt32 index, Serializer*);
1371  int GetObjectV1_1(UInt32 index, Serializer*);
1372
    // Conversions from input string a to an output string ret.
1373  void ConvertToInt(const std::string& a, std::string& ret);
1374  void ConvertDouble(const std::string& a, std::string& ret);
1375  void ConvertToLowerNoWhiteSpace(const std::string& a, std::string& ret);
1376
    // Both are const; they are the likely users of the mutable cache members
    // above.
1377  void GetRowLocation(std::pair<unsigned int, unsigned int>& rowLoc,
1378  const unsigned int rowIndex) const;
1379  void CacheRowLocation(const unsigned int rowIndex) const;
1380
1381  void CreateSubtables(const unsigned int numRows);
1382  void CreateSubtableColumns(const unsigned int colIndex,
1383  const std::vector<std::string>& col);
1384  void CreateColumn(const unsigned int atColIndex,
1385  const std::vector<std::string>& col);
1386
1387  void Print(const std::string& indexName);
1388
1389  unsigned int GetColumnIndex(const std::string& colName) const;
1390
1391 };
1392 
1393 
// Stream insertion operator for ISTable. Only the declaration appears in this
// header; the table is taken by const reference and the stream is returned by
// reference.
1394 std::ostream& operator<<(std::ostream& out, const ISTable& isTable);
1395 
1396 
1397 inline unsigned int ISTable::GetLastRowIndex()
1398 {
1399 
1400  return(GetNumRows() - 1);
1401 
1402 }
1403 
1404 
1405 inline unsigned int ISTable::GetNumIndices()
1406 {
1407 
1408  return(_indexNames.size());
1409 
1410 }
1411 
1412 
1413 inline bool ISTable::IndexExists(const std::string& indexName)
1414 {
1415 
1416  int ret = FindIndex(indexName);
1417 
1418  if (ret == -1)
1419  {
1420  return(false);
1421  }
1422  else
1423  {
1424  return(true);
1425  }
1426 
1427 }
1428 
1429 
1430 inline void ISTable::AppendToAndDelimit(std::string& to,
1431  const std::string& appending)
1432 {
1433 
1434  to += appending;
1435  // VLAD HARDCODED CONST
1436  to += " ";
1437 
1438 }
1439 
1440 
1441 inline void ISTable::SetModified(const bool modified)
1442 {
1443  _modified = modified;
1444 }
1445 
1446 
1448 {
1449  return _modified;
1450 }
1451 
1452 
1453 inline const std::string& ISTable::GetName() const
1454 {
1455  return(_name);
1456 }
1457 
1458 
1459 inline unsigned int ISTable::GetNumRows() const
1460 {
1461  return(_numRows);
1462 }
1463 
1464 
1465 inline unsigned int ISTable::GetNumColumns() const
1466 {
1467  return(_colNames.size());
1468 }
1469 
1470 
1472 {
1473  return(_colCaseSense);
1474 }
1475 
1476 
1477 #endif // ISTABLE_H