00001 /* Copyright (C) 2000 MySQL AB 00002 00003 This program is free software; you can redistribute it and/or modify 00004 it under the terms of the GNU General Public License as published by 00005 the Free Software Foundation; version 2 of the License. 00006 00007 This program is distributed in the hope that it will be useful, 00008 but WITHOUT ANY WARRANTY; without even the implied warranty of 00009 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00010 GNU General Public License for more details. 00011 00012 You should have received a copy of the GNU General Public License 00013 along with this program; if not, write to the Free Software 00014 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ 00015 00016 /* 00017 These functions handle keyblock cacheing for ISAM and MyISAM tables. 00018 00019 One cache can handle many files. 00020 It must contain buffers of the same blocksize. 00021 init_key_cache() should be used to init cache handler. 00022 00023 The free list (free_block_list) is a stack like structure. 00024 When a block is freed by free_block(), it is pushed onto the stack. 00025 When a new block is required it is first tried to pop one from the stack. 00026 If the stack is empty, it is tried to get a never-used block from the pool. 00027 If this is empty too, then a block is taken from the LRU ring, flushing it 00028 to disk, if neccessary. This is handled in find_key_block(). 00029 With the new free list, the blocks can have three temperatures: 00030 hot, warm and cold (which is free). This is remembered in the block header 00031 by the enum BLOCK_TEMPERATURE temperature variable. Remembering the 00032 temperature is neccessary to correctly count the number of warm blocks, 00033 which is required to decide when blocks are allowed to become hot. Whenever 00034 a block is inserted to another (sub-)chain, we take the old and new 00035 temperature into account to decide if we got one more or less warm block. 00036 blocks_unused is the sum of never used blocks in the pool and of currently 00037 free blocks. blocks_used is the number of blocks fetched from the pool and 00038 as such gives the maximum number of in-use blocks at any time. 00039 00040 Key Cache Locking 00041 ================= 00042 00043 All key cache locking is done with a single mutex per key cache: 00044 keycache->cache_lock. This mutex is locked almost all the time 00045 when executing code in this file (mf_keycache.c). 00046 However it is released for I/O and some copy operations. 00047 00048 The cache_lock is also released when waiting for some event. Waiting 00049 and signalling is done via condition variables. In most cases the 00050 thread waits on its thread->suspend condition variable. Every thread 00051 has a my_thread_var structure, which contains this variable and a 00052 '*next' and '**prev' pointer. These pointers are used to insert the 00053 thread into a wait queue. 00054 00055 A thread can wait for one block and thus be in one wait queue at a 00056 time only. 00057 00058 Before starting to wait on its condition variable with 00059 pthread_cond_wait(), the thread enters itself to a specific wait queue 00060 with link_into_queue() (double linked with '*next' + '**prev') or 00061 wait_on_queue() (single linked with '*next'). 00062 00063 Another thread, when releasing a resource, looks up the waiting thread 00064 in the related wait queue. It sends a signal with 00065 pthread_cond_signal() to the waiting thread. 00066 00067 NOTE: Depending on the particular wait situation, either the sending 00068 thread removes the waiting thread from the wait queue with 00069 unlink_from_queue() or release_whole_queue() respectively, or the waiting 00070 thread removes itself. 00071 00072 There is one exception from this locking scheme when one thread wants 00073 to reuse a block for some other address. This works by first marking 00074 the block reserved (status= BLOCK_IN_SWITCH) and then waiting for all 00075 threads that are reading the block to finish. Each block has a 00076 reference to a condition variable (condvar). It holds a reference to 00077 the thread->suspend condition variable for the waiting thread (if such 00078 a thread exists). When that thread is signaled, the reference is 00079 cleared. The number of readers of a block is registered in 00080 block->hash_link->requests. See wait_for_readers() / remove_reader() 00081 for details. This is similar to the above, but it clearly means that 00082 only one thread can wait for a particular block. There is no queue in 00083 this case. Strangely enough block->convar is used for waiting for the 00084 assigned hash_link only. More precisely it is used to wait for all 00085 requests to be unregistered from the assigned hash_link. 00086 00087 The resize_queue serves two purposes: 00088 1. Threads that want to do a resize wait there if in_resize is set. 00089 This is not used in the server. The server refuses a second resize 00090 request if one is already active. keycache->in_init is used for the 00091 synchronization. See set_var.cc. 00092 2. Threads that want to access blocks during resize wait here during 00093 the re-initialization phase. 00094 When the resize is done, all threads on the queue are signalled. 00095 Hypothetical resizers can compete for resizing, and read/write 00096 requests will restart to request blocks from the freshly resized 00097 cache. If the cache has been resized too small, it is disabled and 00098 'can_be_used' is false. In this case read/write requests bypass the 00099 cache. Since they increment and decrement 'cnt_for_resize_op', the 00100 next resizer can wait on the queue 'waiting_for_resize_cnt' until all 00101 I/O finished. 00102 */ 00103 00104 #include <config.h> 00105 #include <drizzled/error.h> 00106 #include <drizzled/internal/my_sys.h> 00107 #include "keycache.h" 00108 #include <drizzled/internal/m_string.h> 00109 #include <drizzled/internal/my_bit.h> 00110 #include <errno.h> 00111 #include <stdarg.h> 00112 00113 using namespace drizzled; 00114 00115 /* 00116 Some compilation flags have been added specifically for this module 00117 to control the following: 00118 - not to let a thread to yield the control when reading directly 00119 from key cache, which might improve performance in many cases; 00120 to enable this add: 00121 #define SERIALIZED_READ_FROM_CACHE 00122 - to set an upper bound for number of threads simultaneously 00123 using the key cache; this setting helps to determine an optimal 00124 size for hash table and improve performance when the number of 00125 blocks in the key cache much less than the number of threads 00126 accessing it; 00127 to set this number equal to <N> add 00128 #define MAX_THREADS <N> 00129 00130 Example of the settings: 00131 #define SERIALIZED_READ_FROM_CACHE 00132 #define MAX_THREADS 100 00133 */ 00134 00135 #define STRUCT_PTR(TYPE, MEMBER, a) \ 00136 (TYPE *) ((char *) (a) - offsetof(TYPE, MEMBER)) 00137 00138 /* types of condition variables */ 00139 #define COND_FOR_REQUESTED 0 00140 #define COND_FOR_SAVED 1 00141 00142 typedef pthread_cond_t KEYCACHE_CONDVAR; 00143 00144 /* descriptor of the page in the key cache block buffer */ 00145 struct st_keycache_page 00146 { 00147 int file; /* file to which the page belongs to */ 00148 internal::my_off_t filepos; /* position of the page in the file */ 00149 }; 00150 00151 /* element in the chain of a hash table bucket */ 00152 struct st_hash_link 00153 { 00154 struct st_hash_link *next, **prev; /* to connect links in the same bucket */ 00155 struct st_block_link *block; /* reference to the block for the page: */ 00156 int file; /* from such a file */ 00157 internal::my_off_t diskpos; /* with such an offset */ 00158 uint32_t requests; /* number of requests for the page */ 00159 }; 00160 00161 /* simple states of a block */ 00162 #define BLOCK_ERROR 1 /* an error occured when performing file i/o */ 00163 #define BLOCK_READ 2 /* file block is in the block buffer */ 00164 #define BLOCK_IN_SWITCH 4 /* block is preparing to read new page */ 00165 #define BLOCK_REASSIGNED 8 /* blk does not accept requests for old page */ 00166 #define BLOCK_IN_FLUSH 16 /* block is selected for flush */ 00167 #define BLOCK_CHANGED 32 /* block buffer contains a dirty page */ 00168 #define BLOCK_IN_USE 64 /* block is not free */ 00169 #define BLOCK_IN_EVICTION 128 /* block is selected for eviction */ 00170 #define BLOCK_IN_FLUSHWRITE 256 /* block is in write to file */ 00171 #define BLOCK_FOR_UPDATE 512 /* block is selected for buffer modification */ 00172 00173 /* page status, returned by find_key_block */ 00174 #define PAGE_READ 0 00175 #define PAGE_TO_BE_READ 1 00176 #define PAGE_WAIT_TO_BE_READ 2 00177 00178 /* block temperature determines in which (sub-)chain the block currently is */ 00179 enum BLOCK_TEMPERATURE { BLOCK_COLD /*free*/ , BLOCK_WARM , BLOCK_HOT }; 00180 00181 /* key cache block */ 00182 struct st_block_link 00183 { 00184 struct st_block_link 00185 *next_used, **prev_used; /* to connect links in the LRU chain (ring) */ 00186 struct st_block_link 00187 *next_changed, **prev_changed; /* for lists of file dirty/clean blocks */ 00188 struct st_hash_link *hash_link; /* backward ptr to referring hash_link */ 00189 KEYCACHE_WQUEUE wqueue[2]; /* queues on waiting requests for new/old pages */ 00190 uint32_t requests; /* number of requests for the block */ 00191 unsigned char *buffer; /* buffer for the block page */ 00192 uint32_t offset; /* beginning of modified data in the buffer */ 00193 uint32_t length; /* end of data in the buffer */ 00194 uint32_t status; /* state of the block */ 00195 enum BLOCK_TEMPERATURE temperature; /* block temperature: cold, warm, hot */ 00196 uint32_t hits_left; /* number of hits left until promotion */ 00197 uint64_t last_hit_time; /* timestamp of the last hit */ 00198 KEYCACHE_CONDVAR *condvar; /* condition variable for 'no readers' event */ 00199 }; 00200 00201 #define FLUSH_CACHE 2000 /* sort this many blocks at once */ 00202 00203 #define KEYCACHE_HASH(f, pos) \ 00204 (((uint32_t) ((pos) / keycache->key_cache_block_size) + \ 00205 (uint32_t) (f)) & (keycache->hash_entries-1)) 00206 #define FILE_HASH(f) ((uint) (f) & (CHANGED_BLOCKS_HASH-1)) 00207 00208 00209 #define keycache_pthread_cond_wait(A,B) (void)A; 00210 #define keycache_pthread_mutex_lock(A) (void)A; 00211 #define keycache_pthread_mutex_unlock(A) (void)A; 00212 #define keycache_pthread_cond_signal(A) (void)A; 00213 00214 static inline uint32_t next_power(uint32_t value) 00215 { 00216 return my_round_up_to_next_power(value) << 1; 00217 } 00218 00219 00220 /* 00221 Initialize a key cache 00222 00223 SYNOPSIS 00224 init_key_cache() 00225 keycache pointer to a key cache data structure 00226 key_cache_block_size size of blocks to keep cached data 00227 use_mem total memory to use for the key cache 00228 division_limit division limit (may be zero) 00229 age_threshold age threshold (may be zero) 00230 00231 RETURN VALUE 00232 number of blocks in the key cache, if successful, 00233 0 - otherwise. 00234 00235 NOTES. 00236 if keycache->key_cache_inited != 0 we assume that the key cache 00237 is already initialized. This is for now used by myisamchk, but shouldn't 00238 be something that a program should rely on! 00239 00240 It's assumed that no two threads call this function simultaneously 00241 referring to the same key cache handle. 00242 00243 */ 00244 00245 int init_key_cache(KEY_CACHE *keycache, uint32_t key_cache_block_size, 00246 size_t use_mem, uint32_t division_limit, 00247 uint32_t age_threshold) 00248 { 00249 (void)keycache; 00250 (void)key_cache_block_size; 00251 (void)use_mem; 00252 (void)division_limit; 00253 (void)age_threshold; 00254 memset(keycache, 0, sizeof(KEY_CACHE)); 00255 00256 return 0; 00257 } 00258 00259 00260 /* 00261 Remove key_cache from memory 00262 00263 SYNOPSIS 00264 end_key_cache() 00265 keycache key cache handle 00266 cleanup Complete free (Free also mutex for key cache) 00267 00268 RETURN VALUE 00269 none 00270 */ 00271 00272 void end_key_cache(KEY_CACHE *keycache, bool cleanup) 00273 { 00274 (void)keycache; 00275 (void)cleanup; 00276 } /* end_key_cache */ 00277 00278 00279 /* 00280 Add a hash link to a bucket in the hash_table 00281 */ 00282 00283 static inline void link_hash(HASH_LINK **start, HASH_LINK *hash_link) 00284 { 00285 if (*start) 00286 (*start)->prev= &hash_link->next; 00287 hash_link->next= *start; 00288 hash_link->prev= start; 00289 *start= hash_link; 00290 } 00291 00292 00293 /* 00294 Read a block of data from a cached file into a buffer; 00295 00296 SYNOPSIS 00297 00298 key_cache_read() 00299 keycache pointer to a key cache data structure 00300 file handler for the file for the block of data to be read 00301 filepos position of the block of data in the file 00302 level determines the weight of the data 00303 buff buffer to where the data must be placed 00304 length length of the buffer 00305 block_length length of the block in the key cache buffer 00306 return_buffer return pointer to the key cache buffer with the data 00307 00308 RETURN VALUE 00309 Returns address from where the data is placed if sucessful, 0 - otherwise. 00310 00311 NOTES. 00312 The function ensures that a block of data of size length from file 00313 positioned at filepos is in the buffers for some key cache blocks. 00314 Then the function either copies the data into the buffer buff, or, 00315 if return_buffer is true, it just returns the pointer to the key cache 00316 buffer with the data. 00317 Filepos must be a multiple of 'block_length', but it doesn't 00318 have to be a multiple of key_cache_block_size; 00319 */ 00320 00321 unsigned char *key_cache_read(KEY_CACHE *keycache, 00322 int file, internal::my_off_t filepos, int level, 00323 unsigned char *buff, uint32_t length, 00324 uint32_t block_length, 00325 int return_buffer) 00326 { 00327 (void)block_length; 00328 (void)return_buffer; 00329 (void)level; 00330 int error=0; 00331 unsigned char *start= buff; 00332 00333 assert (! keycache->key_cache_inited); 00334 00335 if (!pread(file, (unsigned char*) buff, length, filepos)) 00336 error= 1; 00337 return(error ? (unsigned char*) 0 : start); 00338 } 00339 00340 00341 /* 00342 Insert a block of file data from a buffer into key cache 00343 00344 SYNOPSIS 00345 key_cache_insert() 00346 keycache pointer to a key cache data structure 00347 file handler for the file to insert data from 00348 filepos position of the block of data in the file to insert 00349 level determines the weight of the data 00350 buff buffer to read data from 00351 length length of the data in the buffer 00352 00353 NOTES 00354 This is used by MyISAM to move all blocks from a index file to the key 00355 cache 00356 00357 RETURN VALUE 00358 0 if a success, 1 - otherwise. 00359 */ 00360 00361 int key_cache_insert(KEY_CACHE *keycache, 00362 int file, internal::my_off_t filepos, int level, 00363 unsigned char *buff, uint32_t length) 00364 { 00365 (void)file; 00366 (void)filepos; 00367 (void)level; 00368 (void)buff; 00369 (void)length; 00370 00371 assert (!keycache->key_cache_inited); 00372 return 0; 00373 } 00374 00375 00376 /* 00377 Write a buffer into a cached file. 00378 00379 SYNOPSIS 00380 00381 key_cache_write() 00382 keycache pointer to a key cache data structure 00383 file handler for the file to write data to 00384 filepos position in the file to write data to 00385 level determines the weight of the data 00386 buff buffer with the data 00387 length length of the buffer 00388 dont_write if is 0 then all dirty pages involved in writing 00389 should have been flushed from key cache 00390 00391 RETURN VALUE 00392 0 if a success, 1 - otherwise. 00393 00394 NOTES. 00395 The function copies the data of size length from buff into buffers 00396 for key cache blocks that are assigned to contain the portion of 00397 the file starting with position filepos. 00398 It ensures that this data is flushed to the file if dont_write is false. 00399 Filepos must be a multiple of 'block_length', but it doesn't 00400 have to be a multiple of key_cache_block_size; 00401 00402 dont_write is always true in the server (info->lock_type is never F_UNLCK). 00403 */ 00404 00405 int key_cache_write(KEY_CACHE *keycache, 00406 int file, internal::my_off_t filepos, int level, 00407 unsigned char *buff, uint32_t length, 00408 uint32_t block_length, 00409 int dont_write) 00410 { 00411 (void)block_length; 00412 (void)level; 00413 int error=0; 00414 00415 if (!dont_write) 00416 { 00417 /* Not used in the server. */ 00418 /* Force writing from buff into disk. */ 00419 if (pwrite(file, buff, length, filepos) == 0) 00420 return(1); 00421 } 00422 00423 assert (!keycache->key_cache_inited); 00424 00425 /* Key cache is not used */ 00426 if (dont_write) 00427 { 00428 /* Used in the server. */ 00429 if (pwrite(file, (unsigned char*) buff, length, filepos) == 0) 00430 error=1; 00431 } 00432 00433 return(error); 00434 } 00435 00436 00437 /* 00438 Flush all blocks for a file to disk 00439 00440 SYNOPSIS 00441 00442 flush_key_blocks() 00443 keycache pointer to a key cache data structure 00444 file handler for the file to flush to 00445 flush_type type of the flush 00446 00447 RETURN 00448 0 ok 00449 1 error 00450 */ 00451 00452 int flush_key_blocks(KEY_CACHE *keycache, 00453 int file, enum flush_type type) 00454 { 00455 (void)file; 00456 (void)type; 00457 assert (!keycache->key_cache_inited); 00458 return 0; 00459 }