Drizzled Public API Documentation

srv0srv.cc
1 /*****************************************************************************
2 
3 Copyright (C) 1995, 2010, Innobase Oy. All Rights Reserved.
4 Copyright (C) 2008, 2009 Google Inc.
5 Copyright (C) 2009, Percona Inc.
6 
7 Portions of this file contain modifications contributed and copyrighted by
8 Google, Inc. Those modifications are gratefully acknowledged and are described
9 briefly in the InnoDB documentation. The contributions by Google are
10 incorporated with their permission, and subject to the conditions contained in
11 the file COPYING.Google.
12 
13 Portions of this file contain modifications contributed and copyrighted
14 by Percona Inc.. Those modifications are
15 gratefully acknowledged and are described briefly in the InnoDB
16 documentation. The contributions by Percona Inc. are incorporated with
17 their permission, and subject to the conditions contained in the file
18 COPYING.Percona.
19 
20 This program is free software; you can redistribute it and/or modify it under
21 the terms of the GNU General Public License as published by the Free Software
22 Foundation; version 2 of the License.
23 
24 This program is distributed in the hope that it will be useful, but WITHOUT
25 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
26 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
27 
28 You should have received a copy of the GNU General Public License along with
29 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
30 St, Fifth Floor, Boston, MA 02110-1301 USA
31 
32 *****************************************************************************/
33 
34 /**************************************************/
60 /* Dummy comment */
61 #include "srv0srv.h"
62 
63 #include <drizzled/error.h>
64 #include <drizzled/errmsg_print.h>
65 
66 #include "ut0mem.h"
67 #include "ut0ut.h"
68 #include "os0proc.h"
69 #include "mem0mem.h"
70 #include "mem0pool.h"
71 #include "sync0sync.h"
72 #include "thr0loc.h"
73 #include "que0que.h"
74 #include "log0recv.h"
75 #include "pars0pars.h"
76 #include "usr0sess.h"
77 #include "lock0lock.h"
78 #include "trx0purge.h"
79 #include "ibuf0ibuf.h"
80 #include "buf0flu.h"
81 #include "buf0lru.h"
82 #include "btr0sea.h"
83 #include "dict0load.h"
84 #include "dict0boot.h"
85 #include "srv0start.h"
86 #include "row0mysql.h"
87 #include "ha_prototypes.h"
88 #include "trx0i_s.h"
89 #include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
90 
91 /* This is set to TRUE if the MySQL user has set it in MySQL; currently
92 affects only FOREIGN KEY definition parsing */
93 UNIV_INTERN ibool srv_lower_case_table_names = FALSE;
94 
95 /* The following counter is incremented whenever there is some user activity
96 in the server */
97 UNIV_INTERN ulint srv_activity_count = 0;
98 
99 /* The following is the maximum allowed duration of a lock wait. */
100 UNIV_INTERN ulint srv_fatal_semaphore_wait_threshold = 600;
101 
102 /* How much data manipulation language (DML) statements need to be delayed,
103 in microseconds, in order to reduce the lagging of the purge thread. */
104 UNIV_INTERN ulint srv_dml_needed_delay = 0;
105 
106 UNIV_INTERN ibool srv_lock_timeout_active = FALSE;
107 UNIV_INTERN ibool srv_monitor_active = FALSE;
108 UNIV_INTERN ibool srv_error_monitor_active = FALSE;
109 
110 UNIV_INTERN const char* srv_main_thread_op_info = "";
111 
112 /* Server parameters which are read from the initfile */
113 
114 /* The following three are dir paths which are catenated before file
115 names, where the file name itself may also contain a path */
116 
117 UNIV_INTERN char* srv_data_home = NULL;
118 #ifdef UNIV_LOG_ARCHIVE
119 UNIV_INTERN char* srv_arch_dir = NULL;
120 #endif /* UNIV_LOG_ARCHIVE */
121 
124 UNIV_INTERN my_bool srv_file_per_table;
126 UNIV_INTERN ulint srv_file_format = 0;
131 
132 #if DICT_TF_FORMAT_51
133 # error "DICT_TF_FORMAT_51 must be 0!"
134 #endif
135 
137 UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE;
138 
139 /* If this flag is TRUE, then we will use the native aio of the
140 OS (provided we compiled Innobase with it in), otherwise we will
141 use simulated aio we build below with threads.
142 Currently we support native aio on windows and linux */
143 UNIV_INTERN my_bool srv_use_native_aio = TRUE;
144 
145 #ifdef __WIN__
146 /* Windows native condition variables. We use runtime loading / function
147 pointers, because they are not available on Windows Server 2003 and
148 Windows XP/2000.
149 
150 We use condition for events on Windows if possible, even if os_event
151 resembles Windows kernel event object well API-wise. The reason is
152 performance, kernel objects are heavyweights and WaitForSingleObject() is a
153 performance killer causing calling thread to context switch. Besides, Innodb
154 is preallocating large number (often millions) of os_events. With kernel event
155 objects it takes a big chunk out of non-paged pool, which is better suited
156 for tasks like IO than for storing idle event objects. */
157 UNIV_INTERN ibool srv_use_native_conditions = FALSE;
158 #endif /* __WIN__ */
159 
160 UNIV_INTERN ulint srv_n_data_files = 0;
161 UNIV_INTERN char** srv_data_file_names = NULL;
162 /* size in database pages */
163 UNIV_INTERN ulint* srv_data_file_sizes = NULL;
164 
165 /* if TRUE, then we auto-extend the last data file */
166 UNIV_INTERN ibool srv_auto_extend_last_data_file = FALSE;
167 /* if != 0, this tells the max size auto-extending may increase the
168 last data file size */
169 UNIV_INTERN ulint srv_last_file_size_max = 0;
170 /* If the last data file is auto-extended, we add this
171 many pages to it at a time */
172 UNIV_INTERN unsigned int srv_auto_extend_increment = 8;
173 UNIV_INTERN ulint* srv_data_file_is_raw_partition = NULL;
174 
175 /* If the following is TRUE we do not allow inserts etc. This protects
176 the user from forgetting the 'newraw' keyword to my.cnf */
177 
178 UNIV_INTERN ibool srv_created_new_raw = FALSE;
179 
180 UNIV_INTERN char** srv_log_group_home_dirs = NULL;
181 
182 UNIV_INTERN ulint srv_n_log_groups = ULINT_MAX;
183 UNIV_INTERN ulint srv_n_log_files = ULINT_MAX;
184 /* size in database pages */
185 UNIV_INTERN ulint srv_log_file_size = ULINT_MAX;
186 /* size in database pages */
187 UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX;
188 UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;
189 
190 /* Try to flush dirty pages so as to avoid IO bursts at
191 the checkpoints. */
192 UNIV_INTERN bool srv_adaptive_flushing = TRUE;
193 
196 #define MAX_MUTEX_NOWAIT 20
197 
202 #define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT)
203 
206 #if defined(BUILD_DRIZZLE)
207 const byte srv_latin1_ordering[256] /* The sort order table of the latin1
208  character set. The following table is
209  the MySQL order as of Feb 10th, 2002 */
210 = {
211  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
212 , 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
213 , 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
214 , 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F
215 , 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27
216 , 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F
217 , 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37
218 , 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F
219 , 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47
220 , 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F
221 , 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57
222 , 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F
223 , 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47
224 , 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F
225 , 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57
226 , 0x58, 0x59, 0x5A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
227 , 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
228 , 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F
229 , 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97
230 , 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F
231 , 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7
232 , 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF
233 , 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7
234 , 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF
235 , 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43
236 , 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49
237 , 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xD7
238 , 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xDF
239 , 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43
240 , 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49
241 , 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xF7
242 , 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xFF
243 };
244 #else
245 UNIV_INTERN const byte* srv_latin1_ordering;
246 #endif /* BUILD_DRIZZLE */
247 
248 
249 /* use os/external memory allocator */
250 UNIV_INTERN my_bool srv_use_sys_malloc = TRUE;
251 /* requested size in kilobytes */
252 UNIV_INTERN ulint srv_buf_pool_size = ULINT_MAX;
253 /* requested number of buffer pool instances */
254 UNIV_INTERN ulint srv_buf_pool_instances = 1;
255 /* previously requested size */
256 UNIV_INTERN ulint srv_buf_pool_old_size;
257 /* current size in kilobytes */
258 UNIV_INTERN ulint srv_buf_pool_curr_size = 0;
259 /* size in bytes */
260 UNIV_INTERN ulint srv_mem_pool_size = ULINT_MAX;
261 UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX;
262 
263 /* This parameter is deprecated. Use srv_n_io_[read|write]_threads
264 instead. */
265 UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX;
266 UNIV_INTERN ulint srv_n_read_io_threads = ULINT_MAX;
267 UNIV_INTERN ulint srv_n_write_io_threads = ULINT_MAX;
268 
269 /* The universal page size of the database */
270 UNIV_INTERN ulint srv_page_size_shift = 0;
271 UNIV_INTERN ulint srv_page_size = 0;
272 
273 /* The log block size */
274 UNIV_INTERN uint32_t srv_log_block_size = 0;
275 
276 /* User settable value of the number of pages that must be present
277 in the buffer cache and accessed sequentially for InnoDB to trigger a
278 readahead request. */
279 UNIV_INTERN ulong srv_read_ahead_threshold = 56;
280 
281 #ifdef UNIV_LOG_ARCHIVE
282 UNIV_INTERN ibool srv_log_archive_on = FALSE;
283 UNIV_INTERN ibool srv_archive_recovery = 0;
284 UNIV_INTERN ib_uint64_t srv_archive_recovery_limit_lsn;
285 #endif /* UNIV_LOG_ARCHIVE */
286 
287 /* This parameter is used to throttle the number of insert buffers that are
288 merged in a batch. By increasing this parameter on a faster disk you can
289 possibly reduce the number of I/O operations performed to complete the
290 merge operation. The value of this parameter is used as is by the
291 background loop when the system is idle (low load), on a busy system
292 the parameter is scaled down by a factor of 4, this is to avoid putting
293 a heavier load on the I/O sub system. */
294 
295 UNIV_INTERN ulong srv_insert_buffer_batch_size = 20;
296 
297 UNIV_INTERN char* srv_file_flush_method_str = NULL;
298 UNIV_INTERN ulint srv_unix_file_flush_method = SRV_UNIX_FSYNC;
299 UNIV_INTERN ulint srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
300 
301 UNIV_INTERN ulint srv_max_n_open_files = 300;
302 
303 /* Number of IO operations per second the server can do */
304 UNIV_INTERN ulong srv_io_capacity = 200;
305 
306 /* The InnoDB main thread tries to keep the ratio of modified pages
307 in the buffer pool to all database pages in the buffer pool smaller than
308 the following number. But it is not guaranteed that the value stays below
309 that during a time of heavy update/insert activity. */
310 
311 UNIV_INTERN ulong srv_max_buf_pool_modified_pct = 75;
312 
313 /* the number of purge threads to use from the worker pool (currently 0 or 1).*/
314 UNIV_INTERN ulong srv_n_purge_threads = 0;
315 
316 /* the number of records to purge in one batch */
317 UNIV_INTERN ulong srv_purge_batch_size = 20;
318 
319 /* variable counts amount of data read in total (in bytes) */
320 UNIV_INTERN ulint srv_data_read = 0;
321 
322 /* here we count the amount of data written in total (in bytes) */
323 UNIV_INTERN ulint srv_data_written = 0;
324 
325 /* the number of the log write requests done */
326 UNIV_INTERN ulint srv_log_write_requests = 0;
327 
328 /* the number of physical writes to the log performed */
329 UNIV_INTERN ulint srv_log_writes = 0;
330 
331 /* amount of data written to the log files in bytes */
332 UNIV_INTERN ulint srv_os_log_written = 0;
333 
334 /* amount of writes being done to the log files */
335 UNIV_INTERN ulint srv_os_log_pending_writes = 0;
336 
337 /* we increase this counter when we don't have enough space in the
338 log buffer and have to flush it */
339 UNIV_INTERN ulint srv_log_waits = 0;
340 
341 /* this variable counts the amount of times, when the doublewrite buffer
342 was flushed */
343 UNIV_INTERN ulint srv_dblwr_writes = 0;
344 
345 /* here we store the number of pages that have been flushed to the
346 doublewrite buffer */
347 UNIV_INTERN ulint srv_dblwr_pages_written = 0;
348 
349 /* in this variable we store the number of write requests issued */
350 UNIV_INTERN ulint srv_buf_pool_write_requests = 0;
351 
352 /* here we store the number of times when we had to wait for a free page
353 in the buffer pool. It happens when the buffer pool is full and we need
354 to make a flush, in order to be able to read or create a page. */
355 UNIV_INTERN ulint srv_buf_pool_wait_free = 0;
356 
357 /* variable to count the number of pages that were written from buffer
358 pool to the disk */
359 UNIV_INTERN ulint srv_buf_pool_flushed = 0;
360 
363 UNIV_INTERN ulint srv_buf_pool_reads = 0;
364 
366 UNIV_INTERN uint srv_auto_lru_dump = 0;
367 
368 /* structure to pass status variables to MySQL */
370 
371 /* If the following is != 0 we do not allow inserts etc. This protects
372 the user from forgetting the innodb_force_recovery keyword to my.cnf */
373 
374 UNIV_INTERN ulint srv_force_recovery = 0;
375 /*-----------------------*/
376 /* We are prepared for a situation that we have this many threads waiting for
377 a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the
378 value. */
379 
380 UNIV_INTERN ulint srv_max_n_threads = 0;
381 
382 /* The following controls how many threads we let inside InnoDB concurrently:
383 threads waiting for locks are not counted into the number because otherwise
384 we could get a deadlock. MySQL creates a thread for each user session, and
385 semaphore contention and convoy problems can occur without this restriction.
386 Value 10 should be good if there are less than 4 processors + 4 disks in the
387 computer. Bigger computers need bigger values. Value 0 will disable the
388 concurrency check. */
389 
390 UNIV_INTERN ulong srv_thread_concurrency = 0;
391 
392 /* this mutex protects srv_conc data structures */
393 UNIV_INTERN os_fast_mutex_t srv_conc_mutex;
394 /* number of transactions that have declared_to_be_inside_innodb set.
395 It used to be a non-error for this value to drop below zero temporarily.
396 This is no longer true. We'll, however, keep the lint datatype to add
397 assertions to catch any corner cases that we may have missed. */
398 UNIV_INTERN lint srv_conc_n_threads = 0;
399 /* number of OS threads waiting in the FIFO for a permission to enter
400 InnoDB */
401 UNIV_INTERN ulint srv_conc_n_waiting_threads = 0;
402 
406  ibool reserved;
408  ibool wait_ended;
415  UT_LIST_NODE_T(srv_conc_slot_t) srv_conc_queue;
416 };
417 
418 /* queue of threads waiting to get in */
419 UNIV_INTERN UT_LIST_BASE_NODE_T(srv_conc_slot_t) srv_conc_queue;
420 /* array of wait slots */
421 UNIV_INTERN srv_conc_slot_t* srv_conc_slots;
422 
423 /* Number of times a thread is allowed to enter InnoDB within the same
424 SQL query after it has once got the ticket at srv_conc_enter_innodb */
425 #define SRV_FREE_TICKETS_TO_ENTER srv_n_free_tickets_to_enter
426 #define SRV_THREAD_SLEEP_DELAY srv_thread_sleep_delay
427 /*-----------------------*/
428 /* If the following is set to 1 then we do not run purge and insert buffer
429 merge to completion before shutdown. If it is set to 2, do not even flush the
430 buffer pool to data files at the shutdown: we effectively 'crash'
431 InnoDB (but lose no committed transactions). */
432 UNIV_INTERN ulint srv_fast_shutdown = 0;
433 
434 /* Generate a innodb_status.<pid> file */
435 UNIV_INTERN ibool srv_innodb_status = FALSE;
436 
437 /* When estimating number of different key values in an index, sample
438 this many index pages */
439 UNIV_INTERN ib_uint64_t srv_stats_sample_pages = 8;
440 
441 UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE;
442 UNIV_INTERN ibool srv_use_checksums = TRUE;
443 
444 UNIV_INTERN ulong srv_replication_delay = 0;
445 
446 UNIV_INTERN uint64_t srv_ibuf_max_size = 0;
447 UNIV_INTERN uint32_t srv_ibuf_active_contract = 0;
448 UNIV_INTERN uint32_t srv_ibuf_accel_rate = 100;
449 
450 #define PCT_IBUF_IO(pct) (srv_io_capacity * srv_ibuf_accel_rate \
451  * (pct / 10000.0))
452 
453 UNIV_INTERN uint32_t srv_checkpoint_age_target = 0;
454 UNIV_INTERN uint32_t srv_flush_neighbor_pages = 1;
455 
456 UNIV_INTERN uint32_t srv_read_ahead = 3; /* 1: random, 2: linear, 3: both */
457 UNIV_INTERN uint32_t srv_adaptive_flushing_method = 0; /* 0: native,
458  1: estimate,
459  2: keep_average */
460 
461 UNIV_INTERN ibool srv_read_only = FALSE;
462 UNIV_INTERN ibool srv_fake_write = FALSE;
463 UNIV_INTERN ibool srv_apply_log_only = FALSE;
464 
465 /*-------------------------------------------*/
466 UNIV_INTERN ulong srv_n_spin_wait_rounds = 30;
467 UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500;
468 UNIV_INTERN ulong srv_thread_sleep_delay = 10000;
469 UNIV_INTERN ulong srv_spin_wait_delay = 6;
470 UNIV_INTERN ibool srv_priority_boost = TRUE;
471 
472 #ifdef UNIV_DEBUG
473 UNIV_INTERN ibool srv_print_thread_releases = FALSE;
474 UNIV_INTERN ibool srv_print_lock_waits = FALSE;
475 UNIV_INTERN ibool srv_print_buf_io = FALSE;
476 UNIV_INTERN ibool srv_print_log_io = FALSE;
477 UNIV_INTERN ibool srv_print_latch_waits = FALSE;
478 #endif /* UNIV_DEBUG */
479 
480 UNIV_INTERN ulint srv_n_rows_inserted = 0;
481 UNIV_INTERN ulint srv_n_rows_updated = 0;
482 UNIV_INTERN ulint srv_n_rows_deleted = 0;
483 UNIV_INTERN ulint srv_n_rows_read = 0;
484 
485 static ulint srv_n_rows_inserted_old = 0;
486 static ulint srv_n_rows_updated_old = 0;
487 static ulint srv_n_rows_deleted_old = 0;
488 static ulint srv_n_rows_read_old = 0;
489 
490 UNIV_INTERN ulint srv_n_lock_wait_count = 0;
491 UNIV_INTERN ulint srv_n_lock_wait_current_count = 0;
492 UNIV_INTERN ib_int64_t srv_n_lock_wait_time = 0;
493 UNIV_INTERN ulint srv_n_lock_max_wait_time = 0;
494 
495 UNIV_INTERN ulint srv_truncated_status_writes = 0;
496 
497 /*
498  Set the following to 0 if you want InnoDB to write messages on
499  stderr on startup/shutdown
500 */
501 UNIV_INTERN ibool srv_print_verbose_log = TRUE;
502 UNIV_INTERN ibool srv_print_innodb_monitor = FALSE;
503 UNIV_INTERN ibool srv_print_innodb_lock_monitor = FALSE;
504 UNIV_INTERN ibool srv_print_innodb_tablespace_monitor = FALSE;
505 UNIV_INTERN ibool srv_print_innodb_table_monitor = FALSE;
506 
507 /* Array of English strings describing the current state of an
508 i/o handler thread */
509 
510 UNIV_INTERN const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
511 UNIV_INTERN const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS];
512 
513 UNIV_INTERN time_t srv_last_monitor_time;
514 
515 UNIV_INTERN mutex_t srv_innodb_monitor_mutex;
516 
517 /* Mutex for locking srv_monitor_file */
518 UNIV_INTERN mutex_t srv_monitor_file_mutex;
519 
520 #ifdef UNIV_PFS_MUTEX
521 /* Key to register kernel_mutex with performance schema */
522 UNIV_INTERN mysql_pfs_key_t kernel_mutex_key;
523 /* Key to protect writing the commit_id to the sys header */
524 UNIV_INTERN mysql_pfs_key_t commit_id_mutex_key;
525 /* Key to register srv_innodb_monitor_mutex with performance schema */
526 UNIV_INTERN mysql_pfs_key_t srv_innodb_monitor_mutex_key;
527 /* Key to register srv_monitor_file_mutex with performance schema */
528 UNIV_INTERN mysql_pfs_key_t srv_monitor_file_mutex_key;
529 /* Key to register srv_dict_tmpfile_mutex with performance schema */
530 UNIV_INTERN mysql_pfs_key_t srv_dict_tmpfile_mutex_key;
531 /* Key to register the mutex with performance schema */
532 UNIV_INTERN mysql_pfs_key_t srv_misc_tmpfile_mutex_key;
533 #endif /* UNIV_PFS_MUTEX */
534 
535 /* Temporary file for innodb monitor output */
536 UNIV_INTERN FILE* srv_monitor_file;
537 /* Mutex for locking srv_dict_tmpfile.
538 This mutex has a very high rank; threads reserving it should not
539 be holding any InnoDB latches. */
540 UNIV_INTERN mutex_t srv_dict_tmpfile_mutex;
541 /* Temporary file for output from the data dictionary */
542 UNIV_INTERN FILE* srv_dict_tmpfile;
543 /* Mutex for locking srv_misc_tmpfile.
544 This mutex has a very low rank; threads reserving it should not
545 acquire any further latches or sleep before releasing this one. */
546 UNIV_INTERN mutex_t srv_misc_tmpfile_mutex;
547 /* Temporary file for miscellaneous diagnostic output */
548 UNIV_INTERN FILE* srv_misc_tmpfile;
549 
550 UNIV_INTERN ulint srv_main_thread_process_no = 0;
551 UNIV_INTERN ulint srv_main_thread_id = 0;
552 
553 /* The following count work done by srv_master_thread. */
554 
555 /* Iterations by the 'once per second' loop. */
556 static ulint srv_main_1_second_loops = 0;
557 /* Calls to sleep by the 'once per second' loop. */
558 static ulint srv_main_sleeps = 0;
559 /* Iterations by the 'once per 10 seconds' loop. */
560 static ulint srv_main_10_second_loops = 0;
561 /* Iterations of the loop bounded by the 'background_loop' label. */
562 static ulint srv_main_background_loops = 0;
563 /* Iterations of the loop bounded by the 'flush_loop' label. */
564 static ulint srv_main_flush_loops = 0;
565 /* Log writes involving flush. */
566 static ulint srv_log_writes_and_flush = 0;
567 
568 /* This is only ever touched by the master thread. It records the
569 time when the last flush of log file has happened. The master
570 thread ensures that we flush the log files at least once per
571 second. */
572 static time_t srv_last_log_flush_time;
573 
574 /* The master thread performs various tasks based on the current
575 state of IO activity and the level of IO utilization in past
576 intervals. Following macros define thresholds for these conditions. */
577 #define SRV_PEND_IO_THRESHOLD (PCT_IO(3))
578 #define SRV_RECENT_IO_ACTIVITY (PCT_IO(5))
579 #define SRV_PAST_IO_ACTIVITY (PCT_IO(200))
580 
581 /*
582  IMPLEMENTATION OF THE SERVER MAIN PROGRAM
583  =========================================
584 
585 There is the following analogue between this database
586 server and an operating system kernel:
587 
588 DB concept equivalent OS concept
589 ---------- ---------------------
590 transaction -- process;
591 
592 query thread -- thread;
593 
594 lock -- semaphore;
595 
596 transaction set to
597 the rollback state -- kill signal delivered to a process;
598 
599 kernel -- kernel;
600 
601 query thread execution:
602 (a) without kernel mutex
603 reserved -- process executing in user mode;
604 (b) with kernel mutex reserved
605  -- process executing in kernel mode;
606 
607 The server is controlled by a master thread which runs at
608 a priority higher than normal, that is, higher than user threads.
609 It sleeps most of the time, and wakes up, say, every 300 milliseconds,
610 to check whether there is anything happening in the server which
611 requires intervention of the master thread. Such situations may be,
612 for example, when flushing of dirty blocks is needed in the buffer
613 pool or old version of database rows have to be cleaned away.
614 
615 The threads which we call user threads serve the queries of
616 the clients and input from the console of the server.
617 They run at normal priority. The server may have several
618 communications endpoints. A dedicated set of user threads waits
619 at each of these endpoints ready to receive a client request.
620 Each request is taken by a single user thread, which then starts
621 processing and, when the result is ready, sends it to the client
622 and returns to wait at the same endpoint the thread started from.
623 
624 So, we do not have dedicated communication threads listening at
625 the endpoints and dealing the jobs to dedicated worker threads.
626 Our architecture saves one thread switch per request, compared
627 to the solution with dedicated communication threads
628 which amounts to 15 microseconds on 100 MHz Pentium
629 running NT. If the client
630 is communicating over a network, this saving is negligible, but
631 if the client resides in the same machine, maybe in an SMP machine
632 on a different processor from the server thread, the saving
633 can be important as the threads can communicate over shared
634 memory with an overhead of a few microseconds.
635 
636 We may later implement a dedicated communication thread solution
637 for those endpoints which communicate over a network.
638 
639 Our solution with user threads has two problems: for each endpoint
640 there has to be a number of listening threads. If there are many
641 communication endpoints, it may be difficult to set the right number
642 of concurrent threads in the system, as many of the threads
643 may always be waiting at less busy endpoints. Another problem
644 is queuing of the messages, as the server internally does not
645 offer any queue for jobs.
646 
647 Another group of user threads is intended for splitting the
648 queries and processing them in parallel. Let us call these
649 parallel communication threads. These threads are waiting for
650 parallelized tasks, suspended on event semaphores.
651 
652 A single user thread waits for input from the console,
653 like a command to shut the database.
654 
655 Utility threads are a different group of threads which takes
656 care of the buffer pool flushing and other, mainly background
657 operations, in the server.
658 Some of these utility threads always run at a lower than normal
659 priority, so that they are always in background. Some of them
660 may dynamically boost their priority by the pri_adjust function,
661 even to higher than normal priority, if their task becomes urgent.
662 The running of utilities is controlled by high- and low-water marks
663 of urgency. The urgency may be measured by the number of dirty blocks
664 in the buffer pool, in the case of the flush thread, for example.
665 When the high-water mark is exceeded, a utility starts running, until
666 the urgency drops under the low-water mark. Then the utility thread
667 suspends itself to wait for an event. The master thread is
668 responsible of signaling this event when the utility thread is
669 again needed.
670 
671 For each individual type of utility, some threads always remain
672 at lower than normal priority. This is because pri_adjust is implemented
673 so that the threads at normal or higher priority control their
674 share of running time by calling sleep. Thus, if the load of the
675 system suddenly drops, these threads cannot necessarily utilize
676 the system fully. The background priority threads make up for this,
677 starting to run when the load drops.
678 
679 When there is no activity in the system, also the master thread
680 suspends itself to wait for an event making
681 the server totally silent. The responsibility to signal this
682 event is on the user thread which again receives a message
683 from a client.
684 
685 There is still one complication in our server design. If a
686 background utility thread obtains a resource (e.g., mutex) needed by a user
687 thread, and there is also some other user activity in the system,
688 the user thread may have to wait indefinitely long for the
689 resource, as the OS does not schedule a background thread if
690 there is some other runnable user thread. This problem is called
691 priority inversion in real-time programming.
692 
693 One solution to the priority inversion problem would be to
694 keep record of which thread owns which resource and
695 in the above case boost the priority of the background thread
696 so that it will be scheduled and it can release the resource.
697 This solution is called priority inheritance in real-time programming.
698 A drawback of this solution is that the overhead of acquiring a mutex
699 increases slightly, maybe 0.2 microseconds on a 100 MHz Pentium, because
700 the thread has to call os_thread_get_curr_id.
701 This may be compared to 0.5 microsecond overhead for a mutex lock-unlock
702 pair. Note that the thread
703 cannot store the information in the resource, say mutex, itself,
704 because competing threads could wipe out the information if it is
705 stored before acquiring the mutex, and if it stored afterwards,
706 the information is outdated for the time of one machine instruction,
707 at least. (To be precise, the information could be stored to
708 lock_word in mutex if the machine supports atomic swap.)
709 
710 The above solution with priority inheritance may become actual in the
711 future, but at the moment we plan to implement a more coarse solution,
712 which could be called a global priority inheritance. If a thread
713 has to wait for a long time, say 300 milliseconds, for a resource,
714 we just guess that it may be waiting for a resource owned by a background
715 thread, and boost the priority of all runnable background threads
716 to the normal level. The background threads then themselves adjust
717 their fixed priority back to background after releasing all resources
718 they had (or, at some fixed points in their program code).
719 
720 What is the performance of the global priority inheritance solution?
721 We may weigh the length of the wait time 300 milliseconds, during
722 which the system processes some other thread
723 to the cost of boosting the priority of each runnable background
724 thread, rescheduling it, and lowering the priority again.
725 On 100 MHz Pentium + NT this overhead may be of the order 100
726 microseconds per thread. So, if the number of runnable background
727 threads is not very big, say < 100, the cost is tolerable.
728 Utility threads probably will access resources used by
729 user threads not very often, so collisions of user threads
730 to preempted utility threads should not happen very often.
731 
732 The thread table contains
733 information of the current status of each thread existing in the system,
734 and also the event semaphores used in suspending the master thread
735 and utility and parallel communication threads when they have nothing to do.
736 The thread table can be seen as an analogue to the process table
737 in a traditional Unix implementation.
738 
739 The thread table is also used in the global priority inheritance
740 scheme. This brings in one additional complication: threads accessing
741 the thread table must have at least normal fixed priority,
742 because the priority inheritance solution does not work if a background
743 thread is preempted while possessing the mutex protecting the thread table.
744 So, if a thread accesses the thread table, its priority has to be
745 boosted at least to normal. This priority requirement can be seen similar to
746 the privileged mode used when processing the kernel calls in traditional
747 Unix.*/
748 
749 /* Thread slot in the thread table */
752  os_thread_t handle;
753  unsigned type:3;
754  unsigned in_use:1;
755  unsigned suspended:1;
763 };
764 
765 /* Table for MySQL threads where they will be suspended to wait for locks */
766 UNIV_INTERN srv_slot_t* srv_mysql_table = NULL;
767 
768 UNIV_INTERN os_event_t srv_timeout_event;
769 
770 UNIV_INTERN os_event_t srv_monitor_event;
771 
772 UNIV_INTERN os_event_t srv_error_event;
773 
774 UNIV_INTERN os_event_t srv_lock_timeout_thread_event;
775 
776 UNIV_INTERN srv_sys_t* srv_sys = NULL;
777 
778 /* padding to prevent other memory update hotspots from residing on
779 the same memory cache line */
780 UNIV_INTERN byte srv_pad1[64];
781 /* mutex protecting the server, trx structs, query threads, and lock table */
782 UNIV_INTERN mutex_t* kernel_mutex_temp;
783 /* mutex protecting the sys header for writing the commit id */
784 UNIV_INTERN mutex_t* commit_id_mutex_temp;
785 
786 /* padding to prevent other memory update hotspots from residing on
787 the same memory cache line */
788 UNIV_INTERN byte srv_pad2[64];
789 
790 #if 0
791 /* The following three values measure the urgency of the jobs of
792 buffer, version, and insert threads. They may vary from 0 - 1000.
793 The server mutex protects all these variables. The low-water values
794 tell that the server can acquiesce the utility when the value
795 drops below this low-water mark. */
796 
797 static ulint srv_meter[SRV_MASTER + 1];
798 static ulint srv_meter_low_water[SRV_MASTER + 1];
799 static ulint srv_meter_high_water[SRV_MASTER + 1];
800 static ulint srv_meter_high_water2[SRV_MASTER + 1];
801 static ulint srv_meter_foreground[SRV_MASTER + 1];
802 #endif
803 
804 /***********************************************************************
805 Prints counters for work done by srv_master_thread. */
806 static
807 void
808 srv_print_master_thread_info(
809 /*=========================*/
810  FILE *file) /* in: output stream */
811 {
812  fprintf(file, "srv_master_thread loops: %lu 1_second, %lu sleeps, "
813  "%lu 10_second, %lu background, %lu flush\n",
814  srv_main_1_second_loops, srv_main_sleeps,
815  srv_main_10_second_loops, srv_main_background_loops,
816  srv_main_flush_loops);
817  fprintf(file, "srv_master_thread log flush and writes: %lu\n",
818  srv_log_writes_and_flush);
819 }
820 
821 /* The following values give info about the activity going on in
822 the database. They are protected by the server mutex. The arrays
823 are indexed by the type of the thread. */
824 
825 UNIV_INTERN ulint srv_n_threads_active[SRV_MASTER + 1];
826 UNIV_INTERN ulint srv_n_threads[SRV_MASTER + 1];
827 
828 /*********************************************************************/
830 UNIV_INTERN
831 void
833 /*======================*/
834  ulint i,
835  const char* str)
837 {
838  ut_a(i < SRV_MAX_N_IO_THREADS);
839 
840  srv_io_thread_op_info[i] = str;
841 }
842 
843 /*********************************************************************/
847 static
848 srv_slot_t*
849 srv_table_get_nth_slot(
850 /*===================*/
851  ulint index)
852 {
853  ut_a(index < OS_THREAD_MAX_N);
854 
855  return(srv_sys->threads + index);
856 }
857 
858 /*********************************************************************/
861 UNIV_INTERN
862 ulint
864 /*===================*/
865 {
866  ulint i;
867  ulint n_threads = 0;
868 
869  mutex_enter(&kernel_mutex);
870 
871  for (i = SRV_COM; i < SRV_MASTER + 1; i++) {
872 
873  n_threads += srv_n_threads[i];
874  }
875 
876  mutex_exit(&kernel_mutex);
877 
878  return(n_threads);
879 }
880 
881 /*********************************************************************/
886 static
887 ulint
888 srv_table_reserve_slot(
889 /*===================*/
890  enum srv_thread_type type)
891 {
892  srv_slot_t* slot;
893  ulint i;
894 
895  ut_a(type > 0);
896  ut_a(type <= SRV_MASTER);
897 
898  i = 0;
899  slot = srv_table_get_nth_slot(i);
900 
901  while (slot->in_use) {
902  i++;
903  slot = srv_table_get_nth_slot(i);
904  }
905 
906  ut_a(slot->in_use == FALSE);
907 
908  slot->in_use = TRUE;
909  slot->suspended = FALSE;
910  slot->type = type;
911  slot->id = os_thread_get_curr_id();
912  slot->handle = os_thread_get_curr();
913 
915 
917 
918  return(i);
919 }
920 
921 /*********************************************************************/
925 static
927 srv_suspend_thread(void)
928 /*====================*/
929 {
930  srv_slot_t* slot;
932  ulint slot_no;
933  enum srv_thread_type type;
934 
935  ut_ad(mutex_own(&kernel_mutex));
936 
938 
939  if (srv_print_thread_releases) {
940  fprintf(stderr,
941  "Suspending thread %lu to slot %lu\n",
942  (ulong) os_thread_get_curr_id(), (ulong) slot_no);
943  }
944 
945  slot = srv_table_get_nth_slot(slot_no);
946 
947  type = static_cast<srv_thread_type>(slot->type);
948 
949  ut_ad(type >= SRV_WORKER);
950  ut_ad(type <= SRV_MASTER);
951 
952  event = slot->event;
953 
954  slot->suspended = TRUE;
955 
956  ut_ad(srv_n_threads_active[type] > 0);
957 
958  srv_n_threads_active[type]--;
959 
960  os_event_reset(event);
961 
962  return(event);
963 }
964 
965 /*********************************************************************/
970 UNIV_INTERN
971 ulint
973 /*================*/
974  enum srv_thread_type type,
975  ulint n)
976 {
977  srv_slot_t* slot;
978  ulint i;
979  ulint count = 0;
980 
981  ut_ad(type >= SRV_WORKER);
982  ut_ad(type <= SRV_MASTER);
983  ut_ad(n > 0);
984  ut_ad(mutex_own(&kernel_mutex));
985 
986  for (i = 0; i < OS_THREAD_MAX_N; i++) {
987 
988  slot = srv_table_get_nth_slot(i);
989 
990  if (slot->in_use &&
991  (static_cast<srv_thread_type>(slot->type) == type) &&
992  slot->suspended) {
993 
994  slot->suspended = FALSE;
995 
996  srv_n_threads_active[type]++;
997 
998  os_event_set(slot->event);
999 
1000  if (srv_print_thread_releases) {
1001  fprintf(stderr,
1002  "Releasing thread %lu type %lu"
1003  " from slot %lu\n",
1004  (ulong) slot->id, (ulong) type,
1005  (ulong) i);
1006  }
1007 
1008  count++;
1009 
1010  if (count == n) {
1011  break;
1012  }
1013  }
1014  }
1015 
1016  return(count);
1017 }
1018 
1019 /*********************************************************************/
1022 UNIV_INTERN
1023 enum srv_thread_type
1025 /*=====================*/
1026 {
1027  ulint slot_no;
1028  srv_slot_t* slot;
1029  enum srv_thread_type type;
1030 
1031  mutex_enter(&kernel_mutex);
1032 
1034 
1035  slot = srv_table_get_nth_slot(slot_no);
1036 
1037  type = static_cast<srv_thread_type>(slot->type);
1038 
1039  ut_ad(type >= SRV_WORKER);
1040  ut_ad(type <= SRV_MASTER);
1041 
1042  mutex_exit(&kernel_mutex);
1043 
1044  return(type);
1045 }
1046 
1047 /*********************************************************************/
1049 UNIV_INTERN
1050 void
1052 /*==========*/
1053 {
1054  srv_conc_slot_t* conc_slot;
1055  srv_slot_t* slot;
1056  ulint i;
1057 
1058  srv_sys = static_cast<srv_sys_t *>(mem_alloc(sizeof(srv_sys_t)));
1059 
1060  kernel_mutex_temp = static_cast<ib_mutex_t *>(mem_alloc(sizeof(mutex_t)));
1061  mutex_create(kernel_mutex_key, &kernel_mutex, SYNC_KERNEL);
1062 
1063  commit_id_mutex_temp = static_cast<ib_mutex_t *>(mem_alloc(sizeof(mutex_t)));
1064  mutex_create(commit_id_mutex_key, &commit_id_mutex, SYNC_COMMIT_ID_LOCK);
1065 
1066  mutex_create(srv_innodb_monitor_mutex_key,
1067  &srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK);
1068 
1069  srv_sys->threads = static_cast<srv_table_t *>(mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t)));
1070 
1071  for (i = 0; i < OS_THREAD_MAX_N; i++) {
1072  slot = srv_table_get_nth_slot(i);
1073  slot->in_use = FALSE;
1074  slot->type=0; /* Avoid purify errors */
1075  slot->event = os_event_create(NULL);
1076  ut_a(slot->event);
1077  }
1078 
1079  srv_mysql_table = static_cast<srv_slot_t *>(mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t)));
1080 
1081  for (i = 0; i < OS_THREAD_MAX_N; i++) {
1082  slot = srv_mysql_table + i;
1083  slot->in_use = FALSE;
1084  slot->type = 0;
1085  slot->event = os_event_create(NULL);
1086  ut_a(slot->event);
1087  }
1088 
1089  srv_error_event = os_event_create(NULL);
1090 
1091  srv_timeout_event = os_event_create(NULL);
1092 
1093  srv_monitor_event = os_event_create(NULL);
1094 
1095  srv_lock_timeout_thread_event = os_event_create(NULL);
1096 
1097  for (i = 0; i < SRV_MASTER + 1; i++) {
1098  srv_n_threads_active[i] = 0;
1099  srv_n_threads[i] = 0;
1100 #if 0
1101  srv_meter[i] = 30;
1102  srv_meter_low_water[i] = 50;
1103  srv_meter_high_water[i] = 100;
1104  srv_meter_high_water2[i] = 200;
1105  srv_meter_foreground[i] = 250;
1106 #endif
1107  }
1108 
1110 
1111  /* Create dummy indexes for infimum and supremum records */
1112 
1113  dict_ind_init();
1114 
1115  /* Init the server concurrency restriction data structures */
1116 
1117  os_fast_mutex_init(&srv_conc_mutex);
1118 
1119  UT_LIST_INIT(srv_conc_queue);
1120 
1121  srv_conc_slots = static_cast<srv_conc_slot_t *>(mem_alloc(OS_THREAD_MAX_N * sizeof(srv_conc_slot_t)));
1122 
1123  for (i = 0; i < OS_THREAD_MAX_N; i++) {
1124  conc_slot = srv_conc_slots + i;
1125  conc_slot->reserved = FALSE;
1126  conc_slot->event = os_event_create(NULL);
1127  ut_a(conc_slot->event);
1128  }
1129 
1130  /* Initialize some INFORMATION SCHEMA internal structures */
1132 }
1133 
1134 /*********************************************************************/
1136 UNIV_INTERN
1137 void
1139 /*==========*/
1140 {
1141  os_fast_mutex_free(&srv_conc_mutex);
1142  mem_free(srv_conc_slots);
1143  srv_conc_slots = NULL;
1144 
1146  mem_free(srv_sys);
1147  srv_sys = NULL;
1148 
1149  mem_free(kernel_mutex_temp);
1150  kernel_mutex_temp = NULL;
1151  mem_free(srv_mysql_table);
1152  srv_mysql_table = NULL;
1153 
1154  mem_free(commit_id_mutex_temp);
1155  commit_id_mutex_temp = NULL;
1156 
1158 }
1159 
1160 /*********************************************************************/
1163 UNIV_INTERN
1164 void
1166 /*==================*/
1167 {
1168  ut_mem_init();
1169  /* Reset the system variables in the recovery module. */
1171  os_sync_init();
1172  sync_init();
1173  mem_init(srv_mem_pool_size);
1174  thr_local_init();
1175 }
1176 
1177 /*======================= InnoDB Server FIFO queue =======================*/
1178 
1179 /* Maximum allowable purge history length. <=0 means 'infinite'. */
1180 UNIV_INTERN ulong srv_max_purge_lag = 0;
1181 
1182 /*********************************************************************/
1185 UNIV_INTERN
1186 void
1188 /*==================*/
1189  trx_t* trx)
1191 {
1192  ibool has_slept = FALSE;
1193  srv_conc_slot_t* slot = NULL;
1194  ulint i;
1195 
1196  if (trx->mysql_thd != NULL
1198 
1199  UT_WAIT_FOR(srv_conc_n_threads
1200  < (lint)srv_thread_concurrency,
1201  srv_replication_delay * 1000);
1202 
1203  return;
1204  }
1205 
1206  /* If trx has 'free tickets' to enter the engine left, then use one
1207  such ticket */
1208 
1209  if (trx->n_tickets_to_enter_innodb > 0) {
1210  trx->n_tickets_to_enter_innodb--;
1211 
1212  return;
1213  }
1214 
1215  os_fast_mutex_lock(&srv_conc_mutex);
1216 retry:
1217  if (trx->declared_to_be_inside_innodb) {
1218  ut_print_timestamp(stderr);
1219  fputs(" InnoDB: Error: trying to declare trx"
1220  " to enter InnoDB, but\n"
1221  "InnoDB: it already is declared.\n", stderr);
1222  trx_print(stderr, trx, 0);
1223  putc('\n', stderr);
1224  os_fast_mutex_unlock(&srv_conc_mutex);
1225 
1226  return;
1227  }
1228 
1229  ut_ad(srv_conc_n_threads >= 0);
1230 
1231  if (srv_conc_n_threads < (lint)srv_thread_concurrency) {
1232 
1233  srv_conc_n_threads++;
1234  trx->declared_to_be_inside_innodb = TRUE;
1235  trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;
1236 
1237  os_fast_mutex_unlock(&srv_conc_mutex);
1238 
1239  return;
1240  }
1241 
1242  /* If the transaction is not holding resources, let it sleep
1243  for SRV_THREAD_SLEEP_DELAY microseconds, and try again then */
1244 
1245  if (!has_slept && !trx->has_search_latch
1246  && NULL == UT_LIST_GET_FIRST(trx->trx_locks)) {
1247 
1248  has_slept = TRUE; /* We let it sleep only once to avoid
1249  starvation */
1250 
1251  srv_conc_n_waiting_threads++;
1252 
1253  os_fast_mutex_unlock(&srv_conc_mutex);
1254 
1255  trx->op_info = "sleeping before joining InnoDB queue";
1256 
1257  /* Peter Zaitsev suggested that we take the sleep away
1258  altogether. But the sleep may be good in pathological
1259  situations of lots of thread switches. Simply put some
1260  threads aside for a while to reduce the number of thread
1261  switches. */
1262  if (SRV_THREAD_SLEEP_DELAY > 0) {
1263  os_thread_sleep(SRV_THREAD_SLEEP_DELAY);
1264  }
1265 
1266  trx->op_info = "";
1267 
1268  os_fast_mutex_lock(&srv_conc_mutex);
1269 
1270  srv_conc_n_waiting_threads--;
1271 
1272  goto retry;
1273  }
1274 
1275  /* Too many threads inside: put the current thread to a queue */
1276 
1277  for (i = 0; i < OS_THREAD_MAX_N; i++) {
1278  slot = srv_conc_slots + i;
1279 
1280  if (!slot->reserved) {
1281 
1282  break;
1283  }
1284  }
1285 
1286  if (i == OS_THREAD_MAX_N) {
1287  /* Could not find a free wait slot, we must let the
1288  thread enter */
1289 
1290  srv_conc_n_threads++;
1291  trx->declared_to_be_inside_innodb = TRUE;
1292  trx->n_tickets_to_enter_innodb = 0;
1293 
1294  os_fast_mutex_unlock(&srv_conc_mutex);
1295 
1296  return;
1297  }
1298 
1299  /* Release possible search system latch this thread has */
1300  if (trx->has_search_latch) {
1302  }
1303 
1304  /* Add to the queue */
1305  slot->reserved = TRUE;
1306  slot->wait_ended = FALSE;
1307 
1308  UT_LIST_ADD_LAST(srv_conc_queue, srv_conc_queue, slot);
1309 
1310  os_event_reset(slot->event);
1311 
1312  srv_conc_n_waiting_threads++;
1313 
1314  os_fast_mutex_unlock(&srv_conc_mutex);
1315 
1316  /* Go to wait for the event; when a thread leaves InnoDB it will
1317  release this thread */
1318 
1319  trx->op_info = "waiting in InnoDB queue";
1320 
1321  os_event_wait(slot->event);
1322 
1323  trx->op_info = "";
1324 
1325  os_fast_mutex_lock(&srv_conc_mutex);
1326 
1327  srv_conc_n_waiting_threads--;
1328 
1329  /* NOTE that the thread which released this thread already
1330  incremented the thread counter on behalf of this thread */
1331 
1332  slot->reserved = FALSE;
1333 
1334  UT_LIST_REMOVE(srv_conc_queue, srv_conc_queue, slot);
1335 
1336  trx->declared_to_be_inside_innodb = TRUE;
1337  trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;
1338 
1339  os_fast_mutex_unlock(&srv_conc_mutex);
1340 }
1341 
1342 /*********************************************************************/
1345 UNIV_INTERN
1346 void
1348 /*========================*/
1349  trx_t* trx)
1351 {
1352  if (UNIV_LIKELY(!srv_thread_concurrency)) {
1353 
1354  return;
1355  }
1356 
1357  ut_ad(srv_conc_n_threads >= 0);
1358 
1359  os_fast_mutex_lock(&srv_conc_mutex);
1360 
1361  srv_conc_n_threads++;
1362  trx->declared_to_be_inside_innodb = TRUE;
1363  trx->n_tickets_to_enter_innodb = 1;
1364 
1365  os_fast_mutex_unlock(&srv_conc_mutex);
1366 }
1367 
1368 /*********************************************************************/
1371 UNIV_INTERN
1372 void
1374 /*=======================*/
1375  trx_t* trx)
1377 {
1378  srv_conc_slot_t* slot = NULL;
1379 
1380  if (trx->mysql_thd != NULL
1382 
1383  return;
1384  }
1385 
1386  if (trx->declared_to_be_inside_innodb == FALSE) {
1387 
1388  return;
1389  }
1390 
1391  os_fast_mutex_lock(&srv_conc_mutex);
1392 
1393  ut_ad(srv_conc_n_threads > 0);
1394  srv_conc_n_threads--;
1395  trx->declared_to_be_inside_innodb = FALSE;
1396  trx->n_tickets_to_enter_innodb = 0;
1397 
1398  if (srv_conc_n_threads < (lint)srv_thread_concurrency) {
1399  /* Look for a slot where a thread is waiting and no other
1400  thread has yet released the thread */
1401 
1402  slot = UT_LIST_GET_FIRST(srv_conc_queue);
1403 
1404  while (slot && slot->wait_ended == TRUE) {
1405  slot = UT_LIST_GET_NEXT(srv_conc_queue, slot);
1406  }
1407 
1408  if (slot != NULL) {
1409  slot->wait_ended = TRUE;
1410 
1411  /* We increment the count on behalf of the released
1412  thread */
1413 
1414  srv_conc_n_threads++;
1415  }
1416  }
1417 
1418  os_fast_mutex_unlock(&srv_conc_mutex);
1419 
1420  if (slot != NULL) {
1421  os_event_set(slot->event);
1422  }
1423 }
1424 
1425 /*********************************************************************/
1427 UNIV_INTERN
1428 void
1430 /*=================*/
1431  trx_t* trx)
1433 {
1434  if (trx->n_tickets_to_enter_innodb > 0) {
1435  /* We will pretend the thread is still inside InnoDB though it
1436  now leaves the InnoDB engine. In this way we save
1437  a lot of semaphore operations. srv_conc_force_exit_innodb is
1438  used to declare the thread definitely outside InnoDB. It
1439  should be called when there is a lock wait or an SQL statement
1440  ends. */
1441 
1442  return;
1443  }
1444 
1446 }
1447 
1448 /*========================================================================*/
1449 
1450 /*********************************************************************/
1453 static
1454 ulint
1455 srv_normalize_init_values(void)
1456 /*===========================*/
1457 {
1458  ulint n;
1459  ulint i;
1460 
1461  n = srv_n_data_files;
1462 
1463  for (i = 0; i < n; i++) {
1464  srv_data_file_sizes[i] = srv_data_file_sizes[i]
1465  * ((1024 * 1024) / UNIV_PAGE_SIZE);
1466  }
1467 
1468  srv_last_file_size_max = srv_last_file_size_max
1469  * ((1024 * 1024) / UNIV_PAGE_SIZE);
1470 
1471  srv_log_file_size = srv_log_file_size / UNIV_PAGE_SIZE;
1472 
1473  srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE;
1474 
1475  srv_lock_table_size = 5 * (srv_buf_pool_size / UNIV_PAGE_SIZE);
1476 
1477  return(DB_SUCCESS);
1478 }
1479 
1480 /*********************************************************************/
1483 UNIV_INTERN
1484 ulint
1486 /*==========*/
1487 {
1488  ulint err;
1489 
1490  /* Transform the init parameter values given by MySQL to
1491  use units we use inside InnoDB: */
1492 
1493  err = srv_normalize_init_values();
1494 
1495  if (err != DB_SUCCESS) {
1496  return(err);
1497  }
1498 
1499  /* Initialize synchronization primitives, memory management, and thread
1500  local storage */
1501 
1502  srv_general_init();
1503 
1504  /* Initialize this module */
1505 
1506  srv_init();
1507 
1508  return(DB_SUCCESS);
1509 }
1510 
1511 /*********************************************************************/
1515 static
1516 srv_slot_t*
1517 srv_table_reserve_slot_for_mysql(void)
1518 /*==================================*/
1519 {
1520  srv_slot_t* slot;
1521  ulint i;
1522 
1523  ut_ad(mutex_own(&kernel_mutex));
1524 
1525  i = 0;
1526  slot = srv_mysql_table + i;
1527 
1528  while (slot->in_use) {
1529  i++;
1530 
1531  if (i >= OS_THREAD_MAX_N) {
1532 
1533  ut_print_timestamp(stderr);
1534 
1535  fprintf(stderr,
1536  " InnoDB: There appear to be %lu MySQL"
1537  " threads currently waiting\n"
1538  "InnoDB: inside InnoDB, which is the"
1539  " upper limit. Cannot continue operation.\n"
1540  "InnoDB: We intentionally generate"
1541  " a seg fault to print a stack trace\n"
1542  "InnoDB: on Linux. But first we print"
1543  " a list of waiting threads.\n", (ulong) i);
1544 
1545  for (i = 0; i < OS_THREAD_MAX_N; i++) {
1546 
1547  slot = srv_mysql_table + i;
1548 
1549  fprintf(stderr,
1550  "Slot %lu: thread id %lu, type %lu,"
1551  " in use %lu, susp %lu, time %lu\n",
1552  (ulong) i,
1553  (ulong) os_thread_pf(slot->id),
1554  (ulong) slot->type,
1555  (ulong) slot->in_use,
1556  (ulong) slot->suspended,
1557  (ulong) difftime(ut_time(),
1558  slot->suspend_time));
1559  }
1560 
1561  ut_error;
1562  }
1563 
1564  slot = srv_mysql_table + i;
1565  }
1566 
1567  ut_a(slot->in_use == FALSE);
1568 
1569  slot->in_use = TRUE;
1570  slot->id = os_thread_get_curr_id();
1571  slot->handle = os_thread_get_curr();
1572 
1573  return(slot);
1574 }
1575 
1576 /***************************************************************/
1582 UNIV_INTERN
1583 void
1585 /*=====================*/
1586  que_thr_t* thr)
1588 {
1589  srv_slot_t* slot;
1590  os_event_t event;
1591  double wait_time;
1592  trx_t* trx;
1593  ulint had_dict_lock;
1594  ibool was_declared_inside_innodb = FALSE;
1595  ib_int64_t start_time = 0;
1596  ib_int64_t finish_time;
1597  ulint diff_time;
1598  ulint sec;
1599  ulint ms;
1600  ulong lock_wait_timeout;
1601 
1602  ut_ad(!mutex_own(&kernel_mutex));
1603 
1604  trx = thr_get_trx(thr);
1605 
1606  os_event_set(srv_lock_timeout_thread_event);
1607 
1608  mutex_enter(&kernel_mutex);
1609 
1610  trx->error_state = DB_SUCCESS;
1611 
1612  if (thr->state == QUE_THR_RUNNING) {
1613 
1614  ut_ad(thr->is_active == TRUE);
1615 
1616  /* The lock has already been released or this transaction
1617  was chosen as a deadlock victim: no need to suspend */
1618 
1619  if (trx->was_chosen_as_deadlock_victim) {
1620 
1621  trx->error_state = DB_DEADLOCK;
1622  trx->was_chosen_as_deadlock_victim = FALSE;
1623  }
1624 
1625  mutex_exit(&kernel_mutex);
1626 
1627  return;
1628  }
1629 
1630  ut_ad(thr->is_active == FALSE);
1631 
1632  slot = srv_table_reserve_slot_for_mysql();
1633 
1634  event = slot->event;
1635 
1636  slot->thr = thr;
1637 
1638  os_event_reset(event);
1639 
1640  slot->suspend_time = ut_time();
1641 
1642  if (thr->lock_state == QUE_THR_LOCK_ROW) {
1643  srv_n_lock_wait_count++;
1644  srv_n_lock_wait_current_count++;
1645 
1646  if (ut_usectime(&sec, &ms) == -1) {
1647  start_time = -1;
1648  } else {
1649  start_time = (ib_int64_t) sec * 1000000 + ms;
1650  }
1651  }
1652  /* Wake the lock timeout monitor thread, if it is suspended */
1653 
1654  os_event_set(srv_lock_timeout_thread_event);
1655 
1656  mutex_exit(&kernel_mutex);
1657 
1658  if (trx->declared_to_be_inside_innodb) {
1659 
1660  was_declared_inside_innodb = TRUE;
1661 
1662  /* We must declare this OS thread to exit InnoDB, since a
1663  possible other thread holding a lock which this thread waits
1664  for must be allowed to enter, sooner or later */
1665 
1667  }
1668 
1669  had_dict_lock = trx->dict_operation_lock_mode;
1670 
1671  switch (had_dict_lock) {
1672  case RW_S_LATCH:
1673  /* Release foreign key check latch */
1675  break;
1676  case RW_X_LATCH:
1677  /* There should never be a lock wait when the
1678  dictionary latch is reserved in X mode. Dictionary
1679  transactions should only acquire locks on dictionary
1680  tables, not other tables. All access to dictionary
1681  tables should be covered by dictionary
1682  transactions. */
1683  ut_print_timestamp(stderr);
1684  fputs(" InnoDB: Error: dict X latch held in "
1685  "srv_suspend_mysql_thread\n", stderr);
1686  /* This should never occur. This incorrect handling
1687  was added in the early development of
1688  ha_innobase::add_index() in InnoDB Plugin 1.0. */
1689  /* Release fast index creation latch */
1691  break;
1692  }
1693 
1694  ut_a(trx->dict_operation_lock_mode == 0);
1695 
1696  /* Suspend this thread and wait for the event. */
1697 
1698  os_event_wait(event);
1699 
1700  /* After resuming, reacquire the data dictionary latch if
1701  necessary. */
1702 
1703  switch (had_dict_lock) {
1704  case RW_S_LATCH:
1705  row_mysql_freeze_data_dictionary(trx);
1706  break;
1707  case RW_X_LATCH:
1708  /* This should never occur. This incorrect handling
1709  was added in the early development of
1710  ha_innobase::add_index() in InnoDB Plugin 1.0. */
1711  row_mysql_lock_data_dictionary(trx);
1712  break;
1713  }
1714 
1715  if (was_declared_inside_innodb) {
1716 
1717  /* Return back inside InnoDB */
1718 
1720  }
1721 
1722  mutex_enter(&kernel_mutex);
1723 
1724  /* Release the slot for others to use */
1725 
1726  slot->in_use = FALSE;
1727 
1728  wait_time = ut_difftime(ut_time(), slot->suspend_time);
1729 
1730  if (thr->lock_state == QUE_THR_LOCK_ROW) {
1731  if (ut_usectime(&sec, &ms) == -1) {
1732  finish_time = -1;
1733  } else {
1734  finish_time = (ib_int64_t) sec * 1000000 + ms;
1735  }
1736 
1737  diff_time = (ulint) (finish_time - start_time);
1738 
1739  srv_n_lock_wait_current_count--;
1740  srv_n_lock_wait_time = srv_n_lock_wait_time + diff_time;
1741  if (diff_time > srv_n_lock_max_wait_time &&
1742  /* only update the variable if we successfully
1743  retrieved the start and finish times. See Bug#36819. */
1744  start_time != -1 && finish_time != -1) {
1745  srv_n_lock_max_wait_time = diff_time;
1746  }
1747 
1748  /* Record the lock wait time for this thread */
1749  thd_set_lock_wait_time(trx->mysql_thd, diff_time);
1750  }
1751 
1752  if (trx->was_chosen_as_deadlock_victim) {
1753 
1754  trx->error_state = DB_DEADLOCK;
1755  trx->was_chosen_as_deadlock_victim = FALSE;
1756  }
1757 
1758  mutex_exit(&kernel_mutex);
1759 
1760  /* InnoDB system transactions (such as the purge, and
1761  incomplete transactions that are being rolled back after crash
1762  recovery) will use the global value of
1763  innodb_lock_wait_timeout, because trx->mysql_thd == NULL. */
1764  lock_wait_timeout = thd_lock_wait_timeout(trx->mysql_thd);
1765 
1766  if (lock_wait_timeout < 100000000
1767  && wait_time > (double) lock_wait_timeout) {
1768 
1769  trx->error_state = DB_LOCK_WAIT_TIMEOUT;
1770  }
1771 
1772  if (trx_is_interrupted(trx)) {
1773 
1774  trx->error_state = DB_INTERRUPTED;
1775  }
1776 }
1777 
1778 /********************************************************************/
1781 UNIV_INTERN
1782 void
1784 /*==================================*/
1785  que_thr_t* thr)
1787 {
1788  srv_slot_t* slot;
1789  ulint i;
1790 
1791  ut_ad(mutex_own(&kernel_mutex));
1792 
1793  for (i = 0; i < OS_THREAD_MAX_N; i++) {
1794 
1795  slot = srv_mysql_table + i;
1796 
1797  if (slot->in_use && slot->thr == thr) {
1798  /* Found */
1799 
1800  os_event_set(slot->event);
1801 
1802  return;
1803  }
1804  }
1805 
1806  /* not found */
1807 }
1808 
1809 /******************************************************************/
/******************************************************************
Resets the baseline counters used by the InnoDB monitor printout, so
that the per-second averages in subsequent monitor output are computed
from this moment on. */
static
void
srv_refresh_innodb_monitor_stats(void)
/*==================================*/
{
	mutex_enter(&srv_innodb_monitor_mutex);

	/* Restart the time window used for the per-second averages. */
	srv_last_monitor_time = time(NULL);

	/* Snapshot the hash / non-hash search counters; the monitor
	output prints rates as (current - old) / elapsed time. */
	btr_cur_n_sea_old = btr_cur_n_sea;
	btr_cur_n_non_sea_old = btr_cur_n_non_sea;

	buf_refresh_io_stats_all();

	/* Snapshot the row operation counters for the same purpose. */
	srv_n_rows_inserted_old = srv_n_rows_inserted;
	srv_n_rows_updated_old = srv_n_rows_updated;
	srv_n_rows_deleted_old = srv_n_rows_deleted;
	srv_n_rows_read_old = srv_n_rows_read;

	mutex_exit(&srv_innodb_monitor_mutex);
}
1836 
1837 /******************************************************************/
1841 UNIV_INTERN
1842 ibool
1844 /*======================*/
1845  FILE* file,
1846  ibool nowait,
1847  ulint* trx_start,
1849  ulint* trx_end)
1851 {
1852  double time_elapsed;
1853  time_t current_time;
1854  ulint n_reserved;
1855  ibool ret;
1856 
1857  mutex_enter(&srv_innodb_monitor_mutex);
1858 
1859  current_time = time(NULL);
1860 
1861  /* We add 0.001 seconds to time_elapsed to prevent division
1862  by zero if two users happen to call SHOW INNODB STATUS at the same
1863  time */
1864 
1865  time_elapsed = difftime(current_time, srv_last_monitor_time)
1866  + 0.001;
1867 
1868  srv_last_monitor_time = time(NULL);
1869 
1870  fputs("\n=====================================\n", file);
1871 
1872  ut_print_timestamp(file);
1873  fprintf(file,
1874  " INNODB MONITOR OUTPUT\n"
1875  "=====================================\n"
1876  "Per second averages calculated from the last %lu seconds\n",
1877  (ulong)time_elapsed);
1878 
1879  fputs("-----------------\n"
1880  "BACKGROUND THREAD\n"
1881  "-----------------\n", file);
1882  srv_print_master_thread_info(file);
1883 
1884  fputs("----------\n"
1885  "SEMAPHORES\n"
1886  "----------\n", file);
1887  sync_print(file);
1888 
1889  /* Conceptually, srv_innodb_monitor_mutex has a very high latching
1890  order level in sync0sync.h, while dict_foreign_err_mutex has a very
1891  low level 135. Therefore we can reserve the latter mutex here without
1892  a danger of a deadlock of threads. */
1893 
1894  mutex_enter(&dict_foreign_err_mutex);
1895 
1896  if (ftell(dict_foreign_err_file) != 0L) {
1897  fputs("------------------------\n"
1898  "LATEST FOREIGN KEY ERROR\n"
1899  "------------------------\n", file);
1900  ut_copy_file(file, dict_foreign_err_file);
1901  }
1902 
1903  mutex_exit(&dict_foreign_err_mutex);
1904 
1905  /* Only if lock_print_info_summary proceeds correctly,
1906  before we call the lock_print_info_all_transactions
1907  to print all the lock information. */
1908  ret = lock_print_info_summary(file, nowait);
1909 
1910  if (ret) {
1911  if (trx_start) {
1912  long t = ftell(file);
1913  if (t < 0) {
1914  *trx_start = ULINT_UNDEFINED;
1915  } else {
1916  *trx_start = (ulint) t;
1917  }
1918  }
1920  if (trx_end) {
1921  long t = ftell(file);
1922  if (t < 0) {
1923  *trx_end = ULINT_UNDEFINED;
1924  } else {
1925  *trx_end = (ulint) t;
1926  }
1927  }
1928  }
1929 
1930  fputs("--------\n"
1931  "FILE I/O\n"
1932  "--------\n", file);
1933  os_aio_print(file);
1934 
1935  fputs("-------------------------------------\n"
1936  "INSERT BUFFER AND ADAPTIVE HASH INDEX\n"
1937  "-------------------------------------\n", file);
1938  ibuf_print(file);
1939 
1940  ha_print_info(file, btr_search_sys->hash_index);
1941 
1942  fprintf(file,
1943  "%.2f hash searches/s, %.2f non-hash searches/s\n",
1944  (btr_cur_n_sea - btr_cur_n_sea_old)
1945  / time_elapsed,
1946  (btr_cur_n_non_sea - btr_cur_n_non_sea_old)
1947  / time_elapsed);
1948  btr_cur_n_sea_old = btr_cur_n_sea;
1949  btr_cur_n_non_sea_old = btr_cur_n_non_sea;
1950 
1951  fputs("---\n"
1952  "LOG\n"
1953  "---\n", file);
1954  log_print(file);
1955 
1956  fputs("----------------------\n"
1957  "BUFFER POOL AND MEMORY\n"
1958  "----------------------\n", file);
1959  fprintf(file,
1960  "Total memory allocated " ULINTPF
1961  "; in additional pool allocated " ULINTPF "\n",
1964  fprintf(file, "Dictionary memory allocated " ULINTPF "\n",
1965  dict_sys->size);
1966 
1967  buf_print_io(file);
1968 
1969  fputs("--------------\n"
1970  "ROW OPERATIONS\n"
1971  "--------------\n", file);
1972  fprintf(file, "%ld queries inside InnoDB, %lu queries in queue\n",
1973  (long) srv_conc_n_threads,
1974  (ulong) srv_conc_n_waiting_threads);
1975 
1976  fprintf(file, "%lu read views open inside InnoDB\n",
1977  static_cast<ulint>(UT_LIST_GET_LEN(trx_sys->view_list)));
1978 
1979  n_reserved = fil_space_get_n_reserved_extents(0);
1980  if (n_reserved > 0) {
1981  fprintf(file,
1982  "%lu tablespace extents now reserved for"
1983  " B-tree split operations\n",
1984  (ulong) n_reserved);
1985  }
1986 
1987 #ifdef UNIV_LINUX
1988  fprintf(file, "Main thread process no. %lu, id %lu, state: %s\n",
1989  (ulong) srv_main_thread_process_no,
1990  (ulong) srv_main_thread_id,
1991  srv_main_thread_op_info);
1992 #else
1993  fprintf(file, "Main thread id %lu, state: %s\n",
1994  (ulong) srv_main_thread_id,
1995  srv_main_thread_op_info);
1996 #endif
1997  fprintf(file,
1998  "Number of rows inserted " ULINTPF
1999  ", updated " ULINTPF ", deleted " ULINTPF
2000  ", read " ULINTPF "\n",
2001  srv_n_rows_inserted,
2002  srv_n_rows_updated,
2003  srv_n_rows_deleted,
2004  srv_n_rows_read);
2005  fprintf(file,
2006  "%.2f inserts/s, %.2f updates/s,"
2007  " %.2f deletes/s, %.2f reads/s\n",
2008  (srv_n_rows_inserted - srv_n_rows_inserted_old)
2009  / time_elapsed,
2010  (srv_n_rows_updated - srv_n_rows_updated_old)
2011  / time_elapsed,
2012  (srv_n_rows_deleted - srv_n_rows_deleted_old)
2013  / time_elapsed,
2014  (srv_n_rows_read - srv_n_rows_read_old)
2015  / time_elapsed);
2016 
2017  srv_n_rows_inserted_old = srv_n_rows_inserted;
2018  srv_n_rows_updated_old = srv_n_rows_updated;
2019  srv_n_rows_deleted_old = srv_n_rows_deleted;
2020  srv_n_rows_read_old = srv_n_rows_read;
2021 
2022  fputs("----------------------------\n"
2023  "END OF INNODB MONITOR OUTPUT\n"
2024  "============================\n", file);
2025  mutex_exit(&srv_innodb_monitor_mutex);
2026  fflush(file);
2027 
2028  return(ret);
2029 }
2030 
2031 /******************************************************************/
/* Copies a snapshot of InnoDB internal statistics (data file I/O,
buffer pool, log, doublewrite buffer, row and row-lock counters) into
the export_vars structure, which the server layer reads for status
reporting.  The whole copy is made while holding
srv_innodb_monitor_mutex so the snapshot is internally consistent.
NOTE(review): several source lines are missing from this extract --
including the function name line and the left-hand sides of a few
assignments -- the remaining lines are reproduced verbatim. */
2033 UNIV_INTERN
2034 void
2036 /*==========================*/
2037 {
2038  buf_pool_stat_t stat;
2039  ulint LRU_len;
2040  ulint free_len;
2041  ulint flush_list_len;
2042 
 /* Gather aggregate buffer pool statistics before taking the
 monitor mutex. */
2043  buf_get_total_stat(&stat);
2044  buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
2045 
2046  mutex_enter(&srv_innodb_monitor_mutex);
2047 
 /* NOTE(review): the left-hand side of this assignment (presumably a
 pending-fsync counter -- confirm against the full source) is on a
 line missing from this extract. */
2053  = fil_n_pending_log_flushes
2054  + fil_n_pending_tablespace_flushes;
2055  export_vars.innodb_data_fsyncs = os_n_fsyncs;
2056  export_vars.innodb_data_read = srv_data_read;
2057  export_vars.innodb_data_reads = os_n_file_reads;
2058  export_vars.innodb_data_writes = os_n_file_writes;
2059  export_vars.innodb_data_written = srv_data_written;
 /* NOTE(review): left-hand sides of the next three assignments are
 on lines missing from this extract. */
2062  = srv_buf_pool_write_requests;
2063  export_vars.innodb_buffer_pool_wait_free = srv_buf_pool_wait_free;
2064  export_vars.innodb_buffer_pool_pages_flushed = srv_buf_pool_flushed;
2067  = stat.n_ra_pages_read;
2069  = stat.n_ra_pages_evicted;
 /* Counting latched pages requires scanning the pool, so it is a
 debug-build-only statistic. */
2073 #ifdef UNIV_DEBUG
2074  export_vars.innodb_buffer_pool_pages_latched
2075  = buf_get_latched_pages_number();
2076 #endif /* UNIV_DEBUG */
2078 
 /* Pages that are neither on the LRU list nor free are counted as
 "misc" pages. NOTE(review): LHS missing from this extract. */
2080  = buf_pool_get_n_pages() - LRU_len - free_len;
2081 #ifdef HAVE_ATOMIC_BUILTINS
2083 #else
2085 #endif
2086  export_vars.innodb_page_size = UNIV_PAGE_SIZE;
2087  export_vars.innodb_log_waits = srv_log_waits;
2088  export_vars.innodb_os_log_written = srv_os_log_written;
2089  export_vars.innodb_os_log_fsyncs = fil_n_log_flushes;
2090  export_vars.innodb_os_log_pending_fsyncs = fil_n_pending_log_flushes;
2091  export_vars.innodb_os_log_pending_writes = srv_os_log_pending_writes;
2092  export_vars.innodb_log_write_requests = srv_log_write_requests;
2093  export_vars.innodb_log_writes = srv_log_writes;
2094  export_vars.innodb_dblwr_pages_written = srv_dblwr_pages_written;
2095  export_vars.innodb_dblwr_writes = srv_dblwr_writes;
2099  export_vars.innodb_row_lock_waits = srv_n_lock_wait_count;
2101  = srv_n_lock_wait_current_count;
 /* Lock wait times are kept internally in microseconds; exported
 values are divided by 1000, i.e. reported in milliseconds. */
2102  export_vars.innodb_row_lock_time = srv_n_lock_wait_time / 1000;
 /* Guard the average against division by zero when no lock wait has
 occurred yet. NOTE(review): the LHS of the average assignment and
 the else-branch assignment are on missing lines. */
2103  if (srv_n_lock_wait_count > 0) {
2105  (srv_n_lock_wait_time / 1000 / srv_n_lock_wait_count);
2106  } else {
2108  }
2110  = srv_n_lock_max_wait_time / 1000;
2111  export_vars.innodb_rows_read = srv_n_rows_read;
2112  export_vars.innodb_rows_inserted = srv_n_rows_inserted;
2113  export_vars.innodb_rows_updated = srv_n_rows_updated;
2114  export_vars.innodb_rows_deleted = srv_n_rows_deleted;
2115  export_vars.innodb_truncated_status_writes = srv_truncated_status_writes;
2116 
2117  mutex_exit(&srv_innodb_monitor_mutex);
2118 }
2119 
2120 /*********************************************************************/
/* Background monitor thread: wakes roughly every 5 seconds and, when
the corresponding settings are enabled, prints the InnoDB monitor
output about once per 15 seconds and the tablespace / table monitor
outputs about once per 60 seconds.  Also rewrites the status file
used for SHOW ENGINE INNODB STATUS.
NOTE(review): the function name line is missing from this extract;
code lines below are reproduced verbatim. */
2123 UNIV_INTERN
2124 os_thread_ret_t
2126 /*===============*/
2127  void* /*arg __attribute__((unused))*/)
2130 {
2131  ib_int64_t sig_count;
2132  double time_elapsed;
2133  time_t current_time;
2134  time_t last_table_monitor_time;
2135  time_t last_tablespace_monitor_time;
2136  time_t last_monitor_time;
2137  ulint mutex_skipped;
2138  ibool last_srv_print_monitor;
2139 
2140 #ifdef UNIV_DEBUG_THREAD_CREATION
2141  fprintf(stderr, "Lock timeout thread starts, id %lu\n",
 /* NOTE(review): the fprintf argument line (the thread id) is
 missing from this extract.  Also, the message text says "Lock
 timeout thread" although this is the monitor thread -- looks like
 a copy/paste leftover; confirm against the full source. */
2143 #endif
2144 
2145 #ifdef UNIV_PFS_THREAD
2146  pfs_register_thread(srv_monitor_thread_key);
2147 #endif
2148 
 /* Initialise all "last printed" timestamps to now, so nothing is
 printed on the very first iteration. */
2149  srv_last_monitor_time = ut_time();
2150  last_table_monitor_time = ut_time();
2151  last_tablespace_monitor_time = ut_time();
2152  last_monitor_time = ut_time();
2153  mutex_skipped = 0;
2154  last_srv_print_monitor = srv_print_innodb_monitor;
2155 loop:
2156  srv_monitor_active = TRUE;
2157 
2158  /* Wake up every 5 seconds to see if we need to print
2159  monitor information or if signalled at shutdown. */
2160 
2161  sig_count = os_event_reset(srv_monitor_event);
2162 
2163  os_event_wait_time_low(srv_monitor_event, 5000000, sig_count);
2164 
2165  current_time = ut_time();
2166 
2167  time_elapsed = difftime(current_time, last_monitor_time);
2168 
2169  if (time_elapsed > 15) {
2170  last_monitor_time = ut_time();
2171 
2172  if (srv_print_innodb_monitor) {
2173  /* Reset mutex_skipped counter everytime
2174  srv_print_innodb_monitor changes. This is to
2175  ensure we will not be blocked by kernel_mutex
2176  for short duration information printing,
2177  such as requested by sync_array_print_long_waits() */
2178  if (!last_srv_print_monitor) {
2179  mutex_skipped = 0;
2180  last_srv_print_monitor = TRUE;
2181  }
2182 
 /* srv_printf_innodb_monitor returns FALSE when it could not
 acquire kernel_mutex; count the skip so MUTEX_NOWAIT can back
 off appropriately. */
2183  if (!srv_printf_innodb_monitor(stderr,
2184  MUTEX_NOWAIT(mutex_skipped),
2185  NULL, NULL)) {
2186  mutex_skipped++;
2187  } else {
2188  /* Reset the counter */
2189  mutex_skipped = 0;
2190  }
2191  } else {
2192  last_srv_print_monitor = FALSE;
2193  }
2194 
2195 
 /* Rewrite the in-memory status file from scratch, guarded by its
 own mutex. */
2196  if (srv_innodb_status) {
2197  mutex_enter(&srv_monitor_file_mutex);
2198  rewind(srv_monitor_file);
2199  if (!srv_printf_innodb_monitor(srv_monitor_file,
2200  MUTEX_NOWAIT(mutex_skipped),
2201  NULL, NULL)) {
2202  mutex_skipped++;
2203  } else {
2204  mutex_skipped = 0;
2205  }
2206 
2207  os_file_set_eof(srv_monitor_file);
2208  mutex_exit(&srv_monitor_file_mutex);
2209  }
2210 
 /* Tablespace monitor: at most once per 60 seconds. */
2211  if (srv_print_innodb_tablespace_monitor
2212  && difftime(current_time,
2213  last_tablespace_monitor_time) > 60) {
2214  last_tablespace_monitor_time = ut_time();
2215 
2216  fputs("========================"
2217  "========================\n",
2218  stderr);
2219 
2220  ut_print_timestamp(stderr);
2221 
2222  fputs(" INNODB TABLESPACE MONITOR OUTPUT\n"
2223  "========================"
2224  "========================\n",
2225  stderr);
2226 
2227  fsp_print(0);
2228  fputs("Validating tablespace\n", stderr);
2229  fsp_validate(0);
2230  fputs("Validation ok\n"
2231  "---------------------------------------\n"
2232  "END OF INNODB TABLESPACE MONITOR OUTPUT\n"
2233  "=======================================\n",
2234  stderr);
2235  }
2236 
 /* Table monitor: at most once per 60 seconds. */
2237  if (srv_print_innodb_table_monitor
2238  && difftime(current_time, last_table_monitor_time) > 60) {
2239 
2240  last_table_monitor_time = ut_time();
2241 
2242  fputs("===========================================\n",
2243  stderr);
2244 
2245  ut_print_timestamp(stderr);
2246 
2247  fputs(" INNODB TABLE MONITOR OUTPUT\n"
2248  "===========================================\n",
2249  stderr);
2250  dict_print();
2251 
2252  fputs("-----------------------------------\n"
2253  "END OF INNODB TABLE MONITOR OUTPUT\n"
2254  "==================================\n",
2255  stderr);
2256  }
2257  }
2258 
 /* NOTE(review): the shutdown-state condition guarding this goto is
 on a line missing from this extract. */
2260  goto exit_func;
2261  }
2262 
 /* Keep looping (and keep srv_monitor_active TRUE) while any of the
 monitor outputs is enabled. */
2263  if (srv_print_innodb_monitor
2264  || srv_print_innodb_lock_monitor
2265  || srv_print_innodb_tablespace_monitor
2266  || srv_print_innodb_table_monitor) {
2267  goto loop;
2268  }
2269 
2270  srv_monitor_active = FALSE;
2271 
2272  goto loop;
2273 
2274 exit_func:
2275  srv_monitor_active = FALSE;
2276 
2277  /* We count the number of threads in os_thread_exit(). A created
2278  thread should always use that to exit and not use return() to exit. */
2279 
2280  os_thread_exit(NULL);
2281 
2282  OS_THREAD_DUMMY_RETURN;
2283 }
2284 
2285 /*********************************************************************/
/* Lock-timeout thread: while any session is waiting for a lock, it
wakes about once per second, scans all wait slots, and cancels lock
waits whose wait time exceeds the session's lock-wait timeout or
whose transaction has been interrupted.
NOTE(review): the function name line is missing from this extract;
code lines below are reproduced verbatim. */
2288 UNIV_INTERN
2289 os_thread_ret_t
2291 /*====================*/
2292  void* /*arg __attribute__((unused))*/)
2293  /* in: a dummy parameter required by
2294  os_thread_create */
2295 {
2296  srv_slot_t* slot;
2297  ibool some_waits;
2298  double wait_time;
2299  ulint i;
2300  ib_int64_t sig_count;
2301 
2302 #ifdef UNIV_PFS_THREAD
2303  pfs_register_thread(srv_lock_timeout_thread_key);
2304 #endif
2305 
2306 loop:
2307 
2308  /* When someone is waiting for a lock, we wake up every second
2309  and check if a timeout has passed for a lock wait */
2310 
2311  sig_count = os_event_reset(srv_timeout_event);
2312 
2313  os_event_wait_time_low(srv_timeout_event, 1000000, sig_count);
2314 
2315  srv_lock_timeout_active = TRUE;
2316 
2317  mutex_enter(&kernel_mutex);
2318 
2319  some_waits = FALSE;
2320 
2321  /* Check of all slots if a thread is waiting there, and if it
2322  has exceeded the time limit */
2323 
2324  for (i = 0; i < OS_THREAD_MAX_N; i++) {
2325 
2326  slot = srv_mysql_table + i;
2327 
2328  if (slot->in_use) {
2329  trx_t* trx;
2330  ulong lock_wait_timeout;
2331 
2332  some_waits = TRUE;
2333 
2334  wait_time = ut_difftime(ut_time(), slot->suspend_time);
2335 
 /* The timeout is a per-session setting read from the
 connection that owns the waiting transaction. */
2336  trx = thr_get_trx(slot->thr);
2337  lock_wait_timeout = thd_lock_wait_timeout(
2338  trx->mysql_thd);
2339 
 /* Timeout values of 100000000 or more are effectively
 infinite: the timeout check is skipped for them. */
2340  if (trx_is_interrupted(trx)
2341  || (lock_wait_timeout < 100000000
2342  && (wait_time > (double) lock_wait_timeout
2343  || wait_time < 0))) {
2344 
2345  /* Timeout exceeded or a wrap-around in system
2346  time counter: cancel the lock request queued
2347  by the transaction and release possible
2348  other transactions waiting behind; it is
2349  possible that the lock has already been
2350  granted: in that case do nothing */
2351 
2352  if (trx->wait_lock) {
 /* NOTE(review): the call that cancels the waiting lock
 (taking trx->wait_lock as its argument) is on a line
 missing from this extract. */
2354  trx->wait_lock);
2355  }
2356  }
2357  }
2358  }
2359 
2360  os_event_reset(srv_lock_timeout_thread_event);
2361 
2362  mutex_exit(&kernel_mutex);
2363 
 /* NOTE(review): the shutdown condition guarding this goto is on a
 line missing from this extract. */
2365  goto exit_func;
2366  }
2367 
2368  if (some_waits) {
2369  goto loop;
2370  }
2371 
2372  srv_lock_timeout_active = FALSE;
2373 
2374 #if 0
2375  /* The following synchronisation is disabled, since
2376  the InnoDB monitor output is to be updated every 15 seconds. */
2377  os_event_wait(srv_lock_timeout_thread_event);
2378 #endif
2379  goto loop;
2380 
2381 exit_func:
2382  srv_lock_timeout_active = FALSE;
2383 
2384  /* We count the number of threads in os_thread_exit(). A created
2385  thread should always use that to exit and not use return() to exit. */
2386 
2387  os_thread_exit(NULL);
2388 
2389  OS_THREAD_DUMMY_RETURN;
2390 }
2391 
2392 /*********************************************************************/
/* Error-monitor thread: runs about once per second to (a) sanity-check
that the log sequence number never decreases, (b) refresh InnoDB
monitor statistics at most every 60 seconds, (c) update LRU-eviction
and flush-rate statistics, and (d) deliberately crash the server when
a semaphore wait has exceeded the fatal threshold on more than 10
successive checks.
NOTE(review): the function name line is missing from this extract;
code lines below are reproduced verbatim. */
2396 UNIV_INTERN
2397 os_thread_ret_t
2399 /*=====================*/
2400  void* /*arg __attribute__((unused))*/)
2403 {
2404  /* number of successive fatal timeouts observed */
2405  ulint fatal_cnt = 0;
2406  ib_uint64_t old_lsn;
2407  ib_uint64_t new_lsn;
2408  ib_int64_t sig_count;
2409 
2410  old_lsn = srv_start_lsn;
2411 
2412 #ifdef UNIV_DEBUG_THREAD_CREATION
2413  fprintf(stderr, "Error monitor thread starts, id %lu\n",
 /* NOTE(review): the fprintf argument line (the thread id) is
 missing from this extract. */
2415 #endif
2416 
2417 #ifdef UNIV_PFS_THREAD
2418  pfs_register_thread(srv_error_monitor_thread_key);
2419 #endif
2420 
2421 loop:
2422  srv_error_monitor_active = TRUE;
2423 
2424  /* Try to track a strange bug reported by Harald Fuchs and others,
2425  where the lsn seems to decrease at times */
2426 
2427  new_lsn = log_get_lsn();
2428 
2429  if (new_lsn < old_lsn) {
2430  drizzled::errmsg_printf(drizzled::error::INFO,
2431  "InnoDB: Error: old log sequence number %"PRIu64" was greater than the new log sequence number %"PRIu64"!"
2432  "InnoDB: Please submit a bug report to http://bugs.launchpad.net/drizzle",
2433  old_lsn, new_lsn);
2434  }
2435 
2436  old_lsn = new_lsn;
2437 
2438  if (difftime(time(NULL), srv_last_monitor_time) > 60) {
2439  /* We refresh InnoDB Monitor values so that averages are
2440  printed from at most 60 last seconds */
2441 
2442  srv_refresh_innodb_monitor_stats();
2443  }
2444 
2445  /* Update the statistics collected for deciding LRU
2446  eviction policy. */
2447  buf_LRU_stat_update();
2448 
2449  /* Update the statistics collected for flush rate policy. */
2450  buf_flush_stat_update();
2451 
2452  /* In case mutex_exit is not a memory barrier, it is
2453  theoretically possible some threads are left waiting though
2454  the semaphore is already released. Wake up those threads: */
2455 
 /* NOTE(review): the wake-up call referred to above, and the
 condition that opens the fatal-semaphore-wait branch below, are on
 lines missing from this extract. */
2457 
2459  fatal_cnt++;
2460  if (fatal_cnt > 10) {
2461 
2462  fprintf(stderr,
2463  "InnoDB: Error: semaphore wait has lasted"
2464  " > %lu seconds\n"
2465  "InnoDB: We intentionally crash the server,"
2466  " because it appears to be hung.\n",
2467  (ulong) srv_fatal_semaphore_wait_threshold);
2468 
 /* Intentional crash: the server is presumed hung. */
2469  ut_error;
2470  }
2471  } else {
2472  fatal_cnt = 0;
2473  }
2474 
2475  /* Flush stderr so that a database user gets the output
2476  to possible MySQL error file */
2477 
2478  fflush(stderr);
2479 
2480  sig_count = os_event_reset(srv_error_event);
2481 
2482  os_event_wait_time_low(srv_error_event, 1000000, sig_count);
2483 
 /* NOTE(review): the shutdown condition guarding this loop-back is
 on a line missing from this extract. */
2485 
2486  goto loop;
2487  }
2488 
2489  srv_error_monitor_active = FALSE;
2490 
2491  /* We count the number of threads in os_thread_exit(). A created
2492  thread should always use that to exit and not use return() to exit. */
2493 
2494  os_thread_exit(NULL);
2495 
2496  OS_THREAD_DUMMY_RETURN;
2497 }
2498 
2499 /*********************************************************************/
/* Background thread that restores the buffer pool LRU list from disk
at startup (when srv_auto_lru_dump is enabled) and afterwards dumps
it whenever more than srv_auto_lru_dump seconds have elapsed since
the previous dump, polling every 5 seconds.
NOTE(review): the function name line is missing from this extract;
code lines below are reproduced verbatim. */
2503 UNIV_INTERN
2504 os_thread_ret_t
2506 /*====================*/
2507  void* /*arg __attribute__((unused))*/)
2510 {
2511  uint auto_lru_dump;
2512  time_t last_dump_time;
2513  time_t time_elapsed;
2514 
2515 #ifdef UNIV_DEBUG_THREAD_CREATION
2516  fprintf(stderr, "The LRU dump/restore thread has started, id %lu\n",
 /* NOTE(review): the fprintf argument line (the thread id) is
 missing from this extract. */
2518 #endif
2519 
2520  if (srv_auto_lru_dump)
2521  buf_LRU_file_restore();
2522 
2523  last_dump_time = time(NULL);
2524 
2525 loop:
2526  os_thread_sleep(5000000);
2527 
 /* NOTE(review): the shutdown condition guarding this goto is on a
 line missing from this extract. */
2529  goto exit_func;
2530  }
2531 
 /* Copy the current value of srv_auto_lru_dump into a local before
 testing it, so a single consistent value is used below. */
2532  time_elapsed = time(NULL) - last_dump_time;
2533  auto_lru_dump = srv_auto_lru_dump;
2534  if (auto_lru_dump > 0 && (time_t) auto_lru_dump < time_elapsed) {
2535  last_dump_time = time(NULL);
2536  buf_LRU_file_dump();
2537  }
2538 
2539  goto loop;
2540 exit_func:
2541  /* We count the number of threads in os_thread_exit(). A created
2542  thread should always use that to exit and not use return() to exit. */
2543 
2544  os_thread_exit(NULL);
2545 
2546  OS_THREAD_DUMMY_RETURN;
2547 }
2548 
2549 /**********************************************************************/
/* Returns TRUE if any server background thread in the range SRV_COM
up to and including SRV_MASTER is currently active.  The check is
made while holding kernel_mutex.
NOTE(review): the function name line is missing from this extract. */
2552 UNIV_INTERN
2553 ibool
2555 /*=====================================*/
2556 {
2557  ulint i;
2558  ibool ret = FALSE;
2559 
2560  mutex_enter(&kernel_mutex);
2561 
 /* Stop at the first active thread type found. */
2562  for (i = SRV_COM; i <= SRV_MASTER; ++i) {
2563  if (srv_n_threads_active[i] != 0) {
2564  ret = TRUE;
2565  break;
2566  }
2567  }
2568 
2569  mutex_exit(&kernel_mutex);
2570 
2571  return(ret);
2572 }
2573 
2574 /*******************************************************************/
/* Records user activity by incrementing srv_activity_count and, when
the master thread is not active, wakes it up under kernel_mutex.
NOTE(review): the function name line and the wake-up call line are
missing from this extract. */
2580 UNIV_INTERN
2581 void
2583 /*===============================*/
2584 {
2585  srv_activity_count++;
2586 
2587  if (srv_n_threads_active[SRV_MASTER] == 0) {
2588 
2589  mutex_enter(&kernel_mutex);
2590 
 /* NOTE(review): the call that wakes the master thread is on a
 line missing from this extract. */
2592 
2593  mutex_exit(&kernel_mutex);
2594  }
2595 }
2596 
2597 /*******************************************************************/
/* Wakes the purge (worker) thread when dedicated purge threads are
configured and none is currently active.  Asserts that the caller
does not already hold kernel_mutex.
NOTE(review): the function name line and the wake-up call line are
missing from this extract. */
2603 UNIV_INTERN
2604 void
2606 /*=====================================*/
2607 {
2608  ut_ad(!mutex_own(&kernel_mutex));
2609 
2610  if (srv_n_purge_threads > 0
2611  && srv_n_threads_active[SRV_WORKER] == 0) {
2612 
2613  mutex_enter(&kernel_mutex);
2614 
 /* NOTE(review): the call that wakes the purge thread is on a
 line missing from this extract. */
2616 
2617  mutex_exit(&kernel_mutex);
2618  }
2619 }
2620 
2621 /*******************************************************************/
/* Increments the activity counter and unconditionally wakes the
master thread under kernel_mutex.
NOTE(review): the function name line and the wake-up call line are
missing from this extract. */
2623 UNIV_INTERN
2624 void
2626 /*========================*/
2627 {
2628  srv_activity_count++;
2629 
2630  mutex_enter(&kernel_mutex);
2631 
 /* NOTE(review): the call that wakes the master thread is on a
 line missing from this extract. */
2633 
2634  mutex_exit(&kernel_mutex);
2635 }
2636 
2637 /*******************************************************************/
/* Wakes the purge thread when dedicated purge threads are
configured, under kernel_mutex.  Asserts that the caller does not
already hold kernel_mutex.
NOTE(review): the function name line and the wake-up call line are
missing from this extract. */
2639 UNIV_INTERN
2640 void
2642 /*=======================*/
2643 {
2644  ut_ad(!mutex_own(&kernel_mutex));
2645 
2646  if (srv_n_purge_threads > 0) {
2647 
2648  mutex_enter(&kernel_mutex);
2649 
 /* NOTE(review): the call that wakes the purge thread is on a
 line missing from this extract. */
2651 
2652  mutex_exit(&kernel_mutex);
2653  }
2654 }
2655 
2656 /**********************************************************************
2657 The master thread is tasked to ensure that flush of log file happens
2658 once every second in the background. This is to ensure that not more
2659 than one second of trxs are lost in case of crash when
2660 innodb_flush_logs_at_trx_commit != 1 */
2661 static
2662 void
2663 srv_sync_log_buffer_in_background(void)
2664 /*===================================*/
2665 {
2666  time_t current_time = time(NULL);
2667 
2668  srv_main_thread_op_info = "flushing log";
 /* Rate-limit to at most one background flush per second. */
2669  if (difftime(current_time, srv_last_log_flush_time) >= 1) {
 /* NOTE(review): the line that performs the actual log buffer
 flush is missing from this extract; only the bookkeeping below
 is visible. */
2671  srv_last_log_flush_time = current_time;
2672  srv_log_writes_and_flush++;
2673  }
2674 }
2675 
2676 /********************************************************************/
2679 static
2680 void
2681 srv_master_do_purge(void)
2682 /*=====================*/
2683 {
2684  ulint n_pages_purged;
2685 
2686  ut_ad(!mutex_own(&kernel_mutex));
2687 
2688  ut_a(srv_n_purge_threads == 0);
2689 
2690  do {
2691  /* Check for shutdown and change in purge config. */
2692  if (srv_fast_shutdown && srv_shutdown_state > 0) {
2693  /* Nothing to purge. */
2694  n_pages_purged = 0;
2695  } else {
2696  n_pages_purged = trx_purge(srv_purge_batch_size);
2697  }
2698 
2699  srv_sync_log_buffer_in_background();
2700 
2701  } while (n_pages_purged > 0);
2702 }
2703 
2704 /*********************************************************************/
2707 UNIV_INTERN
2708 os_thread_ret_t
2710 /*==============*/
2711  void* /*arg __attribute__((unused))*/)
2714 {
2715  buf_pool_stat_t buf_stat;
2716  os_event_t event;
2717  ulint old_activity_count;
2718  ulint n_pages_purged = 0;
2719  ulint n_bytes_merged;
2720  ulint n_pages_flushed;
2721  uint32_t n_pages_flushed_prev = 0;
2722  ulint n_bytes_archived;
2723  ulint n_tables_to_drop;
2724  ulint n_ios;
2725  ulint n_ios_old;
2726  ulint n_ios_very_old;
2727  ulint n_pend_ios;
2728  ulint next_itr_time;
2729  uint32_t prev_adaptive_flushing_method = ULINT32_UNDEFINED;
2730  uint32_t inner_loop = 0;
2731  bool skip_sleep = false;
2732  ulint i;
2733 
2734  struct t_prev_flush_info_struct {
2735  uint32_t count;
2736  uint32_t space;
2737  uint32_t offset;
2738  uint64_t oldest_modification;
2739  } prev_flush_info[MAX_BUFFER_POOLS];
2740 
2741  uint64_t lsn_old;
2742  uint64_t oldest_lsn;
2743 
2744 #ifdef UNIV_DEBUG_THREAD_CREATION
2745  fprintf(stderr, "Master thread starts, id %lu\n",
2747 #endif
2748 
2749 #ifdef UNIV_PFS_THREAD
2750  pfs_register_thread(srv_master_thread_key);
2751 #endif
2752 
2753  srv_main_thread_process_no = os_proc_get_number();
2754  srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());
2755 
2756  srv_table_reserve_slot(SRV_MASTER);
2757 
2758  mutex_enter(&kernel_mutex);
2759 
2760  srv_n_threads_active[SRV_MASTER]++;
2761 
2762  mutex_exit(&kernel_mutex);
2763 
2764  mutex_enter(&(log_sys->mutex));
2765  lsn_old = log_sys->lsn;
2766  mutex_exit(&(log_sys->mutex));
2767 loop:
2768  /*****************************************************************/
2769  /* ---- When there is database activity by users, we cycle in this
2770  loop */
2771 
2772  srv_main_thread_op_info = "reserving kernel mutex";
2773 
2774  buf_get_total_stat(&buf_stat);
2775  n_ios_very_old = log_sys->n_log_ios + buf_stat.n_pages_read
2776  + buf_stat.n_pages_written;
2777  mutex_enter(&kernel_mutex);
2778 
2779  /* Store the user activity counter at the start of this loop */
2780  old_activity_count = srv_activity_count;
2781 
2782  mutex_exit(&kernel_mutex);
2783 
2784  if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
2785 
2786  goto suspend_thread;
2787  }
2788 
2789  /* ---- We run the following loop approximately once per second
2790  when there is database activity */
2791 
2792  srv_last_log_flush_time = time(NULL);
2793 
2794  /* Sleep for 1 second on entering the for loop below the first time. */
2795  next_itr_time = ut_time_ms() + 1000;
2796 
2797  skip_sleep = false;
2798 
2799  for (i = 0; i < 10; i++) {
2800  ulint cur_time = ut_time_ms();
2801 
2802  n_pages_flushed = 0;
2803 
2804  /* ALTER TABLE in MySQL requires on Unix that the table handler
2805  can drop tables lazily after there no longer are SELECT
2806  queries to them. */
2807 
2808  srv_main_thread_op_info = "doing background drop tables";
2809 
2811 
2812  srv_main_thread_op_info = "";
2813 
2814  if (srv_fast_shutdown && srv_shutdown_state > 0) {
2815 
2816  goto background_loop;
2817  }
2818 
2819  buf_get_total_stat(&buf_stat);
2820 
2821  n_ios_old = log_sys->n_log_ios + buf_stat.n_pages_read
2822  + buf_stat.n_pages_written;
2823 
2824  srv_main_thread_op_info = "sleeping";
2825  srv_main_1_second_loops++;
2826 
2827  if (skip_sleep == false) {
2828  if (next_itr_time > cur_time
2830 
2831  /* Get sleep interval in micro seconds. We use
2832  ut_min() to avoid long sleep in case of
2833  wrap around. */
2834  os_thread_sleep(ut_min(1000000,
2835  (next_itr_time - cur_time)
2836  * 1000));
2837  srv_main_sleeps++;
2838 
2839  /*
2840  TODO: tracing code unported to Drizzle
2841  mutex_enter(&(log_sys->mutex));
2842  oldest_lsn = buf_pool_get_oldest_modification();
2843  ib_uint64_t lsn = log_sys->lsn;
2844  mutex_exit(&(log_sys->mutex));
2845 
2846  if(oldest_lsn)
2847  fprintf(stderr,
2848  "InnoDB flush: age pct: %lu, lsn progress: %lu\n",
2849  (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age,
2850  lsn - lsn_old);
2851  */
2852 
2853  }
2854 
2855  /* Each iteration should happen at 1 second interval. */
2856  next_itr_time = ut_time_ms() + 1000;
2857  }
2858 
2859  skip_sleep = false;
2860 
2861  /* Flush logs if needed */
2862  srv_sync_log_buffer_in_background();
2863 
2864  srv_main_thread_op_info = "making checkpoint";
2865  log_free_check();
2866 
2867  /* If i/os during one second sleep were less than 5% of
2868  capacity, we assume that there is free disk i/o capacity
2869  available, and it makes sense to do an insert buffer merge. */
2870 
2871  buf_get_total_stat(&buf_stat);
2872  n_pend_ios = buf_get_n_pending_ios()
2873  + log_sys->n_pending_writes;
2874  n_ios = log_sys->n_log_ios + buf_stat.n_pages_read
2875  + buf_stat.n_pages_written;
2876  if (n_pend_ios < SRV_PEND_IO_THRESHOLD
2877  && (n_ios - n_ios_old < SRV_RECENT_IO_ACTIVITY)) {
2878  srv_main_thread_op_info = "doing insert buffer merge";
2879  ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5));
2880 
2881  /* Flush logs if needed */
2882  srv_sync_log_buffer_in_background();
2883  }
2884 
2885  if (UNIV_UNLIKELY(buf_get_modified_ratio_pct()
2886  > srv_max_buf_pool_modified_pct)) {
2887 
2888  /* Try to keep the number of modified pages in the
2889  buffer pool under the limit wished by the user */
2890 
2891  srv_main_thread_op_info =
2892  "flushing buffer pool pages";
2893  n_pages_flushed = buf_flush_list(
2894  PCT_IO(100), IB_ULONGLONG_MAX);
2895 
2896  mutex_enter(&(log_sys->mutex));
2897  lsn_old = log_sys->lsn;
2898  mutex_exit(&(log_sys->mutex));
2899  prev_adaptive_flushing_method = ULINT32_UNDEFINED;
2900  } else if (srv_adaptive_flushing
2901  && srv_adaptive_flushing_method == 0) {
2902 
2903  /* Try to keep the rate of flushing of dirty
2904  pages such that redo log generation does not
2905  produce bursts of IO at checkpoint time. */
2906  ulint n_flush = buf_flush_get_desired_flush_rate();
2907 
2908  if (n_flush) {
2909  srv_main_thread_op_info =
2910  "flushing buffer pool pages";
2911  n_flush = ut_min(PCT_IO(100), n_flush);
2912  n_pages_flushed =
2913  buf_flush_list(
2914  n_flush,
2915  IB_ULONGLONG_MAX);
2916  }
2917 
2918  mutex_enter(&(log_sys->mutex));
2919  lsn_old = log_sys->lsn;
2920  mutex_exit(&(log_sys->mutex));
2921  prev_adaptive_flushing_method = ULINT32_UNDEFINED;
2922  } else if (srv_adaptive_flushing
2923  && srv_adaptive_flushing_method == 1) {
2924 
2925  /* Try to keep modified age not to exceed
2926  max_checkpoint_age * 7/8 line */
2927 
2928  mutex_enter(&(log_sys->mutex));
2929 
2930  oldest_lsn = buf_pool_get_oldest_modification();
2931  if (oldest_lsn == 0) {
2932  lsn_old = log_sys->lsn;
2933  mutex_exit(&(log_sys->mutex));
2934 
2935  } else {
2936  if ((log_sys->lsn - oldest_lsn)
2937  > (log_sys->max_checkpoint_age)
2938  - ((log_sys->max_checkpoint_age) / 8)) {
2939  /* LOG_POOL_PREFLUSH_RATIO_ASYNC is exceeded. */
2940  /* We should not flush from here. */
2941  lsn_old = log_sys->lsn;
2942  mutex_exit(&(log_sys->mutex));
2943  } else if ((log_sys->lsn - oldest_lsn)
2944  > (log_sys->max_checkpoint_age)/4) {
2945 
2946  /* defence line (max_checkpoint_age * 1/2) */
2947  uint64_t lsn = log_sys->lsn;
2948 
2949  uint64_t level, bpl;
2950  buf_page_t* bpage;
2951  ulint j;
2952 
2953  mutex_exit(&(log_sys->mutex));
2954 
2955  bpl = 0;
2956 
2957  for (j = 0; j < srv_buf_pool_instances; j++) {
2958  buf_pool_t* buf_pool;
2959  uint32_t n_blocks = 0;
2960 
2961  buf_pool = buf_pool_from_array(j);
2962 
2963  /* The scanning flush_list is optimistic here */
2964 
2965  level = 0;
2966  bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
2967 
2968  while (bpage != NULL) {
2969  uint64_t oldest_modification = bpage->oldest_modification;
2970  if (oldest_modification != 0) {
2971  level += log_sys->max_checkpoint_age
2972  - (lsn - oldest_modification);
2973  }
2974  bpage = UT_LIST_GET_NEXT(list, bpage);
2975  n_blocks++;
2976  }
2977 
2978  if (level) {
2979  bpl += ((ib_uint64_t) n_blocks * n_blocks
2980  * (lsn - lsn_old)) / level;
2981  }
2982 
2983  }
2984 
2985  if (!srv_use_doublewrite_buf) {
2986  /* flush is faster than when doublewrite */
2987  bpl = (bpl * 7) / 8;
2988  }
2989 
2990  if (bpl) {
2991 retry_flush_batch:
2992  n_pages_flushed = buf_flush_list(bpl,
2993  oldest_lsn + (lsn - lsn_old));
2994  if (n_pages_flushed == ULINT32_UNDEFINED) {
2995  os_thread_sleep(5000);
2996  goto retry_flush_batch;
2997  }
2998  }
2999 
3000  lsn_old = lsn;
3001  /*
3002  TODO: tracing code unported to Drizzle
3003  fprintf(stderr,
3004  "InnoDB flush: age pct: %lu, lsn progress: %lu, blocks to flush:%llu\n",
3005  (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age,
3006  lsn - lsn_old, bpl);
3007  */
3008  } else {
3009  lsn_old = log_sys->lsn;
3010  mutex_exit(&(log_sys->mutex));
3011  }
3012  }
3013  prev_adaptive_flushing_method = 1;
3014  } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 2) {
3015  buf_pool_t* buf_pool;
3016  buf_page_t* bpage;
3017  uint64_t lsn;
3018  ulint j;
3019 
3020  mutex_enter(&(log_sys->mutex));
3021  oldest_lsn = buf_pool_get_oldest_modification();
3022  lsn = log_sys->lsn;
3023  mutex_exit(&(log_sys->mutex));
3024 
3025  /* upper loop/sec. (x10) */
3026  next_itr_time -= 900; /* 1000 - 900 == 100 */
3027  inner_loop++;
3028  if (inner_loop < 10) {
3029  i--;
3030  } else {
3031  inner_loop = 0;
3032  }
3033 
3034  if (prev_adaptive_flushing_method == 2) {
3035  int32_t n_flush;
3036  int32_t blocks_sum = 0;
3037  uint32_t new_blocks_sum = 0;
3038  uint32_t flushed_blocks_sum = 0;
3039 
3040  /* prev_flush_info[j] should be the previous loop's */
3041  for (j = 0; j < srv_buf_pool_instances; j++) {
3042  int32_t blocks_num, new_blocks_num, flushed_blocks_num;
3043  bool found = false;
3044 
3045  buf_pool = buf_pool_from_array(j);
3046 
3047  blocks_num = UT_LIST_GET_LEN(buf_pool->flush_list);
3048  bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
3049  new_blocks_num = 0;
3050 
3051  while (bpage != NULL) {
3052  if (prev_flush_info[j].space == bpage->space
3053  && prev_flush_info[j].offset == bpage->offset
3054  && prev_flush_info[j].oldest_modification
3055  == bpage->oldest_modification) {
3056  found = true;
3057  break;
3058  }
3059  bpage = UT_LIST_GET_NEXT(list, bpage);
3060  new_blocks_num++;
3061  }
3062  if (!found) {
3063  new_blocks_num = blocks_num;
3064  }
3065  flushed_blocks_num = new_blocks_num
3066  + prev_flush_info[j].count
3067  - blocks_num;
3068  if (flushed_blocks_num < 0) {
3069  flushed_blocks_num = 0;
3070  }
3071 
3072  bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
3073 
3074  prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list);
3075  if (bpage) {
3076  prev_flush_info[j].space = bpage->space;
3077  prev_flush_info[j].offset = bpage->offset;
3078  prev_flush_info[j].oldest_modification = bpage->oldest_modification;
3079  } else {
3080  prev_flush_info[j].space = 0;
3081  prev_flush_info[j].offset = 0;
3082  prev_flush_info[j].oldest_modification = 0;
3083  }
3084 
3085  new_blocks_sum += new_blocks_num;
3086  flushed_blocks_sum += flushed_blocks_num;
3087  blocks_sum += blocks_num;
3088  }
3089 
3090  n_flush = blocks_sum * (lsn - lsn_old) / log_sys->max_modified_age_async;
3091  if (flushed_blocks_sum > n_pages_flushed_prev) {
3092  n_flush -= (flushed_blocks_sum - n_pages_flushed_prev);
3093  }
3094 
3095  if (n_flush > 0) {
3096  n_flush++;
3097  n_pages_flushed = buf_flush_list(n_flush, oldest_lsn + (lsn - lsn_old));
3098  } else {
3099  n_pages_flushed = 0;
3100  }
3101  } else {
3102  /* store previous first pages of the flush_list */
3103  for (j = 0; j < srv_buf_pool_instances; j++) {
3104  buf_pool = buf_pool_from_array(j);
3105 
3106  bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
3107 
3108  prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list);
3109  if (bpage) {
3110  prev_flush_info[j].space = bpage->space;
3111  prev_flush_info[j].offset = bpage->offset;
3112  prev_flush_info[j].oldest_modification = bpage->oldest_modification;
3113  } else {
3114  prev_flush_info[j].space = 0;
3115  prev_flush_info[j].offset = 0;
3116  prev_flush_info[j].oldest_modification = 0;
3117  }
3118  }
3119  n_pages_flushed = 0;
3120  }
3121 
3122  lsn_old = lsn;
3123  prev_adaptive_flushing_method = 2;
3124  } else {
3125  mutex_enter(&(log_sys->mutex));
3126  lsn_old = log_sys->lsn;
3127  mutex_exit(&(log_sys->mutex));
3128  prev_adaptive_flushing_method = ULINT32_UNDEFINED;
3129  }
3130 
3131  if (n_pages_flushed == ULINT_UNDEFINED) {
3132  n_pages_flushed_prev = 0;
3133  } else {
3134  n_pages_flushed_prev = n_pages_flushed;
3135  }
3136 
3137  if (srv_activity_count == old_activity_count) {
3138 
3139  /* There is no user activity at the moment, go to
3140  the background loop */
3141 
3142  goto background_loop;
3143  }
3144  }
3145 
3146  /* ---- We perform the following code approximately once per
3147  10 seconds when there is database activity */
3148 
3149 #ifdef MEM_PERIODIC_CHECK
3150  /* Check magic numbers of every allocated mem block once in 10
3151  seconds */
3152  mem_validate_all_blocks();
3153 #endif
3154  /* If i/os during the 10 second period were less than 200% of
3155  capacity, we assume that there is free disk i/o capacity
3156  available, and it makes sense to flush srv_io_capacity pages.
3157 
3158  Note that this is done regardless of the fraction of dirty
3159  pages relative to the max requested by the user. The one second
3160  loop above requests writes for that case. The writes done here
3161  are not required, and may be disabled. */
3162 
3163  buf_get_total_stat(&buf_stat);
3164  n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes;
3165  n_ios = log_sys->n_log_ios + buf_stat.n_pages_read
3166  + buf_stat.n_pages_written;
3167 
3168  srv_main_10_second_loops++;
3169  if (n_pend_ios < SRV_PEND_IO_THRESHOLD
3170  && (n_ios - n_ios_very_old < SRV_PAST_IO_ACTIVITY)) {
3171 
3172  srv_main_thread_op_info = "flushing buffer pool pages";
3173  buf_flush_list(PCT_IO(100), IB_ULONGLONG_MAX);
3174 
3175  /* Flush logs if needed */
3176  srv_sync_log_buffer_in_background();
3177  }
3178 
3179  /* We run a batch of insert buffer merge every 10 seconds,
3180  even if the server were active */
3181 
3182  srv_main_thread_op_info = "doing insert buffer merge";
3183  ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5));
3184 
3185  /* Flush logs if needed */
3186  srv_sync_log_buffer_in_background();
3187 
3188  if (srv_n_purge_threads == 0) {
3189  srv_main_thread_op_info = "master purging";
3190 
3191  srv_master_do_purge();
3192 
3193  if (srv_fast_shutdown && srv_shutdown_state > 0) {
3194 
3195  goto background_loop;
3196  }
3197  }
3198 
3199  srv_main_thread_op_info = "flushing buffer pool pages";
3200 
3201  /* Flush a few oldest pages to make a new checkpoint younger */
3202 
3203  if (buf_get_modified_ratio_pct() > 70) {
3204 
3205  /* If there are lots of modified pages in the buffer pool
3206  (> 70 %), we assume we can afford reserving the disk(s) for
3207  the time it requires to flush 100 pages */
3208 
3209  n_pages_flushed = buf_flush_list(
3210  PCT_IO(100), IB_ULONGLONG_MAX);
3211  } else {
3212  /* Otherwise, we only flush a small number of pages so that
3213  we do not unnecessarily use much disk i/o capacity from
3214  other work */
3215 
3216  n_pages_flushed = buf_flush_list(
3217  PCT_IO(10), IB_ULONGLONG_MAX);
3218  }
3219 
3220  srv_main_thread_op_info = "making checkpoint";
3221 
3222  /* Make a new checkpoint about once in 10 seconds */
3223 
3224  log_checkpoint(TRUE, FALSE);
3225 
3226  srv_main_thread_op_info = "reserving kernel mutex";
3227 
3228  mutex_enter(&kernel_mutex);
3229 
3230  /* ---- When there is database activity, we jump from here back to
3231  the start of loop */
3232 
3233  if (srv_activity_count != old_activity_count) {
3234  mutex_exit(&kernel_mutex);
3235  goto loop;
3236  }
3237 
3238  mutex_exit(&kernel_mutex);
3239 
3240  /* If the database is quiet, we enter the background loop */
3241 
3242  /*****************************************************************/
3243 background_loop:
3244  /* ---- In this loop we run background operations when the server
3245  is quiet from user activity. Also in the case of a shutdown, we
3246  loop here, flushing the buffer pool to the data files. */
3247 
3248  /* The server has been quiet for a while: start running background
3249  operations */
3250  srv_main_background_loops++;
3251  srv_main_thread_op_info = "doing background drop tables";
3252 
3253  n_tables_to_drop = row_drop_tables_for_mysql_in_background();
3254 
3255  if (n_tables_to_drop > 0) {
3256  /* Do not monopolize the CPU even if there are tables waiting
3257  in the background drop queue. (It is essentially a bug if
3258  MySQL tries to drop a table while there are still open handles
3259  to it and we had to put it to the background drop queue.) */
3260 
3262  os_thread_sleep(100000);
3263  }
3264  }
3265 
3266  if (srv_n_purge_threads == 0) {
3267  srv_main_thread_op_info = "master purging";
3268 
3269  srv_master_do_purge();
3270  }
3271 
3272  srv_main_thread_op_info = "reserving kernel mutex";
3273 
3274  mutex_enter(&kernel_mutex);
3275  if (srv_activity_count != old_activity_count) {
3276  mutex_exit(&kernel_mutex);
3277  goto loop;
3278  }
3279  mutex_exit(&kernel_mutex);
3280 
3281  srv_main_thread_op_info = "doing insert buffer merge";
3282 
3283  if (srv_fast_shutdown && srv_shutdown_state > 0) {
3284  n_bytes_merged = 0;
3285  } else {
3286  /* This should do an amount of IO similar to the number of
3287  dirty pages that will be flushed in the call to
3288  buf_flush_list below. Otherwise, the system favors
3289  clean pages over cleanup throughput. */
3290  n_bytes_merged = ibuf_contract_for_n_pages(FALSE,
3291  PCT_IBUF_IO(100));
3292  }
3293 
3294  srv_main_thread_op_info = "reserving kernel mutex";
3295 
3296  mutex_enter(&kernel_mutex);
3297  if (srv_activity_count != old_activity_count) {
3298  mutex_exit(&kernel_mutex);
3299  goto loop;
3300  }
3301  mutex_exit(&kernel_mutex);
3302 
3303 flush_loop:
3304  srv_main_thread_op_info = "flushing buffer pool pages";
3305  srv_main_flush_loops++;
3306  if (srv_fast_shutdown < 2) {
3307  n_pages_flushed = buf_flush_list(
3308  PCT_IO(100), IB_ULONGLONG_MAX);
3309  } else {
3310  /* In the fastest shutdown we do not flush the buffer pool
3311  to data files: we set n_pages_flushed to 0 artificially. */
3312 
3313  n_pages_flushed = 0;
3314  }
3315 
3316  srv_main_thread_op_info = "reserving kernel mutex";
3317 
3318  mutex_enter(&kernel_mutex);
3319  if (srv_activity_count != old_activity_count) {
3320  mutex_exit(&kernel_mutex);
3321  goto loop;
3322  }
3323  mutex_exit(&kernel_mutex);
3324 
3325  srv_main_thread_op_info = "waiting for buffer pool flush to end";
3326  buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
3327 
3328  /* Flush logs if needed */
3329  srv_sync_log_buffer_in_background();
3330 
3331  srv_main_thread_op_info = "making checkpoint";
3332 
3333  log_checkpoint(TRUE, FALSE);
3334 
3335  if (buf_get_modified_ratio_pct() > srv_max_buf_pool_modified_pct) {
3336 
3337  /* Try to keep the number of modified pages in the
3338  buffer pool under the limit wished by the user */
3339 
3340  goto flush_loop;
3341  }
3342 
3343  srv_main_thread_op_info = "reserving kernel mutex";
3344 
3345  mutex_enter(&kernel_mutex);
3346  if (srv_activity_count != old_activity_count) {
3347  mutex_exit(&kernel_mutex);
3348  goto loop;
3349  }
3350  mutex_exit(&kernel_mutex);
3351  /*
3352  srv_main_thread_op_info = "archiving log (if log archive is on)";
3353 
3354  log_archive_do(FALSE, &n_bytes_archived);
3355  */
3356  n_bytes_archived = 0;
3357 
3358  /* Keep looping in the background loop if still work to do */
3359 
3360  if (srv_fast_shutdown && srv_shutdown_state > 0) {
3361  if (n_tables_to_drop + n_pages_flushed
3362  + n_bytes_archived != 0) {
3363 
3364  /* If we are doing a fast shutdown (= the default)
3365  we do not do purge or insert buffer merge. But we
3366  flush the buffer pool completely to disk.
3367  In a 'very fast' shutdown we do not flush the buffer
3368  pool to data files: we have set n_pages_flushed to
3369  0 artificially. */
3370 
3371  goto background_loop;
3372  }
3373  } else if (n_tables_to_drop
3374  + n_pages_purged + n_bytes_merged + n_pages_flushed
3375  + n_bytes_archived != 0) {
3376  /* In a 'slow' shutdown we run purge and the insert buffer
3377  merge to completion */
3378 
3379  goto background_loop;
3380  }
3381 
3382  /* There is no work for background operations either: suspend
3383  master thread to wait for more server activity */
3384 
3385 suspend_thread:
3386  srv_main_thread_op_info = "suspending";
3387 
3388  mutex_enter(&kernel_mutex);
3389 
3391  mutex_exit(&kernel_mutex);
3392 
3393  goto loop;
3394  }
3395 
3396  event = srv_suspend_thread();
3397 
3398  mutex_exit(&kernel_mutex);
3399 
3400  /* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql()
3401  waits for database activity to die down when converting < 4.1.x
3402  databases, and relies on this string being exactly as it is. InnoDB
3403  manual also mentions this string in several places. */
3404  srv_main_thread_op_info = "waiting for server activity";
3405 
3406  os_event_wait(event);
3407 
3409  /* This is only extra safety, the thread should exit
3410  already when the event wait ends */
3411 
3412  os_thread_exit(NULL);
3413 
3414  }
3415 
3416  /* When there is user activity, InnoDB will set the event and the
3417  main thread goes back to loop. */
3418 
3419  goto loop;
3420 
3421 
3422 #if !defined(__SUNPRO_C)
3423  OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */
3424 #endif
3425 }
3426 
3427 /*********************************************************************/
3430 UNIV_INTERN
3431 os_thread_ret_t
3433 /*=============*/
3434  void* /*arg __attribute__((unused))*/)
3436 {
3437  srv_slot_t* slot;
3438  ulint slot_no = ULINT_UNDEFINED;
3439  ulint n_total_purged = ULINT_UNDEFINED;
3440  ulint next_itr_time;
3441 
3442  ut_a(srv_n_purge_threads == 1);
3443 
3444 #ifdef UNIV_DEBUG_THREAD_CREATION
3445  fprintf(stderr, "InnoDB: Purge thread running, id %lu\n",
3447 #endif /* UNIV_DEBUG_THREAD_CREATION */
3448 
3449  mutex_enter(&kernel_mutex);
3450 
3451  slot_no = srv_table_reserve_slot(SRV_WORKER);
3452 
3453  slot = srv_table_get_nth_slot(slot_no);
3454 
3455  ++srv_n_threads_active[SRV_WORKER];
3456 
3457  mutex_exit(&kernel_mutex);
3458 
3459  next_itr_time = ut_time_ms();
3460 
3462 
3463  ulint n_pages_purged;
3464  ulint cur_time;
3465 
3466  /* If there are very few records to purge or the last
3467  purge didn't purge any records then wait for activity.
3468  We peek at the history len without holding any mutex
3469  because in the worst case we will end up waiting for
3470  the next purge event. */
3471  if (trx_sys->rseg_history_len < srv_purge_batch_size
3472  || n_total_purged == 0) {
3473 
3474  os_event_t event;
3475 
3476  mutex_enter(&kernel_mutex);
3477 
3478  event = srv_suspend_thread();
3479 
3480  mutex_exit(&kernel_mutex);
3481 
3482  os_event_wait(event);
3483  }
3484 
3485  /* Check for shutdown and whether we should do purge at all. */
3486  if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND
3487  || srv_shutdown_state != 0
3488  || srv_fast_shutdown) {
3489 
3490  break;
3491  }
3492 
3493  n_total_purged = 0;
3494 
3495  /* Purge until there are no more records to purge and there is
3496  no change in configuration or server state. */
3497  do {
3498  n_pages_purged = trx_purge(srv_purge_batch_size);
3499 
3500  n_total_purged += n_pages_purged;
3501 
3502  } while (n_pages_purged > 0 && !srv_fast_shutdown);
3503 
3504  srv_sync_log_buffer_in_background();
3505 
3506  cur_time = ut_time_ms();
3507  if (next_itr_time > cur_time) {
3508  os_thread_sleep(ut_min(1000000,
3509  (next_itr_time - cur_time)
3510  * 1000));
3511  next_itr_time = ut_time_ms() + 1000;
3512  } else {
3513  next_itr_time = cur_time + 1000;
3514  }
3515  }
3516 
3517  mutex_enter(&kernel_mutex);
3518 
3519  ut_ad(srv_table_get_nth_slot(slot_no) == slot);
3520 
3521  /* Decrement the active count. */
3522  srv_suspend_thread();
3523 
3524  slot->in_use = FALSE;
3525 
3526  /* Free the thread local memory. */
3528 
3529  mutex_exit(&kernel_mutex);
3530 
3531 #ifdef UNIV_DEBUG_THREAD_CREATION
3532  fprintf(stderr, "InnoDB: Purge thread exiting, id %lu\n",
3534 #endif /* UNIV_DEBUG_THREAD_CREATION */
3535 
3536  /* We count the number of threads in os_thread_exit(). A created
3537  thread should always use that to exit and not use return() to exit. */
3538  os_thread_exit(NULL);
3539 
3540  OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */
3541 }
3542 
3543 /**********************************************************************/
3546 UNIV_INTERN
3547 void
3549 /*=====================*/
3550  que_thr_t* thr)
3551 {
3552  ut_ad(thr);
3553 
3554  mutex_enter(&kernel_mutex);
3555 
3556  UT_LIST_ADD_LAST(queue, srv_sys->tasks, thr);
3557 
3559 
3560  mutex_exit(&kernel_mutex);
3561 }