Drizzled Public API Documentation

buf0lru.cc
1 /*****************************************************************************
2 
3 Copyright (C) 1995, 2010, Innobase Oy. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free Software
7 Foundation; version 2 of the License.
8 
9 This program is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12 
13 You should have received a copy of the GNU General Public License along with
14 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
15 St, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 *****************************************************************************/
18 
19 /**************************************************//**
20 @file buf/buf0lru.cc
21 The database buffer replacement algorithm
22 
23 Created 11/5/1995 Heikki Tuuri
24 *******************************************************/
25 
26 #include <config.h>
27 #include "buf0lru.h"
28 
29 #ifdef UNIV_NONINL
30 #include "buf0lru.ic"
31 #endif
32 
33 #include "ut0byte.h"
34 #include "ut0lst.h"
35 #include "ut0rnd.h"
36 #include "sync0sync.h"
37 #include "sync0rw.h"
38 #include "hash0hash.h"
39 #include "os0sync.h"
40 #include "fil0fil.h"
41 #include "btr0btr.h"
42 #include "buf0buddy.h"
43 #include "buf0buf.h"
44 #include "buf0flu.h"
45 #include "buf0rea.h"
46 #include "btr0sea.h"
47 #include "ibuf0ibuf.h"
48 #include "os0file.h"
49 #include "page0zip.h"
50 #include "log0recv.h"
51 #include "srv0srv.h"
52 
60 #define BUF_LRU_OLD_TOLERANCE 20
61 
65 #define BUF_LRU_NON_OLD_MIN_LEN 5
66 #if BUF_LRU_NON_OLD_MIN_LEN >= BUF_LRU_OLD_MIN_LEN
67 # error "BUF_LRU_NON_OLD_MIN_LEN >= BUF_LRU_OLD_MIN_LEN"
68 #endif
69 
73 #define BUF_LRU_DROP_SEARCH_HASH_SIZE 1024
74 
77 static ibool buf_lru_switched_on_innodb_mon = FALSE;
78 
79 /******************************************************************/
88 /* @{ */
89 
93 #define BUF_LRU_STAT_N_INTERVAL 50
94 
97 #define BUF_LRU_IO_TO_UNZIP_FACTOR 50
98 
101 static buf_LRU_stat_t buf_LRU_stat_arr[BUF_LRU_STAT_N_INTERVAL];
102 
104 static ulint buf_LRU_stat_arr_ind;
105 
108 UNIV_INTERN buf_LRU_stat_t buf_LRU_stat_cur;
109 
112 UNIV_INTERN buf_LRU_stat_t buf_LRU_stat_sum;
113 
114 /* @} */
115 
119 UNIV_INTERN uint buf_LRU_old_threshold_ms;
120 /* @} */
121 
122 /******************************************************************/
132 static
133 enum buf_page_state
134 buf_LRU_block_remove_hashed_page(
135 /*=============================*/
136  buf_page_t* bpage,
139  ibool zip);
141 /******************************************************************/
143 static
144 void
145 buf_LRU_block_free_hashed_page(
146 /*===========================*/
147  buf_block_t* block);
150 /******************************************************************/
154 UNIV_INLINE
155 ibool
156 buf_LRU_evict_from_unzip_LRU(
157 /*=========================*/
158  buf_pool_t* buf_pool)
159 {
160  ulint io_avg;
161  ulint unzip_avg;
162 
163  ut_ad(buf_pool_mutex_own(buf_pool));
164 
165  /* If the unzip_LRU list is empty, we can only use the LRU. */
166  if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
167  return(FALSE);
168  }
169 
170  /* If unzip_LRU is at most 10% of the size of the LRU list,
171  then use the LRU. This slack allows us to keep hot
172  decompressed pages in the buffer pool. */
173  if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
174  <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
175  return(FALSE);
176  }
177 
178  /* If eviction hasn't started yet, we assume by default
179  that the workload is disk bound. */
180  if (buf_pool->freed_page_clock == 0) {
181  return(TRUE);
182  }
183 
184  /* Calculate the average over past intervals, and add the values
185  of the current interval. */
186  io_avg = buf_LRU_stat_sum.io / BUF_LRU_STAT_N_INTERVAL
187  + buf_LRU_stat_cur.io;
188  unzip_avg = buf_LRU_stat_sum.unzip / BUF_LRU_STAT_N_INTERVAL
189  + buf_LRU_stat_cur.unzip;
190 
191  /* Decide based on our formula. If the load is I/O bound
192  (unzip_avg is smaller than the weighted io_avg), evict an
193  uncompressed frame from unzip_LRU. Otherwise we assume that
194  the load is CPU bound and evict from the regular LRU. */
195  return(unzip_avg <= io_avg * BUF_LRU_IO_TO_UNZIP_FACTOR);
196 }
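
The return value above comes down to one comparison: the decompression rate against the I/O rate weighted by BUF_LRU_IO_TO_UNZIP_FACTOR. A minimal standalone sketch of the same arithmetic, with invented counter values (the real ones accumulate in buf_LRU_stat_cur and buf_LRU_stat_sum):

	/* sketch only: stat values are invented for illustration */
	#include <stdio.h>

	#define N_INTERVAL 50           /* BUF_LRU_STAT_N_INTERVAL */
	#define IO_TO_UNZIP_FACTOR 50   /* BUF_LRU_IO_TO_UNZIP_FACTOR */

	int main(void)
	{
		unsigned long sum_io = 500000, cur_io = 200;     /* past + current I/O */
		unsigned long sum_unzip = 2000, cur_unzip = 10;  /* decompressions */

		unsigned long io_avg = sum_io / N_INTERVAL + cur_io;
		unsigned long unzip_avg = sum_unzip / N_INTERVAL + cur_unzip;

		if (unzip_avg <= io_avg * IO_TO_UNZIP_FACTOR)
			puts("I/O bound: evict an uncompressed frame from unzip_LRU");
		else
			puts("CPU bound: evict from the regular LRU");
		return 0;
	}
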
197 
198 /******************************************************************/
201 static
202 void
203 buf_LRU_drop_page_hash_batch(
204 /*=========================*/
205  ulint space_id,
206  ulint zip_size,
208  const ulint* arr,
209  ulint count)
210 {
211  ulint i;
212 
213  ut_ad(arr != NULL);
214  ut_ad(count <= BUF_LRU_DROP_SEARCH_HASH_SIZE);
215 
216  for (i = 0; i < count; ++i) {
217  btr_search_drop_page_hash_when_freed(space_id, zip_size,
218  arr[i]);
219  }
220 }
221 
222 /******************************************************************/
227 static
228 void
229 buf_LRU_drop_page_hash_for_tablespace(
230 /*==================================*/
231  buf_pool_t* buf_pool,
232  ulint id)
233 {
234  buf_page_t* bpage;
235  ulint* page_arr;
236  ulint num_entries;
237  ulint zip_size;
238 
239  zip_size = fil_space_get_zip_size(id);
240 
241  if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
242  /* Somehow, the tablespace does not exist. Nothing to drop. */
243  ut_ad(0);
244  return;
245  }
246 
247  page_arr = static_cast<unsigned long *>(ut_malloc(
248  sizeof(ulint) * BUF_LRU_DROP_SEARCH_HASH_SIZE));
249 
250  buf_pool_mutex_enter(buf_pool);
251 
252 scan_again:
253  num_entries = 0;
254  bpage = UT_LIST_GET_LAST(buf_pool->LRU);
255 
256  while (bpage != NULL) {
257  mutex_t* block_mutex = buf_page_get_mutex(bpage);
258  buf_page_t* prev_bpage;
259 
260  mutex_enter(block_mutex);
261  prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
262 
263  ut_a(buf_page_in_file(bpage));
264 
265  if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
266  || bpage->space != id
267  || bpage->buf_fix_count > 0
268  || bpage->io_fix != BUF_IO_NONE) {
269  /* We leave the fixed pages as they are in this
270  scan; they will be dealt with in the final scan. */
271  mutex_exit(block_mutex);
272  goto next_page;
273  }
274 
275  if (((buf_block_t*) bpage)->is_hashed) {
276 
277  /* Store the offset(i.e.: page_no) in the array
278  so that we can drop hash index in a batch
279  later. */
280  page_arr[num_entries] = bpage->offset;
281  mutex_exit(block_mutex);
282  ut_a(num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE);
283  ++num_entries;
284 
285  if (num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE) {
286  goto next_page;
287  }
288 
289  /* Array full. We release the buf_pool->mutex to
290  obey the latching order. */
291  buf_pool_mutex_exit(buf_pool);
292 
293  buf_LRU_drop_page_hash_batch(
294  id, zip_size, page_arr, num_entries);
295 
296  num_entries = 0;
297 
298  buf_pool_mutex_enter(buf_pool);
299  } else {
300  mutex_exit(block_mutex);
301  }
302 
303 next_page:
304  /* Note that we may have released the buf_pool mutex
305  above after reading the prev_bpage during processing
306  of a page_hash_batch (i.e.: when the array was full).
307  This means that prev_bpage can change in the LRU list.
308  This is OK because this function is a 'best effort'
309  to drop as many search hash entries as possible and
310  it does not guarantee that ALL such entries will be
311  dropped. */
312  bpage = prev_bpage;
313 
314  /* If, however, bpage has been removed from LRU list
315  to the free list then we should restart the scan.
316  bpage->state is protected by buf_pool mutex. */
317  if (bpage && !buf_page_in_file(bpage)) {
318  ut_a(num_entries == 0);
319  goto scan_again;
320  }
321  }
322 
323  buf_pool_mutex_exit(buf_pool);
324 
325  /* Drop any remaining batch of search hashed pages. */
326  buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
327  ut_free(page_arr);
328 }
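
The function above shows a pattern that recurs in this file: page numbers are collected into a fixed-size array while buf_pool->mutex is held, and the batch is processed only after the mutex is released, preserving the latching order with the adaptive hash index latches. A self-contained sketch of that collect-then-drain shape (lock(), unlock() and drop_hash() are stand-ins for the buf_pool mutex calls and btr_search_drop_page_hash_when_freed()):

	#include <stdio.h>

	#define BATCH 4   /* cf. BUF_LRU_DROP_SEARCH_HASH_SIZE, which is 1024 */

	static void lock(void)   { puts("mutex acquired"); }
	static void unlock(void) { puts("mutex released"); }
	static void drop_hash(unsigned long page_no) { printf("drop %lu\n", page_no); }

	int main(void)
	{
		unsigned long arr[BATCH];
		unsigned long n = 0;
		unsigned long page_no;

		lock();
		for (page_no = 0; page_no < 10; page_no++) {
			arr[n++] = page_no;
			if (n == BATCH) {
				/* array full: release the mutex, process the
				batch outside it, then resume the scan */
				unlock();
				for (unsigned long i = 0; i < n; i++)
					drop_hash(arr[i]);
				n = 0;
				lock();
			}
		}
		unlock();
		for (unsigned long i = 0; i < n; i++)   /* remaining partial batch */
			drop_hash(arr[i]);
		return 0;
	}
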
329 
330 /******************************************************************/
334 static
335 void
336 buf_LRU_invalidate_tablespace_buf_pool_instance(
337 /*============================================*/
338  buf_pool_t* buf_pool,
339  ulint id)
340 {
341  buf_page_t* bpage;
342  ibool all_freed;
343 
344 scan_again:
345  buf_pool_mutex_enter(buf_pool);
346 
347  all_freed = TRUE;
348 
349  bpage = UT_LIST_GET_LAST(buf_pool->LRU);
350 
351  while (bpage != NULL) {
352  buf_page_t* prev_bpage;
353  ibool prev_bpage_buf_fix = FALSE;
354 
355  ut_a(buf_page_in_file(bpage));
356 
357  prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
358 
359  /* bpage->space and bpage->io_fix are protected by
360  buf_pool->mutex and block_mutex. It is safe to check
361  them while holding buf_pool->mutex only. */
362 
363  if (buf_page_get_space(bpage) != id) {
364  /* Skip this block, as it does not belong to
365  the space that is being invalidated. */
366  } else if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
367  /* We cannot remove this page during this scan
368  yet; maybe the system is currently reading it
369  in, or flushing the modifications to the file */
370 
371  all_freed = FALSE;
372  } else {
373  mutex_t* block_mutex = buf_page_get_mutex(bpage);
374  mutex_enter(block_mutex);
375 
376  if (bpage->buf_fix_count > 0) {
377 
378  /* We cannot remove this page during
379  this scan yet; maybe the system is
380  currently reading it in, or flushing
381  the modifications to the file */
382 
383  all_freed = FALSE;
384 
385  goto next_page;
386  }
387 
388 #ifdef UNIV_DEBUG
389  if (buf_debug_prints) {
390  fprintf(stderr,
391  "Dropping space %lu page %lu\n",
392  (ulong) buf_page_get_space(bpage),
393  (ulong) buf_page_get_page_no(bpage));
394  }
395 #endif
396  if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
397  /* This is a compressed-only block
398  descriptor. Ensure that prev_bpage
399  cannot be relocated when bpage is freed. */
400  if (UNIV_LIKELY(prev_bpage != NULL)) {
401  switch (buf_page_get_state(
402  prev_bpage)) {
403  case BUF_BLOCK_FILE_PAGE:
404  /* Descriptors of uncompressed
405  blocks will not be relocated,
406  because we are holding the
407  buf_pool->mutex. */
408  break;
409  case BUF_BLOCK_ZIP_PAGE:
410  case BUF_BLOCK_ZIP_DIRTY:
411  /* Descriptors of compressed-
412  only blocks can be relocated,
413  unless they are buffer-fixed.
414  Because both bpage and
415  prev_bpage are protected by
416  buf_pool_zip_mutex, it is
417  not necessary to acquire
418  further mutexes. */
419  ut_ad(&buf_pool->zip_mutex
420  == block_mutex);
421  ut_ad(mutex_own(block_mutex));
422  prev_bpage_buf_fix = TRUE;
423  prev_bpage->buf_fix_count++;
424  break;
425  default:
426  ut_error;
427  }
428  }
429  } else if (((buf_block_t*) bpage)->is_hashed) {
430  ulint page_no;
431  ulint zip_size;
432 
433  buf_pool_mutex_exit(buf_pool);
434 
435  zip_size = buf_page_get_zip_size(bpage);
436  page_no = buf_page_get_page_no(bpage);
437 
438  mutex_exit(block_mutex);
439 
440  /* Note that the following call will acquire
441  an S-latch on the page */
442 
443  btr_search_drop_page_hash_when_freed(
444  id, zip_size, page_no);
445  goto scan_again;
446  }
447 
448  if (bpage->oldest_modification != 0) {
449 
450  buf_flush_remove(bpage);
451  }
452 
453  /* Remove from the LRU list. */
454 
455  if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
456  != BUF_BLOCK_ZIP_FREE) {
457  buf_LRU_block_free_hashed_page((buf_block_t*)
458  bpage);
459  } else {
460  /* The block_mutex should have been
461  released by buf_LRU_block_remove_hashed_page()
462  when it returns BUF_BLOCK_ZIP_FREE. */
463  ut_ad(block_mutex == &buf_pool->zip_mutex);
464  ut_ad(!mutex_own(block_mutex));
465 
466  if (prev_bpage_buf_fix) {
467  /* We temporarily buffer-fixed
468  prev_bpage, so that
469  buf_buddy_free() could not
470  relocate it, in case it was a
471  compressed-only block
472  descriptor. */
473 
474  mutex_enter(block_mutex);
475  ut_ad(prev_bpage->buf_fix_count > 0);
476  prev_bpage->buf_fix_count--;
477  mutex_exit(block_mutex);
478  }
479 
480  goto next_page_no_mutex;
481  }
482 next_page:
483  mutex_exit(block_mutex);
484  }
485 
486 next_page_no_mutex:
487  bpage = prev_bpage;
488  }
489 
490  buf_pool_mutex_exit(buf_pool);
491 
492  if (!all_freed) {
493  os_thread_sleep(20000);
494 
495  goto scan_again;
496  }
497 }
498 
499 /******************************************************************/
502 UNIV_INTERN
503 void
504 buf_LRU_invalidate_tablespace(
505 /*==========================*/
506  ulint id)
507 {
508  ulint i;
509 
510  /* Before we attempt to drop pages one by one we first
511  attempt to drop page hash index entries in batches to make
512  it more efficient. The batching is a best-effort attempt
513  and does not guarantee that all page hash entries
514  will be dropped. We get rid of remaining page hash entries
515  one by one below. */
516  for (i = 0; i < srv_buf_pool_instances; i++) {
517  buf_pool_t* buf_pool;
518 
519  buf_pool = buf_pool_from_array(i);
520  buf_LRU_drop_page_hash_for_tablespace(buf_pool, id);
521  buf_LRU_invalidate_tablespace_buf_pool_instance(buf_pool, id);
522  }
523 }
524 
525 /********************************************************************/
527 UNIV_INTERN
528 void
529 buf_LRU_insert_zip_clean(
530 /*=====================*/
531  buf_page_t* bpage)
532 {
533  buf_page_t* b;
534  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
535 
536  ut_ad(buf_pool_mutex_own(buf_pool));
537  ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
538 
539  /* Find the first successor of bpage in the LRU list
540  that is in the zip_clean list. */
541  b = bpage;
542  do {
543  b = UT_LIST_GET_NEXT(LRU, b);
544  } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
545 
546  /* Insert bpage before b, i.e., after the predecessor of b. */
547  if (b) {
548  b = UT_LIST_GET_PREV(list, b);
549  }
550 
551  if (b) {
552  UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage);
553  } else {
554  UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage);
555  }
556 }
557 
558 /******************************************************************/
562 UNIV_INLINE
563 ibool
564 buf_LRU_free_from_unzip_LRU_list(
565 /*=============================*/
566  buf_pool_t* buf_pool,
567  ulint n_iterations)
574 {
575  buf_block_t* block;
576  ulint distance;
577 
578  ut_ad(buf_pool_mutex_own(buf_pool));
579 
580  /* Theoretically it should be much easier to find a victim
581  from unzip_LRU, as we can choose even a dirty block (we will
582  be evicting only the uncompressed frame). In the very unlikely
583  event that we are unable to find a victim from unzip_LRU, we
584  fall back to the regular LRU list. We do this if we have done
585  five iterations so far. */
586 
587  if (UNIV_UNLIKELY(n_iterations >= 5)
588  || !buf_LRU_evict_from_unzip_LRU(buf_pool)) {
589 
590  return(FALSE);
591  }
592 
593  distance = 100 + (n_iterations
594  * UT_LIST_GET_LEN(buf_pool->unzip_LRU)) / 5;
595 
596  for (block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
597  UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
598  block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
599 
600  enum buf_lru_free_block_status freed;
601 
602  ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
603  ut_ad(block->in_unzip_LRU_list);
604  ut_ad(block->page.in_LRU_list);
605 
606  mutex_enter(&block->mutex);
607  freed = buf_LRU_free_block(&block->page, FALSE, NULL);
608  mutex_exit(&block->mutex);
609 
610  switch (freed) {
611  case BUF_LRU_FREED:
612  return(TRUE);
613 
613 
614  case BUF_LRU_CANNOT_RELOCATE:
615  /* If we failed to relocate, try
616  regular LRU eviction. */
617  return(FALSE);
618 
619  case BUF_LRU_NOT_FREED:
620  /* The block was buffer-fixed or I/O-fixed.
621  Keep looking. */
622  continue;
623  }
624 
625  /* inappropriate return value from
626  buf_LRU_free_block() */
627  ut_error;
628  }
629 
630  return(FALSE);
631 }
632 
633 /******************************************************************/
636 UNIV_INLINE
637 ibool
638 buf_LRU_free_from_common_LRU_list(
639 /*==============================*/
640  buf_pool_t* buf_pool,
641  ulint n_iterations)
648 {
649  buf_page_t* bpage;
650  ulint distance;
651 
652  ut_ad(buf_pool_mutex_own(buf_pool));
653 
654  distance = 100 + (n_iterations * buf_pool->curr_size) / 10;
655 
656  for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
657  UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
658  bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
659 
660  enum buf_lru_free_block_status freed;
661  unsigned accessed;
662  mutex_t* block_mutex
663  = buf_page_get_mutex(bpage);
664 
665  ut_ad(buf_page_in_file(bpage));
666  ut_ad(bpage->in_LRU_list);
667 
668  mutex_enter(block_mutex);
669  accessed = buf_page_is_accessed(bpage);
670  freed = buf_LRU_free_block(bpage, TRUE, NULL);
671  mutex_exit(block_mutex);
672 
673  switch (freed) {
674  case BUF_LRU_FREED:
675  /* Keep track of pages that are evicted without
676  ever being accessed. This gives us a measure of
677  the effectiveness of readahead */
678  if (!accessed) {
679  ++buf_pool->stat.n_ra_pages_evicted;
680  }
681  return(TRUE);
682 
683  case BUF_LRU_NOT_FREED:
684  /* The block was dirty, buffer-fixed, or I/O-fixed.
685  Keep looking. */
686  continue;
687 
688  case BUF_LRU_CANNOT_RELOCATE:
689  /* This should never occur, because we
690  want to discard the compressed page too. */
691  break;
692  }
693 
694  /* inappropriate return value from
695  buf_LRU_free_block() */
696  ut_error;
697  }
698 
699  return(FALSE);
700 }
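
In both free-block loops above the scan depth is not constant: it starts at roughly 100 blocks from the tail and grows with every failed call, so repeated invocations search progressively deeper into the list. The growth is easy to tabulate (curr_size here is an invented pool size in pages):

	#include <stdio.h>

	int main(void)
	{
		unsigned long curr_size = 8192;   /* invented pool size, in pages */
		unsigned long n;

		for (n = 0; n < 4; n++)
			printf("n_iterations=%lu  distance=%lu\n",
			       n, 100 + (n * curr_size) / 10);
		/* prints 100, 919, 1738, 2557: each retry scans deeper */
		return 0;
	}
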
701 
702 /******************************************************************/
705 UNIV_INTERN
706 ibool
707 buf_LRU_search_and_free_block(
708 /*==========================*/
709  buf_pool_t* buf_pool,
711  ulint n_iterations)
720 {
721  ibool freed = FALSE;
722 
723  buf_pool_mutex_enter(buf_pool);
724 
725  freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, n_iterations);
726 
727  if (!freed) {
728  freed = buf_LRU_free_from_common_LRU_list(
729  buf_pool, n_iterations);
730  }
731 
732  if (!freed) {
733  buf_pool->LRU_flush_ended = 0;
734  } else if (buf_pool->LRU_flush_ended > 0) {
735  buf_pool->LRU_flush_ended--;
736  }
737 
738  buf_pool_mutex_exit(buf_pool);
739 
740  return(freed);
741 }
742 
743 /******************************************************************/
751 UNIV_INTERN
752 void
753 buf_LRU_try_free_flushed_blocks(
754 /*============================*/
755  buf_pool_t* buf_pool)
756 {
757 
758  if (buf_pool == NULL) {
759  ulint i;
760 
761  for (i = 0; i < srv_buf_pool_instances; i++) {
762  buf_pool = buf_pool_from_array(i);
763  buf_LRU_try_free_flushed_blocks(buf_pool);
764  }
765  } else {
766  buf_pool_mutex_enter(buf_pool);
767 
768  while (buf_pool->LRU_flush_ended > 0) {
769 
770  buf_pool_mutex_exit(buf_pool);
771 
772  buf_LRU_search_and_free_block(buf_pool, 1);
773 
774  buf_pool_mutex_enter(buf_pool);
775  }
776 
777  buf_pool_mutex_exit(buf_pool);
778  }
779 }
780 
781 /******************************************************************/
786 UNIV_INTERN
787 ibool
788 buf_LRU_buf_pool_running_out(void)
789 /*==============================*/
790 {
791  ulint i;
792  ibool ret = FALSE;
793 
794  for (i = 0; i < srv_buf_pool_instances && !ret; i++) {
795  buf_pool_t* buf_pool;
796 
797  buf_pool = buf_pool_from_array(i);
798 
799  buf_pool_mutex_enter(buf_pool);
800 
801  if (!recv_recovery_on
802  && UT_LIST_GET_LEN(buf_pool->free)
803  + UT_LIST_GET_LEN(buf_pool->LRU)
804  < buf_pool->curr_size / 4) {
805 
806  ret = TRUE;
807  }
808 
809  buf_pool_mutex_exit(buf_pool);
810  }
811 
812  return(ret);
813 }
814 
815 /******************************************************************/
819 UNIV_INTERN
820 buf_block_t*
821 buf_LRU_get_free_only(
822 /*==================*/
823  buf_pool_t* buf_pool)
824 {
825  buf_block_t* block;
826 
827  ut_ad(buf_pool_mutex_own(buf_pool));
828 
829  block = (buf_block_t*) UT_LIST_GET_FIRST(buf_pool->free);
830 
831  if (block) {
832 
833  ut_ad(block->page.in_free_list);
834  ut_d(block->page.in_free_list = FALSE);
835  ut_ad(!block->page.in_flush_list);
836  ut_ad(!block->page.in_LRU_list);
837  ut_a(!buf_page_in_file(&block->page));
838  UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
839 
840  mutex_enter(&block->mutex);
841 
842  buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE);
843  UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
844 
845  ut_ad(buf_pool_from_block(block) == buf_pool);
846 
847  mutex_exit(&block->mutex);
848  }
849 
850  return(block);
851 }
852 
853 /******************************************************************/
858 UNIV_INTERN
859 buf_block_t*
860 buf_LRU_get_free_block(
861 /*===================*/
862  buf_pool_t* buf_pool,
863  ulint zip_size)
865 {
866  buf_block_t* block = NULL;
867  ibool freed;
868  ulint n_iterations = 1;
869  ibool mon_value_was = FALSE;
870  ibool started_monitor = FALSE;
871 loop:
872  buf_pool_mutex_enter(buf_pool);
873 
874  if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
875  + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
876  ut_print_timestamp(stderr);
877 
878  fprintf(stderr,
879  " InnoDB: ERROR: over 95 percent of the buffer pool"
880  " is occupied by\n"
881  "InnoDB: lock heaps or the adaptive hash index!"
882  " Check that your\n"
883  "InnoDB: transactions do not set too many row locks.\n"
884  "InnoDB: Your buffer pool size is %lu MB."
885  " Maybe you should make\n"
886  "InnoDB: the buffer pool bigger?\n"
887  "InnoDB: We intentionally generate a seg fault"
888  " to print a stack trace\n"
889  "InnoDB: on Linux!\n",
890  (ulong) (buf_pool->curr_size
891  / (1024 * 1024 / UNIV_PAGE_SIZE)));
892 
893  ut_error;
894 
895  } else if (!recv_recovery_on
896  && (UT_LIST_GET_LEN(buf_pool->free)
897  + UT_LIST_GET_LEN(buf_pool->LRU))
898  < buf_pool->curr_size / 3) {
899 
900  if (!buf_lru_switched_on_innodb_mon) {
901 
902  /* Over 67 % of the buffer pool is occupied by lock
903  heaps or the adaptive hash index. This may be a memory
904  leak! */
905 
906  ut_print_timestamp(stderr);
907  fprintf(stderr,
908  " InnoDB: WARNING: over 67 percent of"
909  " the buffer pool is occupied by\n"
910  "InnoDB: lock heaps or the adaptive"
911  " hash index! Check that your\n"
912  "InnoDB: transactions do not set too many"
913  " row locks.\n"
914  "InnoDB: Your buffer pool size is %lu MB."
915  " Maybe you should make\n"
916  "InnoDB: the buffer pool bigger?\n"
917  "InnoDB: Starting the InnoDB Monitor to print"
918  " diagnostics, including\n"
919  "InnoDB: lock heap and hash index sizes.\n",
920  (ulong) (buf_pool->curr_size
921  / (1024 * 1024 / UNIV_PAGE_SIZE)));
922 
923  buf_lru_switched_on_innodb_mon = TRUE;
924  srv_print_innodb_monitor = TRUE;
925  os_event_set(srv_lock_timeout_thread_event);
926  }
927  } else if (buf_lru_switched_on_innodb_mon) {
928 
929  /* Switch off the InnoDB Monitor; this is a simple way
930  to stop the monitor if the situation becomes less urgent,
931  but may also surprise users if the user also switched on the
932  monitor! */
933 
934  buf_lru_switched_on_innodb_mon = FALSE;
935  srv_print_innodb_monitor = FALSE;
936  }
937 
938  /* If there is a block in the free list, take it */
939  block = buf_LRU_get_free_only(buf_pool);
940  if (block) {
941 
942  ut_ad(buf_pool_from_block(block) == buf_pool);
943 
944 #ifdef UNIV_DEBUG
945  block->page.zip.m_start =
946 #endif /* UNIV_DEBUG */
947  block->page.zip.m_end =
948  block->page.zip.m_nonempty =
949  block->page.zip.n_blobs = 0;
950 
951  if (UNIV_UNLIKELY(zip_size)) {
952  ibool lru;
953  page_zip_set_size(&block->page.zip, zip_size);
954 
955  block->page.zip.data = static_cast<unsigned char *>(buf_buddy_alloc(
956  buf_pool, zip_size, &lru));
957 
958  UNIV_MEM_DESC(block->page.zip.data, zip_size, block);
959  } else {
960  page_zip_set_size(&block->page.zip, 0);
961  block->page.zip.data = NULL;
962  }
963 
964  buf_pool_mutex_exit(buf_pool);
965 
966  if (started_monitor) {
967  srv_print_innodb_monitor = mon_value_was;
968  }
969 
970  return(block);
971  }
972 
973  /* If no block was in the free list, search from the end of the LRU
974  list and try to free a block there */
975 
976  buf_pool_mutex_exit(buf_pool);
977 
978  freed = buf_LRU_search_and_free_block(buf_pool, n_iterations);
979 
980  if (freed > 0) {
981  goto loop;
982  }
983 
984  if (n_iterations > 30) {
985  ut_print_timestamp(stderr);
986  fprintf(stderr,
987  " InnoDB: Warning: difficult to find free blocks in\n"
988  "InnoDB: the buffer pool (%lu search iterations)!"
989  " Consider\n"
990  "InnoDB: increasing the buffer pool size.\n"
991  "InnoDB: It is also possible that"
992  " in your Unix version\n"
993  "InnoDB: fsync is very slow, or"
994  " completely frozen inside\n"
995  "InnoDB: the OS kernel. Then upgrading to"
996  " a newer version\n"
997  "InnoDB: of your operating system may help."
998  " Look at the\n"
999  "InnoDB: number of fsyncs in diagnostic info below.\n"
1000  "InnoDB: Pending flushes (fsync) log: %lu;"
1001  " buffer pool: %lu\n"
1002  "InnoDB: %lu OS file reads, %lu OS file writes,"
1003  " %lu OS fsyncs\n"
1004  "InnoDB: Starting InnoDB Monitor to print further\n"
1005  "InnoDB: diagnostics to the standard output.\n",
1006  (ulong) n_iterations,
1007  (ulong) fil_n_pending_log_flushes,
1008  (ulong) fil_n_pending_tablespace_flushes,
1009  (ulong) os_n_file_reads, (ulong) os_n_file_writes,
1010  (ulong) os_n_fsyncs);
1011 
1012  mon_value_was = srv_print_innodb_monitor;
1013  started_monitor = TRUE;
1014  srv_print_innodb_monitor = TRUE;
1015  os_event_set(srv_lock_timeout_thread_event);
1016  }
1017 
1018  /* No free block was found: try to flush the LRU list */
1019 
1020  buf_flush_free_margin(buf_pool);
1021  ++srv_buf_pool_wait_free;
1022 
1023  os_aio_simulated_wake_handler_threads();
1024 
1025  buf_pool_mutex_enter(buf_pool);
1026 
1027  if (buf_pool->LRU_flush_ended > 0) {
1028  /* We have written pages in an LRU flush. To make the insert
1029  buffer more efficient, we try to move these pages to the free
1030  list. */
1031 
1032  buf_pool_mutex_exit(buf_pool);
1033 
1034  buf_LRU_try_free_flushed_blocks(buf_pool);
1035  } else {
1036  buf_pool_mutex_exit(buf_pool);
1037  }
1038 
1039  if (n_iterations > 10) {
1040 
1041  os_thread_sleep(500000);
1042  }
1043 
1044  n_iterations++;
1045 
1046  goto loop;
1047 }
1048 
1049 /*******************************************************************/
1052 UNIV_INLINE
1053 void
1054 buf_LRU_old_adjust_len(
1055 /*===================*/
1056  buf_pool_t* buf_pool)
1057 {
1058  ulint old_len;
1059  ulint new_len;
1060 
1061  ut_a(buf_pool->LRU_old);
1062  ut_ad(buf_pool_mutex_own(buf_pool));
1065 #if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)
1066 # error "BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)"
1067 #endif
1068 #ifdef UNIV_LRU_DEBUG
1069  /* buf_pool->LRU_old must be the first item in the LRU list
1070  whose "old" flag is set. */
1071  ut_a(buf_pool->LRU_old->old);
1072  ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
1073  || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
1074  ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
1075  || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
1076 #endif /* UNIV_LRU_DEBUG */
1077 
1078  old_len = buf_pool->LRU_old_len;
1079  new_len = ut_min(UT_LIST_GET_LEN(buf_pool->LRU)
1080  * buf_pool->LRU_old_ratio / BUF_LRU_OLD_RATIO_DIV,
1081  UT_LIST_GET_LEN(buf_pool->LRU)
1082  - (BUF_LRU_OLD_TOLERANCE
1083  + BUF_LRU_NON_OLD_MIN_LEN));
1084 
1085  for (;;) {
1086  buf_page_t* LRU_old = buf_pool->LRU_old;
1087 
1088  ut_a(LRU_old);
1089  ut_ad(LRU_old->in_LRU_list);
1090 #ifdef UNIV_LRU_DEBUG
1091  ut_a(LRU_old->old);
1092 #endif /* UNIV_LRU_DEBUG */
1093 
1094  /* Update the LRU_old pointer if necessary */
1095 
1096  if (old_len + BUF_LRU_OLD_TOLERANCE < new_len) {
1097 
1098  buf_pool->LRU_old = LRU_old = UT_LIST_GET_PREV(
1099  LRU, LRU_old);
1100 #ifdef UNIV_LRU_DEBUG
1101  ut_a(!LRU_old->old);
1102 #endif /* UNIV_LRU_DEBUG */
1103  old_len = ++buf_pool->LRU_old_len;
1104  buf_page_set_old(LRU_old, TRUE);
1105 
1106  } else if (old_len > new_len + BUF_LRU_OLD_TOLERANCE) {
1107 
1108  buf_pool->LRU_old = UT_LIST_GET_NEXT(LRU, LRU_old);
1109  old_len = --buf_pool->LRU_old_len;
1110  buf_page_set_old(LRU_old, FALSE);
1111  } else {
1112  return;
1113  }
1114  }
1115 }
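
new_len above is the target length of the "old" sublist: a fixed-point fraction of the whole LRU list, clamped so that at least BUF_LRU_OLD_TOLERANCE + BUF_LRU_NON_OLD_MIN_LEN blocks always remain "new". A worked example (BUF_LRU_OLD_RATIO_DIV is defined in buf0lru.h; 1024 is the value assumed here, and the 3/8 ratio is just an illustrative default):

	#include <stdio.h>

	#define OLD_RATIO_DIV   1024   /* BUF_LRU_OLD_RATIO_DIV, from buf0lru.h */
	#define OLD_TOLERANCE   20     /* BUF_LRU_OLD_TOLERANCE */
	#define NON_OLD_MIN_LEN 5      /* BUF_LRU_NON_OLD_MIN_LEN */

	int main(void)
	{
		unsigned long lru_len = 1000;
		unsigned long old_ratio = 3 * OLD_RATIO_DIV / 8;   /* 384, i.e. 37.5% */

		unsigned long by_ratio = lru_len * old_ratio / OLD_RATIO_DIV;
		unsigned long cap = lru_len - (OLD_TOLERANCE + NON_OLD_MIN_LEN);
		unsigned long new_len = by_ratio < cap ? by_ratio : cap;

		printf("new_len=%lu\n", new_len);   /* 375 of 1000 blocks are "old" */
		return 0;
	}
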
1116 
1117 /*******************************************************************/
1120 static
1121 void
1122 buf_LRU_old_init(
1123 /*=============*/
1124  buf_pool_t* buf_pool)
1125 {
1126  buf_page_t* bpage;
1127 
1128  ut_ad(buf_pool_mutex_own(buf_pool));
1129  ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
1130 
1131  /* We first initialize all blocks in the LRU list as old and then use
1132  the adjust function to move the LRU_old pointer to the right
1133  position */
1134 
1135  for (bpage = UT_LIST_GET_LAST(buf_pool->LRU); bpage != NULL;
1136  bpage = UT_LIST_GET_PREV(LRU, bpage)) {
1137  ut_ad(bpage->in_LRU_list);
1138  ut_ad(buf_page_in_file(bpage));
1139  /* This loop temporarily violates the
1140  assertions of buf_page_set_old(). */
1141  bpage->old = TRUE;
1142  }
1143 
1144  buf_pool->LRU_old = UT_LIST_GET_FIRST(buf_pool->LRU);
1145  buf_pool->LRU_old_len = UT_LIST_GET_LEN(buf_pool->LRU);
1146 
1147  buf_LRU_old_adjust_len(buf_pool);
1148 }
1149 
1150 /******************************************************************/
1152 static
1153 void
1154 buf_unzip_LRU_remove_block_if_needed(
1155 /*=================================*/
1156  buf_page_t* bpage)
1157 {
1158  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1159 
1160  ut_ad(buf_pool);
1161  ut_ad(bpage);
1162  ut_ad(buf_page_in_file(bpage));
1163  ut_ad(buf_pool_mutex_own(buf_pool));
1164 
1165  if (buf_page_belongs_to_unzip_LRU(bpage)) {
1166  buf_block_t* block = (buf_block_t*) bpage;
1167 
1168  ut_ad(block->in_unzip_LRU_list);
1169  ut_d(block->in_unzip_LRU_list = FALSE);
1170 
1171  UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, block);
1172  }
1173 }
1174 
1175 /******************************************************************/
1177 UNIV_INLINE
1178 void
1179 buf_LRU_remove_block(
1180 /*=================*/
1181  buf_page_t* bpage)
1182 {
1183  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1184 
1185  ut_ad(buf_pool);
1186  ut_ad(bpage);
1187  ut_ad(buf_pool_mutex_own(buf_pool));
1188 
1189  ut_a(buf_page_in_file(bpage));
1190 
1191  ut_ad(bpage->in_LRU_list);
1192 
1193  /* If the LRU_old pointer is defined and points to just this block,
1194  move it backward one step */
1195 
1196  if (UNIV_UNLIKELY(bpage == buf_pool->LRU_old)) {
1197 
1198  /* Below: the previous block is guaranteed to exist,
1199  because the LRU_old pointer is only allowed to differ
1200  by BUF_LRU_OLD_TOLERANCE from strict
1201  buf_pool->LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV of the LRU
1202  list length. */
1203  buf_page_t* prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
1204 
1205  ut_a(prev_bpage);
1206 #ifdef UNIV_LRU_DEBUG
1207  ut_a(!prev_bpage->old);
1208 #endif /* UNIV_LRU_DEBUG */
1209  buf_pool->LRU_old = prev_bpage;
1210  buf_page_set_old(prev_bpage, TRUE);
1211 
1212  buf_pool->LRU_old_len++;
1213  }
1214 
1215  /* Remove the block from the LRU list */
1216  UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
1217  ut_d(bpage->in_LRU_list = FALSE);
1218 
1219  buf_unzip_LRU_remove_block_if_needed(bpage);
1220 
1221  /* If the LRU list is so short that LRU_old is not defined,
1222  clear the "old" flags and return */
1223  if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
1224 
1225  for (bpage = UT_LIST_GET_FIRST(buf_pool->LRU); bpage != NULL;
1226  bpage = UT_LIST_GET_NEXT(LRU, bpage)) {
1227  /* This loop temporarily violates the
1228  assertions of buf_page_set_old(). */
1229  bpage->old = FALSE;
1230  }
1231 
1232  buf_pool->LRU_old = NULL;
1233  buf_pool->LRU_old_len = 0;
1234 
1235  return;
1236  }
1237 
1238  ut_ad(buf_pool->LRU_old);
1239 
1240  /* Update the LRU_old_len field if necessary */
1241  if (buf_page_is_old(bpage)) {
1242 
1243  buf_pool->LRU_old_len--;
1244  }
1245 
1246  /* Adjust the length of the old block list if necessary */
1247  buf_LRU_old_adjust_len(buf_pool);
1248 }
1249 
1250 /******************************************************************/
1252 UNIV_INTERN
1253 void
1254 buf_unzip_LRU_add_block(
1255 /*====================*/
1256  buf_block_t* block,
1257  ibool old)
1259 {
1260  buf_pool_t* buf_pool = buf_pool_from_block(block);
1261 
1262  ut_ad(buf_pool);
1263  ut_ad(block);
1264  ut_ad(buf_pool_mutex_own(buf_pool));
1265 
1266  ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
1267 
1268  ut_ad(!block->in_unzip_LRU_list);
1269  ut_d(block->in_unzip_LRU_list = TRUE);
1270 
1271  if (old) {
1272  UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
1273  } else {
1274  UT_LIST_ADD_FIRST(unzip_LRU, buf_pool->unzip_LRU, block);
1275  }
1276 }
1277 
1278 /******************************************************************/
1280 UNIV_INLINE
1281 void
1282 buf_LRU_add_block_to_end_low(
1283 /*=========================*/
1284  buf_page_t* bpage)
1285 {
1286  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1287 
1288  ut_ad(buf_pool);
1289  ut_ad(bpage);
1290  ut_ad(buf_pool_mutex_own(buf_pool));
1291 
1292  ut_a(buf_page_in_file(bpage));
1293 
1294  ut_ad(!bpage->in_LRU_list);
1295  UT_LIST_ADD_LAST(LRU, buf_pool->LRU, bpage);
1296  ut_d(bpage->in_LRU_list = TRUE);
1297 
1298  if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
1299 
1300  ut_ad(buf_pool->LRU_old);
1301 
1302  /* Adjust the length of the old block list if necessary */
1303 
1304  buf_page_set_old(bpage, TRUE);
1305  buf_pool->LRU_old_len++;
1306  buf_LRU_old_adjust_len(buf_pool);
1307 
1308  } else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) {
1309 
1310  /* The LRU list is now long enough for LRU_old to become
1311  defined: init it */
1312 
1313  buf_LRU_old_init(buf_pool);
1314  } else {
1315  buf_page_set_old(bpage, buf_pool->LRU_old != NULL);
1316  }
1317 
1318  /* If this is a zipped block with decompressed frame as well
1319  then put it on the unzip_LRU list */
1320  if (buf_page_belongs_to_unzip_LRU(bpage)) {
1321  buf_unzip_LRU_add_block((buf_block_t*) bpage, TRUE);
1322  }
1323 }
1324 
1325 /******************************************************************/
1327 UNIV_INLINE
1328 void
1329 buf_LRU_add_block_low(
1330 /*==================*/
1331  buf_page_t* bpage,
1332  ibool old)
1336 {
1337  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1338 
1339  ut_ad(buf_pool);
1340  ut_ad(bpage);
1341  ut_ad(buf_pool_mutex_own(buf_pool));
1342 
1343  ut_a(buf_page_in_file(bpage));
1344  ut_ad(!bpage->in_LRU_list);
1345 
1346  if (!old || (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN)) {
1347 
1348  UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, bpage);
1349 
1350  bpage->freed_page_clock = buf_pool->freed_page_clock;
1351  } else {
1352 #ifdef UNIV_LRU_DEBUG
1353  /* buf_pool->LRU_old must be the first item in the LRU list
1354  whose "old" flag is set. */
1355  ut_a(buf_pool->LRU_old->old);
1356  ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
1357  || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
1358  ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
1359  || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
1360 #endif /* UNIV_LRU_DEBUG */
1361  UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, buf_pool->LRU_old,
1362  bpage);
1363  buf_pool->LRU_old_len++;
1364  }
1365 
1366  ut_d(bpage->in_LRU_list = TRUE);
1367 
1368  if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
1369 
1370  ut_ad(buf_pool->LRU_old);
1371 
1372  /* Adjust the length of the old block list if necessary */
1373 
1374  buf_page_set_old(bpage, old);
1375  buf_LRU_old_adjust_len(buf_pool);
1376 
1377  } else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) {
1378 
1379  /* The LRU list is now long enough for LRU_old to become
1380  defined: init it */
1381 
1382  buf_LRU_old_init(buf_pool);
1383  } else {
1384  buf_page_set_old(bpage, buf_pool->LRU_old != NULL);
1385  }
1386 
1387  /* If this is a zipped block with decompressed frame as well
1388  then put it on the unzip_LRU list */
1389  if (buf_page_belongs_to_unzip_LRU(bpage)) {
1390  buf_unzip_LRU_add_block((buf_block_t*) bpage, old);
1391  }
1392 }
1393 
1394 /******************************************************************/
1396 UNIV_INTERN
1397 void
1398 buf_LRU_add_block(
1399 /*==============*/
1400  buf_page_t* bpage,
1401  ibool old)
1406 {
1407  buf_LRU_add_block_low(bpage, old);
1408 }
1409 
1410 /******************************************************************/
1412 UNIV_INTERN
1413 void
1414 buf_LRU_make_block_young(
1415 /*=====================*/
1416  buf_page_t* bpage)
1417 {
1418  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1419 
1420  ut_ad(buf_pool_mutex_own(buf_pool));
1421 
1422  if (bpage->old) {
1423  buf_pool->stat.n_pages_made_young++;
1424  }
1425 
1426  buf_LRU_remove_block(bpage);
1427  buf_LRU_add_block_low(bpage, FALSE);
1428 }
1429 
1430 /******************************************************************/
1432 UNIV_INTERN
1433 void
1434 buf_LRU_make_block_old(
1435 /*===================*/
1436  buf_page_t* bpage)
1437 {
1438  buf_LRU_remove_block(bpage);
1439  buf_LRU_add_block_to_end_low(bpage);
1440 }
1441 
1442 /******************************************************************/
1455 UNIV_INTERN
1456 enum buf_lru_free_block_status
1457 buf_LRU_free_block(
1458 /*===============*/
1459  buf_page_t* bpage,
1460  ibool zip,
1462  ibool* buf_pool_mutex_released)
1466 {
1467  buf_page_t* b = NULL;
1468  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1469  mutex_t* block_mutex = buf_page_get_mutex(bpage);
1470 
1471  ut_ad(buf_pool_mutex_own(buf_pool));
1472  ut_ad(mutex_own(block_mutex));
1473  ut_ad(buf_page_in_file(bpage));
1474  ut_ad(bpage->in_LRU_list);
1475  ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
1476 #if UNIV_WORD_SIZE == 4
1477  /* On 32-bit systems, there is no padding in buf_page_t. On
1478  other systems, Valgrind could complain about uninitialized pad
1479  bytes. */
1480  UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
1481 #endif
1482 
1483  if (!buf_page_can_relocate(bpage)) {
1484 
1485  /* Do not free buffer-fixed or I/O-fixed blocks. */
1486  return(BUF_LRU_NOT_FREED);
1487  }
1488 
1489 #ifdef UNIV_IBUF_COUNT_DEBUG
1490  ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
1491 #endif /* UNIV_IBUF_COUNT_DEBUG */
1492 
1493  if (zip || !bpage->zip.data) {
1494  /* This would completely free the block. */
1495  /* Do not completely free dirty blocks. */
1496 
1497  if (bpage->oldest_modification) {
1498  return(BUF_LRU_NOT_FREED);
1499  }
1500  } else if (bpage->oldest_modification) {
1501  /* Do not completely free dirty blocks. */
1502 
1503  if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
1504  ut_ad(buf_page_get_state(bpage)
1505  == BUF_BLOCK_ZIP_DIRTY);
1506  return(BUF_LRU_NOT_FREED);
1507  }
1508 
1509  goto alloc;
1510  } else if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
1511  /* Allocate the control block for the compressed page.
1512  If it cannot be allocated (without freeing a block
1513  from the LRU list), refuse to free bpage. */
1514 alloc:
1515  buf_pool_mutex_exit_forbid(buf_pool);
1516  b = static_cast<buf_page_t *>(buf_buddy_alloc(buf_pool, sizeof *b, NULL));
1517  buf_pool_mutex_exit_allow(buf_pool);
1518 
1519  if (UNIV_UNLIKELY(!b)) {
1520  return(BUF_LRU_CANNOT_RELOCATE);
1521  }
1522 
1523  memcpy(b, bpage, sizeof *b);
1524  }
1525 
1526 #ifdef UNIV_DEBUG
1527  if (buf_debug_prints) {
1528  fprintf(stderr, "Putting space %lu page %lu to free list\n",
1529  (ulong) buf_page_get_space(bpage),
1530  (ulong) buf_page_get_page_no(bpage));
1531  }
1532 #endif /* UNIV_DEBUG */
1533 
1534  if (buf_LRU_block_remove_hashed_page(bpage, zip)
1535  != BUF_BLOCK_ZIP_FREE) {
1536  ut_a(bpage->buf_fix_count == 0);
1537 
1538  if (b) {
1539  buf_page_t* hash_b;
1540  buf_page_t* prev_b = UT_LIST_GET_PREV(LRU, b);
1541 
1542  const ulint fold = buf_page_address_fold(
1543  bpage->space, bpage->offset);
1544 
1545  hash_b = buf_page_hash_get_low(
1546  buf_pool, bpage->space, bpage->offset, fold);
1547 
1548  ut_a(!hash_b);
1549 
1550  b->state = b->oldest_modification
1551  ? BUF_BLOCK_ZIP_DIRTY
1552  : BUF_BLOCK_ZIP_PAGE;
1553  UNIV_MEM_DESC(b->zip.data,
1554  page_zip_get_size(&b->zip), b);
1555 
1556  /* The fields in_page_hash and in_LRU_list of
1557  the to-be-freed block descriptor should have
1558  been cleared in
1559  buf_LRU_block_remove_hashed_page(), which
1560  invokes buf_LRU_remove_block(). */
1561  ut_ad(!bpage->in_page_hash);
1562  ut_ad(!bpage->in_LRU_list);
1563  /* bpage->state was BUF_BLOCK_FILE_PAGE because
1564  b != NULL. The type cast below is thus valid. */
1565  ut_ad(!((buf_block_t*) bpage)->in_unzip_LRU_list);
1566 
1567  /* The fields of bpage were copied to b before
1568  buf_LRU_block_remove_hashed_page() was invoked. */
1569  ut_ad(!b->in_zip_hash);
1570  ut_ad(b->in_page_hash);
1571  ut_ad(b->in_LRU_list);
1572 
1573  HASH_INSERT(buf_page_t, hash,
1574  buf_pool->page_hash, fold, b);
1575 
1576  /* Insert b where bpage was in the LRU list. */
1577  if (UNIV_LIKELY(prev_b != NULL)) {
1578  ulint lru_len;
1579 
1580  ut_ad(prev_b->in_LRU_list);
1581  ut_ad(buf_page_in_file(prev_b));
1582 #if UNIV_WORD_SIZE == 4
1583  /* On 32-bit systems, there is no
1584  padding in buf_page_t. On other
1585  systems, Valgrind could complain about
1586  uninitialized pad bytes. */
1587  UNIV_MEM_ASSERT_RW(prev_b, sizeof *prev_b);
1588 #endif
1589  UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU,
1590  prev_b, b);
1591 
1592  if (buf_page_is_old(b)) {
1593  buf_pool->LRU_old_len++;
1594  if (UNIV_UNLIKELY
1595  (buf_pool->LRU_old
1596  == UT_LIST_GET_NEXT(LRU, b))) {
1597 
1598  buf_pool->LRU_old = b;
1599  }
1600  }
1601 
1602  lru_len = UT_LIST_GET_LEN(buf_pool->LRU);
1603 
1604  if (lru_len > BUF_LRU_OLD_MIN_LEN) {
1605  ut_ad(buf_pool->LRU_old);
1606  /* Adjust the length of the
1607  old block list if necessary */
1608  buf_LRU_old_adjust_len(buf_pool);
1609  } else if (lru_len == BUF_LRU_OLD_MIN_LEN) {
1610  /* The LRU list is now long
1611  enough for LRU_old to become
1612  defined: init it */
1613  buf_LRU_old_init(buf_pool);
1614  }
1615 #ifdef UNIV_LRU_DEBUG
1616  /* Check that the "old" flag is consistent
1617  in the block and its neighbours. */
1618  buf_page_set_old(b, buf_page_is_old(b));
1619 #endif /* UNIV_LRU_DEBUG */
1620  } else {
1621  ut_d(b->in_LRU_list = FALSE);
1622  buf_LRU_add_block_low(b, buf_page_is_old(b));
1623  }
1624 
1625  if (b->state == BUF_BLOCK_ZIP_PAGE) {
1626  buf_LRU_insert_zip_clean(b);
1627  } else {
1628  /* Relocate on buf_pool->flush_list. */
1629  buf_flush_relocate_on_flush_list(bpage, b);
1630  }
1631 
1632  bpage->zip.data = NULL;
1633  page_zip_set_size(&bpage->zip, 0);
1634 
1635  /* Prevent buf_page_get_gen() from
1636  decompressing the block while we release
1637  buf_pool->mutex and block_mutex. */
1638  b->buf_fix_count++;
1639  b->io_fix = BUF_IO_READ;
1640  }
1641 
1642  if (buf_pool_mutex_released) {
1643  *buf_pool_mutex_released = TRUE;
1644  }
1645 
1646  buf_pool_mutex_exit(buf_pool);
1647  mutex_exit(block_mutex);
1648 
1649  /* Remove possible adaptive hash index on the page.
1650  The page was declared uninitialized by
1651  buf_LRU_block_remove_hashed_page(). We need to flag
1652  the contents of the page valid (which it still is) in
1653  order to avoid bogus Valgrind warnings.*/
1654 
1655  UNIV_MEM_VALID(((buf_block_t*) bpage)->frame,
1656  UNIV_PAGE_SIZE);
1657  btr_search_drop_page_hash_index((buf_block_t*) bpage);
1658  UNIV_MEM_INVALID(((buf_block_t*) bpage)->frame,
1659  UNIV_PAGE_SIZE);
1660 
1661  if (b) {
1662  /* Compute and stamp the compressed page
1663  checksum while not holding any mutex. The
1664  block is already half-freed
1665  (BUF_BLOCK_REMOVE_HASH) and removed from
1666  buf_pool->page_hash, thus inaccessible by any
1667  other thread. */
1668 
1669  mach_write_to_4(
1670  b->zip.data + FIL_PAGE_SPACE_OR_CHKSUM,
1671  UNIV_LIKELY(srv_use_checksums)
1672  ? page_zip_calc_checksum(
1673  b->zip.data,
1674  page_zip_get_size(&b->zip))
1675  : BUF_NO_CHECKSUM_MAGIC);
1676  }
1677 
1678  buf_pool_mutex_enter(buf_pool);
1679  mutex_enter(block_mutex);
1680 
1681  if (b) {
1682  mutex_enter(&buf_pool->zip_mutex);
1683  b->buf_fix_count--;
1684  buf_page_set_io_fix(b, BUF_IO_NONE);
1685  mutex_exit(&buf_pool->zip_mutex);
1686  }
1687 
1688  buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
1689  } else {
1690  /* The block_mutex should have been released by
1691  buf_LRU_block_remove_hashed_page() when it returns
1692  BUF_BLOCK_ZIP_FREE. */
1693  ut_ad(block_mutex == &buf_pool->zip_mutex);
1694  mutex_enter(block_mutex);
1695  }
1696 
1697  return(BUF_LRU_FREED);
1698 }
1699 
1700 /******************************************************************/
1702 UNIV_INTERN
1703 void
1704 buf_LRU_block_free_non_file_page(
1705 /*=============================*/
1706  buf_block_t* block)
1707 {
1708  void* data;
1709  buf_pool_t* buf_pool = buf_pool_from_block(block);
1710 
1711  ut_ad(block);
1712  ut_ad(buf_pool_mutex_own(buf_pool));
1713  ut_ad(mutex_own(&block->mutex));
1714 
1715  switch (buf_block_get_state(block)) {
1716  case BUF_BLOCK_MEMORY:
1717  case BUF_BLOCK_READY_FOR_USE:
1718  break;
1719  default:
1720  ut_error;
1721  }
1722 
1723 #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
1724  ut_a(block->n_pointers == 0);
1725 #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
1726  ut_ad(!block->page.in_free_list);
1727  ut_ad(!block->page.in_flush_list);
1728  ut_ad(!block->page.in_LRU_list);
1729 
1730  buf_block_set_state(block, BUF_BLOCK_NOT_USED);
1731 
1732  UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
1733 #ifdef UNIV_DEBUG
1734  /* Wipe contents of page to reveal possible stale pointers to it */
1735  memset(block->frame, '\0', UNIV_PAGE_SIZE);
1736 #else
1737  /* Wipe page_no and space_id */
1738  memset(block->frame + FIL_PAGE_OFFSET, 0xfe, 4);
1739  memset(block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xfe, 4);
1740 #endif
1741  data = block->page.zip.data;
1742 
1743  if (data) {
1744  block->page.zip.data = NULL;
1745  mutex_exit(&block->mutex);
1746  buf_pool_mutex_exit_forbid(buf_pool);
1747 
1748  buf_buddy_free(
1749  buf_pool, data, page_zip_get_size(&block->page.zip));
1750 
1751  buf_pool_mutex_exit_allow(buf_pool);
1752  mutex_enter(&block->mutex);
1753  page_zip_set_size(&block->page.zip, 0);
1754  }
1755 
1756  UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
1757  ut_d(block->page.in_free_list = TRUE);
1758 
1759  UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
1760 }
1761 
1762 /******************************************************************/
1772 static
1773 enum buf_page_state
1774 buf_LRU_block_remove_hashed_page(
1775 /*=============================*/
1776  buf_page_t* bpage,
1779  ibool zip)
1781 {
1782  ulint fold;
1783  const buf_page_t* hashed_bpage;
1784  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1785 
1786  ut_ad(bpage);
1787  ut_ad(buf_pool_mutex_own(buf_pool));
1788  ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1789 
1790  ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
1791  ut_a(bpage->buf_fix_count == 0);
1792 
1793 #if UNIV_WORD_SIZE == 4
1794  /* On 32-bit systems, there is no padding in
1795  buf_page_t. On other systems, Valgrind could complain
1796  about uninitialized pad bytes. */
1797  UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage);
1798 #endif
1799 
1800  buf_LRU_remove_block(bpage);
1801 
1802  buf_pool->freed_page_clock += 1;
1803 
1804  switch (buf_page_get_state(bpage)) {
1805  case BUF_BLOCK_FILE_PAGE:
1806  UNIV_MEM_ASSERT_W(bpage, sizeof(buf_block_t));
1807  UNIV_MEM_ASSERT_W(((buf_block_t*) bpage)->frame,
1808  UNIV_PAGE_SIZE);
1809  buf_block_modify_clock_inc((buf_block_t*) bpage);
1810  if (bpage->zip.data) {
1811  const page_t* page = ((buf_block_t*) bpage)->frame;
1812  const ulint zip_size
1813  = page_zip_get_size(&bpage->zip);
1814 
1815  ut_a(!zip || bpage->oldest_modification == 0);
1816 
1817  switch (UNIV_EXPECT(fil_page_get_type(page),
1818  FIL_PAGE_INDEX)) {
1819  case FIL_PAGE_TYPE_ALLOCATED:
1820  case FIL_PAGE_INODE:
1821  case FIL_PAGE_IBUF_BITMAP:
1822  case FIL_PAGE_TYPE_FSP_HDR:
1823  case FIL_PAGE_TYPE_XDES:
1824  /* These are essentially uncompressed pages. */
1825  if (!zip) {
1826  /* InnoDB writes the data to the
1827  uncompressed page frame. Copy it
1828  to the compressed page, which will
1829  be preserved. */
1830  memcpy(bpage->zip.data, page,
1831  zip_size);
1832  }
1833  break;
1834  case FIL_PAGE_TYPE_ZBLOB:
1835  case FIL_PAGE_TYPE_ZBLOB2:
1836  break;
1837  case FIL_PAGE_INDEX:
1838 #ifdef UNIV_ZIP_DEBUG
1839  ut_a(page_zip_validate(&bpage->zip, page));
1840 #endif /* UNIV_ZIP_DEBUG */
1841  break;
1842  default:
1843  ut_print_timestamp(stderr);
1844  fputs(" InnoDB: ERROR: The compressed page"
1845  " to be evicted seems corrupt:", stderr);
1846  ut_print_buf(stderr, page, zip_size);
1847  fputs("\nInnoDB: Possibly older version"
1848  " of the page:", stderr);
1849  ut_print_buf(stderr, bpage->zip.data,
1850  zip_size);
1851  putc('\n', stderr);
1852  ut_error;
1853  }
1854 
1855  break;
1856  }
1857  /* fall through */
1858  case BUF_BLOCK_ZIP_PAGE:
1859  ut_a(bpage->oldest_modification == 0);
1860  UNIV_MEM_ASSERT_W(bpage->zip.data,
1861  page_zip_get_size(&bpage->zip));
1862  break;
1863  case BUF_BLOCK_ZIP_FREE:
1864  case BUF_BLOCK_ZIP_DIRTY:
1865  case BUF_BLOCK_NOT_USED:
1866  case BUF_BLOCK_READY_FOR_USE:
1867  case BUF_BLOCK_MEMORY:
1868  case BUF_BLOCK_REMOVE_HASH:
1869  ut_error;
1870  break;
1871  }
1872 
1873  fold = buf_page_address_fold(bpage->space, bpage->offset);
1874  hashed_bpage = buf_page_hash_get_low(
1875  buf_pool, bpage->space, bpage->offset, fold);
1876 
1877  if (UNIV_UNLIKELY(bpage != hashed_bpage)) {
1878  fprintf(stderr,
1879  "InnoDB: Error: page %lu %lu not found"
1880  " in the hash table\n",
1881  (ulong) bpage->space,
1882  (ulong) bpage->offset);
1883  if (hashed_bpage) {
1884  fprintf(stderr,
1885  "InnoDB: In hash table we find block"
1886  " %p of %lu %lu which is not %p\n",
1887  (const void*) hashed_bpage,
1888  (ulong) hashed_bpage->space,
1889  (ulong) hashed_bpage->offset,
1890  (const void*) bpage);
1891  }
1892 
1893 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
1894  mutex_exit(buf_page_get_mutex(bpage));
1895  buf_pool_mutex_exit(buf_pool);
1896  buf_print();
1897  buf_LRU_print();
1898  buf_validate();
1899  buf_LRU_validate();
1900 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
1901  ut_error;
1902  }
1903 
1904  ut_ad(!bpage->in_zip_hash);
1905  ut_ad(bpage->in_page_hash);
1906  ut_d(bpage->in_page_hash = FALSE);
1907  HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
1908  switch (buf_page_get_state(bpage)) {
1909  case BUF_BLOCK_ZIP_PAGE:
1910  ut_ad(!bpage->in_free_list);
1911  ut_ad(!bpage->in_flush_list);
1912  ut_ad(!bpage->in_LRU_list);
1913  ut_a(bpage->zip.data);
1914  ut_a(buf_page_get_zip_size(bpage));
1915 
1916  UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
1917 
1918  mutex_exit(&buf_pool->zip_mutex);
1919  buf_pool_mutex_exit_forbid(buf_pool);
1920 
1921  buf_buddy_free(
1922  buf_pool, bpage->zip.data,
1923  page_zip_get_size(&bpage->zip));
1924 
1925  buf_buddy_free(buf_pool, bpage, sizeof(*bpage));
1926  buf_pool_mutex_exit_allow(buf_pool);
1927 
1928  UNIV_MEM_UNDESC(bpage);
1929  return(BUF_BLOCK_ZIP_FREE);
1930 
1931  case BUF_BLOCK_FILE_PAGE:
1932  memset(((buf_block_t*) bpage)->frame
1933  + FIL_PAGE_OFFSET, 0xff, 4);
1934  memset(((buf_block_t*) bpage)->frame
1935  + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xff, 4);
1936  UNIV_MEM_INVALID(((buf_block_t*) bpage)->frame,
1937  UNIV_PAGE_SIZE);
1938  buf_page_set_state(bpage, BUF_BLOCK_REMOVE_HASH);
1939 
1940  if (zip && bpage->zip.data) {
1941  /* Free the compressed page. */
1942  void* data = bpage->zip.data;
1943  bpage->zip.data = NULL;
1944 
1945  ut_ad(!bpage->in_free_list);
1946  ut_ad(!bpage->in_flush_list);
1947  ut_ad(!bpage->in_LRU_list);
1948  mutex_exit(&((buf_block_t*) bpage)->mutex);
1949  buf_pool_mutex_exit_forbid(buf_pool);
1950 
1951  buf_buddy_free(
1952  buf_pool, data,
1953  page_zip_get_size(&bpage->zip));
1954 
1955  buf_pool_mutex_exit_allow(buf_pool);
1956  mutex_enter(&((buf_block_t*) bpage)->mutex);
1957  page_zip_set_size(&bpage->zip, 0);
1958  }
1959 
1960  return(BUF_BLOCK_REMOVE_HASH);
1961 
1962  case BUF_BLOCK_ZIP_FREE:
1963  case BUF_BLOCK_ZIP_DIRTY:
1964  case BUF_BLOCK_NOT_USED:
1965  case BUF_BLOCK_READY_FOR_USE:
1966  case BUF_BLOCK_MEMORY:
1967  case BUF_BLOCK_REMOVE_HASH:
1968  break;
1969  }
1970 
1971  ut_error;
1972  return(BUF_BLOCK_ZIP_FREE);
1973 }
1974 
1975 /******************************************************************/
1977 static
1978 void
1979 buf_LRU_block_free_hashed_page(
1980 /*===========================*/
1981  buf_block_t* block)
1983 {
1984 #ifdef UNIV_DEBUG
1985  buf_pool_t* buf_pool = buf_pool_from_block(block);
1986  ut_ad(buf_pool_mutex_own(buf_pool));
1987 #endif
1988  ut_ad(mutex_own(&block->mutex));
1989 
1990  buf_block_set_state(block, BUF_BLOCK_MEMORY);
1991 
1992  buf_LRU_block_free_non_file_page(block);
1993 }
1994 
1995 /**********************************************************************/
1998 static
1999 uint
2000 buf_LRU_old_ratio_update_instance(
2001 /*==============================*/
2002  buf_pool_t* buf_pool,
2003  uint old_pct,
2005  ibool adjust)
2008 {
2009  uint ratio;
2010 
2011  ratio = old_pct * BUF_LRU_OLD_RATIO_DIV / 100;
2012  if (ratio < BUF_LRU_OLD_RATIO_MIN) {
2013  ratio = BUF_LRU_OLD_RATIO_MIN;
2014  } else if (ratio > BUF_LRU_OLD_RATIO_MAX) {
2015  ratio = BUF_LRU_OLD_RATIO_MAX;
2016  }
2017 
2018  if (adjust) {
2019  buf_pool_mutex_enter(buf_pool);
2020 
2021  if (ratio != buf_pool->LRU_old_ratio) {
2022  buf_pool->LRU_old_ratio = ratio;
2023 
2024  if (UT_LIST_GET_LEN(buf_pool->LRU)
2025  >= BUF_LRU_OLD_MIN_LEN) {
2026 
2027  buf_LRU_old_adjust_len(buf_pool);
2028  }
2029  }
2030 
2031  buf_pool_mutex_exit(buf_pool);
2032  } else {
2033  buf_pool->LRU_old_ratio = ratio;
2034  }
2035  /* the reverse of
2036  ratio = old_pct * BUF_LRU_OLD_RATIO_DIV / 100 */
2037  return((uint) (ratio * 100 / (double) BUF_LRU_OLD_RATIO_DIV + 0.5));
2038 }
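
The instance update is a percent-to-fixed-point conversion with clamping, followed by the reverse mapping so the caller sees the percentage that was actually applied. The round trip can be checked in isolation (RATIO_MIN and RATIO_MAX mirror BUF_LRU_OLD_RATIO_MIN/MAX from buf0lru.h; 51 and 1024 are the values assumed here):

	#include <stdio.h>

	#define RATIO_DIV 1024   /* BUF_LRU_OLD_RATIO_DIV */
	#define RATIO_MIN 51     /* BUF_LRU_OLD_RATIO_MIN, about 5% */
	#define RATIO_MAX 1024   /* BUF_LRU_OLD_RATIO_MAX */

	int main(void)
	{
		unsigned int old_pct = 37;
		unsigned int ratio = old_pct * RATIO_DIV / 100;   /* 378 */

		if (ratio < RATIO_MIN)
			ratio = RATIO_MIN;
		else if (ratio > RATIO_MAX)
			ratio = RATIO_MAX;

		/* the reverse of ratio = old_pct * RATIO_DIV / 100 */
		printf("reported=%u%%\n",
		       (unsigned int) (ratio * 100 / (double) RATIO_DIV + 0.5));
		return 0;   /* prints 37%: the clamp did not bite */
	}
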
2039 
2040 /**********************************************************************/
2043 UNIV_INTERN
2044 ulint
2045 buf_LRU_old_ratio_update(
2046 /*=====================*/
2047  uint old_pct,
2049  ibool adjust)
2052 {
2053  ulint i;
2054  ulint new_ratio = 0;
2055 
2056  for (i = 0; i < srv_buf_pool_instances; i++) {
2057  buf_pool_t* buf_pool;
2058 
2059  buf_pool = buf_pool_from_array(i);
2060 
2061  new_ratio = buf_LRU_old_ratio_update_instance(
2062  buf_pool, old_pct, adjust);
2063  }
2064 
2065  return(new_ratio);
2066 }
2067 
2068 /********************************************************************/
2071 UNIV_INTERN
2072 void
2073 buf_LRU_stat_update(void)
2074 /*=====================*/
2075 {
2076  ulint i;
2077  buf_LRU_stat_t* item;
2078  buf_pool_t* buf_pool;
2079  ibool evict_started = FALSE;
2080 
2081  /* If we haven't started eviction yet then don't update stats. */
2082  for (i = 0; i < srv_buf_pool_instances; i++) {
2083 
2084  buf_pool = buf_pool_from_array(i);
2085 
2086  if (buf_pool->freed_page_clock != 0) {
2087  evict_started = TRUE;
2088  break;
2089  }
2090  }
2091 
2092  if (!evict_started) {
2093  goto func_exit;
2094  }
2095 
2096  /* Update the index. */
2097  item = &buf_LRU_stat_arr[buf_LRU_stat_arr_ind];
2098  buf_LRU_stat_arr_ind++;
2099  buf_LRU_stat_arr_ind %= BUF_LRU_STAT_N_INTERVAL;
2100 
2101  /* Add the current value and subtract the obsolete entry. */
2102  buf_LRU_stat_sum.io += buf_LRU_stat_cur.io - item->io;
2103  buf_LRU_stat_sum.unzip += buf_LRU_stat_cur.unzip - item->unzip;
2104 
2105  /* Put current entry in the array. */
2106  memcpy(item, &buf_LRU_stat_cur, sizeof *item);
2107 
2108 func_exit:
2109  /* Clear the current entry. */
2110  memset(&buf_LRU_stat_cur, 0, sizeof buf_LRU_stat_cur);
2111 }
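
The bookkeeping above is a circular-buffer moving sum: the oldest of BUF_LRU_STAT_N_INTERVAL slots is replaced by the current interval, and the running sum is adjusted by the difference. The same mechanism in miniature (four slots instead of fifty):

	#include <stdio.h>

	#define N_INTERVAL 4   /* BUF_LRU_STAT_N_INTERVAL is 50 above */

	static unsigned long arr[N_INTERVAL];
	static unsigned long arr_ind;
	static unsigned long sum;

	static void stat_update(unsigned long cur)
	{
		unsigned long *item = &arr[arr_ind];

		arr_ind = (arr_ind + 1) % N_INTERVAL;
		sum += cur - *item;   /* add current, subtract the obsolete entry */
		*item = cur;
	}

	int main(void)
	{
		unsigned long samples[] = { 10, 20, 30, 40, 50, 60 };
		int i;

		for (i = 0; i < 6; i++) {
			stat_update(samples[i]);
			printf("sum=%lu\n", sum);
		}
		return 0;   /* final sum is 180 = 30 + 40 + 50 + 60 */
	}
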
2112 
2113 /********************************************************************/
2115 #define LRU_DUMP_FILE "ib_lru_dump"
2116 
2117 UNIV_INTERN
2118 bool
2119 buf_LRU_file_dump(void)
2120 /*===================*/
2121 {
2122  os_file_t dump_file = -1;
2123  ibool success;
2124  byte* buffer_base = NULL;
2125  byte* buffer = NULL;
2126  buf_page_t* bpage;
2127  ulint buffers;
2128  ulint offset;
2129  bool ret = false;
2130  ulint i;
2131 
2132  for (i = 0; i < srv_n_data_files; i++) {
2133  if (strstr(srv_data_file_names[i], LRU_DUMP_FILE) != NULL) {
2134  fprintf(stderr,
2135  " InnoDB: The name '%s' seems to be used for"
2136  " innodb_data_file_path. For safety, dumping of the LRU list"
2137  " is not being done.\n", LRU_DUMP_FILE);
2138  goto end;
2139  }
2140  }
2141 
2142  buffer_base = static_cast<byte *>(ut_malloc(2 * UNIV_PAGE_SIZE));
2143  buffer = static_cast<byte *>(ut_align(buffer_base, UNIV_PAGE_SIZE));
2144  if (buffer == NULL) {
2145  fprintf(stderr,
2146  " InnoDB: cannot allocate buffer.\n");
2147  goto end;
2148  }
2149 
2150  dump_file = os_file_create(innodb_file_temp_key, LRU_DUMP_FILE, OS_FILE_OVERWRITE,
2151  OS_FILE_NORMAL, OS_DATA_FILE, &success);
2152  if (success == FALSE) {
2153  os_file_get_last_error(TRUE);
2154  fprintf(stderr,
2155  " InnoDB: cannot open %s\n", LRU_DUMP_FILE);
2156  goto end;
2157  }
2158 
2159  buffers = offset = 0;
2160 
2161  for (i = 0; i < srv_buf_pool_instances; i++) {
2162  buf_pool_t* buf_pool;
2163 
2164  buf_pool = buf_pool_from_array(i);
2165 
2166  buf_pool_mutex_enter(buf_pool);
2167  bpage = UT_LIST_GET_LAST(buf_pool->LRU);
2168 
2169  while (bpage != NULL) {
2170  if (offset == 0) {
2171  memset(buffer, 0, UNIV_PAGE_SIZE);
2172  }
2173 
2174  mach_write_to_4(buffer + offset * 4, bpage->space);
2175  offset++;
2176  mach_write_to_4(buffer + offset * 4, bpage->offset);
2177  offset++;
2178 
2179  if (offset == UNIV_PAGE_SIZE/4) {
2180  success = os_file_write(LRU_DUMP_FILE, dump_file, buffer,
2181  (buffers << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL,
2182  (buffers >> (32 - UNIV_PAGE_SIZE_SHIFT)),
2183  UNIV_PAGE_SIZE);
2184  if (success == FALSE) {
2185  buf_pool_mutex_exit(buf_pool);
2186  fprintf(stderr,
2187  " InnoDB: cannot write page %lu of %s\n",
2188  buffers, LRU_DUMP_FILE);
2189  goto end;
2190  }
2191  buffers++;
2192  offset = 0;
2193  }
2194 
2195  bpage = UT_LIST_GET_PREV(LRU, bpage);
2196  }
2197  buf_pool_mutex_exit(buf_pool);
2198  }
2199 
2200  if (offset == 0) {
2201  memset(buffer, 0, UNIV_PAGE_SIZE);
2202  }
2203 
2204  mach_write_to_4(buffer + offset * 4, 0xFFFFFFFFUL);
2205  offset++;
2206  mach_write_to_4(buffer + offset * 4, 0xFFFFFFFFUL);
2207  offset++;
2208 
2209  success = os_file_write(LRU_DUMP_FILE, dump_file, buffer,
2210  (buffers << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL,
2211  (buffers >> (32 - UNIV_PAGE_SIZE_SHIFT)),
2212  UNIV_PAGE_SIZE);
2213  if (success == FALSE) {
2214  goto end;
2215  }
2216 
2217  ret = true;
2218 end:
2219  if (dump_file != -1)
2220  os_file_close(dump_file);
2221  if (buffer_base)
2222  ut_free(buffer_base);
2223 
2224  return(ret);
2225 }
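
Each LRU entry is serialized as two 4-byte big-endian words, the space id followed by the page number, and a pair of 0xFFFFFFFF words marks the end of the dump. A minimal sketch of that record layout, assuming a 16 KiB page; write_be32() stands in for mach_write_to_4() and all names are illustrative:

#include <cstdint>

static void write_be32(unsigned char* b, uint32_t n)
{
    /* most significant byte first, as mach_write_to_4() does */
    b[0] = (unsigned char) (n >> 24);
    b[1] = (unsigned char) (n >> 16);
    b[2] = (unsigned char) (n >> 8);
    b[3] = (unsigned char) n;
}

static void put_entry(unsigned char* page, unsigned long& offset,
                      uint32_t space_id, uint32_t page_no)
{
    write_be32(page + offset * 4, space_id); offset++;
    write_be32(page + offset * 4, page_no);  offset++;
    /* the caller writes the page out and resets offset once
       offset == UNIV_PAGE_SIZE / 4 */
}

int main()
{
    unsigned char page[16384] = {0};       /* assumed UNIV_PAGE_SIZE */
    unsigned long offset = 0;

    put_entry(page, offset, 5, 42);        /* space 5, page 42 */
    put_entry(page, offset, 0xFFFFFFFFUL, 0xFFFFFFFFUL);  /* terminator */

    return (page[3] == 5 && page[7] == 42) ? 0 : 1;
}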
2226 
2227 typedef struct {
2228  ib_uint32_t space_id;
2229  ib_uint32_t page_no;
2230 } dump_record_t;
2231 
2232 static int dump_record_cmp(const void *a, const void *b)
2233 {
2234  const dump_record_t *rec1 = (dump_record_t *) a;
2235  const dump_record_t *rec2 = (dump_record_t *) b;
2236 
2237  if (rec1->space_id < rec2->space_id)
2238  return -1;
2239  if (rec1->space_id > rec2->space_id)
2240  return 1;
2241  if (rec1->page_no < rec2->page_no)
2242  return -1;
2243  return rec1->page_no > rec2->page_no;
2244 }
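
The comparator orders records by space id first, then page number, so qsort() turns the LRU-ordered dump into sequential per-tablespace reads. A usage sketch with made-up records, reusing dump_record_t and dump_record_cmp from above:

#include <cstdio>
#include <cstdlib>

int main()
{
    dump_record_t recs[] = { {2, 7}, {1, 9}, {2, 3}, {1, 1} };

    qsort(recs, sizeof(recs) / sizeof(recs[0]), sizeof(recs[0]),
          dump_record_cmp);

    /* prints 1/1, 1/9, 2/3, 2/7 */
    for (unsigned i = 0; i < 4; i++) {
        printf("space %u page %u\n",
               (unsigned) recs[i].space_id, (unsigned) recs[i].page_no);
    }
    return 0;
}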
2245 
2246 /********************************************************************/
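2247 /* Read the pages listed in the LRU dump file back into the buffer pool. */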
2248 UNIV_INTERN
2249 bool
2250 buf_LRU_file_restore(void)
2251 /*======================*/
2252 {
2253  os_file_t dump_file = -1;
2254  ibool success;
2255  byte* buffer_base = NULL;
2256  byte* buffer = NULL;
2257  ulint buffers;
2258  ulint offset;
2259  ulint reads = 0;
2260  ulint req = 0;
2261  bool terminated = false;
2262  bool ret = false;
2263  dump_record_t* records = NULL;
2264  ulint size;
2265  ulint size_high;
2266  ulint length;
2267 
2268  dump_file = os_file_create_simple_no_error_handling(innodb_file_temp_key,
2269  LRU_DUMP_FILE, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
2270  if (success == FALSE || !os_file_get_size(dump_file, &size, &size_high)) {
2271  os_file_get_last_error(TRUE);
2272  fprintf(stderr,
2273  " InnoDB: cannot open %s\n", LRU_DUMP_FILE);
2274  goto end;
2275  }
2276  if (size == 0 || size_high > 0 || size % 8) {
2277  fprintf(stderr, " InnoDB: broken LRU dump file\n");
2278  goto end;
2279  }
2280  buffer_base = static_cast<byte *>(ut_malloc(2 * UNIV_PAGE_SIZE));
2281  buffer = static_cast<byte *>(ut_align(buffer_base, UNIV_PAGE_SIZE));
2282  records = static_cast<dump_record_t *>(ut_malloc(size));
2283  if (buffer == NULL || records == NULL) {
2284  fprintf(stderr,
2285  " InnoDB: cannot allocate buffer.\n");
2286  goto end;
2287  }
2288 
2289  buffers = 0;
2290  length = 0;
2291  while (!terminated) {
2292  success = os_file_read(dump_file, buffer,
2293  (buffers << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL,
2294  (buffers >> (32 - UNIV_PAGE_SIZE_SHIFT)),
2295  UNIV_PAGE_SIZE);
2296  if (success == FALSE) {
2297  fprintf(stderr,
2298  " InnoDB: either could not read page %lu of %s,"
2299  " or terminated unexpectedly.\n",
2300  buffers, LRU_DUMP_FILE);
2301  goto end;
2302  }
2303 
2304  for (offset = 0; offset < UNIV_PAGE_SIZE/4; offset += 2) {
2305  ulint space_id;
2306  ulint page_no;
2307 
2308  space_id = mach_read_from_4(buffer + offset * 4);
2309  page_no = mach_read_from_4(buffer + (offset + 1) * 4);
2310  if (space_id == 0xFFFFFFFFUL
2311  || page_no == 0xFFFFFFFFUL) {
2312  terminated = true;
2313  break;
2314  }
2315 
2316  records[length].space_id = space_id;
2317  records[length].page_no = page_no;
2318  length++;
2319  if (length * 8 >= size) {
2320  fprintf(stderr,
2321  " InnoDB: could not find the "
2322  "end-of-file marker after reading "
2323  "the expected %lu bytes from the "
2324  "LRU dump file.\n"
2325  " InnoDB: this could be caused by a "
2326  "broken or incomplete file.\n"
2327  " InnoDB: trying to process what has "
2328  "been read so far.\n",
2329  size);
2330  terminated = true;
2331  break;
2332  }
2333  }
2334  buffers++;
2335  }
2336 
2337  qsort(records, length, sizeof(dump_record_t), dump_record_cmp);
2338 
2339  for (offset = 0; offset < length; offset++) {
2340  ulint space_id;
2341  ulint page_no;
2342  ulint zip_size;
2343  ulint err;
2344  int64_t tablespace_version;
2345 
2346  space_id = records[offset].space_id;
2347  page_no = records[offset].page_no;
2348 
2349  if (offset % 16 == 15) {
2350  os_aio_simulated_wake_handler_threads();
2351  buf_flush_free_margins();
2352  }
2353 
2354  zip_size = fil_space_get_zip_size(space_id);
2355  if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
2356  continue;
2357  }
2358 
2359  if (fil_is_exist(space_id, page_no)) {
2360 
2361  tablespace_version = fil_space_get_version(space_id);
2362 
2363  req++;
2364  reads += buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
2365  | OS_AIO_SIMULATED_WAKE_LATER,
2366  space_id, zip_size, TRUE,
2367  tablespace_version, page_no);
2368  buf_LRU_stat_inc_io();
2369  }
2370  }
2371 
2372  os_aio_simulated_wake_handler_threads();
2373  buf_flush_free_margins();
2374 
2375  ut_print_timestamp(stderr);
2376  fprintf(stderr,
2377  " InnoDB: reading pages based on the dumped LRU list was done."
2378  " (requested: %lu, read: %lu)\n", req, reads);
2379  ret = true;
2380 end:
2381  if (dump_file != -1)
2382  os_file_close(dump_file);
2383  if (buffer_base)
2384  ut_free(buffer_base);
2385  if (records)
2386  ut_free(records);
2387 
2388  return(ret);
2389 }
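
Both the dump and restore paths address the file through two 32-bit offset halves: the byte offset of page n is n << UNIV_PAGE_SIZE_SHIFT, which can exceed 32 bits, so os_file_read() and os_file_write() take it as a low word and a high word. A sketch of the split and its inverse, assuming 16 KiB pages and a 64-bit page counter (the listing uses ulint):

#include <cstdint>
#include <cassert>

int main()
{
    const unsigned PAGE_SIZE_SHIFT = 14;   /* assumed 16 KiB pages */
    uint64_t buffers = 0x123456;           /* arbitrary page index */

    /* low 32 bits and high 32 bits of buffers << PAGE_SIZE_SHIFT */
    uint32_t low  = (uint32_t) ((buffers << PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL);
    uint32_t high = (uint32_t) (buffers >> (32 - PAGE_SIZE_SHIFT));

    /* recombining the halves recovers the full byte offset */
    assert((((uint64_t) high << 32) | low) == buffers << PAGE_SIZE_SHIFT);
    return 0;
}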
2390 
2391 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2392 /**********************************************************************/
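2393 /* Validates the LRU list for one buffer pool instance. */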
2394 static
2395 void
2396 buf_LRU_validate_instance(
2397 /*======================*/
2398  buf_pool_t* buf_pool)
2399 {
2400  buf_page_t* bpage;
2401  buf_block_t* block;
2402  ulint old_len;
2403  ulint new_len;
2404 
2405  ut_ad(buf_pool);
2406  buf_pool_mutex_enter(buf_pool);
2407 
2408  if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
2409 
2410  ut_a(buf_pool->LRU_old);
2411  old_len = buf_pool->LRU_old_len;
2412  new_len = ut_min(UT_LIST_GET_LEN(buf_pool->LRU)
2413  * buf_pool->LRU_old_ratio
2414  / BUF_LRU_OLD_RATIO_DIV,
2415  UT_LIST_GET_LEN(buf_pool->LRU)
2416  - (BUF_LRU_OLD_TOLERANCE
2417  + BUF_LRU_NON_OLD_MIN_LEN));
2418  ut_a(old_len >= new_len - BUF_LRU_OLD_TOLERANCE);
2419  ut_a(old_len <= new_len + BUF_LRU_OLD_TOLERANCE);
2420  }
2421 
2422  UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU,
2423  ut_ad(ut_list_node_313->in_LRU_list));
2424 
2425  bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
2426 
2427  old_len = 0;
2428 
2429  while (bpage != NULL) {
2430 
2431  switch (buf_page_get_state(bpage)) {
2432  case BUF_BLOCK_ZIP_FREE:
2433  case BUF_BLOCK_NOT_USED:
2434  case BUF_BLOCK_READY_FOR_USE:
2435  case BUF_BLOCK_MEMORY:
2436  case BUF_BLOCK_REMOVE_HASH:
2437  ut_error;
2438  break;
2439  case BUF_BLOCK_FILE_PAGE:
2440  ut_ad(((buf_block_t*) bpage)->in_unzip_LRU_list
2441  == buf_page_belongs_to_unzip_LRU(bpage));
2442  case BUF_BLOCK_ZIP_PAGE:
2443  case BUF_BLOCK_ZIP_DIRTY:
2444  break;
2445  }
2446 
2447  if (buf_page_is_old(bpage)) {
2448  const buf_page_t* prev
2449  = UT_LIST_GET_PREV(LRU, bpage);
2450  const buf_page_t* next
2451  = UT_LIST_GET_NEXT(LRU, bpage);
2452 
2453  if (!old_len++) {
2454  ut_a(buf_pool->LRU_old == bpage);
2455  } else {
2456  ut_a(!prev || buf_page_is_old(prev));
2457  }
2458 
2459  ut_a(!next || buf_page_is_old(next));
2460  }
2461 
2462  bpage = UT_LIST_GET_NEXT(LRU, bpage);
2463  }
2464 
2465  ut_a(buf_pool->LRU_old_len == old_len);
2466 
2467  UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free,
2468  ut_ad(ut_list_node_313->in_free_list));
2469 
2470  for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
2471  bpage != NULL;
2472  bpage = UT_LIST_GET_NEXT(list, bpage)) {
2473 
2474  ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
2475  }
2476 
2477  UT_LIST_VALIDATE(unzip_LRU, buf_block_t, buf_pool->unzip_LRU,
2478  ut_ad(ut_list_node_313->in_unzip_LRU_list
2479  && ut_list_node_313->page.in_LRU_list));
2480 
2481  for (block = UT_LIST_GET_FIRST(buf_pool->unzip_LRU);
2482  block;
2483  block = UT_LIST_GET_NEXT(unzip_LRU, block)) {
2484 
2485  ut_ad(block->in_unzip_LRU_list);
2486  ut_ad(block->page.in_LRU_list);
2487  ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
2488  }
2489 
2490  buf_pool_mutex_exit(buf_pool);
2491 }
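
The assertions above bound the actual old-sublist length to within BUF_LRU_OLD_TOLERANCE pages of the target min(LRU_len * LRU_old_ratio / BUF_LRU_OLD_RATIO_DIV, LRU_len - (BUF_LRU_OLD_TOLERANCE + BUF_LRU_NON_OLD_MIN_LEN)). A worked check with hypothetical numbers, assuming a 1000-page LRU and the classic 3/8 old-to-young split:

#include <algorithm>
#include <cassert>

int main()
{
    const unsigned long TOLERANCE = 20;    /* BUF_LRU_OLD_TOLERANCE */
    const unsigned long NON_OLD_MIN = 5;   /* BUF_LRU_NON_OLD_MIN_LEN */
    unsigned long lru_len = 1000;          /* hypothetical LRU length */
    unsigned long ratio = 3, div = 8;      /* assumed 3/8 old ratio */

    unsigned long new_len = std::min(lru_len * ratio / div,
                                     lru_len - (TOLERANCE + NON_OLD_MIN));
    assert(new_len == 375);                /* min(375, 975) */

    unsigned long old_len = 380;           /* hypothetical actual length */
    assert(old_len >= new_len - TOLERANCE && old_len <= new_len + TOLERANCE);
    return 0;
}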
2492 
2493 /**********************************************************************/
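2494 /* Validates the LRU list.
2495 @return TRUE */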
2496 UNIV_INTERN
2497 ibool
2498 buf_LRU_validate(void)
2499 /*==================*/
2500 {
2501  ulint i;
2502 
2503  for (i = 0; i < srv_buf_pool_instances; i++) {
2504  buf_pool_t* buf_pool;
2505 
2506  buf_pool = buf_pool_from_array(i);
2507  buf_LRU_validate_instance(buf_pool);
2508  }
2509 
2510  return(TRUE);
2511 }
2512 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2513 
2514 #if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2515 /**********************************************************************/
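2516 /* Prints the LRU list for one buffer pool instance. */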
2517 UNIV_INTERN
2518 void
2519 buf_LRU_print_instance(
2520 /*===================*/
2521  buf_pool_t* buf_pool)
2522 {
2523  const buf_page_t* bpage;
2524 
2525  ut_ad(buf_pool);
2526  buf_pool_mutex_enter(buf_pool);
2527 
2528  bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
2529 
2530  while (bpage != NULL) {
2531 
2532  mutex_enter(buf_page_get_mutex(bpage));
2533  fprintf(stderr, "BLOCK space %lu page %lu ",
2534  (ulong) buf_page_get_space(bpage),
2535  (ulong) buf_page_get_page_no(bpage));
2536 
2537  if (buf_page_is_old(bpage)) {
2538  fputs("old ", stderr);
2539  }
2540 
2541  if (bpage->buf_fix_count) {
2542  fprintf(stderr, "buffix count %lu ",
2543  (ulong) bpage->buf_fix_count);
2544  }
2545 
2546  if (buf_page_get_io_fix(bpage)) {
2547  fprintf(stderr, "io_fix %lu ",
2548  (ulong) buf_page_get_io_fix(bpage));
2549  }
2550 
2551  if (bpage->oldest_modification) {
2552  fputs("modif. ", stderr);
2553  }
2554 
2555  switch (buf_page_get_state(bpage)) {
2556  const byte* frame;
2557  case BUF_BLOCK_FILE_PAGE:
2558  frame = buf_block_get_frame((buf_block_t*) bpage);
2559  fprintf(stderr, "\ntype %lu"
2560  " index id %llu\n",
2561  (ulong) fil_page_get_type(frame),
2562  (ullint) btr_page_get_index_id(frame));
2563  break;
2564  case BUF_BLOCK_ZIP_PAGE:
2565  frame = bpage->zip.data;
2566  fprintf(stderr, "\ntype %lu size %lu"
2567  " index id %llu\n",
2568  (ulong) fil_page_get_type(frame),
2569  (ulong) buf_page_get_zip_size(bpage),
2570  (ullint) btr_page_get_index_id(frame));
2571  break;
2572 
2573  default:
2574  fprintf(stderr, "\n!state %lu!\n",
2575  (ulong) buf_page_get_state(bpage));
2576  break;
2577  }
2578 
2579  mutex_exit(buf_page_get_mutex(bpage));
2580  bpage = UT_LIST_GET_NEXT(LRU, bpage);
2581  }
2582 
2583  buf_pool_mutex_exit(buf_pool);
2584 }
2585 
2586 /**********************************************************************/
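2587 /* Prints the LRU list. */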
2588 UNIV_INTERN
2589 void
2590 buf_LRU_print(void)
2591 /*===============*/
2592 {
2593  ulint i;
2594  buf_pool_t* buf_pool;
2595 
2596  for (i = 0; i < srv_buf_pool_instances; i++) {
2597  buf_pool = buf_pool_from_array(i);
2598  buf_LRU_print_instance(buf_pool);
2599  }
2600 }
2601 #endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */