Drizzled Public API Documentation

buf0buf.cc
1 /*****************************************************************************
2 
3 Copyright (C) 1995, 2010, Innobase Oy. All Rights Reserved.
4 Copyright (C) 2008, Google Inc.
5 
6 Portions of this file contain modifications contributed and copyrighted by
7 Google, Inc. Those modifications are gratefully acknowledged and are described
8 briefly in the InnoDB documentation. The contributions by Google are
9 incorporated with their permission, and subject to the conditions contained in
10 the file COPYING.Google.
11 
12 This program is free software; you can redistribute it and/or modify it under
13 the terms of the GNU General Public License as published by the Free Software
14 Foundation; version 2 of the License.
15 
16 This program is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
19 
20 You should have received a copy of the GNU General Public License along with
21 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
22 St, Fifth Floor, Boston, MA 02110-1301 USA
23 
24 *****************************************************************************/
25 
26 /**************************************************//**
27 @file buf/buf0buf.c
28 The database buffer buf_pool
29 
30 Created 11/5/1995 Heikki Tuuri
31 *******************************************************/
32 
33 #include "buf0buf.h"
34 
35 #ifdef UNIV_NONINL
36 #include "buf0buf.ic"
37 #endif
38 
39 #include "mem0mem.h"
40 #include "btr0btr.h"
41 #include "fil0fil.h"
42 #ifndef UNIV_HOTBACKUP
43 #include "buf0buddy.h"
44 #include "lock0lock.h"
45 #include "btr0sea.h"
46 #include "ibuf0ibuf.h"
47 #include "trx0undo.h"
48 #include "log0log.h"
49 #endif /* !UNIV_HOTBACKUP */
50 #include "srv0srv.h"
51 #include "dict0dict.h"
52 #include "log0recv.h"
53 #include "page0zip.h"
54 
55 #include <drizzled/errmsg_print.h>
56 
57 /*
58  IMPLEMENTATION OF THE BUFFER POOL
59  =================================
60 
61 Performance improvement:
62 ------------------------
63 Thread scheduling in NT may be so slow that the OS wait mechanism should
64 not be used even in waiting for disk reads to complete.
65 Rather, we should put waiting query threads to the queue of
66 waiting jobs, and let the OS thread do something useful while the i/o
67 is processed. In this way we could remove most OS thread switches in
68 an i/o-intensive benchmark like TPC-C.
69 
70 A possibility is to put a user space thread library between the database
71 and NT. User space thread libraries might be very fast.
72 
73 SQL Server 7.0 can be configured to use 'fibers' which are lightweight
74 threads in NT. These should be studied.
75 
76  Buffer frames and blocks
77  ------------------------
78 Following the terminology of Gray and Reuter, we call the memory
79 blocks where file pages are loaded buffer frames. For each buffer
80 frame there is a control block, or shortly, a block, in the buffer
81 control array. The control info which does not need to be stored
82 in the file along with the file page, resides in the control block.
83 
84  Buffer pool struct
85  ------------------
86 The buffer buf_pool contains a single mutex which protects all the
87 control data structures of the buf_pool. The content of a buffer frame is
88 protected by a separate read-write lock in its control block, though.
89 These locks can be locked and unlocked without owning the buf_pool->mutex.
90 The OS events in the buf_pool struct can be waited for without owning the
91 buf_pool->mutex.
92 
93 The buf_pool->mutex is a hot-spot in main memory, causing a lot of
94 memory bus traffic on multiprocessor systems when processors
95 alternately access the mutex. On our Pentium, the mutex is accessed
96 maybe every 10 microseconds. We gave up the idea of having a mutex
97 for each control block, for instance, because it seemed
98 too complicated.
99 
100 A solution to reduce mutex contention of the buf_pool->mutex is to
101 create a separate mutex for the page hash table. On Pentium,
102 accessing the hash table takes 2 microseconds, about half
103 of the total buf_pool->mutex hold time.
104 
105  Control blocks
106  --------------
107 
108 The control block contains, for instance, the bufferfix count
109 which is incremented when a thread wants a file page to be fixed
110 in a buffer frame. The bufferfix operation does not lock the
111 contents of the frame, however. For this purpose, the control
112 block contains a read-write lock.
113 
114 The buffer frames have to be aligned so that the start memory
115 address of a frame is divisible by the universal page size, which
116 is a power of two.
117 
118 We intend to make the buffer buf_pool size on-line reconfigurable,
119 that is, the buf_pool size can be changed without closing the database.
120 Then the database administrator may adjust it to be bigger
121 at night, for example. The control block array must
122 contain enough control blocks for the maximum buffer buf_pool size
123 which is used in the particular database.
124 If the buf_pool size is cut, we exploit the virtual memory mechanism of
125 the OS, and just refrain from using frames at high addresses. Then the OS
126 can swap them to disk.
127 
128 The control blocks containing file pages are put to a hash table
129 according to the file address of the page.
130 We could speed up the access to an individual page by using
131 "pointer swizzling": we could replace the page references on
132 non-leaf index pages by direct pointers to the page, if it exists
133 in the buf_pool. We could make a separate hash table where we could
134 chain all the page references in non-leaf pages residing in the buf_pool,
135 using the page reference as the hash key,
136 and update the pointers accordingly when a page is read in.
137 Drawbacks of this solution are added complexity and,
138 possibly, extra space required on non-leaf pages for memory pointers.
139 A simpler solution is just to speed up the hash table mechanism
140 in the database, using tables whose size is a power of 2.
141 
142  Lists of blocks
143  ---------------
144 
145 There are several lists of control blocks.
146 
147 The free list (buf_pool->free) contains blocks which are currently not
148 used.
149 
150 The common LRU list contains all the blocks holding a file page
151 except those for which the bufferfix count is non-zero.
152 The pages are in the LRU list roughly in the order of the last
153 access to the page, so that the oldest pages are at the end of the
154 list. We also keep a pointer to near the end of the LRU list,
155 which we can use when we want to artificially age a page in the
156 buf_pool. This is used if we know that some page is not needed
157 again for some time: we insert the block right after the pointer,
158 causing it to be replaced sooner than would normally be the case.
159 Currently this aging mechanism is used by the read-ahead mechanism,
160 and it can also be used when there is a scan of a full
161 table which cannot fit in memory. By putting such pages near the
162 end of the LRU list, we make sure that most of the buf_pool stays
163 in main memory, undisturbed.
164 
165 The unzip_LRU list contains a subset of the common LRU list. The
166 blocks on the unzip_LRU list hold a compressed file page and the
167 corresponding uncompressed page frame. A block is in unzip_LRU if and
168 only if the predicate buf_page_belongs_to_unzip_LRU(&block->page)
169 holds. The blocks in unzip_LRU will be in the same order as they are in
170 the common LRU list. That is, each manipulation of the common LRU
171 list will result in the same manipulation of the unzip_LRU list.
172 
173 The chain of modified blocks (buf_pool->flush_list) contains the blocks
174 holding file pages that have been modified in the memory
175 but not written to disk yet. The block with the oldest modification
176 which has not yet been written to disk is at the end of the chain.
177 The access to this list is protected by buf_pool->flush_list_mutex.
178 
179 The chain of unmodified compressed blocks (buf_pool->zip_clean)
180 contains the control blocks (buf_page_t) of those compressed pages
181 that are not in buf_pool->flush_list and for which no uncompressed
182 page has been allocated in the buffer pool. The control blocks for
183 uncompressed pages are accessible via buf_block_t objects that are
184 reachable via buf_pool->chunks[].
185 
186 The chains of free memory blocks (buf_pool->zip_free[]) are used by
187 the buddy allocator (buf0buddy.c) to keep track of currently unused
188 memory blocks of size sizeof(buf_page_t)..UNIV_PAGE_SIZE / 2. These
189 blocks are inside the UNIV_PAGE_SIZE-sized memory blocks of type
190 BUF_BLOCK_MEMORY that the buddy allocator requests from the buffer
191 pool. The buddy allocator is solely used for allocating control
192 blocks for compressed pages (buf_page_t) and compressed page frames.
193 
194  Loading a file page
195  -------------------
196 
197 First, a victim block for replacement has to be found in the
198 buf_pool. It is taken from the free list or searched for from the
199 end of the LRU-list. An exclusive lock is reserved for the frame,
200 the io_fix field is set in the block fixing the block in buf_pool,
201 and the io-operation for loading the page is queued. The io-handler thread
202 releases the X-lock on the frame and resets the io_fix field
203 when the io operation completes.
204 
205 A thread may request the above operation using the function
206 buf_page_get(). It may then continue to request a lock on the frame.
207 The lock is granted when the io-handler releases the x-lock.
208 
209  Read-ahead
210  ----------
211 
212 The read-ahead mechanism is intended to be intelligent and
213 isolated from the semantically higher levels of the database
214 index management. From the higher level we only need the
215 information if a file page has a natural successor or
216 predecessor page. On the leaf level of a B-tree index,
217 these are the next and previous pages in the natural
218 order of the pages.
219 
220 Let us first explain the read-ahead mechanism when the leaves
221 of a B-tree are scanned in ascending or descending order (a sketch of
222 the border test follows this comment). When a page is first referenced in the buf_pool,
223 the buffer manager checks if it is at the border of a so-called
224 linear read-ahead area. The tablespace is divided into these
225 areas of size 64 blocks, for example. So if the page is at the
226 border of such an area, the read-ahead mechanism checks if
227 all the other blocks in the area have been accessed in an
228 ascending or descending order. If this is the case, the system
229 looks at the natural successor or predecessor of the page,
230 checks if that is at the border of another area, and in this case
231 issues read-requests for all the pages in that area. Maybe
232 we could relax the condition that all the pages in the area
233 have to be accessed: if data is deleted from a table, there may
234 appear holes of unused pages in the area.
235 
236 A different read-ahead mechanism is used when there appears
237 to be a random access pattern to a file.
238 If a new page is referenced in the buf_pool, and several pages
239 of its random access area (for instance, 32 consecutive pages
240 in a tablespace) have recently been referenced, we may predict
241 that the whole area may be needed in the near future, and issue
242 the read requests for the whole area.
243 */
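
A minimal sketch of the linear read-ahead border test described above, assuming an area of 64 pages; the helper name is hypothetical and not part of this file:

    /* A page is at a read-ahead area border when it is the first
    or the last page of its area. */
    static ibool
    buf_read_ahead_page_is_border(ulint offset) /* hypothetical helper */
    {
            const ulint     area = 64;                      /* pages per area */
            ulint           low  = (offset / area) * area;  /* first page of area */
            ulint           high = low + area - 1;          /* last page of area */

            return(offset == low || offset == high);
    }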
244 
245 #ifndef UNIV_HOTBACKUP
246 
247 static const int WAIT_FOR_READ = 5000;
248 /** Number of attempts made to read in a page in the buffer pool */
249 static const ulint BUF_PAGE_READ_MAX_RETRIES = 100;
250 
251 /** The buffer pools of the database */
252 UNIV_INTERN buf_pool_t* buf_pool_ptr;
253 
254 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
255 static ulint buf_dbg_counter = 0; /*!< This is used to insert validation
256  operations in execution in the
257  debug version */
258 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
259 #ifdef UNIV_DEBUG
260 
261 /** If this is set TRUE, the program prints info whenever read-ahead or flush occurs */
262 UNIV_INTERN ibool buf_debug_prints = FALSE;
263 #endif /* UNIV_DEBUG */
264 
265 #ifdef UNIV_PFS_RWLOCK
266 /* Keys to register buffer block related rwlocks and mutexes with
267 performance schema */
268 UNIV_INTERN mysql_pfs_key_t buf_block_lock_key;
269 # ifdef UNIV_SYNC_DEBUG
270 UNIV_INTERN mysql_pfs_key_t buf_block_debug_latch_key;
271 # endif /* UNIV_SYNC_DEBUG */
272 #endif /* UNIV_PFS_RWLOCK */
273 
274 #ifdef UNIV_PFS_MUTEX
275 UNIV_INTERN mysql_pfs_key_t buffer_block_mutex_key;
276 UNIV_INTERN mysql_pfs_key_t buf_pool_mutex_key;
277 UNIV_INTERN mysql_pfs_key_t buf_pool_zip_mutex_key;
278 UNIV_INTERN mysql_pfs_key_t flush_list_mutex_key;
279 #endif /* UNIV_PFS_MUTEX */
280 
281 #if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK
282 # ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
283 
284 /* Buffer block mutexes and rwlocks can be registered
285 in one group rather than individually. If PFS_GROUP_BUFFER_SYNC
286 is defined, register buffer block mutex and rwlock
287 in one group after their initialization. */
288 # define PFS_GROUP_BUFFER_SYNC
289 
290 /* This define caps the number of mutexes/rwlocks can
291 be registered with performance schema. Developers can
292 modify this define if necessary. Please note, this would
293 be effective only if PFS_GROUP_BUFFER_SYNC is defined. */
294 # define PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER ULINT_MAX
295 
296 # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
297 #endif /* UNIV_PFS_MUTEX || UNIV_PFS_RWLOCK */
298 
299 /** A chunk of buffers. The buffer pool is allocated in chunks. */
300 struct buf_chunk_struct{
301  ulint mem_size; /*!< allocated size of the chunk */
302  ulint size; /*!< size of frames[] and blocks[] */
303  void* mem; /*!< pointer to the memory area which
304  was allocated for the frames */
305  buf_block_t* blocks; /*!< array of buffer control blocks */
306 };
307 #endif /* !UNIV_HOTBACKUP */
308 
309 /********************************************************************//**
310 Gets the smallest oldest_modification lsn for any page in the pool. Returns
311 zero if all modified pages have been flushed to disk.
312 @return oldest modification in pool, zero if none */
313 UNIV_INTERN
314 ib_uint64_t
315 buf_pool_get_oldest_modification(void)
316 /*==================================*/
317 {
318  ulint i;
319  buf_page_t* bpage;
320  ib_uint64_t lsn = 0;
321  ib_uint64_t oldest_lsn = 0;
322 
323  /* When we traverse all the flush lists we don't want another
324  thread to add a dirty page to any flush list. */
325  if (srv_buf_pool_instances > 1)
326  log_flush_order_mutex_enter();
327 
328  for (i = 0; i < srv_buf_pool_instances; i++) {
329  buf_pool_t* buf_pool;
330 
331  buf_pool = buf_pool_from_array(i);
332 
333  buf_flush_list_mutex_enter(buf_pool);
334 
335  bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
336 
337  if (bpage != NULL) {
338  ut_ad(bpage->in_flush_list);
339  lsn = bpage->oldest_modification;
340  }
341 
342  buf_flush_list_mutex_exit(buf_pool);
343 
344  if (!oldest_lsn || oldest_lsn > lsn) {
345  oldest_lsn = lsn;
346  }
347  }
348 
350 
351  /* The returned answer may be out of date: the flush_list can
352  change after the mutex has been released. */
353 
354  return(oldest_lsn);
355 }
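
A usage sketch (illustrative, not from this file): a checkpoint writer can compare the returned LSN with its target; checkpoint_lsn here is a hypothetical variable:

    ib_uint64_t oldest = buf_pool_get_oldest_modification();

    if (oldest == 0 || oldest > checkpoint_lsn) {
            /* Every modification up to checkpoint_lsn is already on
            disk (zero means the flush lists are empty), so the
            checkpoint may be written. */
    }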
356 
357 /********************************************************************/
359 UNIV_INTERN
360 void
361 buf_get_total_list_len(
362 /*===================*/
363  ulint* LRU_len,
364  ulint* free_len,
365  ulint* flush_list_len)
366 {
367  ulint i;
368 
369  *LRU_len = 0;
370  *free_len = 0;
371  *flush_list_len = 0;
372 
373  for (i = 0; i < srv_buf_pool_instances; i++) {
374  buf_pool_t* buf_pool;
375 
376  buf_pool = buf_pool_from_array(i);
377  *LRU_len += UT_LIST_GET_LEN(buf_pool->LRU);
378  *free_len += UT_LIST_GET_LEN(buf_pool->free);
379  *flush_list_len += UT_LIST_GET_LEN(buf_pool->flush_list);
380  }
381 }
382 
383 /********************************************************************/
385 UNIV_INTERN
386 void
387 buf_get_total_stat(
388 /*===============*/
389  buf_pool_stat_t* tot_stat)
390 {
391  ulint i;
392 
393  memset(tot_stat, 0, sizeof(*tot_stat));
394 
395  for (i = 0; i < srv_buf_pool_instances; i++) {
396  buf_pool_stat_t*buf_stat;
397  buf_pool_t* buf_pool;
398 
399  buf_pool = buf_pool_from_array(i);
400 
401  buf_stat = &buf_pool->stat;
402  tot_stat->n_page_gets += buf_stat->n_page_gets;
403  tot_stat->n_pages_read += buf_stat->n_pages_read;
404  tot_stat->n_pages_written += buf_stat->n_pages_written;
405  tot_stat->n_pages_created += buf_stat->n_pages_created;
406  tot_stat->n_ra_pages_read += buf_stat->n_ra_pages_read;
407  tot_stat->n_ra_pages_evicted += buf_stat->n_ra_pages_evicted;
408  tot_stat->n_pages_made_young += buf_stat->n_pages_made_young;
409 
410  tot_stat->n_pages_not_made_young +=
411  buf_stat->n_pages_not_made_young;
412  }
413 }
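
A usage sketch (illustrative): the aggregated counters can be turned into a buffer pool hit ratio, since every page get that triggered no physical read was served from memory:

    buf_pool_stat_t stat;

    buf_get_total_stat(&stat);

    if (stat.n_page_gets > 0) {
            /* e.g. 1000 gets with 10 physical reads gives 0.99 */
            double hit_ratio = 1.0 - (double) stat.n_pages_read
                                   / (double) stat.n_page_gets;
    }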
414 
415 /********************************************************************//**
416 Allocates a buffer block.
417 @return own: the allocated block, in state BUF_BLOCK_MEMORY */
418 UNIV_INTERN
419 buf_block_t*
420 buf_block_alloc(
421 /*============*/
422  buf_pool_t* buf_pool, /*!< in: buffer pool instance, or NULL for any */
423  ulint zip_size) /*!< in: compressed page size in bytes,
424  or 0 if uncompressed tablespace */
425 {
426  buf_block_t* block;
427  ulint index;
428  static ulint buf_pool_index;
429 
430  if (buf_pool == NULL) {
431  /* We are allocating memory from any buffer pool, ensure
432  we spread the grace on all buffer pool instances. */
433  index = buf_pool_index++ % srv_buf_pool_instances;
434  buf_pool = buf_pool_from_array(index);
435  }
436 
437  block = buf_LRU_get_free_block(buf_pool, zip_size);
438 
439  buf_block_set_state(block, BUF_BLOCK_MEMORY);
440 
441  return(block);
442 }
443 
444 /********************************************************************//**
445 Calculates a page checksum which is stored to the page when it is written
446 to a file. Note that we must be careful to calculate the same value on
447 32-bit and 64-bit architectures.
448 @return checksum */
449 UNIV_INTERN
450 ulint
451 buf_calc_page_new_checksum(
452 /*=======================*/
453  const byte* page)
454 {
455  ulint checksum;
456 
457  /* Since the field FIL_PAGE_FILE_FLUSH_LSN, and in versions <= 4.1.x
458  ..._ARCH_LOG_NO, are written outside the buffer pool to the first
459  pages of data files, we have to skip them in the page checksum
460  calculation.
461  We must also skip the field FIL_PAGE_SPACE_OR_CHKSUM where the
462  checksum is stored, and also the last 8 bytes of page because
463  there we store the old formula checksum. */
464 
465  checksum = ut_fold_binary(page + FIL_PAGE_OFFSET,
466  FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET)
467  + ut_fold_binary(page + FIL_PAGE_DATA,
468  UNIV_PAGE_SIZE - FIL_PAGE_DATA
469  - FIL_PAGE_END_LSN_OLD_CHKSUM);
470  checksum = checksum & 0xFFFFFFFFUL;
471 
472  return(checksum);
473 }
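
Illustrative note: the new-formula checksum folds the byte ranges [FIL_PAGE_OFFSET, FIL_PAGE_FILE_FLUSH_LSN) and [FIL_PAGE_DATA, UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM), skipping the checksum field itself, the flush LSN, and the trailing old-formula checksum. A writer would store the result roughly like this (sketch of how the write path uses this function):

    mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM,
                    srv_use_checksums
                    ? buf_calc_page_new_checksum(page)
                    : BUF_NO_CHECKSUM_MAGIC);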
474 
475 /********************************************************************//**
476 In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
477 looked at the first few bytes of the page. This calculates that old
478 checksum.
479 NOTE: we must first store the new formula checksum to
480 FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
481 because this takes that field as an input!
482 @return checksum */
483 UNIV_INTERN
484 ulint
485 buf_calc_page_old_checksum(
486 /*=======================*/
487  const byte* page)
488 {
489  ulint checksum;
490 
491  checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN);
492 
493  checksum = checksum & 0xFFFFFFFFUL;
494 
495  return(checksum);
496 }
497 
498 /********************************************************************//**
499 Checks if a page is corrupt.
500 @return TRUE if corrupted */
501 UNIV_INTERN
502 ibool
503 buf_page_is_corrupted(
504 /*==================*/
505  const byte* read_buf,
506  ulint zip_size)
508 {
509  ulint checksum_field;
510  ulint old_checksum_field;
511 
512  if (UNIV_LIKELY(!zip_size)
513  && memcmp(read_buf + FIL_PAGE_LSN + 4,
514  read_buf + UNIV_PAGE_SIZE
515  - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {
516 
517  /* Stored log sequence numbers at the start and the end
518  of page do not match */
519 
520  return(TRUE);
521  }
522 
523 #ifndef UNIV_HOTBACKUP
524  if (recv_lsn_checks_on) {
525  ib_uint64_t current_lsn;
526 
527  if (log_peek_lsn(&current_lsn)
528  && UNIV_UNLIKELY
529  (current_lsn
530  < mach_read_from_8(read_buf + FIL_PAGE_LSN))) {
531  ut_print_timestamp(stderr);
532 
533  drizzled::errmsg_printf(drizzled::error::INFO,
534  "InnoDB: Error: page %lu log sequence number %"PRIu64" "
535  "is in the future! Current system log sequence number %"PRIu64". "
536  "Your database may be corrupt or you may have copied the InnoDB tablespace but not the InnoDB log files. See "
537  " " REFMAN "forcing-recovery.html for more information. ",
538  (ulong) mach_read_from_4(read_buf
539  + FIL_PAGE_OFFSET),
540  mach_read_from_8(read_buf + FIL_PAGE_LSN),
541  current_lsn);
542  }
543  }
544 #endif
545 
546  /* If we use checksums validation, make additional check before
547  returning TRUE to ensure that the checksum is not equal to
548  BUF_NO_CHECKSUM_MAGIC which might be stored by InnoDB with checksums
549  disabled. Otherwise, skip checksum calculation and return FALSE */
550 
551  if (UNIV_LIKELY(srv_use_checksums)) {
552  checksum_field = mach_read_from_4(read_buf
553  + FIL_PAGE_SPACE_OR_CHKSUM);
554 
555  if (UNIV_UNLIKELY(zip_size)) {
556  return(checksum_field != BUF_NO_CHECKSUM_MAGIC
557  && checksum_field
558  != page_zip_calc_checksum(read_buf, zip_size));
559  }
560 
561  old_checksum_field = mach_read_from_4(
562  read_buf + UNIV_PAGE_SIZE
563  - FIL_PAGE_END_LSN_OLD_CHKSUM);
564 
565  /* There are 2 valid formulas for old_checksum_field:
566 
567  1. Very old versions of InnoDB only stored 8 byte lsn to the
568  start and the end of the page.
569 
570  2. Newer InnoDB versions store the old formula checksum
571  there. */
572 
573  if (old_checksum_field != mach_read_from_4(read_buf
574  + FIL_PAGE_LSN)
575  && old_checksum_field != BUF_NO_CHECKSUM_MAGIC
576  && old_checksum_field
577  != buf_calc_page_old_checksum(read_buf)) {
578 
579  return(TRUE);
580  }
581 
582  /* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
583  (always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */
584 
585  if (checksum_field != 0
586  && checksum_field != BUF_NO_CHECKSUM_MAGIC
587  && checksum_field
588  != buf_calc_page_new_checksum(read_buf)) {
589 
590  return(TRUE);
591  }
592  }
593 
594  return(FALSE);
595 }
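
A usage sketch (illustrative): validating a frame after a physical read; zip_size is 0 for uncompressed tablespaces:

    if (buf_page_is_corrupted(frame, zip_size)) {
            buf_page_print(frame, zip_size);
            /* The caller may retry the read, up to
            BUF_PAGE_READ_MAX_RETRIES attempts, before failing. */
    }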
596 
597 /********************************************************************//**
598 Prints a page to stderr. */
599 UNIV_INTERN
600 void
601 buf_page_print(
602 /*===========*/
603  const byte* read_buf,
604  ulint zip_size)
606 {
607 #ifndef UNIV_HOTBACKUP
608  dict_index_t* index;
609 #endif /* !UNIV_HOTBACKUP */
610  ulint checksum;
611  ulint old_checksum;
612  ulint size = zip_size;
613 
614  if (!size) {
615  size = UNIV_PAGE_SIZE;
616  }
617 
618  ut_print_timestamp(stderr);
619  fprintf(stderr, " InnoDB: Page dump in ascii and hex (%lu bytes):\n",
620  (ulong) size);
621  ut_print_buf(stderr, read_buf, size);
622  fputs("\nInnoDB: End of page dump\n", stderr);
623 
624  if (zip_size) {
625  /* Print compressed page. */
626 
627  switch (fil_page_get_type(read_buf)) {
628  case FIL_PAGE_TYPE_ZBLOB:
629  case FIL_PAGE_TYPE_ZBLOB2:
630  checksum = srv_use_checksums
631  ? page_zip_calc_checksum(read_buf, zip_size)
632  : BUF_NO_CHECKSUM_MAGIC;
633  ut_print_timestamp(stderr);
634  fprintf(stderr,
635  " InnoDB: Compressed BLOB page"
636  " checksum %lu, stored %lu\n"
637  "InnoDB: Page lsn %lu %lu\n"
638  "InnoDB: Page number (if stored"
639  " to page already) %lu,\n"
640  "InnoDB: space id (if stored"
641  " to page already) %lu\n",
642  (ulong) checksum,
643  (ulong) mach_read_from_4(
644  read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
645  (ulong) mach_read_from_4(
646  read_buf + FIL_PAGE_LSN),
647  (ulong) mach_read_from_4(
648  read_buf + (FIL_PAGE_LSN + 4)),
649  (ulong) mach_read_from_4(
650  read_buf + FIL_PAGE_OFFSET),
651  (ulong) mach_read_from_4(
652  read_buf
653  + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
654  return;
655  default:
656  ut_print_timestamp(stderr);
657  fprintf(stderr,
658  " InnoDB: unknown page type %lu,"
659  " assuming FIL_PAGE_INDEX\n",
660  fil_page_get_type(read_buf));
661  /* fall through */
662  case FIL_PAGE_INDEX:
663  checksum = srv_use_checksums
664  ? page_zip_calc_checksum(read_buf, zip_size)
665  : BUF_NO_CHECKSUM_MAGIC;
666 
667  ut_print_timestamp(stderr);
668  fprintf(stderr,
669  " InnoDB: Compressed page checksum %lu,"
670  " stored %lu\n"
671  "InnoDB: Page lsn %lu %lu\n"
672  "InnoDB: Page number (if stored"
673  " to page already) %lu,\n"
674  "InnoDB: space id (if stored"
675  " to page already) %lu\n",
676  (ulong) checksum,
677  (ulong) mach_read_from_4(
678  read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
679  (ulong) mach_read_from_4(
680  read_buf + FIL_PAGE_LSN),
681  (ulong) mach_read_from_4(
682  read_buf + (FIL_PAGE_LSN + 4)),
683  (ulong) mach_read_from_4(
684  read_buf + FIL_PAGE_OFFSET),
685  (ulong) mach_read_from_4(
686  read_buf
687  + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
688  return;
689  case FIL_PAGE_TYPE_XDES:
690  /* This is an uncompressed page. */
691  break;
692  }
693  }
694 
695  checksum = srv_use_checksums
696  ? buf_calc_page_new_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
697  old_checksum = srv_use_checksums
698  ? buf_calc_page_old_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
699 
700  ut_print_timestamp(stderr);
701  fprintf(stderr,
702  " InnoDB: Page checksum %lu, prior-to-4.0.14-form"
703  " checksum %lu\n"
704  "InnoDB: stored checksum %lu, prior-to-4.0.14-form"
705  " stored checksum %lu\n"
706  "InnoDB: Page lsn %lu %lu, low 4 bytes of lsn"
707  " at page end %lu\n"
708  "InnoDB: Page number (if stored to page already) %lu,\n"
709  "InnoDB: space id (if created with >= MySQL-4.1.1"
710  " and stored already) %lu\n",
711  (ulong) checksum, (ulong) old_checksum,
712  (ulong) mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
713  (ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
714  - FIL_PAGE_END_LSN_OLD_CHKSUM),
715  (ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN),
716  (ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN + 4),
717  (ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
718  - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
719  (ulong) mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
720  (ulong) mach_read_from_4(read_buf
721  + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
722 
723 #ifndef UNIV_HOTBACKUP
724  if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE)
725  == TRX_UNDO_INSERT) {
726  fprintf(stderr,
727  "InnoDB: Page may be an insert undo log page\n");
728  } else if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR
729  + TRX_UNDO_PAGE_TYPE)
730  == TRX_UNDO_UPDATE) {
731  fprintf(stderr,
732  "InnoDB: Page may be an update undo log page\n");
733  }
734 #endif /* !UNIV_HOTBACKUP */
735 
736  switch (fil_page_get_type(read_buf)) {
737  index_id_t index_id;
738  case FIL_PAGE_INDEX:
739  index_id = btr_page_get_index_id(read_buf);
740  fprintf(stderr,
741  "InnoDB: Page may be an index page where"
742  " index id is %llu\n",
743  (ullint) index_id);
744 #ifndef UNIV_HOTBACKUP
745  index = dict_index_find_on_id_low(index_id);
746  if (index) {
747  fputs("InnoDB: (", stderr);
748  dict_index_name_print(stderr, NULL, index);
749  fputs(")\n", stderr);
750  }
751 #endif /* !UNIV_HOTBACKUP */
752  break;
753  case FIL_PAGE_INODE:
754  fputs("InnoDB: Page may be an 'inode' page\n", stderr);
755  break;
756  case FIL_PAGE_IBUF_FREE_LIST:
757  fputs("InnoDB: Page may be an insert buffer free list page\n",
758  stderr);
759  break;
760  case FIL_PAGE_TYPE_ALLOCATED:
761  fputs("InnoDB: Page may be a freshly allocated page\n",
762  stderr);
763  break;
764  case FIL_PAGE_IBUF_BITMAP:
765  fputs("InnoDB: Page may be an insert buffer bitmap page\n",
766  stderr);
767  break;
768  case FIL_PAGE_TYPE_SYS:
769  fputs("InnoDB: Page may be a system page\n",
770  stderr);
771  break;
772  case FIL_PAGE_TYPE_TRX_SYS:
773  fputs("InnoDB: Page may be a transaction system page\n",
774  stderr);
775  break;
776  case FIL_PAGE_TYPE_FSP_HDR:
777  fputs("InnoDB: Page may be a file space header page\n",
778  stderr);
779  break;
780  case FIL_PAGE_TYPE_XDES:
781  fputs("InnoDB: Page may be an extent descriptor page\n",
782  stderr);
783  break;
784  case FIL_PAGE_TYPE_BLOB:
785  fputs("InnoDB: Page may be a BLOB page\n",
786  stderr);
787  break;
788  case FIL_PAGE_TYPE_ZBLOB:
789  case FIL_PAGE_TYPE_ZBLOB2:
790  fputs("InnoDB: Page may be a compressed BLOB page\n",
791  stderr);
792  break;
793  }
794 }
795 
796 #ifndef UNIV_HOTBACKUP
797 
798 # ifdef PFS_GROUP_BUFFER_SYNC
799 /********************************************************************/
805 static
806 void
807 pfs_register_buffer_block(
808 /*======================*/
809  buf_chunk_t* chunk)
810 {
811  ulint i;
812  ulint num_to_register;
813  buf_block_t* block;
814 
815  block = chunk->blocks;
816 
817  num_to_register = ut_min(chunk->size,
818  PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER);
819 
820  for (i = 0; i < num_to_register; i++) {
821  mutex_t* mutex;
822  rw_lock_t* rwlock;
823 
824 # ifdef UNIV_PFS_MUTEX
825  mutex = &block->mutex;
826  ut_a(!mutex->pfs_psi);
827  mutex->pfs_psi = (PSI_server)
828  ? PSI_server->init_mutex(buffer_block_mutex_key, mutex)
829  : NULL;
830 # endif /* UNIV_PFS_MUTEX */
831 
832 # ifdef UNIV_PFS_RWLOCK
833  rwlock = &block->lock;
834  ut_a(!rwlock->pfs_psi);
835  rwlock->pfs_psi = (PSI_server)
836  ? PSI_server->init_rwlock(buf_block_lock_key, rwlock)
837  : NULL;
838 # endif /* UNIV_PFS_RWLOCK */
839  block++;
840  }
841 }
842 # endif /* PFS_GROUP_BUFFER_SYNC */
843 
844 /********************************************************************/
846 static
847 void
848 buf_block_init(
849 /*===========*/
850  buf_pool_t* buf_pool,
851  buf_block_t* block,
852  byte* frame)
853 {
854  UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE, block);
855 
856  block->frame = frame;
857 
858  block->page.buf_pool_index = buf_pool_index(buf_pool);
859  block->page.state = BUF_BLOCK_NOT_USED;
860  block->page.buf_fix_count = 0;
861  block->page.io_fix = BUF_IO_NONE;
862 
863  block->modify_clock = 0;
864 
865 #ifdef UNIV_DEBUG_FILE_ACCESSES
866  block->page.file_page_was_freed = FALSE;
867 #endif /* UNIV_DEBUG_FILE_ACCESSES */
868 
869  block->check_index_page_at_flush = FALSE;
870  block->index = NULL;
871 
872  block->is_hashed = FALSE;
873 
874 #ifdef UNIV_DEBUG
875  block->page.in_page_hash = FALSE;
876  block->page.in_zip_hash = FALSE;
877  block->page.in_flush_list = FALSE;
878  block->page.in_free_list = FALSE;
879  block->page.in_LRU_list = FALSE;
880  block->in_unzip_LRU_list = FALSE;
881 #endif /* UNIV_DEBUG */
882 #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
883  block->n_pointers = 0;
884 #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
885  page_zip_des_init(&block->page.zip);
886 
887 #if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC
888  /* If PFS_SKIP_BUFFER_MUTEX_RWLOCK is defined, skip registration
889  of buffer block mutex/rwlock with performance schema. If
890  PFS_GROUP_BUFFER_SYNC is defined, skip the registration
891  since buffer block mutex/rwlock will be registered later in
892  pfs_register_buffer_block() */
893 
894  mutex_create(PFS_NOT_INSTRUMENTED, &block->mutex, SYNC_BUF_BLOCK);
895  rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, SYNC_LEVEL_VARYING);
896 #else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
897  mutex_create(buffer_block_mutex_key, &block->mutex, SYNC_BUF_BLOCK);
898  rw_lock_create(buf_block_lock_key, &block->lock, SYNC_LEVEL_VARYING);
899 #endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
900 
901  ut_ad(rw_lock_validate(&(block->lock)));
902 
903 #ifdef UNIV_SYNC_DEBUG
904  rw_lock_create(buf_block_debug_latch_key,
905  &block->debug_latch, SYNC_NO_ORDER_CHECK);
906 #endif /* UNIV_SYNC_DEBUG */
907 }
908 
909 /********************************************************************/
912 static
913 buf_chunk_t*
914 buf_chunk_init(
915 /*===========*/
916  buf_pool_t* buf_pool,
917  buf_chunk_t* chunk,
918  ulint mem_size)
919 {
920  buf_block_t* block;
921  byte* frame;
922  ulint i;
923 
924  /* Round down to a multiple of page size,
925  although it already should be. */
926  mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
927  /* Reserve space for the block descriptors. */
928  mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
929  + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
930 
931  chunk->mem_size = mem_size;
932  chunk->mem = os_mem_alloc_large(&chunk->mem_size);
933 
934  if (UNIV_UNLIKELY(chunk->mem == NULL)) {
935 
936  return(NULL);
937  }
938 
939  /* Allocate the block descriptors from
940  the start of the memory block. */
941  chunk->blocks = static_cast<buf_block_struct *>(chunk->mem);
942 
943  /* Align a pointer to the first frame. Note that when
944  os_large_page_size is smaller than UNIV_PAGE_SIZE,
945  we may allocate one fewer block than requested. When
946  it is bigger, we may allocate more blocks than requested. */
947 
948  frame = static_cast<unsigned char *>(ut_align(chunk->mem, UNIV_PAGE_SIZE));
949  chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
950  - (frame != chunk->mem);
951 
952  /* Subtract the space needed for block descriptors. */
953  {
954  ulint size = chunk->size;
955 
956  while (frame < (byte*) (chunk->blocks + size)) {
957  frame += UNIV_PAGE_SIZE;
958  size--;
959  }
960 
961  chunk->size = size;
962  }
963 
964  /* Init block structs and assign frames for them. Then we
965  assign the frames to the first blocks (we already mapped the
966  memory above). */
967 
968  block = chunk->blocks;
969 
970  for (i = chunk->size; i--; ) {
971 
972  buf_block_init(buf_pool, block, frame);
973 
974 #ifdef HAVE_VALGRIND
975  /* Wipe contents of frame to eliminate a Purify warning */
976  memset(block->frame, '\0', UNIV_PAGE_SIZE);
977 #endif
978  /* Add the block to the free list */
979  UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
980 
981  ut_d(block->page.in_free_list = TRUE);
982  ut_ad(buf_pool_from_block(block) == buf_pool);
983 
984  block++;
985  frame += UNIV_PAGE_SIZE;
986  }
987 
988 #ifdef PFS_GROUP_BUFFER_SYNC
989  pfs_register_buffer_block(chunk);
990 #endif
991  return(chunk);
992 }
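
Worked example of the capacity arithmetic above (illustrative, assuming UNIV_PAGE_SIZE = 16384 and a hypothetical descriptor size of 368 bytes): a 16 MiB request holds 1024 page-sized units, whose 1024 descriptors need about 23 extra pages, so mem_size grows accordingly. After aligning the first frame, the loop walks frames out of the descriptor area one page at a time, leaving roughly mem_size / (UNIV_PAGE_SIZE + sizeof(buf_block_t)) usable frames.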
993 
994 #ifdef UNIV_DEBUG
995 /*********************************************************************/
999 static
1000 buf_block_t*
1001 buf_chunk_contains_zip(
1002 /*===================*/
1003  buf_chunk_t* chunk,
1004  const void* data)
1005 {
1006  buf_block_t* block;
1007  ulint i;
1008 
1009  block = chunk->blocks;
1010 
1011  for (i = chunk->size; i--; block++) {
1012  if (block->page.zip.data == data) {
1013 
1014  return(block);
1015  }
1016  }
1017 
1018  return(NULL);
1019 }
1020 
1021 /*********************************************************************/
1025 UNIV_INTERN
1026 buf_block_t*
1027 buf_pool_contains_zip(
1028 /*==================*/
1029  buf_pool_t* buf_pool,
1030  const void* data)
1031 {
1032  ulint n;
1033  buf_chunk_t* chunk = buf_pool->chunks;
1034 
1035  ut_ad(buf_pool);
1036  ut_ad(buf_pool_mutex_own(buf_pool));
1037  for (n = buf_pool->n_chunks; n--; chunk++) {
1038 
1039  buf_block_t* block = buf_chunk_contains_zip(chunk, data);
1040 
1041  if (block) {
1042  return(block);
1043  }
1044  }
1045 
1046  return(NULL);
1047 }
1048 #endif /* UNIV_DEBUG */
1049 
1050 /*********************************************************************/
1053 static
1054 const buf_block_t*
1055 buf_chunk_not_freed(
1056 /*================*/
1057  buf_chunk_t* chunk)
1058 {
1059  buf_block_t* block;
1060  ulint i;
1061 
1062  block = chunk->blocks;
1063 
1064  for (i = chunk->size; i--; block++) {
1065  ibool ready;
1066 
1067  switch (buf_block_get_state(block)) {
1068  case BUF_BLOCK_ZIP_FREE:
1069  case BUF_BLOCK_ZIP_PAGE:
1070  case BUF_BLOCK_ZIP_DIRTY:
1071  /* The uncompressed buffer pool should never
1072  contain compressed block descriptors. */
1073  ut_error;
1074  break;
1075  case BUF_BLOCK_NOT_USED:
1076  case BUF_BLOCK_READY_FOR_USE:
1077  case BUF_BLOCK_MEMORY:
1078  case BUF_BLOCK_REMOVE_HASH:
1079  /* Skip blocks that are not being used for
1080  file pages. */
1081  break;
1082  case BUF_BLOCK_FILE_PAGE:
1083  mutex_enter(&block->mutex);
1084  ready = buf_flush_ready_for_replace(&block->page);
1085  mutex_exit(&block->mutex);
1086 
1087  if (!ready) {
1088 
1089  return(block);
1090  }
1091 
1092  break;
1093  }
1094  }
1095 
1096  return(NULL);
1097 }
1098 
1099 /*********************************************************************/
1102 static
1103 ibool
1104 buf_chunk_all_free(
1105 /*===============*/
1106  const buf_chunk_t* chunk)
1107 {
1108  const buf_block_t* block;
1109  ulint i;
1110 
1111  block = chunk->blocks;
1112 
1113  for (i = chunk->size; i--; block++) {
1114 
1115  if (buf_block_get_state(block) != BUF_BLOCK_NOT_USED) {
1116 
1117  return(FALSE);
1118  }
1119  }
1120 
1121  return(TRUE);
1122 }
1123 
1124 /********************************************************************/
1126 static
1127 void
1128 buf_chunk_free(
1129 /*===========*/
1130  buf_pool_t* buf_pool,
1131  buf_chunk_t* chunk)
1132 {
1133  buf_block_t* block;
1134  const buf_block_t* block_end;
1135 
1136  ut_ad(buf_pool_mutex_own(buf_pool));
1137 
1138  block_end = chunk->blocks + chunk->size;
1139 
1140  for (block = chunk->blocks; block < block_end; block++) {
1141  ut_a(buf_block_get_state(block) == BUF_BLOCK_NOT_USED);
1142  ut_a(!block->page.zip.data);
1143 
1144  ut_ad(!block->page.in_LRU_list);
1145  ut_ad(!block->in_unzip_LRU_list);
1146  ut_ad(!block->page.in_flush_list);
1147  /* Remove the block from the free list. */
1148  ut_ad(block->page.in_free_list);
1149  UT_LIST_REMOVE(list, buf_pool->free, (&block->page));
1150 
1151  /* Free the latches. */
1152  mutex_free(&block->mutex);
1153  rw_lock_free(&block->lock);
1154 #ifdef UNIV_SYNC_DEBUG
1155  rw_lock_free(&block->debug_latch);
1156 #endif /* UNIV_SYNC_DEBUG */
1157  UNIV_MEM_UNDESC(block);
1158  }
1159 
1160  os_mem_free_large(chunk->mem, chunk->mem_size);
1161 }
1162 
1163 /********************************************************************/
1165 static
1166 void
1167 buf_pool_set_sizes(void)
1168 /*====================*/
1169 {
1170  ulint i;
1171  ulint curr_size = 0;
1172 
1173  buf_pool_mutex_enter_all();
1174 
1175  for (i = 0; i < srv_buf_pool_instances; i++) {
1176  buf_pool_t* buf_pool;
1177 
1178  buf_pool = buf_pool_from_array(i);
1179  curr_size += buf_pool->curr_pool_size;
1180  }
1181 
1182  srv_buf_pool_curr_size = curr_size;
1183  srv_buf_pool_old_size = srv_buf_pool_size;
1184 
1185  buf_pool_mutex_exit_all();
1186 }
1187 
1188 /********************************************************************/
1191 static
1192 ulint
1193 buf_pool_init_instance(
1194 /*===================*/
1195  buf_pool_t* buf_pool,
1196  ulint buf_pool_size,
1197  ulint instance_no)
1198 {
1199  ulint i;
1200  buf_chunk_t* chunk;
1201 
1202  /* 1. Initialize general fields
1203  ------------------------------- */
1204  mutex_create(buf_pool_mutex_key,
1205  &buf_pool->mutex, SYNC_BUF_POOL);
1206  mutex_create(buf_pool_zip_mutex_key,
1207  &buf_pool->zip_mutex, SYNC_BUF_BLOCK);
1208 
1209  buf_pool_mutex_enter(buf_pool);
1210 
1211  if (buf_pool_size > 0) {
1212  buf_pool->n_chunks = 1;
1213  void *chunk_ptr= mem_zalloc((sizeof *chunk));
1214  buf_pool->chunks = chunk = static_cast<buf_chunk_t *>(chunk_ptr);
1215 
1216  UT_LIST_INIT(buf_pool->free);
1217 
1218  if (!buf_chunk_init(buf_pool, chunk, buf_pool_size)) {
1219  mem_free(chunk);
1220  mem_free(buf_pool);
1221 
1222  buf_pool_mutex_exit(buf_pool);
1223 
1224  return(DB_ERROR);
1225  }
1226 
1227  buf_pool->instance_no = instance_no;
1228  buf_pool->old_pool_size = buf_pool_size;
1229  buf_pool->curr_size = chunk->size;
1230  buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
1231 
1232  buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
1233  buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
1234 
1235  buf_pool->last_printout_time = ut_time();
1236  }
1237  /* 2. Initialize flushing fields
1238  -------------------------------- */
1239 
1240  mutex_create(flush_list_mutex_key, &buf_pool->flush_list_mutex,
1241  SYNC_BUF_FLUSH_LIST);
1242 
1243  for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
1244  buf_pool->no_flush[i] = os_event_create(NULL);
1245  }
1246 
1247  /* 3. Initialize LRU fields
1248  --------------------------- */
1249 
1250  /* All fields are initialized by mem_zalloc(). */
1251 
1252  buf_pool_mutex_exit(buf_pool);
1253 
1254  return(DB_SUCCESS);
1255 }
1256 
1257 /********************************************************************/
1259 static
1260 void
1261 buf_pool_free_instance(
1262 /*===================*/
1263  buf_pool_t* buf_pool) /* in,own: buffer pool instance
1264  to free */
1265 {
1266  buf_chunk_t* chunk;
1267  buf_chunk_t* chunks;
1268 
1269  chunks = buf_pool->chunks;
1270  chunk = chunks + buf_pool->n_chunks;
1271 
1272  while (--chunk >= chunks) {
1273  /* Bypass the checks of buf_chunk_free(), since they
1274  would fail at shutdown. */
1275  os_mem_free_large(chunk->mem, chunk->mem_size);
1276  }
1277 
1278  mem_free(buf_pool->chunks);
1279  hash_table_free(buf_pool->page_hash);
1280  hash_table_free(buf_pool->zip_hash);
1281 }
1282 
1283 /********************************************************************/
1286 UNIV_INTERN
1287 ulint
1288 buf_pool_init(
1289 /*==========*/
1290  ulint total_size,
1291  ulint n_instances)
1292 {
1293  ulint i;
1294  const ulint size = total_size / n_instances;
1295 
1296  ut_ad(n_instances > 0);
1297  ut_ad(n_instances <= MAX_BUFFER_POOLS);
1298  ut_ad(n_instances == srv_buf_pool_instances);
1299 
1300  /* We create an extra buffer pool instance, this instance is used
1301  for flushing the flush lists, to keep track of n_flush for all
1302  the buffer pools and also used as a waiting object during flushing. */
1303  void *buf_pool_void_ptr= mem_zalloc(n_instances * sizeof *buf_pool_ptr);
1304  buf_pool_ptr = static_cast<buf_pool_struct *>(buf_pool_void_ptr);
1305 
1306  for (i = 0; i < n_instances; i++) {
1307  buf_pool_t* ptr = &buf_pool_ptr[i];
1308 
1309  if (buf_pool_init_instance(ptr, size, i) != DB_SUCCESS) {
1310 
1311  /* Free all the instances created so far. */
1312  buf_pool_free(i);
1313 
1314  return(DB_ERROR);
1315  }
1316  }
1317 
1318  buf_pool_set_sizes();
1319  buf_LRU_old_ratio_update(100 * 3/ 8, FALSE);
1320 
1321  btr_search_sys_create(buf_pool_get_curr_size() / sizeof(void*) / 64);
1322 
1323  return(DB_SUCCESS);
1324 }
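
A startup sketch (illustrative): creating a 256 MiB buffer pool split across two instances, with the matching shutdown call; n_instances must equal srv_buf_pool_instances, as asserted above:

    if (buf_pool_init(256 * 1024 * 1024, 2) != DB_SUCCESS) {
            /* fatal: the buffer pool could not be allocated */
    }

    /* ... run the server ... */

    buf_pool_free(srv_buf_pool_instances);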
1325 
1326 /********************************************************************/
1329 UNIV_INTERN
1330 void
1331 buf_pool_free(
1332 /*==========*/
1333  ulint n_instances)
1334 {
1335  ulint i;
1336 
1337  for (i = 0; i < n_instances; i++) {
1338  buf_pool_free_instance(buf_pool_from_array(i));
1339  }
1340 
1341  mem_free(buf_pool_ptr);
1342  buf_pool_ptr = NULL;
1343 }
1344 
1345 /********************************************************************/
1347 static
1348 void
1349 buf_pool_drop_hash_index_instance(
1350 /*==============================*/
1351  buf_pool_t* buf_pool,
1352  ibool* released_search_latch)
1355 {
1356  buf_chunk_t* chunks = buf_pool->chunks;
1357  buf_chunk_t* chunk = chunks + buf_pool->n_chunks;
1358 
1359  while (--chunk >= chunks) {
1360  ulint i;
1361  buf_block_t* block = chunk->blocks;
1362 
1363  for (i = chunk->size; i--; block++) {
1364  /* block->is_hashed cannot be modified
1365  when we have an x-latch on btr_search_latch;
1366  see the comment in buf0buf.h */
1367 
1368  if (!block->is_hashed) {
1369  continue;
1370  }
1371 
1372  /* To follow the latching order, we
1373  have to release btr_search_latch
1374  before acquiring block->latch. */
1375  rw_lock_x_unlock(&btr_search_latch);
1376  /* When we release the search latch,
1377  we must rescan all blocks, because
1378  some may become hashed again. */
1379  *released_search_latch = TRUE;
1380 
1381  rw_lock_x_lock(&block->lock);
1382 
1383  /* This should be guaranteed by the
1384  callers, which will be holding
1385  btr_search_enabled_mutex. */
1386  ut_ad(!btr_search_enabled);
1387 
1388  /* Because we did not buffer-fix the
1389  block by calling buf_block_get_gen(),
1390  it is possible that the block has been
1391  allocated for some other use after
1392  btr_search_latch was released above.
1393  We do not care which file page the
1394  block is mapped to. All we want to do
1395  is to drop any hash entries referring
1396  to the page. */
1397 
1398  /* It is possible that
1399  block->page.state != BUF_FILE_PAGE.
1400  Even that does not matter, because
1401  btr_search_drop_page_hash_index() will
1402  check block->is_hashed before doing
1403  anything. block->is_hashed can only
1404  be set on uncompressed file pages. */
1405 
1406  btr_search_drop_page_hash_index(block);
1407 
1408  rw_lock_x_unlock(&block->lock);
1409 
1410  rw_lock_x_lock(&btr_search_latch);
1411 
1412  ut_ad(!btr_search_enabled);
1413  }
1414  }
1415 }
1416 
1417 /********************************************************************/
1421 UNIV_INTERN
1422 void
1423 buf_pool_drop_hash_index(void)
1424 /*==========================*/
1425 {
1426  ibool released_search_latch;
1427 
1428 #ifdef UNIV_SYNC_DEBUG
1429  ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
1430 #endif /* UNIV_SYNC_DEBUG */
1431  ut_ad(!btr_search_enabled);
1432 
1433  do {
1434  ulint i;
1435 
1436  released_search_latch = FALSE;
1437 
1438  for (i = 0; i < srv_buf_pool_instances; i++) {
1439  buf_pool_t* buf_pool;
1440 
1441  buf_pool = buf_pool_from_array(i);
1442 
1443  buf_pool_drop_hash_index_instance(
1444  buf_pool, &released_search_latch);
1445  }
1446 
1447  } while (released_search_latch);
1448 }
1449 
1450 /********************************************************************/
1454 UNIV_INTERN
1455 void
1456 buf_relocate(
1457 /*=========*/
1458  buf_page_t* bpage,
1461  buf_page_t* dpage)
1462 {
1463  buf_page_t* b;
1464  ulint fold;
1465  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
1466 
1467  ut_ad(buf_pool_mutex_own(buf_pool));
1468  ut_ad(mutex_own(buf_page_get_mutex(bpage)));
1469  ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
1470  ut_a(bpage->buf_fix_count == 0);
1471  ut_ad(bpage->in_LRU_list);
1472  ut_ad(!bpage->in_zip_hash);
1473  ut_ad(bpage->in_page_hash);
1474  ut_ad(bpage == buf_page_hash_get(buf_pool,
1475  bpage->space, bpage->offset));
1476  ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
1477 #ifdef UNIV_DEBUG
1478  switch (buf_page_get_state(bpage)) {
1479  case BUF_BLOCK_ZIP_FREE:
1480  case BUF_BLOCK_NOT_USED:
1481  case BUF_BLOCK_READY_FOR_USE:
1482  case BUF_BLOCK_FILE_PAGE:
1483  case BUF_BLOCK_MEMORY:
1484  case BUF_BLOCK_REMOVE_HASH:
1485  ut_error;
1486  case BUF_BLOCK_ZIP_DIRTY:
1487  case BUF_BLOCK_ZIP_PAGE:
1488  break;
1489  }
1490 #endif /* UNIV_DEBUG */
1491 
1492  memcpy(dpage, bpage, sizeof *dpage);
1493 
1494  ut_d(bpage->in_LRU_list = FALSE);
1495  ut_d(bpage->in_page_hash = FALSE);
1496 
1497  /* relocate buf_pool->LRU */
1498  b = UT_LIST_GET_PREV(LRU, bpage);
1499  UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
1500 
1501  if (b) {
1502  UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, b, dpage);
1503  } else {
1504  UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, dpage);
1505  }
1506 
1507  if (UNIV_UNLIKELY(buf_pool->LRU_old == bpage)) {
1508  buf_pool->LRU_old = dpage;
1509 #ifdef UNIV_LRU_DEBUG
1510  /* buf_pool->LRU_old must be the first item in the LRU list
1511  whose "old" flag is set. */
1512  ut_a(buf_pool->LRU_old->old);
1513  ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
1514  || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
1515  ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
1516  || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
1517  } else {
1518  /* Check that the "old" flag is consistent in
1519  the block and its neighbours. */
1520  buf_page_set_old(dpage, buf_page_is_old(dpage));
1521 #endif /* UNIV_LRU_DEBUG */
1522  }
1523 
1524  ut_d(UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU,
1525  ut_ad(ut_list_node_313->in_LRU_list)));
1526 
1527  /* relocate buf_pool->page_hash */
1528  fold = buf_page_address_fold(bpage->space, bpage->offset);
1529 
1530  HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
1531  HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
1532 }
1533 
1534 /********************************************************************/
1536 static
1537 void
1538 buf_pool_shrink_instance(
1539 /*=====================*/
1540  buf_pool_t* buf_pool,
1541  ulint chunk_size)
1542 {
1543  buf_chunk_t* chunks;
1544  buf_chunk_t* chunk;
1545  ulint max_size;
1546  ulint max_free_size;
1547  buf_chunk_t* max_chunk;
1548  buf_chunk_t* max_free_chunk;
1549 
1550  ut_ad(!buf_pool_mutex_own(buf_pool));
1551 
1552 try_again:
1553  btr_search_disable(); /* Empty the adaptive hash index again */
1554  buf_pool_mutex_enter(buf_pool);
1555 
1556 shrink_again:
1557  if (buf_pool->n_chunks <= 1) {
1558 
1559  /* Cannot shrink if there is only one chunk */
1560  goto func_done;
1561  }
1562 
1563  /* Search for the largest free chunk
1564  not larger than the size difference */
1565  chunks = buf_pool->chunks;
1566  chunk = chunks + buf_pool->n_chunks;
1567  max_size = max_free_size = 0;
1568  max_chunk = max_free_chunk = NULL;
1569 
1570  while (--chunk >= chunks) {
1571  if (chunk->size <= chunk_size
1572  && chunk->size > max_free_size) {
1573  if (chunk->size > max_size) {
1574  max_size = chunk->size;
1575  max_chunk = chunk;
1576  }
1577 
1578  if (buf_chunk_all_free(chunk)) {
1579  max_free_size = chunk->size;
1580  max_free_chunk = chunk;
1581  }
1582  }
1583  }
1584 
1585  if (!max_free_size) {
1586 
1587  ulint dirty = 0;
1588  ulint nonfree = 0;
1589  buf_block_t* block;
1590  buf_block_t* bend;
1591 
1592  /* Cannot shrink: try again later
1593  (do not assign srv_buf_pool_old_size) */
1594  if (!max_chunk) {
1595 
1596  goto func_exit;
1597  }
1598 
1599  block = max_chunk->blocks;
1600  bend = block + max_chunk->size;
1601 
1602  /* Move the blocks of chunk to the end of the
1603  LRU list and try to flush them. */
1604  for (; block < bend; block++) {
1605  switch (buf_block_get_state(block)) {
1606  case BUF_BLOCK_NOT_USED:
1607  continue;
1608  case BUF_BLOCK_FILE_PAGE:
1609  break;
1610  default:
1611  nonfree++;
1612  continue;
1613  }
1614 
1615  mutex_enter(&block->mutex);
1616  /* The following calls will temporarily
1617  release block->mutex and buf_pool->mutex.
1618  Therefore, we have to always retry,
1619  even if !dirty && !nonfree. */
1620 
1621  if (!buf_flush_ready_for_replace(&block->page)) {
1622 
1623  buf_LRU_make_block_old(&block->page);
1624  dirty++;
1625  } else if (buf_LRU_free_block(&block->page, TRUE, NULL)
1626  != BUF_LRU_FREED) {
1627  nonfree++;
1628  }
1629 
1630  mutex_exit(&block->mutex);
1631  }
1632 
1633  buf_pool_mutex_exit(buf_pool);
1634 
1635  /* Request for a flush of the chunk if it helps.
1636  Do not flush if there are non-free blocks, since
1637  flushing will not make the chunk freeable. */
1638  if (nonfree) {
1639  /* Avoid busy-waiting. */
1640  os_thread_sleep(100000);
1641  } else if (dirty
1642  && buf_flush_LRU(buf_pool, dirty)
1643  == ULINT_UNDEFINED) {
1644 
1645  buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU);
1646  }
1647 
1648  goto try_again;
1649  }
1650 
1651  max_size = max_free_size;
1652  max_chunk = max_free_chunk;
1653 
1654  buf_pool->old_pool_size = buf_pool->curr_pool_size;
1655 
1656  /* Rewrite buf_pool->chunks. Copy everything but max_chunk. */
1657  chunks = static_cast<buf_chunk_t *>(mem_alloc((buf_pool->n_chunks - 1) * sizeof *chunks));
1658  memcpy(chunks, buf_pool->chunks,
1659  (max_chunk - buf_pool->chunks) * sizeof *chunks);
1660  memcpy(chunks + (max_chunk - buf_pool->chunks),
1661  max_chunk + 1,
1662  buf_pool->chunks + buf_pool->n_chunks
1663  - (max_chunk + 1));
1664  ut_a(buf_pool->curr_size > max_chunk->size);
1665  buf_pool->curr_size -= max_chunk->size;
1666  buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
1667  chunk_size -= max_chunk->size;
1668  buf_chunk_free(buf_pool, max_chunk);
1669  mem_free(buf_pool->chunks);
1670  buf_pool->chunks = chunks;
1671  buf_pool->n_chunks--;
1672 
1673  /* Allow a slack of one megabyte. */
1674  if (chunk_size > 1048576 / UNIV_PAGE_SIZE) {
1675 
1676  goto shrink_again;
1677  }
1678  goto func_exit;
1679 
1680 func_done:
1681  buf_pool->old_pool_size = buf_pool->curr_pool_size;
1682 func_exit:
1683  buf_pool_mutex_exit(buf_pool);
1684  btr_search_enable();
1685 }
1686 
1687 /********************************************************************/
1689 static
1690 void
1691 buf_pool_shrink(
1692 /*============*/
1693  ulint chunk_size)
1694 {
1695  ulint i;
1696 
1697  for (i = 0; i < srv_buf_pool_instances; i++) {
1698  buf_pool_t* buf_pool;
1699  ulint instance_chunk_size;
1700 
1701  instance_chunk_size = chunk_size / srv_buf_pool_instances;
1702  buf_pool = buf_pool_from_array(i);
1703  buf_pool_shrink_instance(buf_pool, instance_chunk_size);
1704  }
1705 
1706  buf_pool_set_sizes();
1707 }
1708 
1709 /********************************************************************/
1711 static
1712 void
1713 buf_pool_page_hash_rebuild_instance(
1714 /*================================*/
1715  buf_pool_t* buf_pool)
1716 {
1717  ulint i;
1718  buf_page_t* b;
1719  buf_chunk_t* chunk;
1720  ulint n_chunks;
1721  hash_table_t* zip_hash;
1722  hash_table_t* page_hash;
1723 
1724  buf_pool_mutex_enter(buf_pool);
1725 
1726  /* Free, create, and populate the hash table. */
1727  hash_table_free(buf_pool->page_hash);
1728  buf_pool->page_hash = page_hash = hash_create(2 * buf_pool->curr_size);
1729  zip_hash = hash_create(2 * buf_pool->curr_size);
1730 
1731  HASH_MIGRATE(buf_pool->zip_hash, zip_hash, buf_page_t, hash,
1732  BUF_POOL_ZIP_FOLD_BPAGE);
1733 
1734  hash_table_free(buf_pool->zip_hash);
1735  buf_pool->zip_hash = zip_hash;
1736 
1737  /* Insert the uncompressed file pages to buf_pool->page_hash. */
1738 
1739  chunk = buf_pool->chunks;
1740  n_chunks = buf_pool->n_chunks;
1741 
1742  for (i = 0; i < n_chunks; i++, chunk++) {
1743  ulint j;
1744  buf_block_t* block = chunk->blocks;
1745 
1746  for (j = 0; j < chunk->size; j++, block++) {
1747  if (buf_block_get_state(block)
1748  == BUF_BLOCK_FILE_PAGE) {
1749  ut_ad(!block->page.in_zip_hash);
1750  ut_ad(block->page.in_page_hash);
1751 
1752  HASH_INSERT(buf_page_t, hash, page_hash,
1753  buf_page_address_fold(
1754  block->page.space,
1755  block->page.offset),
1756  &block->page);
1757  }
1758  }
1759  }
1760 
1761  /* Insert the compressed-only pages to buf_pool->page_hash.
1762  All such blocks are either in buf_pool->zip_clean or
1763  in buf_pool->flush_list. */
1764 
1765  for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
1766  b = UT_LIST_GET_NEXT(list, b)) {
1767  ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
1768  ut_ad(!b->in_flush_list);
1769  ut_ad(b->in_LRU_list);
1770  ut_ad(b->in_page_hash);
1771  ut_ad(!b->in_zip_hash);
1772 
1773  HASH_INSERT(buf_page_t, hash, page_hash,
1774  buf_page_address_fold(b->space, b->offset), b);
1775  }
1776 
1777  buf_flush_list_mutex_enter(buf_pool);
1778  for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
1779  b = UT_LIST_GET_NEXT(list, b)) {
1780  ut_ad(b->in_flush_list);
1781  ut_ad(b->in_LRU_list);
1782  ut_ad(b->in_page_hash);
1783  ut_ad(!b->in_zip_hash);
1784 
1785  switch (buf_page_get_state(b)) {
1786  case BUF_BLOCK_ZIP_DIRTY:
1787  HASH_INSERT(buf_page_t, hash, page_hash,
1788  buf_page_address_fold(b->space,
1789  b->offset), b);
1790  break;
1791  case BUF_BLOCK_FILE_PAGE:
1792  /* uncompressed page */
1793  break;
1794  case BUF_BLOCK_ZIP_FREE:
1795  case BUF_BLOCK_ZIP_PAGE:
1796  case BUF_BLOCK_NOT_USED:
1797  case BUF_BLOCK_READY_FOR_USE:
1798  case BUF_BLOCK_MEMORY:
1799  case BUF_BLOCK_REMOVE_HASH:
1800  ut_error;
1801  break;
1802  }
1803  }
1804 
1805  buf_flush_list_mutex_exit(buf_pool);
1806  buf_pool_mutex_exit(buf_pool);
1807 }
1808 
1809 /********************************************************************
1810 Determine if a block is a sentinel for a buffer pool watch.
1811 @return TRUE if a sentinel for a buffer pool watch, FALSE if not */
1812 UNIV_INTERN
1813 ibool
1814 buf_pool_watch_is_sentinel(
1815 /*=======================*/
1816  buf_pool_t* buf_pool,
1817  const buf_page_t* bpage)
1818 {
1819  ut_ad(buf_page_in_file(bpage));
1820 
1821  if (bpage < &buf_pool->watch[0]
1822  || bpage >= &buf_pool->watch[BUF_POOL_WATCH_SIZE]) {
1823 
1824  ut_ad(buf_page_get_state(bpage) != BUF_BLOCK_ZIP_PAGE
1825  || bpage->zip.data != NULL);
1826 
1827  return(FALSE);
1828  }
1829 
1830  ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
1831  ut_ad(!bpage->in_zip_hash);
1832  ut_ad(bpage->in_page_hash);
1833  ut_ad(bpage->zip.data == NULL);
1834  ut_ad(bpage->buf_fix_count > 0);
1835  return(TRUE);
1836 }
1837 
1838 /****************************************************************/
1842 UNIV_INTERN
1843 buf_page_t*
1844 buf_pool_watch_set(
1845 /*===============*/
1846  ulint space,
1847  ulint offset,
1848  ulint fold)
1849 {
1850  buf_page_t* bpage;
1851  ulint i;
1852  buf_pool_t* buf_pool = buf_pool_get(space, offset);
1853 
1854  ut_ad(buf_pool_mutex_own(buf_pool));
1855 
1856  bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
1857 
1858  if (UNIV_LIKELY_NULL(bpage)) {
1859  if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
1860  /* The page was loaded meanwhile. */
1861  return(bpage);
1862  }
1863  /* Add to an existing watch. */
1864  bpage->buf_fix_count++;
1865  return(NULL);
1866  }
1867 
1868  for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
1869  bpage = &buf_pool->watch[i];
1870 
1871  ut_ad(bpage->access_time == 0);
1872  ut_ad(bpage->newest_modification == 0);
1873  ut_ad(bpage->oldest_modification == 0);
1874  ut_ad(bpage->zip.data == NULL);
1875  ut_ad(!bpage->in_zip_hash);
1876 
1877  switch (bpage->state) {
1878  case BUF_BLOCK_POOL_WATCH:
1879  ut_ad(!bpage->in_page_hash);
1880  ut_ad(bpage->buf_fix_count == 0);
1881 
1882  /* bpage is pointing to buf_pool->watch[],
1883  which is protected by buf_pool->mutex.
1884  Normally, buf_page_t objects are protected by
1885  buf_block_t::mutex or buf_pool->zip_mutex or both. */
1886 
1887  bpage->state = BUF_BLOCK_ZIP_PAGE;
1888  bpage->space = space;
1889  bpage->offset = offset;
1890  bpage->buf_fix_count = 1;
1891 
1892  ut_d(bpage->in_page_hash = TRUE);
1893  HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
1894  fold, bpage);
1895  return(NULL);
1896  case BUF_BLOCK_ZIP_PAGE:
1897  ut_ad(bpage->in_page_hash);
1898  ut_ad(bpage->buf_fix_count > 0);
1899  break;
1900  default:
1901  ut_error;
1902  }
1903  }
1904 
1905  /* Allocation failed. Either the maximum number of purge
1906  threads should never exceed BUF_POOL_WATCH_SIZE, or this code
1907  should be modified to return a special non-NULL value and the
1908  caller should purge the record directly. */
1909  ut_error;
1910 
1911  /* Fix compiler warning */
1912  return(NULL);
1913 }
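
A lifecycle sketch (illustrative): the purge thread arms a watch on a page that is not in the pool, then later asks whether it was read in meanwhile; buf_pool_watch_occurred() is assumed here from the companion declarations in buf0buf.h:

    buf_pool_t* buf_pool = buf_pool_get(space, offset);
    buf_page_t* bpage;
    ulint       fold = buf_page_address_fold(space, offset);

    buf_pool_mutex_enter(buf_pool);
    bpage = buf_pool_watch_set(space, offset, fold);
    buf_pool_mutex_exit(buf_pool);

    if (bpage == NULL) {
            /* The watch is armed; its buf_fix_count keeps it pinned. */
            if (buf_pool_watch_occurred(space, offset)) {
                    /* The page was read into the pool meanwhile. */
            }
            buf_pool_watch_unset(space, offset);
    }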
1914 
1915 /********************************************************************/
1917 static
1918 void
1919 buf_pool_page_hash_rebuild(void)
1920 /*============================*/
1921 {
1922  ulint i;
1923 
1924  for (i = 0; i < srv_buf_pool_instances; i++) {
1925  buf_pool_page_hash_rebuild_instance(buf_pool_from_array(i));
1926  }
1927 }
1928 
1929 /********************************************************************/
1931 static
1932 void
1933 buf_pool_increase_instance(
1934 /*=======================*/
1935  buf_pool_t* buf_pool,
1936  ulint change_size)
1937 {
1938  buf_chunk_t* chunks;
1939  buf_chunk_t* chunk;
1940 
1941  buf_pool_mutex_enter(buf_pool);
1942  chunks = static_cast<buf_chunk_t *>(mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks));
1943 
1944  memcpy(chunks, buf_pool->chunks, buf_pool->n_chunks * sizeof *chunks);
1945 
1946  chunk = &chunks[buf_pool->n_chunks];
1947 
1948  if (!buf_chunk_init(buf_pool, chunk, change_size)) {
1949  mem_free(chunks);
1950  } else {
1951  buf_pool->old_pool_size = buf_pool->curr_pool_size;
1952  buf_pool->curr_size += chunk->size;
1953  buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
1954  mem_free(buf_pool->chunks);
1955  buf_pool->chunks = chunks;
1956  buf_pool->n_chunks++;
1957  }
1958 
1959  buf_pool_mutex_exit(buf_pool);
1960 }
1961 
1962 /********************************************************************/
1963 Increase the buffer pool size. */
1964 static
1965 void
1966 buf_pool_increase(
1967 /*==============*/
1968  ulint change_size)
1969 {
1970  ulint i;
1971 
1972  for (i = 0; i < srv_buf_pool_instances; i++) {
1973  buf_pool_increase_instance(
1974  buf_pool_from_array(i),
1975  change_size / srv_buf_pool_instances);
1976  }
1977 
1978  buf_pool_set_sizes();
1979 }
1980 
1981 /********************************************************************/
1982 Resizes the buffer pool. */
1983 UNIV_INTERN
1984 void
1985 buf_pool_resize(void)
1986 /*=================*/
1987 {
1988  ulint change_size;
1989  ulint min_change_size = 1048576 * srv_buf_pool_instances;
1990 
1991  buf_pool_mutex_enter_all();
1992 
1993  if (srv_buf_pool_old_size == srv_buf_pool_size) {
1994 
1995  buf_pool_mutex_exit_all();
1996 
1997  return;
1998 
1999  } else if (srv_buf_pool_curr_size + min_change_size
2000  > srv_buf_pool_size) {
2001 
2002  change_size = (srv_buf_pool_curr_size - srv_buf_pool_size)
2003  / UNIV_PAGE_SIZE;
2004 
2005  buf_pool_mutex_exit_all();
2006 
2007  /* Disable adaptive hash indexes and empty the index
2008  in order to free up memory in the buffer pool chunks. */
2009  buf_pool_shrink(change_size);
2010 
2011  } else if (srv_buf_pool_curr_size + min_change_size
2012  < srv_buf_pool_size) {
2013 
2014  /* Enlarge the buffer pool by at least one megabyte */
2015 
2016  change_size = srv_buf_pool_size - srv_buf_pool_curr_size;
2017 
2018  buf_pool_mutex_exit_all();
2019 
2020  buf_pool_increase(change_size);
2021  } else {
2022  srv_buf_pool_size = srv_buf_pool_old_size;
2023 
2024  buf_pool_mutex_exit_all();
2025 
2026  return;
2027  }
2028 
2029  buf_pool_page_hash_rebuild();
2030 }
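/* Editor's annotation (not part of the original source): a worked example
of the sizing arithmetic above, assuming the default 16 KiB UNIV_PAGE_SIZE
and srv_buf_pool_instances == 4, so min_change_size == 4 MiB. Shrinking a
512 MiB pool to 256 MiB takes the first else-if branch: change_size =
(512 MiB - 256 MiB) / 16 KiB = 16384 pages, handed to buf_pool_shrink().
Growing a 512 MiB pool to 1024 MiB takes the second else-if branch:
buf_pool_increase(512 MiB), which splits the change evenly and adds one
128 MiB chunk to each of the four instances. */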
2031 
2032 /****************************************************************/
2033 Remove the sentinel block for the watch before replacing it with a
2034 real block. buf_pool_watch_unset() or buf_pool_watch_occurred() will
2035 notice that the block has been replaced with the real block. Caller
2036 must hold the buffer pool mutex. */
2037 static
2038 void
2039 buf_pool_watch_remove(
2040 /*==================*/
2041  buf_pool_t* buf_pool,
2042  ulint fold, /*!< in: buf_page_address_fold(
2043  space, offset) */
2044  buf_page_t* watch) /*!< in/out: sentinel for watch */
2045 {
2046  ut_ad(buf_pool_mutex_own(buf_pool));
2047 
2048  HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
2049  ut_d(watch->in_page_hash = FALSE);
2050  watch->buf_fix_count = 0;
2051  watch->state = BUF_BLOCK_POOL_WATCH;
2052 }
2053 
2054 /****************************************************************/
2055 Stop watching if the page has been read in.
2056 buf_pool_watch_set(space,offset) must have returned NULL before. */
2057 UNIV_INTERN
2058 void
2059 buf_pool_watch_unset(
2060 /*=================*/
2061  ulint space,
2062  ulint offset)
2063 {
2064  buf_page_t* bpage;
2065  buf_pool_t* buf_pool = buf_pool_get(space, offset);
2066  ulint fold = buf_page_address_fold(space, offset);
2067 
2068  buf_pool_mutex_enter(buf_pool);
2069  bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
2070  /* The page must exist because buf_pool_watch_set()
2071  increments buf_fix_count. */
2072  ut_a(bpage);
2073 
2074  if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
2075  mutex_t* mutex = buf_page_get_mutex(bpage);
2076 
2077  mutex_enter(mutex);
2078  ut_a(bpage->buf_fix_count > 0);
2079  bpage->buf_fix_count--;
2080  mutex_exit(mutex);
2081  } else {
2082  ut_a(bpage->buf_fix_count > 0);
2083 
2084  if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
2085  buf_pool_watch_remove(buf_pool, fold, bpage);
2086  }
2087  }
2088 
2089  buf_pool_mutex_exit(buf_pool);
2090 }
2091 
2092 /****************************************************************/
2093 Check if the page has been read in.
2094 This may only be called after buf_pool_watch_set(space,offset)
2095 has returned NULL and before invoking buf_pool_watch_unset(space,offset).
2096 @return FALSE if the given page was not read in, TRUE if it was */
2097 UNIV_INTERN
2098 ibool
2099 buf_pool_watch_occurred(
2100 /*====================*/
2101  ulint space,
2102  ulint offset)
2103 {
2104  ibool ret;
2105  buf_page_t* bpage;
2106  buf_pool_t* buf_pool = buf_pool_get(space, offset);
2107  ulint fold = buf_page_address_fold(space, offset);
2108 
2109  buf_pool_mutex_enter(buf_pool);
2110 
2111  bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
2112  /* The page must exist because buf_pool_watch_set()
2113  increments buf_fix_count. */
2114  ut_a(bpage);
2115  ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
2116  buf_pool_mutex_exit(buf_pool);
2117 
2118  return(ret);
2119 }
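/* Editor's annotation (not part of the original source): a minimal sketch
of the watch life cycle implemented by buf_pool_watch_set(),
buf_pool_watch_occurred() and buf_pool_watch_unset(). The function
example_watch_page() and its callers are hypothetical; in the server this
protocol is normally driven through buf_page_get_gen() with mode
BUF_GET_IF_IN_POOL_OR_WATCH, e.g. by purge. */
#if 0	/* illustrative sketch only */
static
ibool
example_watch_page(ulint space, ulint offset)
{
	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
	ulint		fold = buf_page_address_fold(space, offset);
	ibool		read_in;

	/* buf_pool_watch_set() requires the buffer pool mutex; a
	non-NULL return means the page is already in the pool and
	no watch was registered. */
	buf_pool_mutex_enter(buf_pool);

	if (buf_pool_watch_set(space, offset, fold) != NULL) {
		buf_pool_mutex_exit(buf_pool);
		return(TRUE);
	}

	buf_pool_mutex_exit(buf_pool);

	/* ... work that must detect a concurrent read of the page ... */

	read_in = buf_pool_watch_occurred(space, offset);
	buf_pool_watch_unset(space, offset);

	return(read_in);
}
#endif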
2120 
2121 /********************************************************************/
2122 Moves a page to the start of the buffer pool LRU list. This high-level
2123 function can be used to prevent an important page from slipping out of
2124 the buffer pool. */
2125 UNIV_INTERN
2126 void
2127 buf_page_make_young(
2128 /*================*/
2129  buf_page_t* bpage)
2130 {
2131  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2132 
2133  buf_pool_mutex_enter(buf_pool);
2134 
2135  ut_a(buf_page_in_file(bpage));
2136 
2137  buf_LRU_make_block_young(bpage);
2138 
2139  buf_pool_mutex_exit(buf_pool);
2140 }
2141 
2142 /********************************************************************/
2143 Sets the time of the first access of a page and moves a page to the
2144 start of the buffer pool LRU list if it is too old. This high-level
2145 function can be used to prevent an important page from slipping
2146 out of the buffer pool. */
2147 static
2148 void
2149 buf_page_set_accessed_make_young(
2150 /*=============================*/
2151  buf_page_t* bpage, /*!< in/out: buffer block of a
2152  file page */
2153  unsigned access_time) /*!< in: bpage->access_time
2154  read under mutex protection,
2155  or 0 if unknown */
2156 {
2157  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
2158 
2159  ut_ad(!buf_pool_mutex_own(buf_pool));
2160  ut_a(buf_page_in_file(bpage));
2161 
2162  if (buf_page_peek_if_too_old(bpage)) {
2163  buf_pool_mutex_enter(buf_pool);
2164  buf_LRU_make_block_young(bpage);
2165  buf_pool_mutex_exit(buf_pool);
2166  } else if (!access_time) {
2167  ulint time_ms = ut_time_ms();
2168  buf_pool_mutex_enter(buf_pool);
2169  buf_page_set_accessed(bpage, time_ms);
2170  buf_pool_mutex_exit(buf_pool);
2171  }
2172 }
2173 
2174 /********************************************************************/
2175 Resets the check_index_page_at_flush field of a page if found in the
2176 buffer pool. */
2177 UNIV_INTERN
2178 void
2179 buf_reset_check_index_page_at_flush(
2180 /*================================*/
2181  ulint space,
2182  ulint offset)
2183 {
2184  buf_block_t* block;
2185  buf_pool_t* buf_pool = buf_pool_get(space, offset);
2186 
2187  buf_pool_mutex_enter(buf_pool);
2188 
2189  block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
2190 
2191  if (block && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) {
2192  ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
2193  block->check_index_page_at_flush = FALSE;
2194  }
2195 
2196  buf_pool_mutex_exit(buf_pool);
2197 }
2198 
2199 /********************************************************************/
2200 Returns the current state of is_hashed of a page. FALSE if the page is
2201 not in the pool. NOTE that this operation does not fix the page in the
2202 pool if it is found there.
2203 @return TRUE if page hash index is built in search system */
2204 UNIV_INTERN
2205 ibool
2206 buf_page_peek_if_search_hashed(
2207 /*===========================*/
2208  ulint space,
2209  ulint offset)
2210 {
2211  buf_block_t* block;
2212  ibool is_hashed;
2213  buf_pool_t* buf_pool = buf_pool_get(space, offset);
2214 
2215  buf_pool_mutex_enter(buf_pool);
2216 
2217  block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
2218 
2219  if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
2220  is_hashed = FALSE;
2221  } else {
2222  ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
2223  is_hashed = block->is_hashed;
2224  }
2225 
2226  buf_pool_mutex_exit(buf_pool);
2227 
2228  return(is_hashed);
2229 }
2230 
2231 #ifdef UNIV_DEBUG_FILE_ACCESSES
2232 /********************************************************************/
2233 Sets file_page_was_freed TRUE if the page is found in the buffer pool.
2234 This function should be called when we free a file page and want the
2235 debug version to check that it is not accessed any more unless
2236 reallocated.
2237 @return control block if found in page hash table, otherwise NULL */
2238 UNIV_INTERN
2239 buf_page_t*
2240 buf_page_set_file_page_was_freed(
2241 /*=============================*/
2242  ulint space,
2243  ulint offset)
2244 {
2245  buf_page_t* bpage;
2246  buf_pool_t* buf_pool = buf_pool_get(space, offset);
2247 
2248  buf_pool_mutex_enter(buf_pool);
2249 
2250  bpage = buf_page_hash_get(buf_pool, space, offset);
2251 
2252  if (bpage) {
2253  ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
2254  bpage->file_page_was_freed = TRUE;
2255  }
2256 
2257  buf_pool_mutex_exit(buf_pool);
2258 
2259  return(bpage);
2260 }
2261 
2262 /********************************************************************/
2263 Sets file_page_was_freed FALSE if the page is found in the buffer pool.
2264 This function should be called when we free a file page and want the
2265 debug version to check that it is not accessed any more unless
2266 reallocated.
2267 @return control block if found in page hash table, otherwise NULL */
2268 UNIV_INTERN
2269 buf_page_t*
2270 buf_page_reset_file_page_was_freed(
2271 /*===============================*/
2272  ulint space,
2273  ulint offset)
2274 {
2275  buf_page_t* bpage;
2276  buf_pool_t* buf_pool = buf_pool_get(space, offset);
2277 
2278  buf_pool_mutex_enter(buf_pool);
2279 
2280  bpage = buf_page_hash_get(buf_pool, space, offset);
2281 
2282  if (bpage) {
2283  ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
2284  bpage->file_page_was_freed = FALSE;
2285  }
2286 
2287  buf_pool_mutex_exit(buf_pool);
2288 
2289  return(bpage);
2290 }
2291 #endif /* UNIV_DEBUG_FILE_ACCESSES */
2292 
2293 /********************************************************************/
2294 Get read access to a compressed page (usually of type
2295 FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
2296 The page must be released with buf_page_release_zip().
2297 NOTE: the page is not protected by any latch. Mutual exclusion has to
2298 be implemented at a higher level. In other words, all possible
2299 accesses to a given page through this function must be protected by
2300 the same set of mutexes or latches.
2301 @return pointer to the block */
2302 UNIV_INTERN
2303 buf_page_t*
2304 buf_page_get_zip(
2305 /*=============*/
2306  ulint space,
2307  ulint zip_size,
2308  ulint offset)
2309 {
2310  buf_page_t* bpage;
2311  mutex_t* block_mutex;
2312  ibool must_read;
2313  unsigned access_time;
2314  buf_pool_t* buf_pool = buf_pool_get(space, offset);
2315 
2316 #ifndef UNIV_LOG_DEBUG
2317  ut_ad(!ibuf_inside());
2318 #endif
2319  buf_pool->stat.n_page_gets++;
2320 
2321  for (;;) {
2322  buf_pool_mutex_enter(buf_pool);
2323 lookup:
2324  bpage = buf_page_hash_get(buf_pool, space, offset);
2325  if (bpage) {
2326  ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
2327  break;
2328  }
2329 
2330  /* Page not in buf_pool: needs to be read from file */
2331 
2332  buf_pool_mutex_exit(buf_pool);
2333 
2334  buf_read_page(space, zip_size, offset);
2335 
2336 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2337  ut_a(++buf_dbg_counter % 37 || buf_validate());
2338 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2339  }
2340 
2341  if (UNIV_UNLIKELY(!bpage->zip.data)) {
2342  /* There is no compressed page. */
2343 err_exit:
2344  buf_pool_mutex_exit(buf_pool);
2345  return(NULL);
2346  }
2347 
2348  ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
2349 
2350  switch (buf_page_get_state(bpage)) {
2351  case BUF_BLOCK_NOT_USED:
2352  case BUF_BLOCK_READY_FOR_USE:
2353  case BUF_BLOCK_MEMORY:
2354  case BUF_BLOCK_REMOVE_HASH:
2355  case BUF_BLOCK_ZIP_FREE:
2356  break;
2357  case BUF_BLOCK_ZIP_PAGE:
2358  case BUF_BLOCK_ZIP_DIRTY:
2359  block_mutex = &buf_pool->zip_mutex;
2360  mutex_enter(block_mutex);
2361  bpage->buf_fix_count++;
2362  goto got_block;
2363  case BUF_BLOCK_FILE_PAGE:
2364  block_mutex = &((buf_block_t*) bpage)->mutex;
2365  mutex_enter(block_mutex);
2366 
2367  /* Discard the uncompressed page frame if possible. */
2368  if (buf_LRU_free_block(bpage, FALSE, NULL)
2369  == BUF_LRU_FREED) {
2370 
2371  mutex_exit(block_mutex);
2372  goto lookup;
2373  }
2374 
2375  buf_block_buf_fix_inc((buf_block_t*) bpage,
2376  __FILE__, __LINE__);
2377  goto got_block;
2378  }
2379 
2380  ut_error;
2381  goto err_exit;
2382 
2383 got_block:
2384  must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
2385  access_time = buf_page_is_accessed(bpage);
2386 
2387  buf_pool_mutex_exit(buf_pool);
2388 
2389  mutex_exit(block_mutex);
2390 
2391  buf_page_set_accessed_make_young(bpage, access_time);
2392 
2393 #ifdef UNIV_DEBUG_FILE_ACCESSES
2394  ut_a(!bpage->file_page_was_freed);
2395 #endif
2396 
2397 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2398  ut_a(++buf_dbg_counter % 5771 || buf_validate());
2399  ut_a(bpage->buf_fix_count > 0);
2400  ut_a(buf_page_in_file(bpage));
2401 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2402 
2403  if (must_read) {
2404  /* Let us wait until the read operation
2405  completes */
2406 
2407  for (;;) {
2408  enum buf_io_fix io_fix;
2409 
2410  mutex_enter(block_mutex);
2411  io_fix = buf_page_get_io_fix(bpage);
2412  mutex_exit(block_mutex);
2413 
2414  if (io_fix == BUF_IO_READ) {
2415 
2416  os_thread_sleep(WAIT_FOR_READ);
2417  } else {
2418  break;
2419  }
2420  }
2421  }
2422 
2423 #ifdef UNIV_IBUF_COUNT_DEBUG
2424  ut_a(ibuf_count_get(buf_page_get_space(bpage),
2425  buf_page_get_page_no(bpage)) == 0);
2426 #endif
2427  return(bpage);
2428 }
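/* Editor's annotation (not part of the original source): a sketch of how a
caller might read a compressed-only page through buf_page_get_zip().
SPACE, ZIP_SIZE and PAGE_NO are placeholders for values the caller already
knows; buf_page_release_zip() is the matching release call from buf0buf. */
#if 0	/* illustrative sketch only */
	buf_page_t*	bpage = buf_page_get_zip(SPACE, ZIP_SIZE, PAGE_NO);

	if (bpage != NULL) {
		/* bpage->zip.data may now be read. No page latch is held,
		so mutual exclusion must come from a higher level, as the
		comment above buf_page_get_zip() explains. */

		buf_page_release_zip(bpage);
	}
#endif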
2429 
2430 /********************************************************************/
2431 Initialize some fields of a control block. */
2432 UNIV_INLINE
2433 void
2434 buf_block_init_low(
2435 /*===============*/
2436  buf_block_t* block)
2437 {
2438  block->check_index_page_at_flush = FALSE;
2439  block->index = NULL;
2440 
2441  block->n_hash_helps = 0;
2442  block->is_hashed = FALSE;
2443  block->n_fields = 1;
2444  block->n_bytes = 0;
2445  block->left_side = TRUE;
2446 }
2447 #endif /* !UNIV_HOTBACKUP */
2448 
2449 /********************************************************************/
2450 Decompress a block.
2451 @return TRUE if successful */
2452 UNIV_INTERN
2453 ibool
2454 buf_zip_decompress(
2455 /*===============*/
2456  buf_block_t* block,
2457  ibool check)
2458 {
2459  const byte* frame = block->page.zip.data;
2460  ulint stamp_checksum = mach_read_from_4(
2461  frame + FIL_PAGE_SPACE_OR_CHKSUM);
2462 
2463  ut_ad(buf_block_get_zip_size(block));
2464  ut_a(buf_block_get_space(block) != 0);
2465 
2466  if (UNIV_LIKELY(check && stamp_checksum != BUF_NO_CHECKSUM_MAGIC)) {
2467  ulint calc_checksum = page_zip_calc_checksum(
2468  frame, page_zip_get_size(&block->page.zip));
2469 
2470  if (UNIV_UNLIKELY(stamp_checksum != calc_checksum)) {
2471  ut_print_timestamp(stderr);
2472  fprintf(stderr,
2473  " InnoDB: compressed page checksum mismatch"
2474  " (space %u page %u): %lu != %lu\n",
2475  block->page.space, block->page.offset,
2476  stamp_checksum, calc_checksum);
2477  return(FALSE);
2478  }
2479  }
2480 
2481  switch (fil_page_get_type(frame)) {
2482  case FIL_PAGE_INDEX:
2483  if (page_zip_decompress(&block->page.zip,
2484  block->frame, TRUE)) {
2485  return(TRUE);
2486  }
2487 
2488  fprintf(stderr,
2489  "InnoDB: unable to decompress space %lu page %lu\n",
2490  (ulong) block->page.space,
2491  (ulong) block->page.offset);
2492  return(FALSE);
2493 
2494  case FIL_PAGE_TYPE_ALLOCATED:
2495  case FIL_PAGE_INODE:
2496  case FIL_PAGE_IBUF_BITMAP:
2497  case FIL_PAGE_TYPE_FSP_HDR:
2498  case FIL_PAGE_TYPE_XDES:
2499  case FIL_PAGE_TYPE_ZBLOB:
2500  case FIL_PAGE_TYPE_ZBLOB2:
2501  /* Copy to uncompressed storage. */
2502  memcpy(block->frame, frame,
2503  buf_block_get_zip_size(block));
2504  return(TRUE);
2505  }
2506 
2507  ut_print_timestamp(stderr);
2508  fprintf(stderr,
2509  " InnoDB: unknown compressed page"
2510  " type %lu\n",
2511  fil_page_get_type(frame));
2512  return(FALSE);
2513 }
2514 
2515 #ifndef UNIV_HOTBACKUP
2516 /*******************************************************************/
2517 Gets the block to whose frame the pointer is pointing to, if found
2518 in this buffer pool instance.
2519 @return pointer to block */
2520 static
2521 buf_block_t*
2522 buf_block_align_instance(
2523 /*=====================*/
2524  buf_pool_t* buf_pool, /*!< in: buffer in which the block
2525  resides */
2526  const byte* ptr) /*!< in: pointer to a frame */
2527 {
2528  buf_chunk_t* chunk;
2529  ulint i;
2530 
2531  /* TODO: protect buf_pool->chunks with a mutex (it will
2532  currently remain constant after buf_pool_init()) */
2533  for (chunk = buf_pool->chunks, i = buf_pool->n_chunks; i--; chunk++) {
2534  lint offs = ptr - chunk->blocks->frame;
2535 
2536  if (UNIV_UNLIKELY(offs < 0)) {
2537 
2538  continue;
2539  }
2540 
2541  offs >>= UNIV_PAGE_SIZE_SHIFT;
2542 
2543  if (UNIV_LIKELY((ulint) offs < chunk->size)) {
2544  buf_block_t* block = &chunk->blocks[offs];
2545 
2546  /* The function buf_chunk_init() invokes
2547  buf_block_init() so that block[n].frame ==
2548  block->frame + n * UNIV_PAGE_SIZE. Check it. */
2549  ut_ad(block->frame == page_align(ptr));
2550 #ifdef UNIV_DEBUG
2551  /* A thread that updates these fields must
2552  hold buf_pool->mutex and block->mutex. Acquire
2553  only the latter. */
2554  mutex_enter(&block->mutex);
2555 
2556  switch (buf_block_get_state(block)) {
2557  case BUF_BLOCK_ZIP_FREE:
2558  case BUF_BLOCK_ZIP_PAGE:
2559  case BUF_BLOCK_ZIP_DIRTY:
2560  /* These types should only be used in
2561  the compressed buffer pool, whose
2562  memory is allocated from
2563  buf_pool->chunks, in UNIV_PAGE_SIZE
2564  blocks flagged as BUF_BLOCK_MEMORY. */
2565  ut_error;
2566  break;
2567  case BUF_BLOCK_NOT_USED:
2568  case BUF_BLOCK_READY_FOR_USE:
2569  case BUF_BLOCK_MEMORY:
2570  /* Some data structures contain
2571  "guess" pointers to file pages. The
2572  file pages may have been freed and
2573  reused. Do not complain. */
2574  break;
2575  case BUF_BLOCK_REMOVE_HASH:
2576  /* buf_LRU_block_remove_hashed_page()
2577  will overwrite the FIL_PAGE_OFFSET and
2578  FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID with
2579  0xff and set the state to
2580  BUF_BLOCK_REMOVE_HASH. */
2581  ut_ad(mach_read_from_4(block->frame + FIL_PAGE_OFFSET)
2582  == 0xffffffff);
2583  ut_ad(mach_read_from_4(block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID)
2584  == 0xffffffff);
2585  break;
2586  case BUF_BLOCK_FILE_PAGE:
2587  ut_ad(block->page.space
2588  == page_get_space_id(page_align(ptr)));
2589  ut_ad(block->page.offset
2590  == page_get_page_no(page_align(ptr)));
2591  break;
2592  }
2593 
2594  mutex_exit(&block->mutex);
2595 #endif /* UNIV_DEBUG */
2596 
2597  return(block);
2598  }
2599  }
2600 
2601  return(NULL);
2602 }
2603 
2604 /*******************************************************************/
2605 Gets the block to whose frame the pointer is pointing to.
2606 @return pointer to block, never NULL */
2607 UNIV_INTERN
2608 buf_block_t*
2609 buf_block_align(
2610 /*============*/
2611  const byte* ptr)
2612 {
2613  ulint i;
2614 
2615  for (i = 0; i < srv_buf_pool_instances; i++) {
2616  buf_block_t* block;
2617 
2618  block = buf_block_align_instance(
2619  buf_pool_from_array(i), ptr);
2620  if (block) {
2621  return(block);
2622  }
2623  }
2624 
2625  /* The block should always be found. */
2626  ut_error;
2627  return(NULL);
2628 }
2629 
2630 /********************************************************************/
2631 Find out if a pointer belongs to a buf_block_t. It can be a pointer to
2632 the buf_block_t itself or a member of it. This function checks one of
2633 the buffer pool instances.
2634 @return TRUE if ptr belongs to a buf_block_t struct */
2635 static
2636 ibool
2637 buf_pointer_is_block_field_instance(
2638 /*================================*/
2639  buf_pool_t* buf_pool,
2640  const void* ptr)
2641 {
2642  const buf_chunk_t* chunk = buf_pool->chunks;
2643  const buf_chunk_t* const echunk = chunk + buf_pool->n_chunks;
2644 
2645  /* TODO: protect buf_pool->chunks with a mutex (it will
2646  currently remain constant after buf_pool_init()) */
2647  while (chunk < echunk) {
2648  if (ptr >= (void *)chunk->blocks
2649  && ptr < (void *)(chunk->blocks + chunk->size)) {
2650 
2651  return(TRUE);
2652  }
2653 
2654  chunk++;
2655  }
2656 
2657  return(FALSE);
2658 }
2659 
2660 /********************************************************************/
2661 Find out if a pointer belongs to a buf_block_t. It can be a pointer to
2662 the buf_block_t itself or a member of it.
2663 @return TRUE if ptr belongs to a buf_block_t struct */
2664 UNIV_INTERN
2665 ibool
2666 buf_pointer_is_block_field(
2667 /*=======================*/
2668  const void* ptr)
2669 {
2670  ulint i;
2671 
2672  for (i = 0; i < srv_buf_pool_instances; i++) {
2673  ibool found;
2674 
2675  found = buf_pointer_is_block_field_instance(
2676  buf_pool_from_array(i), ptr);
2677  if (found) {
2678  return(TRUE);
2679  }
2680  }
2681 
2682  return(FALSE);
2683 }
2684 
2685 /********************************************************************/
2686 Find out if a buffer block was created by buf_chunk_init().
2687 @return TRUE if "block" has been added to buf_pool->free by buf_chunk_init() */
2688 static
2689 ibool
2690 buf_block_is_uncompressed(
2691 /*======================*/
2692  buf_pool_t* buf_pool,
2693  const buf_block_t* block) /*!< in: pointer to block,
2694  not dereferenced */
2695 {
2696  ut_ad(buf_pool_mutex_own(buf_pool));
2697 
2698  if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
2699  /* The pointer should be aligned. */
2700  return(FALSE);
2701  }
2702 
2703  return(buf_pointer_is_block_field_instance(buf_pool, (void *)block));
2704 }
2705 
2706 /********************************************************************/
2707 This is the general function used to get access to a database page.
2708 @return pointer to the block or NULL */
2709 UNIV_INTERN
2710 buf_block_t*
2711 buf_page_get_gen(
2712 /*=============*/
2713  ulint space, /*!< in: space id */
2714  ulint zip_size, /*!< in: compressed page size in bytes
2715  or 0 for uncompressed pages */
2716  ulint offset, /*!< in: page number */
2717  ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
2718  buf_block_t* guess, /*!< in: guessed block or NULL */
2719  ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
2720  BUF_GET_NO_LATCH, or
2721  BUF_GET_IF_IN_POOL_OR_WATCH */
2722  const char* file, /*!< in: file name */
2723  ulint line, /*!< in: line where called */
2724  mtr_t* mtr) /*!< in: mini-transaction */
2725 {
2726  buf_block_t* block;
2727  ulint fold;
2728  unsigned access_time;
2729  ulint fix_type;
2730  ibool must_read;
2731  ulint retries = 0;
2732  buf_pool_t* buf_pool = buf_pool_get(space, offset);
2733 
2734  ut_ad(mtr);
2735  ut_ad(mtr->state == MTR_ACTIVE);
2736  ut_ad((rw_latch == RW_S_LATCH)
2737  || (rw_latch == RW_X_LATCH)
2738  || (rw_latch == RW_NO_LATCH));
2739  ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH));
2740  ut_ad(mode == BUF_GET
2741  || mode == BUF_GET_IF_IN_POOL
2742  || mode == BUF_GET_NO_LATCH
2743  || mode == BUF_GET_IF_IN_POOL_OR_WATCH);
2744  ut_ad(zip_size == fil_space_get_zip_size(space));
2745  ut_ad(ut_is_2pow(zip_size));
2746 #ifndef UNIV_LOG_DEBUG
2747  ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset, NULL));
2748 #endif
2749  buf_pool->stat.n_page_gets++;
2750  fold = buf_page_address_fold(space, offset);
2751 loop:
2752  block = guess;
2753  buf_pool_mutex_enter(buf_pool);
2754 
2755  if (block) {
2756  /* If the guess is a compressed page descriptor that
2757  has been allocated by buf_buddy_alloc(), it may have
2758  been invalidated by buf_buddy_relocate(). In that
2759  case, block could point to something that happens to
2760  contain the expected bits in block->page. Similarly,
2761  the guess may be pointing to a buffer pool chunk that
2762  has been released when resizing the buffer pool. */
2763 
2764  if (!buf_block_is_uncompressed(buf_pool, block)
2765  || offset != block->page.offset
2766  || space != block->page.space
2767  || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
2768 
2769  block = guess = NULL;
2770  } else {
2771  ut_ad(!block->page.in_zip_hash);
2772  ut_ad(block->page.in_page_hash);
2773  }
2774  }
2775 
2776  if (block == NULL) {
2777  block = (buf_block_t*) buf_page_hash_get_low(
2778  buf_pool, space, offset, fold);
2779  }
2780 
2781 loop2:
2782  if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
2783  block = NULL;
2784  }
2785 
2786  if (block == NULL) {
2787  /* Page not in buf_pool: needs to be read from file */
2788 
2789  if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
2790  block = (buf_block_t*) buf_pool_watch_set(
2791  space, offset, fold);
2792 
2793  if (UNIV_LIKELY_NULL(block)) {
2794 
2795  goto got_block;
2796  }
2797  }
2798 
2799  buf_pool_mutex_exit(buf_pool);
2800 
2801  if (mode == BUF_GET_IF_IN_POOL
2802  || mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
2803 
2804  return(NULL);
2805  }
2806 
2807  if (buf_read_page(space, zip_size, offset)) {
2808  retries = 0;
2809  } else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
2810  ++retries;
2811  } else {
2812  fprintf(stderr, "InnoDB: Error: Unable"
2813  " to read tablespace %lu page no"
2814  " %lu into the buffer pool after"
2815  " %lu attempts\n"
2816  "InnoDB: The most probable cause"
2817  " of this error may be that the"
2818  " table has been corrupted.\n"
2819  "InnoDB: You can try to fix this"
2820  " problem by using"
2821  " innodb_force_recovery.\n"
2822  "InnoDB: Please see reference manual"
2823  " for more details.\n"
2824  "InnoDB: Aborting...\n",
2825  space, offset,
2826  BUF_PAGE_READ_MAX_RETRIES);
2827 
2828  ut_error;
2829  }
2830 
2831 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2832  ut_a(++buf_dbg_counter % 37 || buf_validate());
2833 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2834  goto loop;
2835  }
2836 
2837 got_block:
2838  ut_ad(page_zip_get_size(&block->page.zip) == zip_size);
2839 
2840  must_read = buf_block_get_io_fix(block) == BUF_IO_READ;
2841 
2842  if (must_read && mode == BUF_GET_IF_IN_POOL) {
2843 
2844  /* The page is being read to buffer pool,
2845  but we cannot wait around for the read to
2846  complete. */
2847  buf_pool_mutex_exit(buf_pool);
2848 
2849  return(NULL);
2850  }
2851 
2852  switch (buf_block_get_state(block)) {
2853  buf_page_t* bpage;
2854  ibool success;
2855 
2856  case BUF_BLOCK_FILE_PAGE:
2857  break;
2858 
2859  case BUF_BLOCK_ZIP_PAGE:
2860  case BUF_BLOCK_ZIP_DIRTY:
2861  bpage = &block->page;
2862  /* Protect bpage->buf_fix_count. */
2863  mutex_enter(&buf_pool->zip_mutex);
2864 
2865  if (bpage->buf_fix_count
2866  || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
2867  /* This condition often occurs when the buffer
2868  is not buffer-fixed, but I/O-fixed by
2869  buf_page_init_for_read(). */
2870  mutex_exit(&buf_pool->zip_mutex);
2871 wait_until_unfixed:
2872  /* The block is buffer-fixed or I/O-fixed.
2873  Try again later. */
2874  buf_pool_mutex_exit(buf_pool);
2875  os_thread_sleep(WAIT_FOR_READ);
2876 
2877  goto loop;
2878  }
2879 
2880  /* Allocate an uncompressed page. */
2881  buf_pool_mutex_exit(buf_pool);
2882  mutex_exit(&buf_pool->zip_mutex);
2883 
2884  block = buf_LRU_get_free_block(buf_pool, 0);
2885  ut_a(block);
2886 
2887  buf_pool_mutex_enter(buf_pool);
2888  mutex_enter(&block->mutex);
2889 
2890  {
2891  buf_page_t* hash_bpage;
2892 
2893  hash_bpage = buf_page_hash_get_low(
2894  buf_pool, space, offset, fold);
2895 
2896  if (UNIV_UNLIKELY(bpage != hash_bpage)) {
2897  /* The buf_pool->page_hash was modified
2898  while buf_pool->mutex was released.
2899  Free the block that was allocated. */
2900 
2901  buf_LRU_block_free_non_file_page(block);
2902  mutex_exit(&block->mutex);
2903 
2904  block = (buf_block_t*) hash_bpage;
2905  goto loop2;
2906  }
2907  }
2908 
2909  if (UNIV_UNLIKELY
2910  (bpage->buf_fix_count
2911  || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
2912 
2913  /* The block was buffer-fixed or I/O-fixed
2914  while buf_pool->mutex was not held by this thread.
2915  Free the block that was allocated and try again.
2916  This should be extremely unlikely. */
2917 
2918  buf_LRU_block_free_non_file_page(block);
2919  mutex_exit(&block->mutex);
2920 
2921  goto wait_until_unfixed;
2922  }
2923 
2924  /* Move the compressed page from bpage to block,
2925  and uncompress it. */
2926 
2927  mutex_enter(&buf_pool->zip_mutex);
2928 
2929  buf_relocate(bpage, &block->page);
2930  buf_block_init_low(block);
2931  block->lock_hash_val = lock_rec_hash(space, offset);
2932 
2933  UNIV_MEM_DESC(&block->page.zip.data,
2934  page_zip_get_size(&block->page.zip), block);
2935 
2936  if (buf_page_get_state(&block->page)
2937  == BUF_BLOCK_ZIP_PAGE) {
2938  UT_LIST_REMOVE(list, buf_pool->zip_clean,
2939  &block->page);
2940  ut_ad(!block->page.in_flush_list);
2941  } else {
2942  /* Relocate buf_pool->flush_list. */
2943  buf_flush_relocate_on_flush_list(bpage,
2944  &block->page);
2945  }
2946 
2947  /* Buffer-fix, I/O-fix, and X-latch the block
2948  for the duration of the decompression.
2949  Also add the block to the unzip_LRU list. */
2950  block->page.state = BUF_BLOCK_FILE_PAGE;
2951 
2952  /* Insert at the front of unzip_LRU list */
2953  buf_unzip_LRU_add_block(block, FALSE);
2954 
2955  block->page.buf_fix_count = 1;
2956  buf_block_set_io_fix(block, BUF_IO_READ);
2957  rw_lock_x_lock_func(&block->lock, 0, file, line);
2958 
2959  UNIV_MEM_INVALID(bpage, sizeof *bpage);
2960 
2961  mutex_exit(&block->mutex);
2962  mutex_exit(&buf_pool->zip_mutex);
2963  buf_pool->n_pend_unzip++;
2964 
2965  buf_buddy_free(buf_pool, bpage, sizeof *bpage);
2966 
2967  buf_pool_mutex_exit(buf_pool);
2968 
2969  /* Decompress the page and apply buffered operations
2970  while not holding buf_pool->mutex or block->mutex. */
2971  success = buf_zip_decompress(block, srv_use_checksums);
2972  ut_a(success);
2973 
2974  if (UNIV_LIKELY(!recv_no_ibuf_operations)) {
2975  ibuf_merge_or_delete_for_page(block, space, offset,
2976  zip_size, TRUE);
2977  }
2978 
2979  /* Unfix and unlatch the block. */
2980  buf_pool_mutex_enter(buf_pool);
2981  mutex_enter(&block->mutex);
2982  block->page.buf_fix_count--;
2983  buf_block_set_io_fix(block, BUF_IO_NONE);
2984  mutex_exit(&block->mutex);
2985  buf_pool->n_pend_unzip--;
2986  rw_lock_x_unlock(&block->lock);
2987 
2988  break;
2989 
2990  case BUF_BLOCK_ZIP_FREE:
2991  case BUF_BLOCK_NOT_USED:
2992  case BUF_BLOCK_READY_FOR_USE:
2993  case BUF_BLOCK_MEMORY:
2994  case BUF_BLOCK_REMOVE_HASH:
2995  ut_error;
2996  break;
2997  }
2998 
2999  ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
3000 
3001  mutex_enter(&block->mutex);
3002 #if UNIV_WORD_SIZE == 4
3003  /* On 32-bit systems, there is no padding in buf_page_t. On
3004  other systems, Valgrind could complain about uninitialized pad
3005  bytes. */
3006  UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);
3007 #endif
3008 #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
3009  if ((mode == BUF_GET_IF_IN_POOL || mode == BUF_GET_IF_IN_POOL_OR_WATCH)
3010  && ibuf_debug) {
3011  /* Try to evict the block from the buffer pool, to use the
3012  insert buffer (change buffer) as much as possible. */
3013 
3014  if (buf_LRU_free_block(&block->page, TRUE, NULL)
3015  == BUF_LRU_FREED) {
3016  mutex_exit(&block->mutex);
3017  if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
3018  /* Set the watch, as it would have
3019  been set if the page were not in the
3020  buffer pool in the first place. */
3021  block = (buf_block_t*) buf_pool_watch_set(
3022  space, offset, fold);
3023 
3024  if (UNIV_LIKELY_NULL(block)) {
3025 
3026  /* The page entered the buffer
3027  pool for some reason. Try to
3028  evict it again. */
3029  goto got_block;
3030  }
3031  }
3032  buf_pool_mutex_exit(buf_pool);
3033  fprintf(stderr,
3034  "innodb_change_buffering_debug evict %u %u\n",
3035  (unsigned) space, (unsigned) offset);
3036  return(NULL);
3037  } else if (buf_flush_page_try(buf_pool, block)) {
3038  fprintf(stderr,
3039  "innodb_change_buffering_debug flush %u %u\n",
3040  (unsigned) space, (unsigned) offset);
3041  guess = block;
3042  goto loop;
3043  }
3044 
3045  /* Failed to evict the page; change it directly */
3046  }
3047 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
3048 
3049  buf_block_buf_fix_inc(block, file, line);
3050 
3051  mutex_exit(&block->mutex);
3052 
3053  /* Check if this is the first access to the page */
3054 
3055  access_time = buf_page_is_accessed(&block->page);
3056 
3057  buf_pool_mutex_exit(buf_pool);
3058 
3059  buf_page_set_accessed_make_young(&block->page, access_time);
3060 
3061 #ifdef UNIV_DEBUG_FILE_ACCESSES
3062  ut_a(!block->page.file_page_was_freed);
3063 #endif
3064 
3065 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3066  ut_a(++buf_dbg_counter % 5771 || buf_validate());
3067  ut_a(block->page.buf_fix_count > 0);
3068  ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
3069 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3070 
3071  switch (rw_latch) {
3072  case RW_NO_LATCH:
3073  if (must_read) {
3074  /* Let us wait until the read operation
3075  completes */
3076 
3077  for (;;) {
3078  enum buf_io_fix io_fix;
3079 
3080  mutex_enter(&block->mutex);
3081  io_fix = buf_block_get_io_fix(block);
3082  mutex_exit(&block->mutex);
3083 
3084  if (io_fix == BUF_IO_READ) {
3085 
3086  os_thread_sleep(WAIT_FOR_READ);
3087  } else {
3088  break;
3089  }
3090  }
3091  }
3092 
3093  fix_type = MTR_MEMO_BUF_FIX;
3094  break;
3095 
3096  case RW_S_LATCH:
3097  rw_lock_s_lock_func(&(block->lock), 0, file, line);
3098 
3099  fix_type = MTR_MEMO_PAGE_S_FIX;
3100  break;
3101 
3102  default:
3103  ut_ad(rw_latch == RW_X_LATCH);
3104  rw_lock_x_lock_func(&(block->lock), 0, file, line);
3105 
3106  fix_type = MTR_MEMO_PAGE_X_FIX;
3107  break;
3108  }
3109 
3110  mtr_memo_push(mtr, block, fix_type);
3111 
3112  if (!access_time) {
3113  /* In the case of a first access, try to apply linear
3114  read-ahead */
3115 
3116  buf_read_ahead_linear(space, zip_size, offset);
3117  }
3118 
3119 #ifdef UNIV_IBUF_COUNT_DEBUG
3120  ut_a(ibuf_count_get(buf_block_get_space(block),
3121  buf_block_get_page_no(block)) == 0);
3122 #endif
3123  return(block);
3124 }
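/* Editor's annotation (not part of the original source): callers normally
reach buf_page_get_gen() through the buf_page_get() macro in buf0buf.h,
which supplies guess = NULL, mode = BUF_GET and the caller's
__FILE__/__LINE__. A minimal fetch of one page under an S-latch inside a
mini-transaction; SPACE, ZIP_SIZE and PAGE_NO are placeholders. */
#if 0	/* illustrative sketch only */
	mtr_t		mtr;
	buf_block_t*	block;

	mtr_start(&mtr);

	/* With mode BUF_GET this waits for any pending read and returns
	a buffer-fixed, S-latched block. */
	block = buf_page_get(SPACE, ZIP_SIZE, PAGE_NO, RW_S_LATCH, &mtr);

	/* ... read buf_block_get_frame(block) under the latch ... */

	mtr_commit(&mtr);	/* releases the latch and the buffer fix */
#endif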
3125 
3126 /********************************************************************/
3127 This is the general function used to get optimistic access to a database
3128 page.
3129 @return TRUE if success */
3130 UNIV_INTERN
3131 ibool
3132 buf_page_optimistic_get(
3133 /*====================*/
3134  ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH */
3135  buf_block_t* block, /*!< in: guessed buffer block */
3136  ib_uint64_t modify_clock, /*!< in: modify clock value if mode is
3137  ..._GUESS_ON_CLOCK */
3138  const char* file, /*!< in: file name */
3139  ulint line,
3140  mtr_t* mtr)
3141 {
3142  buf_pool_t* buf_pool;
3143  unsigned access_time;
3144  ibool success;
3145  ulint fix_type;
3146 
3147  ut_ad(block);
3148  ut_ad(mtr);
3149  ut_ad(mtr->state == MTR_ACTIVE);
3150  ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
3151 
3152  mutex_enter(&block->mutex);
3153 
3154  if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {
3155 
3156  mutex_exit(&block->mutex);
3157 
3158  return(FALSE);
3159  }
3160 
3161  buf_block_buf_fix_inc(block, file, line);
3162 
3163  mutex_exit(&block->mutex);
3164 
3165  /* Check if this is the first access to the page.
3166  We do a dirty read on purpose, to avoid mutex contention.
3167  This field is only used for heuristic purposes; it does not
3168  affect correctness. */
3169 
3170  access_time = buf_page_is_accessed(&block->page);
3171  buf_page_set_accessed_make_young(&block->page, access_time);
3172 
3173  ut_ad(!ibuf_inside()
3174  || ibuf_page(buf_block_get_space(block),
3175  buf_block_get_zip_size(block),
3176  buf_block_get_page_no(block), NULL));
3177 
3178  if (rw_latch == RW_S_LATCH) {
3179  success = rw_lock_s_lock_nowait(&(block->lock),
3180  file, line);
3181  fix_type = MTR_MEMO_PAGE_S_FIX;
3182  } else {
3183  success = rw_lock_x_lock_func_nowait(&(block->lock),
3184  file, line);
3185  fix_type = MTR_MEMO_PAGE_X_FIX;
3186  }
3187 
3188  if (UNIV_UNLIKELY(!success)) {
3189  mutex_enter(&block->mutex);
3190  buf_block_buf_fix_dec(block);
3191  mutex_exit(&block->mutex);
3192 
3193  return(FALSE);
3194  }
3195 
3196  if (UNIV_UNLIKELY(modify_clock != block->modify_clock)) {
3197  buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
3198 
3199  if (rw_latch == RW_S_LATCH) {
3200  rw_lock_s_unlock(&(block->lock));
3201  } else {
3202  rw_lock_x_unlock(&(block->lock));
3203  }
3204 
3205  mutex_enter(&block->mutex);
3206  buf_block_buf_fix_dec(block);
3207  mutex_exit(&block->mutex);
3208 
3209  return(FALSE);
3210  }
3211 
3212  mtr_memo_push(mtr, block, fix_type);
3213 
3214 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3215  ut_a(++buf_dbg_counter % 5771 || buf_validate());
3216  ut_a(block->page.buf_fix_count > 0);
3217  ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
3218 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3219 
3220 #ifdef UNIV_DEBUG_FILE_ACCESSES
3221  ut_a(block->page.file_page_was_freed == FALSE);
3222 #endif
3223  if (UNIV_UNLIKELY(!access_time)) {
3224  /* In the case of a first access, try to apply linear
3225  read-ahead */
3226 
3227  buf_read_ahead_linear(buf_block_get_space(block),
3228  buf_block_get_zip_size(block),
3229  buf_block_get_page_no(block));
3230  }
3231 
3232 #ifdef UNIV_IBUF_COUNT_DEBUG
3233  ut_a(ibuf_count_get(buf_block_get_space(block),
3234  buf_block_get_page_no(block)) == 0);
3235 #endif
3236  buf_pool = buf_pool_from_block(block);
3237  buf_pool->stat.n_page_gets++;
3238 
3239  return(TRUE);
3240 }
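/* Editor's annotation (not part of the original source): a sketch of the
optimistic pattern buf_page_optimistic_get() supports. A caller that once
latched the block remembers both the pointer and its modify clock
(buf_block_get_modify_clock()); the later restore succeeds only if the
block still contains the same, unmodified page. The variables block and
mtr are assumed to exist in the caller. */
#if 0	/* illustrative sketch only */
	ib_uint64_t	saved_clock = buf_block_get_modify_clock(block);

	/* ... latch released; the block may be evicted or modified ... */

	if (buf_page_optimistic_get(RW_S_LATCH, block, saved_clock,
				    __FILE__, __LINE__, &mtr)) {
		/* Same page, unmodified: the guess paid off. */
	} else {
		/* Fall back to a pessimistic fetch by page number. */
	}
#endif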
3241 
3242 /********************************************************************/
3243 This is used to get access to a known database page, when no waiting can be
3244 done. For example, if a search in an adaptive hash index leads us to this
3245 frame.
3246 @return TRUE if success */
3247 UNIV_INTERN
3248 ibool
3249 buf_page_get_known_nowait(
3250 /*======================*/
3251  ulint rw_latch,
3252  buf_block_t* block,
3253  ulint mode,
3254  const char* file,
3255  ulint line,
3256  mtr_t* mtr)
3257 {
3258  buf_pool_t* buf_pool;
3259  ibool success;
3260  ulint fix_type;
3261 
3262  ut_ad(mtr);
3263  ut_ad(mtr->state == MTR_ACTIVE);
3264  ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
3265 
3266  mutex_enter(&block->mutex);
3267 
3268  if (buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH) {
3269  /* Another thread is just freeing the block from the LRU list
3270  of the buffer pool: do not try to access this page; this
3271  attempt to access the page can only come through the hash
3272  index because when the buffer block state is ..._REMOVE_HASH,
3273  we have already removed it from the page address hash table
3274  of the buffer pool. */
3275 
3276  mutex_exit(&block->mutex);
3277 
3278  return(FALSE);
3279  }
3280 
3281  ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
3282 
3283  buf_block_buf_fix_inc(block, file, line);
3284 
3285  mutex_exit(&block->mutex);
3286 
3287  buf_pool = buf_pool_from_block(block);
3288 
3289  if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
3290  buf_pool_mutex_enter(buf_pool);
3291  buf_LRU_make_block_young(&block->page);
3292  buf_pool_mutex_exit(buf_pool);
3293  } else if (!buf_page_is_accessed(&block->page)) {
3294  /* Above, we do a dirty read on purpose, to avoid
3295  mutex contention. The field buf_page_t::access_time
3296  is only used for heuristic purposes. Writes to the
3297  field must be protected by mutex, however. */
3298  ulint time_ms = ut_time_ms();
3299 
3300  buf_pool_mutex_enter(buf_pool);
3301  buf_page_set_accessed(&block->page, time_ms);
3302  buf_pool_mutex_exit(buf_pool);
3303  }
3304 
3305  ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
3306 
3307  if (rw_latch == RW_S_LATCH) {
3308  success = rw_lock_s_lock_nowait(&(block->lock),
3309  file, line);
3310  fix_type = MTR_MEMO_PAGE_S_FIX;
3311  } else {
3312  success = rw_lock_x_lock_func_nowait(&(block->lock),
3313  file, line);
3314  fix_type = MTR_MEMO_PAGE_X_FIX;
3315  }
3316 
3317  if (!success) {
3318  mutex_enter(&block->mutex);
3319  buf_block_buf_fix_dec(block);
3320  mutex_exit(&block->mutex);
3321 
3322  return(FALSE);
3323  }
3324 
3325  mtr_memo_push(mtr, block, fix_type);
3326 
3327 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3328  ut_a(++buf_dbg_counter % 5771 || buf_validate());
3329  ut_a(block->page.buf_fix_count > 0);
3330  ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
3331 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3332 #ifdef UNIV_DEBUG_FILE_ACCESSES
3333  ut_a(block->page.file_page_was_freed == FALSE);
3334 #endif
3335 
3336 #ifdef UNIV_IBUF_COUNT_DEBUG
3337  ut_a((mode == BUF_KEEP_OLD)
3338  || (ibuf_count_get(buf_block_get_space(block),
3339  buf_block_get_page_no(block)) == 0));
3340 #endif
3341  buf_pool->stat.n_page_gets++;
3342 
3343  return(TRUE);
3344 }
3345 
3346 /*******************************************************************/
3347 Given a tablespace id and page number, tries to get that page. If the
3348 page is not in the buffer pool it is not loaded and NULL is returned.
3349 Suitable for using when holding the kernel mutex.
3350 @return pointer to a page or NULL */
3351 UNIV_INTERN
3352 const buf_block_t*
3353 buf_page_try_get_func(
3354 /*==================*/
3355  ulint space_id,
3356  ulint page_no,
3357  const char* file,
3358  ulint line,
3359  mtr_t* mtr)
3360 {
3361  buf_block_t* block;
3362  ibool success;
3363  ulint fix_type;
3364  buf_pool_t* buf_pool = buf_pool_get(space_id, page_no);
3365 
3366  ut_ad(mtr);
3367  ut_ad(mtr->state == MTR_ACTIVE);
3368 
3369  buf_pool_mutex_enter(buf_pool);
3370  block = buf_block_hash_get(buf_pool, space_id, page_no);
3371 
3372  if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
3373  buf_pool_mutex_exit(buf_pool);
3374  return(NULL);
3375  }
3376 
3377  ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
3378 
3379  mutex_enter(&block->mutex);
3380  buf_pool_mutex_exit(buf_pool);
3381 
3382 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3383  ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
3384  ut_a(buf_block_get_space(block) == space_id);
3385  ut_a(buf_block_get_page_no(block) == page_no);
3386 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3387 
3388  buf_block_buf_fix_inc(block, file, line);
3389  mutex_exit(&block->mutex);
3390 
3391  fix_type = MTR_MEMO_PAGE_S_FIX;
3392  success = rw_lock_s_lock_nowait(&block->lock, file, line);
3393 
3394  if (!success) {
3395  /* Let us try to get an X-latch. If the current thread
3396  is holding an X-latch on the page, we cannot get an
3397  S-latch. */
3398 
3399  fix_type = MTR_MEMO_PAGE_X_FIX;
3400  success = rw_lock_x_lock_func_nowait(&block->lock,
3401  file, line);
3402  }
3403 
3404  if (!success) {
3405  mutex_enter(&block->mutex);
3406  buf_block_buf_fix_dec(block);
3407  mutex_exit(&block->mutex);
3408 
3409  return(NULL);
3410  }
3411 
3412  mtr_memo_push(mtr, block, fix_type);
3413 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3414  ut_a(++buf_dbg_counter % 5771 || buf_validate());
3415  ut_a(block->page.buf_fix_count > 0);
3416  ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
3417 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3418 #ifdef UNIV_DEBUG_FILE_ACCESSES
3419  ut_a(block->page.file_page_was_freed == FALSE);
3420 #endif /* UNIV_DEBUG_FILE_ACCESSES */
3421  buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
3422 
3423  buf_pool->stat.n_page_gets++;
3424 
3425 #ifdef UNIV_IBUF_COUNT_DEBUG
3426  ut_a(ibuf_count_get(buf_block_get_space(block),
3427  buf_block_get_page_no(block)) == 0);
3428 #endif
3429 
3430  return(block);
3431 }
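/* Editor's annotation (not part of the original source): callers use the
buf_page_try_get() macro from buf0buf.h, which fills in __FILE__/__LINE__.
The call never blocks and never issues I/O, which is what makes it safe
while holding the kernel mutex; SPACE_ID, PAGE_NO and mtr are assumed to
exist in the caller. */
#if 0	/* illustrative sketch only */
	const buf_block_t*	block
		= buf_page_try_get(SPACE_ID, PAGE_NO, &mtr);

	if (block != NULL) {
		/* The page was resident and is now buffer-fixed
		and latched. */
	}
#endif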
3432 
3433 /********************************************************************/
3434 Initialize some fields of a page. */
3435 UNIV_INLINE
3436 void
3437 buf_page_init_low(
3438 /*==============*/
3439  buf_page_t* bpage)
3440 {
3441  bpage->flush_type = BUF_FLUSH_LRU;
3442  bpage->io_fix = BUF_IO_NONE;
3443  bpage->buf_fix_count = 0;
3444  bpage->freed_page_clock = 0;
3445  bpage->access_time = 0;
3446  bpage->newest_modification = 0;
3447  bpage->oldest_modification = 0;
3448  HASH_INVALIDATE(bpage, hash);
3449 #ifdef UNIV_DEBUG_FILE_ACCESSES
3450  bpage->file_page_was_freed = FALSE;
3451 #endif /* UNIV_DEBUG_FILE_ACCESSES */
3452 }
3453 
3454 /********************************************************************/
3455 Initializes a page to the buffer buf_pool. */
3456 static
3457 void
3458 buf_page_init(
3459 /*==========*/
3460  ulint space, /*!< in: space id */
3461  ulint offset, /*!< in: offset of the page within space
3462  in units of a page */
3463  ulint fold, /*!< in: buf_page_address_fold(space,offset) */
3464  buf_block_t* block) /*!< in: block to init */
3465 {
3466  buf_page_t* hash_page;
3467  buf_pool_t* buf_pool = buf_pool_get(space, offset);
3468 
3469  ut_ad(buf_pool_mutex_own(buf_pool));
3470  ut_ad(mutex_own(&(block->mutex)));
3471  ut_a(buf_page_get_state(&block->page) != BUF_BLOCK_FILE_PAGE);
3472 
3473  /* Set the state of the block */
3474  buf_block_set_file_page(block, space, offset);
3475 
3476 #ifdef UNIV_DEBUG_VALGRIND
3477  if (!space) {
3478  /* Silence valid Valgrind warnings about uninitialized
3479  data being written to data files. There are some unused
3480  bytes on some pages that InnoDB does not initialize. */
3481  UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
3482  }
3483 #endif /* UNIV_DEBUG_VALGRIND */
3484 
3485  buf_block_init_low(block);
3486 
3487  block->lock_hash_val = lock_rec_hash(space, offset);
3488 
3489  buf_page_init_low(&block->page);
3490 
3491  /* Insert into the hash table of file pages */
3492 
3493  hash_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
3494 
3495  if (UNIV_LIKELY(!hash_page)) {
3496  } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
3497  /* Preserve the reference count. */
3498  ulint buf_fix_count = hash_page->buf_fix_count;
3499 
3500  ut_a(buf_fix_count > 0);
3501  block->page.buf_fix_count += buf_fix_count;
3502  buf_pool_watch_remove(buf_pool, fold, hash_page);
3503  } else {
3504  fprintf(stderr,
3505  "InnoDB: Error: page %lu %lu already found"
3506  " in the hash table: %p, %p\n",
3507  (ulong) space,
3508  (ulong) offset,
3509  (const void*) hash_page, (const void*) block);
3510 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3511  mutex_exit(&block->mutex);
3512  buf_pool_mutex_exit(buf_pool);
3513  buf_print();
3514  buf_LRU_print();
3515  buf_validate();
3516  buf_LRU_validate();
3517 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3518  ut_error;
3519  }
3520 
3521  ut_ad(!block->page.in_zip_hash);
3522  ut_ad(!block->page.in_page_hash);
3523  ut_d(block->page.in_page_hash = TRUE);
3524  HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
3525  fold, &block->page);
3526 }
3527 
3528 /********************************************************************/
3529 Function which inits a page for read to the buffer buf_pool. If the page is
3530 (1) already in buf_pool, or
3531 (2) if we specify to read only ibuf pages and the page is not an ibuf page, or
3532 (3) if the space is deleted or being deleted,
3533 then this function does nothing.
3534 Sets the byte read lock for the buffer frame: this rw-lock is reserved in
3535 buf_page_get_gen. The io-handler must take care that the flag is cleared
3536 and the lock released later.
3537 @return pointer to the block or NULL */
3538 UNIV_INTERN
3539 buf_page_t*
3540 buf_page_init_for_read(
3541 /*===================*/
3542  ulint* err,
3543  ulint mode,
3544  ulint space,
3545  ulint zip_size,
3546  ibool unzip,
3547  ib_int64_t tablespace_version,
3548  /*!< in: prevents reading from a wrong
3549  version of the tablespace in case we have done
3550  DISCARD + IMPORT */
3551  ulint offset) /*!< in: page number */
3552 {
3553  buf_block_t* block;
3554  buf_page_t* bpage = NULL;
3555  buf_page_t* watch_page;
3556  mtr_t mtr;
3557  ulint fold;
3558  ibool lru = FALSE;
3559  void* data;
3560  buf_pool_t* buf_pool = buf_pool_get(space, offset);
3561 
3562  ut_ad(buf_pool);
3563 
3564  *err = DB_SUCCESS;
3565 
3566  if (mode == BUF_READ_IBUF_PAGES_ONLY) {
3567  /* It is a read-ahead within an ibuf routine */
3568 
3569  ut_ad(!ibuf_bitmap_page(zip_size, offset));
3570  ut_ad(ibuf_inside());
3571 
3572  mtr_start(&mtr);
3573 
3574  if (!recv_no_ibuf_operations
3575  && !ibuf_page(space, zip_size, offset, &mtr)) {
3576 
3577  mtr_commit(&mtr);
3578 
3579  return(NULL);
3580  }
3581  } else {
3582  ut_ad(mode == BUF_READ_ANY_PAGE);
3583  }
3584 
3585  if (zip_size && UNIV_LIKELY(!unzip)
3586  && UNIV_LIKELY(!recv_recovery_is_on())) {
3587  block = NULL;
3588  } else {
3589  block = buf_LRU_get_free_block(buf_pool, 0);
3590  ut_ad(block);
3591  ut_ad(buf_pool_from_block(block) == buf_pool);
3592  }
3593 
3594  fold = buf_page_address_fold(space, offset);
3595 
3596  buf_pool_mutex_enter(buf_pool);
3597 
3598  watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
3599  if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
3600  /* The page is already in the buffer pool. */
3601  watch_page = NULL;
3602 err_exit:
3603  if (block) {
3604  mutex_enter(&block->mutex);
3605  buf_LRU_block_free_non_file_page(block);
3606  mutex_exit(&block->mutex);
3607  }
3608 
3609  bpage = NULL;
3610  goto func_exit;
3611  }
3612 
3613  if (fil_tablespace_deleted_or_being_deleted_in_mem(
3614  space, tablespace_version)) {
3615  /* The page belongs to a space which has been
3616  deleted or is being deleted. */
3617  *err = DB_TABLESPACE_DELETED;
3618 
3619  goto err_exit;
3620  }
3621 
3622  if (block) {
3623  bpage = &block->page;
3624  mutex_enter(&block->mutex);
3625 
3626  ut_ad(buf_pool_from_bpage(bpage) == buf_pool);
3627 
3628  buf_page_init(space, offset, fold, block);
3629 
3630  /* The block must be put to the LRU list, to the old blocks */
3631  buf_LRU_add_block(bpage, TRUE/* to old blocks */);
3632 
3633  /* We set a pass-type x-lock on the frame because then
3634  the same thread which called for the read operation
3635  (and is running now at this point of code) can wait
3636  for the read to complete by waiting for the x-lock on
3637  the frame; if the x-lock were recursive, the same
3638  thread would illegally get the x-lock before the page
3639  read is completed. The x-lock is cleared by the
3640  io-handler thread. */
3641 
3642  rw_lock_x_lock_gen(&block->lock, BUF_IO_READ);
3643  buf_page_set_io_fix(bpage, BUF_IO_READ);
3644 
3645  if (UNIV_UNLIKELY(zip_size)) {
3646  page_zip_set_size(&block->page.zip, zip_size);
3647 
3648  /* buf_pool->mutex may be released and
3649  reacquired by buf_buddy_alloc(). Thus, we
3650  must release block->mutex in order not to
3651  break the latching order in the reacquisition
3652  of buf_pool->mutex. We also must defer this
3653  operation until after the block descriptor has
3654  been added to buf_pool->LRU and
3655  buf_pool->page_hash. */
3656  mutex_exit(&block->mutex);
3657  data = buf_buddy_alloc(buf_pool, zip_size, &lru);
3658  mutex_enter(&block->mutex);
3659  block->page.zip.data = static_cast<unsigned char *>(data);
3660 
3661  /* To maintain the invariant
3662  block->in_unzip_LRU_list
3663  == buf_page_belongs_to_unzip_LRU(&block->page)
3664  we have to add this block to unzip_LRU
3665  after block->page.zip.data is set. */
3665  ut_ad(buf_page_belongs_to_unzip_LRU(
3666  &block->page));
3667  buf_unzip_LRU_add_block(block, TRUE);
3668  }
3669 
3670  mutex_exit(&block->mutex);
3671  } else {
3672  /* Defer buf_buddy_alloc() until after the block has
3673  been found not to exist. The buf_buddy_alloc() and
3674  buf_buddy_free() calls may be expensive because of
3675  buf_buddy_relocate(). */
3676 
3677  /* The compressed page must be allocated before the
3678  control block (bpage), in order to avoid the
3679  invocation of buf_buddy_relocate_block() on
3680  uninitialized data. */
3681  data = buf_buddy_alloc(buf_pool, zip_size, &lru);
3682  bpage = static_cast<buf_page_struct *>(buf_buddy_alloc(buf_pool, sizeof *bpage, &lru));
3683 
3684  /* Initialize the buf_pool pointer. */
3685  bpage->buf_pool_index = buf_pool_index(buf_pool);
3686 
3687  /* If buf_buddy_alloc() allocated storage from the LRU list,
3688  it released and reacquired buf_pool->mutex. Thus, we must
3689  check the page_hash again, as it may have been modified. */
3690  if (UNIV_UNLIKELY(lru)) {
3691 
3692  watch_page = buf_page_hash_get_low(
3693  buf_pool, space, offset, fold);
3694 
3695  if (watch_page
3696  && !buf_pool_watch_is_sentinel(buf_pool,
3697  watch_page)) {
3698 
3699  /* The block was added by some other thread. */
3700  watch_page = NULL;
3701  buf_buddy_free(buf_pool, bpage, sizeof *bpage);
3702  buf_buddy_free(buf_pool, data, zip_size);
3703 
3704  bpage = NULL;
3705  goto func_exit;
3706  }
3707  }
3708 
3709  page_zip_des_init(&bpage->zip);
3710  page_zip_set_size(&bpage->zip, zip_size);
3711  bpage->zip.data = static_cast<unsigned char *>(data);
3712 
3713  mutex_enter(&buf_pool->zip_mutex);
3714  UNIV_MEM_DESC(bpage->zip.data,
3715  page_zip_get_size(&bpage->zip), bpage);
3716 
3717  buf_page_init_low(bpage);
3718 
3719  bpage->state = BUF_BLOCK_ZIP_PAGE;
3720  bpage->space = space;
3721  bpage->offset = offset;
3722 
3723 
3724 #ifdef UNIV_DEBUG
3725  bpage->in_page_hash = FALSE;
3726  bpage->in_zip_hash = FALSE;
3727  bpage->in_flush_list = FALSE;
3728  bpage->in_free_list = FALSE;
3729  bpage->in_LRU_list = FALSE;
3730 #endif /* UNIV_DEBUG */
3731 
3732  ut_d(bpage->in_page_hash = TRUE);
3733 
3734  if (UNIV_LIKELY_NULL(watch_page)) {
3735  /* Preserve the reference count. */
3736  ulint buf_fix_count = watch_page->buf_fix_count;
3737  ut_a(buf_fix_count > 0);
3738  bpage->buf_fix_count += buf_fix_count;
3739  ut_ad(buf_pool_watch_is_sentinel(buf_pool, watch_page));
3740  buf_pool_watch_remove(buf_pool, fold, watch_page);
3741  }
3742 
3743  HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
3744  bpage);
3745 
3746  /* The block must be put to the LRU list, to the old blocks */
3747  buf_LRU_add_block(bpage, TRUE/* to old blocks */);
3748  buf_LRU_insert_zip_clean(bpage);
3749 
3750  buf_page_set_io_fix(bpage, BUF_IO_READ);
3751 
3752  mutex_exit(&buf_pool->zip_mutex);
3753  }
3754 
3755  buf_pool->n_pend_reads++;
3756 func_exit:
3757  buf_pool_mutex_exit(buf_pool);
3758 
3759  if (mode == BUF_READ_IBUF_PAGES_ONLY) {
3760 
3761  mtr_commit(&mtr);
3762  }
3763 
3764  ut_ad(!bpage || buf_page_in_file(bpage));
3765  return(bpage);
3766 }
3767 
3768 /********************************************************************/
3769 Initializes a page to the buffer buf_pool. The page is usually not read
3770 from a file even if it cannot be found in the buffer buf_pool. This is one
3771 of the functions which perform a state transition NOT_USED => FILE_PAGE on
3772 a block (the other is buf_page_get_gen).
3773 @return pointer to the block, page bufferfixed */
3774 UNIV_INTERN
3775 buf_block_t*
3776 buf_page_create(
3777 /*============*/
3778  ulint space, /*!< in: space id */
3779  ulint offset, /*!< in: offset of the page within space in units of
3780  a page */
3781  ulint zip_size, /*!< in: compressed page size, or 0 */
3782  mtr_t* mtr) /*!< in: mini-transaction handle */
3783 {
3784  buf_frame_t* frame;
3785  buf_block_t* block;
3786  ulint fold;
3787  buf_block_t* free_block = NULL;
3788  ulint time_ms = ut_time_ms();
3789  buf_pool_t* buf_pool = buf_pool_get(space, offset);
3790 
3791  ut_ad(mtr);
3792  ut_ad(mtr->state == MTR_ACTIVE);
3793  ut_ad(space || !zip_size);
3794 
3795  free_block = buf_LRU_get_free_block(buf_pool, 0);
3796 
3797  fold = buf_page_address_fold(space, offset);
3798 
3799  buf_pool_mutex_enter(buf_pool);
3800 
3801  block = (buf_block_t*) buf_page_hash_get_low(
3802  buf_pool, space, offset, fold);
3803 
3804  if (block
3805  && buf_page_in_file(&block->page)
3806  && !buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
3807 #ifdef UNIV_IBUF_COUNT_DEBUG
3808  ut_a(ibuf_count_get(space, offset) == 0);
3809 #endif
3810 #ifdef UNIV_DEBUG_FILE_ACCESSES
3811  block->page.file_page_was_freed = FALSE;
3812 #endif /* UNIV_DEBUG_FILE_ACCESSES */
3813 
3814  /* Page can be found in buf_pool */
3815  buf_pool_mutex_exit(buf_pool);
3816 
3817  buf_block_free(free_block);
3818 
3819  return(buf_page_get_with_no_latch(space, zip_size,
3820  offset, mtr));
3821  }
3822 
3823  /* If we get here, the page was not in buf_pool: init it there */
3824 
3825 #ifdef UNIV_DEBUG
3826  if (buf_debug_prints) {
3827  fprintf(stderr, "Creating space %lu page %lu to buffer\n",
3828  (ulong) space, (ulong) offset);
3829  }
3830 #endif /* UNIV_DEBUG */
3831 
3832  block = free_block;
3833 
3834  mutex_enter(&block->mutex);
3835 
3836  buf_page_init(space, offset, fold, block);
3837 
3838  /* The block must be put to the LRU list */
3839  buf_LRU_add_block(&block->page, FALSE);
3840 
3841  buf_block_buf_fix_inc(block, __FILE__, __LINE__);
3842  buf_pool->stat.n_pages_created++;
3843 
3844  if (zip_size) {
3845  void* data;
3846  ibool lru;
3847 
3848  /* Prevent race conditions during buf_buddy_alloc(),
3849  which may release and reacquire buf_pool->mutex,
3850  by IO-fixing and X-latching the block. */
3851 
3852  buf_page_set_io_fix(&block->page, BUF_IO_READ);
3853  rw_lock_x_lock(&block->lock);
3854 
3855  page_zip_set_size(&block->page.zip, zip_size);
3856  mutex_exit(&block->mutex);
3857  /* buf_pool->mutex may be released and reacquired by
3858  buf_buddy_alloc(). Thus, we must release block->mutex
3859  in order not to break the latching order in
3860  the reacquisition of buf_pool->mutex. We also must
3861  defer this operation until after the block descriptor
3862  has been added to buf_pool->LRU and buf_pool->page_hash. */
3863  data = buf_buddy_alloc(buf_pool, zip_size, &lru);
3864  mutex_enter(&block->mutex);
3865  block->page.zip.data = static_cast<unsigned char *>(data);
3866 
3867  /* To maintain the invariant
3868  block->in_unzip_LRU_list
3869  == buf_page_belongs_to_unzip_LRU(&block->page)
3870  we have to add this block to unzip_LRU after
3871  block->page.zip.data is set. */
3872  ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
3873  buf_unzip_LRU_add_block(block, FALSE);
3874 
3875  buf_page_set_io_fix(&block->page, BUF_IO_NONE);
3876  rw_lock_x_unlock(&block->lock);
3877  }
3878 
3879  buf_page_set_accessed(&block->page, time_ms);
3880 
3881  buf_pool_mutex_exit(buf_pool);
3882 
3883  mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
3884 
3885  mutex_exit(&block->mutex);
3886 
3887  /* Delete possible entries for the page from the insert buffer:
3888  such can exist if the page belonged to an index which was dropped */
3889 
3890  ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE);
3891 
3892  /* Flush pages from the end of the LRU list if necessary */
3893  buf_flush_free_margin(buf_pool);
3894 
3895  frame = block->frame;
3896 
3897  memset(frame + FIL_PAGE_PREV, 0xff, 4);
3898  memset(frame + FIL_PAGE_NEXT, 0xff, 4);
3899  mach_write_to_2(frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED);
3900 
3901  /* Reset to zero the file flush lsn field in the page; if the first
3902  page of an ibdata file is 'created' in this function into the buffer
3903  pool then we lose the original contents of the file flush lsn stamp.
3904  Then InnoDB could in a crash recovery print a big, false, corruption
3905  warning if the stamp contains an lsn bigger than the ib_logfile lsn. */
3906 
3907  memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
3908 
3909 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
3910  ut_a(++buf_dbg_counter % 357 || buf_validate());
3911 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
3912 #ifdef UNIV_IBUF_COUNT_DEBUG
3913  ut_a(ibuf_count_get(buf_block_get_space(block),
3914  buf_block_get_page_no(block)) == 0);
3915 #endif
3916  return(block);
3917 }
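/* Editor's annotation (not part of the original source): a sketch of how a
brand-new page is typically allocated, e.g. when a tablespace is extended.
buf_page_create() returns the block buffer-fixed but unlatched, so callers
usually follow it with a latched get on the same page in the same
mini-transaction; SPACE, PAGE_NO and ZIP_SIZE are placeholders. */
#if 0	/* illustrative sketch only */
	mtr_t		mtr;
	buf_block_t*	block;

	mtr_start(&mtr);

	block = buf_page_create(SPACE, PAGE_NO, ZIP_SIZE, &mtr);

	/* X-latch the new frame before initializing its contents. */
	buf_page_get(SPACE, ZIP_SIZE, PAGE_NO, RW_X_LATCH, &mtr);

	mtr_commit(&mtr);
#endif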
3918 
3919 /********************************************************************/
3920 Completes an asynchronous read or write request of a file page to or from
3921 the buffer pool. */
3922 UNIV_INTERN
3923 void
3924 buf_page_io_complete(
3925 /*=================*/
3926  buf_page_t* bpage)
3927 {
3928  enum buf_io_fix io_type;
3929  buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
3930  const ibool uncompressed = (buf_page_get_state(bpage)
3931  == BUF_BLOCK_FILE_PAGE);
3932 
3933  ut_a(buf_page_in_file(bpage));
3934 
3935  /* We do not need protect io_fix here by mutex to read
3936  it because this is the only function where we can change the value
3937  from BUF_IO_READ or BUF_IO_WRITE to some other value, and our code
3938  ensures that this is the only thread that handles the i/o for this
3939  block. */
3940 
3941  io_type = buf_page_get_io_fix(bpage);
3942  ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE);
3943 
3944  if (io_type == BUF_IO_READ) {
3945  ulint read_page_no;
3946  ulint read_space_id;
3947  byte* frame;
3948 
3949  if (buf_page_get_zip_size(bpage)) {
3950  frame = bpage->zip.data;
3951  buf_pool->n_pend_unzip++;
3952  if (uncompressed
3953  && !buf_zip_decompress((buf_block_t*) bpage,
3954  FALSE)) {
3955 
3956  buf_pool->n_pend_unzip--;
3957  goto corrupt;
3958  }
3959  buf_pool->n_pend_unzip--;
3960  } else {
3961  ut_a(uncompressed);
3962  frame = ((buf_block_t*) bpage)->frame;
3963  }
3964 
3965  /* If this page is not uninitialized and not in the
3966  doublewrite buffer, then the page number and space id
3967  should be the same as in block. */
3968  read_page_no = mach_read_from_4(frame + FIL_PAGE_OFFSET);
3969  read_space_id = mach_read_from_4(
3970  frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
3971 
3972  if (bpage->space == TRX_SYS_SPACE
3973  && trx_doublewrite_page_inside(bpage->offset)) {
3974 
3975  ut_print_timestamp(stderr);
3976  fprintf(stderr,
3977  " InnoDB: Error: reading page %lu\n"
3978  "InnoDB: which is in the"
3979  " doublewrite buffer!\n",
3980  (ulong) bpage->offset);
3981  } else if (!read_space_id && !read_page_no) {
3982  /* This is likely an uninitialized page. */
3983  } else if ((bpage->space
3984  && bpage->space != read_space_id)
3985  || bpage->offset != read_page_no) {
3986  /* We did not compare space_id to read_space_id
3987  if bpage->space == 0, because the field on the
3988  page may contain garbage in MySQL < 4.1.1,
3989  which only supported bpage->space == 0. */
3990 
3991  ut_print_timestamp(stderr);
3992  fprintf(stderr,
3993  " InnoDB: Error: space id and page n:o"
3994  " stored in the page\n"
3995  "InnoDB: read in are %lu:%lu,"
3996  " should be %lu:%lu!\n",
3997  (ulong) read_space_id, (ulong) read_page_no,
3998  (ulong) bpage->space,
3999  (ulong) bpage->offset);
4000  }
4001 
4002  /* From version 3.23.38 up we store the page checksum
4003  to the 4 first bytes of the page end lsn field */
4004 
4005  if (buf_page_is_corrupted(frame,
4006  buf_page_get_zip_size(bpage))) {
4007 corrupt:
4008  fprintf(stderr,
4009  "InnoDB: Database page corruption on disk"
4010  " or a failed\n"
4011  "InnoDB: file read of page %lu.\n"
4012  "InnoDB: You may have to recover"
4013  " from a backup.\n",
4014  (ulong) bpage->offset);
4015  buf_page_print(frame, buf_page_get_zip_size(bpage));
4016  fprintf(stderr,
4017  "InnoDB: Database page corruption on disk"
4018  " or a failed\n"
4019  "InnoDB: file read of page %lu.\n"
4020  "InnoDB: You may have to recover"
4021  " from a backup.\n",
4022  (ulong) bpage->offset);
4023  fputs("InnoDB: It is also possible that"
4024  " your operating\n"
4025  "InnoDB: system has corrupted its"
4026  " own file cache\n"
4027  "InnoDB: and rebooting your computer"
4028  " removes the\n"
4029  "InnoDB: error.\n"
4030  "InnoDB: If the corrupt page is an index page\n"
4031  "InnoDB: you can also try to"
4032  " fix the corruption\n"
4033  "InnoDB: by dumping, dropping,"
4034  " and reimporting\n"
4035  "InnoDB: the corrupt table."
4036  " You can use CHECK\n"
4037  "InnoDB: TABLE to scan your"
4038  " table for corruption.\n"
4039  "InnoDB: See also "
4040  REFMAN "forcing-recovery.html\n"
4041  "InnoDB: about forcing recovery.\n", stderr);
4042 
4043  if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
4044  fputs("InnoDB: Ending processing because of"
4045  " a corrupt database page.\n",
4046  stderr);
4047  exit(1);
4048  }
4049  }
4050 
4051  if (recv_recovery_is_on()) {
4052  /* Pages must be uncompressed for crash recovery. */
4053  ut_a(uncompressed);
4054  recv_recover_page(TRUE, (buf_block_t*) bpage);
4055  }
4056 
4057  if (uncompressed && !recv_no_ibuf_operations && !srv_fake_write) {
4058  ibuf_merge_or_delete_for_page(
4059  (buf_block_t*) bpage, bpage->space,
4060  bpage->offset, buf_page_get_zip_size(bpage),
4061  TRUE);
4062  }
4063  }
4064 
4065  buf_pool_mutex_enter(buf_pool);
4066  mutex_enter(buf_page_get_mutex(bpage));
4067 
4068 #ifdef UNIV_IBUF_COUNT_DEBUG
4069  if (io_type == BUF_IO_WRITE || uncompressed) {
4070  /* For BUF_IO_READ of compressed-only blocks, the
4071  buffered operations will be merged by buf_page_get_gen()
4072  after the block has been uncompressed. */
4073  ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
4074  }
4075 #endif
4076  /* Because the thread that does the unlocking is not the same one
4077  that did the locking, we use a pass value != 0 in unlock, which simply
4078  removes the newest lock debug record, without checking the thread
4079  id. */
4080 
4081  buf_page_set_io_fix(bpage, BUF_IO_NONE);
4082 
4083  switch (io_type) {
4084  case BUF_IO_READ:
4085  /* NOTE that the call to ibuf may have moved the ownership of
4086  the x-latch to this OS thread: do not let this confuse you in
4087  debugging! */
4088 
4089  ut_ad(buf_pool->n_pend_reads > 0);
4090  buf_pool->n_pend_reads--;
4091  buf_pool->stat.n_pages_read++;
4092 
4093  if (uncompressed) {
4094  rw_lock_x_unlock_gen(&((buf_block_t*) bpage)->lock,
4095  BUF_IO_READ);
4096  }
4097 
4098  break;
4099 
4100  case BUF_IO_WRITE:
4101  /* Write means a flush operation: call the completion
4102  routine in the flush system */
4103 
4104  buf_flush_write_complete(bpage);
4105 
4106  if (uncompressed) {
4107  rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
4108  BUF_IO_WRITE);
4109  }
4110 
4111  buf_pool->stat.n_pages_written++;
4112 
4113  break;
4114 
4115  default:
4116  ut_error;
4117  }
4118 
4119 #ifdef UNIV_DEBUG
4120  if (buf_debug_prints) {
4121  fprintf(stderr, "Has %s page space %lu page no %lu\n",
4122  io_type == BUF_IO_READ ? "read" : "written",
4123  (ulong) buf_page_get_space(bpage),
4124  (ulong) buf_page_get_page_no(bpage));
4125  }
4126 #endif /* UNIV_DEBUG */
4127 
4128  mutex_exit(buf_page_get_mutex(bpage));
4129  buf_pool_mutex_exit(buf_pool);
4130 }
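To show where this completion routine sits in the i/o path, here is a minimal sketch of a helper-thread loop that hands a finished request back to buf_page_io_complete(). The wait primitive os_aio_handler_wait() is a hypothetical stand-in for the real aio wait call in the fil/os layer; only the bpage-as-message convention is taken from the code above.

    /* Sketch only: a simplified i/o helper thread.
    os_aio_handler_wait() is a hypothetical wrapper, not a real API. */
    for (;;) {
            void*   message;

            os_aio_handler_wait(&message);

            if (message != NULL) {
                    /* Releases the page latch and updates the
                    buf_pool statistics, as defined above. */
                    buf_page_io_complete((buf_page_t*) message);
            }
    }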
4131 
4132 /*********************************************************************/
4133 /** Checks that all file pages in the buffer are in a replaceable state.
4134 @return	TRUE */
4135 static
4136 ibool
4137 buf_all_freed_instance(
4138 /*===================*/
4139  buf_pool_t* buf_pool)
4140 {
4141  ulint i;
4142  buf_chunk_t* chunk;
4143 
4144  ut_ad(buf_pool);
4145 
4146  buf_pool_mutex_enter(buf_pool);
4147 
4148  chunk = buf_pool->chunks;
4149 
4150  for (i = buf_pool->n_chunks; i--; chunk++) {
4151 
4152  const buf_block_t* block = buf_chunk_not_freed(chunk);
4153 
4154  if (UNIV_LIKELY_NULL(block)) {
4155  fprintf(stderr,
4156  "Page %lu %lu still fixed or dirty\n",
4157  (ulong) block->page.space,
4158  (ulong) block->page.offset);
4159  ut_error;
4160  }
4161  }
4162 
4163  buf_pool_mutex_exit(buf_pool);
4164 
4165  return(TRUE);
4166 }
4167 
4168 /*********************************************************************/
4169 /** Invalidates file pages in one buffer pool instance. */
4170 static
4171 void
4172 buf_pool_invalidate_instance(
4173 /*=========================*/
4174  buf_pool_t* buf_pool)
4175 {
4176  ibool freed;
4177  int i;
4178 
4179  buf_pool_mutex_enter(buf_pool);
4180 
4181  for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
4182 
4183  /* As this function is called during startup and
4184  during the redo-application phase of recovery, InnoDB
4185  is single-threaded (apart from the i/o helper threads) at
4186  this stage. No new write batch can be in the initialization
4187  stage at this point. */
4188  ut_ad(buf_pool->init_flush[i] == FALSE);
4189 
4190  /* However, it is possible that a write batch that has
4191  been posted earlier is still not complete. For buffer
4192  pool invalidation to proceed we must ensure there is NO
4193  write activity happening. */
4194  if (buf_pool->n_flush[i] > 0) {
4195  buf_pool_mutex_exit(buf_pool);
4196  buf_flush_wait_batch_end(buf_pool, static_cast<buf_flush>(i));
4197  buf_pool_mutex_enter(buf_pool);
4198  }
4199  }
4200 
4201  buf_pool_mutex_exit(buf_pool);
4202 
4203  ut_ad(buf_all_freed_instance(buf_pool));
4204 
4205  freed = TRUE;
4206 
4207  while (freed) {
4208  freed = buf_LRU_search_and_free_block(buf_pool, 100);
4209  }
4210 
4211  buf_pool_mutex_enter(buf_pool);
4212 
4213  ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
4214  ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
4215 
4216  buf_pool->freed_page_clock = 0;
4217  buf_pool->LRU_old = NULL;
4218  buf_pool->LRU_old_len = 0;
4219  buf_pool->LRU_flush_ended = 0;
4220 
4221  memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
4222  buf_refresh_io_stats(buf_pool);
4223 
4224  buf_pool_mutex_exit(buf_pool);
4225 }
4226 
4227 /*********************************************************************/
4228 /** Invalidates the file pages in the buffer pool when an archive recovery
4229 is completed. All the file pages buffered must be in a replaceable state
4230 when this function is called: not latched and not modified. */
4231 UNIV_INTERN
4232 void
4233 buf_pool_invalidate(void)
4234 /*=====================*/
4235 {
4236  ulint i;
4237 
4238  for (i = 0; i < srv_buf_pool_instances; i++) {
4239  buf_pool_invalidate_instance(buf_pool_from_array(i));
4240  }
4241 }
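A usage sketch, not taken from the source: at the end of recovery, once all posted write batches have drained, the whole cache can be emptied and then verified.

    /* Illustrative sequence; assumes no user threads are active yet. */
    buf_pool_invalidate();  /* frees every page in every instance */
    ut_ad(buf_all_freed()); /* debug check: nothing fixed or dirty */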
4242 
4243 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
4244 /*********************************************************************/
4245 /** Validates data in one buffer pool instance.
4246 @return	TRUE */
4247 static
4248 ibool
4249 buf_pool_validate_instance(
4250 /*=======================*/
4251  buf_pool_t* buf_pool)
4252 {
4253  buf_page_t* b;
4254  buf_chunk_t* chunk;
4255  ulint i;
4256  ulint n_single_flush = 0;
4257  ulint n_lru_flush = 0;
4258  ulint n_list_flush = 0;
4259  ulint n_lru = 0;
4260  ulint n_flush = 0;
4261  ulint n_free = 0;
4262  ulint n_zip = 0;
4263 
4264  ut_ad(buf_pool);
4265 
4266  buf_pool_mutex_enter(buf_pool);
4267 
4268  chunk = buf_pool->chunks;
4269 
4270  /* Check the uncompressed blocks. */
4271 
4272  for (i = buf_pool->n_chunks; i--; chunk++) {
4273 
4274  ulint j;
4275  buf_block_t* block = chunk->blocks;
4276 
4277  for (j = chunk->size; j--; block++) {
4278 
4279  mutex_enter(&block->mutex);
4280 
4281  switch (buf_block_get_state(block)) {
4282  case BUF_BLOCK_ZIP_FREE:
4283  case BUF_BLOCK_ZIP_PAGE:
4284  case BUF_BLOCK_ZIP_DIRTY:
4285  /* These should only occur on
4286  zip_clean, zip_free[], or flush_list. */
4287  ut_error;
4288  break;
4289 
4290  case BUF_BLOCK_FILE_PAGE:
4291  ut_a(buf_page_hash_get(buf_pool,
4292  buf_block_get_space(
4293  block),
4294  buf_block_get_page_no(
4295  block))
4296  == &block->page);
4297 
4298 #ifdef UNIV_IBUF_COUNT_DEBUG
4299  ut_a(buf_page_get_io_fix(&block->page)
4300  == BUF_IO_READ
4301  || !ibuf_count_get(buf_block_get_space(
4302  block),
4303  buf_block_get_page_no(
4304  block)));
4305 #endif
4306  switch (buf_page_get_io_fix(&block->page)) {
4307  case BUF_IO_NONE:
4308  break;
4309 
4310  case BUF_IO_WRITE:
4311  switch (buf_page_get_flush_type(
4312  &block->page)) {
4313  case BUF_FLUSH_LRU:
4314  n_lru_flush++;
4315  ut_a(rw_lock_is_locked(
4316  &block->lock,
4317  RW_LOCK_SHARED));
4318  break;
4319  case BUF_FLUSH_LIST:
4320  n_list_flush++;
4321  break;
4322  case BUF_FLUSH_SINGLE_PAGE:
4323  n_single_flush++;
4324  break;
4325  default:
4326  ut_error;
4327  }
4328 
4329  break;
4330 
4331  case BUF_IO_READ:
4332 
4333  ut_a(rw_lock_is_locked(&block->lock,
4334  RW_LOCK_EX));
4335  break;
4336  }
4337 
4338  n_lru++;
4339  break;
4340 
4341  case BUF_BLOCK_NOT_USED:
4342  n_free++;
4343  break;
4344 
4345  case BUF_BLOCK_READY_FOR_USE:
4346  case BUF_BLOCK_MEMORY:
4347  case BUF_BLOCK_REMOVE_HASH:
4348  /* do nothing */
4349  break;
4350  }
4351 
4352  mutex_exit(&block->mutex);
4353  }
4354  }
4355 
4356  mutex_enter(&buf_pool->zip_mutex);
4357 
4358  /* Check clean compressed-only blocks. */
4359 
4360  for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
4361  b = UT_LIST_GET_NEXT(list, b)) {
4362  ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
4363  switch (buf_page_get_io_fix(b)) {
4364  case BUF_IO_NONE:
4365  /* All clean blocks should be I/O-unfixed. */
4366  break;
4367  case BUF_IO_READ:
4368  /* In buf_LRU_free_block(), we temporarily set
4369  b->io_fix = BUF_IO_READ for a newly allocated
4370  control block in order to prevent
4371  buf_page_get_gen() from decompressing the block. */
4372  break;
4373  default:
4374  ut_error;
4375  break;
4376  }
4377 
4378  /* It is OK to read oldest_modification here because
4379  we have acquired buf_pool->zip_mutex above which acts
4380  as the 'block->mutex' for these bpages. */
4381  ut_a(!b->oldest_modification);
4382  ut_a(buf_page_hash_get(buf_pool, b->space, b->offset) == b);
4383 
4384  n_lru++;
4385  n_zip++;
4386  }
4387 
4388  /* Check dirty blocks. */
4389 
4390  buf_flush_list_mutex_enter(buf_pool);
4391  for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
4392  b = UT_LIST_GET_NEXT(list, b)) {
4393  ut_ad(b->in_flush_list);
4394  ut_a(b->oldest_modification);
4395  n_flush++;
4396 
4397  switch (buf_page_get_state(b)) {
4398  case BUF_BLOCK_ZIP_DIRTY:
4399  n_lru++;
4400  n_zip++;
4401  switch (buf_page_get_io_fix(b)) {
4402  case BUF_IO_NONE:
4403  case BUF_IO_READ:
4404  break;
4405  case BUF_IO_WRITE:
4406  switch (buf_page_get_flush_type(b)) {
4407  case BUF_FLUSH_LRU:
4408  n_lru_flush++;
4409  break;
4410  case BUF_FLUSH_LIST:
4411  n_list_flush++;
4412  break;
4413  case BUF_FLUSH_SINGLE_PAGE:
4414  n_single_flush++;
4415  break;
4416  default:
4417  ut_error;
4418  }
4419  break;
4420  }
4421  break;
4422  case BUF_BLOCK_FILE_PAGE:
4423  /* uncompressed page */
4424  break;
4425  case BUF_BLOCK_ZIP_FREE:
4426  case BUF_BLOCK_ZIP_PAGE:
4427  case BUF_BLOCK_NOT_USED:
4428  case BUF_BLOCK_READY_FOR_USE:
4429  case BUF_BLOCK_MEMORY:
4430  case BUF_BLOCK_REMOVE_HASH:
4431  ut_error;
4432  break;
4433  }
4434  ut_a(buf_page_hash_get(buf_pool, b->space, b->offset) == b);
4435  }
4436 
4437  ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
4438 
4439  buf_flush_list_mutex_exit(buf_pool);
4440 
4441  mutex_exit(&buf_pool->zip_mutex);
4442 
4443  if (n_lru + n_free > buf_pool->curr_size + n_zip) {
4444  fprintf(stderr, "n LRU %lu, n free %lu, pool %lu zip %lu\n",
4445  (ulong) n_lru, (ulong) n_free,
4446  (ulong) buf_pool->curr_size, (ulong) n_zip);
4447  ut_error;
4448  }
4449 
4450  ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
4451  if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
4452  fprintf(stderr, "Free list len %lu, free blocks %lu\n",
4453  (ulong) UT_LIST_GET_LEN(buf_pool->free),
4454  (ulong) n_free);
4455  ut_error;
4456  }
4457 
4458  ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
4459  ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
4460  ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
4461 
4462  buf_pool_mutex_exit(buf_pool);
4463 
4464  ut_a(buf_LRU_validate());
4465  ut_a(buf_flush_validate(buf_pool));
4466 
4467  return(TRUE);
4468 }
4469 
4470 /*********************************************************************/
4471 /** Validates the buffer buf_pool data structure.
4472 @return	TRUE */
4473 UNIV_INTERN
4474 ibool
4475 buf_validate(void)
4476 /*==============*/
4477 {
4478  ulint i;
4479 
4480  for (i = 0; i < srv_buf_pool_instances; i++) {
4481  buf_pool_t* buf_pool;
4482 
4483  buf_pool = buf_pool_from_array(i);
4484 
4485  buf_pool_validate_instance(buf_pool);
4486  }
4487  return(TRUE);
4488 }
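Since buf_validate() always returns TRUE and crashes via ut_error on any inconsistency, the idiomatic call site simply wraps it in a debug assertion:

    /* Typical call site; ut_ad() expands to nothing in release builds,
    and buf_validate() itself exists only under UNIV_DEBUG or
    UNIV_BUF_DEBUG. */
    ut_ad(buf_validate());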
4489 
4490 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
4491 
4492 #if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
4493 /*********************************************************************/
4494 /** Prints info of the buffer buf_pool data structure for one instance. */
4495 static
4496 void
4497 buf_print_instance(
4498 /*===============*/
4499  buf_pool_t* buf_pool)
4500 {
4501  index_id_t* index_ids;
4502  ulint* counts;
4503  ulint size;
4504  ulint i;
4505  ulint j;
4506  index_id_t id;
4507  ulint n_found;
4508  buf_chunk_t* chunk;
4509  dict_index_t* index;
4510 
4511  ut_ad(buf_pool);
4512 
4513  size = buf_pool->curr_size;
4514 
4515  index_ids = mem_alloc(size * sizeof *index_ids);
4516  counts = mem_alloc(sizeof(ulint) * size);
4517 
4518  buf_pool_mutex_enter(buf_pool);
4519  buf_flush_list_mutex_enter(buf_pool);
4520 
4521  fprintf(stderr,
4522  "buf_pool size %lu\n"
4523  "database pages %lu\n"
4524  "free pages %lu\n"
4525  "modified database pages %lu\n"
4526  "n pending decompressions %lu\n"
4527  "n pending reads %lu\n"
4528  "n pending flush LRU %lu list %lu single page %lu\n"
4529  "pages made young %lu, not young %lu\n"
4530  "pages read %lu, created %lu, written %lu\n",
4531  (ulong) size,
4532  (ulong) UT_LIST_GET_LEN(buf_pool->LRU),
4533  (ulong) UT_LIST_GET_LEN(buf_pool->free),
4534  (ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
4535  (ulong) buf_pool->n_pend_unzip,
4536  (ulong) buf_pool->n_pend_reads,
4537  (ulong) buf_pool->n_flush[BUF_FLUSH_LRU],
4538  (ulong) buf_pool->n_flush[BUF_FLUSH_LIST],
4539  (ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE],
4540  (ulong) buf_pool->stat.n_pages_made_young,
4541  (ulong) buf_pool->stat.n_pages_not_made_young,
4542  (ulong) buf_pool->stat.n_pages_read,
4543  (ulong) buf_pool->stat.n_pages_created,
4544  (ulong) buf_pool->stat.n_pages_written);
4545 
4546  buf_flush_list_mutex_exit(buf_pool);
4547 
4548  /* Count the number of blocks belonging to each index in the buffer */
4549 
4550  n_found = 0;
4551 
4552  chunk = buf_pool->chunks;
4553 
4554  for (i = buf_pool->n_chunks; i--; chunk++) {
4555  buf_block_t* block = chunk->blocks;
4556  ulint n_blocks = chunk->size;
4557 
4558  for (; n_blocks--; block++) {
4559  const buf_frame_t* frame = block->frame;
4560 
4561  if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {
4562 
4563  id = btr_page_get_index_id(frame);
4564 
4565  /* Look for the id in the index_ids array */
4566  j = 0;
4567 
4568  while (j < n_found) {
4569 
4570  if (index_ids[j] == id) {
4571  counts[j]++;
4572 
4573  break;
4574  }
4575  j++;
4576  }
4577 
4578  if (j == n_found) {
4579  n_found++;
4580  index_ids[j] = id;
4581  counts[j] = 1;
4582  }
4583  }
4584  }
4585  }
4586 
4587  buf_pool_mutex_exit(buf_pool);
4588 
4589  for (i = 0; i < n_found; i++) {
4590  index = dict_index_get_if_in_cache(index_ids[i]);
4591 
4592  fprintf(stderr,
4593  "Block count for index %llu in buffer is about %lu",
4594  (ullint) index_ids[i],
4595  (ulong) counts[i]);
4596 
4597  if (index) {
4598  putc(' ', stderr);
4599  dict_index_name_print(stderr, NULL, index);
4600  }
4601 
4602  putc('\n', stderr);
4603  }
4604 
4605  mem_free(index_ids);
4606  mem_free(counts);
4607 
4608  ut_a(buf_pool_validate_instance(buf_pool));
4609 }
4610 
4611 /*********************************************************************/
4612 /** Prints info of the buffer buf_pool data structure. */
4613 UNIV_INTERN
4614 void
4615 buf_print(void)
4616 /*===========*/
4617 {
4618  ulint i;
4619 
4620  for (i = 0; i < srv_buf_pool_instances; i++) {
4621  buf_pool_t* buf_pool;
4622 
4623  buf_pool = buf_pool_from_array(i);
4624  buf_print_instance(buf_pool);
4625  }
4626 }
4627 #endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
4628 
4629 #ifdef UNIV_DEBUG
4630 /*********************************************************************/
4631 /** Returns the number of latched pages in one buffer pool instance.
4632 @return	number of latched pages */
4633 UNIV_INTERN
4634 ulint
4635 buf_get_latched_pages_number_instance(
4636 /*==================================*/
4637  buf_pool_t* buf_pool)
4638 {
4639  buf_page_t* b;
4640  ulint i;
4641  buf_chunk_t* chunk;
4642  ulint fixed_pages_number = 0;
4643 
4644  buf_pool_mutex_enter(buf_pool);
4645 
4646  chunk = buf_pool->chunks;
4647 
4648  for (i = buf_pool->n_chunks; i--; chunk++) {
4649  buf_block_t* block;
4650  ulint j;
4651 
4652  block = chunk->blocks;
4653 
4654  for (j = chunk->size; j--; block++) {
4655  if (buf_block_get_state(block)
4656  != BUF_BLOCK_FILE_PAGE) {
4657 
4658  continue;
4659  }
4660 
4661  mutex_enter(&block->mutex);
4662 
4663  if (block->page.buf_fix_count != 0
4664  || buf_page_get_io_fix(&block->page)
4665  != BUF_IO_NONE) {
4666  fixed_pages_number++;
4667  }
4668 
4669  mutex_exit(&block->mutex);
4670  }
4671  }
4672 
4673  mutex_enter(&buf_pool->zip_mutex);
4674 
4675  /* Traverse the lists of clean and dirty compressed-only blocks. */
4676 
4677  for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
4678  b = UT_LIST_GET_NEXT(list, b)) {
4679  ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
4680  ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
4681 
4682  if (b->buf_fix_count != 0
4683  || buf_page_get_io_fix(b) != BUF_IO_NONE) {
4684  fixed_pages_number++;
4685  }
4686  }
4687 
4688  buf_flush_list_mutex_enter(buf_pool);
4689  for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
4690  b = UT_LIST_GET_NEXT(list, b)) {
4691  ut_ad(b->in_flush_list);
4692 
4693  switch (buf_page_get_state(b)) {
4694  case BUF_BLOCK_ZIP_DIRTY:
4695  if (b->buf_fix_count != 0
4696  || buf_page_get_io_fix(b) != BUF_IO_NONE) {
4697  fixed_pages_number++;
4698  }
4699  break;
4700  case BUF_BLOCK_FILE_PAGE:
4701  /* uncompressed page */
4702  break;
4703  case BUF_BLOCK_ZIP_FREE:
4704  case BUF_BLOCK_ZIP_PAGE:
4705  case BUF_BLOCK_NOT_USED:
4706  case BUF_BLOCK_READY_FOR_USE:
4707  case BUF_BLOCK_MEMORY:
4708  case BUF_BLOCK_REMOVE_HASH:
4709  ut_error;
4710  break;
4711  }
4712  }
4713 
4714  buf_flush_list_mutex_exit(buf_pool);
4715  mutex_exit(&buf_pool->zip_mutex);
4716  buf_pool_mutex_exit(buf_pool);
4717 
4718  return(fixed_pages_number);
4719 }
4720 
4721 /*********************************************************************/
4722 /** Returns the number of latched pages in all buffer pools.
4723 @return	number of latched pages */
4724 UNIV_INTERN
4725 ulint
4726 buf_get_latched_pages_number(void)
4727 /*==============================*/
4728 {
4729  ulint i;
4730  ulint total_latched_pages = 0;
4731 
4732  for (i = 0; i < srv_buf_pool_instances; i++) {
4733  buf_pool_t* buf_pool;
4734 
4735  buf_pool = buf_pool_from_array(i);
4736 
4737  total_latched_pages += buf_get_latched_pages_number_instance(
4738  buf_pool);
4739  }
4740 
4741  return(total_latched_pages);
4742 }
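A plausible monitor-style use, given only as a sketch (the server's actual monitor printout may differ):

    /* UNIV_DEBUG builds only, since the function is compiled
    under that guard. */
    fprintf(stderr, "Total number of latched pages %lu\n",
            (ulong) buf_get_latched_pages_number());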
4743 
4744 #endif /* UNIV_DEBUG */
4745 
4746 /*********************************************************************/
4747 /** Returns the number of pending buf pool ios.
4748 @return	number of pending I/O operations */
4749 UNIV_INTERN
4750 ulint
4751 buf_get_n_pending_ios(void)
4752 /*=======================*/
4753 {
4754  ulint i;
4755  ulint pend_ios = 0;
4756 
4757  for (i = 0; i < srv_buf_pool_instances; i++) {
4758  buf_pool_t* buf_pool;
4759 
4760  buf_pool = buf_pool_from_array(i);
4761 
4762  pend_ios +=
4763  buf_pool->n_pend_reads
4764  + buf_pool->n_flush[BUF_FLUSH_LRU]
4765  + buf_pool->n_flush[BUF_FLUSH_LIST]
4766  + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE];
4767  }
4768 
4769  return(pend_ios);
4770 }
4771 
4772 /*********************************************************************/
4773 /** Returns the ratio in percents of modified pages in the buffer pool /
4774 database pages in the buffer pool.
4775 @return	modified page percentage ratio */
4776 UNIV_INTERN
4777 ulint
4778 buf_get_modified_ratio_pct(void)
4779 /*============================*/
4780 {
4781  ulint ratio;
4782  ulint lru_len = 0;
4783  ulint free_len = 0;
4784  ulint flush_list_len = 0;
4785 
4786  buf_get_total_list_len(&lru_len, &free_len, &flush_list_len);
4787 
4788  ratio = (100 * flush_list_len) / (1 + lru_len + free_len);
4789 
4790  /* The 1 + is there to avoid division by zero. */
4791 
4792  return(ratio);
4793 }
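A worked example with made-up list lengths: 300 pages on the flush list, 950 pages on the LRU list and 49 pages on the free list give (100 * 300) / (1 + 950 + 49) = 30000 / 1000 = 30, i.e. about 30 percent of the cached database pages are modified.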
4794 
4795 /*********************************************************************/
4796 /** Prints info of the buffer i/o for one buffer pool instance. */
4797 static
4798 void
4799 buf_print_io_instance(
4800 /*==================*/
4801  buf_pool_t* buf_pool,
4802  FILE* file)
4803 {
4804  time_t current_time;
4805  double time_elapsed;
4806  ulint n_gets_diff;
4807 
4808  ut_ad(buf_pool);
4809 
4810  buf_pool_mutex_enter(buf_pool);
4811  buf_flush_list_mutex_enter(buf_pool);
4812 
4813  fprintf(file,
4814  "Buffer pool size %lu\n"
4815  "Free buffers %lu\n"
4816  "Database pages %lu\n"
4817  "Old database pages %lu\n"
4818  "Modified db pages %lu\n"
4819  "Pending reads %lu\n"
4820  "Pending writes: LRU %lu, flush list %lu, single page %lu\n",
4821  (ulong) buf_pool->curr_size,
4822  (ulong) UT_LIST_GET_LEN(buf_pool->free),
4823  (ulong) UT_LIST_GET_LEN(buf_pool->LRU),
4824  (ulong) buf_pool->LRU_old_len,
4825  (ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
4826  (ulong) buf_pool->n_pend_reads,
4827  (ulong) buf_pool->n_flush[BUF_FLUSH_LRU]
4828  + buf_pool->init_flush[BUF_FLUSH_LRU],
4829  (ulong) buf_pool->n_flush[BUF_FLUSH_LIST]
4830  + buf_pool->init_flush[BUF_FLUSH_LIST],
4831  (ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);
4832 
4833  buf_flush_list_mutex_exit(buf_pool);
4834 
4835  current_time = time(NULL);
4836  time_elapsed = 0.001 + difftime(current_time,
4837  buf_pool->last_printout_time);
4838 
4839  fprintf(file,
4840  "Pages made young %lu, not young %lu\n"
4841  "%.2f youngs/s, %.2f non-youngs/s\n"
4842  "Pages read %lu, created %lu, written %lu\n"
4843  "%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
4844  (ulong) buf_pool->stat.n_pages_made_young,
4845  (ulong) buf_pool->stat.n_pages_not_made_young,
4846  (buf_pool->stat.n_pages_made_young
4847  - buf_pool->old_stat.n_pages_made_young)
4848  / time_elapsed,
4849  (buf_pool->stat.n_pages_not_made_young
4850  - buf_pool->old_stat.n_pages_not_made_young)
4851  / time_elapsed,
4852  (ulong) buf_pool->stat.n_pages_read,
4853  (ulong) buf_pool->stat.n_pages_created,
4854  (ulong) buf_pool->stat.n_pages_written,
4855  (buf_pool->stat.n_pages_read
4856  - buf_pool->old_stat.n_pages_read)
4857  / time_elapsed,
4858  (buf_pool->stat.n_pages_created
4859  - buf_pool->old_stat.n_pages_created)
4860  / time_elapsed,
4861  (buf_pool->stat.n_pages_written
4862  - buf_pool->old_stat.n_pages_written)
4863  / time_elapsed);
4864 
4865  n_gets_diff = buf_pool->stat.n_page_gets
4866  - buf_pool->old_stat.n_page_gets;
4867 
4868  if (n_gets_diff) {
4869  fprintf(file,
4870  "Buffer pool hit rate %lu / 1000,"
4871  " young-making rate %lu / 1000 not %lu / 1000\n",
4872  (ulong)
4873  (1000 - ((1000 * (buf_pool->stat.n_pages_read
4874  - buf_pool->old_stat.n_pages_read))
4875  / (buf_pool->stat.n_page_gets
4876  - buf_pool->old_stat.n_page_gets))),
4877  (ulong)
4878  (1000 * (buf_pool->stat.n_pages_made_young
4879  - buf_pool->old_stat.n_pages_made_young)
4880  / n_gets_diff),
4881  (ulong)
4882  (1000 * (buf_pool->stat.n_pages_not_made_young
4883  - buf_pool->old_stat.n_pages_not_made_young)
4884  / n_gets_diff));
4885  } else {
4886  fputs("No buffer pool page gets since the last printout\n",
4887  file);
4888  }
4889 
4890  /* Statistics about the read-ahead algorithm */
4891  fprintf(file, "Pages read ahead %.2f/s,"
4892  " evicted without access %.2f/s\n",
4893  (buf_pool->stat.n_ra_pages_read
4894  - buf_pool->old_stat.n_ra_pages_read)
4895  / time_elapsed,
4896  (buf_pool->stat.n_ra_pages_evicted
4897  - buf_pool->old_stat.n_ra_pages_evicted)
4898  / time_elapsed);
4899 
4900  /* Print some values to help us with visualizing what is
4901  happening with LRU eviction. */
4902  fprintf(file,
4903  "LRU len: %lu, unzip_LRU len: %lu\n"
4904  "I/O sum[%lu]:cur[%lu], unzip sum[%lu]:cur[%lu]\n",
4905  static_cast<ulint>(UT_LIST_GET_LEN(buf_pool->LRU)),
4906  static_cast<ulint>(UT_LIST_GET_LEN(buf_pool->unzip_LRU)),
4907  buf_LRU_stat_sum.io, buf_LRU_stat_cur.io,
4908  buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);
4909 
4910  buf_refresh_io_stats(buf_pool);
4911  buf_pool_mutex_exit(buf_pool);
4912 }
4913 
4914 /*********************************************************************/
4915 /** Prints info of the buffer i/o. */
4916 UNIV_INTERN
4917 void
4918 buf_print_io(
4919 /*=========*/
4920  FILE* file)
4921 {
4922  ulint i;
4923 
4924  for (i = 0; i < srv_buf_pool_instances; i++) {
4925  buf_pool_t* buf_pool;
4926 
4927  buf_pool = buf_pool_from_array(i);
4928  buf_print_io_instance(buf_pool, file);
4929  }
4930 }
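Usage sketch (illustrative): the caller passes whatever stream the monitor output should go to, for example

    buf_print_io(stderr);   /* dump i/o statistics for every instance */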
4931 
4932 /**********************************************************************/
4933 /** Refreshes the statistics used to print per-second averages. */
4934 UNIV_INTERN
4935 void
4936 buf_refresh_io_stats(
4937 /*=================*/
4938  buf_pool_t* buf_pool)
4939 {
4940  buf_pool->last_printout_time = ut_time();
4941  buf_pool->old_stat = buf_pool->stat;
4942 }
4943 
4944 /**********************************************************************/
4945 /** Refreshes the i/o statistics of all buffer pool instances. */
4946 UNIV_INTERN
4947 void
4948 buf_refresh_io_stats_all(void)
4949 /*==========================*/
4950 {
4951  ulint i;
4952 
4953  for (i = 0; i < srv_buf_pool_instances; i++) {
4954  buf_pool_t* buf_pool;
4955 
4956  buf_pool = buf_pool_from_array(i);
4957 
4958  buf_refresh_io_stats(buf_pool);
4959  }
4960 }
4961 
4962 /**********************************************************************/
4963 /** Checks whether all pages in all buffer pools are in a replaceable state.
4964 @return FALSE if not */
4965 UNIV_INTERN
4966 ibool
4967 buf_all_freed(void)
4968 /*===============*/
4969 {
4970  ulint i;
4971 
4972  for (i = 0; i < srv_buf_pool_instances; i++) {
4973  buf_pool_t* buf_pool;
4974 
4975  buf_pool = buf_pool_from_array(i);
4976 
4977  if (!buf_all_freed_instance(buf_pool)) {
4978  return(FALSE);
4979  }
4980  }
4981 
4982  return(TRUE);
4983 }
4984 
4985 /*********************************************************************/
4986 /** Checks that there currently are no pending i/o-operations for the
4987 buffer pool.
4988 @return	TRUE if there is no pending i/o */
4989 UNIV_INTERN
4990 ibool
4991 buf_pool_check_no_pending_io(void)
4992 /*==============================*/
4993 {
4994  ulint i;
4995  ibool ret = TRUE;
4996 
4997  buf_pool_mutex_enter_all();
4998 
4999  for (i = 0; i < srv_buf_pool_instances && ret; i++) {
5000  const buf_pool_t* buf_pool;
5001 
5002  buf_pool = buf_pool_from_array(i);
5003 
5004  if (buf_pool->n_pend_reads
5005  + buf_pool->n_flush[BUF_FLUSH_LRU]
5006  + buf_pool->n_flush[BUF_FLUSH_LIST]
5007  + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]) {
5008 
5009  ret = FALSE;
5010  }
5011  }
5012 
5013  buf_pool_mutex_exit_all();
5014 
5015  return(ret);
5016 }
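A shutdown-style polling loop is the natural consumer of this check; the sketch below is illustrative, not the actual shutdown code.

    /* Sketch: wait until all in-flight page i/o has drained
    before closing the tablespace files. */
    while (!buf_pool_check_no_pending_io()) {
            os_thread_sleep(100000);        /* 100 ms, then poll again */
    }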
5017 
5018 #if 0
5019 Code currently not used
5020 /*********************************************************************/
5021 /** Gets the current length of the free list of buffer blocks.
5022 @return	length of the free list */
5023 UNIV_INTERN
5024 ulint
5025 buf_get_free_list_len(void)
5026 /*=======================*/
5027 {
5028  ulint len;
5029 
5030  buf_pool_mutex_enter(buf_pool);
5031 
5032  len = UT_LIST_GET_LEN(buf_pool->free);
5033 
5034  buf_pool_mutex_exit(buf_pool);
5035 
5036  return(len);
5037 }
5038 #endif
5039 
5040 #else /* !UNIV_HOTBACKUP */
5041 /********************************************************************/
5042 /** Inits a page to the buffer buf_pool, for use in ibbackup --restore. */
5043 UNIV_INTERN
5044 void
5045 buf_page_init_for_backup_restore(
5046 /*=============================*/
5047  ulint space, /*!< in: space id */
5048  ulint offset, /*!< in: offset of the page within space
5049  in units of a page */
5050  ulint zip_size, /*!< in: compressed page size in bytes
5051  or 0 for uncompressed pages */
5052  buf_block_t* block) /*!< in: block to init */
5053 {
5054  block->page.state = BUF_BLOCK_FILE_PAGE;
5055  block->page.space = space;
5056  block->page.offset = offset;
5057 
5058  page_zip_des_init(&block->page.zip);
5059 
5060  /* We assume that block->page.data has been allocated
5061  with zip_size == UNIV_PAGE_SIZE. */
5062  ut_ad(zip_size <= UNIV_PAGE_SIZE);
5063  ut_ad(ut_is_2pow(zip_size));
5064  page_zip_set_size(&block->page.zip, zip_size);
5065  if (zip_size) {
5066  block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
5067  }
5068 }
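Given the assertions above, zip_size must be 0 (uncompressed) or a power of two no larger than UNIV_PAGE_SIZE. A hypothetical restore-side call, with invented variables:

    /* Sketch: space_id, page_no and block are assumed to be set up by
    the caller; 8192 is one of the legal compressed page sizes
    (1024, 2048, 4096, 8192, 16384). */
    buf_page_init_for_backup_restore(space_id, page_no,
                                     8192 /* zip_size */, block);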
5069 #endif /* !UNIV_HOTBACKUP */