Drizzled Public API Documentation

btr0btr.cc
1 /*****************************************************************************
2 
3 Copyright (C) 1994, 2010, Innobase Oy. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free Software
7 Foundation; version 2 of the License.
8 
9 This program is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12 
13 You should have received a copy of the GNU General Public License along with
14 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
15 St, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 *****************************************************************************/
18 
19 /**************************************************/
26 #include "btr0btr.h"
27 
28 #ifdef UNIV_NONINL
29 #include "btr0btr.ic"
30 #endif
31 
32 #include "fsp0fsp.h"
33 #include "page0page.h"
34 #include "xtrabackup_api.h"
35 
36 #ifndef UNIV_HOTBACKUP
37 #include "btr0cur.h"
38 #include "btr0sea.h"
39 #include "btr0pcur.h"
40 #include "rem0cmp.h"
41 #include "lock0lock.h"
42 #include "ibuf0ibuf.h"
43 #include "trx0trx.h"
44 #include "page0zip.h"
45 
46 /*
47 Latching strategy of the InnoDB B-tree
48 --------------------------------------
49 A tree latch protects all non-leaf nodes of the tree. Each node of a tree
50 also has a latch of its own.
51 
52 A B-tree operation normally first acquires an S-latch on the tree. It
53 searches down the tree and releases the tree latch when it has the
54 leaf node latch. To save CPU time we do not acquire any latch on
55 non-leaf nodes of the tree during a search, those pages are only bufferfixed.
56 
57 If an operation needs to restructure the tree, it acquires an X-latch on
58 the tree before searching to a leaf node. If it needs, for example, to
59 split a leaf,
60 (1) InnoDB decides the split point in the leaf,
61 (2) allocates a new page,
62 (3) inserts the appropriate node pointer to the first non-leaf level,
63 (4) releases the tree X-latch,
64 (5) and then moves records from the leaf to the new allocated page.
65 
66 Node pointers
67 -------------
68 Leaf pages of a B-tree contain the index records stored in the
69 tree. On levels n > 0 we store 'node pointers' to pages on level
70 n - 1. For each page there is exactly one node pointer stored:
71 thus our tree is an ordinary B-tree, not a B-link tree.
72 
73 A node pointer contains a prefix P of an index record. The prefix
74 is long enough so that it determines an index record uniquely.
75 The file page number of the child page is added as the last
76 field. To the child page we can store node pointers or index records
77 which are >= P in the alphabetical order, but < P1 if there is
78 a next node pointer on the level, and P1 is its prefix.
79 
80 If a node pointer with a prefix P points to a non-leaf child,
81 then the leftmost record in the child must have the same
82 prefix P. If it points to a leaf node, the child is not required
83 to contain any record with a prefix equal to P. The leaf case
84 is decided this way to allow arbitrary deletions in a leaf node
85 without touching upper levels of the tree.
86 
87 We have predefined a special minimum record which we
88 define as the smallest record in any alphabetical order.
89 A minimum record is denoted by setting a bit in the record
90 header. A minimum record acts as the prefix of a node pointer
91 which points to a leftmost node on any level of the tree.
92 
93 File page allocation
94 --------------------
95 In the root node of a B-tree there are two file segment headers.
96 The leaf pages of a tree are allocated from one file segment, to
97 make them consecutive on disk if possible. From the other file segment
98 we allocate pages for the non-leaf levels of the tree.
99 */
100 
101 #ifdef UNIV_BTR_DEBUG
102 /**************************************************************/
105 static
106 ibool
107 btr_root_fseg_validate(
108 /*===================*/
109  const fseg_header_t* seg_header,
110  ulint space)
111 {
112  ulint offset = mach_read_from_2(seg_header + FSEG_HDR_OFFSET);
113 
114  ut_a(mach_read_from_4(seg_header + FSEG_HDR_SPACE) == space);
115  ut_a(offset >= FIL_PAGE_DATA);
116  ut_a(offset <= UNIV_PAGE_SIZE - FIL_PAGE_DATA_END);
117  return(TRUE);
118 }
119 #endif /* UNIV_BTR_DEBUG */
120 
121 /**************************************************************/
125 btr_root_block_get(
126 /*===============*/
127  dict_index_t* index,
128  mtr_t* mtr)
129 {
130  ulint space;
131  ulint zip_size;
132  ulint root_page_no;
133  buf_block_t* block;
134 
135  space = dict_index_get_space(index);
136  zip_size = dict_table_zip_size(index->table);
137  root_page_no = dict_index_get_page(index);
138 
139  block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, mtr);
140  ut_a((ibool)!!page_is_comp(buf_block_get_frame(block))
141  == dict_table_is_comp(index->table));
142 #ifdef UNIV_BTR_DEBUG
143  if (!dict_index_is_ibuf(index)) {
144  const page_t* root = buf_block_get_frame(block);
145 
146  ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
147  + root, space));
148  ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
149  + root, space));
150  }
151 #endif /* UNIV_BTR_DEBUG */
152 
153  return(block);
154 }
155 
156 /**************************************************************/
159 UNIV_INTERN
160 page_t*
161 btr_root_get(
162 /*=========*/
163  dict_index_t* index,
164  mtr_t* mtr)
165 {
166  return(buf_block_get_frame(btr_root_block_get(index, mtr)));
167 }
168 
169 /*************************************************************/
173 UNIV_INTERN
174 rec_t*
175 btr_get_prev_user_rec(
176 /*==================*/
177  rec_t* rec,
178  mtr_t* mtr)
180 {
181  page_t* page;
182  page_t* prev_page;
183  ulint prev_page_no;
184 
185  if (!page_rec_is_infimum(rec)) {
186 
187  rec_t* prev_rec = page_rec_get_prev(rec);
188 
189  if (!page_rec_is_infimum(prev_rec)) {
190 
191  return(prev_rec);
192  }
193  }
194 
195  page = page_align(rec);
196  prev_page_no = btr_page_get_prev(page, mtr);
197 
198  if (prev_page_no != FIL_NULL) {
199 
200  ulint space;
201  ulint zip_size;
202  buf_block_t* prev_block;
203 
204  space = page_get_space_id(page);
205  zip_size = fil_space_get_zip_size(space);
206 
207  prev_block = buf_page_get_with_no_latch(space, zip_size,
208  prev_page_no, mtr);
209  prev_page = buf_block_get_frame(prev_block);
210  /* The caller must already have a latch to the brother */
211  ut_ad(mtr_memo_contains(mtr, prev_block,
212  MTR_MEMO_PAGE_S_FIX)
213  || mtr_memo_contains(mtr, prev_block,
214  MTR_MEMO_PAGE_X_FIX));
215 #ifdef UNIV_BTR_DEBUG
216  ut_a(page_is_comp(prev_page) == page_is_comp(page));
217  ut_a(btr_page_get_next(prev_page, mtr)
218  == page_get_page_no(page));
219 #endif /* UNIV_BTR_DEBUG */
220 
221  return(page_rec_get_prev(page_get_supremum_rec(prev_page)));
222  }
223 
224  return(NULL);
225 }
226 
227 /*************************************************************/
231 UNIV_INTERN
232 rec_t*
233 btr_get_next_user_rec(
234 /*==================*/
235  rec_t* rec,
236  mtr_t* mtr)
238 {
239  page_t* page;
240  page_t* next_page;
241  ulint next_page_no;
242 
243  if (!page_rec_is_supremum(rec)) {
244 
245  rec_t* next_rec = page_rec_get_next(rec);
246 
247  if (!page_rec_is_supremum(next_rec)) {
248 
249  return(next_rec);
250  }
251  }
252 
253  page = page_align(rec);
254  next_page_no = btr_page_get_next(page, mtr);
255 
256  if (next_page_no != FIL_NULL) {
257  ulint space;
258  ulint zip_size;
259  buf_block_t* next_block;
260 
261  space = page_get_space_id(page);
262  zip_size = fil_space_get_zip_size(space);
263 
264  next_block = buf_page_get_with_no_latch(space, zip_size,
265  next_page_no, mtr);
266  next_page = buf_block_get_frame(next_block);
267  /* The caller must already have a latch to the brother */
268  ut_ad(mtr_memo_contains(mtr, next_block, MTR_MEMO_PAGE_S_FIX)
269  || mtr_memo_contains(mtr, next_block,
270  MTR_MEMO_PAGE_X_FIX));
271 #ifdef UNIV_BTR_DEBUG
272  ut_a(page_is_comp(next_page) == page_is_comp(page));
273  ut_a(btr_page_get_prev(next_page, mtr)
274  == page_get_page_no(page));
275 #endif /* UNIV_BTR_DEBUG */
276 
277  return(page_rec_get_next(page_get_infimum_rec(next_page)));
278  }
279 
280  return(NULL);
281 }
282 
283 /**************************************************************/
286 static
287 void
288 btr_page_create(
289 /*============*/
290  buf_block_t* block,
291  page_zip_des_t* page_zip,
292  dict_index_t* index,
293  ulint level,
294  mtr_t* mtr)
295 {
296  page_t* page = buf_block_get_frame(block);
297 
298  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
299 
300  if (UNIV_LIKELY_NULL(page_zip)) {
301  page_create_zip(block, index, level, mtr);
302  } else {
303  page_create(block, mtr, dict_table_is_comp(index->table));
304  /* Set the level of the new index page */
305  btr_page_set_level(page, NULL, level, mtr);
306  }
307 
308  block->check_index_page_at_flush = TRUE;
309 
310  btr_page_set_index_id(page, page_zip, index->id, mtr);
311 }
312 
313 /**************************************************************/
317 static
319 btr_page_alloc_for_ibuf(
320 /*====================*/
321  dict_index_t* index,
322  mtr_t* mtr)
323 {
324  fil_addr_t node_addr;
325  page_t* root;
326  page_t* new_page;
327  buf_block_t* new_block;
328 
329  root = btr_root_get(index, mtr);
330 
331  node_addr = flst_get_first(root + PAGE_HEADER
332  + PAGE_BTR_IBUF_FREE_LIST, mtr);
333  ut_a(node_addr.page != FIL_NULL);
334 
335  new_block = buf_page_get(dict_index_get_space(index),
336  dict_table_zip_size(index->table),
337  node_addr.page, RW_X_LATCH, mtr);
338  new_page = buf_block_get_frame(new_block);
339  buf_block_dbg_add_level(new_block, SYNC_TREE_NODE_NEW);
340 
341  flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
342  new_page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE,
343  mtr);
344  ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
345  mtr));
346 
347  return(new_block);
348 }
349 
350 /**************************************************************/
354 UNIV_INTERN
356 btr_page_alloc(
357 /*===========*/
358  dict_index_t* index,
359  ulint hint_page_no,
360  byte file_direction,
362  ulint level,
364  mtr_t* mtr)
365 {
366  fseg_header_t* seg_header;
367  page_t* root;
368  buf_block_t* new_block;
369  ulint new_page_no;
370 
371  if (dict_index_is_ibuf(index)) {
372 
373  return(btr_page_alloc_for_ibuf(index, mtr));
374  }
375 
376  root = btr_root_get(index, mtr);
377 
378  if (level == 0) {
379  seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
380  } else {
381  seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
382  }
383 
384  /* Parameter TRUE below states that the caller has made the
385  reservation for free extents, and thus we know that a page can
386  be allocated: */
387 
388  new_page_no = fseg_alloc_free_page_general(seg_header, hint_page_no,
389  file_direction, TRUE, mtr);
390  if (new_page_no == FIL_NULL) {
391 
392  return(NULL);
393  }
394 
395  new_block = buf_page_get(dict_index_get_space(index),
396  dict_table_zip_size(index->table),
397  new_page_no, RW_X_LATCH, mtr);
398  buf_block_dbg_add_level(new_block, SYNC_TREE_NODE_NEW);
399 
400  return(new_block);
401 }
402 
403 /**************************************************************/
406 UNIV_INTERN
407 ulint
408 btr_get_size(
409 /*=========*/
410  dict_index_t* index,
411  ulint flag)
412 {
413  fseg_header_t* seg_header;
414  page_t* root;
415  ulint n;
416  ulint dummy;
417  mtr_t mtr;
418 
419  mtr_start(&mtr);
420 
421  mtr_s_lock(dict_index_get_lock(index), &mtr);
422 
423  root = btr_root_get(index, &mtr);
424 
425  if (flag == BTR_N_LEAF_PAGES) {
426  seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
427 
428  fseg_n_reserved_pages(seg_header, &n, &mtr);
429 
430  } else if (flag == BTR_TOTAL_SIZE) {
431  seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
432 
433  n = fseg_n_reserved_pages(seg_header, &dummy, &mtr);
434 
435  seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
436 
437  n += fseg_n_reserved_pages(seg_header, &dummy, &mtr);
438  } else {
439  ut_error;
440  }
441 
442  mtr_commit(&mtr);
443 
444  return(n);
445 }
446 
447 /**************************************************************/
450 static
451 void
452 btr_page_free_for_ibuf(
453 /*===================*/
454  dict_index_t* index,
455  buf_block_t* block,
456  mtr_t* mtr)
457 {
458  page_t* root;
459 
460  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
461  root = btr_root_get(index, mtr);
462 
463  flst_add_first(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
464  buf_block_get_frame(block)
465  + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr);
466 
467  ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
468  mtr));
469 }
470 
471 /**************************************************************/
475 UNIV_INTERN
476 void
477 btr_page_free_low(
478 /*==============*/
479  dict_index_t* index,
480  buf_block_t* block,
481  ulint level,
482  mtr_t* mtr)
483 {
484  fseg_header_t* seg_header;
485  page_t* root;
486 
487  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
488  /* The page gets invalid for optimistic searches: increment the frame
489  modify clock */
490 
492 
493  if (dict_index_is_ibuf(index)) {
494 
495  btr_page_free_for_ibuf(index, block, mtr);
496 
497  return;
498  }
499 
500  root = btr_root_get(index, mtr);
501 
502  if (level == 0) {
503  seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
504  } else {
505  seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
506  }
507 
508  fseg_free_page(seg_header,
509  buf_block_get_space(block),
510  buf_block_get_page_no(block), mtr);
511 }
512 
513 /**************************************************************/
516 UNIV_INTERN
517 void
518 btr_page_free(
519 /*==========*/
520  dict_index_t* index,
521  buf_block_t* block,
522  mtr_t* mtr)
523 {
524  ulint level;
525 
526  level = btr_page_get_level(buf_block_get_frame(block), mtr);
527 
528  btr_page_free_low(index, block, level, mtr);
529 }
530 
531 /**************************************************************/
533 UNIV_INLINE
534 void
535 btr_node_ptr_set_child_page_no(
536 /*===========================*/
537  rec_t* rec,
538  page_zip_des_t* page_zip,
540  const ulint* offsets,
541  ulint page_no,
542  mtr_t* mtr)
543 {
544  byte* field;
545  ulint len;
546 
547  ut_ad(rec_offs_validate(rec, NULL, offsets));
548  ut_ad(!page_is_leaf(page_align(rec)));
549  ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec));
550 
551  /* The child address is in the last field */
552  field = rec_get_nth_field(rec, offsets,
553  rec_offs_n_fields(offsets) - 1, &len);
554 
555  ut_ad(len == REC_NODE_PTR_SIZE);
556 
557  if (UNIV_LIKELY_NULL(page_zip)) {
558  page_zip_write_node_ptr(page_zip, rec,
559  rec_offs_data_size(offsets),
560  page_no, mtr);
561  } else {
562  mlog_write_ulint(field, page_no, MLOG_4BYTES, mtr);
563  }
564 }
565 
566 /************************************************************/
570 btr_node_ptr_get_child(
571 /*===================*/
572  const rec_t* node_ptr,
573  dict_index_t* index,
574  const ulint* offsets,
575  mtr_t* mtr)
576 {
577  ulint page_no;
578  ulint space;
579 
580  ut_ad(rec_offs_validate(node_ptr, index, offsets));
581  space = page_get_space_id(page_align(node_ptr));
582  page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
583 
584  return(btr_block_get(space, dict_table_zip_size(index->table),
585  page_no, RW_X_LATCH, mtr));
586 }
587 
/************************************************************//**
Returns the upper level node pointer to a page. It is assumed that mtr
holds an x-latch on the tree.  If the parent node pointer found does not
point back to the child page, the tree is corrupt: diagnostics are dumped
to stderr and the server aborts with ut_error.
@return	rec_get_offsets() of the node pointer record */
static
ulint*
btr_page_get_father_node_ptr_func(
/*==============================*/
	ulint*		offsets,/*!< in: work area for the return value */
	mem_heap_t*	heap,	/*!< in: memory heap to use */
	btr_cur_t*	cursor,	/*!< in: cursor pointing to a user record,
				out: cursor on the node pointer record,
				its page x-latched */
	const char*	file,	/*!< in: file name of the caller */
	ulint		line,	/*!< in: line number of the caller */
	mtr_t*		mtr)	/*!< in: mtr */
{
	dtuple_t*	tuple;
	rec_t*		user_rec;
	rec_t*		node_ptr;
	ulint		level;
	ulint		page_no;
	dict_index_t*	index;

	page_no = buf_block_get_page_no(btr_cur_get_block(cursor));
	index = btr_cur_get_index(cursor);

	/* The caller must hold the index tree x-latch. */
	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
				MTR_MEMO_X_LOCK));

	/* The root page has no parent. */
	ut_ad(dict_index_get_page(index) != page_no);

	level = btr_page_get_level(btr_cur_get_page(cursor), mtr);

	user_rec = btr_cur_get_rec(cursor);
	ut_a(page_rec_is_user_rec(user_rec));
	/* Build a node pointer from the user record and search for it
	one level higher up the tree. */
	tuple = dict_index_build_node_ptr(index, user_rec, 0, heap, level);

	btr_cur_search_to_nth_level(index, level + 1, tuple, PAGE_CUR_LE,
				    BTR_CONT_MODIFY_TREE, cursor, 0,
				    file, line, mtr);

	node_ptr = btr_cur_get_rec(cursor);
	ut_ad(!page_rec_is_comp(node_ptr)
	      || rec_get_status(node_ptr) == REC_STATUS_NODE_PTR);
	offsets = rec_get_offsets(node_ptr, index, offsets,
				  ULINT_UNDEFINED, &heap);

	if (UNIV_UNLIKELY(btr_node_ptr_get_child_page_no(node_ptr, offsets)
			  != page_no)) {
		/* The node pointer does not point back to the child page:
		index tree corruption. Dump both pages and abort. */
		rec_t*	print_rec;
		fputs("InnoDB: Dump of the child page:\n", stderr);
		buf_page_print(page_align(user_rec), 0);
		fputs("InnoDB: Dump of the parent page:\n", stderr);
		buf_page_print(page_align(node_ptr), 0);

		fputs("InnoDB: Corruption of an index tree: table ", stderr);
		ut_print_name(stderr, NULL, TRUE, index->table_name);
		fputs(", index ", stderr);
		ut_print_name(stderr, NULL, FALSE, index->name);
		fprintf(stderr, ",\n"
			"InnoDB: father ptr page no %lu, child page no %lu\n",
			(ulong)
			btr_node_ptr_get_child_page_no(node_ptr, offsets),
			(ulong) page_no);
		print_rec = page_rec_get_next(
			page_get_infimum_rec(page_align(user_rec)));
		offsets = rec_get_offsets(print_rec, index,
					  offsets, ULINT_UNDEFINED, &heap);
		page_rec_print(print_rec, offsets);
		offsets = rec_get_offsets(node_ptr, index, offsets,
					  ULINT_UNDEFINED, &heap);
		page_rec_print(node_ptr, offsets);

		fputs("InnoDB: You should dump + drop + reimport the table"
		      " to fix the\n"
		      "InnoDB: corruption. If the crash happens at "
		      "the database startup, see\n"
		      "InnoDB: " REFMAN "forcing-recovery.html about\n"
		      "InnoDB: forcing recovery. "
		      "Then dump + drop + reimport.\n", stderr);

		ut_error;
	}

	return(offsets);
}
675 
/** Looks up the parent node pointer of the page the cursor is positioned
on, supplying the caller's __FILE__ and __LINE__ for diagnostics. */
#define btr_page_get_father_node_ptr(of,heap,cur,mtr) \
	btr_page_get_father_node_ptr_func(of,heap,cur,__FILE__,__LINE__,mtr)
678 
679 /************************************************************/
683 static
684 ulint*
685 btr_page_get_father_block(
686 /*======================*/
687  ulint* offsets,
688  mem_heap_t* heap,
689  dict_index_t* index,
690  buf_block_t* block,
691  mtr_t* mtr,
692  btr_cur_t* cursor)
694 {
695  rec_t* rec
696  = page_rec_get_next(page_get_infimum_rec(buf_block_get_frame(
697  block)));
698  btr_cur_position(index, rec, block, cursor);
699  return(btr_page_get_father_node_ptr(offsets, heap, cursor, mtr));
700 }
701 
702 /************************************************************/
705 static
706 void
707 btr_page_get_father(
708 /*================*/
709  dict_index_t* index,
710  buf_block_t* block,
711  mtr_t* mtr,
712  btr_cur_t* cursor)
714 {
715  mem_heap_t* heap;
716  rec_t* rec
717  = page_rec_get_next(page_get_infimum_rec(buf_block_get_frame(
718  block)));
719  btr_cur_position(index, rec, block, cursor);
720 
721  heap = mem_heap_create(100);
722  btr_page_get_father_node_ptr(NULL, heap, cursor, mtr);
723  mem_heap_free(heap);
724 }
725 
/************************************************************//**
Creates the root node for a new index tree.
@return	page number of the created root, FIL_NULL if did not succeed */
UNIV_INTERN
ulint
btr_create(
/*=======*/
	ulint		type,	/*!< in: type of the index */
	ulint		space,	/*!< in: space where created */
	ulint		zip_size,/*!< in: compressed page size in bytes
				or 0 for uncompressed pages */
	index_id_t	index_id,/*!< in: index id */
	dict_index_t*	index,	/*!< in: index */
	mtr_t*		mtr)	/*!< in: mini-transaction handle */
{
	ulint		page_no;
	buf_block_t*	block;
	buf_frame_t*	frame;
	page_t*		page;
	page_zip_des_t*	page_zip;

	/* Create the two new segments (one, in the case of an ibuf tree) for
	the index tree; the segment headers are put on the allocated root page
	(for an ibuf tree, not in the root, but on a separate ibuf header
	page) */

	if (type & DICT_IBUF) {
		/* Allocate first the ibuf header page */
		buf_block_t*	ibuf_hdr_block = fseg_create(
			space, 0,
			IBUF_HEADER + IBUF_TREE_SEG_HEADER, mtr);

		buf_block_dbg_add_level(ibuf_hdr_block, SYNC_TREE_NODE_NEW);

		ut_ad(buf_block_get_page_no(ibuf_hdr_block)
		      == IBUF_HEADER_PAGE_NO);
		/* Allocate then the next page to the segment: it will be the
		tree root page */

		page_no = fseg_alloc_free_page(buf_block_get_frame(
						       ibuf_hdr_block)
					       + IBUF_HEADER
					       + IBUF_TREE_SEG_HEADER,
					       IBUF_TREE_ROOT_PAGE_NO,
					       FSP_UP, mtr);
		ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO);

		block = buf_page_get(space, zip_size, page_no,
				     RW_X_LATCH, mtr);
	} else {
		block = fseg_create(space, 0,
				    PAGE_HEADER + PAGE_BTR_SEG_TOP, mtr);
	}

	if (block == NULL) {
		/* Out of space: no root page could be allocated. */

		return(FIL_NULL);
	}

	page_no = buf_block_get_page_no(block);
	frame = buf_block_get_frame(block);

	buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW);

	if (type & DICT_IBUF) {
		/* It is an insert buffer tree: initialize the free list */

		ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO);

		flst_init(frame + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr);
	} else {
		/* It is a non-ibuf tree: create a file segment for leaf
		pages */
		if (!fseg_create(space, page_no,
				 PAGE_HEADER + PAGE_BTR_SEG_LEAF, mtr)) {
			/* Not enough space for new segment, free root
			segment before return. */
			btr_free_root(space, zip_size, page_no, mtr);

			return(FIL_NULL);
		}

		/* The fseg create acquires a second latch on the page,
		therefore we must declare it: */
		buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW);
	}

	/* Create a new index page on the allocated segment page */
	page_zip = buf_block_get_page_zip(block);

	if (UNIV_LIKELY_NULL(page_zip)) {
		page = page_create_zip(block, index, 0, mtr);
	} else {
		page = page_create(block, mtr,
				   dict_table_is_comp(index->table));
		/* Set the level of the new index page */
		btr_page_set_level(page, NULL, 0, mtr);
	}

	block->check_index_page_at_flush = TRUE;

	/* Set the index id of the page */
	btr_page_set_index_id(page, page_zip, index_id, mtr);

	/* Set the next node and previous node fields */
	btr_page_set_next(page, page_zip, FIL_NULL, mtr);
	btr_page_set_prev(page, page_zip, FIL_NULL, mtr);

	/* We reset the free bits for the page to allow creation of several
	trees in the same mtr, otherwise the latch on a bitmap page would
	prevent it because of the latching order */

	if (!(type & DICT_CLUSTERED)) {
		ibuf_reset_free_bits(block);
	}

	/* In the following assertion we test that two records of maximum
	allowed size fit on the root page: this fact is needed to ensure
	correctness of split algorithms */
	/* NOTE(review): the assertion described above appears to have been
	lost in extraction here — confirm against the upstream source. */

	return(page_no);
}
850 
851 /************************************************************/
854 UNIV_INTERN
855 void
856 btr_free_but_not_root(
857 /*==================*/
858  ulint space,
859  ulint zip_size,
861  ulint root_page_no)
862 {
863  ibool finished;
864  page_t* root;
865  mtr_t mtr;
866 
867 leaf_loop:
868  mtr_start(&mtr);
869 
870  root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, &mtr);
871 #ifdef UNIV_BTR_DEBUG
872  ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
873  + root, space));
874  ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
875  + root, space));
876 #endif /* UNIV_BTR_DEBUG */
877 
878  /* NOTE: page hash indexes are dropped when a page is freed inside
879  fsp0fsp. */
880 
881  finished = fseg_free_step(root + PAGE_HEADER + PAGE_BTR_SEG_LEAF,
882  &mtr);
883  mtr_commit(&mtr);
884 
885  if (!finished) {
886 
887  goto leaf_loop;
888  }
889 top_loop:
890  mtr_start(&mtr);
891 
892  root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, &mtr);
893 #ifdef UNIV_BTR_DEBUG
894  ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
895  + root, space));
896 #endif /* UNIV_BTR_DEBUG */
897 
898  finished = fseg_free_step_not_header(
899  root + PAGE_HEADER + PAGE_BTR_SEG_TOP, &mtr);
900  mtr_commit(&mtr);
901 
902  if (!finished) {
903 
904  goto top_loop;
905  }
906 }
907 
908 /************************************************************/
910 UNIV_INTERN
911 void
912 btr_free_root(
913 /*==========*/
914  ulint space,
915  ulint zip_size,
917  ulint root_page_no,
918  mtr_t* mtr)
920 {
921  buf_block_t* block;
922  fseg_header_t* header;
923 
924  block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, mtr);
925 
926  btr_search_drop_page_hash_index(block);
927 
928  header = buf_block_get_frame(block) + PAGE_HEADER + PAGE_BTR_SEG_TOP;
929 #ifdef UNIV_BTR_DEBUG
930  ut_a(btr_root_fseg_validate(header, space));
931 #endif /* UNIV_BTR_DEBUG */
932 
933  while (!fseg_free_step(header, mtr)) {};
934 }
935 #endif /* !UNIV_HOTBACKUP */
936 
937 /*************************************************************/
939 static
940 ibool
941 btr_page_reorganize_low(
942 /*====================*/
943  ibool recovery,
948  buf_block_t* block,
949  dict_index_t* index,
950  mtr_t* mtr)
951 {
952  buf_pool_t* buf_pool = buf_pool_from_bpage(&block->page);
953  page_t* page = buf_block_get_frame(block);
954  page_zip_des_t* page_zip = buf_block_get_page_zip(block);
955  buf_block_t* temp_block;
956  page_t* temp_page;
957  ulint log_mode;
958  ulint data_size1;
959  ulint data_size2;
960  ulint max_ins_size1;
961  ulint max_ins_size2;
962  ibool success = FALSE;
963 
964  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
965  ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
966 #ifdef UNIV_ZIP_DEBUG
967  ut_a(!page_zip || page_zip_validate(page_zip, page));
968 #endif /* UNIV_ZIP_DEBUG */
969  data_size1 = page_get_data_size(page);
970  max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1);
971 
972 #ifndef UNIV_HOTBACKUP
973  /* Write the log record */
974  mlog_open_and_write_index(mtr, page, index, page_is_comp(page)
976  : MLOG_PAGE_REORGANIZE, 0);
977 #endif /* !UNIV_HOTBACKUP */
978 
979  /* Turn logging off */
980  log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
981 
982 #ifndef UNIV_HOTBACKUP
983  temp_block = buf_block_alloc(buf_pool, 0);
984 #else /* !UNIV_HOTBACKUP */
985  ut_ad(block == back_block1);
986  temp_block = back_block2;
987 #endif /* !UNIV_HOTBACKUP */
988  temp_page = temp_block->frame;
989 
990  /* Copy the old page to temporary space */
991  buf_frame_copy(temp_page, page);
992 
993 #ifndef UNIV_HOTBACKUP
994  if (UNIV_LIKELY(!recovery)) {
995  btr_search_drop_page_hash_index(block);
996  }
997 
998  block->check_index_page_at_flush = TRUE;
999 #endif /* !UNIV_HOTBACKUP */
1000 
1001  /* Recreate the page: note that global data on page (possible
1002  segment headers, next page-field, etc.) is preserved intact */
1003 
1004  page_create(block, mtr, dict_table_is_comp(index->table));
1005 
1006  /* Copy the records from the temporary space to the recreated page;
1007  do not copy the lock bits yet */
1008 
1009  page_copy_rec_list_end_no_locks(block, temp_block,
1010  page_get_infimum_rec(temp_page),
1011  index, mtr);
1012 
1013  if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) {
1014  /* Copy max trx id to recreated page */
1015  trx_id_t max_trx_id = page_get_max_trx_id(temp_page);
1016  page_set_max_trx_id(block, NULL, max_trx_id, mtr);
1017  /* In crash recovery, dict_index_is_sec_or_ibuf() always
1018  returns TRUE, even for clustered indexes. max_trx_id is
1019  unused in clustered index pages. */
1020  ut_ad(max_trx_id != 0 || recovery);
1021  }
1022 
1023  if (UNIV_LIKELY_NULL(page_zip)
1024  && UNIV_UNLIKELY
1025  (!page_zip_compress(page_zip, page, index, NULL))) {
1026 
1027  /* Restore the old page and exit. */
1028 
1029 #if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
1030  /* Check that the bytes that we skip are identical. */
1031  ut_a(!memcmp(page, temp_page, PAGE_HEADER));
1032  ut_a(!memcmp(PAGE_HEADER + PAGE_N_RECS + page,
1033  PAGE_HEADER + PAGE_N_RECS + temp_page,
1034  PAGE_DATA - (PAGE_HEADER + PAGE_N_RECS)));
1035  ut_a(!memcmp(UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + page,
1036  UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + temp_page,
1038 #endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
1039 
1040  memcpy(PAGE_HEADER + page, PAGE_HEADER + temp_page,
1041  PAGE_N_RECS - PAGE_N_DIR_SLOTS);
1042  memcpy(PAGE_DATA + page, PAGE_DATA + temp_page,
1043  UNIV_PAGE_SIZE - PAGE_DATA - FIL_PAGE_DATA_END);
1044 
1045 #if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
1046  ut_a(!memcmp(page, temp_page, UNIV_PAGE_SIZE));
1047 #endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
1048 
1049  goto func_exit;
1050  }
1051 
1052 #ifndef UNIV_HOTBACKUP
1053  if (UNIV_LIKELY(!recovery)) {
1054  /* Update the record lock bitmaps */
1055  lock_move_reorganize_page(block, temp_block);
1056  }
1057 #endif /* !UNIV_HOTBACKUP */
1058 
1059  data_size2 = page_get_data_size(page);
1060  max_ins_size2 = page_get_max_insert_size_after_reorganize(page, 1);
1061 
1062  if (UNIV_UNLIKELY(data_size1 != data_size2)
1063  || UNIV_UNLIKELY(max_ins_size1 != max_ins_size2)) {
1064  buf_page_print(page, 0);
1065  buf_page_print(temp_page, 0);
1066  fprintf(stderr,
1067  "InnoDB: Error: page old data size %lu"
1068  " new data size %lu\n"
1069  "InnoDB: Error: page old max ins size %lu"
1070  " new max ins size %lu\n"
1071  "InnoDB: Submit a detailed bug report"
1072  " to http://bugs.mysql.com\n",
1073  (unsigned long) data_size1, (unsigned long) data_size2,
1074  (unsigned long) max_ins_size1,
1075  (unsigned long) max_ins_size2);
1076  } else {
1077  success = TRUE;
1078  }
1079 
1080 func_exit:
1081 #ifdef UNIV_ZIP_DEBUG
1082  ut_a(!page_zip || page_zip_validate(page_zip, page));
1083 #endif /* UNIV_ZIP_DEBUG */
1084 #ifndef UNIV_HOTBACKUP
1085  buf_block_free(temp_block);
1086 #endif /* !UNIV_HOTBACKUP */
1087 
1088  /* Restore logging mode */
1089  mtr_set_log_mode(mtr, log_mode);
1090 
1091  return(success);
1092 }
1093 
1094 #ifndef UNIV_HOTBACKUP
1095 /*************************************************************/
1102 UNIV_INTERN
1103 ibool
1104 btr_page_reorganize(
1105 /*================*/
1106  buf_block_t* block,
1107  dict_index_t* index,
1108  mtr_t* mtr)
1109 {
1110  return(btr_page_reorganize_low(FALSE, block, index, mtr));
1111 }
1112 #endif /* !UNIV_HOTBACKUP */
1113 
1114 /***********************************************************/
1117 UNIV_INTERN
1118 byte*
1119 btr_parse_page_reorganize(
1120 /*======================*/
1121  byte* ptr,
1122  byte* /*end_ptr __attribute__((unused))*/,
1124  dict_index_t* index,
1125  buf_block_t* block,
1126  mtr_t* mtr)
1127 {
1128  ut_ad(ptr && end_ptr);
1129 
1130  /* The record is empty, except for the record initial part */
1131 
1132  if (UNIV_LIKELY(block != NULL)) {
1133  btr_page_reorganize_low(TRUE, block, index, mtr);
1134  }
1135 
1136  return(ptr);
1137 }
1138 
1139 #ifndef UNIV_HOTBACKUP
1140 /*************************************************************/
/* Empties an index page by recreating it: all user records are
discarded while global data on the page (segment headers, the
next/prev page fields, etc.) is preserved.  The page is assigned the
given B-tree level. */
static
void
btr_page_empty(
/*===========*/
	buf_block_t*	block,		/*!< in: page to be emptied */
	page_zip_des_t*	page_zip,	/*!< out: compressed page frame of
					block, or NULL for uncompressed */
	dict_index_t*	index,		/*!< in: index of the page */
	ulint		level,		/*!< in: B-tree level to set on the
					recreated page */
	mtr_t*		mtr)		/*!< in: mtr */
{
	page_t*	page = buf_block_get_frame(block);

	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
	ut_ad(page_zip == buf_block_get_page_zip(block));
#ifdef UNIV_ZIP_DEBUG
	ut_a(!page_zip || page_zip_validate(page_zip, page));
#endif /* UNIV_ZIP_DEBUG */

	/* Adaptive hash index entries pointing into this page would
	become stale once the records are gone; drop them first. */
	btr_search_drop_page_hash_index(block);

	/* Recreate the page: note that global data on page (possible
	segment headers, next page-field, etc.) is preserved intact */

	if (UNIV_LIKELY_NULL(page_zip)) {
		/* The compressed-page variant takes the level itself. */
		page_create_zip(block, index, level, mtr);
	} else {
		page_create(block, mtr, dict_table_is_comp(index->table));
		btr_page_set_level(page, NULL, level, mtr);
	}

	block->check_index_page_at_flush = TRUE;
}
1174 
1175 /*************************************************************/
/* Makes the tree one level higher by raising the root: the records of
the root are moved to a newly allocated page, the root is emptied and
rebuilt one level higher with a single node pointer to that new page,
and the actual insert is then delegated to btr_page_split_and_insert()
on the new (former root contents) page.  The caller must hold an
x-latch on the tree and on the root page.
@return inserted record */
UNIV_INTERN
rec_t*
btr_root_raise_and_insert(
/*======================*/
	btr_cur_t*	cursor,	/*!< in: cursor positioned on the root
				page; repositioned on the new child
				before the split */
	const dtuple_t*	tuple,	/*!< in: tuple to insert */
	ulint		n_ext,	/*!< in: number of externally stored
				columns */
	mtr_t*		mtr)	/*!< in: mtr */
{
	dict_index_t*	index;
	page_t*		root;
	page_t*		new_page;
	ulint		new_page_no;
	rec_t*		rec;
	mem_heap_t*	heap;
	dtuple_t*	node_ptr;
	ulint		level;
	rec_t*		node_ptr_rec;
	page_cur_t*	page_cursor;
	page_zip_des_t*	root_page_zip;
	page_zip_des_t*	new_page_zip;
	buf_block_t*	root_block;
	buf_block_t*	new_block;

	root = btr_cur_get_page(cursor);
	root_block = btr_cur_get_block(cursor);
	root_page_zip = buf_block_get_page_zip(root_block);
#ifdef UNIV_ZIP_DEBUG
	ut_a(!root_page_zip || page_zip_validate(root_page_zip, root));
#endif /* UNIV_ZIP_DEBUG */
	index = btr_cur_get_index(cursor);
#ifdef UNIV_BTR_DEBUG
	if (!dict_index_is_ibuf(index)) {
		ulint	space = dict_index_get_space(index);

		ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
					    + root, space));
		ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
					    + root, space));
	}

	ut_a(dict_index_get_page(index) == page_get_page_no(root));
#endif /* UNIV_BTR_DEBUG */
	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
				MTR_MEMO_X_LOCK));
	ut_ad(mtr_memo_contains(mtr, root_block, MTR_MEMO_PAGE_X_FIX));

	/* Allocate a new page to the tree. Root splitting is done by first
	moving the root records to the new page, emptying the root, putting
	a node pointer to the new page, and then splitting the new page. */

	level = btr_page_get_level(root, mtr);

	new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr);
	new_page = buf_block_get_frame(new_block);
	new_page_zip = buf_block_get_page_zip(new_block);
	/* Root and child must agree on compression and page size. */
	ut_a(!new_page_zip == !root_page_zip);
	ut_a(!new_page_zip
	     || page_zip_get_size(new_page_zip)
	     == page_zip_get_size(root_page_zip));

	btr_page_create(new_block, new_page_zip, index, level, mtr);

	/* Set the next node and previous node fields of new page */
	btr_page_set_next(new_page, new_page_zip, FIL_NULL, mtr);
	btr_page_set_prev(new_page, new_page_zip, FIL_NULL, mtr);

	/* Copy the records from root to the new page one by one. */

	if (0
#ifdef UNIV_ZIP_COPY
	    || new_page_zip
#endif /* UNIV_ZIP_COPY */
	    || UNIV_UNLIKELY
	    (!page_copy_rec_list_end(new_block, root_block,
				     page_get_infimum_rec(root),
				     index, mtr))) {
		ut_a(new_page_zip);

		/* Copy the page byte for byte. */
		page_zip_copy_recs(new_page_zip, new_page,
				   root_page_zip, root, index, mtr);

		/* Update the lock table and possible hash index. */

		lock_move_rec_list_end(new_block, root_block,
				       page_get_infimum_rec(root));

		btr_search_move_or_delete_hash_entries(new_block, root_block,
						       index);
	}

	/* If this is a pessimistic insert which is actually done to
	perform a pessimistic update then we have stored the lock
	information of the record to be inserted on the infimum of the
	root page: we cannot discard the lock structs on the root page */

	lock_update_root_raise(new_block, root_block);

	/* Create a memory heap where the node pointer is stored */
	heap = mem_heap_create(100);

	rec = page_rec_get_next(page_get_infimum_rec(new_page));
	new_page_no = buf_block_get_page_no(new_block);

	/* Build the node pointer (= node key and page address) for the
	child */

	node_ptr = dict_index_build_node_ptr(index, rec, new_page_no, heap,
					     level);
	/* The node pointer must be marked as the predefined minimum record,
	as there is no lower alphabetical limit to records in the leftmost
	node of a level: */
	dtuple_set_info_bits(node_ptr,
			     dtuple_get_info_bits(node_ptr)
			     | REC_INFO_MIN_REC_FLAG);

	/* Rebuild the root page to get free space */
	btr_page_empty(root_block, root_page_zip, index, level + 1, mtr);

	/* Set the next node and previous node fields, although
	they should already have been set. The previous node field
	must be FIL_NULL if root_page_zip != NULL, because the
	REC_INFO_MIN_REC_FLAG (of the first user record) will be
	set if and only if btr_page_get_prev() == FIL_NULL. */
	btr_page_set_next(root, root_page_zip, FIL_NULL, mtr);
	btr_page_set_prev(root, root_page_zip, FIL_NULL, mtr);

	page_cursor = btr_cur_get_page_cur(cursor);

	/* Insert node pointer to the root */

	page_cur_set_before_first(root_block, page_cursor);

	node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr,
					     index, 0, mtr);

	/* The root page should only contain the node pointer
	to new_page at this point. Thus, the data should fit. */
	ut_a(node_ptr_rec);

	/* Free the memory heap */
	mem_heap_free(heap);

	/* We play safe and reset the free bits for the new page */

#if 0
	fprintf(stderr, "Root raise new page no %lu\n", new_page_no);
#endif

	if (!dict_index_is_clust(index)) {
		ibuf_reset_free_bits(new_block);
	}

	/* Reposition the cursor to the child node */
	page_cur_search(new_block, index, tuple,
			PAGE_CUR_LE, page_cursor);

	/* Split the child and insert tuple */
	return(btr_page_split_and_insert(cursor, tuple, n_ext, mtr));
}
1346 
1347 /*************************************************************/
1351 UNIV_INTERN
1352 ibool
1353 btr_page_get_split_rec_to_left(
1354 /*===========================*/
1355  btr_cur_t* cursor,
1356  rec_t** split_rec)
1360 {
1361  page_t* page;
1362  rec_t* insert_point;
1363  rec_t* infimum;
1364 
1365  page = btr_cur_get_page(cursor);
1366  insert_point = btr_cur_get_rec(cursor);
1367 
1368  if (page_header_get_ptr(page, PAGE_LAST_INSERT)
1369  == page_rec_get_next(insert_point)) {
1370 
1371  infimum = page_get_infimum_rec(page);
1372 
1373  /* If the convergence is in the middle of a page, include also
1374  the record immediately before the new insert to the upper
1375  page. Otherwise, we could repeatedly move from page to page
1376  lots of records smaller than the convergence point. */
1377 
1378  if (infimum != insert_point
1379  && page_rec_get_next(infimum) != insert_point) {
1380 
1381  *split_rec = insert_point;
1382  } else {
1383  *split_rec = page_rec_get_next(insert_point);
1384  }
1385 
1386  return(TRUE);
1387  }
1388 
1389  return(FALSE);
1390 }
1391 
1392 /*************************************************************/
1396 UNIV_INTERN
1397 ibool
1398 btr_page_get_split_rec_to_right(
1399 /*============================*/
1400  btr_cur_t* cursor,
1401  rec_t** split_rec)
1405 {
1406  page_t* page;
1407  rec_t* insert_point;
1408 
1409  page = btr_cur_get_page(cursor);
1410  insert_point = btr_cur_get_rec(cursor);
1411 
1412  /* We use eager heuristics: if the new insert would be right after
1413  the previous insert on the same page, we assume that there is a
1414  pattern of sequential inserts here. */
1415 
1416  if (UNIV_LIKELY(page_header_get_ptr(page, PAGE_LAST_INSERT)
1417  == insert_point)) {
1418 
1419  rec_t* next_rec;
1420 
1421  next_rec = page_rec_get_next(insert_point);
1422 
1423  if (page_rec_is_supremum(next_rec)) {
1424 split_at_new:
1425  /* Split at the new record to insert */
1426  *split_rec = NULL;
1427  } else {
1428  rec_t* next_next_rec = page_rec_get_next(next_rec);
1429  if (page_rec_is_supremum(next_next_rec)) {
1430 
1431  goto split_at_new;
1432  }
1433 
1434  /* If there are >= 2 user records up from the insert
1435  point, split all but 1 off. We want to keep one because
1436  then sequential inserts can use the adaptive hash
1437  index, as they can do the necessary checks of the right
1438  search position just by looking at the records on this
1439  page. */
1440 
1441  *split_rec = next_next_rec;
1442  }
1443 
1444  return(TRUE);
1445  }
1446 
1447  return(FALSE);
1448 }
1449 
1450 /*************************************************************/
/* Calculates a split record: walks the records of the page in key
order (with the tuple to be inserted virtually merged in) until the
accumulated space exceeds half of the total space the records plus the
tuple would need, and returns the boundary record.
@return the split record, or NULL meaning that the tuple to be
inserted itself is the split point */
static
rec_t*
btr_page_get_split_rec(
/*===================*/
	btr_cur_t*	cursor,	/*!< in: cursor at which insert should be
				made */
	const dtuple_t*	tuple,	/*!< in: tuple to insert */
	ulint		n_ext)	/*!< in: number of externally stored
				columns */
{
	page_t*		page;
	page_zip_des_t*	page_zip;
	ulint		insert_size;
	ulint		free_space;
	ulint		total_data;
	ulint		total_n_recs;
	ulint		total_space;
	ulint		incl_data;
	rec_t*		ins_rec;
	rec_t*		rec;
	rec_t*		next_rec;
	ulint		n;
	mem_heap_t*	heap;
	ulint*		offsets;

	page = btr_cur_get_page(cursor);

	insert_size = rec_get_converted_size(cursor->index, tuple, n_ext);
	free_space = page_get_free_space_of_empty(page_is_comp(page));

	page_zip = btr_cur_get_page_zip(cursor);
	if (UNIV_LIKELY_NULL(page_zip)) {
		/* Estimate the free space of an empty compressed page. */
		ulint	free_space_zip = page_zip_empty_size(
			cursor->index->n_fields,
			page_zip_get_size(page_zip));

		/* Use the tighter of the two estimates. */
		if (UNIV_LIKELY(free_space > (ulint) free_space_zip)) {
			free_space = (ulint) free_space_zip;
		}
	}

	/* free_space is now the free space of a created new page */

	total_data   = page_get_data_size(page) + insert_size;
	total_n_recs = page_get_n_recs(page) + 1;
	ut_ad(total_n_recs >= 2);
	total_space  = total_data + page_dir_calc_reserved_space(total_n_recs);

	n = 0;
	incl_data = 0;
	ins_rec = btr_cur_get_rec(cursor);
	rec = page_get_infimum_rec(page);

	heap = NULL;
	offsets = NULL;

	/* We start to include records to the left half, and when the
	space reserved by them exceeds half of total_space, then if
	the included records fit on the left page, they will be put there
	if something was left over also for the right page,
	otherwise the last included record will be the first on the right
	half page */

	do {
		/* Decide the next record to include.  NOTE: rec == NULL
		is used as a sentinel meaning "the tuple to be inserted
		has just been (virtually) included". */
		if (rec == ins_rec) {
			rec = NULL;	/* NULL denotes that tuple is
					now included */
		} else if (rec == NULL) {
			rec = page_rec_get_next(ins_rec);
		} else {
			rec = page_rec_get_next(rec);
		}

		if (rec == NULL) {
			/* Include tuple */
			incl_data += insert_size;
		} else {
			offsets = rec_get_offsets(rec, cursor->index,
						  offsets, ULINT_UNDEFINED,
						  &heap);
			incl_data += rec_offs_size(offsets);
		}

		n++;
	} while (incl_data + page_dir_calc_reserved_space(n)
		 < total_space / 2);

	if (incl_data + page_dir_calc_reserved_space(n) <= free_space) {
		/* The next record will be the first on
		the right half page if it is not the
		supremum record of page */

		if (rec == ins_rec) {
			rec = NULL;

			goto func_exit;
		} else if (rec == NULL) {
			next_rec = page_rec_get_next(ins_rec);
		} else {
			next_rec = page_rec_get_next(rec);
		}
		ut_ad(next_rec);
		if (!page_rec_is_supremum(next_rec)) {
			rec = next_rec;
		}
	}

func_exit:
	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
	return(rec);
}
1569 
1570 /*************************************************************/
/* Checks whether, after a split at split_rec, the tuple would fit on
the half-page it belongs to.  The space accounting is done as if the
records moving to the other half-page had been removed from this page.
@return TRUE if the tuple fits on its half-page */
static
ibool
btr_page_insert_fits(
/*=================*/
	btr_cur_t*	cursor,	/*!< in: cursor at which insert should be
				made */
	const rec_t*	split_rec,/*!< in: suggested split record, or NULL
				if the tuple itself is the split point */
	const ulint*	offsets,/*!< in: rec_get_offsets(split_rec), or
				NULL iff split_rec == NULL */
	const dtuple_t*	tuple,	/*!< in: tuple to insert */
	ulint		n_ext,	/*!< in: number of externally stored
				columns */
	mem_heap_t*	heap)	/*!< in: temporary memory heap */
{
	page_t*		page;
	ulint		insert_size;
	ulint		free_space;
	ulint		total_data;
	ulint		total_n_recs;
	const rec_t*	rec;
	const rec_t*	end_rec;
	ulint*		offs;

	page = btr_cur_get_page(cursor);

	ut_ad(!split_rec == !offsets);
	ut_ad(!offsets
	      || !page_is_comp(page) == !rec_offs_comp(offsets));
	ut_ad(!offsets
	      || rec_offs_validate(split_rec, cursor->index, offsets));

	insert_size = rec_get_converted_size(cursor->index, tuple, n_ext);
	free_space  = page_get_free_space_of_empty(page_is_comp(page));

	/* free_space is now the free space of a created new page */

	total_data   = page_get_data_size(page) + insert_size;
	total_n_recs = page_get_n_recs(page) + 1;

	/* We determine which records (from rec to end_rec, not including
	end_rec) will end up on the other half page from tuple when it is
	inserted. */

	if (split_rec == NULL) {
		/* The tuple itself is the split point: everything up to
		and including the record at the cursor stays behind. */
		rec = page_rec_get_next(page_get_infimum_rec(page));
		end_rec = page_rec_get_next(btr_cur_get_rec(cursor));

	} else if (cmp_dtuple_rec(tuple, split_rec, offsets) >= 0) {

		/* Tuple goes to the upper half: the lower-half records
		leave this page. */
		rec = page_rec_get_next(page_get_infimum_rec(page));
		end_rec = split_rec;
	} else {
		/* Tuple goes to the lower half: the upper-half records
		leave this page. */
		rec = split_rec;
		end_rec = page_get_supremum_rec(page);
	}

	if (total_data + page_dir_calc_reserved_space(total_n_recs)
	    <= free_space) {

		/* Ok, there will be enough available space on the
		half page where the tuple is inserted */

		return(TRUE);
	}

	offs = NULL;

	while (rec != end_rec) {
		/* In this loop we calculate the amount of reserved
		space after rec is removed from page. */

		offs = rec_get_offsets(rec, cursor->index, offs,
				       ULINT_UNDEFINED, &heap);

		total_data -= rec_offs_size(offs);
		total_n_recs--;

		if (total_data + page_dir_calc_reserved_space(total_n_recs)
		    <= free_space) {

			/* Ok, there will be enough available space on the
			half page where the tuple is inserted */

			return(TRUE);
		}

		rec = page_rec_get_next_const(rec);
	}

	return(FALSE);
}
1666 
1667 /*******************************************************/
1670 UNIV_INTERN
1671 void
1672 btr_insert_on_non_leaf_level_func(
1673 /*==============================*/
1674  dict_index_t* index,
1675  ulint level,
1676  dtuple_t* tuple,
1677  const char* file,
1678  ulint line,
1679  mtr_t* mtr)
1680 {
1681  big_rec_t* dummy_big_rec;
1682  btr_cur_t cursor;
1683  ulint err;
1684  rec_t* rec;
1685 
1686  ut_ad(level > 0);
1687 
1688  btr_cur_search_to_nth_level(index, level, tuple, PAGE_CUR_LE,
1690  &cursor, 0, file, line, mtr);
1691 
1692  err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG
1693  | BTR_KEEP_SYS_FLAG
1694  | BTR_NO_UNDO_LOG_FLAG,
1695  &cursor, tuple, &rec,
1696  &dummy_big_rec, 0, NULL, mtr);
1697  ut_a(err == DB_SUCCESS);
1698 }
1699 
1700 /**************************************************************/
/* Attaches the halves of an index page on the appropriate level in an
index tree after a split: updates the father node pointer, inserts the
node pointer of the upper half one level up (which may recurse into a
further split there), and relinks the doubly-linked page list of the
level around the two halves. */
static
void
btr_attach_half_pages(
/*==================*/
	dict_index_t*	index,		/*!< in: the index tree */
	buf_block_t*	block,		/*!< in/out: page to be split */
	rec_t*		split_rec,	/*!< in: first record on the upper
					half-page */
	buf_block_t*	new_block,	/*!< in/out: the newly created
					half-page */
	ulint		direction,	/*!< in: FSP_UP or FSP_DOWN */
	mtr_t*		mtr)		/*!< in: mtr */
{
	ulint		space;
	ulint		zip_size;
	ulint		prev_page_no;
	ulint		next_page_no;
	ulint		level;
	page_t*		page		= buf_block_get_frame(block);
	page_t*		lower_page;
	page_t*		upper_page;
	ulint		lower_page_no;
	ulint		upper_page_no;
	page_zip_des_t*	lower_page_zip;
	page_zip_des_t*	upper_page_zip;
	dtuple_t*	node_ptr_upper;
	mem_heap_t*	heap;

	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
	ut_ad(mtr_memo_contains(mtr, new_block, MTR_MEMO_PAGE_X_FIX));

	/* Create a memory heap where the data tuple is stored */
	heap = mem_heap_create(1024);

	/* Based on split direction, decide upper and lower pages */
	if (direction == FSP_DOWN) {

		btr_cur_t	cursor;
		ulint*		offsets;

		/* Splitting down: new_block becomes the lower half, and
		the father node pointer must be redirected to it because
		the lower half owns the old smallest key. */
		lower_page = buf_block_get_frame(new_block);
		lower_page_no = buf_block_get_page_no(new_block);
		lower_page_zip = buf_block_get_page_zip(new_block);
		upper_page = buf_block_get_frame(block);
		upper_page_no = buf_block_get_page_no(block);
		upper_page_zip = buf_block_get_page_zip(block);

		/* Look up the index for the node pointer to page */
		offsets = btr_page_get_father_block(NULL, heap, index,
						    block, mtr, &cursor);

		/* Replace the address of the old child node (= page) with the
		address of the new lower half */

		btr_node_ptr_set_child_page_no(
			btr_cur_get_rec(&cursor),
			btr_cur_get_page_zip(&cursor),
			offsets, lower_page_no, mtr);
		mem_heap_empty(heap);
	} else {
		/* Splitting up: the original block keeps the lower half,
		so the father node pointer stays valid. */
		lower_page = buf_block_get_frame(block);
		lower_page_no = buf_block_get_page_no(block);
		lower_page_zip = buf_block_get_page_zip(block);
		upper_page = buf_block_get_frame(new_block);
		upper_page_no = buf_block_get_page_no(new_block);
		upper_page_zip = buf_block_get_page_zip(new_block);
	}

	/* Get the level of the split pages */
	level = btr_page_get_level(buf_block_get_frame(block), mtr);
	ut_ad(level
	      == btr_page_get_level(buf_block_get_frame(new_block), mtr));

	/* Build the node pointer (= node key and page address) for the upper
	half */

	node_ptr_upper = dict_index_build_node_ptr(index, split_rec,
						   upper_page_no, heap, level);

	/* Insert it next to the pointer to the lower half. Note that this
	may generate recursion leading to a split on the higher level. */

	btr_insert_on_non_leaf_level(index, level + 1, node_ptr_upper, mtr);

	/* Free the memory heap */
	mem_heap_free(heap);

	/* Get the previous and next pages of page */

	prev_page_no = btr_page_get_prev(page, mtr);
	next_page_no = btr_page_get_next(page, mtr);
	space = buf_block_get_space(block);
	zip_size = buf_block_get_zip_size(block);

	/* Update page links of the level */

	if (prev_page_no != FIL_NULL) {
		buf_block_t*	prev_block = btr_block_get(space, zip_size,
							   prev_page_no,
							   RW_X_LATCH, mtr);
#ifdef UNIV_BTR_DEBUG
		ut_a(page_is_comp(prev_block->frame) == page_is_comp(page));
		ut_a(btr_page_get_next(prev_block->frame, mtr)
		     == buf_block_get_page_no(block));
#endif /* UNIV_BTR_DEBUG */

		btr_page_set_next(buf_block_get_frame(prev_block),
				  buf_block_get_page_zip(prev_block),
				  lower_page_no, mtr);
	}

	if (next_page_no != FIL_NULL) {
		buf_block_t*	next_block = btr_block_get(space, zip_size,
							   next_page_no,
							   RW_X_LATCH, mtr);
#ifdef UNIV_BTR_DEBUG
		ut_a(page_is_comp(next_block->frame) == page_is_comp(page));
		ut_a(btr_page_get_prev(next_block->frame, mtr)
		     == page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */

		btr_page_set_prev(buf_block_get_frame(next_block),
				  buf_block_get_page_zip(next_block),
				  upper_page_no, mtr);
	}

	/* Finally link the two halves to each other and to the old
	neighbours. */
	btr_page_set_prev(lower_page, lower_page_zip, prev_page_no, mtr);
	btr_page_set_next(lower_page, lower_page_zip, upper_page_no, mtr);

	btr_page_set_prev(upper_page, upper_page_zip, lower_page_no, mtr);
	btr_page_set_next(upper_page, upper_page_zip, next_page_no, mtr);
}
1834 
1835 /*************************************************************/
1838 static
1839 ibool
1840 btr_page_tuple_smaller(
1841 /*===================*/
1842  btr_cur_t* cursor,
1843  const dtuple_t* tuple,
1844  ulint* offsets,
1845  ulint n_uniq,
1847  mem_heap_t** heap)
1848 {
1849  buf_block_t* block;
1850  const rec_t* first_rec;
1851  page_cur_t pcur;
1852 
1853  /* Read the first user record in the page. */
1854  block = btr_cur_get_block(cursor);
1855  page_cur_set_before_first(block, &pcur);
1856  page_cur_move_to_next(&pcur);
1857  first_rec = page_cur_get_rec(&pcur);
1858 
1859  offsets = rec_get_offsets(
1860  first_rec, cursor->index, offsets,
1861  n_uniq, heap);
1862 
1863  return(cmp_dtuple_rec(tuple, first_rec, offsets) < 0);
1864 }
1865 
1866 /*************************************************************/
1875 UNIV_INTERN
1876 rec_t*
1877 btr_page_split_and_insert(
1878 /*======================*/
1879  btr_cur_t* cursor,
1882  const dtuple_t* tuple,
1883  ulint n_ext,
1884  mtr_t* mtr)
1885 {
1886  buf_block_t* block;
1887  page_t* page;
1888  page_zip_des_t* page_zip;
1889  ulint page_no;
1890  byte direction;
1891  ulint hint_page_no;
1892  buf_block_t* new_block;
1893  page_t* new_page;
1894  page_zip_des_t* new_page_zip;
1895  rec_t* split_rec;
1896  buf_block_t* left_block;
1897  buf_block_t* right_block;
1898  buf_block_t* insert_block;
1899  page_cur_t* page_cursor;
1900  rec_t* first_rec;
1901  byte* buf = 0; /* remove warning */
1902  rec_t* move_limit;
1903  ibool insert_will_fit;
1904  ibool insert_left;
1905  ulint n_iterations = 0;
1906  rec_t* rec;
1907  mem_heap_t* heap;
1908  ulint n_uniq;
1909  ulint* offsets;
1910 
1911  heap = mem_heap_create(1024);
1912  n_uniq = dict_index_get_n_unique_in_tree(cursor->index);
1913 func_start:
1914  mem_heap_empty(heap);
1915  offsets = NULL;
1916 
1917  ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
1918  MTR_MEMO_X_LOCK));
1919 #ifdef UNIV_SYNC_DEBUG
1920  ut_ad(rw_lock_own(dict_index_get_lock(cursor->index), RW_LOCK_EX));
1921 #endif /* UNIV_SYNC_DEBUG */
1922 
1923  block = btr_cur_get_block(cursor);
1924  page = buf_block_get_frame(block);
1925  page_zip = buf_block_get_page_zip(block);
1926 
1927  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
1928  ut_ad(page_get_n_recs(page) >= 1);
1929 
1930  page_no = buf_block_get_page_no(block);
1931 
1932  /* 1. Decide the split record; split_rec == NULL means that the
1933  tuple to be inserted should be the first record on the upper
1934  half-page */
1935  insert_left = FALSE;
1936 
1937  if (n_iterations > 0) {
1938  direction = FSP_UP;
1939  hint_page_no = page_no + 1;
1940  split_rec = btr_page_get_split_rec(cursor, tuple, n_ext);
1941 
1942  if (UNIV_UNLIKELY(split_rec == NULL)) {
1943  insert_left = btr_page_tuple_smaller(
1944  cursor, tuple, offsets, n_uniq, &heap);
1945  }
1946  } else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) {
1947  direction = FSP_UP;
1948  hint_page_no = page_no + 1;
1949  } else if (btr_page_get_split_rec_to_left(cursor, &split_rec)) {
1950  direction = FSP_DOWN;
1951  hint_page_no = page_no - 1;
1952  ut_ad(split_rec);
1953  } else {
1954  direction = FSP_UP;
1955  hint_page_no = page_no + 1;
1956  /* If there is only one record in the index page, we
1957  can't split the node in the middle by default. We need
1958  to determine whether the new record will be inserted
1959  to the left or right. */
1960 
1961  if (page_get_n_recs(page) > 1) {
1962  split_rec = page_get_middle_rec(page);
1963  } else if (btr_page_tuple_smaller(cursor, tuple,
1964  offsets, n_uniq, &heap)) {
1965  split_rec = page_rec_get_next(
1966  page_get_infimum_rec(page));
1967  } else {
1968  split_rec = NULL;
1969  }
1970  }
1971 
1972  /* 2. Allocate a new page to the index */
1973  new_block = btr_page_alloc(cursor->index, hint_page_no, direction,
1974  btr_page_get_level(page, mtr), mtr);
1975  new_page = buf_block_get_frame(new_block);
1976  new_page_zip = buf_block_get_page_zip(new_block);
1977  btr_page_create(new_block, new_page_zip, cursor->index,
1978  btr_page_get_level(page, mtr), mtr);
1979 
1980  /* 3. Calculate the first record on the upper half-page, and the
1981  first record (move_limit) on original page which ends up on the
1982  upper half */
1983 
1984  if (split_rec) {
1985  first_rec = move_limit = split_rec;
1986 
1987  offsets = rec_get_offsets(split_rec, cursor->index, offsets,
1988  n_uniq, &heap);
1989 
1990  insert_left = cmp_dtuple_rec(tuple, split_rec, offsets) < 0;
1991 
1992  if (UNIV_UNLIKELY(!insert_left && new_page_zip
1993  && n_iterations > 0)) {
1994  /* If a compressed page has already been split,
1995  avoid further splits by inserting the record
1996  to an empty page. */
1997  split_rec = NULL;
1998  goto insert_empty;
1999  }
2000  } else if (UNIV_UNLIKELY(insert_left)) {
2001  ut_a(n_iterations > 0);
2002  first_rec = page_rec_get_next(page_get_infimum_rec(page));
2003  move_limit = page_rec_get_next(btr_cur_get_rec(cursor));
2004  } else {
2005 insert_empty:
2006  ut_ad(!split_rec);
2007  ut_ad(!insert_left);
2008  buf = (unsigned char *)mem_alloc(rec_get_converted_size(cursor->index,
2009  tuple, n_ext));
2010 
2011  first_rec = rec_convert_dtuple_to_rec(buf, cursor->index,
2012  tuple, n_ext);
2013  move_limit = page_rec_get_next(btr_cur_get_rec(cursor));
2014  }
2015 
2016  /* 4. Do first the modifications in the tree structure */
2017 
2018  btr_attach_half_pages(cursor->index, block,
2019  first_rec, new_block, direction, mtr);
2020 
2021  /* If the split is made on the leaf level and the insert will fit
2022  on the appropriate half-page, we may release the tree x-latch.
2023  We can then move the records after releasing the tree latch,
2024  thus reducing the tree latch contention. */
2025 
2026  if (split_rec) {
2027  insert_will_fit = !new_page_zip
2028  && btr_page_insert_fits(cursor, split_rec,
2029  offsets, tuple, n_ext, heap);
2030  } else {
2031  if (!insert_left) {
2032  mem_free(buf);
2033  buf = NULL;
2034  }
2035 
2036  insert_will_fit = !new_page_zip
2037  && btr_page_insert_fits(cursor, NULL,
2038  NULL, tuple, n_ext, heap);
2039  }
2040 
2041  if (insert_will_fit && page_is_leaf(page)) {
2042 
2044  MTR_MEMO_X_LOCK);
2045  }
2046 
2047  /* 5. Move then the records to the new page */
2048  if (direction == FSP_DOWN) {
2049  /* fputs("Split left\n", stderr); */
2050 
2051  if (0
2052 #ifdef UNIV_ZIP_COPY
2053  || page_zip
2054 #endif /* UNIV_ZIP_COPY */
2055  || UNIV_UNLIKELY
2056  (!page_move_rec_list_start(new_block, block, move_limit,
2057  cursor->index, mtr))) {
2058  /* For some reason, compressing new_page failed,
2059  even though it should contain fewer records than
2060  the original page. Copy the page byte for byte
2061  and then delete the records from both pages
2062  as appropriate. Deleting will always succeed. */
2063  ut_a(new_page_zip);
2064 
2065  page_zip_copy_recs(new_page_zip, new_page,
2066  page_zip, page, cursor->index, mtr);
2067  page_delete_rec_list_end(move_limit - page + new_page,
2068  new_block, cursor->index,
2069  ULINT_UNDEFINED,
2070  ULINT_UNDEFINED, mtr);
2071 
2072  /* Update the lock table and possible hash index. */
2073 
2075  new_block, block, move_limit,
2076  new_page + PAGE_NEW_INFIMUM);
2077 
2078  btr_search_move_or_delete_hash_entries(
2079  new_block, block, cursor->index);
2080 
2081  /* Delete the records from the source page. */
2082 
2083  page_delete_rec_list_start(move_limit, block,
2084  cursor->index, mtr);
2085  }
2086 
2087  left_block = new_block;
2088  right_block = block;
2089 
2090  lock_update_split_left(right_block, left_block);
2091  } else {
2092  /* fputs("Split right\n", stderr); */
2093 
2094  if (0
2095 #ifdef UNIV_ZIP_COPY
2096  || page_zip
2097 #endif /* UNIV_ZIP_COPY */
2098  || UNIV_UNLIKELY
2099  (!page_move_rec_list_end(new_block, block, move_limit,
2100  cursor->index, mtr))) {
2101  /* For some reason, compressing new_page failed,
2102  even though it should contain fewer records than
2103  the original page. Copy the page byte for byte
2104  and then delete the records from both pages
2105  as appropriate. Deleting will always succeed. */
2106  ut_a(new_page_zip);
2107 
2108  page_zip_copy_recs(new_page_zip, new_page,
2109  page_zip, page, cursor->index, mtr);
2110  page_delete_rec_list_start(move_limit - page
2111  + new_page, new_block,
2112  cursor->index, mtr);
2113 
2114  /* Update the lock table and possible hash index. */
2115 
2116  lock_move_rec_list_end(new_block, block, move_limit);
2117 
2118  btr_search_move_or_delete_hash_entries(
2119  new_block, block, cursor->index);
2120 
2121  /* Delete the records from the source page. */
2122 
2123  page_delete_rec_list_end(move_limit, block,
2124  cursor->index,
2125  ULINT_UNDEFINED,
2126  ULINT_UNDEFINED, mtr);
2127  }
2128 
2129  left_block = block;
2130  right_block = new_block;
2131 
2132  lock_update_split_right(right_block, left_block);
2133  }
2134 
2135 #ifdef UNIV_ZIP_DEBUG
2136  if (UNIV_LIKELY_NULL(page_zip)) {
2137  ut_a(page_zip_validate(page_zip, page));
2138  ut_a(page_zip_validate(new_page_zip, new_page));
2139  }
2140 #endif /* UNIV_ZIP_DEBUG */
2141 
2142  /* At this point, split_rec, move_limit and first_rec may point
2143  to garbage on the old page. */
2144 
2145  /* 6. The split and the tree modification is now completed. Decide the
2146  page where the tuple should be inserted */
2147 
2148  if (insert_left) {
2149  insert_block = left_block;
2150  } else {
2151  insert_block = right_block;
2152  }
2153 
2154  /* 7. Reposition the cursor for insert and try insertion */
2155  page_cursor = btr_cur_get_page_cur(cursor);
2156 
2157  page_cur_search(insert_block, cursor->index, tuple,
2158  PAGE_CUR_LE, page_cursor);
2159 
2160  rec = page_cur_tuple_insert(page_cursor, tuple,
2161  cursor->index, n_ext, mtr);
2162 
2163 #ifdef UNIV_ZIP_DEBUG
2164  {
2165  page_t* insert_page
2166  = buf_block_get_frame(insert_block);
2167 
2168  page_zip_des_t* insert_page_zip
2169  = buf_block_get_page_zip(insert_block);
2170 
2171  ut_a(!insert_page_zip
2172  || page_zip_validate(insert_page_zip, insert_page));
2173  }
2174 #endif /* UNIV_ZIP_DEBUG */
2175 
2176  if (UNIV_LIKELY(rec != NULL)) {
2177 
2178  goto func_exit;
2179  }
2180 
2181  /* 8. If insert did not fit, try page reorganization */
2182 
2183  if (UNIV_UNLIKELY
2184  (!btr_page_reorganize(insert_block, cursor->index, mtr))) {
2185 
2186  goto insert_failed;
2187  }
2188 
2189  page_cur_search(insert_block, cursor->index, tuple,
2190  PAGE_CUR_LE, page_cursor);
2191  rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
2192  n_ext, mtr);
2193 
2194  if (UNIV_UNLIKELY(rec == NULL)) {
2195  /* The insert did not fit on the page: loop back to the
2196  start of the function for a new split */
2197 insert_failed:
2198  /* We play safe and reset the free bits for new_page */
2199  if (!dict_index_is_clust(cursor->index)) {
2200  ibuf_reset_free_bits(new_block);
2201  }
2202 
2203  /* fprintf(stderr, "Split second round %lu\n",
2204  page_get_page_no(page)); */
2205  n_iterations++;
2206  ut_ad(n_iterations < 2
2207  || buf_block_get_page_zip(insert_block));
2208  ut_ad(!insert_will_fit);
2209 
2210  goto func_start;
2211  }
2212 
2213 func_exit:
2214  /* Insert fit on the page: update the free bits for the
2215  left and right pages in the same mtr */
2216 
2217  if (!dict_index_is_clust(cursor->index) && page_is_leaf(page)) {
2218  ibuf_update_free_bits_for_two_pages_low(
2219  buf_block_get_zip_size(left_block),
2220  left_block, right_block, mtr);
2221  }
2222 
2223 #if 0
2224  fprintf(stderr, "Split and insert done %lu %lu\n",
2225  buf_block_get_page_no(left_block),
2226  buf_block_get_page_no(right_block));
2227 #endif
2228 
2229  ut_ad(page_validate(buf_block_get_frame(left_block), cursor->index));
2230  ut_ad(page_validate(buf_block_get_frame(right_block), cursor->index));
2231 
2232  mem_heap_free(heap);
2233  return(rec);
2234 }
2235 
2236 /*************************************************************/
2238 static
2239 void
2240 btr_level_list_remove(
2241 /*==================*/
2242  ulint space,
2243  ulint zip_size,
2245  page_t* page,
2246  mtr_t* mtr)
2247 {
2248  ulint prev_page_no;
2249  ulint next_page_no;
2250 
2251  ut_ad(page && mtr);
2252  ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
2253  ut_ad(space == page_get_space_id(page));
2254  /* Get the previous and next page numbers of page */
2255 
2256  prev_page_no = btr_page_get_prev(page, mtr);
2257  next_page_no = btr_page_get_next(page, mtr);
2258 
2259  /* Update page links of the level */
2260 
2261  if (prev_page_no != FIL_NULL) {
2262  buf_block_t* prev_block
2263  = btr_block_get(space, zip_size, prev_page_no,
2264  RW_X_LATCH, mtr);
2265  page_t* prev_page
2266  = buf_block_get_frame(prev_block);
2267 #ifdef UNIV_BTR_DEBUG
2268  ut_a(page_is_comp(prev_page) == page_is_comp(page));
2269  ut_a(btr_page_get_next(prev_page, mtr)
2270  == page_get_page_no(page));
2271 #endif /* UNIV_BTR_DEBUG */
2272 
2273  btr_page_set_next(prev_page,
2274  buf_block_get_page_zip(prev_block),
2275  next_page_no, mtr);
2276  }
2277 
2278  if (next_page_no != FIL_NULL) {
2279  buf_block_t* next_block
2280  = btr_block_get(space, zip_size, next_page_no,
2281  RW_X_LATCH, mtr);
2282  page_t* next_page
2283  = buf_block_get_frame(next_block);
2284 #ifdef UNIV_BTR_DEBUG
2285  ut_a(page_is_comp(next_page) == page_is_comp(page));
2286  ut_a(btr_page_get_prev(next_page, mtr)
2287  == page_get_page_no(page));
2288 #endif /* UNIV_BTR_DEBUG */
2289 
2290  btr_page_set_prev(next_page,
2291  buf_block_get_page_zip(next_block),
2292  prev_page_no, mtr);
2293  }
2294 }
2295 
2296 /****************************************************************/
2299 UNIV_INLINE
2300 void
2301 btr_set_min_rec_mark_log(
2302 /*=====================*/
2303  rec_t* rec,
2304  byte type,
2305  mtr_t* mtr)
2306 {
2307  mlog_write_initial_log_record(rec, type, mtr);
2308 
2309  /* Write rec offset as a 2-byte ulint */
2311 }
2312 #else /* !UNIV_HOTBACKUP */
2313 # define btr_set_min_rec_mark_log(rec,comp,mtr) ((void) 0)
2314 #endif /* !UNIV_HOTBACKUP */
2315 
2316 /****************************************************************/
2320 UNIV_INTERN
2321 byte*
2322 btr_parse_set_min_rec_mark(
2323 /*=======================*/
2324  byte* ptr,
2325  byte* end_ptr,
2326  ulint comp,
2327  page_t* page,
2328  mtr_t* mtr)
2329 {
2330  rec_t* rec;
2331 
2332  if (end_ptr < ptr + 2) {
2333 
2334  return(NULL);
2335  }
2336 
2337  if (page) {
2338  ut_a(!page_is_comp(page) == !comp);
2339 
2340  rec = page + mach_read_from_2(ptr);
2341 
2342  btr_set_min_rec_mark(rec, mtr);
2343  }
2344 
2345  return(ptr + 2);
2346 }
2347 
2348 /****************************************************************/
2350 UNIV_INTERN
2351 void
2352 btr_set_min_rec_mark(
2353 /*=================*/
2354  rec_t* rec,
2355  mtr_t* mtr)
2356 {
2357  ulint info_bits;
2358 
2359  if (UNIV_LIKELY(page_rec_is_comp(rec))) {
2360  info_bits = rec_get_info_bits(rec, TRUE);
2361 
2362  rec_set_info_bits_new(rec, info_bits | REC_INFO_MIN_REC_FLAG);
2363 
2364  btr_set_min_rec_mark_log(rec, MLOG_COMP_REC_MIN_MARK, mtr);
2365  } else {
2366  info_bits = rec_get_info_bits(rec, FALSE);
2367 
2368  rec_set_info_bits_old(rec, info_bits | REC_INFO_MIN_REC_FLAG);
2369 
2370  btr_set_min_rec_mark_log(rec, MLOG_REC_MIN_MARK, mtr);
2371  }
2372 }
2373 
2374 #ifndef UNIV_HOTBACKUP
2375 /*************************************************************/
2377 UNIV_INTERN
2378 void
2379 btr_node_ptr_delete(
2380 /*================*/
2381  dict_index_t* index,
2382  buf_block_t* block,
2383  mtr_t* mtr)
2384 {
2385  btr_cur_t cursor;
2386  ibool compressed;
2387  ulint err;
2388 
2389  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
2390 
2391  /* Delete node pointer on father page */
2392  btr_page_get_father(index, block, mtr, &cursor);
2393 
2394  compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, RB_NONE,
2395  mtr);
2396  ut_a(err == DB_SUCCESS);
2397 
2398  if (!compressed) {
2399  btr_cur_compress_if_useful(&cursor, mtr);
2400  }
2401 }
2402 
/*************************************************************//**
Lifts the records of a page, which is alone on its level (both level-list
neighbours are FIL_NULL), into the father page, and frees the page,
thereby shortening the tree by one level along this path. */
static
void
btr_lift_page_up(
/*=============*/
	dict_index_t*	index,	/*!< in: index tree */
	buf_block_t*	block,	/*!< in: page which is the only one on its
				level */
	mtr_t*		mtr)	/*!< in: mtr */
{
	buf_block_t*	father_block;
	page_t*		father_page;
	ulint		page_level;
	page_zip_des_t*	father_page_zip;
	page_t*		page = buf_block_get_frame(block);
	ulint		root_page_no;
	buf_block_t*	blocks[BTR_MAX_LEVELS];	/* ancestors above father */
	ulint		n_blocks;
	ulint		i;

	/* The page must have no siblings on its level. */
	ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
	ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));

	page_level = btr_page_get_level(page, mtr);
	root_page_no = dict_index_get_page(index);

	{
		btr_cur_t	cursor;
		mem_heap_t*	heap = mem_heap_create(100);
		ulint*		offsets;
		buf_block_t*	b;

		/* Locate the father of the page being lifted. */
		offsets = btr_page_get_father_block(NULL, heap, index,
						    block, mtr, &cursor);
		father_block = btr_cur_get_block(&cursor);
		father_page_zip = buf_block_get_page_zip(father_block);
		father_page = buf_block_get_frame(father_block);

		n_blocks = 0;

		/* Store all ancestor pages so we can reset their
		levels later on.  We have to do all the searches on
		the tree now because later on, after we've replaced
		the first level, the tree is in an inconsistent state
		and can not be searched. */
		for (b = father_block;
		     buf_block_get_page_no(b) != root_page_no; ) {
			ut_a(n_blocks < BTR_MAX_LEVELS);

			offsets = btr_page_get_father_block(offsets, heap,
							    index, b,
							    mtr, &cursor);

			blocks[n_blocks++] = b = btr_cur_get_block(&cursor);
		}

		mem_heap_free(heap);
	}

	/* The adaptive hash entries of the lifted page become stale. */
	btr_search_drop_page_hash_index(block);

	/* Make the father empty */
	btr_page_empty(father_block, father_page_zip, index, page_level, mtr);

	/* Copy the records to the father page one by one. */
	if (0
#ifdef UNIV_ZIP_COPY
	    || father_page_zip
#endif /* UNIV_ZIP_COPY */
	    || UNIV_UNLIKELY
	    (!page_copy_rec_list_end(father_block, block,
				     page_get_infimum_rec(page),
				     index, mtr))) {
		/* Record-by-record copy failed (or byte copy was forced);
		this path requires both pages to be compressed. */
		const page_zip_des_t*	page_zip
			= buf_block_get_page_zip(block);
		ut_a(father_page_zip);
		ut_a(page_zip);

		/* Copy the page byte for byte. */
		page_zip_copy_recs(father_page_zip, father_page,
				   page_zip, page, index, mtr);

		/* Update the lock table and possible hash index. */

		lock_move_rec_list_end(father_block, block,
				       page_get_infimum_rec(page));

		btr_search_move_or_delete_hash_entries(father_block, block,
						       index);
	}

	lock_update_copy_and_discard(father_block, block);

	/* Go upward to root page, decrementing levels by one. */
	for (i = 0; i < n_blocks; i++, page_level++) {
		page_t*		inner_page = buf_block_get_frame(blocks[i]);
		page_zip_des_t*	page_zip= buf_block_get_page_zip(blocks[i]);

		/* NOTE(review): this assertion reads the outer 'page'
		(the page being lifted and freed below), whereas the
		level update operates on 'inner_page'; it looks like the
		ancestor page was intended here -- verify against
		upstream InnoDB before changing. */
		ut_ad(btr_page_get_level(page, mtr) == page_level + 1);

		btr_page_set_level(inner_page, page_zip, page_level, mtr);
#ifdef UNIV_ZIP_DEBUG
		ut_a(!page_zip || page_zip_validate(page_zip, inner_page));
#endif /* UNIV_ZIP_DEBUG */
	}

	/* Free the file page */
	btr_page_free(index, block, mtr);

	/* We play it safe and reset the free bits for the father */
	if (!dict_index_is_clust(index)) {
		ibuf_reset_free_bits(father_block);
	}
	ut_ad(page_validate(father_page, index));
	ut_ad(btr_check_node_ptr(index, father_block, mtr));
}
2524 
/*************************************************************//**
Tries to merge the page of the cursor into its left or right neighbour
on the same level, moving the records there and freeing this page.
If the page is the only one on its level, the records are lifted to
the father instead.
@return	TRUE on success; FALSE if the merge did not fit */
UNIV_INTERN
ibool
btr_compress(
/*=========*/
	btr_cur_t*	cursor,	/*!< in: cursor on the page to merge;
				the page must not be empty */
	mtr_t*		mtr)	/*!< in: mtr */
{
	dict_index_t*	index;
	ulint		space;
	ulint		zip_size;
	ulint		left_page_no;
	ulint		right_page_no;
	buf_block_t*	merge_block;	/* neighbour receiving the records */
	page_t*		merge_page;
	page_zip_des_t*	merge_page_zip;
	ibool		is_left;	/* TRUE: merging into left sibling */
	buf_block_t*	block;
	page_t*		page;
	btr_cur_t	father_cursor;
	mem_heap_t*	heap;
	ulint*		offsets;
	ulint		data_size;
	ulint		n_recs;
	ulint		max_ins_size;
	ulint		max_ins_size_reorg;

	block = btr_cur_get_block(cursor);
	page = btr_cur_get_page(cursor);
	index = btr_cur_get_index(cursor);
	ut_a((ibool) !!page_is_comp(page) == dict_table_is_comp(index->table));

	/* The caller must hold the index tree X-latch and an X-fix
	on the page. */
	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
				MTR_MEMO_X_LOCK));
	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
	space = dict_index_get_space(index);
	zip_size = dict_table_zip_size(index->table);

	left_page_no = btr_page_get_prev(page, mtr);
	right_page_no = btr_page_get_next(page, mtr);

#if 0
	fprintf(stderr, "Merge left page %lu right %lu \n",
		left_page_no, right_page_no);
#endif

	heap = mem_heap_create(100);
	offsets = btr_page_get_father_block(NULL, heap, index, block, mtr,
					    &father_cursor);

	/* Decide the page to which we try to merge and which will inherit
	the locks */

	is_left = left_page_no != FIL_NULL;

	if (is_left) {

		merge_block = btr_block_get(space, zip_size, left_page_no,
					    RW_X_LATCH, mtr);
		merge_page = buf_block_get_frame(merge_block);
#ifdef UNIV_BTR_DEBUG
		ut_a(btr_page_get_next(merge_page, mtr)
		     == buf_block_get_page_no(block));
#endif /* UNIV_BTR_DEBUG */
	} else if (right_page_no != FIL_NULL) {

		merge_block = btr_block_get(space, zip_size, right_page_no,
					    RW_X_LATCH, mtr);
		merge_page = buf_block_get_frame(merge_block);
#ifdef UNIV_BTR_DEBUG
		ut_a(btr_page_get_prev(merge_page, mtr)
		     == buf_block_get_page_no(block));
#endif /* UNIV_BTR_DEBUG */
	} else {
		/* The page is the only one on the level, lift the records
		to the father */
		btr_lift_page_up(index, block, mtr);
		mem_heap_free(heap);
		return(TRUE);
	}

	n_recs = page_get_n_recs(page);
	data_size = page_get_data_size(page);
#ifdef UNIV_BTR_DEBUG
	ut_a(page_is_comp(merge_page) == page_is_comp(page));
#endif /* UNIV_BTR_DEBUG */

	/* Check whether the records would fit in the neighbour even
	after reorganizing it. */
	max_ins_size_reorg = page_get_max_insert_size_after_reorganize(
		merge_page, n_recs);
	if (data_size > max_ins_size_reorg) {

		/* No space for merge */
err_exit:
		/* We play it safe and reset the free bits. */
		if (zip_size
		    && page_is_leaf(merge_page)
		    && !dict_index_is_clust(index)) {
			ibuf_reset_free_bits(merge_block);
		}

		mem_heap_free(heap);
		return(FALSE);
	}

	ut_ad(page_validate(merge_page, index));

	max_ins_size = page_get_max_insert_size(merge_page, n_recs);

	if (UNIV_UNLIKELY(data_size > max_ins_size)) {

		/* We have to reorganize merge_page */

		if (UNIV_UNLIKELY(!btr_page_reorganize(merge_block,
						       index, mtr))) {

			goto err_exit;
		}

		max_ins_size = page_get_max_insert_size(merge_page, n_recs);

		ut_ad(page_validate(merge_page, index));
		ut_ad(max_ins_size == max_ins_size_reorg);

		if (UNIV_UNLIKELY(data_size > max_ins_size)) {

			/* Add fault tolerance, though this should
			never happen */

			goto err_exit;
		}
	}

	merge_page_zip = buf_block_get_page_zip(merge_block);
#ifdef UNIV_ZIP_DEBUG
	if (UNIV_LIKELY_NULL(merge_page_zip)) {
		const page_zip_des_t*	page_zip
			= buf_block_get_page_zip(block);
		ut_a(page_zip);
		ut_a(page_zip_validate(merge_page_zip, merge_page));
		ut_a(page_zip_validate(page_zip, page));
	}
#endif /* UNIV_ZIP_DEBUG */

	/* Move records to the merge page */
	if (is_left) {
		/* Append this page's records after the left sibling's. */
		rec_t*	orig_pred = page_copy_rec_list_start(
			merge_block, block, page_get_supremum_rec(page),
			index, mtr);

		if (UNIV_UNLIKELY(!orig_pred)) {
			goto err_exit;
		}

		btr_search_drop_page_hash_index(block);

		/* Remove the page from the level list */
		btr_level_list_remove(space, zip_size, page, mtr);

		btr_node_ptr_delete(index, block, mtr);
		lock_update_merge_left(merge_block, orig_pred, block);
	} else {
		rec_t*		orig_succ;
#ifdef UNIV_BTR_DEBUG
		byte		fil_page_prev[4];
#endif /* UNIV_BTR_DEBUG */

		if (UNIV_LIKELY_NULL(merge_page_zip)) {
			/* The function page_zip_compress(), which will be
			invoked by page_copy_rec_list_end() below,
			requires that FIL_PAGE_PREV be FIL_NULL.
			Clear the field, but prepare to restore it. */
#ifdef UNIV_BTR_DEBUG
			memcpy(fil_page_prev, merge_page + FIL_PAGE_PREV, 4);
#endif /* UNIV_BTR_DEBUG */
#if FIL_NULL != 0xffffffff
# error "FIL_NULL != 0xffffffff"
#endif
			memset(merge_page + FIL_PAGE_PREV, 0xff, 4);
		}

		/* Prepend this page's records before the right
		sibling's. */
		orig_succ = page_copy_rec_list_end(merge_block, block,
						   page_get_infimum_rec(page),
						   cursor->index, mtr);

		if (UNIV_UNLIKELY(!orig_succ)) {
			ut_a(merge_page_zip);
#ifdef UNIV_BTR_DEBUG
			/* FIL_PAGE_PREV was restored from merge_page_zip. */
			ut_a(!memcmp(fil_page_prev,
				     merge_page + FIL_PAGE_PREV, 4));
#endif /* UNIV_BTR_DEBUG */
			goto err_exit;
		}

		btr_search_drop_page_hash_index(block);

#ifdef UNIV_BTR_DEBUG
		if (UNIV_LIKELY_NULL(merge_page_zip)) {
			/* Restore FIL_PAGE_PREV in order to avoid an assertion
			failure in btr_level_list_remove(), which will set
			the field again to FIL_NULL.  Even though this makes
			merge_page and merge_page_zip inconsistent for a
			split second, it is harmless, because the pages
			are X-latched. */
			memcpy(merge_page + FIL_PAGE_PREV, fil_page_prev, 4);
		}
#endif /* UNIV_BTR_DEBUG */

		/* Remove the page from the level list */
		btr_level_list_remove(space, zip_size, page, mtr);

		/* Replace the address of the old child node (= page) with the
		address of the merge page to the right */

		btr_node_ptr_set_child_page_no(
			btr_cur_get_rec(&father_cursor),
			btr_cur_get_page_zip(&father_cursor),
			offsets, right_page_no, mtr);
		btr_node_ptr_delete(index, merge_block, mtr);

		lock_update_merge_right(merge_block, orig_succ, block);
	}

	mem_heap_free(heap);

	if (!dict_index_is_clust(index) && page_is_leaf(merge_page)) {
		/* Update the free bits of the B-tree page in the
		insert buffer bitmap.  This has to be done in a
		separate mini-transaction that is committed before the
		main mini-transaction.  We cannot update the insert
		buffer bitmap in this mini-transaction, because
		btr_compress() can be invoked recursively without
		committing the mini-transaction in between.  Since
		insert buffer bitmap pages have a lower rank than
		B-tree pages, we must not access other pages in the
		same mini-transaction after accessing an insert buffer
		bitmap page. */

		/* The free bits in the insert buffer bitmap must
		never exceed the free space on a page.  It is safe to
		decrement or reset the bits in the bitmap in a
		mini-transaction that is committed before the
		mini-transaction that affects the free space. */

		/* It is unsafe to increment the bits in a separately
		committed mini-transaction, because in crash recovery,
		the free bits could momentarily be set too high. */

		if (zip_size) {
			/* Because the free bits may be incremented
			and we cannot update the insert buffer bitmap
			in the same mini-transaction, the only safe
			thing we can do here is the pessimistic
			approach: reset the free bits. */
			ibuf_reset_free_bits(merge_block);
		} else {
			/* On uncompressed pages, the free bits will
			never increase here.  Thus, it is safe to
			write the bits accurately in a separate
			mini-transaction. */
			ibuf_update_free_bits_if_full(merge_block,
						      UNIV_PAGE_SIZE,
						      ULINT_UNDEFINED);
		}
	}

	ut_ad(page_validate(merge_page, index));
#ifdef UNIV_ZIP_DEBUG
	ut_a(!merge_page_zip || page_zip_validate(merge_page_zip, merge_page));
#endif /* UNIV_ZIP_DEBUG */

	/* Free the file page */
	btr_page_free(index, block, mtr);

	ut_ad(btr_check_node_ptr(index, merge_block, mtr));
	return(TRUE);
}
2814 
/*************************************************************//**
Discards a page that is the only one on its level, walking up the tree
and discarding each single-record ancestor until the root is reached;
the root itself is then emptied. */
static
void
btr_discard_only_page_on_level(
/*===========================*/
	dict_index_t*	index,	/*!< in: index tree */
	buf_block_t*	block,	/*!< in: page which is the only one
				on its level */
	mtr_t*		mtr)	/*!< in: mtr */
{
	ulint		page_level = 0;
	trx_id_t	max_trx_id;

	/* Save the PAGE_MAX_TRX_ID from the leaf page. */
	max_trx_id = page_get_max_trx_id(buf_block_get_frame(block));

	/* Discard pages bottom-up until we reach the root. */
	while (buf_block_get_page_no(block) != dict_index_get_page(index)) {
		btr_cur_t	cursor;
		buf_block_t*	father;
		const page_t*	page	= buf_block_get_frame(block);

		/* Each discarded page holds exactly one record (the node
		pointer to the level below) and has no siblings. */
		ut_a(page_get_n_recs(page) == 1);
		ut_a(page_level == btr_page_get_level(page, mtr));
		ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
		ut_a(btr_page_get_next(page, mtr) == FIL_NULL);

		ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
		btr_search_drop_page_hash_index(block);

		btr_page_get_father(index, block, mtr, &cursor);
		father = btr_cur_get_block(&cursor);

		lock_update_discard(father, PAGE_HEAP_NO_SUPREMUM, block);

		/* Free the file page */
		btr_page_free(index, block, mtr);

		block = father;
		page_level++;
	}

	/* block is the root page, which must be empty, except
	for the node pointer to the (now discarded) block(s). */

#ifdef UNIV_BTR_DEBUG
	if (!dict_index_is_ibuf(index)) {
		const page_t*	root	= buf_block_get_frame(block);
		const ulint	space	= dict_index_get_space(index);
		ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
					    + root, space));
		ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
					    + root, space));
	}
#endif /* UNIV_BTR_DEBUG */

	/* Empty the root; the tree now consists of the root alone,
	at level 0. */
	btr_page_empty(block, buf_block_get_page_zip(block), index, 0, mtr);

	if (!dict_index_is_clust(index)) {
		/* We play it safe and reset the free bits for the root */
		ibuf_reset_free_bits(block);

		if (page_is_leaf(buf_block_get_frame(block))) {
			/* The root became a leaf: preserve the saved
			PAGE_MAX_TRX_ID. */
			ut_a(max_trx_id);
			page_set_max_trx_id(block,
					    buf_block_get_page_zip(block),
					    max_trx_id, mtr);
		}
	}
}
2887 
/*************************************************************//**
Discards the page of the cursor from the index tree: the neighbouring
page inherits the locks, the node pointer is deleted from the father,
and the page is unlinked from its level list and freed. */
UNIV_INTERN
void
btr_discard_page(
/*=============*/
	btr_cur_t*	cursor,	/*!< in: cursor on the page to discard */
	mtr_t*		mtr)	/*!< in: mtr */
{
	dict_index_t*	index;
	ulint		space;
	ulint		zip_size;
	ulint		left_page_no;
	ulint		right_page_no;
	buf_block_t*	merge_block;	/* sibling that inherits the locks */
	page_t*		merge_page;
	buf_block_t*	block;
	page_t*		page;
	rec_t*		node_ptr;

	block = btr_cur_get_block(cursor);
	index = btr_cur_get_index(cursor);

	/* The caller must hold the index tree X-latch and an X-fix
	on the page. */
	ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
				MTR_MEMO_X_LOCK));
	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
	space = dict_index_get_space(index);
	zip_size = dict_table_zip_size(index->table);

	/* Decide the page which will inherit the locks */

	left_page_no = btr_page_get_prev(buf_block_get_frame(block), mtr);
	right_page_no = btr_page_get_next(buf_block_get_frame(block), mtr);

	if (left_page_no != FIL_NULL) {
		merge_block = btr_block_get(space, zip_size, left_page_no,
					    RW_X_LATCH, mtr);
		merge_page = buf_block_get_frame(merge_block);
#ifdef UNIV_BTR_DEBUG
		ut_a(btr_page_get_next(merge_page, mtr)
		     == buf_block_get_page_no(block));
#endif /* UNIV_BTR_DEBUG */
	} else if (right_page_no != FIL_NULL) {
		merge_block = btr_block_get(space, zip_size, right_page_no,
					    RW_X_LATCH, mtr);
		merge_page = buf_block_get_frame(merge_block);
#ifdef UNIV_BTR_DEBUG
		ut_a(btr_page_get_prev(merge_page, mtr)
		     == buf_block_get_page_no(block));
#endif /* UNIV_BTR_DEBUG */
	} else {
		/* No siblings: discard the whole level instead. */
		btr_discard_only_page_on_level(index, block, mtr);

		return;
	}

	page = buf_block_get_frame(block);
	ut_a(page_is_comp(merge_page) == page_is_comp(page));
	btr_search_drop_page_hash_index(block);

	if (left_page_no == FIL_NULL && !page_is_leaf(page)) {

		/* We have to mark the leftmost node pointer on the right
		side page as the predefined minimum record */
		node_ptr = page_rec_get_next(page_get_infimum_rec(merge_page));

		ut_ad(page_rec_is_user_rec(node_ptr));

		/* This will make page_zip_validate() fail on merge_page
		until btr_level_list_remove() completes.  This is harmless,
		because everything will take place within a single
		mini-transaction and because writing to the redo log
		is an atomic operation (performed by mtr_commit()). */
		btr_set_min_rec_mark(node_ptr, mtr);
	}

	/* Delete the node pointer to the discarded page. */
	btr_node_ptr_delete(index, block, mtr);

	/* Remove the page from the level list */
	btr_level_list_remove(space, zip_size, page, mtr);
#ifdef UNIV_ZIP_DEBUG
	{
		page_zip_des_t*	merge_page_zip
			= buf_block_get_page_zip(merge_block);
		ut_a(!merge_page_zip
		     || page_zip_validate(merge_page_zip, merge_page));
	}
#endif /* UNIV_ZIP_DEBUG */

	/* Transfer the lock information to the inheriting sibling. */
	if (left_page_no != FIL_NULL) {
		lock_update_discard(merge_block, PAGE_HEAP_NO_SUPREMUM,
				    block);
	} else {
		lock_update_discard(merge_block,
				    lock_get_min_heap_no(merge_block),
				    block);
	}

	/* Free the file page */
	btr_page_free(index, block, mtr);

	ut_ad(btr_check_node_ptr(index, merge_block, mtr));
}
2995 
2996 #ifdef UNIV_BTR_PRINT
2997 /*************************************************************/
2999 UNIV_INTERN
3000 void
3001 btr_print_size(
3002 /*===========*/
3003  dict_index_t* index)
3004 {
3005  page_t* root;
3006  fseg_header_t* seg;
3007  mtr_t mtr;
3008 
3009  if (dict_index_is_ibuf(index)) {
3010  fputs("Sorry, cannot print info of an ibuf tree:"
3011  " use ibuf functions\n", stderr);
3012 
3013  return;
3014  }
3015 
3016  mtr_start(&mtr);
3017 
3018  root = btr_root_get(index, &mtr);
3019 
3020  seg = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
3021 
3022  fputs("INFO OF THE NON-LEAF PAGE SEGMENT\n", stderr);
3023  fseg_print(seg, &mtr);
3024 
3025  if (!(index->type & DICT_UNIVERSAL)) {
3026 
3027  seg = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
3028 
3029  fputs("INFO OF THE LEAF PAGE SEGMENT\n", stderr);
3030  fseg_print(seg, &mtr);
3031  }
3032 
3033  mtr_commit(&mtr);
3034 }
3035 
/************************************************************//**
Prints this page and, for non-leaf pages, recursively descends into the
children of up to the first and last 'width' node pointers, each child
under its own mini-transaction. */
static
void
btr_print_recursive(
/*================*/
	dict_index_t*	index,		/*!< in: index tree */
	buf_block_t*	block,		/*!< in: index page */
	ulint		width,		/*!< in: print this many entries
					from the start and end */
	mem_heap_t**	heap,		/*!< in/out: heap for rec_get_offsets() */
	ulint**		offsets,	/*!< in/out: buffer for rec_get_offsets() */
	mtr_t*		mtr)		/*!< in: mtr */
{
	const page_t*	page	= buf_block_get_frame(block);
	page_cur_t	cursor;
	ulint		n_recs;
	ulint		i	= 0;
	mtr_t		mtr2;	/* separate mtr for each child visit */

	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
	fprintf(stderr, "NODE ON LEVEL %lu page number %lu\n",
		(ulong) btr_page_get_level(page, mtr),
		(ulong) buf_block_get_page_no(block));

	page_print(block, index, width, width);

	n_recs = page_get_n_recs(page);

	page_cur_set_before_first(block, &cursor);
	page_cur_move_to_next(&cursor);

	while (!page_cur_is_after_last(&cursor)) {

		if (page_is_leaf(page)) {

			/* If this is the leaf level, do nothing */

		} else if ((i <= width) || (i >= n_recs - width)) {

			/* Recurse into the child of this node pointer,
			within the first or last 'width' entries. */
			const rec_t*	node_ptr;

			mtr_start(&mtr2);

			node_ptr = page_cur_get_rec(&cursor);

			*offsets = rec_get_offsets(node_ptr, index, *offsets,
						   ULINT_UNDEFINED, heap);
			btr_print_recursive(index,
					    btr_node_ptr_get_child(node_ptr,
								   index,
								   *offsets,
								   &mtr2),
					    width, heap, offsets, &mtr2);
			mtr_commit(&mtr2);
		}

		page_cur_move_to_next(&cursor);
		i++;
	}
}
3097 
3098 /**************************************************************/
3100 UNIV_INTERN
3101 void
3102 btr_print_index(
3103 /*============*/
3104  dict_index_t* index,
3105  ulint width)
3107 {
3108  mtr_t mtr;
3109  buf_block_t* root;
3110  mem_heap_t* heap = NULL;
3111  ulint offsets_[REC_OFFS_NORMAL_SIZE];
3112  ulint* offsets = offsets_;
3113  rec_offs_init(offsets_);
3114 
3115  fputs("--------------------------\n"
3116  "INDEX TREE PRINT\n", stderr);
3117 
3118  mtr_start(&mtr);
3119 
3120  root = btr_root_block_get(index, &mtr);
3121 
3122  btr_print_recursive(index, root, width, &heap, &offsets, &mtr);
3123  if (UNIV_LIKELY_NULL(heap)) {
3124  mem_heap_free(heap);
3125  }
3126 
3127  mtr_commit(&mtr);
3128 
3129  btr_validate_index(index, NULL);
3130 }
3131 #endif /* UNIV_BTR_PRINT */
3132 
3133 #ifdef UNIV_DEBUG
3134 /************************************************************/
3137 UNIV_INTERN
3138 ibool
3139 btr_check_node_ptr(
3140 /*===============*/
3141  dict_index_t* index,
3142  buf_block_t* block,
3143  mtr_t* mtr)
3144 {
3145  mem_heap_t* heap;
3146  dtuple_t* tuple;
3147  ulint* offsets;
3148  btr_cur_t cursor;
3149  page_t* page = buf_block_get_frame(block);
3150 
3151  ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
3152  if (dict_index_get_page(index) == buf_block_get_page_no(block)) {
3153 
3154  return(TRUE);
3155  }
3156 
3157  heap = mem_heap_create(256);
3158  offsets = btr_page_get_father_block(NULL, heap, index, block, mtr,
3159  &cursor);
3160 
3161  if (page_is_leaf(page)) {
3162 
3163  goto func_exit;
3164  }
3165 
3166  tuple = dict_index_build_node_ptr(
3167  index, page_rec_get_next(page_get_infimum_rec(page)), 0, heap,
3168  btr_page_get_level(page, mtr));
3169 
3170  ut_a(!cmp_dtuple_rec(tuple, btr_cur_get_rec(&cursor), offsets));
3171 func_exit:
3172  mem_heap_free(heap);
3173 
3174  return(TRUE);
3175 }
3176 #endif /* UNIV_DEBUG */
3177 
3178 /************************************************************/
3180 static
3181 void
3182 btr_index_rec_validate_report(
3183 /*==========================*/
3184  const page_t* page,
3185  const rec_t* rec,
3186  const dict_index_t* index)
3187 {
3188  fputs("InnoDB: Record in ", stderr);
3189  dict_index_name_print(stderr, NULL, index);
3190  fprintf(stderr, ", page %lu, at offset %lu\n",
3191  page_get_page_no(page), (ulint) page_offset(rec));
3192 }
3193 
/************************************************************//**
Checks the size and number of fields in a record based on the index
definition.  For the insert buffer tree no checks can be made.
@return	TRUE if ok */
UNIV_INTERN
ibool
btr_index_rec_validate(
/*===================*/
	const rec_t*		rec,		/*!< in: index record */
	const dict_index_t*	index,		/*!< in: index */
	ibool			dump_on_error)	/*!< in: TRUE if the page
						containing the record should
						be printed on error */
{
	ulint		len;
	ulint		n;	/* number of fields in the index */
	ulint		i;
	const page_t*	page;
	mem_heap_t*	heap	= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets	= offsets_;
	rec_offs_init(offsets_);

	page = page_align(rec);

	if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
		/* The insert buffer index tree can contain records from any
		other index: we cannot check the number of fields or
		their length */

		return(TRUE);
	}

	/* The page's compact flag must match the table's row format. */
	if (UNIV_UNLIKELY((ibool)!!page_is_comp(page)
			  != dict_table_is_comp(index->table))) {
		btr_index_rec_validate_report(page, rec, index);
		fprintf(stderr, "InnoDB: compact flag=%lu, should be %lu\n",
			(ulong) !!page_is_comp(page),
			(ulong) dict_table_is_comp(index->table));

		return(FALSE);
	}

	n = dict_index_get_n_fields(index);

	/* Old-style records store their own field count; it must agree
	with the index definition. */
	if (!page_is_comp(page)
	    && UNIV_UNLIKELY(rec_get_n_fields_old(rec) != n)) {
		btr_index_rec_validate_report(page, rec, index);
		fprintf(stderr, "InnoDB: has %lu fields, should have %lu\n",
			(ulong) rec_get_n_fields_old(rec), (ulong) n);

		if (dump_on_error) {
			buf_page_print(page, 0);

			fputs("InnoDB: corrupt record ", stderr);
			rec_print_old(stderr, rec);
			putc('\n', stderr);
		}
		return(FALSE);
	}

	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);

	/* Validate the length of every field against the column
	definition. */
	for (i = 0; i < n; i++) {
		ulint	fixed_size = dict_col_get_fixed_size(
			dict_index_get_nth_col(index, i), page_is_comp(page));

		rec_get_nth_field_offs(offsets, i, &len);

		/* Note that if fixed_size != 0, it equals the
		length of a fixed-size column in the clustered index.
		A prefix index of the column is of fixed, but different
		length.  When fixed_size == 0, prefix_len is the maximum
		length of the prefix index column. */

		if ((dict_index_get_nth_field(index, i)->prefix_len == 0
		     && len != UNIV_SQL_NULL && fixed_size
		     && len != fixed_size)
		    || (dict_index_get_nth_field(index, i)->prefix_len > 0
			&& len != UNIV_SQL_NULL
			&& len
			> dict_index_get_nth_field(index, i)->prefix_len)) {

			btr_index_rec_validate_report(page, rec, index);
			fprintf(stderr,
				"InnoDB: field %lu len is %lu,"
				" should be %lu\n",
				(ulong) i, (ulong) len, (ulong) fixed_size);

			if (dump_on_error) {
				buf_page_print(page, 0);

				fputs("InnoDB: corrupt record ", stderr);
				rec_print_new(stderr, rec, offsets);
				putc('\n', stderr);
			}
			if (UNIV_LIKELY_NULL(heap)) {
				mem_heap_free(heap);
			}
			return(FALSE);
		}
	}

	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
	return(TRUE);
}
3302 
3303 /************************************************************/
3307 static
3308 ibool
3309 btr_index_page_validate(
3310 /*====================*/
3311  buf_block_t* block,
3312  dict_index_t* index)
3313 {
3314  page_cur_t cur;
3315  ibool ret = TRUE;
3316 
3317  page_cur_set_before_first(block, &cur);
3318  page_cur_move_to_next(&cur);
3319 
3320  for (;;) {
3321  if (page_cur_is_after_last(&cur)) {
3322 
3323  break;
3324  }
3325 
3326  if (!btr_index_rec_validate(cur.rec, index, TRUE)) {
3327 
3328  return(FALSE);
3329  }
3330 
3331  page_cur_move_to_next(&cur);
3332  }
3333 
3334  return(ret);
3335 }
3336 
3337 /************************************************************/
3339 static
3340 void
3341 btr_validate_report1(
3342 /*=================*/
3343  dict_index_t* index,
3344  ulint level,
3345  const buf_block_t* block)
3346 {
3347  fprintf(stderr, "InnoDB: Error in page %lu of ",
3348  buf_block_get_page_no(block));
3349  dict_index_name_print(stderr, NULL, index);
3350  if (level) {
3351  fprintf(stderr, ", index tree level %lu", level);
3352  }
3353  putc('\n', stderr);
3354 }
3355 
3356 /************************************************************/
3358 static
3359 void
3360 btr_validate_report2(
3361 /*=================*/
3362  const dict_index_t* index,
3363  ulint level,
3364  const buf_block_t* block1,
3365  const buf_block_t* block2)
3366 {
3367  fprintf(stderr, "InnoDB: Error in pages %lu and %lu of ",
3368  buf_block_get_page_no(block1),
3369  buf_block_get_page_no(block2));
3370  dict_index_name_print(stderr, NULL, index);
3371  if (level) {
3372  fprintf(stderr, ", index tree level %lu", level);
3373  }
3374  putc('\n', stderr);
3375 }
3376 
3377 /************************************************************/
3380 static
3381 ibool
3382 btr_validate_level(
3383 /*===============*/
3384  dict_index_t* index,
3385  trx_t* trx,
3386  ulint level)
3387 {
3388  ulint space;
3389  ulint zip_size;
3390  buf_block_t* block;
3391  page_t* page;
3392  buf_block_t* right_block = 0; /* remove warning */
3393  page_t* right_page = 0; /* remove warning */
3394  page_t* father_page;
3395  btr_cur_t node_cur;
3396  btr_cur_t right_node_cur;
3397  rec_t* rec;
3398  ulint right_page_no;
3399  ulint left_page_no;
3400  page_cur_t cursor;
3401  dtuple_t* node_ptr_tuple;
3402  ibool ret = TRUE;
3403  mtr_t mtr;
3404  mem_heap_t* heap = mem_heap_create(256);
3405  ulint* offsets = NULL;
3406  ulint* offsets2= NULL;
3407 #ifdef UNIV_ZIP_DEBUG
3408  page_zip_des_t* page_zip;
3409 #endif /* UNIV_ZIP_DEBUG */
3410 
3411  mtr_start(&mtr);
3412 
3413  mtr_x_lock(dict_index_get_lock(index), &mtr);
3414 
3415  block = btr_root_block_get(index, &mtr);
3416  page = buf_block_get_frame(block);
3417 
3418  space = dict_index_get_space(index);
3419  zip_size = dict_table_zip_size(index->table);
3420 
3421  while (level != btr_page_get_level(page, &mtr)) {
3422  const rec_t* node_ptr;
3423 
3424  ut_a(space == buf_block_get_space(block));
3425  ut_a(space == page_get_space_id(page));
3426 #ifdef UNIV_ZIP_DEBUG
3427  page_zip = buf_block_get_page_zip(block);
3428  ut_a(!page_zip || page_zip_validate(page_zip, page));
3429 #endif /* UNIV_ZIP_DEBUG */
3430  ut_a(!page_is_leaf(page));
3431 
3432  page_cur_set_before_first(block, &cursor);
3433  page_cur_move_to_next(&cursor);
3434 
3435  node_ptr = page_cur_get_rec(&cursor);
3436  offsets = rec_get_offsets(node_ptr, index, offsets,
3437  ULINT_UNDEFINED, &heap);
3438  block = btr_node_ptr_get_child(node_ptr, index, offsets, &mtr);
3439  page = buf_block_get_frame(block);
3440  }
3441 
3442  /* Now we are on the desired level. Loop through the pages on that
3443  level. */
3444 loop:
3445  if (trx_is_interrupted(trx)) {
3446  mtr_commit(&mtr);
3447  mem_heap_free(heap);
3448  return(ret);
3449  }
3450  mem_heap_empty(heap);
3451  offsets = offsets2 = NULL;
3452  mtr_x_lock(dict_index_get_lock(index), &mtr);
3453 
3454 #ifdef UNIV_ZIP_DEBUG
3455  page_zip = buf_block_get_page_zip(block);
3456  ut_a(!page_zip || page_zip_validate(page_zip, page));
3457 #endif /* UNIV_ZIP_DEBUG */
3458 
3459  /* Check ordering etc. of records */
3460 
3461  if (!page_validate(page, index)) {
3462  btr_validate_report1(index, level, block);
3463 
3464  ret = FALSE;
3465  } else if (level == 0) {
3466  /* We are on level 0. Check that the records have the right
3467  number of fields, and field lengths are right. */
3468 
3469  if (!btr_index_page_validate(block, index)) {
3470 
3471  ret = FALSE;
3472  }
3473  }
3474 
3475  ut_a(btr_page_get_level(page, &mtr) == level);
3476 
3477  right_page_no = btr_page_get_next(page, &mtr);
3478  left_page_no = btr_page_get_prev(page, &mtr);
3479 
3480  ut_a(page_get_n_recs(page) > 0 || (level == 0
3481  && page_get_page_no(page)
3482  == dict_index_get_page(index)));
3483 
3484  if (right_page_no != FIL_NULL) {
3485  const rec_t* right_rec;
3486  right_block = btr_block_get(space, zip_size, right_page_no,
3487  RW_X_LATCH, &mtr);
3488  right_page = buf_block_get_frame(right_block);
3489  if (UNIV_UNLIKELY(btr_page_get_prev(right_page, &mtr)
3490  != page_get_page_no(page))) {
3491  btr_validate_report2(index, level, block, right_block);
3492  fputs("InnoDB: broken FIL_PAGE_NEXT"
3493  " or FIL_PAGE_PREV links\n", stderr);
3494  buf_page_print(page, 0);
3495  buf_page_print(right_page, 0);
3496 
3497  ret = FALSE;
3498  }
3499 
3500  if (UNIV_UNLIKELY(page_is_comp(right_page)
3501  != page_is_comp(page))) {
3502  btr_validate_report2(index, level, block, right_block);
3503  fputs("InnoDB: 'compact' flag mismatch\n", stderr);
3504  buf_page_print(page, 0);
3505  buf_page_print(right_page, 0);
3506 
3507  ret = FALSE;
3508 
3509  goto node_ptr_fails;
3510  }
3511 
3512  rec = page_rec_get_prev(page_get_supremum_rec(page));
3513  right_rec = page_rec_get_next(page_get_infimum_rec(
3514  right_page));
3515  offsets = rec_get_offsets(rec, index,
3516  offsets, ULINT_UNDEFINED, &heap);
3517  offsets2 = rec_get_offsets(right_rec, index,
3518  offsets2, ULINT_UNDEFINED, &heap);
3519  if (UNIV_UNLIKELY(cmp_rec_rec(rec, right_rec,
3520  offsets, offsets2,
3521  index) >= 0)) {
3522 
3523  btr_validate_report2(index, level, block, right_block);
3524 
3525  fputs("InnoDB: records in wrong order"
3526  " on adjacent pages\n", stderr);
3527 
3528  buf_page_print(page, 0);
3529  buf_page_print(right_page, 0);
3530 
3531  fputs("InnoDB: record ", stderr);
3532  rec = page_rec_get_prev(page_get_supremum_rec(page));
3533  rec_print(stderr, rec, index);
3534  putc('\n', stderr);
3535  fputs("InnoDB: record ", stderr);
3536  rec = page_rec_get_next(
3537  page_get_infimum_rec(right_page));
3538  rec_print(stderr, rec, index);
3539  putc('\n', stderr);
3540 
3541  ret = FALSE;
3542  }
3543  }
3544 
3545  if (level > 0 && left_page_no == FIL_NULL) {
3546  ut_a(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
3547  page_rec_get_next(page_get_infimum_rec(page)),
3548  page_is_comp(page)));
3549  }
3550 
3551  if (buf_block_get_page_no(block) != dict_index_get_page(index)) {
3552 
3553  /* Check father node pointers */
3554 
3555  rec_t* node_ptr;
3556 
3557  offsets = btr_page_get_father_block(offsets, heap, index,
3558  block, &mtr, &node_cur);
3559  father_page = btr_cur_get_page(&node_cur);
3560  node_ptr = btr_cur_get_rec(&node_cur);
3561 
3563  index, page_rec_get_prev(page_get_supremum_rec(page)),
3564  block, &node_cur);
3565  offsets = btr_page_get_father_node_ptr(offsets, heap,
3566  &node_cur, &mtr);
3567 
3568  if (UNIV_UNLIKELY(node_ptr != btr_cur_get_rec(&node_cur))
3569  || UNIV_UNLIKELY(btr_node_ptr_get_child_page_no(node_ptr,
3570  offsets)
3571  != buf_block_get_page_no(block))) {
3572 
3573  btr_validate_report1(index, level, block);
3574 
3575  fputs("InnoDB: node pointer to the page is wrong\n",
3576  stderr);
3577 
3578  buf_page_print(father_page, 0);
3579  buf_page_print(page, 0);
3580 
3581  fputs("InnoDB: node ptr ", stderr);
3582  rec_print(stderr, node_ptr, index);
3583 
3584  rec = btr_cur_get_rec(&node_cur);
3585  fprintf(stderr, "\n"
3586  "InnoDB: node ptr child page n:o %lu\n",
3588  rec, offsets));
3589 
3590  fputs("InnoDB: record on page ", stderr);
3591  rec_print_new(stderr, rec, offsets);
3592  putc('\n', stderr);
3593  ret = FALSE;
3594 
3595  goto node_ptr_fails;
3596  }
3597 
3598  if (!page_is_leaf(page)) {
3599  node_ptr_tuple = dict_index_build_node_ptr(
3600  index,
3601  page_rec_get_next(page_get_infimum_rec(page)),
3602  0, heap, btr_page_get_level(page, &mtr));
3603 
3604  if (cmp_dtuple_rec(node_ptr_tuple, node_ptr,
3605  offsets)) {
3606  const rec_t* first_rec = page_rec_get_next(
3607  page_get_infimum_rec(page));
3608 
3609  btr_validate_report1(index, level, block);
3610 
3611  buf_page_print(father_page, 0);
3612  buf_page_print(page, 0);
3613 
3614  fputs("InnoDB: Error: node ptrs differ"
3615  " on levels > 0\n"
3616  "InnoDB: node ptr ", stderr);
3617  rec_print_new(stderr, node_ptr, offsets);
3618  fputs("InnoDB: first rec ", stderr);
3619  rec_print(stderr, first_rec, index);
3620  putc('\n', stderr);
3621  ret = FALSE;
3622 
3623  goto node_ptr_fails;
3624  }
3625  }
3626 
3627  if (left_page_no == FIL_NULL) {
3628  ut_a(node_ptr == page_rec_get_next(
3629  page_get_infimum_rec(father_page)));
3630  ut_a(btr_page_get_prev(father_page, &mtr) == FIL_NULL);
3631  }
3632 
3633  if (right_page_no == FIL_NULL) {
3634  ut_a(node_ptr == page_rec_get_prev(
3635  page_get_supremum_rec(father_page)));
3636  ut_a(btr_page_get_next(father_page, &mtr) == FIL_NULL);
3637  } else {
3638  const rec_t* right_node_ptr
3639  = page_rec_get_next(node_ptr);
3640 
3641  offsets = btr_page_get_father_block(
3642  offsets, heap, index, right_block,
3643  &mtr, &right_node_cur);
3644  if (right_node_ptr
3645  != page_get_supremum_rec(father_page)) {
3646 
3647  if (btr_cur_get_rec(&right_node_cur)
3648  != right_node_ptr) {
3649  ret = FALSE;
3650  fputs("InnoDB: node pointer to"
3651  " the right page is wrong\n",
3652  stderr);
3653 
3654  btr_validate_report1(index, level,
3655  block);
3656 
3657  buf_page_print(father_page, 0);
3658  buf_page_print(page, 0);
3659  buf_page_print(right_page, 0);
3660  }
3661  } else {
3662  page_t* right_father_page
3663  = btr_cur_get_page(&right_node_cur);
3664 
3665  if (btr_cur_get_rec(&right_node_cur)
3666  != page_rec_get_next(
3667  page_get_infimum_rec(
3668  right_father_page))) {
3669  ret = FALSE;
3670  fputs("InnoDB: node pointer 2 to"
3671  " the right page is wrong\n",
3672  stderr);
3673 
3674  btr_validate_report1(index, level,
3675  block);
3676 
3677  buf_page_print(father_page, 0);
3678  buf_page_print(right_father_page, 0);
3679  buf_page_print(page, 0);
3680  buf_page_print(right_page, 0);
3681  }
3682 
3683  if (page_get_page_no(right_father_page)
3684  != btr_page_get_next(father_page, &mtr)) {
3685 
3686  ret = FALSE;
3687  fputs("InnoDB: node pointer 3 to"
3688  " the right page is wrong\n",
3689  stderr);
3690 
3691  btr_validate_report1(index, level,
3692  block);
3693 
3694  buf_page_print(father_page, 0);
3695  buf_page_print(right_father_page, 0);
3696  buf_page_print(page, 0);
3697  buf_page_print(right_page, 0);
3698  }
3699  }
3700  }
3701  }
3702 
3703 node_ptr_fails:
3704  /* Commit the mini-transaction to release the latch on 'page'.
3705  Re-acquire the latch on right_page, which will become 'page'
3706  on the next loop. The page has already been checked. */
3707  mtr_commit(&mtr);
3708 
3709  if (right_page_no != FIL_NULL) {
3710  mtr_start(&mtr);
3711 
3712  block = btr_block_get(space, zip_size, right_page_no,
3713  RW_X_LATCH, &mtr);
3714  page = buf_block_get_frame(block);
3715 
3716  goto loop;
3717  }
3718 
3719  mem_heap_free(heap);
3720  return(ret);
3721 }
3722 
3723 /**************************************************************/
3726 UNIV_INTERN
3727 ibool
3728 btr_validate_index(
3729 /*===============*/
3730  dict_index_t* index,
3731  trx_t* trx)
3732 {
3733  mtr_t mtr;
3734  page_t* root;
3735  ulint i;
3736  ulint n;
3737 
3738  mtr_start(&mtr);
3739  mtr_x_lock(dict_index_get_lock(index), &mtr);
3740 
3741  root = btr_root_get(index, &mtr);
3742  n = btr_page_get_level(root, &mtr);
3743 
3744  for (i = 0; i <= n && !trx_is_interrupted(trx); i++) {
3745  if (!btr_validate_level(index, trx, n - i)) {
3746 
3747  mtr_commit(&mtr);
3748 
3749  return(FALSE);
3750  }
3751  }
3752 
3753  mtr_commit(&mtr);
3754 
3755  return(TRUE);
3756 }
3757 #endif /* !UNIV_HOTBACKUP */