Drizzled Public API Documentation

trx0sys.cc
1 /*****************************************************************************
2 
3 Copyright (C) 1996, 2010, Innobase Oy. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free Software
7 Foundation; version 2 of the License.
8 
9 This program is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12 
13 You should have received a copy of the GNU General Public License along with
14 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
15 St, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 *****************************************************************************/
18 
19 /**************************************************/
26 #include "trx0sys.h"
27 
28 #ifdef UNIV_NONINL
29 #include "trx0sys.ic"
30 #endif
31 
32 #ifndef UNIV_HOTBACKUP
33 #include "fsp0fsp.h"
34 #include "mtr0log.h"
35 #include "mtr0log.h"
36 #include "trx0trx.h"
37 #include "trx0rseg.h"
38 #include "trx0undo.h"
39 #include "srv0srv.h"
40 #include "trx0purge.h"
41 #include "log0log.h"
42 #include "log0recv.h"
43 #include "os0file.h"
44 #include "read0read.h"
45 
48  ulint id;
49  const char* name;
53 };
54 
55 #include <drizzled/errmsg_print.h>
56 
58 typedef struct file_format_struct file_format_t;
59 
61 UNIV_INTERN trx_sys_t* trx_sys = NULL;
63 UNIV_INTERN trx_doublewrite_t* trx_doublewrite = NULL;
64 
67 UNIV_INTERN ibool trx_doublewrite_must_reset_space_ids = FALSE;
69 UNIV_INTERN ibool trx_doublewrite_buf_is_being_created = FALSE;
70 
74 UNIV_INTERN ibool trx_sys_multiple_tablespace_format = FALSE;
75 
78 /* @{ */
80 UNIV_INTERN char trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
84 UNIV_INTERN ib_int64_t trx_sys_mysql_master_log_pos = -1;
85 /* @} */
86 
90 /* @{ */
92 UNIV_INTERN char trx_sys_mysql_bin_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
94 UNIV_INTERN ib_int64_t trx_sys_mysql_bin_log_pos = -1;
95 
97 
98 /* @} */
99 #endif /* !UNIV_HOTBACKUP */
100 
/* Table of file format names. trx_sys_file_format_id_to_name() indexes
this array directly with a file format id, so the position of each name
is significant: entry 0 is "Antelope", entry 1 is "Barracuda", and so on
through one name per letter of the alphabet. */
102 static const char* file_format_name_map[] = {
103  "Antelope",
104  "Barracuda",
105  "Cheetah",
106  "Dragon",
107  "Elk",
108  "Fox",
109  "Gazelle",
110  "Hornet",
111  "Impala",
112  "Jaguar",
113  "Kangaroo",
114  "Leopard",
115  "Moose",
116  "Nautilus",
117  "Ocelot",
118  "Porpoise",
119  "Quail",
120  "Rabbit",
121  "Shark",
122  "Tiger",
123  "Urchin",
124  "Viper",
125  "Whale",
126  "Xenops",
127  "Yak",
128  "Zebra"
129 };
130 
132 static const ulint FILE_FORMAT_NAME_N
133  = sizeof(file_format_name_map) / sizeof(file_format_name_map[0]);
134 
135 #ifdef UNIV_PFS_MUTEX
136 /* Key to register the mutex with performance schema */
137 UNIV_INTERN mysql_pfs_key_t trx_doublewrite_mutex_key;
138 UNIV_INTERN mysql_pfs_key_t file_format_max_mutex_key;
139 #endif /* UNIV_PFS_MUTEX */
140 
141 #ifndef UNIV_HOTBACKUP
142 
145 static file_format_t file_format_max;
146 
147 /****************************************************************/
151 UNIV_INTERN
152 ibool
154 /*========================*/
155  ulint page_no)
156 {
157  if (trx_doublewrite == NULL) {
158 
159  return(FALSE);
160  }
161 
162  if (page_no >= trx_doublewrite->block1
163  && page_no < trx_doublewrite->block1
165  return(TRUE);
166  }
167 
168  if (page_no >= trx_doublewrite->block2
169  && page_no < trx_doublewrite->block2
171  return(TRUE);
172  }
173 
174  return(FALSE);
175 }
176 
177 /****************************************************************/
179 static
180 void
181 trx_doublewrite_init(
182 /*=================*/
183  byte* doublewrite)
185 {
186  trx_doublewrite = static_cast<trx_doublewrite_t *>(mem_alloc(sizeof(trx_doublewrite_t)));
187 
188  /* Since we now start to use the doublewrite buffer, no need to call
189  fsync() after every write to a data file */
190 #ifdef UNIV_DO_FLUSH
191  os_do_not_call_flush_at_each_write = TRUE;
192 #endif /* UNIV_DO_FLUSH */
193 
194  mutex_create(trx_doublewrite_mutex_key,
195  &trx_doublewrite->mutex, SYNC_DOUBLEWRITE);
196 
198 
200  doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK1);
202  doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK2);
203  trx_doublewrite->write_buf_unaligned = static_cast<byte *>(ut_malloc(
204  (1 + 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) * UNIV_PAGE_SIZE));
205 
206  trx_doublewrite->write_buf = static_cast<byte *>(ut_align(
207  trx_doublewrite->write_buf_unaligned, UNIV_PAGE_SIZE));
208  trx_doublewrite->buf_block_arr = static_cast<buf_page_t **>(mem_alloc(
209  2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * sizeof(void*)));
210 }
211 
212 /****************************************************************/
215 UNIV_INTERN
216 void
218 /*===============================================*/
219 {
220  buf_block_t* block;
221  byte* doublewrite;
222  mtr_t mtr;
223 
224  /* We upgraded to 4.1.x and reset the space id fields in the
225  doublewrite buffer. Let us mark to the trx_sys header that the upgrade
226  has been done. */
227 
228  mtr_start(&mtr);
229 
230  block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
231  RW_X_LATCH, &mtr);
232  buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
233 
234  doublewrite = buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE;
235 
238  MLOG_4BYTES, &mtr);
239  mtr_commit(&mtr);
240 
241  /* Flush the modified pages to disk and make a checkpoint */
242  log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
243 
245 }
246 
247 /****************************************************************/
250 UNIV_INTERN
251 void
253 /*================================*/
254 {
255  buf_block_t* block;
256  buf_block_t* block2;
257 #ifdef UNIV_SYNC_DEBUG
258  buf_block_t* new_block;
259 #endif /* UNIV_SYNC_DEBUG */
260  byte* doublewrite;
261  byte* fseg_header;
262  ulint page_no;
263  ulint prev_page_no;
264  ulint i;
265  mtr_t mtr;
266 
267  if (trx_doublewrite) {
268  /* Already inited */
269 
270  return;
271  }
272 
273 start_again:
274  mtr_start(&mtr);
276 
277  block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
278  RW_X_LATCH, &mtr);
279  buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
280 
281  doublewrite = buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE;
282 
285  /* The doublewrite buffer has already been created:
286  just read in some numbers */
287 
288  trx_doublewrite_init(doublewrite);
289 
290  mtr_commit(&mtr);
292  } else {
293  fprintf(stderr,
294  "InnoDB: Doublewrite buffer not found:"
295  " creating new\n");
296 
299  + FSP_EXTENT_SIZE / 2 + 100)
300  * UNIV_PAGE_SIZE)) {
301  fprintf(stderr,
302  "InnoDB: Cannot create doublewrite buffer:"
303  " you must\n"
304  "InnoDB: increase your buffer pool size.\n"
305  "InnoDB: Cannot continue operation.\n");
306 
307  exit(1);
308  }
309 
310  block2 = fseg_create(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
312  + TRX_SYS_DOUBLEWRITE_FSEG, &mtr);
313 
314  /* fseg_create acquires a second latch on the page,
315  therefore we must declare it: */
316 
317  buf_block_dbg_add_level(block2, SYNC_NO_ORDER_CHECK);
318 
319  if (block2 == NULL) {
320  fprintf(stderr,
321  "InnoDB: Cannot create doublewrite buffer:"
322  " you must\n"
323  "InnoDB: increase your tablespace size.\n"
324  "InnoDB: Cannot continue operation.\n");
325 
326  /* We exit without committing the mtr to prevent
327  its modifications to the database getting to disk */
328 
329  exit(1);
330  }
331 
332  fseg_header = buf_block_get_frame(block)
334  prev_page_no = 0;
335 
336  for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
337  + FSP_EXTENT_SIZE / 2; i++) {
338  page_no = fseg_alloc_free_page(fseg_header,
339  prev_page_no + 1,
340  FSP_UP, &mtr);
341  if (page_no == FIL_NULL) {
342  fprintf(stderr,
343  "InnoDB: Cannot create doublewrite"
344  " buffer: you must\n"
345  "InnoDB: increase your"
346  " tablespace size.\n"
347  "InnoDB: Cannot continue operation.\n"
348  );
349 
350  exit(1);
351  }
352 
353  /* We read the allocated pages to the buffer pool;
354  when they are written to disk in a flush, the space
355  id and page number fields are also written to the
356  pages. When we at database startup read pages
357  from the doublewrite buffer, we know that if the
358  space id and page number in them are the same as
359  the page position in the tablespace, then the page
360  has not been written to in doublewrite. */
361 
362 #ifdef UNIV_SYNC_DEBUG
363  new_block =
364 #endif /* UNIV_SYNC_DEBUG */
365  buf_page_get(TRX_SYS_SPACE, 0, page_no,
366  RW_X_LATCH, &mtr);
367  buf_block_dbg_add_level(new_block,
368  SYNC_NO_ORDER_CHECK);
369 
370  if (i == FSP_EXTENT_SIZE / 2) {
371  ut_a(page_no == FSP_EXTENT_SIZE);
372  mlog_write_ulint(doublewrite
374  page_no, MLOG_4BYTES, &mtr);
375  mlog_write_ulint(doublewrite
378  page_no, MLOG_4BYTES, &mtr);
379  } else if (i == FSP_EXTENT_SIZE / 2
381  ut_a(page_no == 2 * FSP_EXTENT_SIZE);
382  mlog_write_ulint(doublewrite
384  page_no, MLOG_4BYTES, &mtr);
385  mlog_write_ulint(doublewrite
388  page_no, MLOG_4BYTES, &mtr);
389  } else if (i > FSP_EXTENT_SIZE / 2) {
390  ut_a(page_no == prev_page_no + 1);
391  }
392 
393  prev_page_no = page_no;
394  }
395 
398  MLOG_4BYTES, &mtr);
402  MLOG_4BYTES, &mtr);
403 
404  mlog_write_ulint(doublewrite
407  MLOG_4BYTES, &mtr);
408  mtr_commit(&mtr);
409 
410  /* Flush the modified pages to disk and make a checkpoint */
411  log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
412 
413  fprintf(stderr, "InnoDB: Doublewrite buffer created\n");
414 
416 
417  goto start_again;
418  }
419 }
420 
421 /****************************************************************/
428 UNIV_INTERN
429 void
431 /*======================================*/
432  ibool restore_corrupt_pages)
433 {
434  byte* buf;
435  byte* read_buf;
436  byte* unaligned_read_buf;
437  ulint block1;
438  ulint block2;
439  ulint source_page_no;
440  byte* page;
441  byte* doublewrite;
442  ulint space_id;
443  ulint page_no;
444  ulint i;
445 
446  /* We do the file i/o past the buffer pool */
447 
448  unaligned_read_buf = static_cast<byte *>(ut_malloc(2 * UNIV_PAGE_SIZE));
449  read_buf = static_cast<byte *>(ut_align(unaligned_read_buf, UNIV_PAGE_SIZE));
450 
451  /* Read the trx sys header to check if we are using the doublewrite
452  buffer */
453 
454  fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, 0,
455  UNIV_PAGE_SIZE, read_buf, NULL);
456  doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
457 
460  /* The doublewrite buffer has been created */
461 
462  trx_doublewrite_init(doublewrite);
463 
464  block1 = trx_doublewrite->block1;
465  block2 = trx_doublewrite->block2;
466 
467  buf = trx_doublewrite->write_buf;
468  } else {
469  goto leave_func;
470  }
471 
474 
475  /* We are upgrading from a version < 4.1.x to a version where
476  multiple tablespaces are supported. We must reset the space id
477  field in the pages in the doublewrite buffer because starting
478  from this version the space id is stored to
479  FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID. */
480 
482 
483  fprintf(stderr,
484  "InnoDB: Resetting space id's in the"
485  " doublewrite buffer\n");
486  } else {
488  }
489 
490  /* Read the pages from the doublewrite buffer to memory */
491 
492  fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block1, 0,
493  TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
494  buf, NULL);
495  fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block2, 0,
496  TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
497  buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
498  NULL);
499  /* Check if any of these pages is half-written in data files, in the
500  intended position */
501 
502  page = buf;
503 
504  for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) {
505 
506  page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
507 
509 
510  space_id = 0;
511  mach_write_to_4(page
513  /* We do not need to calculate new checksums for the
514  pages because the field .._SPACE_ID does not affect
515  them. Write the page back to where we read it from. */
516 
517  if (i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
518  source_page_no = block1 + i;
519  } else {
520  source_page_no = block2
522  }
523 
524  fil_io(OS_FILE_WRITE, TRUE, 0, 0, source_page_no, 0,
525  UNIV_PAGE_SIZE, page, NULL);
526  /* printf("Resetting space id in page %lu\n",
527  source_page_no); */
528  } else {
529  space_id = mach_read_from_4(
531  }
532 
533  if (!restore_corrupt_pages) {
534  /* The database was shut down gracefully: no need to
535  restore pages */
536 
537  } else if (!fil_tablespace_exists_in_mem(space_id)) {
538  /* Maybe we have dropped the single-table tablespace
539  and this page once belonged to it: do nothing */
540 
541  } else if (!fil_check_adress_in_tablespace(space_id,
542  page_no)) {
543  fprintf(stderr,
544  "InnoDB: Warning: a page in the"
545  " doublewrite buffer is not within space\n"
546  "InnoDB: bounds; space id %lu"
547  " page number %lu, page %lu in"
548  " doublewrite buf.\n",
549  (ulong) space_id, (ulong) page_no, (ulong) i);
550 
551  } else if (space_id == TRX_SYS_SPACE
552  && ((page_no >= block1
553  && page_no
554  < block1 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
555  || (page_no >= block2
556  && page_no
557  < (block2
558  + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)))) {
559 
560  /* It is an unwritten doublewrite buffer page:
561  do nothing */
562  } else {
563  ulint zip_size = fil_space_get_zip_size(space_id);
564 
565  /* Read in the actual page from the file */
566  fil_io(OS_FILE_READ, TRUE, space_id, zip_size,
567  page_no, 0,
568  zip_size ? zip_size : UNIV_PAGE_SIZE,
569  read_buf, NULL);
570 
571  /* Check if the page is corrupt */
572 
573  if (UNIV_UNLIKELY
574  (buf_page_is_corrupted(read_buf, zip_size))) {
575 
576  fprintf(stderr,
577  "InnoDB: Warning: database page"
578  " corruption or a failed\n"
579  "InnoDB: file read of"
580  " space %lu page %lu.\n"
581  "InnoDB: Trying to recover it from"
582  " the doublewrite buffer.\n",
583  (ulong) space_id, (ulong) page_no);
584 
585  if (buf_page_is_corrupted(page, zip_size)) {
586  fprintf(stderr,
587  "InnoDB: Dump of the page:\n");
588  buf_page_print(read_buf, zip_size);
589  fprintf(stderr,
590  "InnoDB: Dump of"
591  " corresponding page"
592  " in doublewrite buffer:\n");
593  buf_page_print(page, zip_size);
594 
595  fprintf(stderr,
596  "InnoDB: Also the page in the"
597  " doublewrite buffer"
598  " is corrupt.\n"
599  "InnoDB: Cannot continue"
600  " operation.\n"
601  "InnoDB: You can try to"
602  " recover the database"
603  " with the my.cnf\n"
604  "InnoDB: option:\n"
605  "InnoDB:"
606  " innodb_force_recovery=6\n");
607  exit(1);
608  }
609 
610  /* Write the good page from the
611  doublewrite buffer to the intended
612  position */
613 
614  fil_io(OS_FILE_WRITE, TRUE, space_id,
615  zip_size, page_no, 0,
616  zip_size ? zip_size : UNIV_PAGE_SIZE,
617  page, NULL);
618  fprintf(stderr,
619  "InnoDB: Recovered the page from"
620  " the doublewrite buffer.\n");
621  }
622  }
623 
624  page += UNIV_PAGE_SIZE;
625  }
626 
627  fil_flush_file_spaces(FIL_TABLESPACE);
628 
629 leave_func:
630  ut_free(unaligned_read_buf);
631 }
632 
633 /****************************************************************/
/* Walks trx_sys->trx_list from its first element and compares every
entry against in_trx by pointer identity. The caller is expected to hold
kernel_mutex (checked with ut_ad).
Returns TRUE if in_trx is found in the list, FALSE if the end of the list
is reached without a match. */
636 UNIV_INTERN
637 ibool
639 /*============*/
640  trx_t* in_trx) /*!< in: transaction to look for */
641 {
642  trx_t* trx; /* cursor over trx_sys->trx_list */
643 
644  ut_ad(mutex_own(&(kernel_mutex)));
645 
646  trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
647 
648  while (trx != NULL) {
649 
650  if (trx == in_trx) {
651 
652  return(TRUE);
653  }
654 
655  trx = UT_LIST_GET_NEXT(trx_list, trx);
656  }
657 
658  return(FALSE);
659 }
660 
661 /*****************************************************************/
663 UNIV_INTERN
664 void
665 trx_sys_flush_max_trx_id(void)
666 /*==========================*/
667 {
668  trx_sysf_t* sys_header;
669  mtr_t mtr;
670 
671  ut_ad(mutex_own(&kernel_mutex));
672 
673  mtr_start(&mtr);
674 
675  sys_header = trx_sysf_get(&mtr);
676 
678  trx_sys->max_trx_id, &mtr);
679  mtr_commit(&mtr);
680 }
681 
682 UNIV_INTERN
683 void
684 trx_sys_flush_commit_id(uint64_t commit_id, ulint field, mtr_t* mtr)
685 {
686  trx_sysf_t* sys_header;
687 
688  sys_header = trx_sysf_get(mtr);
689 
690  mlog_write_ull(sys_header + field + TRX_SYS_DRIZZLE_MAX_COMMIT_ID,
691  commit_id, mtr);
692 }
693 
694 
695 UNIV_INTERN
696 void
698 /*===================================*/
699 {
700  trx_sysf_t* sys_header;
701  mtr_t mtr;
702 
703  mtr_start(&mtr);
704 
705  sys_header = trx_sysf_get(&mtr);
706 
709 
710  mtr_commit(&mtr);
711 }
712 
713 /****************************************************************/
/* Scans the rollback segment slots 0 .. TRX_SYS_N_RSEGS - 1 of the
transaction system header and looks for the first slot whose stored page
number is FIL_NULL. The caller is expected to hold kernel_mutex (checked
with ut_ad).
Returns the index of the first such slot, or ULINT_UNDEFINED if every
slot holds a page number other than FIL_NULL. */
716 UNIV_INTERN
717 ulint
719 /*====================*/
720  mtr_t* mtr) /*!< in: mini-transaction used to access the header */
721 {
722  trx_sysf_t* sys_header;
723  ulint page_no;
724  ulint i;
725 
726  ut_ad(mutex_own(&(kernel_mutex)));
727 
728  sys_header = trx_sysf_get(mtr);
729 
730  for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
731 
732  page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr);
733 
734  if (page_no == FIL_NULL) {
735 
736  return(i);
737  }
738  }
739 
740  return(ULINT_UNDEFINED);
741 }
742 
743 /*****************************************************************/
746 static
747 void
748 trx_sysf_create(
749 /*============*/
750  mtr_t* mtr)
751 {
752  trx_sysf_t* sys_header;
753  ulint slot_no;
754  buf_block_t* block;
755  page_t* page;
756  ulint page_no;
757  byte* ptr;
758  ulint len;
759 
760  ut_ad(mtr);
761 
762  /* Note that below we first reserve the file space x-latch, and
763  then enter the kernel: we must do it in this order to conform
764  to the latching order rules. */
765 
766  mtr_x_lock(fil_space_get_latch(TRX_SYS_SPACE, NULL), mtr);
767  mutex_enter(&kernel_mutex);
768 
769  /* Create the trx sys file block in a new allocated file segment */
770  block = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
771  mtr);
772  buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
773 
774  ut_a(buf_block_get_page_no(block) == TRX_SYS_PAGE_NO);
775 
776  page = buf_block_get_frame(block);
777 
779  MLOG_2BYTES, mtr);
780 
781  /* Reset the doublewrite buffer magic number to zero so that we
782  know that the doublewrite buffer has not yet been created (this
783  suppresses a Valgrind warning) */
784 
787 
788  sys_header = trx_sysf_get(mtr);
789 
790  /* Start counting transaction ids from number 1 up */
791  mach_write_to_8(sys_header + TRX_SYS_TRX_ID_STORE, 1);
792 
793  /* Reset the rollback segment slots. Old versions of InnoDB
794  define TRX_SYS_N_RSEGS as 256 (TRX_SYS_OLD_N_RSEGS) and expect
795  that the whole array is initialized. */
796  ptr = TRX_SYS_RSEGS + sys_header;
797  len = ut_max(TRX_SYS_OLD_N_RSEGS, TRX_SYS_N_RSEGS)
798  * TRX_SYS_RSEG_SLOT_SIZE;
799  memset(ptr, 0xff, len);
800  ptr += len;
801  ut_a(ptr <= page + (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END));
802 
803  /* Initialize all of the page. This part used to be uninitialized. */
804  memset(ptr, 0, UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + page - ptr);
805 
806  mlog_log_string(sys_header, UNIV_PAGE_SIZE - FIL_PAGE_DATA_END
807  + page - sys_header, mtr);
808 
809  /* Create the first rollback segment in the SYSTEM tablespace */
810  slot_no = trx_sysf_rseg_find_free(mtr);
811  page_no = trx_rseg_header_create(TRX_SYS_SPACE, 0, ULINT_MAX, slot_no,
812  mtr);
813  ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
814  ut_a(page_no == FSP_FIRST_RSEG_PAGE_NO);
815 
816  mutex_exit(&kernel_mutex);
817 }
818 
819 /*****************************************************************/
822 UNIV_INTERN
823 void
825 /*==========================*/
826 {
827  trx_sysf_t* sys_header;
828  ib_uint64_t rows_to_undo = 0;
829  const char* unit = "";
830  trx_t* trx;
831  mtr_t mtr;
832 
833  mtr_start(&mtr);
834 
835  ut_ad(trx_sys == NULL);
836 
837  mutex_enter(&kernel_mutex);
838 
839  trx_sys = static_cast<trx_sys_t *>(mem_alloc(sizeof(trx_sys_t)));
840 
841  sys_header = trx_sysf_get(&mtr);
842 
843  trx_rseg_list_and_array_init(sys_header, &mtr);
844 
846 
847  /* VERY important: after the database is started, max_trx_id value is
848  divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the 'if' in
849  trx_sys_get_new_trx_id will evaluate to TRUE when the function
850  is first time called, and the value for trx id will be written
851  to the disk-based header! Thus trx id values will not overlap when
852  the database is repeatedly started! */
853 
858 
859  UT_LIST_INIT(trx_sys->mysql_trx_list);
862 
863  if (UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
864  trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
865 
866  for (;;) {
867 
868  if (trx->conc_state != TRX_PREPARED) {
869  rows_to_undo += trx->undo_no;
870  }
871 
872  trx = UT_LIST_GET_NEXT(trx_list, trx);
873 
874  if (!trx) {
875  break;
876  }
877  }
878 
879  if (rows_to_undo > 1000000000) {
880  unit = "M";
881  rows_to_undo = rows_to_undo / 1000000;
882  }
883 
884  fprintf(stderr,
885  "InnoDB: %lu transaction(s) which must be"
886  " rolled back or cleaned up\n"
887  "InnoDB: in total %lu%s row operations to undo\n",
888  (ulong) UT_LIST_GET_LEN(trx_sys->trx_list),
889  (ulong) rows_to_undo, unit);
890 
891  fprintf(stderr, "InnoDB: Trx id counter is " TRX_ID_FMT "\n",
893  }
894 
895  UT_LIST_INIT(trx_sys->view_list);
896 
898 
899  mutex_exit(&kernel_mutex);
900 
901  mtr_commit(&mtr);
902 }
903 
904 /*****************************************************************/
906 UNIV_INTERN
907 void
909 /*================*/
910 {
911  mtr_t mtr;
912 
913  mtr_start(&mtr);
914 
915  trx_sysf_create(&mtr);
916 
917  mtr_commit(&mtr);
918 
920 }
921 
922 /*****************************************************************/
925 static
926 ibool
927 trx_sys_file_format_max_write(
928 /*==========================*/
929  ulint format_id,
930  const char** name)
932 {
933  mtr_t mtr;
934  byte* ptr;
935  buf_block_t* block;
936  ib_uint64_t tag_value;
937 
938  mtr_start(&mtr);
939 
940  block = buf_page_get(
941  TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
942 
943  file_format_max.id = format_id;
944  file_format_max.name = trx_sys_file_format_id_to_name(format_id);
945 
946  ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
947  tag_value = format_id + TRX_SYS_FILE_FORMAT_TAG_MAGIC_N;
948 
949  if (name) {
950  *name = file_format_max.name;
951  }
952 
953  mlog_write_ull(ptr, tag_value, &mtr);
954 
955  mtr_commit(&mtr);
956 
957  return(TRUE);
958 }
959 
960 /*****************************************************************/
963 static
964 ulint
965 trx_sys_file_format_max_read(void)
966 /*==============================*/
967 {
968  mtr_t mtr;
969  const byte* ptr;
970  const buf_block_t* block;
971  ib_id_t file_format_id;
972 
973  /* Since this is called during the startup phase it's safe to
974  read the value without a covering mutex. */
975  mtr_start(&mtr);
976 
977  block = buf_page_get(
978  TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
979 
980  ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
981  file_format_id = mach_read_from_8(ptr);
982 
983  mtr_commit(&mtr);
984 
985  file_format_id -= TRX_SYS_FILE_FORMAT_TAG_MAGIC_N;
986 
987  if (file_format_id >= FILE_FORMAT_NAME_N) {
988 
989  /* Either it has never been tagged, or garbage in it. */
990  return(ULINT_UNDEFINED);
991  }
992 
993  return((ulint) file_format_id);
994 }
995 
996 /*****************************************************************/
/* Maps a file format id to its name by indexing file_format_name_map.
An id that is not smaller than FILE_FORMAT_NAME_N fails the ut_a()
assertion instead of being looked up.
Returns a pointer to the statically allocated name string. */
999 UNIV_INTERN
1000 const char*
1002 /*===========================*/
1003  const ulint id) /*!< in: file format id, must be < FILE_FORMAT_NAME_N */
1004 {
1005  ut_a(id < FILE_FORMAT_NAME_N);
1006 
1007  return(file_format_name_map[id]);
1008 }
1009 
1010 /*****************************************************************/
1014 UNIV_INTERN
1015 ulint
1017 /*==========================*/
1018  ulint max_format_id)
1019 {
1020  ulint format_id;
1021 
1022  /* Check the file format in the tablespace. Do not try to
1023  recover if the file format is not supported by the engine
1024  unless forced by the user. */
1025  format_id = trx_sys_file_format_max_read();
1026  if (format_id == ULINT_UNDEFINED) {
1027  /* Format ID was not set. Set it to minimum possible
1028  value. */
1029  format_id = DICT_TF_FORMAT_MIN;
1030  }
1031 
1032  drizzled::errmsg_printf(drizzled::error::INFO, "InnoDB: highest supported file format is %s",
1034 
1035  if (format_id > DICT_TF_FORMAT_MAX) {
1036 
1037  ut_a(format_id < FILE_FORMAT_NAME_N);
1038 
1039  drizzled::errmsg_printf(drizzled::error::ERROR,
1040  "InnoDB: %s: the system tablespace is in a file "
1041  "format that this version doesn't support - %s",
1042  ((max_format_id <= DICT_TF_FORMAT_MAX)
1043  ? "Error" : "Warning"),
1044  trx_sys_file_format_id_to_name(format_id));
1045 
1046  if (max_format_id <= DICT_TF_FORMAT_MAX) {
1047  return(DB_ERROR);
1048  }
1049  }
1050 
1051  format_id = (format_id > max_format_id) ? format_id : max_format_id;
1052 
1053  /* We don't need a mutex here, as this function should only
1054  be called once at start up. */
1055  file_format_max.id = format_id;
1056  file_format_max.name = trx_sys_file_format_id_to_name(format_id);
1057 
1058  return(DB_SUCCESS);
1059 }
1060 
1061 /*****************************************************************/
/* Sets the maximum file format to format_id, which may be lower or
higher than the current value. The comparison and the write are done
while holding file_format_max.mutex. format_id must not exceed
DICT_TF_FORMAT_MAX (checked with ut_a).
Returns the result of trx_sys_file_format_max_write() if the value was
changed, FALSE if format_id already equals file_format_max.id. */
1065 UNIV_INTERN
1066 ibool
1068 /*========================*/
1069  ulint format_id, /*!< in: file format id to set */
1070  const char** name) /*!< out: passed through to
   trx_sys_file_format_max_write(), which
   stores the format name in it unless it
   is NULL */
1072 {
1073  ibool ret = FALSE;
1074 
1075  ut_a(format_id <= DICT_TF_FORMAT_MAX);
1076 
1077  mutex_enter(&file_format_max.mutex);
1078 
1079  /* Only update if not already same value. */
1080  if (format_id != file_format_max.id) {
1081 
1082  ret = trx_sys_file_format_max_write(format_id, name);
1083  }
1084 
1085  mutex_exit(&file_format_max.mutex);
1086 
1087  return(ret);
1088 }
1089 
1090 /********************************************************************/
1095 UNIV_INTERN
1096 void
1098 /*==============================*/
1099 {
1100  ulint format_id;
1101 
1102  format_id = trx_sys_file_format_max_read();
1103 
1104  /* If format_id is not set then set it to the minimum. */
1105  if (format_id == ULINT_UNDEFINED) {
1107  }
1108 }
1109 
1110 /********************************************************************/
/* Raises the maximum file format to format_id, but only when format_id
is strictly greater than the current file_format_max.id; the value is
never lowered here. The comparison and the write are done while holding
file_format_max.mutex. Preconditions checked with ut_a: name is not
NULL, file_format_max.name has already been set, and format_id does not
exceed DICT_TF_FORMAT_MAX.
Returns the result of trx_sys_file_format_max_write() if the value was
raised, FALSE otherwise. */
1114 UNIV_INTERN
1115 ibool
1117 /*============================*/
1118  const char** name, /*!< out: receives the new format name when the
   value is raised; must not be NULL */
1119  ulint format_id) /*!< in: candidate file format id */
1120 {
1121  ibool ret = FALSE;
1122 
1123  ut_a(name);
1124  ut_a(file_format_max.name != NULL);
1125  ut_a(format_id <= DICT_TF_FORMAT_MAX);
1126 
1127  mutex_enter(&file_format_max.mutex);
1128 
1129  if (format_id > file_format_max.id) {
1130 
1131  ret = trx_sys_file_format_max_write(format_id, name);
1132  }
1133 
1134  mutex_exit(&file_format_max.mutex);
1135 
1136  return(ret);
1137 }
1138 
1139 /*****************************************************************/
/* Returns the name currently stored in file_format_max.name. The value
is read without taking file_format_max.mutex. */
1142 UNIV_INTERN
1143 const char*
1145 /*=============================*/
1146 {
1147  return(file_format_max.name);
1148 }
1149 
1150 /*****************************************************************/
/* Initializes the file_format_max bookkeeping: creates
file_format_max.mutex and sets the in-memory maximum format to
DICT_TF_FORMAT_MIN together with the matching name. Nothing is read from
or written to the tablespace here. */
1152 UNIV_INTERN
1153 void
1155 /*==========================*/
1156 {
1157  mutex_create(file_format_max_mutex_key,
1158  &file_format_max.mutex, SYNC_FILE_FORMAT_TAG);
1159 
1160  /* We don't need a mutex here, as this function should only
1161  be called once at start up. */
1162  file_format_max.id = DICT_TF_FORMAT_MIN;
1163 
1164  file_format_max.name = trx_sys_file_format_id_to_name(
1165  file_format_max.id);
1166 }
1167 
1168 /*****************************************************************/
/* Placeholder with an empty body.
NOTE(review): the mutex created by the file format init routine is not
freed here — confirm whether it is released elsewhere. */
1170 UNIV_INTERN
1171 void
1173 /*===========================*/
1174 {
1175  /* Does nothing at the moment */
1176 }
1177 
1178 /*********************************************************************
1179 Creates the rollback segments. Calls trx_rseg_create() up to n_rsegs
times and stops at the first call that returns NULL. If at least one
segment was created, the number created by this call is printed to
stderr. */
1180 UNIV_INTERN
1181 void
1183 /*=================*/
1184  ulint n_rsegs) /*!< in: maximum number of rollback segments to create */
1185 {
1186  ulint new_rsegs = 0; /* segments successfully created by this call */
1187 
1188  /* Do not create additional rollback segments if
1189  innodb_force_recovery has been set or recv_needed_recovery is
1190  set (presumably: the database was not shut down cleanly). */
1191  if (!srv_force_recovery && !recv_needed_recovery) {
1192  ulint i;
1193 
1194  for (i = 0; i < n_rsegs; ++i) {
1195 
1196  if (trx_rseg_create() != NULL) {
1197  ++new_rsegs;
1198  } else {
1199  break;
1200  }
1201  }
1202  }
1203 
 /* NOTE(review): new_rsegs is a ulint passed to "%lu" without the
 (ulong) cast that the other fprintf() calls in this file use —
 confirm that ulint is unsigned long on every supported platform. */
1204  if (new_rsegs > 0) {
1205  fprintf(stderr,
1206  "InnoDB: %lu rollback segment(s) active.\n",
1207  new_rsegs);
1208  }
1209 }
1210 
1211 #else /* !UNIV_HOTBACKUP */
1212 
1213 /* THESE ARE COPIED FROM NON-HOTBACKUP PART OF THE INNODB SOURCE TREE
1214  (This code duplication should be fixed at some point!)
1215 */
1216 
1217 #define TRX_SYS_SPACE 0 /* the SYSTEM tablespace */
1218 /* The offset of the file format tag on the trx system header page */
1219 #define TRX_SYS_FILE_FORMAT_TAG (UNIV_PAGE_SIZE - 16)
1220 /* We use these random constants to reduce the probability of reading
1221 garbage (from previous versions) that maps to an actual format id. We
1222 use these as bit masks at the time of reading and writing from/to disk. */
1223 #define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW 3645922177UL
1224 #define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH 2745987765UL
1225 
1226 /* END OF COPIED DEFINITIONS */
1227 
1228 
1229 /*****************************************************************/
1235 UNIV_INTERN
1236 ibool
1237 trx_sys_read_file_format_id(
1238 /*========================*/
1239  const char *pathname,
1241  ulint *format_id)
1243 {
1244  os_file_t file;
1245  ibool success;
1246  byte buf[UNIV_PAGE_SIZE * 2];
1247  page_t* page = ut_align(buf, UNIV_PAGE_SIZE);
1248  const byte* ptr;
1249  ib_id_t file_format_id;
1250 
1251  *format_id = ULINT_UNDEFINED;
1252 
1253  file = os_file_create_simple_no_error_handling(
1254  innodb_file_data_key,
1255  pathname,
1256  OS_FILE_OPEN,
1257  OS_FILE_READ_ONLY,
1258  &success
1259  );
1260  if (!success) {
1261  /* The following call prints an error message */
1262  os_file_get_last_error(TRUE);
1263 
1264  ut_print_timestamp(stderr);
1265 
1266  fprintf(stderr,
1267 " ibbackup: Error: trying to read system tablespace file format,\n"
1268 " ibbackup: but could not open the tablespace file %s!\n",
1269  pathname
1270  );
1271  return(FALSE);
1272  }
1273 
1274  /* Read the page on which file format is stored */
1275 
1276  success = os_file_read_no_error_handling(
1277  file, page, TRX_SYS_PAGE_NO * UNIV_PAGE_SIZE, 0, UNIV_PAGE_SIZE
1278  );
1279  if (!success) {
1280  /* The following call prints an error message */
1281  os_file_get_last_error(TRUE);
1282 
1283  ut_print_timestamp(stderr);
1284 
1285  fprintf(stderr,
1286 " ibbackup: Error: trying to read system table space file format,\n"
1287 " ibbackup: but failed to read the tablespace file %s!\n",
1288  pathname
1289  );
1290  os_file_close(file);
1291  return(FALSE);
1292  }
1293  os_file_close(file);
1294 
1295  /* get the file format from the page */
1296  ptr = page + TRX_SYS_FILE_FORMAT_TAG;
1297  file_format_id = mach_read_from_8(ptr);
1298  file_format_id -= TRX_SYS_FILE_FORMAT_TAG_MAGIC_N;
1299 
1300  if (file_format_id >= FILE_FORMAT_NAME_N) {
1301 
1302  /* Either it has never been tagged, or garbage in it. */
1303  return(TRUE);
1304  }
1305 
1306  *format_id = (ulint) file_format_id;
1307 
1308  return(TRUE);
1309 }
1310 
1311 
1312 /*****************************************************************/
1315 UNIV_INTERN
1316 ibool
1317 trx_sys_read_pertable_file_format_id(
1318 /*=================================*/
1319  const char *pathname,
1321  ulint *format_id)
1323 {
1324  os_file_t file;
1325  ibool success;
1326  byte buf[UNIV_PAGE_SIZE * 2];
1327  page_t* page = ut_align(buf, UNIV_PAGE_SIZE);
1328  const byte* ptr;
1329  ib_uint32_t flags;
1330 
1331  *format_id = ULINT_UNDEFINED;
1332 
1333  file = os_file_create_simple_no_error_handling(
1334  innodb_file_data_key,
1335  pathname,
1336  OS_FILE_OPEN,
1337  OS_FILE_READ_ONLY,
1338  &success
1339  );
1340  if (!success) {
1341  /* The following call prints an error message */
1342  os_file_get_last_error(TRUE);
1343 
1344  ut_print_timestamp(stderr);
1345 
1346  fprintf(stderr,
1347 " ibbackup: Error: trying to read per-table tablespace format,\n"
1348 " ibbackup: but could not open the tablespace file %s!\n",
1349  pathname
1350  );
1351  return(FALSE);
1352  }
1353 
1354  /* Read the first page of the per-table datafile */
1355 
1356  success = os_file_read_no_error_handling(
1357  file, page, 0, 0, UNIV_PAGE_SIZE
1358  );
1359  if (!success) {
1360  /* The following call prints an error message */
1361  os_file_get_last_error(TRUE);
1362 
1363  ut_print_timestamp(stderr);
1364 
1365  fprintf(stderr,
1366 " ibbackup: Error: trying to per-table data file format,\n"
1367 " ibbackup: but failed to read the tablespace file %s!\n",
1368  pathname
1369  );
1370  os_file_close(file);
1371  return(FALSE);
1372  }
1373  os_file_close(file);
1374 
1375  /* get the file format from the page */
1376  ptr = page + 54;
1377  flags = mach_read_from_4(ptr);
1378  if (flags == 0) {
1379  /* file format is Antelope */
1380  *format_id = 0;
1381  return (TRUE);
1382  } else if (flags & 1) {
1383  /* tablespace flags are ok */
1384  *format_id = (flags / 32) % 128;
1385  return (TRUE);
1386  } else {
1387  /* bad tablespace flags */
1388  return(FALSE);
1389  }
1390 }
1391 
1392 
1393 /*****************************************************************/
/* Hot-backup variant of the file format id to name lookup. Unlike the
variant in the non-hot-backup branch, which asserts on an out-of-range
id, this one returns the literal "Unknown" for any id that is not
smaller than FILE_FORMAT_NAME_N.
Returns a pointer to a statically allocated name string. */
1396 UNIV_INTERN
1397 const char*
1399 /*===========================*/
1400  const ulint id) /*!< in: file format id */
1401 {
1402  if (!(id < FILE_FORMAT_NAME_N)) {
1403  /* unknown id */
1404  return ("Unknown");
1405  }
1406 
1407  return(file_format_name_map[id]);
1408 }
1409 
1410 #endif /* !UNIV_HOTBACKUP */
1411 
1412 #ifndef UNIV_HOTBACKUP
1413 /*********************************************************************
1414 Shutdown/Close the transaction system. */
1415 UNIV_INTERN
1416 void
1418 /*===============*/
1419 {
1420  trx_rseg_t* rseg;
1421  read_view_t* view;
1422 
1423  ut_ad(trx_sys != NULL);
1424 
1425  /* Check that all read views are closed except read view owned
1426  by a purge. */
1427 
1428  if (UT_LIST_GET_LEN(trx_sys->view_list) > 1) {
1429  fprintf(stderr,
1430  "InnoDB: Error: all read views were not closed"
1431  " before shutdown:\n"
1432  "InnoDB: %lu read views open \n",
1433  static_cast<ulint>(UT_LIST_GET_LEN(trx_sys->view_list)) - 1);
1434  }
1435 
1437  trx_dummy_sess = NULL;
1438 
1440 
1441  mutex_enter(&kernel_mutex);
1442 
1443  /* Free the double write data structures. */
1444  ut_a(trx_doublewrite != NULL);
1447 
1450 
1451  mutex_free(&trx_doublewrite->mutex);
1453  trx_doublewrite = NULL;
1454 
1455  /* There can't be any active transactions. */
1456  rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
1457 
1458  while (rseg != NULL) {
1459  trx_rseg_t* prev_rseg = rseg;
1460 
1461  rseg = UT_LIST_GET_NEXT(rseg_list, prev_rseg);
1462  UT_LIST_REMOVE(rseg_list, trx_sys->rseg_list, prev_rseg);
1463 
1464  trx_rseg_mem_free(prev_rseg);
1465  }
1466 
1467  view = UT_LIST_GET_FIRST(trx_sys->view_list);
1468 
1469  while (view != NULL) {
1470  read_view_t* prev_view = view;
1471 
1472  view = UT_LIST_GET_NEXT(view_list, prev_view);
1473 
1474  /* Views are allocated from the trx_sys->global_read_view_heap.
1475  So, we simply remove the element here. */
1476  UT_LIST_REMOVE(view_list, trx_sys->view_list, prev_view);
1477  }
1478 
1479  if (! srv_apply_log_only) {
1480  ut_a(UT_LIST_GET_LEN(trx_sys->trx_list) == 0);
1481  ut_a(UT_LIST_GET_LEN(trx_sys->rseg_list) == 0);
1482  ut_a(UT_LIST_GET_LEN(trx_sys->view_list) == 0);
1483  ut_a(UT_LIST_GET_LEN(trx_sys->mysql_trx_list) == 0);
1484  }
1485 
1486  mem_free(trx_sys);
1487 
1488  trx_sys = NULL;
1489  mutex_exit(&kernel_mutex);
1490 }
1491 #endif /* !UNIV_HOTBACKUP */