Drizzled Public API Documentation

fil0fil.cc
1 /*****************************************************************************
2 
3 Copyright (C) 1995, 2010, Innobase Oy. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free Software
7 Foundation; version 2 of the License.
8 
9 This program is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12 
13 You should have received a copy of the GNU General Public License along with
14 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
15 St, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 *****************************************************************************/
18 
19 /**************************************************/
26 #include "fil0fil.h"
27 
28 #include "mem0mem.h"
29 #include "hash0hash.h"
30 #include "os0file.h"
31 #include "mach0data.h"
32 #include "buf0buf.h"
33 #include "buf0flu.h"
34 #include "log0recv.h"
35 #include "fsp0fsp.h"
36 #include "srv0srv.h"
37 #include "srv0start.h"
38 #include "mtr0mtr.h"
39 #include "mtr0log.h"
40 #include "dict0dict.h"
41 #include "page0page.h"
42 #include "page0zip.h"
43 #include "xtrabackup_api.h"
44 #ifndef UNIV_HOTBACKUP
45 # include "buf0lru.h"
46 # include "ibuf0ibuf.h"
47 # include "sync0sync.h"
48 # include "os0sync.h"
49 #else /* !UNIV_HOTBACKUP */
50 static ulint srv_data_read, srv_data_written;
51 #endif /* !UNIV_HOTBACKUP */
52 
53 /*
54  IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE
55  =============================================
56 
57 The tablespace cache is responsible for providing fast read/write access to
58 tablespaces and logs of the database. File creation and deletion is done
59 in other modules which know more of the logic of the operation, however.
60 
61 A tablespace consists of a chain of files. The size of the files does not
62 have to be divisible by the database block size, because we may just leave
63 the last incomplete block unused. When a new file is appended to the
64 tablespace, the maximum size of the file is also specified. At the moment,
65 we think that it is best to extend the file to its maximum size already at
66 the creation of the file, because then we can avoid dynamically extending
67 the file when more space is needed for the tablespace.
68 
69 A block's position in the tablespace is specified with a 32-bit unsigned
70 integer. The files in the chain are thought to be catenated, and the block
71 corresponding to an address n is the nth block in the catenated file (where
72 the first block is named the 0th block, and the incomplete block fragments
73 at the end of files are not taken into account). A tablespace can be extended
74 by appending a new file at the end of the chain.
75 
76 Our tablespace concept is similar to the one of Oracle.
77 
78 To acquire more speed in disk transfers, a technique called disk striping is
79 sometimes used. This means that logical block addresses are divided in a
80 round-robin fashion across several disks. Windows NT supports disk striping,
81 so there we do not need to support it in the database. Disk striping is
82 implemented in hardware in RAID disks. We conclude that it is not necessary
83 to implement it in the database. Oracle 7 does not support disk striping,
84 either.
85 
86 Another trick used at some database sites is replacing tablespace files by
87 raw disks, that is, the whole physical disk drive, or a partition of it, is
88 opened as a single file, and it is accessed through byte offsets calculated
89 from the start of the disk or the partition. This is recommended in some
90 books on database tuning to achieve more speed in i/o. Using raw disk
91 certainly prevents the OS from fragmenting disk space, but it is not clear
92 if it really adds speed. We measured on the Pentium 100 MHz + NT + NTFS file
93 system + EIDE Conner disk only a negligible difference in speed when reading
94 from a file, versus reading from a raw disk.
95 
96 To have fast access to a tablespace or a log file, we put the data structures
97 in a hash table. Each tablespace and log file is given a unique 32-bit
98 identifier.
99 
100 Some operating systems do not support many open files at the same time,
101 though NT seems to tolerate at least 900 open files. Therefore, we put the
102 open files in an LRU-list. If we need to open another file, we may close the
103 file at the end of the LRU-list. When an i/o-operation is pending on a file,
104 the file cannot be closed. We take the file nodes with pending i/o-operations
105 out of the LRU-list and keep a count of pending operations. When an operation
106 completes, we decrement the count and return the file node to the LRU-list if
107 the count drops to zero. */
108 
112 UNIV_INTERN const char* fil_path_to_mysql_datadir = ".";
113 
115 UNIV_INTERN ulint fil_n_log_flushes = 0;
116 
118 UNIV_INTERN ulint fil_n_pending_log_flushes = 0;
120 UNIV_INTERN ulint fil_n_pending_tablespace_flushes = 0;
121 
123 UNIV_INTERN fil_addr_t fil_addr_null = {FIL_NULL, 0};
124 
125 #ifdef UNIV_PFS_MUTEX
126 /* Key to register fil_system_mutex with performance schema */
127 UNIV_INTERN mysql_pfs_key_t fil_system_mutex_key;
128 #endif /* UNIV_PFS_MUTEX */
129 
130 #ifdef UNIV_PFS_RWLOCK
131 /* Key to register file space latch with performance schema */
132 UNIV_INTERN mysql_pfs_key_t fil_space_latch_key;
133 #endif /* UNIV_PFS_RWLOCK */
134 
139  char* name;
140  ibool open;
142  ibool is_raw_disk;
144  ulint size;
147  ulint n_pending;
157  ib_int64_t flush_counter;
160  UT_LIST_NODE_T(fil_node_t) chain;
164  ulint magic_n;
165 };
166 
168 #define FIL_NODE_MAGIC_N 89389
169 
172  char* name;
174  ulint id;
175  ib_int64_t tablespace_version;
181  ibool mark;
185  ibool stop_ios;
199  ulint purpose;
203  ulint size;
207  ulint flags;
208  ulint n_reserved_extents;
214  ulint n_pending_ibuf_merges;
220  hash_node_t hash;
221  hash_node_t name_hash;
222 #ifndef UNIV_HOTBACKUP
225 #endif /* !UNIV_HOTBACKUP */
226  UT_LIST_NODE_T(fil_space_t) unflushed_spaces;
229  ibool is_in_unflushed_spaces;
231  UT_LIST_NODE_T(fil_space_t) space_list;
233  ulint magic_n;
234 };
235 
237 #define FIL_SPACE_MAGIC_N 89472
238 
240 typedef struct fil_system_struct fil_system_t;
241 
247 #ifndef UNIV_HOTBACKUP
249 #endif /* !UNIV_HOTBACKUP */
266  UT_LIST_BASE_NODE_T(fil_space_t) unflushed_spaces;
272  ulint n_open;
273  ulint max_n_open;
277  ulint max_assigned_id;
283  ib_int64_t tablespace_version;
291  UT_LIST_BASE_NODE_T(fil_space_t) space_list;
293  ibool space_id_reuse_warned;
294  /* !< TRUE if fil_space_create()
295  has issued a warning about
296  potential space_id reuse */
297 };
298 
301 fil_system_t* fil_system = NULL;
302 
#ifdef UNIV_DEBUG

/** fil_validate() is really run only on every FIL_VALIDATE_SKIP-th call
of fil_validate_skip() */
# define FIL_VALIDATE_SKIP	17

/******************************************************************//**
Rate-limited wrapper around fil_validate() for debug builds.
@return TRUE on the calls that are skipped, otherwise the result of
fil_validate() */
static
ibool
fil_validate_skip(void)
/*===================*/
{
	/** Calls remaining until the next real validation */
	static int	calls_until_check = FIL_VALIDATE_SKIP;

	/* The counter is updated without synchronisation. The race is
	harmless: the counter only throttles how often the expensive
	fil_validate() check runs in debug builds. */
	calls_until_check--;

	if (calls_until_check <= 0) {
		calls_until_check = FIL_VALIDATE_SKIP;

		return(fil_validate());
	}

	return(TRUE);
}
#endif /* UNIV_DEBUG */
331 
332 /********************************************************************/
339 static
340 void
341 fil_node_prepare_for_io(
342 /*====================*/
343  fil_node_t* node,
344  fil_system_t* system,
345  fil_space_t* space);
346 /********************************************************************/
349 static
350 void
351 fil_node_complete_io(
352 /*=================*/
353  fil_node_t* node,
354  fil_system_t* system,
355  ulint type);
358 /*******************************************************************/
362 static
363 ulint
364 fil_get_space_id_for_table(
365 /*=======================*/
366  const char* name);
368 /*******************************************************************/
373 static
374 ibool
375 fil_space_free(
376 /*===========*/
377  ulint id, /* in: space id */
378  ibool x_latched); /* in: TRUE if caller has space->latch
379  in X mode */
380 /********************************************************************/
386 UNIV_INLINE
387 ulint
388 fil_read(
389 /*=====*/
390  ibool sync,
391  ulint space_id,
392  ulint zip_size,
394  ulint block_offset,
395  ulint byte_offset,
397  ulint len,
400  void* buf,
402  void* message)
404 {
405  return(fil_io(OS_FILE_READ, sync, space_id, zip_size, block_offset,
406  byte_offset, len, buf, message));
407 }
408 
409 /********************************************************************/
415 UNIV_INLINE
416 ulint
417 fil_write(
418 /*======*/
419  ibool sync,
420  ulint space_id,
421  ulint zip_size,
423  ulint block_offset,
424  ulint byte_offset,
426  ulint len,
429  void* buf,
431  void* message)
433 {
434  return(fil_io(OS_FILE_WRITE, sync, space_id, zip_size, block_offset,
435  byte_offset, len, buf, message));
436 }
437 
438 /*******************************************************************/
440 UNIV_INLINE
442 fil_space_get_by_id(
443 /*================*/
444  ulint id)
445 {
447 
448  ut_ad(mutex_own(&fil_system->mutex));
449 
450  HASH_SEARCH(hash, fil_system->spaces, id,
451  fil_space_t*, space,
452  ut_ad(space->magic_n == FIL_SPACE_MAGIC_N),
453  space->id == id);
454 
455  return(space);
456 }
457 
458 /*******************************************************************/
460 UNIV_INLINE
462 fil_space_get_by_name(
463 /*==================*/
464  const char* name)
465 {
467  ulint fold;
468 
469  ut_ad(mutex_own(&fil_system->mutex));
470 
471  fold = ut_fold_string(name);
472 
473  HASH_SEARCH(name_hash, fil_system->name_hash, fold,
474  fil_space_t*, space,
475  ut_ad(space->magic_n == FIL_SPACE_MAGIC_N),
476  !strcmp(name, space->name));
477 
478  return(space);
479 }
480 
481 #ifndef UNIV_HOTBACKUP
482 /*******************************************************************/
486 UNIV_INTERN
487 ib_int64_t
488 fil_space_get_version(
489 /*==================*/
490  ulint id)
491 {
493  ib_int64_t version = -1;
494 
495  ut_ad(fil_system);
496 
497  mutex_enter(&fil_system->mutex);
498 
499  space = fil_space_get_by_id(id);
500 
501  if (space) {
502  version = space->tablespace_version;
503  }
504 
505  mutex_exit(&fil_system->mutex);
506 
507  return(version);
508 }
509 
510 /*******************************************************************/
513 UNIV_INTERN
514 rw_lock_t*
515 fil_space_get_latch(
516 /*================*/
517  ulint id,
518  ulint* flags)
519 {
521 
522  ut_ad(fil_system);
523 
524  mutex_enter(&fil_system->mutex);
525 
526  space = fil_space_get_by_id(id);
527 
528  ut_a(space);
529 
530  if (flags) {
531  *flags = space->flags;
532  }
533 
534  mutex_exit(&fil_system->mutex);
535 
536  return(&(space->latch));
537 }
538 
539 /*******************************************************************/
542 UNIV_INTERN
543 ulint
544 fil_space_get_type(
545 /*===============*/
546  ulint id)
547 {
549 
550  ut_ad(fil_system);
551 
552  mutex_enter(&fil_system->mutex);
553 
554  space = fil_space_get_by_id(id);
555 
556  ut_a(space);
557 
558  mutex_exit(&fil_system->mutex);
559 
560  return(space->purpose);
561 }
562 #endif /* !UNIV_HOTBACKUP */
563 
564 /**********************************************************************/
568 static
569 ibool
570 fil_space_is_flushed(
571 /*=================*/
572  fil_space_t* space)
573 {
574  fil_node_t* node;
575 
576  ut_ad(mutex_own(&fil_system->mutex));
577 
578  node = UT_LIST_GET_FIRST(space->chain);
579 
580  while (node) {
581  if (node->modification_counter > node->flush_counter) {
582 
583  return(FALSE);
584  }
585 
586  node = UT_LIST_GET_NEXT(chain, node);
587  }
588 
589  return(TRUE);
590 }
591 
592 /*******************************************************************/
594 UNIV_INTERN
595 void
596 fil_node_create(
597 /*============*/
598  const char* name,
599  ulint size,
601  ulint id,
602  ibool is_raw)
604 {
605  fil_node_t* node;
607 
608  ut_a(fil_system);
609  ut_a(name);
610 
611  mutex_enter(&fil_system->mutex);
612 
613  node = static_cast<fil_node_t *>(mem_alloc(sizeof(fil_node_t)));
614 
615  node->name = mem_strdup(name);
616  node->open = FALSE;
617 
618  ut_a(!is_raw || srv_start_raw_disk_in_use);
619 
620  node->is_raw_disk = is_raw;
621  node->size = size;
622  node->magic_n = FIL_NODE_MAGIC_N;
623  node->n_pending = 0;
624  node->n_pending_flushes = 0;
625 
626  node->modification_counter = 0;
627  node->flush_counter = 0;
628 
629  space = fil_space_get_by_id(id);
630 
631  if (!space) {
632  ut_print_timestamp(stderr);
633  fprintf(stderr,
634  " InnoDB: Error: Could not find tablespace %lu for\n"
635  "InnoDB: file ", (ulong) id);
636  ut_print_filename(stderr, name);
637  fputs(" in the tablespace memory cache.\n", stderr);
638  mem_free(node->name);
639 
640  mem_free(node);
641 
642  mutex_exit(&fil_system->mutex);
643 
644  return;
645  }
646 
647  space->size += size;
648 
649  node->space = space;
650 
651  UT_LIST_ADD_LAST(chain, space->chain, node);
652 
653  if (id < SRV_LOG_SPACE_FIRST_ID && fil_system->max_assigned_id < id) {
654 
655  fil_system->max_assigned_id = id;
656  }
657 
658  mutex_exit(&fil_system->mutex);
659 }
660 
661 /********************************************************************/
664 static
665 void
666 fil_node_open_file(
667 /*===============*/
668  fil_node_t* node,
669  fil_system_t* system,
670  fil_space_t* space)
671 {
672  uint64_t size_bytes;
673  ulint size_low;
674  ulint size_high;
675  ibool ret;
676  ibool success;
677  byte* buf2;
678  byte* page;
679  ulint space_id;
680  ulint flags;
681 
682  ut_ad(mutex_own(&(system->mutex)));
683  ut_a(node->n_pending == 0);
684  ut_a(node->open == FALSE);
685 
686  if (node->size == 0) {
687  /* It must be a single-table tablespace and we do not know the
688  size of the file yet. First we open the file in the normal
689  mode, no async I/O here, for simplicity. Then do some checks,
690  and close the file again.
691  NOTE that we could not use the simple file read function
692  os_file_read() in Windows to read from a file opened for
693  async I/O! */
694 
695  node->handle = os_file_create_simple_no_error_handling(
696  innodb_file_data_key, node->name, OS_FILE_OPEN,
697  OS_FILE_READ_ONLY, &success);
698  if (!success) {
699  /* The following call prints an error message */
701 
702  ut_print_timestamp(stderr);
703 
704  fprintf(stderr,
705  " InnoDB: Fatal error: cannot open %s\n."
706  "InnoDB: Have you deleted .ibd files"
707  " under a running mysqld server?\n",
708  node->name);
709  ut_a(0);
710  }
711 
712  os_file_get_size(node->handle, &size_low, &size_high);
713 
714  size_bytes = (((uint64_t)size_high) << 32) + size_low;
715 #ifdef UNIV_HOTBACKUP
716  if (space->id == 0) {
717  node->size = size_bytes / UNIV_PAGE_SIZE;
718  os_file_close(node->handle);
719  goto add_size;
720  }
721 #endif /* UNIV_HOTBACKUP */
722  ut_a(space->purpose != FIL_LOG);
723  ut_a(space->id != 0);
724 
725  if (size_bytes < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
726  fprintf(stderr,
727  "InnoDB: Error: the size of single-table"
728  " tablespace file %s\n"
729  "InnoDB: is only %lu %lu,"
730  " should be at least %lu!\n",
731  node->name,
732  (ulong) size_high,
733  (ulong) size_low,
735  * UNIV_PAGE_SIZE));
736 
737  ut_a(0);
738  }
739 
740  /* Read the first page of the tablespace */
741 
742  buf2 = static_cast<unsigned char *>(ut_malloc(2 * UNIV_PAGE_SIZE));
743  /* Align the memory for file i/o if we might have O_DIRECT
744  set */
745  page = static_cast<unsigned char *>(ut_align(buf2, UNIV_PAGE_SIZE));
746 
747  success = os_file_read(node->handle, page, 0, 0,
748  UNIV_PAGE_SIZE);
749  space_id = fsp_header_get_space_id(page);
750  flags = fsp_header_get_flags(page);
751 
752  ut_free(buf2);
753 
754  /* Close the file now that we have read the space id from it */
755 
756  os_file_close(node->handle);
757 
758  if (UNIV_UNLIKELY(space_id != space->id)) {
759  fprintf(stderr,
760  "InnoDB: Error: tablespace id is %lu"
761  " in the data dictionary\n"
762  "InnoDB: but in file %s it is %lu!\n",
763  space->id, node->name, space_id);
764 
765  ut_error;
766  }
767 
768  if (UNIV_UNLIKELY(space_id == ULINT_UNDEFINED
769  || space_id == 0)) {
770  fprintf(stderr,
771  "InnoDB: Error: tablespace id %lu"
772  " in file %s is not sensible\n",
773  (ulong) space_id, node->name);
774 
775  ut_error;
776  }
777 
778  if (UNIV_UNLIKELY(space->flags != flags)) {
779  fprintf(stderr,
780  "InnoDB: Error: table flags are %lx"
781  " in the data dictionary\n"
782  "InnoDB: but the flags in file %s are %lx!\n",
783  space->flags, node->name, flags);
784 
785  ut_error;
786  }
787 
788  if (size_bytes >= 1024 * 1024) {
789  /* Truncate the size to whole megabytes. */
790  size_bytes = ut_2pow_round(size_bytes, 1024 * 1024);
791  }
792 
793  if (!(flags & DICT_TF_ZSSIZE_MASK)) {
794  node->size = (ulint)size_bytes / UNIV_PAGE_SIZE;
795  } else {
796  node->size = (ulint)
797  (size_bytes
799  }
800 
801 #ifdef UNIV_HOTBACKUP
802 add_size:
803 #endif /* UNIV_HOTBACKUP */
804  space->size += node->size;
805  }
806 
807  /* printf("Opening file %s\n", node->name); */
808 
809  /* Open the file for reading and writing, in Windows normally in the
810  unbuffered async I/O mode, though global variables may make
811  os_file_create() to fall back to the normal file I/O mode. */
812 
813  if (space->purpose == FIL_LOG) {
814  node->handle = os_file_create(innodb_file_log_key,
815  node->name, OS_FILE_OPEN,
816  OS_FILE_AIO, OS_LOG_FILE,
817  &ret);
818  } else if (node->is_raw_disk) {
819  node->handle = os_file_create(innodb_file_data_key,
820  node->name,
821  OS_FILE_OPEN_RAW,
822  OS_FILE_AIO, OS_DATA_FILE,
823  &ret);
824  } else {
825  node->handle = os_file_create(innodb_file_data_key,
826  node->name, OS_FILE_OPEN,
827  OS_FILE_AIO, OS_DATA_FILE,
828  &ret);
829  }
830 
831  ut_a(ret);
832 
833  node->open = TRUE;
834 
835  system->n_open++;
836 
837  if (space->purpose == FIL_TABLESPACE && space->id != 0) {
838  /* Put the node to the LRU list */
839  UT_LIST_ADD_FIRST(LRU, system->LRU, node);
840  }
841 }
842 
843 /**********************************************************************/
845 static
846 void
847 fil_node_close_file(
848 /*================*/
849  fil_node_t* node,
850  fil_system_t* system)
851 {
852  ibool ret;
853 
854  ut_ad(node && system);
855  ut_ad(mutex_own(&(system->mutex)));
856  ut_a(node->open);
857  ut_a(node->n_pending == 0);
858  ut_a(node->n_pending_flushes == 0);
859  ut_a(node->modification_counter == node->flush_counter);
860 
861  ret = os_file_close(node->handle);
862  ut_a(ret);
863 
864  /* printf("Closing file %s\n", node->name); */
865 
866  node->open = FALSE;
867  ut_a(system->n_open > 0);
868  system->n_open--;
869 
870  if (node->space->purpose == FIL_TABLESPACE && node->space->id != 0) {
871  ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
872 
873  /* The node is in the LRU list, remove it */
874  UT_LIST_REMOVE(LRU, system->LRU, node);
875  }
876 }
877 
878 /********************************************************************/
886 static
887 ibool
888 fil_try_to_close_file_in_LRU(
889 /*=========================*/
890  ibool print_info)
892 {
893  fil_node_t* node;
894 
895  ut_ad(mutex_own(&fil_system->mutex));
896 
897  node = UT_LIST_GET_LAST(fil_system->LRU);
898 
899  if (print_info) {
900  fprintf(stderr,
901  "InnoDB: fil_sys open file LRU len %lu\n",
902  (ulong) UT_LIST_GET_LEN(fil_system->LRU));
903  }
904 
905  while (node != NULL) {
906  if (node->modification_counter == node->flush_counter
907  && node->n_pending_flushes == 0) {
908 
909  fil_node_close_file(node, fil_system);
910 
911  return(TRUE);
912  }
913 
914  if (print_info && node->n_pending_flushes > 0) {
915  fputs("InnoDB: cannot close file ", stderr);
916  ut_print_filename(stderr, node->name);
917  fprintf(stderr, ", because n_pending_flushes %lu\n",
918  (ulong) node->n_pending_flushes);
919  }
920 
921  if (print_info
922  && node->modification_counter != node->flush_counter) {
923  fputs("InnoDB: cannot close file ", stderr);
924  ut_print_filename(stderr, node->name);
925  fprintf(stderr,
926  ", because mod_count %ld != fl_count %ld\n",
927  (long) node->modification_counter,
928  (long) node->flush_counter);
929  }
930 
931  node = UT_LIST_GET_PREV(LRU, node);
932  }
933 
934  return(FALSE);
935 }
936 
937 /*******************************************************************/
941 static
942 void
943 fil_mutex_enter_and_prepare_for_io(
944 /*===============================*/
945  ulint space_id)
946 {
948  ibool success;
949  ibool print_info = FALSE;
950  ulint count = 0;
951  ulint count2 = 0;
952 
953 retry:
954  mutex_enter(&fil_system->mutex);
955 
956  if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) {
957  /* We keep log files and system tablespace files always open;
958  this is important in preventing deadlocks in this module, as
959  a page read completion often performs another read from the
960  insert buffer. The insert buffer is in tablespace 0, and we
961  cannot end up waiting in this function. */
962 
963  return;
964  }
965 
966  if (fil_system->n_open < fil_system->max_n_open) {
967 
968  return;
969  }
970 
971  space = fil_space_get_by_id(space_id);
972 
973  if (space != NULL && space->stop_ios) {
974  /* We are going to do a rename file and want to stop new i/o's
975  for a while */
976 
977  if (count2 > 20000) {
978  fputs("InnoDB: Warning: tablespace ", stderr);
979  ut_print_filename(stderr, space->name);
980  fprintf(stderr,
981  " has i/o ops stopped for a long time %lu\n",
982  (ulong) count2);
983  }
984 
985  mutex_exit(&fil_system->mutex);
986 
987  os_thread_sleep(20000);
988 
989  count2++;
990 
991  goto retry;
992  }
993 
994  /* If the file is already open, no need to do anything; if the space
995  does not exist, we handle the situation in the function which called
996  this function */
997 
998  if (!space || UT_LIST_GET_FIRST(space->chain)->open) {
999 
1000  return;
1001  }
1002 
1003  if (count > 1) {
1004  print_info = TRUE;
1005  }
1006 
1007  /* Too many files are open, try to close some */
1008 close_more:
1009  success = fil_try_to_close_file_in_LRU(print_info);
1010 
1011  if (success && fil_system->n_open >= fil_system->max_n_open) {
1012 
1013  goto close_more;
1014  }
1015 
1016  if (fil_system->n_open < fil_system->max_n_open) {
1017  /* Ok */
1018 
1019  return;
1020  }
1021 
1022  if (count >= 2) {
1023  ut_print_timestamp(stderr);
1024  fprintf(stderr,
1025  " InnoDB: Warning: too many (%lu) files stay open"
1026  " while the maximum\n"
1027  "InnoDB: allowed value would be %lu.\n"
1028  "InnoDB: You may need to raise the value of"
1029  " innodb_open_files in\n"
1030  "InnoDB: my.cnf.\n",
1031  (ulong) fil_system->n_open,
1032  (ulong) fil_system->max_n_open);
1033 
1034  return;
1035  }
1036 
1037  mutex_exit(&fil_system->mutex);
1038 
1039 #ifndef UNIV_HOTBACKUP
1040  /* Wake the i/o-handler threads to make sure pending i/o's are
1041  performed */
1043 
1044  os_thread_sleep(20000);
1045 #endif
1046  /* Flush tablespaces so that we can close modified files in the LRU
1047  list */
1048 
1049  fil_flush_file_spaces(FIL_TABLESPACE);
1050 
1051  count++;
1052 
1053  goto retry;
1054 }
1055 
1056 /*******************************************************************/
1058 static
1059 void
1060 fil_node_free(
1061 /*==========*/
1062  fil_node_t* node,
1063  fil_system_t* system,
1064  fil_space_t* space)
1065 {
1066  ut_ad(node && system && space);
1067  ut_ad(mutex_own(&(system->mutex)));
1068  ut_a(node->magic_n == FIL_NODE_MAGIC_N);
1069  ut_a(node->n_pending == 0);
1070 
1071  if (node->open) {
1072  /* We fool the assertion in fil_node_close_file() to think
1073  there are no unflushed modifications in the file */
1074 
1075  node->modification_counter = node->flush_counter;
1076 
1077  if (space->is_in_unflushed_spaces
1078  && fil_space_is_flushed(space)) {
1079 
1080  space->is_in_unflushed_spaces = FALSE;
1081 
1082  UT_LIST_REMOVE(unflushed_spaces,
1083  system->unflushed_spaces,
1084  space);
1085  }
1086 
1087  fil_node_close_file(node, system);
1088  }
1089 
1090  space->size -= node->size;
1091 
1092  UT_LIST_REMOVE(chain, space->chain, node);
1093 
1094  mem_free(node->name);
1095  mem_free(node);
1096 }
1097 
#ifdef UNIV_LOG_ARCHIVE
/****************************************************************//**
Drops whole files from the start of a file space until trunc_len bytes
have been removed. Each removed file must fit entirely within the
remaining trunc_len; this is asserted. The space must exist. */
UNIV_INTERN
void
fil_space_truncate_start(
/*=====================*/
	ulint	id,		/*!< in: space id */
	ulint	trunc_len)	/*!< in: number of bytes to drop from the
				start; must equal the combined size of
				some leading files of the space */
{
	fil_node_t*	node;
	fil_space_t*	space;

	mutex_enter(&fil_system->mutex);

	space = fil_space_get_by_id(id);

	ut_a(space);

	while (trunc_len > 0) {
		node = UT_LIST_GET_FIRST(space->chain);

		/* Fail with an assertion instead of dereferencing NULL
		if trunc_len exceeds the total size of the space. */
		ut_a(node);

		ut_a(node->size * UNIV_PAGE_SIZE <= trunc_len);

		trunc_len -= node->size * UNIV_PAGE_SIZE;

		fil_node_free(node, fil_system, space);
	}

	mutex_exit(&fil_system->mutex);
}
#endif /* UNIV_LOG_ARCHIVE */
1133 
1134 /*******************************************************************/
1138 UNIV_INTERN
1139 ibool
1140 fil_space_create(
1141 /*=============*/
1142  const char* name,
1143  ulint id,
1144  ulint flags,
1146  ulint purpose)
1147 {
1148  fil_space_t* space;
1149 
1150  /* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
1151  ROW_FORMAT=COMPACT
1152  ((table->flags & ~(~0 << DICT_TF_BITS)) == DICT_TF_COMPACT) and
1153  ROW_FORMAT=REDUNDANT (table->flags == 0). For any other
1154  format, the tablespace flags should equal
1155  (table->flags & ~(~0 << DICT_TF_BITS)). */
1156  ut_a(flags != DICT_TF_COMPACT);
1157  ut_a(!(flags & (~0UL << DICT_TF_BITS)));
1158 
1159 try_again:
1160  /*printf(
1161  "InnoDB: Adding tablespace %lu of name %s, purpose %lu\n", id, name,
1162  purpose);*/
1163 
1164  ut_a(fil_system);
1165  ut_a(name);
1166 
1167  mutex_enter(&fil_system->mutex);
1168 
1169  space = fil_space_get_by_name(name);
1170 
1171  if (UNIV_LIKELY_NULL(space)) {
1172  ibool success;
1173  ulint namesake_id;
1174 
1175  ut_print_timestamp(stderr);
1176  fprintf(stderr,
1177  " InnoDB: Warning: trying to init to the"
1178  " tablespace memory cache\n"
1179  "InnoDB: a tablespace %lu of name ", (ulong) id);
1180  ut_print_filename(stderr, name);
1181  fprintf(stderr, ",\n"
1182  "InnoDB: but a tablespace %lu of the same name\n"
1183  "InnoDB: already exists in the"
1184  " tablespace memory cache!\n",
1185  (ulong) space->id);
1186 
1187  if (id == 0 || purpose != FIL_TABLESPACE) {
1188 
1189  mutex_exit(&fil_system->mutex);
1190 
1191  return(FALSE);
1192  }
1193 
1194  fprintf(stderr,
1195  "InnoDB: We assume that InnoDB did a crash recovery,"
1196  " and you had\n"
1197  "InnoDB: an .ibd file for which the table"
1198  " did not exist in the\n"
1199  "InnoDB: InnoDB internal data dictionary in the"
1200  " ibdata files.\n"
1201  "InnoDB: We assume that you later removed the"
1202  " .ibd and .frm files,\n"
1203  "InnoDB: and are now trying to recreate the table."
1204  " We now remove the\n"
1205  "InnoDB: conflicting tablespace object"
1206  " from the memory cache and try\n"
1207  "InnoDB: the init again.\n");
1208 
1209  namesake_id = space->id;
1210 
1211  success = fil_space_free(namesake_id, FALSE);
1212  ut_a(success);
1213 
1214  mutex_exit(&fil_system->mutex);
1215 
1216  goto try_again;
1217  }
1218 
1219  space = fil_space_get_by_id(id);
1220 
1221  if (UNIV_LIKELY_NULL(space)) {
1222  fprintf(stderr,
1223  "InnoDB: Error: trying to add tablespace %lu"
1224  " of name ", (ulong) id);
1225  ut_print_filename(stderr, name);
1226  fprintf(stderr, "\n"
1227  "InnoDB: to the tablespace memory cache,"
1228  " but tablespace\n"
1229  "InnoDB: %lu of name ", (ulong) space->id);
1230  ut_print_filename(stderr, space->name);
1231  fputs(" already exists in the tablespace\n"
1232  "InnoDB: memory cache!\n", stderr);
1233 
1234  mutex_exit(&fil_system->mutex);
1235 
1236  return(FALSE);
1237  }
1238 
1239  space = static_cast<fil_space_t *>(mem_alloc(sizeof(fil_space_t)));
1240 
1241  space->name = mem_strdup(name);
1242  space->id = id;
1243 
1244  fil_system->tablespace_version++;
1245  space->tablespace_version = fil_system->tablespace_version;
1246  space->mark = FALSE;
1247 
1248  if (UNIV_LIKELY(purpose == FIL_TABLESPACE && !recv_recovery_on)
1249  && UNIV_UNLIKELY(id > fil_system->max_assigned_id)) {
1250  if (!fil_system->space_id_reuse_warned) {
1251  fil_system->space_id_reuse_warned = TRUE;
1252 
1253  ut_print_timestamp(stderr);
1254  fprintf(stderr,
1255  " InnoDB: Warning: allocated tablespace %lu,"
1256  " old maximum was %lu\n",
1257  (ulong) id,
1258  (ulong) fil_system->max_assigned_id);
1259  }
1260 
1261  fil_system->max_assigned_id = id;
1262  }
1263 
1264  space->stop_ios = FALSE;
1265  space->stop_ibuf_merges = FALSE;
1266  space->is_being_deleted = FALSE;
1267  space->purpose = purpose;
1268  space->size = 0;
1269  space->flags = flags;
1270 
1271  space->n_reserved_extents = 0;
1272 
1273  space->n_pending_flushes = 0;
1274  space->n_pending_ibuf_merges = 0;
1275 
1276  UT_LIST_INIT(space->chain);
1277  space->magic_n = FIL_SPACE_MAGIC_N;
1278 
1279  rw_lock_create(fil_space_latch_key, &space->latch, SYNC_FSP);
1280 
1281  HASH_INSERT(fil_space_t, hash, fil_system->spaces, id, space);
1282 
1283  HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash,
1284  ut_fold_string(name), space);
1285  space->is_in_unflushed_spaces = FALSE;
1286 
1287  UT_LIST_ADD_LAST(space_list, fil_system->space_list, space);
1288 
1289  mutex_exit(&fil_system->mutex);
1290 
1291  return(TRUE);
1292 }
1293 
1294 /*******************************************************************/
1299 UNIV_INTERN
1300 ibool
1301 fil_assign_new_space_id(
1302 /*====================*/
1303  ulint* space_id)
1304 {
1305  ulint id;
1306  ibool success;
1307 
1308  mutex_enter(&fil_system->mutex);
1309 
1310  id = *space_id;
1311 
1312  if (id < fil_system->max_assigned_id) {
1313  id = fil_system->max_assigned_id;
1314  }
1315 
1316  id++;
1317 
1318  if (id > (SRV_LOG_SPACE_FIRST_ID / 2) && (id % 1000000UL == 0)) {
1319  ut_print_timestamp(stderr);
1320  fprintf(stderr,
1321  "InnoDB: Warning: you are running out of new"
1322  " single-table tablespace id's.\n"
1323  "InnoDB: Current counter is %lu and it"
1324  " must not exceed %lu!\n"
1325  "InnoDB: To reset the counter to zero"
1326  " you have to dump all your tables and\n"
1327  "InnoDB: recreate the whole InnoDB installation.\n",
1328  (ulong) id,
1329  (ulong) SRV_LOG_SPACE_FIRST_ID);
1330  }
1331 
1332  success = (id < SRV_LOG_SPACE_FIRST_ID);
1333 
1334  if (success) {
1335  *space_id = fil_system->max_assigned_id = id;
1336  } else {
1337  ut_print_timestamp(stderr);
1338  fprintf(stderr,
1339  "InnoDB: You have run out of single-table"
1340  " tablespace id's!\n"
1341  "InnoDB: Current counter is %lu.\n"
1342  "InnoDB: To reset the counter to zero you"
1343  " have to dump all your tables and\n"
1344  "InnoDB: recreate the whole InnoDB installation.\n",
1345  (ulong) id);
1346  *space_id = ULINT_UNDEFINED;
1347  }
1348 
1349  mutex_exit(&fil_system->mutex);
1350 
1351  return(success);
1352 }
1353 
1354 /*******************************************************************/
1359 static
1360 ibool
1361 fil_space_free(
1362 /*===========*/
1363  /* out: TRUE if success */
1364  ulint id, /* in: space id */
1365  ibool x_latched) /* in: TRUE if caller has space->latch
1366  in X mode */
1367 {
1368  fil_space_t* space;
1369  fil_space_t* tablespace;
1370  fil_node_t* fil_node;
1371 
1372  ut_ad(mutex_own(&fil_system->mutex));
1373 
1374  space = fil_space_get_by_id(id);
1375 
1376  if (!space) {
1377  ut_print_timestamp(stderr);
1378  fprintf(stderr,
1379  " InnoDB: Error: trying to remove tablespace %lu"
1380  " from the cache but\n"
1381  "InnoDB: it is not there.\n", (ulong) id);
1382 
1383  return(FALSE);
1384  }
1385 
1386  HASH_DELETE(fil_space_t, hash, fil_system->spaces, id, space);
1387 
1388  tablespace = fil_space_get_by_name(space->name);
1389  ut_a(tablespace);
1390  ut_a(space == tablespace);
1391 
1392  HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash,
1393  ut_fold_string(space->name), space);
1394 
1395  if (space->is_in_unflushed_spaces) {
1396  space->is_in_unflushed_spaces = FALSE;
1397 
1398  UT_LIST_REMOVE(unflushed_spaces, fil_system->unflushed_spaces,
1399  space);
1400  }
1401 
1402  UT_LIST_REMOVE(space_list, fil_system->space_list, space);
1403 
1404  ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
1405  ut_a(0 == space->n_pending_flushes);
1406 
1407  fil_node = UT_LIST_GET_FIRST(space->chain);
1408 
1409  while (fil_node != NULL) {
1410  fil_node_free(fil_node, fil_system, space);
1411 
1412  fil_node = UT_LIST_GET_FIRST(space->chain);
1413  }
1414 
1415  ut_a(0 == UT_LIST_GET_LEN(space->chain));
1416 
1417  if (x_latched) {
1418  rw_lock_x_unlock(&space->latch);
1419  }
1420 
1421  rw_lock_free(&(space->latch));
1422 
1423  mem_free(space->name);
1424  mem_free(space);
1425 
1426  return(TRUE);
1427 }
1428 
1429 /*******************************************************************/
1433 UNIV_INTERN
1434 ulint
1435 fil_space_get_size(
1436 /*===============*/
1437  ulint id)
1438 {
1439  fil_node_t* node;
1440  fil_space_t* space;
1441  ulint size;
1442 
1443  ut_ad(fil_system);
1444 
1445  fil_mutex_enter_and_prepare_for_io(id);
1446 
1447  space = fil_space_get_by_id(id);
1448 
1449  if (space == NULL) {
1450  mutex_exit(&fil_system->mutex);
1451 
1452  return(0);
1453  }
1454 
1455  if (space->size == 0 && space->purpose == FIL_TABLESPACE) {
1456  ut_a(id != 0);
1457 
1458  ut_a(1 == UT_LIST_GET_LEN(space->chain));
1459 
1460  node = UT_LIST_GET_FIRST(space->chain);
1461 
1462  /* It must be a single-table tablespace and we have not opened
1463  the file yet; the following calls will open it and update the
1464  size fields */
1465 
1466  fil_node_prepare_for_io(node, fil_system, space);
1467  fil_node_complete_io(node, fil_system, OS_FILE_READ);
1468  }
1469 
1470  size = space->size;
1471 
1472  mutex_exit(&fil_system->mutex);
1473 
1474  return(size);
1475 }
1476 
1477 /*******************************************************************/
1481 UNIV_INTERN
1482 ulint
1483 fil_space_get_flags(
1484 /*================*/
1485  ulint id)
1486 {
1487  fil_node_t* node;
1488  fil_space_t* space;
1489  ulint flags;
1490 
1491  ut_ad(fil_system);
1492 
1493  if (UNIV_UNLIKELY(!id)) {
1494  return(0);
1495  }
1496 
1497  fil_mutex_enter_and_prepare_for_io(id);
1498 
1499  space = fil_space_get_by_id(id);
1500 
1501  if (space == NULL) {
1502  mutex_exit(&fil_system->mutex);
1503 
1504  return(ULINT_UNDEFINED);
1505  }
1506 
1507  if (space->size == 0 && space->purpose == FIL_TABLESPACE) {
1508  ut_a(id != 0);
1509 
1510  ut_a(1 == UT_LIST_GET_LEN(space->chain));
1511 
1512  node = UT_LIST_GET_FIRST(space->chain);
1513 
1514  /* It must be a single-table tablespace and we have not opened
1515  the file yet; the following calls will open it and update the
1516  size fields */
1517 
1518  fil_node_prepare_for_io(node, fil_system, space);
1519  fil_node_complete_io(node, fil_system, OS_FILE_READ);
1520  }
1521 
1522  flags = space->flags;
1523 
1524  mutex_exit(&fil_system->mutex);
1525 
1526  return(flags);
1527 }
1528 
1529 /*******************************************************************/
1533 UNIV_INTERN
1534 ulint
1535 fil_space_get_zip_size(
1536 /*===================*/
1537  ulint id)
1538 {
1539  ulint flags;
1540 
1541  flags = fil_space_get_flags(id);
1542 
1543  if (flags && flags != ULINT_UNDEFINED) {
1544 
1545  return(dict_table_flags_to_zip_size(flags));
1546  }
1547 
1548  return(flags);
1549 }
1550 
1551 /*******************************************************************/
1555 UNIV_INTERN
1556 ibool
1557 fil_check_adress_in_tablespace(
1558 /*===========================*/
1559  ulint id,
1560  ulint page_no)
1561 {
1562  if (fil_space_get_size(id) > page_no) {
1563 
1564  return(TRUE);
1565  }
1566 
1567  return(FALSE);
1568 }
1569 
1570 /****************************************************************/
1572 UNIV_INTERN
1573 void
1574 fil_init(
1575 /*=====*/
1576  ulint hash_size,
1577  ulint max_n_open)
1578 {
1579  ut_a(fil_system == NULL);
1580 
1581  ut_a(hash_size > 0);
1582  ut_a(max_n_open > 0);
1583 
1584  void *fil_system_ptr= mem_zalloc(sizeof(fil_system_t));
1585  fil_system = static_cast<fil_system_t *>(fil_system_ptr);
1586 
1587  mutex_create(fil_system_mutex_key,
1588  &fil_system->mutex, SYNC_ANY_LATCH);
1589 
1590  fil_system->spaces = hash_create(hash_size);
1591  fil_system->name_hash = hash_create(hash_size);
1592 
1593  UT_LIST_INIT(fil_system->LRU);
1594 
1595  fil_system->max_n_open = max_n_open;
1596 }
1597 
1598 /*******************************************************************/
1604 UNIV_INTERN
1605 void
1606 fil_open_log_and_system_tablespace_files(void)
1607 /*==========================================*/
1608 {
1609  fil_space_t* space;
1610  fil_node_t* node;
1611 
1612  mutex_enter(&fil_system->mutex);
1613 
1614  space = UT_LIST_GET_FIRST(fil_system->space_list);
1615 
1616  while (space != NULL) {
1617  if (space->purpose != FIL_TABLESPACE || space->id == 0) {
1618  node = UT_LIST_GET_FIRST(space->chain);
1619 
1620  while (node != NULL) {
1621  if (!node->open) {
1622  fil_node_open_file(node, fil_system,
1623  space);
1624  }
1625  if (fil_system->max_n_open
1626  < 10 + fil_system->n_open) {
1627  fprintf(stderr,
1628  "InnoDB: Warning: you must"
1629  " raise the value of"
1630  " innodb_open_files in\n"
1631  "InnoDB: my.cnf! Remember that"
1632  " InnoDB keeps all log files"
1633  " and all system\n"
1634  "InnoDB: tablespace files open"
1635  " for the whole time mysqld is"
1636  " running, and\n"
1637  "InnoDB: needs to open also"
1638  " some .ibd files if the"
1639  " file-per-table storage\n"
1640  "InnoDB: model is used."
1641  " Current open files %lu,"
1642  " max allowed"
1643  " open files %lu.\n",
1644  (ulong) fil_system->n_open,
1645  (ulong) fil_system->max_n_open);
1646  }
1647  node = UT_LIST_GET_NEXT(chain, node);
1648  }
1649  }
1650  space = UT_LIST_GET_NEXT(space_list, space);
1651  }
1652 
1653  mutex_exit(&fil_system->mutex);
1654 }
1655 
1656 /*******************************************************************/
1659 UNIV_INTERN
1660 void
1661 fil_close_all_files(void)
1662 /*=====================*/
1663 {
1664  fil_space_t* space;
1665 
1666  mutex_enter(&fil_system->mutex);
1667 
1668  space = UT_LIST_GET_FIRST(fil_system->space_list);
1669 
1670  while (space != NULL) {
1671  fil_node_t* node;
1672  fil_space_t* prev_space = space;
1673 
1674  for (node = UT_LIST_GET_FIRST(space->chain);
1675  node != NULL;
1676  node = UT_LIST_GET_NEXT(chain, node)) {
1677 
1678  if (node->open) {
1679  fil_node_close_file(node, fil_system);
1680  }
1681  }
1682 
1683  space = UT_LIST_GET_NEXT(space_list, space);
1684 
1685  fil_space_free(prev_space->id, FALSE);
1686  }
1687 
1688  mutex_exit(&fil_system->mutex);
1689 }
1690 
1691 /*******************************************************************/
1694 UNIV_INTERN
1695 void
1696 fil_set_max_space_id_if_bigger(
1697 /*===========================*/
1698  ulint max_id)
1699 {
1700  if (max_id >= SRV_LOG_SPACE_FIRST_ID) {
1701  fprintf(stderr,
1702  "InnoDB: Fatal error: max tablespace id"
1703  " is too high, %lu\n", (ulong) max_id);
1704  ut_error;
1705  }
1706 
1707  mutex_enter(&fil_system->mutex);
1708 
1709  if (fil_system->max_assigned_id < max_id) {
1710 
1711  fil_system->max_assigned_id = max_id;
1712  }
1713 
1714  mutex_exit(&fil_system->mutex);
1715 }
1716 
1717 /****************************************************************/
1721 static
1722 ulint
1723 fil_write_lsn_and_arch_no_to_file(
1724 /*==============================*/
1725  ulint sum_of_sizes,
1727  ib_uint64_t lsn,
1728  ulint /*arch_log_no __attribute__((unused))*/)
1730 {
1731  byte* buf1;
1732  byte* buf;
1733 
1734  buf1 = static_cast<byte *>(mem_alloc(2 * UNIV_PAGE_SIZE));
1735  buf = static_cast<byte *>(ut_align(buf1, UNIV_PAGE_SIZE));
1736 
1737  fil_read(TRUE, 0, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
1738 
1740 
1741  fil_write(TRUE, 0, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
1742 
1743  mem_free(buf1);
1744 
1745  return(DB_SUCCESS);
1746 }
1747 
1748 /****************************************************************/
1752 UNIV_INTERN
1753 ulint
1754 fil_write_flushed_lsn_to_data_files(
1755 /*================================*/
1756  ib_uint64_t lsn,
1757  ulint arch_log_no)
1759 {
1760  fil_space_t* space;
1761  fil_node_t* node;
1762  ulint sum_of_sizes;
1763  ulint err;
1764 
1765  mutex_enter(&fil_system->mutex);
1766 
1767  space = UT_LIST_GET_FIRST(fil_system->space_list);
1768 
1769  while (space) {
1770  /* We only write the lsn to all existing data files which have
1771  been open during the lifetime of the mysqld process; they are
1772  represented by the space objects in the tablespace memory
1773  cache. Note that all data files in the system tablespace 0 are
1774  always open. */
1775 
1776  if (space->purpose == FIL_TABLESPACE
1777  && space->id == 0) {
1778  sum_of_sizes = 0;
1779 
1780  node = UT_LIST_GET_FIRST(space->chain);
1781  while (node) {
1782  mutex_exit(&fil_system->mutex);
1783 
1784  err = fil_write_lsn_and_arch_no_to_file(
1785  sum_of_sizes, lsn, arch_log_no);
1786  if (err != DB_SUCCESS) {
1787 
1788  return(err);
1789  }
1790 
1791  mutex_enter(&fil_system->mutex);
1792 
1793  sum_of_sizes += node->size;
1794  node = UT_LIST_GET_NEXT(chain, node);
1795  }
1796  }
1797  space = UT_LIST_GET_NEXT(space_list, space);
1798  }
1799 
1800  mutex_exit(&fil_system->mutex);
1801 
1802  return(DB_SUCCESS);
1803 }
1804 
1805 /*******************************************************************/
1808 UNIV_INTERN
1809 void
1810 fil_read_flushed_lsn_and_arch_log_no(
1811 /*=================================*/
1812  os_file_t data_file,
1813  ibool one_read_already,
1816 #ifdef UNIV_LOG_ARCHIVE
1817  ulint* min_arch_log_no,
1818  ulint* max_arch_log_no,
1819 #endif /* UNIV_LOG_ARCHIVE */
1820  ib_uint64_t* min_flushed_lsn,
1821  ib_uint64_t* max_flushed_lsn)
1822 {
1823  byte* buf;
1824  byte* buf2;
1825  ib_uint64_t flushed_lsn;
1826 
1827  buf2 = static_cast<byte *>(ut_malloc(2 * UNIV_PAGE_SIZE));
1828  /* Align the memory for a possible read from a raw device */
1829  buf = static_cast<byte *>(ut_align(buf2, UNIV_PAGE_SIZE));
1830 
1831  os_file_read(data_file, buf, 0, 0, UNIV_PAGE_SIZE);
1832 
1833  flushed_lsn = mach_read_from_8(buf + FIL_PAGE_FILE_FLUSH_LSN);
1834 
1835  ut_free(buf2);
1836 
1837  if (!one_read_already) {
1838  *min_flushed_lsn = flushed_lsn;
1839  *max_flushed_lsn = flushed_lsn;
1840 #ifdef UNIV_LOG_ARCHIVE
1841  *min_arch_log_no = arch_log_no;
1842  *max_arch_log_no = arch_log_no;
1843 #endif /* UNIV_LOG_ARCHIVE */
1844  return;
1845  }
1846 
1847  if (*min_flushed_lsn > flushed_lsn) {
1848  *min_flushed_lsn = flushed_lsn;
1849  }
1850  if (*max_flushed_lsn < flushed_lsn) {
1851  *max_flushed_lsn = flushed_lsn;
1852  }
1853 #ifdef UNIV_LOG_ARCHIVE
1854  if (*min_arch_log_no > arch_log_no) {
1855  *min_arch_log_no = arch_log_no;
1856  }
1857  if (*max_arch_log_no < arch_log_no) {
1858  *max_arch_log_no = arch_log_no;
1859  }
1860 #endif /* UNIV_LOG_ARCHIVE */
1861 }
1862 
1863 /*================ SINGLE-TABLE TABLESPACES ==========================*/
1864 
1865 #ifndef UNIV_HOTBACKUP
1866 /*******************************************************************/
1870 UNIV_INTERN
1871 ibool
1872 fil_inc_pending_ibuf_merges(
1873 /*========================*/
1874  ulint id)
1875 {
1876  fil_space_t* space;
1877 
1878  mutex_enter(&fil_system->mutex);
1879 
1880  space = fil_space_get_by_id(id);
1881 
1882  if (space == NULL) {
1883  fprintf(stderr,
1884  "InnoDB: Error: trying to do ibuf merge to a"
1885  " dropped tablespace %lu\n",
1886  (ulong) id);
1887  }
1888 
1889  if (space == NULL || space->stop_ibuf_merges) {
1890  mutex_exit(&fil_system->mutex);
1891 
1892  return(TRUE);
1893  }
1894 
1895  space->n_pending_ibuf_merges++;
1896 
1897  mutex_exit(&fil_system->mutex);
1898 
1899  return(FALSE);
1900 }
1901 
1902 /*******************************************************************/
1904 UNIV_INTERN
1905 void
1906 fil_decr_pending_ibuf_merges(
1907 /*=========================*/
1908  ulint id)
1909 {
1910  fil_space_t* space;
1911 
1912  mutex_enter(&fil_system->mutex);
1913 
1914  space = fil_space_get_by_id(id);
1915 
1916  if (space == NULL) {
1917  fprintf(stderr,
1918  "InnoDB: Error: decrementing ibuf merge of a"
1919  " dropped tablespace %lu\n",
1920  (ulong) id);
1921  }
1922 
1923  if (space != NULL) {
1924  space->n_pending_ibuf_merges--;
1925  }
1926 
1927  mutex_exit(&fil_system->mutex);
1928 }
1929 #endif /* !UNIV_HOTBACKUP */
1930 
1931 /********************************************************/
1933 static
1934 void
1935 fil_create_directory_for_tablename(
1936 /*===============================*/
1937  const char* name)
1939 {
1940  const char* namend;
1941  char* path;
1942  ulint len;
1943 
1944  len = strlen(fil_path_to_mysql_datadir);
1945  namend = strchr(name, '/');
1946  ut_a(namend);
1947  path = static_cast<char *>(mem_alloc(len + (namend - name) + 2));
1948 
1949  memcpy(path, fil_path_to_mysql_datadir, len);
1950  path[len] = '/';
1951  memcpy(path + len + 1, name, namend - name);
1952  path[len + (namend - name) + 1] = 0;
1953 
1955 
1956  ut_a(os_file_create_directory(path, FALSE));
1957  mem_free(path);
1958 }
1959 
1960 #ifndef UNIV_HOTBACKUP
1961 /********************************************************/
1963 static
1964 void
1965 fil_op_write_log(
1966 /*=============*/
1967  ulint type,
1971  ulint space_id,
1972  ulint log_flags,
1974  ulint flags,
1977  const char* name,
1981  const char* new_name,
1984  mtr_t* mtr)
1985 {
1986  byte* log_ptr;
1987  ulint len;
1988 
1989  log_ptr = mlog_open(mtr, 11 + 2 + 1);
1990 
1991  if (!log_ptr) {
1992  /* Logging in mtr is switched off during crash recovery:
1993  in that case mlog_open returns NULL */
1994  return;
1995  }
1996 
1998  type, space_id, log_flags, log_ptr, mtr);
1999  if (type == MLOG_FILE_CREATE2) {
2000  mach_write_to_4(log_ptr, flags);
2001  log_ptr += 4;
2002  }
2003  /* Let us store the strings as null-terminated for easier readability
2004  and handling */
2005 
2006  len = strlen(name) + 1;
2007 
2008  mach_write_to_2(log_ptr, len);
2009  log_ptr += 2;
2010  mlog_close(mtr, log_ptr);
2011 
2012  mlog_catenate_string(mtr, (byte*) name, len);
2013 
2014  if (type == MLOG_FILE_RENAME) {
2015  len = strlen(new_name) + 1;
2016  log_ptr = mlog_open(mtr, 2 + len);
2017  ut_a(log_ptr);
2018  mach_write_to_2(log_ptr, len);
2019  log_ptr += 2;
2020  mlog_close(mtr, log_ptr);
2021 
2022  mlog_catenate_string(mtr, (byte*) new_name, len);
2023  }
2024 }
2025 #endif
2026 
2027 /*******************************************************************/
/* Parses the body of a file-operation log record and, when space_id is
non-zero, replays the operation (delete, rename or create of a tablespace).
Body layout as parsed below: a 4-byte flags field (only for
MLOG_FILE_CREATE2), a 2-byte name length and the name bytes, and for
MLOG_FILE_RENAME a 2-byte new-name length and the new-name bytes.
Returns the pointer just past the parsed body, or NULL if the bytes between
ptr and end_ptr do not hold a complete body. With a non-zero space_id, a type
other than MLOG_FILE_DELETE, MLOG_FILE_RENAME, MLOG_FILE_CREATE or
MLOG_FILE_CREATE2 ends in ut_error. */
2041 UNIV_INTERN
2042 byte*
2043 fil_op_log_parse_or_replay(
2044 /*=======================*/
2045  byte* ptr,
2048  byte* end_ptr,
2049  ulint type,
2050  ulint space_id,
2053  ulint log_flags)
2055 {
2056  ulint name_len;
2057  ulint new_name_len;
2058  const char* name;
2059  const char* new_name = NULL;
2060  ulint flags = 0;
2061 
/* Only MLOG_FILE_CREATE2 records carry the 4-byte flags field. */
2062  if (type == MLOG_FILE_CREATE2) {
2063  if (end_ptr < ptr + 4) {
2064 
2065  return(NULL);
2066  }
2067 
2068  flags = mach_read_from_4(ptr);
2069  ptr += 4;
2070  }
2071 
2072  if (end_ptr < ptr + 2) {
2073 
2074  return(NULL);
2075  }
2076 
2077  name_len = mach_read_from_2(ptr);
2078 
2079  ptr += 2;
2080 
2081  if (end_ptr < ptr + name_len) {
2082 
2083  return(NULL);
2084  }
2085 
/* name points into the log buffer itself; nothing is copied. */
2086  name = (const char*) ptr;
2087 
2088  ptr += name_len;
2089 
/* Only MLOG_FILE_RENAME records carry a second, length-prefixed name. */
2090  if (type == MLOG_FILE_RENAME) {
2091  if (end_ptr < ptr + 2) {
2092 
2093  return(NULL);
2094  }
2095 
2096  new_name_len = mach_read_from_2(ptr);
2097 
2098  ptr += 2;
2099 
2100  if (end_ptr < ptr + new_name_len) {
2101 
2102  return(NULL);
2103  }
2104 
2105  new_name = (const char*) ptr;
2106 
2107  ptr += new_name_len;
2108  }
2109 
2110  /* We managed to parse a full log record body */
2111  /*
2112  printf("Parsed log rec of type %lu space %lu\n"
2113  "name %s\n", type, space_id, name);
2114 
2115  if (type == MLOG_FILE_RENAME) {
2116  printf("new name %s\n", new_name);
2117  }
2118  */
/* A zero space_id means parse only: return without replaying anything. */
2119  if (!space_id) {
2120 
2121  return(ptr);
2122  }
2123 
2124  /* Let us try to perform the file operation, if sensible. Note that
2125  ibbackup has at this stage already read in all space id info to the
2126  fil0fil.c data structures.
2127 
2128  NOTE that our algorithm is not guaranteed to work correctly if there
2129  were renames of tables during the backup. See ibbackup code for more
2130  on the problem. */
2131 
2132  switch (type) {
2133  case MLOG_FILE_DELETE:
2134  if (fil_tablespace_exists_in_mem(space_id)) {
2135  ut_a(fil_delete_tablespace(space_id));
2136  }
2137 
2138  break;
2139 
2140  case MLOG_FILE_RENAME:
2141  /* We do the rename based on space id, not old file name;
2142  this should guarantee that after the log replay each .ibd file
2143  has the correct name for the latest log sequence number; the
2144  proof is left as an exercise :) */
2145 
2146  if (fil_tablespace_exists_in_mem(space_id)) {
2147  /* Create the database directory for the new name, if
2148  it does not exist yet */
2149  fil_create_directory_for_tablename(new_name);
2150 
2151  /* Rename the table if there is not yet a tablespace
2152  with the same name */
2153 
2154  if (fil_get_space_id_for_table(new_name)
2155  == ULINT_UNDEFINED) {
2156  /* We do not care of the old name, that is
2157  why we pass NULL as the first argument */
2158  if (!fil_rename_tablespace(NULL, space_id,
2159  new_name)) {
2160  ut_error;
2161  }
2162  }
2163  }
2164 
2165  break;
2166 
2167  case MLOG_FILE_CREATE:
2168  case MLOG_FILE_CREATE2:
/* The file is created only if neither the space id nor the name is already
known and the record is not flagged MLOG_FILE_FLAG_TEMP. */
2169  if (fil_tablespace_exists_in_mem(space_id)) {
2170  /* Do nothing */
2171  } else if (fil_get_space_id_for_table(name)
2172  != ULINT_UNDEFINED) {
2173  /* Do nothing */
2174  } else if (log_flags & MLOG_FILE_FLAG_TEMP) {
2175  /* Temporary table, do nothing */
2176  } else {
2177  /* Create the database directory for name, if it does
2178  not exist yet */
2179  fil_create_directory_for_tablename(name);
2180 
2181  if (fil_create_new_single_table_tablespace(
2182  space_id, name, FALSE, flags,
2183  FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) {
2184  ut_error;
2185  }
2186  }
2187 
2188  break;
2189 
2190  default:
2191  ut_error;
2192  }
2193 
2194  return(ptr);
2195 }
2196 
2197 /*******************************************************************/
/* Removes a single-table tablespace from the tablespace memory cache and
deletes its file. The id must not be 0. The function works in two waiting
phases, each polling with os_thread_sleep(20000) between attempts: first it
sets stop_ibuf_merges and waits for pending ibuf merges to drain, then it
sets is_being_deleted and waits for pending flushes and i/o to drain.
Returns TRUE if the space was freed from the cache and the file deletion
succeeded, in which case an MLOG_FILE_DELETE record is also written (unless
UNIV_HOTBACKUP is defined); returns FALSE otherwise. */
2201 UNIV_INTERN
2202 ibool
2203 fil_delete_tablespace(
2204 /*==================*/
2205  ulint id)
2206 {
2207  ibool success;
2208  fil_space_t* space;
2209  fil_node_t* node;
2210  ulint count = 0;
2211  char* path;
2212 
2213  ut_a(id != 0);
/* Phase 1: block new ibuf merges and wait until none are pending. */
2214 stop_ibuf_merges:
2215  mutex_enter(&fil_system->mutex);
2216 
2217  space = fil_space_get_by_id(id);
2218 
2219  if (space != NULL) {
2220  space->stop_ibuf_merges = TRUE;
2221 
2222  if (space->n_pending_ibuf_merges == 0) {
2223  mutex_exit(&fil_system->mutex);
2224 
2225  count = 0;
2226 
2227  goto try_again;
2228  } else {
/* Once more than 5000 retries have passed, warn on every further retry. */
2229  if (count > 5000) {
2230  ut_print_timestamp(stderr);
2231  fputs(" InnoDB: Warning: trying to"
2232  " delete tablespace ", stderr);
2233  ut_print_filename(stderr, space->name);
2234  fprintf(stderr, ",\n"
2235  "InnoDB: but there are %lu pending"
2236  " ibuf merges on it.\n"
2237  "InnoDB: Loop %lu.\n",
2238  (ulong) space->n_pending_ibuf_merges,
2239  (ulong) count);
2240  }
2241 
2242  mutex_exit(&fil_system->mutex);
2243 
2244  os_thread_sleep(20000);
2245  count++;
2246 
2247  goto stop_ibuf_merges;
2248  }
2249  }
2250 
/* The space was not found: fall through so that the lookup below reports
the error and returns FALSE. */
2251  mutex_exit(&fil_system->mutex);
2252  count = 0;
2253 
/* Phase 2: mark the space as being deleted and wait for pending flushes
and i/o on its single node to finish. */
2254 try_again:
2255  mutex_enter(&fil_system->mutex);
2256 
2257  space = fil_space_get_by_id(id);
2258 
2259  if (space == NULL) {
2260  ut_print_timestamp(stderr);
2261  fprintf(stderr,
2262  " InnoDB: Error: cannot delete tablespace %lu\n"
2263  "InnoDB: because it is not found in the"
2264  " tablespace memory cache.\n",
2265  (ulong) id);
2266 
2267  mutex_exit(&fil_system->mutex);
2268 
2269  return(FALSE);
2270  }
2271 
2272  ut_a(space);
2273  ut_a(space->n_pending_ibuf_merges == 0);
2274 
2275  space->is_being_deleted = TRUE;
2276 
2277  ut_a(UT_LIST_GET_LEN(space->chain) == 1);
2278  node = UT_LIST_GET_FIRST(space->chain);
2279 
2280  if (space->n_pending_flushes > 0 || node->n_pending > 0) {
2281  if (count > 1000) {
2282  ut_print_timestamp(stderr);
2283  fputs(" InnoDB: Warning: trying to"
2284  " delete tablespace ", stderr);
2285  ut_print_filename(stderr, space->name);
2286  fprintf(stderr, ",\n"
2287  "InnoDB: but there are %lu flushes"
2288  " and %lu pending i/o's on it\n"
2289  "InnoDB: Loop %lu.\n",
2290  (ulong) space->n_pending_flushes,
2291  (ulong) node->n_pending,
2292  (ulong) count);
2293  }
2294  mutex_exit(&fil_system->mutex);
2295  os_thread_sleep(20000);
2296 
2297  count++;
2298 
2299  goto try_again;
2300  }
2301 
/* Keep a private copy of the name: it is needed for the file deletion and
the log record after the space object has been freed. */
2302  path = mem_strdup(space->name);
2303 
2304  mutex_exit(&fil_system->mutex);
2305 
2306  /* Important: We rely on the data dictionary mutex to ensure
2307  that a race is not possible here. It should serialize the tablespace
2308  drop/free. We acquire an X latch only to avoid a race condition
2309  when accessing the tablespace instance via:
2310 
2311  fsp_get_available_space_in_free_extents().
2312 
2313  There our main motivation is to reduce the contention on the
2314  dictionary mutex. */
2315 
2316  rw_lock_x_lock(&space->latch);
2317 
2318 #ifndef UNIV_HOTBACKUP
2319  /* Invalidate in the buffer pool all pages belonging to the
2320  tablespace. Since we have set space->is_being_deleted = TRUE, readahead
2321  or ibuf merge can no longer read more pages of this tablespace to the
2322  buffer pool. Thus we can clean the tablespace out of the buffer pool
2323  completely and permanently. The flag is_being_deleted also prevents
2324  fil_flush() from being applied to this tablespace. */
2325 
2326  buf_LRU_invalidate_tablespace(id);
2327 #endif
2328  /* printf("Deleting tablespace %s id %lu\n", space->name, id); */
2329 
2330  mutex_enter(&fil_system->mutex);
2331 
/* Passing TRUE makes fil_space_free() release the X latch taken above
before it destroys the latch. */
2332  success = fil_space_free(id, TRUE);
2333 
2334  mutex_exit(&fil_system->mutex);
2335 
2336  if (success) {
2337  success = os_file_delete(path);
2338 
2339  if (!success) {
2340  success = os_file_delete_if_exists(path);
2341  }
2342  } else {
/* fil_space_free() returned FALSE before reaching its own unlock, so the
X latch is released here instead. */
2343  rw_lock_x_unlock(&space->latch);
2344  }
2345 
2346  if (success) {
2347 #ifndef UNIV_HOTBACKUP
2348  /* Write a log record about the deletion of the .ibd
2349  file, so that ibbackup can replay it in the
2350  --apply-log phase. We use a dummy mtr and the familiar
2351  log write mechanism. */
2352  mtr_t mtr;
2353 
2354  /* When replaying the operation in ibbackup, do not try
2355  to write any log record */
2356  mtr_start(&mtr);
2357 
2358  fil_op_write_log(MLOG_FILE_DELETE, id, 0, 0, path, NULL, &mtr);
2359  mtr_commit(&mtr);
2360 #endif
2361  mem_free(path);
2362 
2363  return(TRUE);
2364  }
2365 
2366  mem_free(path);
2367 
2368  return(FALSE);
2369 }
2370 
2371 /*******************************************************************/
2374 UNIV_INTERN
2375 ibool
2376 fil_tablespace_is_being_deleted(
2377 /*============================*/
2378  ulint id)
2379 {
2380  fil_space_t* space;
2381  ibool is_being_deleted;
2382 
2383  mutex_enter(&fil_system->mutex);
2384 
2385  space = fil_space_get_by_id(id);
2386 
2387  ut_a(space != NULL);
2388 
2389  is_being_deleted = space->is_being_deleted;
2390 
2391  mutex_exit(&fil_system->mutex);
2392 
2393  return(is_being_deleted);
2394 }
2395 
2396 #ifndef UNIV_HOTBACKUP
2397 /*******************************************************************/
2406 UNIV_INTERN
2407 ibool
2408 fil_discard_tablespace(
2409 /*===================*/
2410  ulint id)
2411 {
2412  ibool success;
2413 
2414  success = fil_delete_tablespace(id);
2415 
2416  if (!success) {
2417  fprintf(stderr,
2418  "InnoDB: Warning: cannot delete tablespace %lu"
2419  " in DISCARD TABLESPACE.\n"
2420  "InnoDB: But let us remove the"
2421  " insert buffer entries for this tablespace.\n",
2422  (ulong) id);
2423  }
2424 
2425  /* Remove all insert buffer entries for the tablespace */
2426 
2427  ibuf_delete_for_discarded_space(id);
2428 
2429  return(success);
2430 }
2431 #endif /* !UNIV_HOTBACKUP */
2432 
2433 /*******************************************************************/
2436 static
2437 ibool
2438 fil_rename_tablespace_in_mem(
2439 /*=========================*/
2440  fil_space_t* space,
2441  fil_node_t* node,
2442  const char* path)
2443 {
2444  fil_space_t* space2;
2445  const char* old_name = space->name;
2446 
2447  ut_ad(mutex_own(&fil_system->mutex));
2448 
2449  space2 = fil_space_get_by_name(old_name);
2450  if (space != space2) {
2451  fputs("InnoDB: Error: cannot find ", stderr);
2452  ut_print_filename(stderr, old_name);
2453  fputs(" in tablespace memory cache\n", stderr);
2454 
2455  return(FALSE);
2456  }
2457 
2458  space2 = fil_space_get_by_name(path);
2459  if (space2 != NULL) {
2460  fputs("InnoDB: Error: ", stderr);
2461  ut_print_filename(stderr, path);
2462  fputs(" is already in tablespace memory cache\n", stderr);
2463 
2464  return(FALSE);
2465  }
2466 
2467  HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash,
2468  ut_fold_string(space->name), space);
2469  mem_free(space->name);
2470  mem_free(node->name);
2471 
2472  space->name = mem_strdup(path);
2473  node->name = mem_strdup(path);
2474 
2475  HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash,
2476  ut_fold_string(path), space);
2477  return(TRUE);
2478 }
2479 
2480 /*******************************************************************/
2484 static
2485 char*
2486 fil_make_ibd_name(
2487 /*==============*/
2488  const char* name,
2490  ibool is_temp)
2491 {
2492  ulint namelen = strlen(name);
2493  ulint dirlen = strlen(fil_path_to_mysql_datadir);
2494  char* filename = static_cast<char *>(mem_alloc(namelen + dirlen + sizeof "/.ibd"));
2495 
2496  if (is_temp) {
2497  memcpy(filename, name, namelen);
2498  memcpy(filename + namelen, ".ibd", sizeof ".ibd");
2499  } else {
2500  memcpy(filename, fil_path_to_mysql_datadir, dirlen);
2501  filename[dirlen] = '/';
2502 
2503  memcpy(filename + dirlen + 1, name, namelen);
2504  memcpy(filename + dirlen + namelen + 1, ".ibd", sizeof ".ibd");
2505  }
2506 
2507  srv_normalize_path_for_win(filename);
2508 
2509  return(filename);
2510 }
2511 
2512 /*******************************************************************/
/* Renames a single-table tablespace both in the tablespace memory cache and
on disk. The id must not be 0. old_name may be NULL, in which case the
current space->name is taken as the old path and a placeholder string is used
in messages. The function retries, sleeping via os_thread_sleep(20000), while
i/o or flushes are pending or unflushed modifications exist, and gives up
after more than 25000 iterations. On success an MLOG_FILE_RENAME record is
written (unless UNIV_HOTBACKUP is defined).
Returns TRUE if the rename succeeded, FALSE otherwise. */
2516 UNIV_INTERN
2517 ibool
2518 fil_rename_tablespace(
2519 /*==================*/
2520  const char* old_name,
2524  ulint id,
2525  const char* new_name)
2528 {
2529  ibool success;
2530  fil_space_t* space;
2531  fil_node_t* node;
2532  ulint count = 0;
2533  char* path;
2534  ibool old_name_was_specified = TRUE;
2535  char* old_path;
2536 
2537  ut_a(id != 0);
2538 
2539  if (old_name == NULL) {
2540  old_name = "(name not specified)";
2541  old_name_was_specified = FALSE;
2542  }
2543 retry:
2544  count++;
2545 
/* Once more than 1000 iterations have passed, warn on every further one. */
2546  if (count > 1000) {
2547  ut_print_timestamp(stderr);
2548  fputs(" InnoDB: Warning: problems renaming ", stderr);
2549  ut_print_filename(stderr, old_name);
2550  fputs(" to ", stderr);
2551  ut_print_filename(stderr, new_name);
2552  fprintf(stderr, ", %lu iterations\n", (ulong) count);
2553  }
2554 
2555  mutex_enter(&fil_system->mutex);
2556 
2557  space = fil_space_get_by_id(id);
2558 
2559  if (space == NULL) {
2560  fprintf(stderr,
2561  "InnoDB: Error: cannot find space id %lu"
2562  " in the tablespace memory cache\n"
2563  "InnoDB: though the table ", (ulong) id);
2564  ut_print_filename(stderr, old_name);
2565  fputs(" in a rename operation should have that id\n", stderr);
2566  mutex_exit(&fil_system->mutex);
2567 
2568  return(FALSE);
2569  }
2570 
/* Give up: clear the stop_ios flag set by an earlier iteration and fail. */
2571  if (count > 25000) {
2572  space->stop_ios = FALSE;
2573  mutex_exit(&fil_system->mutex);
2574 
2575  return(FALSE);
2576  }
2577 
2578  /* We temporarily close the .ibd file because we do not trust that
2579  operating systems can rename an open file. For the closing we have to
2580  wait until there are no pending i/o's or flushes on the file. */
2581 
2582  space->stop_ios = TRUE;
2583 
2584  ut_a(UT_LIST_GET_LEN(space->chain) == 1);
2585  node = UT_LIST_GET_FIRST(space->chain);
2586 
2587  if (node->n_pending > 0 || node->n_pending_flushes > 0) {
2588  /* There are pending i/o's or flushes, sleep for a while and
2589  retry */
2590 
2591  mutex_exit(&fil_system->mutex);
2592 
2593  os_thread_sleep(20000);
2594 
2595  goto retry;
2596 
2597  } else if (node->modification_counter > node->flush_counter) {
2598  /* Flush the space */
2599 
2600  mutex_exit(&fil_system->mutex);
2601 
2602  os_thread_sleep(20000);
2603 
2604  fil_flush(id);
2605 
2606  goto retry;
2607 
2608  } else if (node->open) {
2609  /* Close the file */
2610 
2611  fil_node_close_file(node, fil_system);
2612  }
2613 
2614  /* Check that the old name in the space is right */
2615 
2616  if (old_name_was_specified) {
2617  old_path = fil_make_ibd_name(old_name, FALSE);
2618 
2619  ut_a(strcmp(space->name, old_path) == 0);
2620  ut_a(strcmp(node->name, old_path) == 0);
2621  } else {
2622  old_path = mem_strdup(space->name);
2623  }
2624 
2625  /* Rename the tablespace and the node in the memory cache */
2626  path = fil_make_ibd_name(new_name, FALSE);
2627  success = fil_rename_tablespace_in_mem(space, node, path);
2628 
/* The file on disk is renamed only after the in-memory rename succeeded. */
2629  if (success) {
2630  success = os_file_rename(innodb_file_data_key, old_path, path);
2631 
2632  if (!success) {
2633  /* We have to revert the changes we made
2634  to the tablespace memory cache */
2635 
2636  ut_a(fil_rename_tablespace_in_mem(space, node,
2637  old_path));
2638  }
2639  }
2640 
2641  mem_free(path);
2642  mem_free(old_path);
2643 
2644  space->stop_ios = FALSE;
2645 
2646  mutex_exit(&fil_system->mutex);
2647 
2648 #ifndef UNIV_HOTBACKUP
/* The log record is written after fil_system->mutex has been released.
When old_name was passed as NULL, the placeholder string is what gets logged
as the old name. */
2649  if (success) {
2650  mtr_t mtr;
2651 
2652  mtr_start(&mtr);
2653 
2654  fil_op_write_log(MLOG_FILE_RENAME, id, 0, 0, old_name, new_name,
2655  &mtr);
2656  mtr_commit(&mtr);
2657  }
2658 #endif
2659  return(success);
2660 }
2661 
2662 /*******************************************************************/
2669 UNIV_INTERN
2670 ulint
2671 fil_create_new_single_table_tablespace(
2672 /*===================================*/
2673  ulint space_id,
2674  const char* tablename,
2678  ibool is_temp,
2680  ulint flags,
2681  ulint size)
2684 {
2685  os_file_t file;
2686  ibool ret;
2687  ulint err;
2688  byte* buf2;
2689  byte* page;
2690  ibool success;
2691  char* path;
2692 
2693  ut_a(space_id > 0);
2694  ut_a(space_id < SRV_LOG_SPACE_FIRST_ID);
2696  /* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
2697  ROW_FORMAT=COMPACT
2698  ((table->flags & ~(~0 << DICT_TF_BITS)) == DICT_TF_COMPACT) and
2699  ROW_FORMAT=REDUNDANT (table->flags == 0). For any other
2700  format, the tablespace flags should equal
2701  (table->flags & ~(~0 << DICT_TF_BITS)). */
2702  ut_a(flags != DICT_TF_COMPACT);
2703  ut_a(!(flags & (~0UL << DICT_TF_BITS)));
2704 
2705  path = fil_make_ibd_name(tablename, is_temp);
2706 
2707  file = os_file_create(innodb_file_data_key, path,
2708  OS_FILE_CREATE, OS_FILE_NORMAL,
2709  OS_DATA_FILE, &ret);
2710  if (ret == FALSE) {
2711  ut_print_timestamp(stderr);
2712  fputs(" InnoDB: Error creating file ", stderr);
2713  ut_print_filename(stderr, path);
2714  fputs(".\n", stderr);
2715 
2716  /* The following call will print an error message */
2717 
2718  err = os_file_get_last_error(TRUE);
2719 
2720  if (err == OS_FILE_ALREADY_EXISTS) {
2721  fputs("InnoDB: The file already exists though"
2722  " the corresponding table did not\n"
2723  "InnoDB: exist in the InnoDB data dictionary."
2724  " Have you moved InnoDB\n"
2725  "InnoDB: .ibd files around without using the"
2726  " SQL commands\n"
2727  "InnoDB: DISCARD TABLESPACE and"
2728  " IMPORT TABLESPACE, or did\n"
2729  "InnoDB: mysqld crash in the middle of"
2730  " CREATE TABLE? You can\n"
2731  "InnoDB: resolve the problem by"
2732  " removing the file ", stderr);
2733  ut_print_filename(stderr, path);
2734  fputs("\n"
2735  "InnoDB: under the 'datadir' of MySQL.\n",
2736  stderr);
2737 
2738  mem_free(path);
2739  return(DB_TABLESPACE_ALREADY_EXISTS);
2740  }
2741 
2742  if (err == OS_FILE_DISK_FULL) {
2743 
2744  mem_free(path);
2745  return(DB_OUT_OF_FILE_SPACE);
2746  }
2747 
2748  mem_free(path);
2749  return(DB_ERROR);
2750  }
2751 
2752  ret = os_file_set_size(path, file, size * UNIV_PAGE_SIZE, 0);
2753 
2754  if (!ret) {
2755  err = DB_OUT_OF_FILE_SPACE;
2756 error_exit:
2757  os_file_close(file);
2758 error_exit2:
2759  os_file_delete(path);
2760 
2761  mem_free(path);
2762  return(err);
2763  }
2764 
2765  /* printf("Creating tablespace %s id %lu\n", path, space_id); */
2766 
2767  /* We have to write the space id to the file immediately and flush the
2768  file to disk. This is because in crash recovery we must be aware what
2769  tablespaces exist and what are their space id's, so that we can apply
2770  the log records to the right file. It may take quite a while until
2771  buffer pool flush algorithms write anything to the file and flush it to
2772  disk. If we would not write here anything, the file would be filled
2773  with zeros from the call of os_file_set_size(), until a buffer pool
2774  flush would write to it. */
2775 
2776  buf2 = static_cast<byte *>(ut_malloc(3 * UNIV_PAGE_SIZE));
2777  /* Align the memory for file i/o if we might have O_DIRECT set */
2778  page = static_cast<byte *>(ut_align(buf2, UNIV_PAGE_SIZE));
2779 
2780  memset(page, '\0', UNIV_PAGE_SIZE);
2781 
2782  fsp_header_init_fields(page, space_id, flags);
2784 
2785  if (!(flags & DICT_TF_ZSSIZE_MASK)) {
2786  buf_flush_init_for_writing(page, NULL, 0);
2787  ret = os_file_write(path, file, page, 0, 0, UNIV_PAGE_SIZE);
2788  } else {
2789  page_zip_des_t page_zip;
2790  ulint zip_size;
2791 
2792  zip_size = ((PAGE_ZIP_MIN_SIZE >> 1)
2793  << ((flags & DICT_TF_ZSSIZE_MASK)
2794  >> DICT_TF_ZSSIZE_SHIFT));
2795 
2796  page_zip_set_size(&page_zip, zip_size);
2797  page_zip.data = page + UNIV_PAGE_SIZE;
2798 #ifdef UNIV_DEBUG
2799  page_zip.m_start =
2800 #endif /* UNIV_DEBUG */
2801  page_zip.m_end = page_zip.m_nonempty =
2802  page_zip.n_blobs = 0;
2803  buf_flush_init_for_writing(page, &page_zip, 0);
2804  ret = os_file_write(path, file, page_zip.data, 0, 0, zip_size);
2805  }
2806 
2807  ut_free(buf2);
2808 
2809  if (!ret) {
2810  fputs("InnoDB: Error: could not write the first page"
2811  " to tablespace ", stderr);
2812  ut_print_filename(stderr, path);
2813  putc('\n', stderr);
2814  err = DB_ERROR;
2815  goto error_exit;
2816  }
2817 
2818  ret = os_file_flush(file);
2819 
2820  if (!ret) {
2821  fputs("InnoDB: Error: file flush of tablespace ", stderr);
2822  ut_print_filename(stderr, path);
2823  fputs(" failed\n", stderr);
2824  err = DB_ERROR;
2825  goto error_exit;
2826  }
2827 
2828  os_file_close(file);
2829 
2830  success = fil_space_create(path, space_id, flags, FIL_TABLESPACE);
2831 
2832  if (!success) {
2833  err = DB_ERROR;
2834  goto error_exit2;
2835  }
2836 
2837  fil_node_create(path, size, space_id, FALSE);
2838 
2839 #ifndef UNIV_HOTBACKUP
2840  {
2841  mtr_t mtr;
2842 
2843  mtr_start(&mtr);
2844 
2845  fil_op_write_log(flags
2847  : MLOG_FILE_CREATE,
2848  space_id,
2849  is_temp ? MLOG_FILE_FLAG_TEMP : 0,
2850  flags,
2851  tablename, NULL, &mtr);
2852 
2853  mtr_commit(&mtr);
2854  }
2855 #endif
2856  mem_free(path);
2857  return(DB_SUCCESS);
2858 }
2859 
2860 #ifndef UNIV_HOTBACKUP
2861 /********************************************************************/
2871 UNIV_INTERN
2872 ibool
2873 fil_reset_too_high_lsns(
2874 /*====================*/
2875  const char* name,
2877  ib_uint64_t current_lsn)
2880 {
2881  os_file_t file;
2882  char* filepath;
2883  byte* page;
2884  byte* buf2;
2885  ib_uint64_t flush_lsn;
2886  ulint space_id;
2887  ib_int64_t file_size;
2888  ib_int64_t offset;
2889  ulint zip_size;
2890  ibool success;
2891  page_zip_des_t page_zip;
2892 
2893  filepath = fil_make_ibd_name(name, FALSE);
2894 
2895  file = os_file_create_simple_no_error_handling(
2896  innodb_file_data_key, filepath, OS_FILE_OPEN,
2897  OS_FILE_READ_WRITE, &success);
2898  if (!success) {
2899  /* The following call prints an error message */
2900  os_file_get_last_error(TRUE);
2901 
2902  ut_print_timestamp(stderr);
2903 
2904  fputs(" InnoDB: Error: trying to open a table,"
2905  " but could not\n"
2906  "InnoDB: open the tablespace file ", stderr);
2907  ut_print_filename(stderr, filepath);
2908  fputs("!\n", stderr);
2909  mem_free(filepath);
2910 
2911  return(FALSE);
2912  }
2913 
2914  /* Read the first page of the tablespace */
2915 
2916  buf2 = static_cast<byte *>(ut_malloc(3 * UNIV_PAGE_SIZE));
2917  /* Align the memory for file i/o if we might have O_DIRECT set */
2918  page = static_cast<byte *>(ut_align(buf2, UNIV_PAGE_SIZE));
2919 
2920  success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
2921  if (!success) {
2922 
2923  goto func_exit;
2924  }
2925 
2926  /* We have to read the file flush lsn from the header of the file */
2927 
2928  flush_lsn = mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN);
2929 
2930  if (current_lsn >= flush_lsn) {
2931  /* Ok */
2932  success = TRUE;
2933 
2934  goto func_exit;
2935  }
2936 
2937  space_id = fsp_header_get_space_id(page);
2938  zip_size = fsp_header_get_zip_size(page);
2939 
2940  page_zip_des_init(&page_zip);
2941  page_zip_set_size(&page_zip, zip_size);
2942  if (zip_size) {
2943  page_zip.data = page + UNIV_PAGE_SIZE;
2944  }
2945 
2946  ut_print_timestamp(stderr);
2947  fprintf(stderr,
2948  " InnoDB: Flush lsn in the tablespace file %lu"
2949  " to be imported\n"
2950  "InnoDB: is %"PRIu64", which exceeds current"
2951  " system lsn %"PRIu64".\n"
2952  "InnoDB: We reset the lsn's in the file ",
2953  (ulong) space_id,
2954  flush_lsn, current_lsn);
2955  ut_print_filename(stderr, filepath);
2956  fputs(".\n", stderr);
2957 
2958  ut_a(ut_is_2pow(zip_size));
2959  ut_a(zip_size <= UNIV_PAGE_SIZE);
2960 
2961  /* Loop through all the pages in the tablespace and reset the lsn and
2962  the page checksum if necessary */
2963 
2964  file_size = os_file_get_size_as_iblonglong(file);
2965 
2966  for (offset = 0; offset < file_size;
2967  offset += zip_size ? zip_size : UNIV_PAGE_SIZE) {
2968  success = os_file_read(file, page,
2969  (ulint)(offset & 0xFFFFFFFFUL),
2970  (ulint)(offset >> 32),
2971  zip_size ? zip_size : UNIV_PAGE_SIZE);
2972  if (!success) {
2973 
2974  goto func_exit;
2975  }
2976  if (mach_read_from_8(page + FIL_PAGE_LSN) > current_lsn) {
2977  /* We have to reset the lsn */
2978 
2979  if (zip_size) {
2980  memcpy(page_zip.data, page, zip_size);
2981  buf_flush_init_for_writing(
2982  page, &page_zip, current_lsn);
2983  success = os_file_write(
2984  filepath, file, page_zip.data,
2985  (ulint) offset & 0xFFFFFFFFUL,
2986  (ulint) (offset >> 32), zip_size);
2987  } else {
2988  buf_flush_init_for_writing(
2989  page, NULL, current_lsn);
2990  success = os_file_write(
2991  filepath, file, page,
2992  (ulint)(offset & 0xFFFFFFFFUL),
2993  (ulint)(offset >> 32),
2994  UNIV_PAGE_SIZE);
2995  }
2996 
2997  if (!success) {
2998 
2999  goto func_exit;
3000  }
3001  }
3002  }
3003 
3004  success = os_file_flush(file);
3005  if (!success) {
3006 
3007  goto func_exit;
3008  }
3009 
3010  /* We now update the flush_lsn stamp at the start of the file */
3011  success = os_file_read(file, page, 0, 0,
3012  zip_size ? zip_size : UNIV_PAGE_SIZE);
3013  if (!success) {
3014 
3015  goto func_exit;
3016  }
3017 
3018  mach_write_to_8(page + FIL_PAGE_FILE_FLUSH_LSN, current_lsn);
3019 
3020  success = os_file_write(filepath, file, page, 0, 0,
3021  zip_size ? zip_size : UNIV_PAGE_SIZE);
3022  if (!success) {
3023 
3024  goto func_exit;
3025  }
3026  success = os_file_flush(file);
3027 func_exit:
3028  os_file_close(file);
3029  ut_free(buf2);
3030  mem_free(filepath);
3031 
3032  return(success);
3033 }
3034 
3035 /********************************************************************/
3045 UNIV_INTERN
3046 ibool
3047 fil_open_single_table_tablespace(
3048 /*=============================*/
3049  ibool check_space_id,
3056  ulint id,
3057  ulint flags,
3058  const char* name)
3060 {
3061  os_file_t file;
3062  char* filepath;
3063  ibool success;
3064  byte* buf2;
3065  byte* page;
3066  ulint space_id;
3067  ulint space_flags;
3068 
3069  filepath = fil_make_ibd_name(name, FALSE);
3070 
3071  /* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
3072  ROW_FORMAT=COMPACT
3073  ((table->flags & ~(~0 << DICT_TF_BITS)) == DICT_TF_COMPACT) and
3074  ROW_FORMAT=REDUNDANT (table->flags == 0). For any other
3075  format, the tablespace flags should equal
3076  (table->flags & ~(~0 << DICT_TF_BITS)). */
3077  ut_a(flags != DICT_TF_COMPACT);
3078  ut_a(!(flags & (~0UL << DICT_TF_BITS)));
3079 
3080  file = os_file_create_simple_no_error_handling(
3081  innodb_file_data_key, filepath, OS_FILE_OPEN,
3082  OS_FILE_READ_ONLY, &success);
3083  if (!success) {
3084  /* The following call prints an error message */
3085  os_file_get_last_error(TRUE);
3086 
3087  ut_print_timestamp(stderr);
3088 
3089  fputs(" InnoDB: Error: trying to open a table,"
3090  " but could not\n"
3091  "InnoDB: open the tablespace file ", stderr);
3092  ut_print_filename(stderr, filepath);
3093  fputs("!\n"
3094  "InnoDB: Have you moved InnoDB .ibd files around"
3095  " without using the\n"
3096  "InnoDB: commands DISCARD TABLESPACE and"
3097  " IMPORT TABLESPACE?\n"
3098  "InnoDB: It is also possible that this is"
3099  " a temporary table #sql...,\n"
3100  "InnoDB: and MySQL removed the .ibd file for this.\n"
3101  "InnoDB: Please refer to\n"
3102  "InnoDB: " REFMAN "innodb-troubleshooting-datadict.html\n"
3103  "InnoDB: for how to resolve the issue.\n", stderr);
3104 
3105  mem_free(filepath);
3106 
3107  return(FALSE);
3108  }
3109 
3110  if (!check_space_id) {
3111  space_id = id;
3112 
3113  goto skip_check;
3114  }
3115 
3116  /* Read the first page of the tablespace */
3117 
3118  buf2 = static_cast<byte *>(ut_malloc(2 * UNIV_PAGE_SIZE));
3119  /* Align the memory for file i/o if we might have O_DIRECT set */
3120  page = static_cast<byte *>(ut_align(buf2, UNIV_PAGE_SIZE));
3121 
3122  success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
3123 
3124  /* We have to read the tablespace id and flags from the file. */
3125 
3126  space_id = fsp_header_get_space_id(page);
3127  space_flags = fsp_header_get_flags(page);
3128 
3129  ut_free(buf2);
3130 
3131  if (UNIV_UNLIKELY(space_id != id
3132  || space_flags != (flags & ~(~0 << DICT_TF_BITS)))) {
3133  ut_print_timestamp(stderr);
3134 
3135  fputs(" InnoDB: Error: tablespace id and flags in file ",
3136  stderr);
3137  ut_print_filename(stderr, filepath);
3138  fprintf(stderr, " are %lu and %lu, but in the InnoDB\n"
3139  "InnoDB: data dictionary they are %lu and %lu.\n"
3140  "InnoDB: Have you moved InnoDB .ibd files"
3141  " around without using the\n"
3142  "InnoDB: commands DISCARD TABLESPACE and"
3143  " IMPORT TABLESPACE?\n"
3144  "InnoDB: Please refer to\n"
3145  "InnoDB: " REFMAN "innodb-troubleshooting-datadict.html\n"
3146  "InnoDB: for how to resolve the issue.\n",
3147  (ulong) space_id, (ulong) space_flags,
3148  (ulong) id, (ulong) flags);
3149 
3150  success = FALSE;
3151 
3152  goto func_exit;
3153  }
3154 
3155 skip_check:
3156  success = fil_space_create(filepath, space_id, flags, FIL_TABLESPACE);
3157 
3158  if (!success) {
3159  goto func_exit;
3160  }
3161 
3162  /* We do not measure the size of the file, that is why we pass the 0
3163  below */
3164 
3165  fil_node_create(filepath, 0, space_id, FALSE);
3166 func_exit:
3167  os_file_close(file);
3168  mem_free(filepath);
3169 
3170  return(success);
3171 }
3172 #endif /* !UNIV_HOTBACKUP */
3173 
3174 #ifdef UNIV_HOTBACKUP
3175 /*******************************************************************/
3179 static
3180 char*
3181 fil_make_ibbackup_old_name(
3182 /*=======================*/
3183  const char* name)
3184 {
3185  static const char suffix[] = "_ibbackup_old_vers_";
3186  ulint len = strlen(name);
3187  char* path = mem_alloc(len + (15 + sizeof suffix));
3188 
3189  memcpy(path, name, len);
3190  memcpy(path + len, suffix, (sizeof suffix) - 1);
3191  ut_sprintf_timestamp_without_extra_chars(path + len + sizeof suffix);
3192  return(path);
3193 }
3194 #endif /* UNIV_HOTBACKUP */
3195 
3196 /********************************************************************/
3199 static
3200 void
3201 fil_load_single_table_tablespace(
3202 /*=============================*/
3203  const char* dbname,
3204  const char* filename)
3206 {
3207  os_file_t file;
3208  char* filepath;
3209  ibool success;
3210  byte* buf2;
3211  byte* page;
3212  ulint space_id;
3213  ulint flags;
3214  ulint size_low;
3215  ulint size_high;
3216  uint64_t size;
3217 #ifdef UNIV_HOTBACKUP
3218  fil_space_t* space;
3219 #endif
3220  filepath = static_cast<char *>(mem_alloc(strlen(dbname) + strlen(filename)
3221  + strlen(fil_path_to_mysql_datadir) + 3));
3222 
3223  sprintf(filepath, "%s/%s/%s", fil_path_to_mysql_datadir, dbname,
3224  filename);
3225  srv_normalize_path_for_win(filepath);
3226 #ifdef __WIN__
3227 # ifndef UNIV_HOTBACKUP
3228  /* If lower_case_table_names is 0 or 2, then MySQL allows database
3229  directory names with upper case letters. On Windows, all table and
3230  database names in InnoDB are internally always in lower case. Put the
3231  file path to lower case, so that we are consistent with InnoDB's
3232  internal data dictionary. */
3233 
3234  dict_casedn_str(filepath);
3235 # endif /* !UNIV_HOTBACKUP */
3236 #endif
3237  file = os_file_create_simple_no_error_handling(
3238  innodb_file_data_key, filepath, OS_FILE_OPEN,
3239  OS_FILE_READ_ONLY, &success);
3240  if (!success) {
3241  /* The following call prints an error message */
3242  os_file_get_last_error(TRUE);
3243 
3244  fprintf(stderr,
3245  "InnoDB: Error: could not open single-table tablespace"
3246  " file\n"
3247  "InnoDB: %s!\n"
3248  "InnoDB: We do not continue the crash recovery,"
3249  " because the table may become\n"
3250  "InnoDB: corrupt if we cannot apply the log records"
3251  " in the InnoDB log to it.\n"
3252  "InnoDB: To fix the problem and start mysqld:\n"
3253  "InnoDB: 1) If there is a permission problem"
3254  " in the file and mysqld cannot\n"
3255  "InnoDB: open the file, you should"
3256  " modify the permissions.\n"
3257  "InnoDB: 2) If the table is not needed, or you can"
3258  " restore it from a backup,\n"
3259  "InnoDB: then you can remove the .ibd file,"
3260  " and InnoDB will do a normal\n"
3261  "InnoDB: crash recovery and ignore that table.\n"
3262  "InnoDB: 3) If the file system or the"
3263  " disk is broken, and you cannot remove\n"
3264  "InnoDB: the .ibd file, you can set"
3265  " innodb_force_recovery > 0 in my.cnf\n"
3266  "InnoDB: and force InnoDB to continue crash"
3267  " recovery here.\n", filepath);
3268 
3269  mem_free(filepath);
3270 
3271  if (srv_force_recovery > 0) {
3272  fprintf(stderr,
3273  "InnoDB: innodb_force_recovery"
3274  " was set to %lu. Continuing crash recovery\n"
3275  "InnoDB: even though we cannot access"
3276  " the .ibd file of this table.\n",
3277  srv_force_recovery);
3278  return;
3279  }
3280 
3281  exit(1);
3282  }
3283 
3284  success = os_file_get_size(file, &size_low, &size_high);
3285 
3286  if (!success) {
3287  /* The following call prints an error message */
3288  os_file_get_last_error(TRUE);
3289 
3290  fprintf(stderr,
3291  "InnoDB: Error: could not measure the size"
3292  " of single-table tablespace file\n"
3293  "InnoDB: %s!\n"
3294  "InnoDB: We do not continue crash recovery,"
3295  " because the table will become\n"
3296  "InnoDB: corrupt if we cannot apply the log records"
3297  " in the InnoDB log to it.\n"
3298  "InnoDB: To fix the problem and start mysqld:\n"
3299  "InnoDB: 1) If there is a permission problem"
3300  " in the file and mysqld cannot\n"
3301  "InnoDB: access the file, you should"
3302  " modify the permissions.\n"
3303  "InnoDB: 2) If the table is not needed,"
3304  " or you can restore it from a backup,\n"
3305  "InnoDB: then you can remove the .ibd file,"
3306  " and InnoDB will do a normal\n"
3307  "InnoDB: crash recovery and ignore that table.\n"
3308  "InnoDB: 3) If the file system or the disk is broken,"
3309  " and you cannot remove\n"
3310  "InnoDB: the .ibd file, you can set"
3311  " innodb_force_recovery > 0 in my.cnf\n"
3312  "InnoDB: and force InnoDB to continue"
3313  " crash recovery here.\n", filepath);
3314 
3315  os_file_close(file);
3316  mem_free(filepath);
3317 
3318  if (srv_force_recovery > 0) {
3319  fprintf(stderr,
3320  "InnoDB: innodb_force_recovery"
3321  " was set to %lu. Continuing crash recovery\n"
3322  "InnoDB: even though we cannot access"
3323  " the .ibd file of this table.\n",
3324  srv_force_recovery);
3325  return;
3326  }
3327 
3328  exit(1);
3329  }
3330 
3331  /* TODO: What to do in other cases where we cannot access an .ibd
3332  file during a crash recovery? */
3333 
3334  /* Every .ibd file is created >= 4 pages in size. Smaller files
3335  cannot be ok. */
3336 
3337  size = (((ib_int64_t)size_high) << 32) + (ib_int64_t)size_low;
3338 #ifndef UNIV_HOTBACKUP
3339  if (size < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
3340  fprintf(stderr,
3341  "InnoDB: Error: the size of single-table tablespace"
3342  " file %s\n"
3343  "InnoDB: is only %lu %lu, should be at least %lu!",
3344  filepath,
3345  (ulong) size_high,
3346  (ulong) size_low, (ulong) (4 * UNIV_PAGE_SIZE));
3347  os_file_close(file);
3348  mem_free(filepath);
3349 
3350  return;
3351  }
3352 #endif
3353  /* Read the first page of the tablespace if the size big enough */
3354 
3355  buf2 = static_cast<byte *>(ut_malloc(2 * UNIV_PAGE_SIZE));
3356  /* Align the memory for file i/o if we might have O_DIRECT set */
3357  page = static_cast<byte *>(ut_align(buf2, UNIV_PAGE_SIZE));
3358 
3359  if (size >= FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
3360  success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
3361 
3362  /* We have to read the tablespace id from the file */
3363 
3364  space_id = fsp_header_get_space_id(page);
3365  flags = fsp_header_get_flags(page);
3366  } else {
3367  space_id = ULINT_UNDEFINED;
3368  flags = 0;
3369  }
3370 
3371 #ifndef UNIV_HOTBACKUP
3372  if (space_id == ULINT_UNDEFINED || space_id == 0) {
3373  fprintf(stderr,
3374  "InnoDB: Error: tablespace id %lu in file %s"
3375  " is not sensible\n",
3376  (ulong) space_id,
3377  filepath);
3378  goto func_exit;
3379  }
3380 #else
3381  if (space_id == ULINT_UNDEFINED || space_id == 0) {
3382  char* new_path;
3383 
3384  fprintf(stderr,
3385  "InnoDB: Renaming tablespace %s of id %lu,\n"
3386  "InnoDB: to %s_ibbackup_old_vers_<timestamp>\n"
3387  "InnoDB: because its size %" PRId64 " is too small"
3388  " (< 4 pages 16 kB each),\n"
3389  "InnoDB: or the space id in the file header"
3390  " is not sensible.\n"
3391  "InnoDB: This can happen in an ibbackup run,"
3392  " and is not dangerous.\n",
3393  filepath, space_id, filepath, size);
3394  os_file_close(file);
3395 
3396  new_path = fil_make_ibbackup_old_name(filepath);
3397  ut_a(os_file_rename(innodb_file_data_key, filepath, new_path));
3398 
3399  ut_free(buf2);
3400  mem_free(filepath);
3401  mem_free(new_path);
3402 
3403  return;
3404  }
3405 
3406  /* A backup may contain the same space several times, if the space got
3407  renamed at a sensitive time. Since it is enough to have one version of
3408  the space, we rename the file if a space with the same space id
3409  already exists in the tablespace memory cache. We rather rename the
3410  file than delete it, because if there is a bug, we do not want to
3411  destroy valuable data. */
3412 
3413  mutex_enter(&fil_system->mutex);
3414 
3415  space = fil_space_get_by_id(space_id);
3416 
3417  if (space) {
3418  char* new_path;
3419 
3420  fprintf(stderr,
3421  "InnoDB: Renaming tablespace %s of id %lu,\n"
3422  "InnoDB: to %s_ibbackup_old_vers_<timestamp>\n"
3423  "InnoDB: because space %s with the same id\n"
3424  "InnoDB: was scanned earlier. This can happen"
3425  " if you have renamed tables\n"
3426  "InnoDB: during an ibbackup run.\n",
3427  filepath, space_id, filepath,
3428  space->name);
3429  os_file_close(file);
3430 
3431  new_path = fil_make_ibbackup_old_name(filepath);
3432 
3433  mutex_exit(&fil_system->mutex);
3434 
3435  ut_a(os_file_rename(innodb_file_data_key, filepath, new_path));
3436 
3437  ut_free(buf2);
3438  mem_free(filepath);
3439  mem_free(new_path);
3440 
3441  return;
3442  }
3443  mutex_exit(&fil_system->mutex);
3444 #endif
3445  success = fil_space_create(filepath, space_id, flags, FIL_TABLESPACE);
3446 
3447  if (!success) {
3448 
3449  if (srv_force_recovery > 0) {
3450  fprintf(stderr,
3451  "InnoDB: innodb_force_recovery"
3452  " was set to %lu. Continuing crash recovery\n"
3453  "InnoDB: even though the tablespace creation"
3454  " of this table failed.\n",
3455  srv_force_recovery);
3456  goto func_exit;
3457  }
3458 
3459  exit(1);
3460  }
3461 
3462  /* We do not use the size information we have about the file, because
3463  the rounding formula for extents and pages is somewhat complex; we
3464  let fil_node_open() do that task. */
3465 
3466  fil_node_create(filepath, 0, space_id, FALSE);
3467 func_exit:
3468  os_file_close(file);
3469  ut_free(buf2);
3470  mem_free(filepath);
3471 }
3472 
3473 /***********************************************************************/
3479 int
3480 fil_file_readdir_next_file(
3481 /*=======================*/
3482  ulint* err,
3484  const char* dirname,
3485  os_file_dir_t dir,
3486  os_file_stat_t* info)
3487 {
3488  ulint i;
3489  int ret;
3490 
3491  for (i = 0; i < 100; i++) {
3492  ret = os_file_readdir_next_file(dirname, dir, info);
3493 
3494  if (ret != -1) {
3495 
3496  return(ret);
3497  }
3498 
3499  fprintf(stderr,
3500  "InnoDB: Error: os_file_readdir_next_file()"
3501  " returned -1 in\n"
3502  "InnoDB: directory %s\n"
3503  "InnoDB: Crash recovery may have failed"
3504  " for some .ibd files!\n", dirname);
3505 
3506  *err = DB_ERROR;
3507  }
3508 
3509  return(-1);
3510 }
3511 
3512 /********************************************************************/
3520 UNIV_INTERN
3521 ulint
3522 fil_load_single_table_tablespaces(void)
3523 /*===================================*/
3524 {
3525  int ret;
3526  char* dbpath = NULL;
3527  ulint dbpath_len = 100;
3528  os_file_dir_t dir;
3529  os_file_dir_t dbdir;
3530  os_file_stat_t dbinfo;
3531  os_file_stat_t fileinfo;
3532  ulint err = DB_SUCCESS;
3533 
3534  /* The datadir of MySQL is always the default directory of mysqld */
3535 
3536  dir = os_file_opendir(fil_path_to_mysql_datadir, TRUE);
3537 
3538  if (dir == NULL) {
3539 
3540  return(DB_ERROR);
3541  }
3542 
3543  dbpath = static_cast<char *>(mem_alloc(dbpath_len));
3544 
3545  /* Scan all directories under the datadir. They are the database
3546  directories of MySQL. */
3547 
3548  ret = fil_file_readdir_next_file(&err, fil_path_to_mysql_datadir, dir,
3549  &dbinfo);
3550  while (ret == 0) {
3551  ulint len;
3552  /* printf("Looking at %s in datadir\n", dbinfo.name); */
3553 
3554  if (dbinfo.type == OS_FILE_TYPE_FILE
3555  || dbinfo.type == OS_FILE_TYPE_UNKNOWN) {
3556 
3557  goto next_datadir_item;
3558  }
3559 
3560  /* We found a symlink or a directory; try opening it to see
3561  if a symlink is a directory */
3562 
3563  len = strlen(fil_path_to_mysql_datadir)
3564  + strlen (dbinfo.name) + 2;
3565  if (len > dbpath_len) {
3566  dbpath_len = len;
3567 
3568  if (dbpath) {
3569  mem_free(dbpath);
3570  }
3571 
3572  dbpath = static_cast<char *>(mem_alloc(dbpath_len));
3573  }
3574  sprintf(dbpath, "%s/%s", fil_path_to_mysql_datadir,
3575  dbinfo.name);
3577 
3578  dbdir = os_file_opendir(dbpath, FALSE);
3579 
3580  if (dbdir != NULL) {
3581  /* printf("Opened dir %s\n", dbinfo.name); */
3582 
3583  /* We found a database directory; loop through it,
3584  looking for possible .ibd files in it */
3585 
3586  ret = fil_file_readdir_next_file(&err, dbpath, dbdir,
3587  &fileinfo);
3588  while (ret == 0) {
3589  /* printf(
3590  " Looking at file %s\n", fileinfo.name); */
3591 
3592  if (fileinfo.type == OS_FILE_TYPE_DIR) {
3593 
3594  goto next_file_item;
3595  }
3596 
3597  /* We found a symlink or a file */
3598  if (strlen(fileinfo.name) > 4
3599  && 0 == strcmp(fileinfo.name
3600  + strlen(fileinfo.name) - 4,
3601  ".ibd")) {
3602  /* The name ends in .ibd; try opening
3603  the file */
3604  fil_load_single_table_tablespace(
3605  dbinfo.name, fileinfo.name);
3606  }
3607 next_file_item:
3608  ret = fil_file_readdir_next_file(&err,
3609  dbpath, dbdir,
3610  &fileinfo);
3611  }
3612 
3613  if (0 != os_file_closedir(dbdir)) {
3614  fputs("InnoDB: Warning: could not"
3615  " close database directory ", stderr);
3616  ut_print_filename(stderr, dbpath);
3617  putc('\n', stderr);
3618 
3619  err = DB_ERROR;
3620  }
3621  }
3622 
3623 next_datadir_item:
3624  ret = fil_file_readdir_next_file(&err,
3625  fil_path_to_mysql_datadir,
3626  dir, &dbinfo);
3627  }
3628 
3629  mem_free(dbpath);
3630 
3631  if (0 != os_file_closedir(dir)) {
3632  fprintf(stderr,
3633  "InnoDB: Error: could not close MySQL datadir\n");
3634 
3635  return(DB_ERROR);
3636  }
3637 
3638  return(err);
3639 }
3640 
3641 /*******************************************************************/
3645 UNIV_INTERN
3646 ibool
3647 fil_tablespace_deleted_or_being_deleted_in_mem(
3648 /*===========================================*/
3649  ulint id,
3650  ib_int64_t version)
3653 {
3654  fil_space_t* space;
3655 
3656  ut_ad(fil_system);
3657 
3658  mutex_enter(&fil_system->mutex);
3659 
3660  space = fil_space_get_by_id(id);
3661 
3662  if (space == NULL || space->is_being_deleted) {
3663  mutex_exit(&fil_system->mutex);
3664 
3665  return(TRUE);
3666  }
3667 
3668  if (version != ((ib_int64_t)-1)
3669  && space->tablespace_version != version) {
3670  mutex_exit(&fil_system->mutex);
3671 
3672  return(TRUE);
3673  }
3674 
3675  mutex_exit(&fil_system->mutex);
3676 
3677  return(FALSE);
3678 }
3679 
3680 /*******************************************************************/
3683 UNIV_INTERN
3684 ibool
3685 fil_tablespace_exists_in_mem(
3686 /*=========================*/
3687  ulint id)
3688 {
3689  fil_space_t* space;
3690 
3691  ut_ad(fil_system);
3692 
3693  mutex_enter(&fil_system->mutex);
3694 
3695  space = fil_space_get_by_id(id);
3696 
3697  mutex_exit(&fil_system->mutex);
3698 
3699  return(space != NULL);
3700 }
3701 
3702 /*******************************************************************/
3707 UNIV_INTERN
3708 ibool
3709 fil_space_for_table_exists_in_mem(
3710 /*==============================*/
3711  ulint id,
3712  const char* name,
3715  ibool is_temp,
3717  ibool mark_space,
3723  ibool print_error_if_does_not_exist)
3728 {
3729  fil_space_t* tablespace;
3730  fil_space_t* space;
3731  char* path;
3732 
3733  ut_ad(fil_system);
3734 
3735  mutex_enter(&fil_system->mutex);
3736 
3737  path = fil_make_ibd_name(name, is_temp);
3738 
3739  /* Look if there is a space with the same id */
3740 
3741  space = fil_space_get_by_id(id);
3742 
3743  /* Look if there is a space with the same name; the name is the
3744  directory path from the datadir to the file */
3745 
3746  tablespace = fil_space_get_by_name(path);
3747  if (space && space == tablespace) {
3748  /* Found */
3749 
3750  if (mark_space) {
3751  space->mark = TRUE;
3752  }
3753 
3754  mem_free(path);
3755  mutex_exit(&fil_system->mutex);
3756 
3757  return(TRUE);
3758  }
3759 
3760  if (!print_error_if_does_not_exist) {
3761 
3762  mem_free(path);
3763  mutex_exit(&fil_system->mutex);
3764 
3765  return(FALSE);
3766  }
3767 
3768  if (space == NULL) {
3769  if (tablespace == NULL) {
3770  ut_print_timestamp(stderr);
3771  fputs(" InnoDB: Error: table ", stderr);
3772  ut_print_filename(stderr, name);
3773  fprintf(stderr, "\n"
3774  "InnoDB: in InnoDB data dictionary"
3775  " has tablespace id %lu,\n"
3776  "InnoDB: but tablespace with that id"
3777  " or name does not exist. Have\n"
3778  "InnoDB: you deleted or moved .ibd files?\n"
3779  "InnoDB: This may also be a table created with"
3780  " CREATE TEMPORARY TABLE\n"
3781  "InnoDB: whose .ibd and .frm files"
3782  " MySQL automatically removed, but the\n"
3783  "InnoDB: table still exists in the"
3784  " InnoDB internal data dictionary.\n",
3785  (ulong) id);
3786  } else {
3787  ut_print_timestamp(stderr);
3788  fputs(" InnoDB: Error: table ", stderr);
3789  ut_print_filename(stderr, name);
3790  fprintf(stderr, "\n"
3791  "InnoDB: in InnoDB data dictionary has"
3792  " tablespace id %lu,\n"
3793  "InnoDB: but a tablespace with that id"
3794  " does not exist. There is\n"
3795  "InnoDB: a tablespace of name %s and id %lu,"
3796  " though. Have\n"
3797  "InnoDB: you deleted or moved .ibd files?\n",
3798  (ulong) id, tablespace->name,
3799  (ulong) tablespace->id);
3800  }
3801 error_exit:
3802  fputs("InnoDB: Please refer to\n"
3803  "InnoDB: " REFMAN "innodb-troubleshooting-datadict.html\n"
3804  "InnoDB: for how to resolve the issue.\n", stderr);
3805 
3806  mem_free(path);
3807  mutex_exit(&fil_system->mutex);
3808 
3809  return(FALSE);
3810  }
3811 
3812  if (0 != strcmp(space->name, path)) {
3813  ut_print_timestamp(stderr);
3814  fputs(" InnoDB: Error: table ", stderr);
3815  ut_print_filename(stderr, name);
3816  fprintf(stderr, "\n"
3817  "InnoDB: in InnoDB data dictionary has"
3818  " tablespace id %lu,\n"
3819  "InnoDB: but the tablespace with that id"
3820  " has name %s.\n"
3821  "InnoDB: Have you deleted or moved .ibd files?\n",
3822  (ulong) id, space->name);
3823 
3824  if (tablespace != NULL) {
3825  fputs("InnoDB: There is a tablespace"
3826  " with the right name\n"
3827  "InnoDB: ", stderr);
3828  ut_print_filename(stderr, tablespace->name);
3829  fprintf(stderr, ", but its id is %lu.\n",
3830  (ulong) tablespace->id);
3831  }
3832 
3833  goto error_exit;
3834  }
3835 
3836  mem_free(path);
3837  mutex_exit(&fil_system->mutex);
3838 
3839  return(FALSE);
3840 }
3841 
3842 /*******************************************************************/
3846 static
3847 ulint
3848 fil_get_space_id_for_table(
3849 /*=======================*/
3850  const char* name)
3852 {
3853  fil_space_t* tablespace;
3854  ulint id = ULINT_UNDEFINED;
3855  char* path;
3856 
3857  ut_ad(fil_system);
3858 
3859  mutex_enter(&fil_system->mutex);
3860 
3861  path = fil_make_ibd_name(name, FALSE);
3862 
3863  /* Look if there is a space with the same name; the name is the
3864  directory path to the file */
3865 
3866  tablespace = fil_space_get_by_name(path);
3867 
3868  if (tablespace) {
3869  id = tablespace->id;
3870  }
3871 
3872  mem_free(path);
3873 
3874  mutex_exit(&fil_system->mutex);
3875 
3876  return(id);
3877 }
3878 
3879 /**********************************************************************/
3884 UNIV_INTERN
3885 ibool
3886 fil_extend_space_to_desired_size(
3887 /*=============================*/
3888  ulint* actual_size,
3891  ulint space_id,
3892  ulint size_after_extend)
3895 {
3896  fil_node_t* node;
3897  fil_space_t* space;
3898  byte* buf2;
3899  byte* buf;
3900  ulint buf_size;
3901  ulint start_page_no;
3902  ulint file_start_page_no;
3903  ulint offset_high;
3904  ulint offset_low;
3905  ulint page_size;
3906  ibool success = TRUE;
3907 
3908  fil_mutex_enter_and_prepare_for_io(space_id);
3909 
3910  space = fil_space_get_by_id(space_id);
3911  ut_a(space);
3912 
3913  if (space->size >= size_after_extend) {
3914  /* Space already big enough */
3915 
3916  *actual_size = space->size;
3917 
3918  mutex_exit(&fil_system->mutex);
3919 
3920  return(TRUE);
3921  }
3922 
3923  page_size = dict_table_flags_to_zip_size(space->flags);
3924  if (!page_size) {
3925  page_size = UNIV_PAGE_SIZE;
3926  }
3927 
3928  node = UT_LIST_GET_LAST(space->chain);
3929 
3930  fil_node_prepare_for_io(node, fil_system, space);
3931 
3932  start_page_no = space->size;
3933  file_start_page_no = space->size - node->size;
3934 
3935  /* Extend at most 64 pages at a time */
3936  buf_size = ut_min(64, size_after_extend - start_page_no) * page_size;
3937  buf2 = static_cast<byte *>(mem_alloc(buf_size + page_size));
3938  buf = static_cast<byte *>(ut_align(buf2, page_size));
3939 
3940  memset(buf, 0, buf_size);
3941 
3942  while (start_page_no < size_after_extend) {
3943  ulint n_pages = ut_min(buf_size / page_size,
3944  size_after_extend - start_page_no);
3945 
3946  offset_high = (start_page_no - file_start_page_no)
3947  / (4096 * ((1024 * 1024) / page_size));
3948  offset_low = ((start_page_no - file_start_page_no)
3949  % (4096 * ((1024 * 1024) / page_size)))
3950  * page_size;
3951 #ifdef UNIV_HOTBACKUP
3952  success = os_file_write(node->name, node->handle, buf,
3953  offset_low, offset_high,
3954  page_size * n_pages);
3955 #else
3956  success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
3957  node->name, node->handle, buf,
3958  offset_low, offset_high,
3959  page_size * n_pages,
3960  NULL, NULL);
3961 #endif
3962  if (success) {
3963  node->size += n_pages;
3964  space->size += n_pages;
3965 
3966  os_has_said_disk_full = FALSE;
3967  } else {
3968  /* Let us measure the size of the file to determine
3969  how much we were able to extend it */
3970 
3971  n_pages = ((ulint)
3973  node->handle)
3974  / page_size)) - node->size;
3975 
3976  node->size += n_pages;
3977  space->size += n_pages;
3978 
3979  break;
3980  }
3981 
3982  start_page_no += n_pages;
3983  }
3984 
3985  mem_free(buf2);
3986 
3987  fil_node_complete_io(node, fil_system, OS_FILE_WRITE);
3988 
3989  *actual_size = space->size;
3990 
3991 #ifndef UNIV_HOTBACKUP
3992  if (space_id == 0) {
3993  ulint pages_per_mb = (1024 * 1024) / page_size;
3994 
3995  /* Keep the last data file size info up to date, rounded to
3996  full megabytes */
3997 
3998  srv_data_file_sizes[srv_n_data_files - 1]
3999  = (node->size / pages_per_mb) * pages_per_mb;
4000  }
4001 #endif /* !UNIV_HOTBACKUP */
4002 
4003  /*
4004  printf("Extended %s to %lu, actual size %lu pages\n", space->name,
4005  size_after_extend, *actual_size); */
4006  mutex_exit(&fil_system->mutex);
4007 
4008  fil_flush(space_id);
4009 
4010  return(success);
4011 }
4012 
4013 #ifdef UNIV_HOTBACKUP
4014 /********************************************************************/
4019 UNIV_INTERN
4020 void
4021 fil_extend_tablespaces_to_stored_len(void)
4022 /*======================================*/
4023 {
4024  fil_space_t* space;
4025  byte* buf;
4026  ulint actual_size;
4027  ulint size_in_header;
4028  ulint error;
4029  ibool success;
4030 
4031  buf = mem_alloc(UNIV_PAGE_SIZE);
4032 
4033  mutex_enter(&fil_system->mutex);
4034 
4035  space = UT_LIST_GET_FIRST(fil_system->space_list);
4036 
4037  while (space) {
4038  ut_a(space->purpose == FIL_TABLESPACE);
4039 
4040  mutex_exit(&fil_system->mutex); /* no need to protect with a
4041  mutex, because this is a
4042  single-threaded operation */
4043  error = fil_read(TRUE, space->id,
4045  0, 0, UNIV_PAGE_SIZE, buf, NULL);
4046  ut_a(error == DB_SUCCESS);
4047 
4048  size_in_header = fsp_get_size_low(buf);
4049 
4050  success = fil_extend_space_to_desired_size(
4051  &actual_size, space->id, size_in_header);
4052  if (!success) {
4053  fprintf(stderr,
4054  "InnoDB: Error: could not extend the"
4055  " tablespace of %s\n"
4056  "InnoDB: to the size stored in header,"
4057  " %lu pages;\n"
4058  "InnoDB: size after extension %lu pages\n"
4059  "InnoDB: Check that you have free disk space"
4060  " and retry!\n",
4061  space->name, size_in_header, actual_size);
4062  exit(1);
4063  }
4064 
4065  mutex_enter(&fil_system->mutex);
4066 
4067  space = UT_LIST_GET_NEXT(space_list, space);
4068  }
4069 
4070  mutex_exit(&fil_system->mutex);
4071 
4072  mem_free(buf);
4073 }
4074 #endif
4075 
4076 /*========== RESERVE FREE EXTENTS (for a B-tree split, for example) ===*/
4077 
4078 /*******************************************************************/
4081 UNIV_INTERN
4082 ibool
4083 fil_space_reserve_free_extents(
4084 /*===========================*/
4085  ulint id,
4086  ulint n_free_now,
4087  ulint n_to_reserve)
4088 {
4089  fil_space_t* space;
4090  ibool success;
4091 
4092  ut_ad(fil_system);
4093 
4094  mutex_enter(&fil_system->mutex);
4095 
4096  space = fil_space_get_by_id(id);
4097 
4098  ut_a(space);
4099 
4100  if (space->n_reserved_extents + n_to_reserve > n_free_now) {
4101  success = FALSE;
4102  } else {
4103  space->n_reserved_extents += n_to_reserve;
4104  success = TRUE;
4105  }
4106 
4107  mutex_exit(&fil_system->mutex);
4108 
4109  return(success);
4110 }
4111 
4112 /*******************************************************************/
4114 UNIV_INTERN
4115 void
4116 fil_space_release_free_extents(
4117 /*===========================*/
4118  ulint id,
4119  ulint n_reserved)
4120 {
4121  fil_space_t* space;
4122 
4123  ut_ad(fil_system);
4124 
4125  mutex_enter(&fil_system->mutex);
4126 
4127  space = fil_space_get_by_id(id);
4128 
4129  ut_a(space);
4130  ut_a(space->n_reserved_extents >= n_reserved);
4131 
4132  space->n_reserved_extents -= n_reserved;
4133 
4134  mutex_exit(&fil_system->mutex);
4135 }
4136 
4137 /*******************************************************************/
4140 UNIV_INTERN
4141 ulint
4142 fil_space_get_n_reserved_extents(
4143 /*=============================*/
4144  ulint id)
4145 {
4146  fil_space_t* space;
4147  ulint n;
4148 
4149  ut_ad(fil_system);
4150 
4151  mutex_enter(&fil_system->mutex);
4152 
4153  space = fil_space_get_by_id(id);
4154 
4155  ut_a(space);
4156 
4157  n = space->n_reserved_extents;
4158 
4159  mutex_exit(&fil_system->mutex);
4160 
4161  return(n);
4162 }
4163 
4164 /*============================ FILE I/O ================================*/
4165 
4166 /********************************************************************/
4173 static
4174 void
4175 fil_node_prepare_for_io(
4176 /*====================*/
4177  fil_node_t* node,
4178  fil_system_t* system,
4179  fil_space_t* space)
4180 {
4181  ut_ad(node && system && space);
4182  ut_ad(mutex_own(&(system->mutex)));
4183 
4184  if (system->n_open > system->max_n_open + 5) {
4185  ut_print_timestamp(stderr);
4186  fprintf(stderr,
4187  " InnoDB: Warning: open files %lu"
4188  " exceeds the limit %lu\n",
4189  (ulong) system->n_open,
4190  (ulong) system->max_n_open);
4191  }
4192 
4193  if (node->open == FALSE) {
4194  /* File is closed: open it */
4195  ut_a(node->n_pending == 0);
4196 
4197  fil_node_open_file(node, system, space);
4198  }
4199 
4200  if (node->n_pending == 0 && space->purpose == FIL_TABLESPACE
4201  && space->id != 0) {
4202  /* The node is in the LRU list, remove it */
4203 
4204  ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
4205 
4206  UT_LIST_REMOVE(LRU, system->LRU, node);
4207  }
4208 
4209  node->n_pending++;
4210 }
4211 
4212 /********************************************************************/
4215 static
4216 void
4217 fil_node_complete_io(
4218 /*=================*/
4219  fil_node_t* node,
4220  fil_system_t* system,
4221  ulint type)
4224 {
4225  ut_ad(node);
4226  ut_ad(system);
4227  ut_ad(mutex_own(&(system->mutex)));
4228 
4229  ut_a(node->n_pending > 0);
4230 
4231  node->n_pending--;
4232 
4233  if (type == OS_FILE_WRITE) {
4234  system->modification_counter++;
4236 
4237  if (!node->space->is_in_unflushed_spaces) {
4238 
4239  node->space->is_in_unflushed_spaces = TRUE;
4240  UT_LIST_ADD_FIRST(unflushed_spaces,
4241  system->unflushed_spaces,
4242  node->space);
4243  }
4244  }
4245 
4246  if (node->n_pending == 0 && node->space->purpose == FIL_TABLESPACE
4247  && node->space->id != 0) {
4248  /* The node must be put back to the LRU list */
4249  UT_LIST_ADD_FIRST(LRU, system->LRU, node);
4250  }
4251 }
4252 
4253 /********************************************************************/
4255 static
4256 void
4257 fil_report_invalid_page_access(
4258 /*===========================*/
4259  ulint block_offset,
4260  ulint space_id,
4261  const char* space_name,
4262  ulint byte_offset,
4263  ulint len,
4264  ulint type)
4265 {
4266  fprintf(stderr,
4267  "InnoDB: Error: trying to access page number %lu"
4268  " in space %lu,\n"
4269  "InnoDB: space name %s,\n"
4270  "InnoDB: which is outside the tablespace bounds.\n"
4271  "InnoDB: Byte offset %lu, len %lu, i/o type %lu.\n"
4272  "InnoDB: If you get this error at mysqld startup,"
4273  " please check that\n"
4274  "InnoDB: your my.cnf matches the ibdata files"
4275  " that you have in the\n"
4276  "InnoDB: MySQL server.\n",
4277  (ulong) block_offset, (ulong) space_id, space_name,
4278  (ulong) byte_offset, (ulong) len, (ulong) type);
4279 }
4280 
4281 /********************************************************************/
4285 UNIV_INTERN
4286 ulint
4287 fil_io(
4288 /*===*/
4289  ulint type,
4298  ibool sync,
4299  ulint space_id,
4300  ulint zip_size,
4302  ulint block_offset,
4303  ulint byte_offset,
4306  ulint len,
4309  void* buf,
4312  void* message)
4314 {
4315  ulint mode;
4316  fil_space_t* space;
4317  fil_node_t* node;
4318  ulint offset_high;
4319  ulint offset_low;
4320  ibool ret;
4321  ulint is_log;
4322  ulint wake_later;
4323 
4324  is_log = type & OS_FILE_LOG;
4325  type = type & ~OS_FILE_LOG;
4326 
4327  wake_later = type & OS_AIO_SIMULATED_WAKE_LATER;
4328  type = type & ~OS_AIO_SIMULATED_WAKE_LATER;
4329 
4330  ut_ad(byte_offset < UNIV_PAGE_SIZE);
4331  ut_ad(!zip_size || !byte_offset);
4332  ut_ad(ut_is_2pow(zip_size));
4333  ut_ad(buf);
4334  ut_ad(len > 0);
4335  ut_ad(fil_validate_skip());
4336 #ifndef UNIV_HOTBACKUP
4337 # ifndef UNIV_LOG_DEBUG
4338  /* ibuf bitmap pages must be read in the sync aio mode: */
4339  ut_ad(recv_no_ibuf_operations || (type == OS_FILE_WRITE)
4340  || !ibuf_bitmap_page(zip_size, block_offset)
4341  || sync || is_log);
4342 # endif /* UNIV_LOG_DEBUG */
4343  if (sync) {
4344  mode = OS_AIO_SYNC;
4345  } else if (is_log) {
4346  mode = OS_AIO_LOG;
4347  } else if (type == OS_FILE_READ
4349  && ibuf_page(space_id, zip_size, block_offset, NULL)) {
4350  mode = OS_AIO_IBUF;
4351  } else {
4352  mode = OS_AIO_NORMAL;
4353  }
4354 #else /* !UNIV_HOTBACKUP */
4355  ut_a(sync);
4356  mode = OS_AIO_SYNC;
4357 #endif /* !UNIV_HOTBACKUP */
4358 
4359  if (type == OS_FILE_READ) {
4360  srv_data_read+= len;
4361  } else if (type == OS_FILE_WRITE) {
4362  srv_data_written+= len;
4363  }
4364 
4365  /* Reserve the fil_system mutex and make sure that we can open at
4366  least one file while holding it, if the file is not already open */
4367 
4368  fil_mutex_enter_and_prepare_for_io(space_id);
4369 
4370  space = fil_space_get_by_id(space_id);
4371 
4372  if (!space) {
4373  mutex_exit(&fil_system->mutex);
4374 
4375  ut_print_timestamp(stderr);
4376  fprintf(stderr,
4377  " InnoDB: Error: trying to do i/o"
4378  " to a tablespace which does not exist.\n"
4379  "InnoDB: i/o type %lu, space id %lu,"
4380  " page no. %lu, i/o length %lu bytes\n",
4381  (ulong) type, (ulong) space_id, (ulong) block_offset,
4382  (ulong) len);
4383 
4384  return(DB_TABLESPACE_DELETED);
4385  }
4386 
4387  ut_ad((mode != OS_AIO_IBUF) || (space->purpose == FIL_TABLESPACE));
4388 
4389  node = UT_LIST_GET_FIRST(space->chain);
4390 
4391  for (;;) {
4392  if (UNIV_UNLIKELY(node == NULL)) {
4393  fil_report_invalid_page_access(
4394  block_offset, space_id, space->name,
4395  byte_offset, len, type);
4396 
4397  ut_error;
4398  }
4399 
4400  if (space->id != 0 && node->size == 0) {
4401  /* We do not know the size of a single-table tablespace
4402  before we open the file */
4403 
4404  break;
4405  }
4406 
4407  if (node->size > block_offset) {
4408  /* Found! */
4409  break;
4410  } else {
4411  block_offset -= node->size;
4412  node = UT_LIST_GET_NEXT(chain, node);
4413  }
4414  }
4415 
4416  /* Open file if closed */
4417  fil_node_prepare_for_io(node, fil_system, space);
4418 
4419  /* Check that at least the start offset is within the bounds of a
4420  single-table tablespace */
4421  if (UNIV_UNLIKELY(node->size <= block_offset)
4422  && space->id != 0 && space->purpose == FIL_TABLESPACE) {
4423 
4424  fil_report_invalid_page_access(
4425  block_offset, space_id, space->name, byte_offset,
4426  len, type);
4427 
4428  ut_error;
4429  }
4430 
4431  /* Now we have made the changes in the data structures of fil_system */
4432  mutex_exit(&fil_system->mutex);
4433 
4434  /* Calculate the low 32 bits and the high 32 bits of the file offset */
4435 
4436  if (!zip_size) {
4437  offset_high = (block_offset >> (32 - UNIV_PAGE_SIZE_SHIFT));
4438  offset_low = ((block_offset << UNIV_PAGE_SIZE_SHIFT)
4439  & 0xFFFFFFFFUL) + byte_offset;
4440 
4441  ut_a(node->size - block_offset
4442  >= ((byte_offset + len + (UNIV_PAGE_SIZE - 1))
4443  / UNIV_PAGE_SIZE));
4444  } else {
4445  ulint zip_size_shift;
4446  switch (zip_size) {
4447  case 1024: zip_size_shift = 10; break;
4448  case 2048: zip_size_shift = 11; break;
4449  case 4096: zip_size_shift = 12; break;
4450  case 8192: zip_size_shift = 13; break;
4451  case 16384: zip_size_shift = 14; break;
4452  default: ut_error;
4453  }
4454  offset_high = block_offset >> (32 - zip_size_shift);
4455  offset_low = (block_offset << zip_size_shift & 0xFFFFFFFFUL)
4456  + byte_offset;
4457  ut_a(node->size - block_offset
4458  >= (len + (zip_size - 1)) / zip_size);
4459  }
4460 
4461  /* Do aio */
4462 
4463  ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
4464  ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0);
4465 
4466 #ifdef UNIV_HOTBACKUP
4467  /* In ibbackup do normal i/o, not aio */
4468  if (type == OS_FILE_READ) {
4469  ret = os_file_read(node->handle, buf, offset_low, offset_high,
4470  len);
4471  } else {
4472  ret = os_file_write(node->name, node->handle, buf,
4473  offset_low, offset_high, len);
4474  }
4475 #else
4476  /* Queue the aio request */
4477  ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
4478  offset_low, offset_high, len, node, message);
4479 #endif
4480  ut_a(ret);
4481 
4482  if (mode == OS_AIO_SYNC) {
4483  /* The i/o operation is already completed when we return from
4484  os_aio: */
4485 
4486  mutex_enter(&fil_system->mutex);
4487 
4488  fil_node_complete_io(node, fil_system, type);
4489 
4490  mutex_exit(&fil_system->mutex);
4491 
4492  ut_ad(fil_validate_skip());
4493  }
4494 
4495  return(DB_SUCCESS);
4496 }
4497 
4498 /********************************************************************/
4500 UNIV_INTERN
4501 bool
4502 fil_is_exist(
4503 /*=========*/
4504  ulint space_id,
4505  ulint block_offset)
4506 {
4507  fil_space_t* space;
4508  fil_node_t* node;
4509 
4510  /* Reserve the fil_system mutex and make sure that we can open at
4511  least one file while holding it, if the file is not already open */
4512 
4513  fil_mutex_enter_and_prepare_for_io(space_id);
4514 
4515  space = fil_space_get_by_id(space_id);
4516 
4517  if (!space) {
4518  mutex_exit(&fil_system->mutex);
4519  return(false);
4520  }
4521 
4522  node = UT_LIST_GET_FIRST(space->chain);
4523 
4524  for (;;) {
4525  if (UNIV_UNLIKELY(node == NULL)) {
4526  mutex_exit(&fil_system->mutex);
4527  return(false);
4528  }
4529 
4530  if (space->id != 0 && node->size == 0) {
4531  /* We do not know the size of a single-table tablespace
4532  before we open the file */
4533 
4534  break;
4535  }
4536 
4537  if (node->size > block_offset) {
4538  /* Found! */
4539  break;
4540  } else {
4541  block_offset -= node->size;
4542  node = UT_LIST_GET_NEXT(chain, node);
4543  }
4544  }
4545 
4546  /* Open file if closed */
4547  fil_node_prepare_for_io(node, fil_system, space);
4548  fil_node_complete_io(node, fil_system, OS_FILE_READ);
4549 
4550  /* Check that at least the start offset is within the bounds of a
4551  single-table tablespace */
4552  if (UNIV_UNLIKELY(node->size <= block_offset)
4553  && space->id != 0 && space->purpose == FIL_TABLESPACE) {
4554  mutex_exit(&fil_system->mutex);
4555  return(false);
4556  }
4557 
4558  mutex_exit(&fil_system->mutex);
4559  return(true);
4560 }
4561 
4562 #ifndef UNIV_HOTBACKUP
4563 /**********************************************************************/
4568 UNIV_INTERN
4569 void
4570 fil_aio_wait(
4571 /*=========*/
4572  ulint segment)
4574 {
4575  ibool ret;
4576  fil_node_t* fil_node;
4577  void* message;
4578  ulint type;
4579 
4580  ut_ad(fil_validate_skip());
4581 
4582  if (srv_use_native_aio) {
4583  srv_set_io_thread_op_info(segment, "native aio handle");
4584 #ifdef WIN_ASYNC_IO
4585  ret = os_aio_windows_handle(segment, 0, &fil_node,
4586  &message, &type);
4587 #elif defined(LINUX_NATIVE_AIO)
4588  ret = os_aio_linux_handle(segment, &fil_node,
4589  &message, &type);
4590 #else
4591  ret = 0; /* Eliminate compiler warning */
4592  ut_error;
4593 #endif
4594  } else {
4595  srv_set_io_thread_op_info(segment, "simulated aio handle");
4596 
4597  ret = os_aio_simulated_handle(segment, &fil_node,
4598  &message, &type);
4599  }
4600 
4601  ut_a(ret);
4602 
4603  srv_set_io_thread_op_info(segment, "complete io for fil node");
4604 
4605  mutex_enter(&fil_system->mutex);
4606 
4607  fil_node_complete_io(fil_node, fil_system, type);
4608 
4609  mutex_exit(&fil_system->mutex);
4610 
4611  ut_ad(fil_validate_skip());
4612 
4613  /* Do the i/o handling */
4614  /* IMPORTANT: since i/o handling for reads will read also the insert
4615  buffer in tablespace 0, you have to be very careful not to introduce
4616  deadlocks in the i/o system. We keep tablespace 0 data files always
4617  open, and use a special i/o thread to serve insert buffer requests. */
4618 
4619  if (fil_node->space->purpose == FIL_TABLESPACE) {
4620  srv_set_io_thread_op_info(segment, "complete io for buf page");
4621  buf_page_io_complete(static_cast<buf_page_t *>(message));
4622  } else {
4623  srv_set_io_thread_op_info(segment, "complete io for log");
4624  log_io_complete(static_cast<log_group_t *>(message));
4625  }
4626 }
4627 #endif /* UNIV_HOTBACKUP */
4628 
4629 /**********************************************************************/
4632 UNIV_INTERN
4633 void
4634 fil_flush(
4635 /*======*/
4636  ulint space_id)
4638 {
4639  fil_space_t* space;
4640  fil_node_t* node;
4641  os_file_t file;
4642  ib_int64_t old_mod_counter;
4643 
4644  mutex_enter(&fil_system->mutex);
4645 
4646  space = fil_space_get_by_id(space_id);
4647 
4648  if (!space || space->is_being_deleted) {
4649  mutex_exit(&fil_system->mutex);
4650 
4651  return;
4652  }
4653 
4654  space->n_pending_flushes++;
4656  node = UT_LIST_GET_FIRST(space->chain);
4657 
4658  while (node) {
4659  if (node->modification_counter > node->flush_counter) {
4660  ut_a(node->open);
4661 
4662  /* We want to flush the changes at least up to
4663  old_mod_counter */
4664  old_mod_counter = node->modification_counter;
4665 
4666  if (space->purpose == FIL_TABLESPACE) {
4667  fil_n_pending_tablespace_flushes++;
4668  } else {
4669  fil_n_pending_log_flushes++;
4670  fil_n_log_flushes++;
4671  }
4672 #ifdef __WIN__
4673  if (node->is_raw_disk) {
4674 
4675  goto skip_flush;
4676  }
4677 #endif
4678 retry:
4679  if (node->n_pending_flushes > 0) {
4680  /* We want to avoid calling os_file_flush() on
4681  the file twice at the same time, because we do
4682  not know what bugs OS's may contain in file
4683  i/o; sleep for a while */
4684 
4685  mutex_exit(&fil_system->mutex);
4686 
4687  os_thread_sleep(20000);
4688 
4689  mutex_enter(&fil_system->mutex);
4690 
4691  if (node->flush_counter >= old_mod_counter) {
4692 
4693  goto skip_flush;
4694  }
4695 
4696  goto retry;
4697  }
4698 
4699  ut_a(node->open);
4700  file = node->handle;
4701  node->n_pending_flushes++;
4702 
4703  mutex_exit(&fil_system->mutex);
4704 
4705  /* fprintf(stderr, "Flushing to file %s\n",
4706  node->name); */
4707 
4708  os_file_flush(file);
4709 
4710  mutex_enter(&fil_system->mutex);
4711 
4712  node->n_pending_flushes--;
4713 skip_flush:
4714  if (node->flush_counter < old_mod_counter) {
4715  node->flush_counter = old_mod_counter;
4716 
4717  if (space->is_in_unflushed_spaces
4718  && fil_space_is_flushed(space)) {
4719 
4720  space->is_in_unflushed_spaces = FALSE;
4721 
4723  unflushed_spaces,
4724  fil_system->unflushed_spaces,
4725  space);
4726  }
4727  }
4728 
4729  if (space->purpose == FIL_TABLESPACE) {
4730  fil_n_pending_tablespace_flushes--;
4731  } else {
4732  fil_n_pending_log_flushes--;
4733  }
4734  }
4735 
4736  node = UT_LIST_GET_NEXT(chain, node);
4737  }
4738 
4739  space->n_pending_flushes--;
4740 
4741  mutex_exit(&fil_system->mutex);
4742 }
4743 
4744 /**********************************************************************/
4747 UNIV_INTERN
4748 void
4749 fil_flush_file_spaces(
4750 /*==================*/
4751  ulint purpose)
4752 {
4753  fil_space_t* space;
4754  ulint* space_ids;
4755  ulint n_space_ids;
4756  ulint i;
4757 
4758  mutex_enter(&fil_system->mutex);
4759 
4760  n_space_ids = UT_LIST_GET_LEN(fil_system->unflushed_spaces);
4761  if (n_space_ids == 0) {
4762 
4763  mutex_exit(&fil_system->mutex);
4764  return;
4765  }
4766 
4767  /* Assemble a list of space ids to flush. Previously, we
4768  traversed fil_system->unflushed_spaces and called UT_LIST_GET_NEXT()
4769  on a space that was just removed from the list by fil_flush().
4770  Thus, the space could be dropped and the memory overwritten. */
4771  space_ids = static_cast<unsigned long *>(mem_alloc(n_space_ids * sizeof *space_ids));
4772 
4773  n_space_ids = 0;
4774 
4775  for (space = UT_LIST_GET_FIRST(fil_system->unflushed_spaces);
4776  space;
4777  space = UT_LIST_GET_NEXT(unflushed_spaces, space)) {
4778 
4779  if (space->purpose == purpose && !space->is_being_deleted) {
4780 
4781  space_ids[n_space_ids++] = space->id;
4782  }
4783  }
4784 
4785  mutex_exit(&fil_system->mutex);
4786 
4787  /* Flush the spaces. It will not hurt to call fil_flush() on
4788  a non-existing space id. */
4789  for (i = 0; i < n_space_ids; i++) {
4790 
4791  fil_flush(space_ids[i]);
4792  }
4793 
4794  mem_free(space_ids);
4795 }
4796 
4797 /******************************************************************/
4800 UNIV_INTERN
4801 ibool
4802 fil_validate(void)
4803 /*==============*/
4804 {
4805  fil_space_t* space;
4806  fil_node_t* fil_node;
4807  ulint n_open = 0;
4808  ulint i;
4809 
4810  mutex_enter(&fil_system->mutex);
4811 
4812  /* Look for spaces in the hash table */
4813 
4814  for (i = 0; i < hash_get_n_cells(fil_system->spaces); i++) {
4815 
4816  space = static_cast<fil_space_t *>(HASH_GET_FIRST(fil_system->spaces, i));
4817 
4818  while (space != NULL) {
4819  UT_LIST_VALIDATE(chain, fil_node_t, space->chain,
4820  ut_a(ut_list_node_313->open
4821  || !ut_list_node_313->n_pending));
4822 
4823  fil_node = UT_LIST_GET_FIRST(space->chain);
4824 
4825  while (fil_node != NULL) {
4826  if (fil_node->n_pending > 0) {
4827  ut_a(fil_node->open);
4828  }
4829 
4830  if (fil_node->open) {
4831  n_open++;
4832  }
4833  fil_node = UT_LIST_GET_NEXT(chain, fil_node);
4834  }
4835  space = static_cast<fil_space_t *>(HASH_GET_NEXT(hash, space));
4836  }
4837  }
4838 
4839  ut_a(fil_system->n_open == n_open);
4840 
4841  UT_LIST_VALIDATE(LRU, fil_node_t, fil_system->LRU, (void) 0);
4842 
4843  fil_node = UT_LIST_GET_FIRST(fil_system->LRU);
4844 
4845  while (fil_node != NULL) {
4846  ut_a(fil_node->n_pending == 0);
4847  ut_a(fil_node->open);
4848  ut_a(fil_node->space->purpose == FIL_TABLESPACE);
4849  ut_a(fil_node->space->id != 0);
4850 
4851  fil_node = UT_LIST_GET_NEXT(LRU, fil_node);
4852  }
4853 
4854  mutex_exit(&fil_system->mutex);
4855 
4856  return(TRUE);
4857 }
4858 
4859 /********************************************************************/
4862 UNIV_INTERN
4863 ibool
4864 fil_addr_is_null(
4865 /*=============*/
4866  fil_addr_t addr)
4867 {
4868  return(addr.page == FIL_NULL);
4869 }
4870 
4871 /********************************************************************/
4874 UNIV_INTERN
4875 ulint
4876 fil_page_get_prev(
4877 /*==============*/
4878  const byte* page)
4879 {
4880  return(mach_read_from_4(page + FIL_PAGE_PREV));
4881 }
4882 
4883 /********************************************************************/
4886 UNIV_INTERN
4887 ulint
4888 fil_page_get_next(
4889 /*==============*/
4890  const byte* page)
4891 {
4892  return(mach_read_from_4(page + FIL_PAGE_NEXT));
4893 }
4894 
4895 /*********************************************************************/
4897 UNIV_INTERN
4898 void
4899 fil_page_set_type(
4900 /*==============*/
4901  byte* page,
4902  ulint type)
4903 {
4904  ut_ad(page);
4905 
4906  mach_write_to_2(page + FIL_PAGE_TYPE, type);
4907 }
4908 
4909 /*********************************************************************/
4913 UNIV_INTERN
4914 ulint
4915 fil_page_get_type(
4916 /*==============*/
4917  const byte* page)
4918 {
4919  ut_ad(page);
4920 
4921  return(mach_read_from_2(page + FIL_PAGE_TYPE));
4922 }
4923 
4924 /****************************************************************/
4926 UNIV_INTERN
4927 void
4928 fil_close(void)
4929 /*===========*/
4930 {
4931 #ifndef UNIV_HOTBACKUP
4932  /* The mutex should already have been freed. */
4933  ut_ad(fil_system->mutex.magic_n == 0);
4934 #endif /* !UNIV_HOTBACKUP */
4935 
4936  hash_table_free(fil_system->spaces);
4937 
4938  hash_table_free(fil_system->name_hash);
4939 
4940  ut_a(UT_LIST_GET_LEN(fil_system->LRU) == 0);
4941  ut_a(UT_LIST_GET_LEN(fil_system->unflushed_spaces) == 0);
4942  ut_a(UT_LIST_GET_LEN(fil_system->space_list) == 0);
4943 
4944  mem_free(fil_system);
4945 
4946  fil_system = NULL;
4947 }