Drizzled Public API Documentation

fil0fil.cc
1 /*****************************************************************************
2 
3 Copyright (C) 1995, 2010, Innobase Oy. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free Software
7 Foundation; version 2 of the License.
8 
9 This program is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12 
13 You should have received a copy of the GNU General Public License along with
14 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
15 St, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 *****************************************************************************/
18 
19 /**************************************************/
26 #include "fil0fil.h"
27 
28 #include "mem0mem.h"
29 #include "hash0hash.h"
30 #include "os0file.h"
31 #include "mach0data.h"
32 #include "buf0buf.h"
33 #include "buf0flu.h"
34 #include "log0recv.h"
35 #include "fsp0fsp.h"
36 #include "srv0srv.h"
37 #include "srv0start.h"
38 #include "mtr0mtr.h"
39 #include "mtr0log.h"
40 #include "dict0dict.h"
41 #include "page0page.h"
42 #include "page0zip.h"
43 #include "xtrabackup_api.h"
44 #ifndef UNIV_HOTBACKUP
45 # include "buf0lru.h"
46 # include "ibuf0ibuf.h"
47 # include "sync0sync.h"
48 # include "os0sync.h"
49 #else /* !UNIV_HOTBACKUP */
50 static ulint srv_data_read, srv_data_written;
51 #endif /* !UNIV_HOTBACKUP */
52 
53 /*
54  IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE
55  =============================================
56 
57 The tablespace cache is responsible for providing fast read/write access to
58 tablespaces and logs of the database. File creation and deletion is done
59 in other modules which know more of the logic of the operation, however.
60 
61 A tablespace consists of a chain of files. The size of the files does not
62 have to be divisible by the database block size, because we may just leave
63 the last incomplete block unused. When a new file is appended to the
64 tablespace, the maximum size of the file is also specified. At the moment,
65 we think that it is best to extend the file to its maximum size already at
66 the creation of the file, because then we can avoid dynamically extending
67 the file when more space is needed for the tablespace.
68 
69 A block's position in the tablespace is specified with a 32-bit unsigned
70 integer. The files in the chain are thought to be catenated, and the block
71 corresponding to an address n is the nth block in the catenated file (where
72 the first block is named the 0th block, and the incomplete block fragments
73 at the end of files are not taken into account). A tablespace can be extended
74 by appending a new file at the end of the chain.
75 
76 Our tablespace concept is similar to the one of Oracle.
77 
78 To acquire more speed in disk transfers, a technique called disk striping is
79 sometimes used. This means that logical block addresses are divided in a
80 round-robin fashion across several disks. Windows NT supports disk striping,
81 so there we do not need to support it in the database. Disk striping is
82 implemented in hardware in RAID disks. We conclude that it is not necessary
83 to implement it in the database. Oracle 7 does not support disk striping,
84 either.
85 
86 Another trick used at some database sites is replacing tablespace files by
87 raw disks, that is, the whole physical disk drive, or a partition of it, is
88 opened as a single file, and it is accessed through byte offsets calculated
89 from the start of the disk or the partition. This is recommended in some
90 books on database tuning to achieve more speed in i/o. Using raw disk
91 certainly prevents the OS from fragmenting disk space, but it is not clear
92 if it really adds speed. We measured on the Pentium 100 MHz + NT + NTFS file
93 system + EIDE Conner disk only a negligible difference in speed when reading
94 from a file, versus reading from a raw disk.
95 
96 To have fast access to a tablespace or a log file, we put the data structures
97 to a hash table. Each tablespace and log file is given a unique 32-bit
98 identifier.
99 
100 Some operating systems do not support many open files at the same time,
101 though NT seems to tolerate at least 900 open files. Therefore, we put the
102 open files in an LRU-list. If we need to open another file, we may close the
103 file at the end of the LRU-list. When an i/o-operation is pending on a file,
104 the file cannot be closed. We take the file nodes with pending i/o-operations
105 out of the LRU-list and keep a count of pending operations. When an operation
106 completes, we decrement the count and return the file node to the LRU-list if
107 the count drops to zero. */
108 
/** Path of the MySQL data directory as seen by this module; it is
initialised to "." here. */
112 UNIV_INTERN const char* fil_path_to_mysql_datadir = ".";
113 
/** Counter initialised to zero; presumably the number of log flushes
performed so far -- TODO confirm against the code that increments it. */
115 UNIV_INTERN ulint fil_n_log_flushes = 0;
116 
/** Counter initialised to zero; presumably the number of log flushes
currently in progress -- TODO confirm against the code that updates it. */
118 UNIV_INTERN ulint fil_n_pending_log_flushes = 0;
/** Counter initialised to zero; presumably the number of tablespace
flushes currently in progress -- TODO confirm against the code that
updates it. */
120 UNIV_INTERN ulint fil_n_pending_tablespace_flushes = 0;
121 
/** File address constant initialised to {FIL_NULL, 0}; judging by its
name it is the value used to represent "no address". */
123 UNIV_INTERN fil_addr_t fil_addr_null = {FIL_NULL, 0};
124 
125 #ifdef UNIV_PFS_MUTEX
126 /* Key to register fil_system_mutex with performance schema */
127 UNIV_INTERN mysql_pfs_key_t fil_system_mutex_key;
128 #endif /* UNIV_PFS_MUTEX */
129 
130 #ifdef UNIV_PFS_RWLOCK
131 /* Key to register file space latch with performance schema */
132 UNIV_INTERN mysql_pfs_key_t fil_space_latch_key;
133 #endif /* UNIV_PFS_RWLOCK */
134 
139  char* name;
140  ibool open;
142  ibool is_raw_disk;
144  ulint size;
147  ulint n_pending;
157  ib_int64_t flush_counter;
160  UT_LIST_NODE_T(fil_node_t) chain;
164  ulint magic_n;
165 };
166 
/** Sanity marker for file nodes: fil_node_create() stores it in
node->magic_n and fil_node_free() asserts that it is still there. */
168 #define FIL_NODE_MAGIC_N 89389
169 
172  char* name;
174  ulint id;
175  ib_int64_t tablespace_version;
181  ibool mark;
185  ibool stop_ios;
199  ulint purpose;
203  ulint size;
207  ulint flags;
208  ulint n_reserved_extents;
214  ulint n_pending_ibuf_merges;
220  hash_node_t hash;
221  hash_node_t name_hash;
222 #ifndef UNIV_HOTBACKUP
225 #endif /* !UNIV_HOTBACKUP */
226  UT_LIST_NODE_T(fil_space_t) unflushed_spaces;
229  ibool is_in_unflushed_spaces;
231  UT_LIST_NODE_T(fil_space_t) space_list;
233  ulint magic_n;
234 };
235 
/** Sanity marker for tablespace objects: fil_space_create() stores it in
space->magic_n; the hash lookups and fil_space_free() assert on it. */
237 #define FIL_SPACE_MAGIC_N 89472
238 
/** Type of the tablespace memory cache object; the single instance used
by this file is fil_system. */
240 typedef struct fil_system_struct fil_system_t;
241 
247 #ifndef UNIV_HOTBACKUP
249 #endif /* !UNIV_HOTBACKUP */
266  UT_LIST_BASE_NODE_T(fil_space_t) unflushed_spaces;
272  ulint n_open;
273  ulint max_n_open;
277  ulint max_assigned_id;
283  ib_int64_t tablespace_version;
291  UT_LIST_BASE_NODE_T(fil_space_t) space_list;
293  ibool space_id_reuse_warned;
294  /* !< TRUE if fil_space_create()
295  has issued a warning about
296  potential space_id reuse */
297 };
298 
/** The tablespace memory cache. It starts out as NULL; the functions in
this file assert that it has been set and take fil_system->mutex before
touching its contents. */
301 fil_system_t* fil_system = NULL;
302 
303 
304 /********************************************************************/
311 static
312 void
313 fil_node_prepare_for_io(
314 /*====================*/
315  fil_node_t* node,
316  fil_system_t* system,
317  fil_space_t* space);
318 /********************************************************************/
321 static
322 void
323 fil_node_complete_io(
324 /*=================*/
325  fil_node_t* node,
326  fil_system_t* system,
327  ulint type);
330 /*******************************************************************/
334 static
335 ulint
336 fil_get_space_id_for_table(
337 /*=======================*/
338  const char* name);
340 /*******************************************************************/
345 static
346 ibool
347 fil_space_free(
348 /*===========*/
349  ulint id, /* in: space id */
350  ibool x_latched); /* in: TRUE if caller has space->latch
351  in X mode */
352 /********************************************************************/
358 UNIV_INLINE
359 ulint
360 fil_read(
361 /*=====*/
362  ibool sync,
363  ulint space_id,
364  ulint zip_size,
366  ulint block_offset,
367  ulint byte_offset,
369  ulint len,
372  void* buf,
374  void* message)
376 {
377  return(fil_io(OS_FILE_READ, sync, space_id, zip_size, block_offset,
378  byte_offset, len, buf, message));
379 }
380 
381 /********************************************************************/
387 UNIV_INLINE
388 ulint
389 fil_write(
390 /*======*/
391  ibool sync,
392  ulint space_id,
393  ulint zip_size,
395  ulint block_offset,
396  ulint byte_offset,
398  ulint len,
401  void* buf,
403  void* message)
405 {
406  return(fil_io(OS_FILE_WRITE, sync, space_id, zip_size, block_offset,
407  byte_offset, len, buf, message));
408 }
409 
410 /*******************************************************************/
412 UNIV_INLINE
414 fil_space_get_by_id(
415 /*================*/
416  ulint id)
417 {
419 
420  ut_ad(mutex_own(&fil_system->mutex));
421 
422  HASH_SEARCH(hash, fil_system->spaces, id,
423  fil_space_t*, space,
424  ut_ad(space->magic_n == FIL_SPACE_MAGIC_N),
425  space->id == id);
426 
427  return(space);
428 }
429 
430 /*******************************************************************/
432 UNIV_INLINE
434 fil_space_get_by_name(
435 /*==================*/
436  const char* name)
437 {
439  ulint fold;
440 
441  ut_ad(mutex_own(&fil_system->mutex));
442 
443  fold = ut_fold_string(name);
444 
445  HASH_SEARCH(name_hash, fil_system->name_hash, fold,
446  fil_space_t*, space,
447  ut_ad(space->magic_n == FIL_SPACE_MAGIC_N),
448  !strcmp(name, space->name));
449 
450  return(space);
451 }
452 
453 #ifndef UNIV_HOTBACKUP
454 /*******************************************************************/
458 UNIV_INTERN
459 ib_int64_t
460 fil_space_get_version(
461 /*==================*/
462  ulint id)
463 {
465  ib_int64_t version = -1;
466 
467  ut_ad(fil_system);
468 
469  mutex_enter(&fil_system->mutex);
470 
471  space = fil_space_get_by_id(id);
472 
473  if (space) {
474  version = space->tablespace_version;
475  }
476 
477  mutex_exit(&fil_system->mutex);
478 
479  return(version);
480 }
481 
482 /*******************************************************************/
485 UNIV_INTERN
486 rw_lock_t*
487 fil_space_get_latch(
488 /*================*/
489  ulint id,
490  ulint* flags)
491 {
493 
494  ut_ad(fil_system);
495 
496  mutex_enter(&fil_system->mutex);
497 
498  space = fil_space_get_by_id(id);
499 
500  ut_a(space);
501 
502  if (flags) {
503  *flags = space->flags;
504  }
505 
506  mutex_exit(&fil_system->mutex);
507 
508  return(&(space->latch));
509 }
510 
511 /*******************************************************************/
514 UNIV_INTERN
515 ulint
516 fil_space_get_type(
517 /*===============*/
518  ulint id)
519 {
521 
522  ut_ad(fil_system);
523 
524  mutex_enter(&fil_system->mutex);
525 
526  space = fil_space_get_by_id(id);
527 
528  ut_a(space);
529 
530  mutex_exit(&fil_system->mutex);
531 
532  return(space->purpose);
533 }
534 #endif /* !UNIV_HOTBACKUP */
535 
536 /**********************************************************************/
540 static
541 ibool
542 fil_space_is_flushed(
543 /*=================*/
544  fil_space_t* space)
545 {
546  fil_node_t* node;
547 
548  ut_ad(mutex_own(&fil_system->mutex));
549 
550  node = UT_LIST_GET_FIRST(space->chain);
551 
552  while (node) {
553  if (node->modification_counter > node->flush_counter) {
554 
555  return(FALSE);
556  }
557 
558  node = UT_LIST_GET_NEXT(chain, node);
559  }
560 
561  return(TRUE);
562 }
563 
564 /*******************************************************************/
566 UNIV_INTERN
567 void
568 fil_node_create(
569 /*============*/
570  const char* name,
571  ulint size,
573  ulint id,
574  ibool is_raw)
576 {
577  fil_node_t* node;
579 
580  ut_a(fil_system);
581  ut_a(name);
582 
583  mutex_enter(&fil_system->mutex);
584 
585  node = static_cast<fil_node_t *>(mem_alloc(sizeof(fil_node_t)));
586 
587  node->name = mem_strdup(name);
588  node->open = FALSE;
589 
590  ut_a(!is_raw || srv_start_raw_disk_in_use);
591 
592  node->is_raw_disk = is_raw;
593  node->size = size;
594  node->magic_n = FIL_NODE_MAGIC_N;
595  node->n_pending = 0;
596  node->n_pending_flushes = 0;
597 
598  node->modification_counter = 0;
599  node->flush_counter = 0;
600 
601  space = fil_space_get_by_id(id);
602 
603  if (!space) {
604  ut_print_timestamp(stderr);
605  fprintf(stderr,
606  " InnoDB: Error: Could not find tablespace %lu for\n"
607  "InnoDB: file ", (ulong) id);
608  ut_print_filename(stderr, name);
609  fputs(" in the tablespace memory cache.\n", stderr);
610  mem_free(node->name);
611 
612  mem_free(node);
613 
614  mutex_exit(&fil_system->mutex);
615 
616  return;
617  }
618 
619  space->size += size;
620 
621  node->space = space;
622 
623  UT_LIST_ADD_LAST(chain, space->chain, node);
624 
625  if (id < SRV_LOG_SPACE_FIRST_ID && fil_system->max_assigned_id < id) {
626 
627  fil_system->max_assigned_id = id;
628  }
629 
630  mutex_exit(&fil_system->mutex);
631 }
632 
633 /********************************************************************/
636 static
637 void
638 fil_node_open_file(
639 /*===============*/
640  fil_node_t* node,
641  fil_system_t* system,
642  fil_space_t* space)
643 {
644  uint64_t size_bytes;
645  ulint size_low;
646  ulint size_high;
647  ibool ret;
648  ibool success;
649  byte* buf2;
650  byte* page;
651  ulint space_id;
652  ulint flags;
653 
654  ut_ad(mutex_own(&(system->mutex)));
655  ut_a(node->n_pending == 0);
656  ut_a(node->open == FALSE);
657 
658  if (node->size == 0) {
659  /* It must be a single-table tablespace and we do not know the
660  size of the file yet. First we open the file in the normal
661  mode, no async I/O here, for simplicity. Then do some checks,
662  and close the file again.
663  NOTE that we could not use the simple file read function
664  os_file_read() in Windows to read from a file opened for
665  async I/O! */
666 
667  node->handle = os_file_create_simple_no_error_handling(
668  innodb_file_data_key, node->name, OS_FILE_OPEN,
669  OS_FILE_READ_ONLY, &success);
670  if (!success) {
671  /* The following call prints an error message */
673 
674  ut_print_timestamp(stderr);
675 
676  fprintf(stderr,
677  " InnoDB: Fatal error: cannot open %s\n."
678  "InnoDB: Have you deleted .ibd files"
679  " under a running mysqld server?\n",
680  node->name);
681  ut_a(0);
682  }
683 
684  os_file_get_size(node->handle, &size_low, &size_high);
685 
686  size_bytes = (((uint64_t)size_high) << 32) + size_low;
687 #ifdef UNIV_HOTBACKUP
688  if (space->id == 0) {
689  node->size = size_bytes / UNIV_PAGE_SIZE;
690  os_file_close(node->handle);
691  goto add_size;
692  }
693 #endif /* UNIV_HOTBACKUP */
694  ut_a(space->purpose != FIL_LOG);
695  ut_a(space->id != 0);
696 
697  if (size_bytes < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
698  fprintf(stderr,
699  "InnoDB: Error: the size of single-table"
700  " tablespace file %s\n"
701  "InnoDB: is only %lu %lu,"
702  " should be at least %lu!\n",
703  node->name,
704  (ulong) size_high,
705  (ulong) size_low,
707  * UNIV_PAGE_SIZE));
708 
709  ut_a(0);
710  }
711 
712  /* Read the first page of the tablespace */
713 
714  buf2 = static_cast<unsigned char *>(ut_malloc(2 * UNIV_PAGE_SIZE));
715  /* Align the memory for file i/o if we might have O_DIRECT
716  set */
717  page = static_cast<unsigned char *>(ut_align(buf2, UNIV_PAGE_SIZE));
718 
719  success = os_file_read(node->handle, page, 0, 0,
720  UNIV_PAGE_SIZE);
721  space_id = fsp_header_get_space_id(page);
722  flags = fsp_header_get_flags(page);
723 
724  ut_free(buf2);
725 
726  /* Close the file now that we have read the space id from it */
727 
728  os_file_close(node->handle);
729 
730  if (UNIV_UNLIKELY(space_id != space->id)) {
731  fprintf(stderr,
732  "InnoDB: Error: tablespace id is %lu"
733  " in the data dictionary\n"
734  "InnoDB: but in file %s it is %lu!\n",
735  space->id, node->name, space_id);
736 
737  ut_error;
738  }
739 
740  if (UNIV_UNLIKELY(space_id == ULINT_UNDEFINED
741  || space_id == 0)) {
742  fprintf(stderr,
743  "InnoDB: Error: tablespace id %lu"
744  " in file %s is not sensible\n",
745  (ulong) space_id, node->name);
746 
747  ut_error;
748  }
749 
750  if (UNIV_UNLIKELY(space->flags != flags)) {
751  fprintf(stderr,
752  "InnoDB: Error: table flags are %lx"
753  " in the data dictionary\n"
754  "InnoDB: but the flags in file %s are %lx!\n",
755  space->flags, node->name, flags);
756 
757  ut_error;
758  }
759 
760  if (size_bytes >= 1024 * 1024) {
761  /* Truncate the size to whole megabytes. */
762  size_bytes = ut_2pow_round(size_bytes, 1024 * 1024);
763  }
764 
765  if (!(flags & DICT_TF_ZSSIZE_MASK)) {
766  node->size = (ulint)size_bytes / UNIV_PAGE_SIZE;
767  } else {
768  node->size = (ulint)
769  (size_bytes
771  }
772 
773 #ifdef UNIV_HOTBACKUP
774 add_size:
775 #endif /* UNIV_HOTBACKUP */
776  space->size += node->size;
777  }
778 
779  /* printf("Opening file %s\n", node->name); */
780 
781  /* Open the file for reading and writing, in Windows normally in the
782  unbuffered async I/O mode, though global variables may make
783  os_file_create() to fall back to the normal file I/O mode. */
784 
785  if (space->purpose == FIL_LOG) {
786  node->handle = os_file_create(innodb_file_log_key,
787  node->name, OS_FILE_OPEN,
788  OS_FILE_AIO, OS_LOG_FILE,
789  &ret);
790  } else if (node->is_raw_disk) {
791  node->handle = os_file_create(innodb_file_data_key,
792  node->name,
793  OS_FILE_OPEN_RAW,
794  OS_FILE_AIO, OS_DATA_FILE,
795  &ret);
796  } else {
797  node->handle = os_file_create(innodb_file_data_key,
798  node->name, OS_FILE_OPEN,
799  OS_FILE_AIO, OS_DATA_FILE,
800  &ret);
801  }
802 
803  ut_a(ret);
804 
805  node->open = TRUE;
806 
807  system->n_open++;
808 
809  if (space->purpose == FIL_TABLESPACE && space->id != 0) {
810  /* Put the node to the LRU list */
811  UT_LIST_ADD_FIRST(LRU, system->LRU, node);
812  }
813 }
814 
815 /**********************************************************************/
817 static
818 void
819 fil_node_close_file(
820 /*================*/
821  fil_node_t* node,
822  fil_system_t* system)
823 {
824  ibool ret;
825 
826  ut_ad(node && system);
827  ut_ad(mutex_own(&(system->mutex)));
828  ut_a(node->open);
829  ut_a(node->n_pending == 0);
830  ut_a(node->n_pending_flushes == 0);
831  ut_a(node->modification_counter == node->flush_counter);
832 
833  ret = os_file_close(node->handle);
834  ut_a(ret);
835 
836  /* printf("Closing file %s\n", node->name); */
837 
838  node->open = FALSE;
839  ut_a(system->n_open > 0);
840  system->n_open--;
841 
842  if (node->space->purpose == FIL_TABLESPACE && node->space->id != 0) {
843  ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
844 
845  /* The node is in the LRU list, remove it */
846  UT_LIST_REMOVE(LRU, system->LRU, node);
847  }
848 }
849 
850 /********************************************************************/
858 static
859 ibool
860 fil_try_to_close_file_in_LRU(
861 /*=========================*/
862  ibool print_info)
864 {
865  fil_node_t* node;
866 
867  ut_ad(mutex_own(&fil_system->mutex));
868 
869  node = UT_LIST_GET_LAST(fil_system->LRU);
870 
871  if (print_info) {
872  fprintf(stderr,
873  "InnoDB: fil_sys open file LRU len %lu\n",
874  (ulong) UT_LIST_GET_LEN(fil_system->LRU));
875  }
876 
877  while (node != NULL) {
878  if (node->modification_counter == node->flush_counter
879  && node->n_pending_flushes == 0) {
880 
881  fil_node_close_file(node, fil_system);
882 
883  return(TRUE);
884  }
885 
886  if (print_info && node->n_pending_flushes > 0) {
887  fputs("InnoDB: cannot close file ", stderr);
888  ut_print_filename(stderr, node->name);
889  fprintf(stderr, ", because n_pending_flushes %lu\n",
890  (ulong) node->n_pending_flushes);
891  }
892 
893  if (print_info
894  && node->modification_counter != node->flush_counter) {
895  fputs("InnoDB: cannot close file ", stderr);
896  ut_print_filename(stderr, node->name);
897  fprintf(stderr,
898  ", because mod_count %ld != fl_count %ld\n",
899  (long) node->modification_counter,
900  (long) node->flush_counter);
901  }
902 
903  node = UT_LIST_GET_PREV(LRU, node);
904  }
905 
906  return(FALSE);
907 }
908 
909 /*******************************************************************/
913 static
914 void
915 fil_mutex_enter_and_prepare_for_io(
916 /*===============================*/
917  ulint space_id)
918 {
920  ibool success;
921  ibool print_info = FALSE;
922  ulint count = 0;
923  ulint count2 = 0;
924 
925 retry:
926  mutex_enter(&fil_system->mutex);
927 
928  if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) {
929  /* We keep log files and system tablespace files always open;
930  this is important in preventing deadlocks in this module, as
931  a page read completion often performs another read from the
932  insert buffer. The insert buffer is in tablespace 0, and we
933  cannot end up waiting in this function. */
934 
935  return;
936  }
937 
938  if (fil_system->n_open < fil_system->max_n_open) {
939 
940  return;
941  }
942 
943  space = fil_space_get_by_id(space_id);
944 
945  if (space != NULL && space->stop_ios) {
946  /* We are going to do a rename file and want to stop new i/o's
947  for a while */
948 
949  if (count2 > 20000) {
950  fputs("InnoDB: Warning: tablespace ", stderr);
951  ut_print_filename(stderr, space->name);
952  fprintf(stderr,
953  " has i/o ops stopped for a long time %lu\n",
954  (ulong) count2);
955  }
956 
957  mutex_exit(&fil_system->mutex);
958 
959  os_thread_sleep(20000);
960 
961  count2++;
962 
963  goto retry;
964  }
965 
966  /* If the file is already open, no need to do anything; if the space
967  does not exist, we handle the situation in the function which called
968  this function */
969 
970  if (!space || UT_LIST_GET_FIRST(space->chain)->open) {
971 
972  return;
973  }
974 
975  if (count > 1) {
976  print_info = TRUE;
977  }
978 
979  /* Too many files are open, try to close some */
980 close_more:
981  success = fil_try_to_close_file_in_LRU(print_info);
982 
983  if (success && fil_system->n_open >= fil_system->max_n_open) {
984 
985  goto close_more;
986  }
987 
988  if (fil_system->n_open < fil_system->max_n_open) {
989  /* Ok */
990 
991  return;
992  }
993 
994  if (count >= 2) {
995  ut_print_timestamp(stderr);
996  fprintf(stderr,
997  " InnoDB: Warning: too many (%lu) files stay open"
998  " while the maximum\n"
999  "InnoDB: allowed value would be %lu.\n"
1000  "InnoDB: You may need to raise the value of"
1001  " innodb_open_files in\n"
1002  "InnoDB: my.cnf.\n",
1003  (ulong) fil_system->n_open,
1004  (ulong) fil_system->max_n_open);
1005 
1006  return;
1007  }
1008 
1009  mutex_exit(&fil_system->mutex);
1010 
1011 #ifndef UNIV_HOTBACKUP
1012  /* Wake the i/o-handler threads to make sure pending i/o's are
1013  performed */
1015 
1016  os_thread_sleep(20000);
1017 #endif
1018  /* Flush tablespaces so that we can close modified files in the LRU
1019  list */
1020 
1021  fil_flush_file_spaces(FIL_TABLESPACE);
1022 
1023  count++;
1024 
1025  goto retry;
1026 }
1027 
1028 /*******************************************************************/
1030 static
1031 void
1032 fil_node_free(
1033 /*==========*/
1034  fil_node_t* node,
1035  fil_system_t* system,
1036  fil_space_t* space)
1037 {
1038  ut_ad(node && system && space);
1039  ut_ad(mutex_own(&(system->mutex)));
1040  ut_a(node->magic_n == FIL_NODE_MAGIC_N);
1041  ut_a(node->n_pending == 0);
1042 
1043  if (node->open) {
1044  /* We fool the assertion in fil_node_close_file() to think
1045  there are no unflushed modifications in the file */
1046 
1047  node->modification_counter = node->flush_counter;
1048 
1049  if (space->is_in_unflushed_spaces
1050  && fil_space_is_flushed(space)) {
1051 
1052  space->is_in_unflushed_spaces = FALSE;
1053 
1054  UT_LIST_REMOVE(unflushed_spaces,
1055  system->unflushed_spaces,
1056  space);
1057  }
1058 
1059  fil_node_close_file(node, system);
1060  }
1061 
1062  space->size -= node->size;
1063 
1064  UT_LIST_REMOVE(chain, space->chain, node);
1065 
1066  mem_free(node->name);
1067  mem_free(node);
1068 }
1069 
1070 #ifdef UNIV_LOG_ARCHIVE
1071 /****************************************************************/
1074 UNIV_INTERN
1075 void
1076 fil_space_truncate_start(
1077 /*=====================*/
1078  ulint id,
1079  ulint trunc_len)
1082 {
1083  fil_node_t* node;
1084  fil_space_t* space;
1085 
1086  mutex_enter(&fil_system->mutex);
1087 
1088  space = fil_space_get_by_id(id);
1089 
1090  ut_a(space);
1091 
1092  while (trunc_len > 0) {
1093  node = UT_LIST_GET_FIRST(space->chain);
1094 
1095  ut_a(node->size * UNIV_PAGE_SIZE <= trunc_len);
1096 
1097  trunc_len -= node->size * UNIV_PAGE_SIZE;
1098 
1099  fil_node_free(node, fil_system, space);
1100  }
1101 
1102  mutex_exit(&fil_system->mutex);
1103 }
1104 #endif /* UNIV_LOG_ARCHIVE */
1105 
1106 /*******************************************************************/
1110 UNIV_INTERN
1111 ibool
1112 fil_space_create(
1113 /*=============*/
1114  const char* name,
1115  ulint id,
1116  ulint flags,
1118  ulint purpose)
1119 {
1120  fil_space_t* space;
1121 
1122  /* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
1123  ROW_FORMAT=COMPACT
1124  ((table->flags & ~(~0 << DICT_TF_BITS)) == DICT_TF_COMPACT) and
1125  ROW_FORMAT=REDUNDANT (table->flags == 0). For any other
1126  format, the tablespace flags should equal
1127  (table->flags & ~(~0 << DICT_TF_BITS)). */
1128  ut_a(flags != DICT_TF_COMPACT);
1129  ut_a(!(flags & (~0UL << DICT_TF_BITS)));
1130 
1131 try_again:
1132  /*printf(
1133  "InnoDB: Adding tablespace %lu of name %s, purpose %lu\n", id, name,
1134  purpose);*/
1135 
1136  ut_a(fil_system);
1137  ut_a(name);
1138 
1139  mutex_enter(&fil_system->mutex);
1140 
1141  space = fil_space_get_by_name(name);
1142 
1143  if (UNIV_LIKELY_NULL(space)) {
1144  ibool success;
1145  ulint namesake_id;
1146 
1147  ut_print_timestamp(stderr);
1148  fprintf(stderr,
1149  " InnoDB: Warning: trying to init to the"
1150  " tablespace memory cache\n"
1151  "InnoDB: a tablespace %lu of name ", (ulong) id);
1152  ut_print_filename(stderr, name);
1153  fprintf(stderr, ",\n"
1154  "InnoDB: but a tablespace %lu of the same name\n"
1155  "InnoDB: already exists in the"
1156  " tablespace memory cache!\n",
1157  (ulong) space->id);
1158 
1159  if (id == 0 || purpose != FIL_TABLESPACE) {
1160 
1161  mutex_exit(&fil_system->mutex);
1162 
1163  return(FALSE);
1164  }
1165 
1166  fprintf(stderr,
1167  "InnoDB: We assume that InnoDB did a crash recovery,"
1168  " and you had\n"
1169  "InnoDB: an .ibd file for which the table"
1170  " did not exist in the\n"
1171  "InnoDB: InnoDB internal data dictionary in the"
1172  " ibdata files.\n"
1173  "InnoDB: We assume that you later removed the"
1174  " .ibd and .frm files,\n"
1175  "InnoDB: and are now trying to recreate the table."
1176  " We now remove the\n"
1177  "InnoDB: conflicting tablespace object"
1178  " from the memory cache and try\n"
1179  "InnoDB: the init again.\n");
1180 
1181  namesake_id = space->id;
1182 
1183  success = fil_space_free(namesake_id, FALSE);
1184  ut_a(success);
1185 
1186  mutex_exit(&fil_system->mutex);
1187 
1188  goto try_again;
1189  }
1190 
1191  space = fil_space_get_by_id(id);
1192 
1193  if (UNIV_LIKELY_NULL(space)) {
1194  fprintf(stderr,
1195  "InnoDB: Error: trying to add tablespace %lu"
1196  " of name ", (ulong) id);
1197  ut_print_filename(stderr, name);
1198  fprintf(stderr, "\n"
1199  "InnoDB: to the tablespace memory cache,"
1200  " but tablespace\n"
1201  "InnoDB: %lu of name ", (ulong) space->id);
1202  ut_print_filename(stderr, space->name);
1203  fputs(" already exists in the tablespace\n"
1204  "InnoDB: memory cache!\n", stderr);
1205 
1206  mutex_exit(&fil_system->mutex);
1207 
1208  return(FALSE);
1209  }
1210 
1211  space = static_cast<fil_space_t *>(mem_alloc(sizeof(fil_space_t)));
1212 
1213  space->name = mem_strdup(name);
1214  space->id = id;
1215 
1216  fil_system->tablespace_version++;
1217  space->tablespace_version = fil_system->tablespace_version;
1218  space->mark = FALSE;
1219 
1220  if (UNIV_LIKELY(purpose == FIL_TABLESPACE && !recv_recovery_on)
1221  && UNIV_UNLIKELY(id > fil_system->max_assigned_id)) {
1222  if (!fil_system->space_id_reuse_warned) {
1223  fil_system->space_id_reuse_warned = TRUE;
1224 
1225  ut_print_timestamp(stderr);
1226  fprintf(stderr,
1227  " InnoDB: Warning: allocated tablespace %lu,"
1228  " old maximum was %lu\n",
1229  (ulong) id,
1230  (ulong) fil_system->max_assigned_id);
1231  }
1232 
1233  fil_system->max_assigned_id = id;
1234  }
1235 
1236  space->stop_ios = FALSE;
1237  space->stop_ibuf_merges = FALSE;
1238  space->is_being_deleted = FALSE;
1239  space->purpose = purpose;
1240  space->size = 0;
1241  space->flags = flags;
1242 
1243  space->n_reserved_extents = 0;
1244 
1245  space->n_pending_flushes = 0;
1246  space->n_pending_ibuf_merges = 0;
1247 
1248  UT_LIST_INIT(space->chain);
1249  space->magic_n = FIL_SPACE_MAGIC_N;
1250 
1251  rw_lock_create(fil_space_latch_key, &space->latch, SYNC_FSP);
1252 
1253  HASH_INSERT(fil_space_t, hash, fil_system->spaces, id, space);
1254 
1255  HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash,
1256  ut_fold_string(name), space);
1257  space->is_in_unflushed_spaces = FALSE;
1258 
1259  UT_LIST_ADD_LAST(space_list, fil_system->space_list, space);
1260 
1261  mutex_exit(&fil_system->mutex);
1262 
1263  return(TRUE);
1264 }
1265 
1266 /*******************************************************************/
1271 UNIV_INTERN
1272 ibool
1273 fil_assign_new_space_id(
1274 /*====================*/
1275  ulint* space_id)
1276 {
1277  ulint id;
1278  ibool success;
1279 
1280  mutex_enter(&fil_system->mutex);
1281 
1282  id = *space_id;
1283 
1284  if (id < fil_system->max_assigned_id) {
1285  id = fil_system->max_assigned_id;
1286  }
1287 
1288  id++;
1289 
1290  if (id > (SRV_LOG_SPACE_FIRST_ID / 2) && (id % 1000000UL == 0)) {
1291  ut_print_timestamp(stderr);
1292  fprintf(stderr,
1293  "InnoDB: Warning: you are running out of new"
1294  " single-table tablespace id's.\n"
1295  "InnoDB: Current counter is %lu and it"
1296  " must not exceed %lu!\n"
1297  "InnoDB: To reset the counter to zero"
1298  " you have to dump all your tables and\n"
1299  "InnoDB: recreate the whole InnoDB installation.\n",
1300  (ulong) id,
1301  (ulong) SRV_LOG_SPACE_FIRST_ID);
1302  }
1303 
1304  success = (id < SRV_LOG_SPACE_FIRST_ID);
1305 
1306  if (success) {
1307  *space_id = fil_system->max_assigned_id = id;
1308  } else {
1309  ut_print_timestamp(stderr);
1310  fprintf(stderr,
1311  "InnoDB: You have run out of single-table"
1312  " tablespace id's!\n"
1313  "InnoDB: Current counter is %lu.\n"
1314  "InnoDB: To reset the counter to zero you"
1315  " have to dump all your tables and\n"
1316  "InnoDB: recreate the whole InnoDB installation.\n",
1317  (ulong) id);
1318  *space_id = ULINT_UNDEFINED;
1319  }
1320 
1321  mutex_exit(&fil_system->mutex);
1322 
1323  return(success);
1324 }
1325 
1326 /*******************************************************************/
1331 static
1332 ibool
1333 fil_space_free(
1334 /*===========*/
1335  /* out: TRUE if success */
1336  ulint id, /* in: space id */
1337  ibool x_latched) /* in: TRUE if caller has space->latch
1338  in X mode */
1339 {
1340  fil_space_t* space;
1341  fil_space_t* tablespace;
1342  fil_node_t* fil_node;
1343 
1344  ut_ad(mutex_own(&fil_system->mutex));
1345 
1346  space = fil_space_get_by_id(id);
1347 
1348  if (!space) {
1349  ut_print_timestamp(stderr);
1350  fprintf(stderr,
1351  " InnoDB: Error: trying to remove tablespace %lu"
1352  " from the cache but\n"
1353  "InnoDB: it is not there.\n", (ulong) id);
1354 
1355  return(FALSE);
1356  }
1357 
1358  HASH_DELETE(fil_space_t, hash, fil_system->spaces, id, space);
1359 
1360  tablespace = fil_space_get_by_name(space->name);
1361  ut_a(tablespace);
1362  ut_a(space == tablespace);
1363 
1364  HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash,
1365  ut_fold_string(space->name), space);
1366 
1367  if (space->is_in_unflushed_spaces) {
1368  space->is_in_unflushed_spaces = FALSE;
1369 
1370  UT_LIST_REMOVE(unflushed_spaces, fil_system->unflushed_spaces,
1371  space);
1372  }
1373 
1374  UT_LIST_REMOVE(space_list, fil_system->space_list, space);
1375 
1376  ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
1377  ut_a(0 == space->n_pending_flushes);
1378 
1379  fil_node = UT_LIST_GET_FIRST(space->chain);
1380 
1381  while (fil_node != NULL) {
1382  fil_node_free(fil_node, fil_system, space);
1383 
1384  fil_node = UT_LIST_GET_FIRST(space->chain);
1385  }
1386 
1387  ut_a(0 == UT_LIST_GET_LEN(space->chain));
1388 
1389  if (x_latched) {
1390  rw_lock_x_unlock(&space->latch);
1391  }
1392 
1393  rw_lock_free(&(space->latch));
1394 
1395  mem_free(space->name);
1396  mem_free(space);
1397 
1398  return(TRUE);
1399 }
1400 
1401 /*******************************************************************/
1405 UNIV_INTERN
1406 ulint
1407 fil_space_get_size(
1408 /*===============*/
1409  ulint id)
1410 {
1411  fil_node_t* node;
1412  fil_space_t* space;
1413  ulint size;
1414 
1415  ut_ad(fil_system);
1416 
1417  fil_mutex_enter_and_prepare_for_io(id);
1418 
1419  space = fil_space_get_by_id(id);
1420 
1421  if (space == NULL) {
1422  mutex_exit(&fil_system->mutex);
1423 
1424  return(0);
1425  }
1426 
1427  if (space->size == 0 && space->purpose == FIL_TABLESPACE) {
1428  ut_a(id != 0);
1429 
1430  ut_a(1 == UT_LIST_GET_LEN(space->chain));
1431 
1432  node = UT_LIST_GET_FIRST(space->chain);
1433 
1434  /* It must be a single-table tablespace and we have not opened
1435  the file yet; the following calls will open it and update the
1436  size fields */
1437 
1438  fil_node_prepare_for_io(node, fil_system, space);
1439  fil_node_complete_io(node, fil_system, OS_FILE_READ);
1440  }
1441 
1442  size = space->size;
1443 
1444  mutex_exit(&fil_system->mutex);
1445 
1446  return(size);
1447 }
1448 
1449 /*******************************************************************/
1453 UNIV_INTERN
1454 ulint
1455 fil_space_get_flags(
1456 /*================*/
1457  ulint id)
1458 {
1459  fil_node_t* node;
1460  fil_space_t* space;
1461  ulint flags;
1462 
1463  ut_ad(fil_system);
1464 
1465  if (UNIV_UNLIKELY(!id)) {
1466  return(0);
1467  }
1468 
1469  fil_mutex_enter_and_prepare_for_io(id);
1470 
1471  space = fil_space_get_by_id(id);
1472 
1473  if (space == NULL) {
1474  mutex_exit(&fil_system->mutex);
1475 
1476  return(ULINT_UNDEFINED);
1477  }
1478 
1479  if (space->size == 0 && space->purpose == FIL_TABLESPACE) {
1480  ut_a(id != 0);
1481 
1482  ut_a(1 == UT_LIST_GET_LEN(space->chain));
1483 
1484  node = UT_LIST_GET_FIRST(space->chain);
1485 
1486  /* It must be a single-table tablespace and we have not opened
1487  the file yet; the following calls will open it and update the
1488  size fields */
1489 
1490  fil_node_prepare_for_io(node, fil_system, space);
1491  fil_node_complete_io(node, fil_system, OS_FILE_READ);
1492  }
1493 
1494  flags = space->flags;
1495 
1496  mutex_exit(&fil_system->mutex);
1497 
1498  return(flags);
1499 }
1500 
1501 /*******************************************************************/
1505 UNIV_INTERN
1506 ulint
1507 fil_space_get_zip_size(
1508 /*===================*/
1509  ulint id)
1510 {
1511  ulint flags;
1512 
1513  flags = fil_space_get_flags(id);
1514 
1515  if (flags && flags != ULINT_UNDEFINED) {
1516 
1517  return(dict_table_flags_to_zip_size(flags));
1518  }
1519 
1520  return(flags);
1521 }
1522 
1523 /*******************************************************************/
1527 UNIV_INTERN
1528 ibool
1529 fil_check_adress_in_tablespace(
1530 /*===========================*/
1531  ulint id,
1532  ulint page_no)
1533 {
1534  if (fil_space_get_size(id) > page_no) {
1535 
1536  return(TRUE);
1537  }
1538 
1539  return(FALSE);
1540 }
1541 
1542 /****************************************************************/
1544 UNIV_INTERN
1545 void
1546 fil_init(
1547 /*=====*/
1548  ulint hash_size,
1549  ulint max_n_open)
1550 {
1551  ut_a(fil_system == NULL);
1552 
1553  ut_a(hash_size > 0);
1554  ut_a(max_n_open > 0);
1555 
1556  void *fil_system_ptr= mem_zalloc(sizeof(fil_system_t));
1557  fil_system = static_cast<fil_system_t *>(fil_system_ptr);
1558 
1559  mutex_create(fil_system_mutex_key,
1560  &fil_system->mutex, SYNC_ANY_LATCH);
1561 
1562  fil_system->spaces = hash_create(hash_size);
1563  fil_system->name_hash = hash_create(hash_size);
1564 
1565  UT_LIST_INIT(fil_system->LRU);
1566 
1567  fil_system->max_n_open = max_n_open;
1568 }
1569 
1570 /*******************************************************************/
1576 UNIV_INTERN
1577 void
1578 fil_open_log_and_system_tablespace_files(void)
1579 /*==========================================*/
1580 {
1581  fil_space_t* space;
1582  fil_node_t* node;
1583 
1584  mutex_enter(&fil_system->mutex);
1585 
1586  space = UT_LIST_GET_FIRST(fil_system->space_list);
1587 
1588  while (space != NULL) {
1589  if (space->purpose != FIL_TABLESPACE || space->id == 0) {
1590  node = UT_LIST_GET_FIRST(space->chain);
1591 
1592  while (node != NULL) {
1593  if (!node->open) {
1594  fil_node_open_file(node, fil_system,
1595  space);
1596  }
1597  if (fil_system->max_n_open
1598  < 10 + fil_system->n_open) {
1599  fprintf(stderr,
1600  "InnoDB: Warning: you must"
1601  " raise the value of"
1602  " innodb_open_files in\n"
1603  "InnoDB: my.cnf! Remember that"
1604  " InnoDB keeps all log files"
1605  " and all system\n"
1606  "InnoDB: tablespace files open"
1607  " for the whole time mysqld is"
1608  " running, and\n"
1609  "InnoDB: needs to open also"
1610  " some .ibd files if the"
1611  " file-per-table storage\n"
1612  "InnoDB: model is used."
1613  " Current open files %lu,"
1614  " max allowed"
1615  " open files %lu.\n",
1616  (ulong) fil_system->n_open,
1617  (ulong) fil_system->max_n_open);
1618  }
1619  node = UT_LIST_GET_NEXT(chain, node);
1620  }
1621  }
1622  space = UT_LIST_GET_NEXT(space_list, space);
1623  }
1624 
1625  mutex_exit(&fil_system->mutex);
1626 }
1627 
1628 /*******************************************************************/
1631 UNIV_INTERN
1632 void
1633 fil_close_all_files(void)
1634 /*=====================*/
1635 {
1636  fil_space_t* space;
1637 
1638  mutex_enter(&fil_system->mutex);
1639 
1640  space = UT_LIST_GET_FIRST(fil_system->space_list);
1641 
1642  while (space != NULL) {
1643  fil_node_t* node;
1644  fil_space_t* prev_space = space;
1645 
1646  for (node = UT_LIST_GET_FIRST(space->chain);
1647  node != NULL;
1648  node = UT_LIST_GET_NEXT(chain, node)) {
1649 
1650  if (node->open) {
1651  fil_node_close_file(node, fil_system);
1652  }
1653  }
1654 
1655  space = UT_LIST_GET_NEXT(space_list, space);
1656 
1657  fil_space_free(prev_space->id, FALSE);
1658  }
1659 
1660  mutex_exit(&fil_system->mutex);
1661 }
1662 
1663 /*******************************************************************/
1666 UNIV_INTERN
1667 void
1668 fil_set_max_space_id_if_bigger(
1669 /*===========================*/
1670  ulint max_id)
1671 {
1672  if (max_id >= SRV_LOG_SPACE_FIRST_ID) {
1673  fprintf(stderr,
1674  "InnoDB: Fatal error: max tablespace id"
1675  " is too high, %lu\n", (ulong) max_id);
1676  ut_error;
1677  }
1678 
1679  mutex_enter(&fil_system->mutex);
1680 
1681  if (fil_system->max_assigned_id < max_id) {
1682 
1683  fil_system->max_assigned_id = max_id;
1684  }
1685 
1686  mutex_exit(&fil_system->mutex);
1687 }
1688 
1689 /****************************************************************/
1693 static
1694 ulint
1695 fil_write_lsn_and_arch_no_to_file(
1696 /*==============================*/
1697  ulint sum_of_sizes,
1699  ib_uint64_t lsn,
1700  ulint /*arch_log_no __attribute__((unused))*/)
1702 {
1703  byte* buf1;
1704  byte* buf;
1705 
1706  buf1 = static_cast<byte *>(mem_alloc(2 * UNIV_PAGE_SIZE));
1707  buf = static_cast<byte *>(ut_align(buf1, UNIV_PAGE_SIZE));
1708 
1709  fil_read(TRUE, 0, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
1710 
1712 
1713  fil_write(TRUE, 0, 0, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
1714 
1715  mem_free(buf1);
1716 
1717  return(DB_SUCCESS);
1718 }
1719 
1720 /****************************************************************/
1724 UNIV_INTERN
1725 ulint
1726 fil_write_flushed_lsn_to_data_files(
1727 /*================================*/
1728  ib_uint64_t lsn,
1729  ulint arch_log_no)
1731 {
1732  fil_space_t* space;
1733  fil_node_t* node;
1734  ulint sum_of_sizes;
1735  ulint err;
1736 
1737  mutex_enter(&fil_system->mutex);
1738 
1739  space = UT_LIST_GET_FIRST(fil_system->space_list);
1740 
1741  while (space) {
1742  /* We only write the lsn to all existing data files which have
1743  been open during the lifetime of the mysqld process; they are
1744  represented by the space objects in the tablespace memory
1745  cache. Note that all data files in the system tablespace 0 are
1746  always open. */
1747 
1748  if (space->purpose == FIL_TABLESPACE
1749  && space->id == 0) {
1750  sum_of_sizes = 0;
1751 
1752  node = UT_LIST_GET_FIRST(space->chain);
1753  while (node) {
1754  mutex_exit(&fil_system->mutex);
1755 
1756  err = fil_write_lsn_and_arch_no_to_file(
1757  sum_of_sizes, lsn, arch_log_no);
1758  if (err != DB_SUCCESS) {
1759 
1760  return(err);
1761  }
1762 
1763  mutex_enter(&fil_system->mutex);
1764 
1765  sum_of_sizes += node->size;
1766  node = UT_LIST_GET_NEXT(chain, node);
1767  }
1768  }
1769  space = UT_LIST_GET_NEXT(space_list, space);
1770  }
1771 
1772  mutex_exit(&fil_system->mutex);
1773 
1774  return(DB_SUCCESS);
1775 }
1776 
1777 /*******************************************************************/
1780 UNIV_INTERN
1781 void
1782 fil_read_flushed_lsn_and_arch_log_no(
1783 /*=================================*/
1784  os_file_t data_file,
1785  ibool one_read_already,
1788 #ifdef UNIV_LOG_ARCHIVE
1789  ulint* min_arch_log_no,
1790  ulint* max_arch_log_no,
1791 #endif /* UNIV_LOG_ARCHIVE */
1792  ib_uint64_t* min_flushed_lsn,
1793  ib_uint64_t* max_flushed_lsn)
1794 {
1795  byte* buf;
1796  byte* buf2;
1797  ib_uint64_t flushed_lsn;
1798 
1799  buf2 = static_cast<byte *>(ut_malloc(2 * UNIV_PAGE_SIZE));
1800  /* Align the memory for a possible read from a raw device */
1801  buf = static_cast<byte *>(ut_align(buf2, UNIV_PAGE_SIZE));
1802 
1803  os_file_read(data_file, buf, 0, 0, UNIV_PAGE_SIZE);
1804 
1805  flushed_lsn = mach_read_from_8(buf + FIL_PAGE_FILE_FLUSH_LSN);
1806 
1807  ut_free(buf2);
1808 
1809  if (!one_read_already) {
1810  *min_flushed_lsn = flushed_lsn;
1811  *max_flushed_lsn = flushed_lsn;
1812 #ifdef UNIV_LOG_ARCHIVE
1813  *min_arch_log_no = arch_log_no;
1814  *max_arch_log_no = arch_log_no;
1815 #endif /* UNIV_LOG_ARCHIVE */
1816  return;
1817  }
1818 
1819  if (*min_flushed_lsn > flushed_lsn) {
1820  *min_flushed_lsn = flushed_lsn;
1821  }
1822  if (*max_flushed_lsn < flushed_lsn) {
1823  *max_flushed_lsn = flushed_lsn;
1824  }
1825 #ifdef UNIV_LOG_ARCHIVE
1826  if (*min_arch_log_no > arch_log_no) {
1827  *min_arch_log_no = arch_log_no;
1828  }
1829  if (*max_arch_log_no < arch_log_no) {
1830  *max_arch_log_no = arch_log_no;
1831  }
1832 #endif /* UNIV_LOG_ARCHIVE */
1833 }
1834 
1835 /*================ SINGLE-TABLE TABLESPACES ==========================*/
1836 
1837 #ifndef UNIV_HOTBACKUP
1838 /*******************************************************************/
1842 UNIV_INTERN
1843 ibool
1844 fil_inc_pending_ibuf_merges(
1845 /*========================*/
1846  ulint id)
1847 {
1848  fil_space_t* space;
1849 
1850  mutex_enter(&fil_system->mutex);
1851 
1852  space = fil_space_get_by_id(id);
1853 
1854  if (space == NULL) {
1855  fprintf(stderr,
1856  "InnoDB: Error: trying to do ibuf merge to a"
1857  " dropped tablespace %lu\n",
1858  (ulong) id);
1859  }
1860 
1861  if (space == NULL || space->stop_ibuf_merges) {
1862  mutex_exit(&fil_system->mutex);
1863 
1864  return(TRUE);
1865  }
1866 
1867  space->n_pending_ibuf_merges++;
1868 
1869  mutex_exit(&fil_system->mutex);
1870 
1871  return(FALSE);
1872 }
1873 
1874 /*******************************************************************/
1876 UNIV_INTERN
1877 void
1878 fil_decr_pending_ibuf_merges(
1879 /*=========================*/
1880  ulint id)
1881 {
1882  fil_space_t* space;
1883 
1884  mutex_enter(&fil_system->mutex);
1885 
1886  space = fil_space_get_by_id(id);
1887 
1888  if (space == NULL) {
1889  fprintf(stderr,
1890  "InnoDB: Error: decrementing ibuf merge of a"
1891  " dropped tablespace %lu\n",
1892  (ulong) id);
1893  }
1894 
1895  if (space != NULL) {
1896  space->n_pending_ibuf_merges--;
1897  }
1898 
1899  mutex_exit(&fil_system->mutex);
1900 }
1901 #endif /* !UNIV_HOTBACKUP */
1902 
1903 /********************************************************/
1905 static
1906 void
1907 fil_create_directory_for_tablename(
1908 /*===============================*/
1909  const char* name)
1911 {
1912  const char* namend;
1913  char* path;
1914  ulint len;
1915 
1916  len = strlen(fil_path_to_mysql_datadir);
1917  namend = strchr(name, '/');
1918  ut_a(namend);
1919  path = static_cast<char *>(mem_alloc(len + (namend - name) + 2));
1920 
1921  memcpy(path, fil_path_to_mysql_datadir, len);
1922  path[len] = '/';
1923  memcpy(path + len + 1, name, namend - name);
1924  path[len + (namend - name) + 1] = 0;
1925 
1927 
1928  ut_a(os_file_create_directory(path, FALSE));
1929  mem_free(path);
1930 }
1931 
1932 #ifndef UNIV_HOTBACKUP
1933 /********************************************************/
1935 static
1936 void
1937 fil_op_write_log(
1938 /*=============*/
1939  ulint type,
1943  ulint space_id,
1944  ulint log_flags,
1946  ulint flags,
1949  const char* name,
1953  const char* new_name,
1956  mtr_t* mtr)
1957 {
1958  byte* log_ptr;
1959  ulint len;
1960 
1961  log_ptr = mlog_open(mtr, 11 + 2 + 1);
1962 
1963  if (!log_ptr) {
1964  /* Logging in mtr is switched off during crash recovery:
1965  in that case mlog_open returns NULL */
1966  return;
1967  }
1968 
1970  type, space_id, log_flags, log_ptr, mtr);
1971  if (type == MLOG_FILE_CREATE2) {
1972  mach_write_to_4(log_ptr, flags);
1973  log_ptr += 4;
1974  }
1975  /* Let us store the strings as null-terminated for easier readability
1976  and handling */
1977 
1978  len = strlen(name) + 1;
1979 
1980  mach_write_to_2(log_ptr, len);
1981  log_ptr += 2;
1982  mlog_close(mtr, log_ptr);
1983 
1984  mlog_catenate_string(mtr, (byte*) name, len);
1985 
1986  if (type == MLOG_FILE_RENAME) {
1987  len = strlen(new_name) + 1;
1988  log_ptr = mlog_open(mtr, 2 + len);
1989  ut_a(log_ptr);
1990  mach_write_to_2(log_ptr, len);
1991  log_ptr += 2;
1992  mlog_close(mtr, log_ptr);
1993 
1994  mlog_catenate_string(mtr, (byte*) new_name, len);
1995  }
1996 }
1997 #endif
1998 
1999 /*******************************************************************/
2013 UNIV_INTERN
2014 byte*
2015 fil_op_log_parse_or_replay(
2016 /*=======================*/
2017  byte* ptr,
2020  byte* end_ptr,
2021  ulint type,
2022  ulint space_id,
2025  ulint log_flags)
2027 {
2028  ulint name_len;
2029  ulint new_name_len;
2030  const char* name;
2031  const char* new_name = NULL;
2032  ulint flags = 0;
2033 
2034  if (type == MLOG_FILE_CREATE2) {
2035  if (end_ptr < ptr + 4) {
2036 
2037  return(NULL);
2038  }
2039 
2040  flags = mach_read_from_4(ptr);
2041  ptr += 4;
2042  }
2043 
2044  if (end_ptr < ptr + 2) {
2045 
2046  return(NULL);
2047  }
2048 
2049  name_len = mach_read_from_2(ptr);
2050 
2051  ptr += 2;
2052 
2053  if (end_ptr < ptr + name_len) {
2054 
2055  return(NULL);
2056  }
2057 
2058  name = (const char*) ptr;
2059 
2060  ptr += name_len;
2061 
2062  if (type == MLOG_FILE_RENAME) {
2063  if (end_ptr < ptr + 2) {
2064 
2065  return(NULL);
2066  }
2067 
2068  new_name_len = mach_read_from_2(ptr);
2069 
2070  ptr += 2;
2071 
2072  if (end_ptr < ptr + new_name_len) {
2073 
2074  return(NULL);
2075  }
2076 
2077  new_name = (const char*) ptr;
2078 
2079  ptr += new_name_len;
2080  }
2081 
2082  /* We managed to parse a full log record body */
2083  /*
2084  printf("Parsed log rec of type %lu space %lu\n"
2085  "name %s\n", type, space_id, name);
2086 
2087  if (type == MLOG_FILE_RENAME) {
2088  printf("new name %s\n", new_name);
2089  }
2090  */
2091  if (!space_id) {
2092 
2093  return(ptr);
2094  }
2095 
2096  /* Let us try to perform the file operation, if sensible. Note that
2097  ibbackup has at this stage already read in all space id info to the
2098  fil0fil.c data structures.
2099 
2100  NOTE that our algorithm is not guaranteed to work correctly if there
2101  were renames of tables during the backup. See ibbackup code for more
2102  on the problem. */
2103 
2104  switch (type) {
2105  case MLOG_FILE_DELETE:
2106  if (fil_tablespace_exists_in_mem(space_id)) {
2107  ut_a(fil_delete_tablespace(space_id));
2108  }
2109 
2110  break;
2111 
2112  case MLOG_FILE_RENAME:
2113  /* We do the rename based on space id, not old file name;
2114  this should guarantee that after the log replay each .ibd file
2115  has the correct name for the latest log sequence number; the
2116  proof is left as an exercise :) */
2117 
2118  if (fil_tablespace_exists_in_mem(space_id)) {
2119  /* Create the database directory for the new name, if
2120  it does not exist yet */
2121  fil_create_directory_for_tablename(new_name);
2122 
2123  /* Rename the table if there is not yet a tablespace
2124  with the same name */
2125 
2126  if (fil_get_space_id_for_table(new_name)
2127  == ULINT_UNDEFINED) {
2128  /* We do not care of the old name, that is
2129  why we pass NULL as the first argument */
2130  if (!fil_rename_tablespace(NULL, space_id,
2131  new_name)) {
2132  ut_error;
2133  }
2134  }
2135  }
2136 
2137  break;
2138 
2139  case MLOG_FILE_CREATE:
2140  case MLOG_FILE_CREATE2:
2141  if (fil_tablespace_exists_in_mem(space_id)) {
2142  /* Do nothing */
2143  } else if (fil_get_space_id_for_table(name)
2144  != ULINT_UNDEFINED) {
2145  /* Do nothing */
2146  } else if (log_flags & MLOG_FILE_FLAG_TEMP) {
2147  /* Temporary table, do nothing */
2148  } else {
2149  /* Create the database directory for name, if it does
2150  not exist yet */
2151  fil_create_directory_for_tablename(name);
2152 
2153  if (fil_create_new_single_table_tablespace(
2154  space_id, name, FALSE, flags,
2155  FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) {
2156  ut_error;
2157  }
2158  }
2159 
2160  break;
2161 
2162  default:
2163  ut_error;
2164  }
2165 
2166  return(ptr);
2167 }
2168 
2169 /*******************************************************************/
2173 UNIV_INTERN
2174 ibool
2175 fil_delete_tablespace(
2176 /*==================*/
2177  ulint id)
2178 {
2179  ibool success;
2180  fil_space_t* space;
2181  fil_node_t* node;
2182  ulint count = 0;
2183  char* path;
2184 
2185  ut_a(id != 0);
2186 stop_ibuf_merges:
2187  mutex_enter(&fil_system->mutex);
2188 
2189  space = fil_space_get_by_id(id);
2190 
2191  if (space != NULL) {
2192  space->stop_ibuf_merges = TRUE;
2193 
2194  if (space->n_pending_ibuf_merges == 0) {
2195  mutex_exit(&fil_system->mutex);
2196 
2197  count = 0;
2198 
2199  goto try_again;
2200  } else {
2201  if (count > 5000) {
2202  ut_print_timestamp(stderr);
2203  fputs(" InnoDB: Warning: trying to"
2204  " delete tablespace ", stderr);
2205  ut_print_filename(stderr, space->name);
2206  fprintf(stderr, ",\n"
2207  "InnoDB: but there are %lu pending"
2208  " ibuf merges on it.\n"
2209  "InnoDB: Loop %lu.\n",
2210  (ulong) space->n_pending_ibuf_merges,
2211  (ulong) count);
2212  }
2213 
2214  mutex_exit(&fil_system->mutex);
2215 
2216  os_thread_sleep(20000);
2217  count++;
2218 
2219  goto stop_ibuf_merges;
2220  }
2221  }
2222 
2223  mutex_exit(&fil_system->mutex);
2224  count = 0;
2225 
2226 try_again:
2227  mutex_enter(&fil_system->mutex);
2228 
2229  space = fil_space_get_by_id(id);
2230 
2231  if (space == NULL) {
2232  ut_print_timestamp(stderr);
2233  fprintf(stderr,
2234  " InnoDB: Error: cannot delete tablespace %lu\n"
2235  "InnoDB: because it is not found in the"
2236  " tablespace memory cache.\n",
2237  (ulong) id);
2238 
2239  mutex_exit(&fil_system->mutex);
2240 
2241  return(FALSE);
2242  }
2243 
2244  ut_a(space);
2245  ut_a(space->n_pending_ibuf_merges == 0);
2246 
2247  space->is_being_deleted = TRUE;
2248 
2249  ut_a(UT_LIST_GET_LEN(space->chain) == 1);
2250  node = UT_LIST_GET_FIRST(space->chain);
2251 
2252  if (space->n_pending_flushes > 0 || node->n_pending > 0) {
2253  if (count > 1000) {
2254  ut_print_timestamp(stderr);
2255  fputs(" InnoDB: Warning: trying to"
2256  " delete tablespace ", stderr);
2257  ut_print_filename(stderr, space->name);
2258  fprintf(stderr, ",\n"
2259  "InnoDB: but there are %lu flushes"
2260  " and %lu pending i/o's on it\n"
2261  "InnoDB: Loop %lu.\n",
2262  (ulong) space->n_pending_flushes,
2263  (ulong) node->n_pending,
2264  (ulong) count);
2265  }
2266  mutex_exit(&fil_system->mutex);
2267  os_thread_sleep(20000);
2268 
2269  count++;
2270 
2271  goto try_again;
2272  }
2273 
2274  path = mem_strdup(space->name);
2275 
2276  mutex_exit(&fil_system->mutex);
2277 
2278  /* Important: We rely on the data dictionary mutex to ensure
2279  that a race is not possible here. It should serialize the tablespace
2280  drop/free. We acquire an X latch only to avoid a race condition
2281  when accessing the tablespace instance via:
2282 
2283  fsp_get_available_space_in_free_extents().
2284 
2285  There our main motivation is to reduce the contention on the
2286  dictionary mutex. */
2287 
2288  rw_lock_x_lock(&space->latch);
2289 
2290 #ifndef UNIV_HOTBACKUP
2291  /* Invalidate in the buffer pool all pages belonging to the
2292  tablespace. Since we have set space->is_being_deleted = TRUE, readahead
2293  or ibuf merge can no longer read more pages of this tablespace to the
2294  buffer pool. Thus we can clean the tablespace out of the buffer pool
2295  completely and permanently. The flag is_being_deleted also prevents
2296  fil_flush() from being applied to this tablespace. */
2297 
2298  buf_LRU_invalidate_tablespace(id);
2299 #endif
2300  /* printf("Deleting tablespace %s id %lu\n", space->name, id); */
2301 
2302  mutex_enter(&fil_system->mutex);
2303 
2304  success = fil_space_free(id, TRUE);
2305 
2306  mutex_exit(&fil_system->mutex);
2307 
2308  if (success) {
2309  success = os_file_delete(path);
2310 
2311  if (!success) {
2312  success = os_file_delete_if_exists(path);
2313  }
2314  } else {
2315  rw_lock_x_unlock(&space->latch);
2316  }
2317 
2318  if (success) {
2319 #ifndef UNIV_HOTBACKUP
2320  /* Write a log record about the deletion of the .ibd
2321  file, so that ibbackup can replay it in the
2322  --apply-log phase. We use a dummy mtr and the familiar
2323  log write mechanism. */
2324  mtr_t mtr;
2325 
2326  /* When replaying the operation in ibbackup, do not try
2327  to write any log record */
2328  mtr_start(&mtr);
2329 
2330  fil_op_write_log(MLOG_FILE_DELETE, id, 0, 0, path, NULL, &mtr);
2331  mtr_commit(&mtr);
2332 #endif
2333  mem_free(path);
2334 
2335  return(TRUE);
2336  }
2337 
2338  mem_free(path);
2339 
2340  return(FALSE);
2341 }
2342 
2343 /*******************************************************************/
2346 UNIV_INTERN
2347 ibool
2348 fil_tablespace_is_being_deleted(
2349 /*============================*/
2350  ulint id)
2351 {
2352  fil_space_t* space;
2353  ibool is_being_deleted;
2354 
2355  mutex_enter(&fil_system->mutex);
2356 
2357  space = fil_space_get_by_id(id);
2358 
2359  ut_a(space != NULL);
2360 
2361  is_being_deleted = space->is_being_deleted;
2362 
2363  mutex_exit(&fil_system->mutex);
2364 
2365  return(is_being_deleted);
2366 }
2367 
2368 #ifndef UNIV_HOTBACKUP
2369 /*******************************************************************/
2378 UNIV_INTERN
2379 ibool
2380 fil_discard_tablespace(
2381 /*===================*/
2382  ulint id)
2383 {
2384  ibool success;
2385 
2386  success = fil_delete_tablespace(id);
2387 
2388  if (!success) {
2389  fprintf(stderr,
2390  "InnoDB: Warning: cannot delete tablespace %lu"
2391  " in DISCARD TABLESPACE.\n"
2392  "InnoDB: But let us remove the"
2393  " insert buffer entries for this tablespace.\n",
2394  (ulong) id);
2395  }
2396 
2397  /* Remove all insert buffer entries for the tablespace */
2398 
2399  ibuf_delete_for_discarded_space(id);
2400 
2401  return(success);
2402 }
2403 #endif /* !UNIV_HOTBACKUP */
2404 
2405 /*******************************************************************/
2408 static
2409 ibool
2410 fil_rename_tablespace_in_mem(
2411 /*=========================*/
2412  fil_space_t* space,
2413  fil_node_t* node,
2414  const char* path)
2415 {
2416  fil_space_t* space2;
2417  const char* old_name = space->name;
2418 
2419  ut_ad(mutex_own(&fil_system->mutex));
2420 
2421  space2 = fil_space_get_by_name(old_name);
2422  if (space != space2) {
2423  fputs("InnoDB: Error: cannot find ", stderr);
2424  ut_print_filename(stderr, old_name);
2425  fputs(" in tablespace memory cache\n", stderr);
2426 
2427  return(FALSE);
2428  }
2429 
2430  space2 = fil_space_get_by_name(path);
2431  if (space2 != NULL) {
2432  fputs("InnoDB: Error: ", stderr);
2433  ut_print_filename(stderr, path);
2434  fputs(" is already in tablespace memory cache\n", stderr);
2435 
2436  return(FALSE);
2437  }
2438 
2439  HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash,
2440  ut_fold_string(space->name), space);
2441  mem_free(space->name);
2442  mem_free(node->name);
2443 
2444  space->name = mem_strdup(path);
2445  node->name = mem_strdup(path);
2446 
2447  HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash,
2448  ut_fold_string(path), space);
2449  return(TRUE);
2450 }
2451 
2452 /*******************************************************************/
2456 static
2457 char*
2458 fil_make_ibd_name(
2459 /*==============*/
2460  const char* name,
2462  ibool is_temp)
2463 {
2464  ulint namelen = strlen(name);
2465  ulint dirlen = strlen(fil_path_to_mysql_datadir);
2466  char* filename = static_cast<char *>(mem_alloc(namelen + dirlen + sizeof "/.ibd"));
2467 
2468  if (is_temp) {
2469  memcpy(filename, name, namelen);
2470  memcpy(filename + namelen, ".ibd", sizeof ".ibd");
2471  } else {
2472  memcpy(filename, fil_path_to_mysql_datadir, dirlen);
2473  filename[dirlen] = '/';
2474 
2475  memcpy(filename + dirlen + 1, name, namelen);
2476  memcpy(filename + dirlen + namelen + 1, ".ibd", sizeof ".ibd");
2477  }
2478 
2479  srv_normalize_path_for_win(filename);
2480 
2481  return(filename);
2482 }
2483 
2484 /*******************************************************************/
2488 UNIV_INTERN
2489 ibool
2490 fil_rename_tablespace(
2491 /*==================*/
2492  const char* old_name,
2496  ulint id,
2497  const char* new_name)
2500 {
2501  ibool success;
2502  fil_space_t* space;
2503  fil_node_t* node;
2504  ulint count = 0;
2505  char* path;
2506  ibool old_name_was_specified = TRUE;
2507  char* old_path;
2508 
2509  ut_a(id != 0);
2510 
2511  if (old_name == NULL) {
2512  old_name = "(name not specified)";
2513  old_name_was_specified = FALSE;
2514  }
2515 retry:
2516  count++;
2517 
2518  if (count > 1000) {
2519  ut_print_timestamp(stderr);
2520  fputs(" InnoDB: Warning: problems renaming ", stderr);
2521  ut_print_filename(stderr, old_name);
2522  fputs(" to ", stderr);
2523  ut_print_filename(stderr, new_name);
2524  fprintf(stderr, ", %lu iterations\n", (ulong) count);
2525  }
2526 
2527  mutex_enter(&fil_system->mutex);
2528 
2529  space = fil_space_get_by_id(id);
2530 
2531  if (space == NULL) {
2532  fprintf(stderr,
2533  "InnoDB: Error: cannot find space id %lu"
2534  " in the tablespace memory cache\n"
2535  "InnoDB: though the table ", (ulong) id);
2536  ut_print_filename(stderr, old_name);
2537  fputs(" in a rename operation should have that id\n", stderr);
2538  mutex_exit(&fil_system->mutex);
2539 
2540  return(FALSE);
2541  }
2542 
2543  if (count > 25000) {
2544  space->stop_ios = FALSE;
2545  mutex_exit(&fil_system->mutex);
2546 
2547  return(FALSE);
2548  }
2549 
2550  /* We temporarily close the .ibd file because we do not trust that
2551  operating systems can rename an open file. For the closing we have to
2552  wait until there are no pending i/o's or flushes on the file. */
2553 
2554  space->stop_ios = TRUE;
2555 
2556  ut_a(UT_LIST_GET_LEN(space->chain) == 1);
2557  node = UT_LIST_GET_FIRST(space->chain);
2558 
2559  if (node->n_pending > 0 || node->n_pending_flushes > 0) {
2560  /* There are pending i/o's or flushes, sleep for a while and
2561  retry */
2562 
2563  mutex_exit(&fil_system->mutex);
2564 
2565  os_thread_sleep(20000);
2566 
2567  goto retry;
2568 
2569  } else if (node->modification_counter > node->flush_counter) {
2570  /* Flush the space */
2571 
2572  mutex_exit(&fil_system->mutex);
2573 
2574  os_thread_sleep(20000);
2575 
2576  fil_flush(id);
2577 
2578  goto retry;
2579 
2580  } else if (node->open) {
2581  /* Close the file */
2582 
2583  fil_node_close_file(node, fil_system);
2584  }
2585 
2586  /* Check that the old name in the space is right */
2587 
2588  if (old_name_was_specified) {
2589  old_path = fil_make_ibd_name(old_name, FALSE);
2590 
2591  ut_a(strcmp(space->name, old_path) == 0);
2592  ut_a(strcmp(node->name, old_path) == 0);
2593  } else {
2594  old_path = mem_strdup(space->name);
2595  }
2596 
2597  /* Rename the tablespace and the node in the memory cache */
2598  path = fil_make_ibd_name(new_name, FALSE);
2599  success = fil_rename_tablespace_in_mem(space, node, path);
2600 
2601  if (success) {
2602  success = os_file_rename(innodb_file_data_key, old_path, path);
2603 
2604  if (!success) {
2605  /* We have to revert the changes we made
2606  to the tablespace memory cache */
2607 
2608  ut_a(fil_rename_tablespace_in_mem(space, node,
2609  old_path));
2610  }
2611  }
2612 
2613  mem_free(path);
2614  mem_free(old_path);
2615 
2616  space->stop_ios = FALSE;
2617 
2618  mutex_exit(&fil_system->mutex);
2619 
2620 #ifndef UNIV_HOTBACKUP
2621  if (success) {
2622  mtr_t mtr;
2623 
2624  mtr_start(&mtr);
2625 
2626  fil_op_write_log(MLOG_FILE_RENAME, id, 0, 0, old_name, new_name,
2627  &mtr);
2628  mtr_commit(&mtr);
2629  }
2630 #endif
2631  return(success);
2632 }
2633 
2634 /*******************************************************************/
2641 UNIV_INTERN
2642 ulint
2643 fil_create_new_single_table_tablespace(
2644 /*===================================*/
2645  ulint space_id,
2646  const char* tablename,
2650  ibool is_temp,
2652  ulint flags,
2653  ulint size)
2656 {
2657  os_file_t file;
2658  ibool ret;
2659  ulint err;
2660  byte* buf2;
2661  byte* page;
2662  ibool success;
2663  char* path;
2664 
2665  ut_a(space_id > 0);
2666  ut_a(space_id < SRV_LOG_SPACE_FIRST_ID);
2668  /* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
2669  ROW_FORMAT=COMPACT
2670  ((table->flags & ~(~0 << DICT_TF_BITS)) == DICT_TF_COMPACT) and
2671  ROW_FORMAT=REDUNDANT (table->flags == 0). For any other
2672  format, the tablespace flags should equal
2673  (table->flags & ~(~0 << DICT_TF_BITS)). */
2674  ut_a(flags != DICT_TF_COMPACT);
2675  ut_a(!(flags & (~0UL << DICT_TF_BITS)));
2676 
2677  path = fil_make_ibd_name(tablename, is_temp);
2678 
2679  file = os_file_create(innodb_file_data_key, path,
2680  OS_FILE_CREATE, OS_FILE_NORMAL,
2681  OS_DATA_FILE, &ret);
2682  if (ret == FALSE) {
2683  ut_print_timestamp(stderr);
2684  fputs(" InnoDB: Error creating file ", stderr);
2685  ut_print_filename(stderr, path);
2686  fputs(".\n", stderr);
2687 
2688  /* The following call will print an error message */
2689 
2690  err = os_file_get_last_error(TRUE);
2691 
2692  if (err == OS_FILE_ALREADY_EXISTS) {
2693  fputs("InnoDB: The file already exists though"
2694  " the corresponding table did not\n"
2695  "InnoDB: exist in the InnoDB data dictionary."
2696  " Have you moved InnoDB\n"
2697  "InnoDB: .ibd files around without using the"
2698  " SQL commands\n"
2699  "InnoDB: DISCARD TABLESPACE and"
2700  " IMPORT TABLESPACE, or did\n"
2701  "InnoDB: mysqld crash in the middle of"
2702  " CREATE TABLE? You can\n"
2703  "InnoDB: resolve the problem by"
2704  " removing the file ", stderr);
2705  ut_print_filename(stderr, path);
2706  fputs("\n"
2707  "InnoDB: under the 'datadir' of MySQL.\n",
2708  stderr);
2709 
2710  mem_free(path);
2711  return(DB_TABLESPACE_ALREADY_EXISTS);
2712  }
2713 
2714  if (err == OS_FILE_DISK_FULL) {
2715 
2716  mem_free(path);
2717  return(DB_OUT_OF_FILE_SPACE);
2718  }
2719 
2720  mem_free(path);
2721  return(DB_ERROR);
2722  }
2723 
2724  ret = os_file_set_size(path, file, size * UNIV_PAGE_SIZE, 0);
2725 
2726  if (!ret) {
2727  err = DB_OUT_OF_FILE_SPACE;
2728 error_exit:
2729  os_file_close(file);
2730 error_exit2:
2731  os_file_delete(path);
2732 
2733  mem_free(path);
2734  return(err);
2735  }
2736 
2737  /* printf("Creating tablespace %s id %lu\n", path, space_id); */
2738 
2739  /* We have to write the space id to the file immediately and flush the
2740  file to disk. This is because in crash recovery we must be aware what
2741  tablespaces exist and what are their space id's, so that we can apply
2742  the log records to the right file. It may take quite a while until
2743  buffer pool flush algorithms write anything to the file and flush it to
2744  disk. If we would not write here anything, the file would be filled
2745  with zeros from the call of os_file_set_size(), until a buffer pool
2746  flush would write to it. */
2747 
2748  buf2 = static_cast<byte *>(ut_malloc(3 * UNIV_PAGE_SIZE));
2749  /* Align the memory for file i/o if we might have O_DIRECT set */
2750  page = static_cast<byte *>(ut_align(buf2, UNIV_PAGE_SIZE));
2751 
2752  memset(page, '\0', UNIV_PAGE_SIZE);
2753 
2754  fsp_header_init_fields(page, space_id, flags);
2756 
2757  if (!(flags & DICT_TF_ZSSIZE_MASK)) {
2758  buf_flush_init_for_writing(page, NULL, 0);
2759  ret = os_file_write(path, file, page, 0, 0, UNIV_PAGE_SIZE);
2760  } else {
2761  page_zip_des_t page_zip;
2762  ulint zip_size;
2763 
2764  zip_size = ((PAGE_ZIP_MIN_SIZE >> 1)
2765  << ((flags & DICT_TF_ZSSIZE_MASK)
2766  >> DICT_TF_ZSSIZE_SHIFT));
2767 
2768  page_zip_set_size(&page_zip, zip_size);
2769  page_zip.data = page + UNIV_PAGE_SIZE;
2770 #ifdef UNIV_DEBUG
2771  page_zip.m_start =
2772 #endif /* UNIV_DEBUG */
2773  page_zip.m_end = page_zip.m_nonempty =
2774  page_zip.n_blobs = 0;
2775  buf_flush_init_for_writing(page, &page_zip, 0);
2776  ret = os_file_write(path, file, page_zip.data, 0, 0, zip_size);
2777  }
2778 
2779  ut_free(buf2);
2780 
2781  if (!ret) {
2782  fputs("InnoDB: Error: could not write the first page"
2783  " to tablespace ", stderr);
2784  ut_print_filename(stderr, path);
2785  putc('\n', stderr);
2786  err = DB_ERROR;
2787  goto error_exit;
2788  }
2789 
2790  ret = os_file_flush(file);
2791 
2792  if (!ret) {
2793  fputs("InnoDB: Error: file flush of tablespace ", stderr);
2794  ut_print_filename(stderr, path);
2795  fputs(" failed\n", stderr);
2796  err = DB_ERROR;
2797  goto error_exit;
2798  }
2799 
2800  os_file_close(file);
2801 
2802  success = fil_space_create(path, space_id, flags, FIL_TABLESPACE);
2803 
2804  if (!success) {
2805  err = DB_ERROR;
2806  goto error_exit2;
2807  }
2808 
2809  fil_node_create(path, size, space_id, FALSE);
2810 
2811 #ifndef UNIV_HOTBACKUP
2812  {
2813  mtr_t mtr;
2814 
2815  mtr_start(&mtr);
2816 
2817  fil_op_write_log(flags
2819  : MLOG_FILE_CREATE,
2820  space_id,
2821  is_temp ? MLOG_FILE_FLAG_TEMP : 0,
2822  flags,
2823  tablename, NULL, &mtr);
2824 
2825  mtr_commit(&mtr);
2826  }
2827 #endif
2828  mem_free(path);
2829  return(DB_SUCCESS);
2830 }
2831 
2832 #ifndef UNIV_HOTBACKUP
2833 /********************************************************************/
2843 UNIV_INTERN
2844 ibool
2845 fil_reset_too_high_lsns(
2846 /*====================*/
2847  const char* name,
2849  ib_uint64_t current_lsn)
2852 {
2853  os_file_t file;
2854  char* filepath;
2855  byte* page;
2856  byte* buf2;
2857  ib_uint64_t flush_lsn;
2858  ulint space_id;
2859  ib_int64_t file_size;
2860  ib_int64_t offset;
2861  ulint zip_size;
2862  ibool success;
2863  page_zip_des_t page_zip;
2864 
2865  filepath = fil_make_ibd_name(name, FALSE);
2866 
2867  file = os_file_create_simple_no_error_handling(
2868  innodb_file_data_key, filepath, OS_FILE_OPEN,
2869  OS_FILE_READ_WRITE, &success);
2870  if (!success) {
2871  /* The following call prints an error message */
2872  os_file_get_last_error(TRUE);
2873 
2874  ut_print_timestamp(stderr);
2875 
2876  fputs(" InnoDB: Error: trying to open a table,"
2877  " but could not\n"
2878  "InnoDB: open the tablespace file ", stderr);
2879  ut_print_filename(stderr, filepath);
2880  fputs("!\n", stderr);
2881  mem_free(filepath);
2882 
2883  return(FALSE);
2884  }
2885 
2886  /* Read the first page of the tablespace */
2887 
2888  buf2 = static_cast<byte *>(ut_malloc(3 * UNIV_PAGE_SIZE));
2889  /* Align the memory for file i/o if we might have O_DIRECT set */
2890  page = static_cast<byte *>(ut_align(buf2, UNIV_PAGE_SIZE));
2891 
2892  success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
2893  if (!success) {
2894 
2895  goto func_exit;
2896  }
2897 
2898  /* We have to read the file flush lsn from the header of the file */
2899 
2900  flush_lsn = mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN);
2901 
2902  if (current_lsn >= flush_lsn) {
2903  /* Ok */
2904  success = TRUE;
2905 
2906  goto func_exit;
2907  }
2908 
2909  space_id = fsp_header_get_space_id(page);
2910  zip_size = fsp_header_get_zip_size(page);
2911 
2912  page_zip_des_init(&page_zip);
2913  page_zip_set_size(&page_zip, zip_size);
2914  if (zip_size) {
2915  page_zip.data = page + UNIV_PAGE_SIZE;
2916  }
2917 
2918  ut_print_timestamp(stderr);
2919  fprintf(stderr,
2920  " InnoDB: Flush lsn in the tablespace file %lu"
2921  " to be imported\n"
2922  "InnoDB: is %"PRIu64", which exceeds current"
2923  " system lsn %"PRIu64".\n"
2924  "InnoDB: We reset the lsn's in the file ",
2925  (ulong) space_id,
2926  flush_lsn, current_lsn);
2927  ut_print_filename(stderr, filepath);
2928  fputs(".\n", stderr);
2929 
2930  ut_a(ut_is_2pow(zip_size));
2931  ut_a(zip_size <= UNIV_PAGE_SIZE);
2932 
2933  /* Loop through all the pages in the tablespace and reset the lsn and
2934  the page checksum if necessary */
2935 
2936  file_size = os_file_get_size_as_iblonglong(file);
2937 
2938  for (offset = 0; offset < file_size;
2939  offset += zip_size ? zip_size : UNIV_PAGE_SIZE) {
2940  success = os_file_read(file, page,
2941  (ulint)(offset & 0xFFFFFFFFUL),
2942  (ulint)(offset >> 32),
2943  zip_size ? zip_size : UNIV_PAGE_SIZE);
2944  if (!success) {
2945 
2946  goto func_exit;
2947  }
2948  if (mach_read_from_8(page + FIL_PAGE_LSN) > current_lsn) {
2949  /* We have to reset the lsn */
2950 
2951  if (zip_size) {
2952  memcpy(page_zip.data, page, zip_size);
2953  buf_flush_init_for_writing(
2954  page, &page_zip, current_lsn);
2955  success = os_file_write(
2956  filepath, file, page_zip.data,
2957  (ulint) offset & 0xFFFFFFFFUL,
2958  (ulint) (offset >> 32), zip_size);
2959  } else {
2960  buf_flush_init_for_writing(
2961  page, NULL, current_lsn);
2962  success = os_file_write(
2963  filepath, file, page,
2964  (ulint)(offset & 0xFFFFFFFFUL),
2965  (ulint)(offset >> 32),
2966  UNIV_PAGE_SIZE);
2967  }
2968 
2969  if (!success) {
2970 
2971  goto func_exit;
2972  }
2973  }
2974  }
2975 
2976  success = os_file_flush(file);
2977  if (!success) {
2978 
2979  goto func_exit;
2980  }
2981 
2982  /* We now update the flush_lsn stamp at the start of the file */
2983  success = os_file_read(file, page, 0, 0,
2984  zip_size ? zip_size : UNIV_PAGE_SIZE);
2985  if (!success) {
2986 
2987  goto func_exit;
2988  }
2989 
2990  mach_write_to_8(page + FIL_PAGE_FILE_FLUSH_LSN, current_lsn);
2991 
2992  success = os_file_write(filepath, file, page, 0, 0,
2993  zip_size ? zip_size : UNIV_PAGE_SIZE);
2994  if (!success) {
2995 
2996  goto func_exit;
2997  }
2998  success = os_file_flush(file);
2999 func_exit:
3000  os_file_close(file);
3001  ut_free(buf2);
3002  mem_free(filepath);
3003 
3004  return(success);
3005 }
3006 
3007 /********************************************************************/
3017 UNIV_INTERN
3018 ibool
3019 fil_open_single_table_tablespace(
3020 /*=============================*/
3021  ibool check_space_id,
3028  ulint id,
3029  ulint flags,
3030  const char* name)
3032 {
3033  os_file_t file;
3034  char* filepath;
3035  ibool success;
3036  byte* buf2;
3037  byte* page;
3038  ulint space_id;
3039  ulint space_flags;
3040 
3041  filepath = fil_make_ibd_name(name, FALSE);
3042 
3043  /* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
3044  ROW_FORMAT=COMPACT
3045  ((table->flags & ~(~0 << DICT_TF_BITS)) == DICT_TF_COMPACT) and
3046  ROW_FORMAT=REDUNDANT (table->flags == 0). For any other
3047  format, the tablespace flags should equal
3048  (table->flags & ~(~0 << DICT_TF_BITS)). */
3049  ut_a(flags != DICT_TF_COMPACT);
3050  ut_a(!(flags & (~0UL << DICT_TF_BITS)));
3051 
3052  file = os_file_create_simple_no_error_handling(
3053  innodb_file_data_key, filepath, OS_FILE_OPEN,
3054  OS_FILE_READ_ONLY, &success);
3055  if (!success) {
3056  /* The following call prints an error message */
3057  os_file_get_last_error(TRUE);
3058 
3059  ut_print_timestamp(stderr);
3060 
3061  fputs(" InnoDB: Error: trying to open a table,"
3062  " but could not\n"
3063  "InnoDB: open the tablespace file ", stderr);
3064  ut_print_filename(stderr, filepath);
3065  fputs("!\n"
3066  "InnoDB: Have you moved InnoDB .ibd files around"
3067  " without using the\n"
3068  "InnoDB: commands DISCARD TABLESPACE and"
3069  " IMPORT TABLESPACE?\n"
3070  "InnoDB: It is also possible that this is"
3071  " a temporary table #sql...,\n"
3072  "InnoDB: and MySQL removed the .ibd file for this.\n"
3073  "InnoDB: Please refer to\n"
3074  "InnoDB: " REFMAN "innodb-troubleshooting-datadict.html\n"
3075  "InnoDB: for how to resolve the issue.\n", stderr);
3076 
3077  mem_free(filepath);
3078 
3079  return(FALSE);
3080  }
3081 
3082  if (!check_space_id) {
3083  space_id = id;
3084 
3085  goto skip_check;
3086  }
3087 
3088  /* Read the first page of the tablespace */
3089 
3090  buf2 = static_cast<byte *>(ut_malloc(2 * UNIV_PAGE_SIZE));
3091  /* Align the memory for file i/o if we might have O_DIRECT set */
3092  page = static_cast<byte *>(ut_align(buf2, UNIV_PAGE_SIZE));
3093 
3094  success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
3095 
3096  /* We have to read the tablespace id and flags from the file. */
3097 
3098  space_id = fsp_header_get_space_id(page);
3099  space_flags = fsp_header_get_flags(page);
3100 
3101  ut_free(buf2);
3102 
3103  if (UNIV_UNLIKELY(space_id != id
3104  || space_flags != (flags & ~(~0 << DICT_TF_BITS)))) {
3105  ut_print_timestamp(stderr);
3106 
3107  fputs(" InnoDB: Error: tablespace id and flags in file ",
3108  stderr);
3109  ut_print_filename(stderr, filepath);
3110  fprintf(stderr, " are %lu and %lu, but in the InnoDB\n"
3111  "InnoDB: data dictionary they are %lu and %lu.\n"
3112  "InnoDB: Have you moved InnoDB .ibd files"
3113  " around without using the\n"
3114  "InnoDB: commands DISCARD TABLESPACE and"
3115  " IMPORT TABLESPACE?\n"
3116  "InnoDB: Please refer to\n"
3117  "InnoDB: " REFMAN "innodb-troubleshooting-datadict.html\n"
3118  "InnoDB: for how to resolve the issue.\n",
3119  (ulong) space_id, (ulong) space_flags,
3120  (ulong) id, (ulong) flags);
3121 
3122  success = FALSE;
3123 
3124  goto func_exit;
3125  }
3126 
3127 skip_check:
3128  success = fil_space_create(filepath, space_id, flags, FIL_TABLESPACE);
3129 
3130  if (!success) {
3131  goto func_exit;
3132  }
3133 
3134  /* We do not measure the size of the file, that is why we pass the 0
3135  below */
3136 
3137  fil_node_create(filepath, 0, space_id, FALSE);
3138 func_exit:
3139  os_file_close(file);
3140  mem_free(filepath);
3141 
3142  return(success);
3143 }
3144 #endif /* !UNIV_HOTBACKUP */
3145 
3146 #ifdef UNIV_HOTBACKUP
3147 /*******************************************************************/
3151 static
3152 char*
3153 fil_make_ibbackup_old_name(
3154 /*=======================*/
3155  const char* name)
3156 {
3157  static const char suffix[] = "_ibbackup_old_vers_";
3158  ulint len = strlen(name);
3159  char* path = mem_alloc(len + (15 + sizeof suffix));
3160 
3161  memcpy(path, name, len);
3162  memcpy(path + len, suffix, (sizeof suffix) - 1);
3163  ut_sprintf_timestamp_without_extra_chars(path + len + sizeof suffix);
3164  return(path);
3165 }
3166 #endif /* UNIV_HOTBACKUP */
3167 
3168 /********************************************************************/
3171 static
3172 void
3173 fil_load_single_table_tablespace(
3174 /*=============================*/
3175  const char* dbname,
3176  const char* filename)
3178 {
3179  os_file_t file;
3180  char* filepath;
3181  ibool success;
3182  byte* buf2;
3183  byte* page;
3184  ulint space_id;
3185  ulint flags;
3186  ulint size_low;
3187  ulint size_high;
3188  uint64_t size;
3189 #ifdef UNIV_HOTBACKUP
3190  fil_space_t* space;
3191 #endif
3192  filepath = static_cast<char *>(mem_alloc(strlen(dbname) + strlen(filename)
3193  + strlen(fil_path_to_mysql_datadir) + 3));
3194 
3195  sprintf(filepath, "%s/%s/%s", fil_path_to_mysql_datadir, dbname,
3196  filename);
3197  srv_normalize_path_for_win(filepath);
3198 #ifdef __WIN__
3199 # ifndef UNIV_HOTBACKUP
3200  /* If lower_case_table_names is 0 or 2, then MySQL allows database
3201  directory names with upper case letters. On Windows, all table and
3202  database names in InnoDB are internally always in lower case. Put the
3203  file path to lower case, so that we are consistent with InnoDB's
3204  internal data dictionary. */
3205 
3206  dict_casedn_str(filepath);
3207 # endif /* !UNIV_HOTBACKUP */
3208 #endif
3209  file = os_file_create_simple_no_error_handling(
3210  innodb_file_data_key, filepath, OS_FILE_OPEN,
3211  OS_FILE_READ_ONLY, &success);
3212  if (!success) {
3213  /* The following call prints an error message */
3214  os_file_get_last_error(TRUE);
3215 
3216  fprintf(stderr,
3217  "InnoDB: Error: could not open single-table tablespace"
3218  " file\n"
3219  "InnoDB: %s!\n"
3220  "InnoDB: We do not continue the crash recovery,"
3221  " because the table may become\n"
3222  "InnoDB: corrupt if we cannot apply the log records"
3223  " in the InnoDB log to it.\n"
3224  "InnoDB: To fix the problem and start mysqld:\n"
3225  "InnoDB: 1) If there is a permission problem"
3226  " in the file and mysqld cannot\n"
3227  "InnoDB: open the file, you should"
3228  " modify the permissions.\n"
3229  "InnoDB: 2) If the table is not needed, or you can"
3230  " restore it from a backup,\n"
3231  "InnoDB: then you can remove the .ibd file,"
3232  " and InnoDB will do a normal\n"
3233  "InnoDB: crash recovery and ignore that table.\n"
3234  "InnoDB: 3) If the file system or the"
3235  " disk is broken, and you cannot remove\n"
3236  "InnoDB: the .ibd file, you can set"
3237  " innodb_force_recovery > 0 in my.cnf\n"
3238  "InnoDB: and force InnoDB to continue crash"
3239  " recovery here.\n", filepath);
3240 
3241  mem_free(filepath);
3242 
3243  if (srv_force_recovery > 0) {
3244  fprintf(stderr,
3245  "InnoDB: innodb_force_recovery"
3246  " was set to %lu. Continuing crash recovery\n"
3247  "InnoDB: even though we cannot access"
3248  " the .ibd file of this table.\n",
3249  srv_force_recovery);
3250  return;
3251  }
3252 
3253  exit(1);
3254  }
3255 
3256  success = os_file_get_size(file, &size_low, &size_high);
3257 
3258  if (!success) {
3259  /* The following call prints an error message */
3260  os_file_get_last_error(TRUE);
3261 
3262  fprintf(stderr,
3263  "InnoDB: Error: could not measure the size"
3264  " of single-table tablespace file\n"
3265  "InnoDB: %s!\n"
3266  "InnoDB: We do not continue crash recovery,"
3267  " because the table will become\n"
3268  "InnoDB: corrupt if we cannot apply the log records"
3269  " in the InnoDB log to it.\n"
3270  "InnoDB: To fix the problem and start mysqld:\n"
3271  "InnoDB: 1) If there is a permission problem"
3272  " in the file and mysqld cannot\n"
3273  "InnoDB: access the file, you should"
3274  " modify the permissions.\n"
3275  "InnoDB: 2) If the table is not needed,"
3276  " or you can restore it from a backup,\n"
3277  "InnoDB: then you can remove the .ibd file,"
3278  " and InnoDB will do a normal\n"
3279  "InnoDB: crash recovery and ignore that table.\n"
3280  "InnoDB: 3) If the file system or the disk is broken,"
3281  " and you cannot remove\n"
3282  "InnoDB: the .ibd file, you can set"
3283  " innodb_force_recovery > 0 in my.cnf\n"
3284  "InnoDB: and force InnoDB to continue"
3285  " crash recovery here.\n", filepath);
3286 
3287  os_file_close(file);
3288  mem_free(filepath);
3289 
3290  if (srv_force_recovery > 0) {
3291  fprintf(stderr,
3292  "InnoDB: innodb_force_recovery"
3293  " was set to %lu. Continuing crash recovery\n"
3294  "InnoDB: even though we cannot access"
3295  " the .ibd file of this table.\n",
3296  srv_force_recovery);
3297  return;
3298  }
3299 
3300  exit(1);
3301  }
3302 
3303  /* TODO: What to do in other cases where we cannot access an .ibd
3304  file during a crash recovery? */
3305 
3306  /* Every .ibd file is created >= 4 pages in size. Smaller files
3307  cannot be ok. */
3308 
3309  size = (((ib_int64_t)size_high) << 32) + (ib_int64_t)size_low;
3310 #ifndef UNIV_HOTBACKUP
3311  if (size < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
3312  fprintf(stderr,
3313  "InnoDB: Error: the size of single-table tablespace"
3314  " file %s\n"
3315  "InnoDB: is only %lu %lu, should be at least %lu!",
3316  filepath,
3317  (ulong) size_high,
3318  (ulong) size_low, (ulong) (4 * UNIV_PAGE_SIZE));
3319  os_file_close(file);
3320  mem_free(filepath);
3321 
3322  return;
3323  }
3324 #endif
3325  /* Read the first page of the tablespace if the size big enough */
3326 
3327  buf2 = static_cast<byte *>(ut_malloc(2 * UNIV_PAGE_SIZE));
3328  /* Align the memory for file i/o if we might have O_DIRECT set */
3329  page = static_cast<byte *>(ut_align(buf2, UNIV_PAGE_SIZE));
3330 
3331  if (size >= FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
3332  success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
3333 
3334  /* We have to read the tablespace id from the file */
3335 
3336  space_id = fsp_header_get_space_id(page);
3337  flags = fsp_header_get_flags(page);
3338  } else {
3339  space_id = ULINT_UNDEFINED;
3340  flags = 0;
3341  }
3342 
3343 #ifndef UNIV_HOTBACKUP
3344  if (space_id == ULINT_UNDEFINED || space_id == 0) {
3345  fprintf(stderr,
3346  "InnoDB: Error: tablespace id %lu in file %s"
3347  " is not sensible\n",
3348  (ulong) space_id,
3349  filepath);
3350  goto func_exit;
3351  }
3352 #else
3353  if (space_id == ULINT_UNDEFINED || space_id == 0) {
3354  char* new_path;
3355 
3356  fprintf(stderr,
3357  "InnoDB: Renaming tablespace %s of id %lu,\n"
3358  "InnoDB: to %s_ibbackup_old_vers_<timestamp>\n"
3359  "InnoDB: because its size %" PRId64 " is too small"
3360  " (< 4 pages 16 kB each),\n"
3361  "InnoDB: or the space id in the file header"
3362  " is not sensible.\n"
3363  "InnoDB: This can happen in an ibbackup run,"
3364  " and is not dangerous.\n",
3365  filepath, space_id, filepath, size);
3366  os_file_close(file);
3367 
3368  new_path = fil_make_ibbackup_old_name(filepath);
3369  ut_a(os_file_rename(innodb_file_data_key, filepath, new_path));
3370 
3371  ut_free(buf2);
3372  mem_free(filepath);
3373  mem_free(new_path);
3374 
3375  return;
3376  }
3377 
3378  /* A backup may contain the same space several times, if the space got
3379  renamed at a sensitive time. Since it is enough to have one version of
3380  the space, we rename the file if a space with the same space id
3381  already exists in the tablespace memory cache. We rather rename the
3382  file than delete it, because if there is a bug, we do not want to
3383  destroy valuable data. */
3384 
3385  mutex_enter(&fil_system->mutex);
3386 
3387  space = fil_space_get_by_id(space_id);
3388 
3389  if (space) {
3390  char* new_path;
3391 
3392  fprintf(stderr,
3393  "InnoDB: Renaming tablespace %s of id %lu,\n"
3394  "InnoDB: to %s_ibbackup_old_vers_<timestamp>\n"
3395  "InnoDB: because space %s with the same id\n"
3396  "InnoDB: was scanned earlier. This can happen"
3397  " if you have renamed tables\n"
3398  "InnoDB: during an ibbackup run.\n",
3399  filepath, space_id, filepath,
3400  space->name);
3401  os_file_close(file);
3402 
3403  new_path = fil_make_ibbackup_old_name(filepath);
3404 
3405  mutex_exit(&fil_system->mutex);
3406 
3407  ut_a(os_file_rename(innodb_file_data_key, filepath, new_path));
3408 
3409  ut_free(buf2);
3410  mem_free(filepath);
3411  mem_free(new_path);
3412 
3413  return;
3414  }
3415  mutex_exit(&fil_system->mutex);
3416 #endif
3417  success = fil_space_create(filepath, space_id, flags, FIL_TABLESPACE);
3418 
3419  if (!success) {
3420 
3421  if (srv_force_recovery > 0) {
3422  fprintf(stderr,
3423  "InnoDB: innodb_force_recovery"
3424  " was set to %lu. Continuing crash recovery\n"
3425  "InnoDB: even though the tablespace creation"
3426  " of this table failed.\n",
3427  srv_force_recovery);
3428  goto func_exit;
3429  }
3430 
3431  exit(1);
3432  }
3433 
3434  /* We do not use the size information we have about the file, because
3435  the rounding formula for extents and pages is somewhat complex; we
3436  let fil_node_open() do that task. */
3437 
3438  fil_node_create(filepath, 0, space_id, FALSE);
3439 func_exit:
3440  os_file_close(file);
3441  ut_free(buf2);
3442  mem_free(filepath);
3443 }
3444 
3445 /***********************************************************************/
3451 int
3452 fil_file_readdir_next_file(
3453 /*=======================*/
3454  ulint* err,
3456  const char* dirname,
3457  os_file_dir_t dir,
3458  os_file_stat_t* info)
3459 {
3460  ulint i;
3461  int ret;
3462 
3463  for (i = 0; i < 100; i++) {
3464  ret = os_file_readdir_next_file(dirname, dir, info);
3465 
3466  if (ret != -1) {
3467 
3468  return(ret);
3469  }
3470 
3471  fprintf(stderr,
3472  "InnoDB: Error: os_file_readdir_next_file()"
3473  " returned -1 in\n"
3474  "InnoDB: directory %s\n"
3475  "InnoDB: Crash recovery may have failed"
3476  " for some .ibd files!\n", dirname);
3477 
3478  *err = DB_ERROR;
3479  }
3480 
3481  return(-1);
3482 }
3483 
3484 /********************************************************************/
3492 UNIV_INTERN
3493 ulint
3494 fil_load_single_table_tablespaces(void)
3495 /*===================================*/
3496 {
3497  int ret;
3498  char* dbpath = NULL;
3499  ulint dbpath_len = 100;
3500  os_file_dir_t dir;
3501  os_file_dir_t dbdir;
3502  os_file_stat_t dbinfo;
3503  os_file_stat_t fileinfo;
3504  ulint err = DB_SUCCESS;
3505 
3506  /* The datadir of MySQL is always the default directory of mysqld */
3507 
3508  dir = os_file_opendir(fil_path_to_mysql_datadir, TRUE);
3509 
3510  if (dir == NULL) {
3511 
3512  return(DB_ERROR);
3513  }
3514 
3515  dbpath = static_cast<char *>(mem_alloc(dbpath_len));
3516 
3517  /* Scan all directories under the datadir. They are the database
3518  directories of MySQL. */
3519 
3520  ret = fil_file_readdir_next_file(&err, fil_path_to_mysql_datadir, dir,
3521  &dbinfo);
3522  while (ret == 0) {
3523  ulint len;
3524  /* printf("Looking at %s in datadir\n", dbinfo.name); */
3525 
3526  if (dbinfo.type == OS_FILE_TYPE_FILE
3527  || dbinfo.type == OS_FILE_TYPE_UNKNOWN) {
3528 
3529  goto next_datadir_item;
3530  }
3531 
3532  /* We found a symlink or a directory; try opening it to see
3533  if a symlink is a directory */
3534 
3535  len = strlen(fil_path_to_mysql_datadir)
3536  + strlen (dbinfo.name) + 2;
3537  if (len > dbpath_len) {
3538  dbpath_len = len;
3539 
3540  if (dbpath) {
3541  mem_free(dbpath);
3542  }
3543 
3544  dbpath = static_cast<char *>(mem_alloc(dbpath_len));
3545  }
3546  sprintf(dbpath, "%s/%s", fil_path_to_mysql_datadir,
3547  dbinfo.name);
3549 
3550  dbdir = os_file_opendir(dbpath, FALSE);
3551 
3552  if (dbdir != NULL) {
3553  /* printf("Opened dir %s\n", dbinfo.name); */
3554 
3555  /* We found a database directory; loop through it,
3556  looking for possible .ibd files in it */
3557 
3558  ret = fil_file_readdir_next_file(&err, dbpath, dbdir,
3559  &fileinfo);
3560  while (ret == 0) {
3561  /* printf(
3562  " Looking at file %s\n", fileinfo.name); */
3563 
3564  if (fileinfo.type == OS_FILE_TYPE_DIR) {
3565 
3566  goto next_file_item;
3567  }
3568 
3569  /* We found a symlink or a file */
3570  if (strlen(fileinfo.name) > 4
3571  && 0 == strcmp(fileinfo.name
3572  + strlen(fileinfo.name) - 4,
3573  ".ibd")) {
3574  /* The name ends in .ibd; try opening
3575  the file */
3576  fil_load_single_table_tablespace(
3577  dbinfo.name, fileinfo.name);
3578  }
3579 next_file_item:
3580  ret = fil_file_readdir_next_file(&err,
3581  dbpath, dbdir,
3582  &fileinfo);
3583  }
3584 
3585  if (0 != os_file_closedir(dbdir)) {
3586  fputs("InnoDB: Warning: could not"
3587  " close database directory ", stderr);
3588  ut_print_filename(stderr, dbpath);
3589  putc('\n', stderr);
3590 
3591  err = DB_ERROR;
3592  }
3593  }
3594 
3595 next_datadir_item:
3596  ret = fil_file_readdir_next_file(&err,
3597  fil_path_to_mysql_datadir,
3598  dir, &dbinfo);
3599  }
3600 
3601  mem_free(dbpath);
3602 
3603  if (0 != os_file_closedir(dir)) {
3604  fprintf(stderr,
3605  "InnoDB: Error: could not close MySQL datadir\n");
3606 
3607  return(DB_ERROR);
3608  }
3609 
3610  return(err);
3611 }
3612 
3613 /*******************************************************************/
3617 UNIV_INTERN
3618 ibool
3619 fil_tablespace_deleted_or_being_deleted_in_mem(
3620 /*===========================================*/
3621  ulint id,
3622  ib_int64_t version)
3625 {
3626  fil_space_t* space;
3627 
3628  ut_ad(fil_system);
3629 
3630  mutex_enter(&fil_system->mutex);
3631 
3632  space = fil_space_get_by_id(id);
3633 
3634  if (space == NULL || space->is_being_deleted) {
3635  mutex_exit(&fil_system->mutex);
3636 
3637  return(TRUE);
3638  }
3639 
3640  if (version != ((ib_int64_t)-1)
3641  && space->tablespace_version != version) {
3642  mutex_exit(&fil_system->mutex);
3643 
3644  return(TRUE);
3645  }
3646 
3647  mutex_exit(&fil_system->mutex);
3648 
3649  return(FALSE);
3650 }
3651 
3652 /*******************************************************************/
3655 UNIV_INTERN
3656 ibool
3657 fil_tablespace_exists_in_mem(
3658 /*=========================*/
3659  ulint id)
3660 {
3661  fil_space_t* space;
3662 
3663  ut_ad(fil_system);
3664 
3665  mutex_enter(&fil_system->mutex);
3666 
3667  space = fil_space_get_by_id(id);
3668 
3669  mutex_exit(&fil_system->mutex);
3670 
3671  return(space != NULL);
3672 }
3673 
3674 /*******************************************************************/
3679 UNIV_INTERN
3680 ibool
3681 fil_space_for_table_exists_in_mem(
3682 /*==============================*/
3683  ulint id,
3684  const char* name,
3687  ibool is_temp,
3689  ibool mark_space,
3695  ibool print_error_if_does_not_exist)
3700 {
3701  fil_space_t* tablespace;
3702  fil_space_t* space;
3703  char* path;
3704 
3705  ut_ad(fil_system);
3706 
3707  mutex_enter(&fil_system->mutex);
3708 
3709  path = fil_make_ibd_name(name, is_temp);
3710 
3711  /* Look if there is a space with the same id */
3712 
3713  space = fil_space_get_by_id(id);
3714 
3715  /* Look if there is a space with the same name; the name is the
3716  directory path from the datadir to the file */
3717 
3718  tablespace = fil_space_get_by_name(path);
3719  if (space && space == tablespace) {
3720  /* Found */
3721 
3722  if (mark_space) {
3723  space->mark = TRUE;
3724  }
3725 
3726  mem_free(path);
3727  mutex_exit(&fil_system->mutex);
3728 
3729  return(TRUE);
3730  }
3731 
3732  if (!print_error_if_does_not_exist) {
3733 
3734  mem_free(path);
3735  mutex_exit(&fil_system->mutex);
3736 
3737  return(FALSE);
3738  }
3739 
3740  if (space == NULL) {
3741  if (tablespace == NULL) {
3742  ut_print_timestamp(stderr);
3743  fputs(" InnoDB: Error: table ", stderr);
3744  ut_print_filename(stderr, name);
3745  fprintf(stderr, "\n"
3746  "InnoDB: in InnoDB data dictionary"
3747  " has tablespace id %lu,\n"
3748  "InnoDB: but tablespace with that id"
3749  " or name does not exist. Have\n"
3750  "InnoDB: you deleted or moved .ibd files?\n"
3751  "InnoDB: This may also be a table created with"
3752  " CREATE TEMPORARY TABLE\n"
3753  "InnoDB: whose .ibd and .frm files"
3754  " MySQL automatically removed, but the\n"
3755  "InnoDB: table still exists in the"
3756  " InnoDB internal data dictionary.\n",
3757  (ulong) id);
3758  } else {
3759  ut_print_timestamp(stderr);
3760  fputs(" InnoDB: Error: table ", stderr);
3761  ut_print_filename(stderr, name);
3762  fprintf(stderr, "\n"
3763  "InnoDB: in InnoDB data dictionary has"
3764  " tablespace id %lu,\n"
3765  "InnoDB: but a tablespace with that id"
3766  " does not exist. There is\n"
3767  "InnoDB: a tablespace of name %s and id %lu,"
3768  " though. Have\n"
3769  "InnoDB: you deleted or moved .ibd files?\n",
3770  (ulong) id, tablespace->name,
3771  (ulong) tablespace->id);
3772  }
3773 error_exit:
3774  fputs("InnoDB: Please refer to\n"
3775  "InnoDB: " REFMAN "innodb-troubleshooting-datadict.html\n"
3776  "InnoDB: for how to resolve the issue.\n", stderr);
3777 
3778  mem_free(path);
3779  mutex_exit(&fil_system->mutex);
3780 
3781  return(FALSE);
3782  }
3783 
3784  if (0 != strcmp(space->name, path)) {
3785  ut_print_timestamp(stderr);
3786  fputs(" InnoDB: Error: table ", stderr);
3787  ut_print_filename(stderr, name);
3788  fprintf(stderr, "\n"
3789  "InnoDB: in InnoDB data dictionary has"
3790  " tablespace id %lu,\n"
3791  "InnoDB: but the tablespace with that id"
3792  " has name %s.\n"
3793  "InnoDB: Have you deleted or moved .ibd files?\n",
3794  (ulong) id, space->name);
3795 
3796  if (tablespace != NULL) {
3797  fputs("InnoDB: There is a tablespace"
3798  " with the right name\n"
3799  "InnoDB: ", stderr);
3800  ut_print_filename(stderr, tablespace->name);
3801  fprintf(stderr, ", but its id is %lu.\n",
3802  (ulong) tablespace->id);
3803  }
3804 
3805  goto error_exit;
3806  }
3807 
3808  mem_free(path);
3809  mutex_exit(&fil_system->mutex);
3810 
3811  return(FALSE);
3812 }
3813 
3814 /*******************************************************************/
3818 static
3819 ulint
3820 fil_get_space_id_for_table(
3821 /*=======================*/
3822  const char* name)
3824 {
3825  fil_space_t* tablespace;
3826  ulint id = ULINT_UNDEFINED;
3827  char* path;
3828 
3829  ut_ad(fil_system);
3830 
3831  mutex_enter(&fil_system->mutex);
3832 
3833  path = fil_make_ibd_name(name, FALSE);
3834 
3835  /* Look if there is a space with the same name; the name is the
3836  directory path to the file */
3837 
3838  tablespace = fil_space_get_by_name(path);
3839 
3840  if (tablespace) {
3841  id = tablespace->id;
3842  }
3843 
3844  mem_free(path);
3845 
3846  mutex_exit(&fil_system->mutex);
3847 
3848  return(id);
3849 }
3850 
3851 /**********************************************************************/
3856 UNIV_INTERN
3857 ibool
3858 fil_extend_space_to_desired_size(
3859 /*=============================*/
3860  ulint* actual_size,
3863  ulint space_id,
3864  ulint size_after_extend)
3867 {
3868  fil_node_t* node;
3869  fil_space_t* space;
3870  byte* buf2;
3871  byte* buf;
3872  ulint buf_size;
3873  ulint start_page_no;
3874  ulint file_start_page_no;
3875  ulint offset_high;
3876  ulint offset_low;
3877  ulint page_size;
3878  ibool success = TRUE;
3879 
3880  fil_mutex_enter_and_prepare_for_io(space_id);
3881 
3882  space = fil_space_get_by_id(space_id);
3883  ut_a(space);
3884 
3885  if (space->size >= size_after_extend) {
3886  /* Space already big enough */
3887 
3888  *actual_size = space->size;
3889 
3890  mutex_exit(&fil_system->mutex);
3891 
3892  return(TRUE);
3893  }
3894 
3895  page_size = dict_table_flags_to_zip_size(space->flags);
3896  if (!page_size) {
3897  page_size = UNIV_PAGE_SIZE;
3898  }
3899 
3900  node = UT_LIST_GET_LAST(space->chain);
3901 
3902  fil_node_prepare_for_io(node, fil_system, space);
3903 
3904  start_page_no = space->size;
3905  file_start_page_no = space->size - node->size;
3906 
3907  /* Extend at most 64 pages at a time */
3908  buf_size = ut_min(64, size_after_extend - start_page_no) * page_size;
3909  buf2 = static_cast<byte *>(mem_alloc(buf_size + page_size));
3910  buf = static_cast<byte *>(ut_align(buf2, page_size));
3911 
3912  memset(buf, 0, buf_size);
3913 
3914  while (start_page_no < size_after_extend) {
3915  ulint n_pages = ut_min(buf_size / page_size,
3916  size_after_extend - start_page_no);
3917 
3918  offset_high = (start_page_no - file_start_page_no)
3919  / (4096 * ((1024 * 1024) / page_size));
3920  offset_low = ((start_page_no - file_start_page_no)
3921  % (4096 * ((1024 * 1024) / page_size)))
3922  * page_size;
3923 #ifdef UNIV_HOTBACKUP
3924  success = os_file_write(node->name, node->handle, buf,
3925  offset_low, offset_high,
3926  page_size * n_pages);
3927 #else
3928  success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
3929  node->name, node->handle, buf,
3930  offset_low, offset_high,
3931  page_size * n_pages,
3932  NULL, NULL);
3933 #endif
3934  if (success) {
3935  node->size += n_pages;
3936  space->size += n_pages;
3937 
3938  os_has_said_disk_full = FALSE;
3939  } else {
3940  /* Let us measure the size of the file to determine
3941  how much we were able to extend it */
3942 
3943  n_pages = ((ulint)
3945  node->handle)
3946  / page_size)) - node->size;
3947 
3948  node->size += n_pages;
3949  space->size += n_pages;
3950 
3951  break;
3952  }
3953 
3954  start_page_no += n_pages;
3955  }
3956 
3957  mem_free(buf2);
3958 
3959  fil_node_complete_io(node, fil_system, OS_FILE_WRITE);
3960 
3961  *actual_size = space->size;
3962 
3963 #ifndef UNIV_HOTBACKUP
3964  if (space_id == 0) {
3965  ulint pages_per_mb = (1024 * 1024) / page_size;
3966 
3967  /* Keep the last data file size info up to date, rounded to
3968  full megabytes */
3969 
3970  srv_data_file_sizes[srv_n_data_files - 1]
3971  = (node->size / pages_per_mb) * pages_per_mb;
3972  }
3973 #endif /* !UNIV_HOTBACKUP */
3974 
3975  /*
3976  printf("Extended %s to %lu, actual size %lu pages\n", space->name,
3977  size_after_extend, *actual_size); */
3978  mutex_exit(&fil_system->mutex);
3979 
3980  fil_flush(space_id);
3981 
3982  return(success);
3983 }
3984 
3985 #ifdef UNIV_HOTBACKUP
3986 /********************************************************************/
3991 UNIV_INTERN
3992 void
3993 fil_extend_tablespaces_to_stored_len(void)
3994 /*======================================*/
3995 {
3996  fil_space_t* space;
3997  byte* buf;
3998  ulint actual_size;
3999  ulint size_in_header;
4000  ulint error;
4001  ibool success;
4002 
4003  buf = mem_alloc(UNIV_PAGE_SIZE);
4004 
4005  mutex_enter(&fil_system->mutex);
4006 
4007  space = UT_LIST_GET_FIRST(fil_system->space_list);
4008 
4009  while (space) {
4010  ut_a(space->purpose == FIL_TABLESPACE);
4011 
4012  mutex_exit(&fil_system->mutex); /* no need to protect with a
4013  mutex, because this is a
4014  single-threaded operation */
4015  error = fil_read(TRUE, space->id,
4017  0, 0, UNIV_PAGE_SIZE, buf, NULL);
4018  ut_a(error == DB_SUCCESS);
4019 
4020  size_in_header = fsp_get_size_low(buf);
4021 
4022  success = fil_extend_space_to_desired_size(
4023  &actual_size, space->id, size_in_header);
4024  if (!success) {
4025  fprintf(stderr,
4026  "InnoDB: Error: could not extend the"
4027  " tablespace of %s\n"
4028  "InnoDB: to the size stored in header,"
4029  " %lu pages;\n"
4030  "InnoDB: size after extension %lu pages\n"
4031  "InnoDB: Check that you have free disk space"
4032  " and retry!\n",
4033  space->name, size_in_header, actual_size);
4034  exit(1);
4035  }
4036 
4037  mutex_enter(&fil_system->mutex);
4038 
4039  space = UT_LIST_GET_NEXT(space_list, space);
4040  }
4041 
4042  mutex_exit(&fil_system->mutex);
4043 
4044  mem_free(buf);
4045 }
4046 #endif
4047 
4048 /*========== RESERVE FREE EXTENTS (for a B-tree split, for example) ===*/
4049 
4050 /*******************************************************************/
4053 UNIV_INTERN
4054 ibool
4055 fil_space_reserve_free_extents(
4056 /*===========================*/
4057  ulint id,
4058  ulint n_free_now,
4059  ulint n_to_reserve)
4060 {
4061  fil_space_t* space;
4062  ibool success;
4063 
4064  ut_ad(fil_system);
4065 
4066  mutex_enter(&fil_system->mutex);
4067 
4068  space = fil_space_get_by_id(id);
4069 
4070  ut_a(space);
4071 
4072  if (space->n_reserved_extents + n_to_reserve > n_free_now) {
4073  success = FALSE;
4074  } else {
4075  space->n_reserved_extents += n_to_reserve;
4076  success = TRUE;
4077  }
4078 
4079  mutex_exit(&fil_system->mutex);
4080 
4081  return(success);
4082 }
4083 
4084 /*******************************************************************/
4086 UNIV_INTERN
4087 void
4088 fil_space_release_free_extents(
4089 /*===========================*/
4090  ulint id,
4091  ulint n_reserved)
4092 {
4093  fil_space_t* space;
4094 
4095  ut_ad(fil_system);
4096 
4097  mutex_enter(&fil_system->mutex);
4098 
4099  space = fil_space_get_by_id(id);
4100 
4101  ut_a(space);
4102  ut_a(space->n_reserved_extents >= n_reserved);
4103 
4104  space->n_reserved_extents -= n_reserved;
4105 
4106  mutex_exit(&fil_system->mutex);
4107 }
4108 
4109 /*******************************************************************/
4112 UNIV_INTERN
4113 ulint
4114 fil_space_get_n_reserved_extents(
4115 /*=============================*/
4116  ulint id)
4117 {
4118  fil_space_t* space;
4119  ulint n;
4120 
4121  ut_ad(fil_system);
4122 
4123  mutex_enter(&fil_system->mutex);
4124 
4125  space = fil_space_get_by_id(id);
4126 
4127  ut_a(space);
4128 
4129  n = space->n_reserved_extents;
4130 
4131  mutex_exit(&fil_system->mutex);
4132 
4133  return(n);
4134 }
4135 
4136 /*============================ FILE I/O ================================*/
4137 
4138 /********************************************************************/
4145 static
4146 void
4147 fil_node_prepare_for_io(
4148 /*====================*/
4149  fil_node_t* node,
4150  fil_system_t* system,
4151  fil_space_t* space)
4152 {
4153  ut_ad(node && system && space);
4154  ut_ad(mutex_own(&(system->mutex)));
4155 
4156  if (system->n_open > system->max_n_open + 5) {
4157  ut_print_timestamp(stderr);
4158  fprintf(stderr,
4159  " InnoDB: Warning: open files %lu"
4160  " exceeds the limit %lu\n",
4161  (ulong) system->n_open,
4162  (ulong) system->max_n_open);
4163  }
4164 
4165  if (node->open == FALSE) {
4166  /* File is closed: open it */
4167  ut_a(node->n_pending == 0);
4168 
4169  fil_node_open_file(node, system, space);
4170  }
4171 
4172  if (node->n_pending == 0 && space->purpose == FIL_TABLESPACE
4173  && space->id != 0) {
4174  /* The node is in the LRU list, remove it */
4175 
4176  ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
4177 
4178  UT_LIST_REMOVE(LRU, system->LRU, node);
4179  }
4180 
4181  node->n_pending++;
4182 }
4183 
4184 /********************************************************************/
4187 static
4188 void
4189 fil_node_complete_io(
4190 /*=================*/
4191  fil_node_t* node,
4192  fil_system_t* system,
4193  ulint type)
4196 {
4197  ut_ad(node);
4198  ut_ad(system);
4199  ut_ad(mutex_own(&(system->mutex)));
4200 
4201  ut_a(node->n_pending > 0);
4202 
4203  node->n_pending--;
4204 
4205  if (type == OS_FILE_WRITE) {
4206  system->modification_counter++;
4208 
4209  if (!node->space->is_in_unflushed_spaces) {
4210 
4211  node->space->is_in_unflushed_spaces = TRUE;
4212  UT_LIST_ADD_FIRST(unflushed_spaces,
4213  system->unflushed_spaces,
4214  node->space);
4215  }
4216  }
4217 
4218  if (node->n_pending == 0 && node->space->purpose == FIL_TABLESPACE
4219  && node->space->id != 0) {
4220  /* The node must be put back to the LRU list */
4221  UT_LIST_ADD_FIRST(LRU, system->LRU, node);
4222  }
4223 }
4224 
4225 /********************************************************************/
4227 static
4228 void
4229 fil_report_invalid_page_access(
4230 /*===========================*/
4231  ulint block_offset,
4232  ulint space_id,
4233  const char* space_name,
4234  ulint byte_offset,
4235  ulint len,
4236  ulint type)
4237 {
4238  fprintf(stderr,
4239  "InnoDB: Error: trying to access page number %lu"
4240  " in space %lu,\n"
4241  "InnoDB: space name %s,\n"
4242  "InnoDB: which is outside the tablespace bounds.\n"
4243  "InnoDB: Byte offset %lu, len %lu, i/o type %lu.\n"
4244  "InnoDB: If you get this error at mysqld startup,"
4245  " please check that\n"
4246  "InnoDB: your my.cnf matches the ibdata files"
4247  " that you have in the\n"
4248  "InnoDB: MySQL server.\n",
4249  (ulong) block_offset, (ulong) space_id, space_name,
4250  (ulong) byte_offset, (ulong) len, (ulong) type);
4251 }
4252 
4253 /********************************************************************/
4257 UNIV_INTERN
4258 ulint
4259 fil_io(
4260 /*===*/
4261  ulint type,
4270  ibool sync,
4271  ulint space_id,
4272  ulint zip_size,
4274  ulint block_offset,
4275  ulint byte_offset,
4278  ulint len,
4281  void* buf,
4284  void* message)
4286 {
4287  ulint mode;
4288  fil_space_t* space;
4289  fil_node_t* node;
4290  ulint offset_high;
4291  ulint offset_low;
4292  ibool ret;
4293  ulint is_log;
4294  ulint wake_later;
4295 
4296  is_log = type & OS_FILE_LOG;
4297  type = type & ~OS_FILE_LOG;
4298 
4299  wake_later = type & OS_AIO_SIMULATED_WAKE_LATER;
4300  type = type & ~OS_AIO_SIMULATED_WAKE_LATER;
4301 
4302  ut_ad(byte_offset < UNIV_PAGE_SIZE);
4303  ut_ad(!zip_size || !byte_offset);
4304  ut_ad(ut_is_2pow(zip_size));
4305  ut_ad(buf);
4306  ut_ad(len > 0);
4307  ut_ad(fil_validate());
4308 #ifndef UNIV_HOTBACKUP
4309 # ifndef UNIV_LOG_DEBUG
4310  /* ibuf bitmap pages must be read in the sync aio mode: */
4311  ut_ad(recv_no_ibuf_operations || (type == OS_FILE_WRITE)
4312  || !ibuf_bitmap_page(zip_size, block_offset)
4313  || sync || is_log);
4314  ut_ad(!ibuf_inside() || is_log || (type == OS_FILE_WRITE)
4315  || ibuf_page(space_id, zip_size, block_offset, NULL));
4316 # endif /* UNIV_LOG_DEBUG */
4317  if (sync) {
4318  mode = OS_AIO_SYNC;
4319  } else if (is_log) {
4320  mode = OS_AIO_LOG;
4321  } else if (type == OS_FILE_READ
4323  && ibuf_page(space_id, zip_size, block_offset, NULL)) {
4324  mode = OS_AIO_IBUF;
4325  } else {
4326  mode = OS_AIO_NORMAL;
4327  }
4328 #else /* !UNIV_HOTBACKUP */
4329  ut_a(sync);
4330  mode = OS_AIO_SYNC;
4331 #endif /* !UNIV_HOTBACKUP */
4332 
4333  if (type == OS_FILE_READ) {
4334  srv_data_read+= len;
4335  } else if (type == OS_FILE_WRITE) {
4336  srv_data_written+= len;
4337  }
4338 
4339  /* Reserve the fil_system mutex and make sure that we can open at
4340  least one file while holding it, if the file is not already open */
4341 
4342  fil_mutex_enter_and_prepare_for_io(space_id);
4343 
4344  space = fil_space_get_by_id(space_id);
4345 
4346  if (!space) {
4347  mutex_exit(&fil_system->mutex);
4348 
4349  ut_print_timestamp(stderr);
4350  fprintf(stderr,
4351  " InnoDB: Error: trying to do i/o"
4352  " to a tablespace which does not exist.\n"
4353  "InnoDB: i/o type %lu, space id %lu,"
4354  " page no. %lu, i/o length %lu bytes\n",
4355  (ulong) type, (ulong) space_id, (ulong) block_offset,
4356  (ulong) len);
4357 
4358  return(DB_TABLESPACE_DELETED);
4359  }
4360 
4361  ut_ad((mode != OS_AIO_IBUF) || (space->purpose == FIL_TABLESPACE));
4362 
4363  node = UT_LIST_GET_FIRST(space->chain);
4364 
4365  for (;;) {
4366  if (UNIV_UNLIKELY(node == NULL)) {
4367  fil_report_invalid_page_access(
4368  block_offset, space_id, space->name,
4369  byte_offset, len, type);
4370 
4371  ut_error;
4372  }
4373 
4374  if (space->id != 0 && node->size == 0) {
4375  /* We do not know the size of a single-table tablespace
4376  before we open the file */
4377 
4378  break;
4379  }
4380 
4381  if (node->size > block_offset) {
4382  /* Found! */
4383  break;
4384  } else {
4385  block_offset -= node->size;
4386  node = UT_LIST_GET_NEXT(chain, node);
4387  }
4388  }
4389 
4390  /* Open file if closed */
4391  fil_node_prepare_for_io(node, fil_system, space);
4392 
4393  /* Check that at least the start offset is within the bounds of a
4394  single-table tablespace */
4395  if (UNIV_UNLIKELY(node->size <= block_offset)
4396  && space->id != 0 && space->purpose == FIL_TABLESPACE) {
4397 
4398  fil_report_invalid_page_access(
4399  block_offset, space_id, space->name, byte_offset,
4400  len, type);
4401 
4402  ut_error;
4403  }
4404 
4405  /* Now we have made the changes in the data structures of fil_system */
4406  mutex_exit(&fil_system->mutex);
4407 
4408  /* Calculate the low 32 bits and the high 32 bits of the file offset */
4409 
4410  if (!zip_size) {
4411  offset_high = (block_offset >> (32 - UNIV_PAGE_SIZE_SHIFT));
4412  offset_low = ((block_offset << UNIV_PAGE_SIZE_SHIFT)
4413  & 0xFFFFFFFFUL) + byte_offset;
4414 
4415  ut_a(node->size - block_offset
4416  >= ((byte_offset + len + (UNIV_PAGE_SIZE - 1))
4417  / UNIV_PAGE_SIZE));
4418  } else {
4419  ulint zip_size_shift;
4420  switch (zip_size) {
4421  case 1024: zip_size_shift = 10; break;
4422  case 2048: zip_size_shift = 11; break;
4423  case 4096: zip_size_shift = 12; break;
4424  case 8192: zip_size_shift = 13; break;
4425  case 16384: zip_size_shift = 14; break;
4426  default: ut_error;
4427  }
4428  offset_high = block_offset >> (32 - zip_size_shift);
4429  offset_low = (block_offset << zip_size_shift & 0xFFFFFFFFUL)
4430  + byte_offset;
4431  ut_a(node->size - block_offset
4432  >= (len + (zip_size - 1)) / zip_size);
4433  }
4434 
4435  /* Do aio */
4436 
4437  ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
4438  ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0);
4439 
4440 #ifdef UNIV_HOTBACKUP
4441  /* In ibbackup do normal i/o, not aio */
4442  if (type == OS_FILE_READ) {
4443  ret = os_file_read(node->handle, buf, offset_low, offset_high,
4444  len);
4445  } else {
4446  ret = os_file_write(node->name, node->handle, buf,
4447  offset_low, offset_high, len);
4448  }
4449 #else
4450  /* Queue the aio request */
4451  ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
4452  offset_low, offset_high, len, node, message);
4453 #endif
4454  ut_a(ret);
4455 
4456  if (mode == OS_AIO_SYNC) {
4457  /* The i/o operation is already completed when we return from
4458  os_aio: */
4459 
4460  mutex_enter(&fil_system->mutex);
4461 
4462  fil_node_complete_io(node, fil_system, type);
4463 
4464  mutex_exit(&fil_system->mutex);
4465 
4466  ut_ad(fil_validate());
4467  }
4468 
4469  return(DB_SUCCESS);
4470 }
4471 
4472 /********************************************************************/
4474 UNIV_INTERN
4475 bool
4476 fil_is_exist(
4477 /*=========*/
4478  ulint space_id,
4479  ulint block_offset)
4480 {
4481  fil_space_t* space;
4482  fil_node_t* node;
4483 
4484  /* Reserve the fil_system mutex and make sure that we can open at
4485  least one file while holding it, if the file is not already open */
4486 
4487  fil_mutex_enter_and_prepare_for_io(space_id);
4488 
4489  space = fil_space_get_by_id(space_id);
4490 
4491  if (!space) {
4492  mutex_exit(&fil_system->mutex);
4493  return(false);
4494  }
4495 
4496  node = UT_LIST_GET_FIRST(space->chain);
4497 
4498  for (;;) {
4499  if (UNIV_UNLIKELY(node == NULL)) {
4500  mutex_exit(&fil_system->mutex);
4501  return(false);
4502  }
4503 
4504  if (space->id != 0 && node->size == 0) {
4505  /* We do not know the size of a single-table tablespace
4506  before we open the file */
4507 
4508  break;
4509  }
4510 
4511  if (node->size > block_offset) {
4512  /* Found! */
4513  break;
4514  } else {
4515  block_offset -= node->size;
4516  node = UT_LIST_GET_NEXT(chain, node);
4517  }
4518  }
4519 
4520  /* Open file if closed */
4521  fil_node_prepare_for_io(node, fil_system, space);
4522  fil_node_complete_io(node, fil_system, OS_FILE_READ);
4523 
4524  /* Check that at least the start offset is within the bounds of a
4525  single-table tablespace */
4526  if (UNIV_UNLIKELY(node->size <= block_offset)
4527  && space->id != 0 && space->purpose == FIL_TABLESPACE) {
4528  mutex_exit(&fil_system->mutex);
4529  return(false);
4530  }
4531 
4532  mutex_exit(&fil_system->mutex);
4533  return(true);
4534 }
4535 
4536 #ifndef UNIV_HOTBACKUP
4537 /**********************************************************************/
4542 UNIV_INTERN
4543 void
4544 fil_aio_wait(
4545 /*=========*/
4546  ulint segment)
4548 {
4549  ibool ret;
4550  fil_node_t* fil_node;
4551  void* message;
4552  ulint type;
4553 
4554  ut_ad(fil_validate());
4555 
4556  if (srv_use_native_aio) {
4557  srv_set_io_thread_op_info(segment, "native aio handle");
4558 #ifdef WIN_ASYNC_IO
4559  ret = os_aio_windows_handle(segment, 0, &fil_node,
4560  &message, &type);
4561 #elif defined(LINUX_NATIVE_AIO)
4562  ret = os_aio_linux_handle(segment, &fil_node,
4563  &message, &type);
4564 #else
4565  ret = 0; /* Eliminate compiler warning */
4566  ut_error;
4567 #endif
4568  } else {
4569  srv_set_io_thread_op_info(segment, "simulated aio handle");
4570 
4571  ret = os_aio_simulated_handle(segment, &fil_node,
4572  &message, &type);
4573  }
4574 
4575  ut_a(ret);
4576 
4577  srv_set_io_thread_op_info(segment, "complete io for fil node");
4578 
4579  mutex_enter(&fil_system->mutex);
4580 
4581  fil_node_complete_io(fil_node, fil_system, type);
4582 
4583  mutex_exit(&fil_system->mutex);
4584 
4585  ut_ad(fil_validate());
4586 
4587  /* Do the i/o handling */
4588  /* IMPORTANT: since i/o handling for reads will read also the insert
4589  buffer in tablespace 0, you have to be very careful not to introduce
4590  deadlocks in the i/o system. We keep tablespace 0 data files always
4591  open, and use a special i/o thread to serve insert buffer requests. */
4592 
4593  if (fil_node->space->purpose == FIL_TABLESPACE) {
4594  srv_set_io_thread_op_info(segment, "complete io for buf page");
4595  buf_page_io_complete(static_cast<buf_page_t *>(message));
4596  } else {
4597  srv_set_io_thread_op_info(segment, "complete io for log");
4598  log_io_complete(static_cast<log_group_t *>(message));
4599  }
4600 }
4601 #endif /* UNIV_HOTBACKUP */
4602 
4603 /**********************************************************************/
4606 UNIV_INTERN
4607 void
4608 fil_flush(
4609 /*======*/
4610  ulint space_id)
4612 {
4613  fil_space_t* space;
4614  fil_node_t* node;
4615  os_file_t file;
4616  ib_int64_t old_mod_counter;
4617 
4618  mutex_enter(&fil_system->mutex);
4619 
4620  space = fil_space_get_by_id(space_id);
4621 
4622  if (!space || space->is_being_deleted) {
4623  mutex_exit(&fil_system->mutex);
4624 
4625  return;
4626  }
4627 
4628  space->n_pending_flushes++;
4630  node = UT_LIST_GET_FIRST(space->chain);
4631 
4632  while (node) {
4633  if (node->modification_counter > node->flush_counter) {
4634  ut_a(node->open);
4635 
4636  /* We want to flush the changes at least up to
4637  old_mod_counter */
4638  old_mod_counter = node->modification_counter;
4639 
4640  if (space->purpose == FIL_TABLESPACE) {
4641  fil_n_pending_tablespace_flushes++;
4642  } else {
4643  fil_n_pending_log_flushes++;
4644  fil_n_log_flushes++;
4645  }
4646 #ifdef __WIN__
4647  if (node->is_raw_disk) {
4648 
4649  goto skip_flush;
4650  }
4651 #endif
4652 retry:
4653  if (node->n_pending_flushes > 0) {
4654  /* We want to avoid calling os_file_flush() on
4655  the file twice at the same time, because we do
4656  not know what bugs OS's may contain in file
4657  i/o; sleep for a while */
4658 
4659  mutex_exit(&fil_system->mutex);
4660 
4661  os_thread_sleep(20000);
4662 
4663  mutex_enter(&fil_system->mutex);
4664 
4665  if (node->flush_counter >= old_mod_counter) {
4666 
4667  goto skip_flush;
4668  }
4669 
4670  goto retry;
4671  }
4672 
4673  ut_a(node->open);
4674  file = node->handle;
4675  node->n_pending_flushes++;
4676 
4677  mutex_exit(&fil_system->mutex);
4678 
4679  /* fprintf(stderr, "Flushing to file %s\n",
4680  node->name); */
4681 
4682  os_file_flush(file);
4683 
4684  mutex_enter(&fil_system->mutex);
4685 
4686  node->n_pending_flushes--;
4687 skip_flush:
4688  if (node->flush_counter < old_mod_counter) {
4689  node->flush_counter = old_mod_counter;
4690 
4691  if (space->is_in_unflushed_spaces
4692  && fil_space_is_flushed(space)) {
4693 
4694  space->is_in_unflushed_spaces = FALSE;
4695 
4697  unflushed_spaces,
4698  fil_system->unflushed_spaces,
4699  space);
4700  }
4701  }
4702 
4703  if (space->purpose == FIL_TABLESPACE) {
4704  fil_n_pending_tablespace_flushes--;
4705  } else {
4706  fil_n_pending_log_flushes--;
4707  }
4708  }
4709 
4710  node = UT_LIST_GET_NEXT(chain, node);
4711  }
4712 
4713  space->n_pending_flushes--;
4714 
4715  mutex_exit(&fil_system->mutex);
4716 }
4717 
4718 /**********************************************************************/
4721 UNIV_INTERN
4722 void
4723 fil_flush_file_spaces(
4724 /*==================*/
4725  ulint purpose)
4726 {
4727  fil_space_t* space;
4728  ulint* space_ids;
4729  ulint n_space_ids;
4730  ulint i;
4731 
4732  mutex_enter(&fil_system->mutex);
4733 
4734  n_space_ids = UT_LIST_GET_LEN(fil_system->unflushed_spaces);
4735  if (n_space_ids == 0) {
4736 
4737  mutex_exit(&fil_system->mutex);
4738  return;
4739  }
4740 
4741  /* Assemble a list of space ids to flush. Previously, we
4742  traversed fil_system->unflushed_spaces and called UT_LIST_GET_NEXT()
4743  on a space that was just removed from the list by fil_flush().
4744  Thus, the space could be dropped and the memory overwritten. */
4745  space_ids = static_cast<unsigned long *>(mem_alloc(n_space_ids * sizeof *space_ids));
4746 
4747  n_space_ids = 0;
4748 
4749  for (space = UT_LIST_GET_FIRST(fil_system->unflushed_spaces);
4750  space;
4751  space = UT_LIST_GET_NEXT(unflushed_spaces, space)) {
4752 
4753  if (space->purpose == purpose && !space->is_being_deleted) {
4754 
4755  space_ids[n_space_ids++] = space->id;
4756  }
4757  }
4758 
4759  mutex_exit(&fil_system->mutex);
4760 
4761  /* Flush the spaces. It will not hurt to call fil_flush() on
4762  a non-existing space id. */
4763  for (i = 0; i < n_space_ids; i++) {
4764 
4765  fil_flush(space_ids[i]);
4766  }
4767 
4768  mem_free(space_ids);
4769 }
4770 
4771 /******************************************************************/
4774 UNIV_INTERN
4775 ibool
4776 fil_validate(void)
4777 /*==============*/
4778 {
4779  fil_space_t* space;
4780  fil_node_t* fil_node;
4781  ulint n_open = 0;
4782  ulint i;
4783 
4784  mutex_enter(&fil_system->mutex);
4785 
4786  /* Look for spaces in the hash table */
4787 
4788  for (i = 0; i < hash_get_n_cells(fil_system->spaces); i++) {
4789 
4790  space = static_cast<fil_space_t *>(HASH_GET_FIRST(fil_system->spaces, i));
4791 
4792  while (space != NULL) {
4793  UT_LIST_VALIDATE(chain, fil_node_t, space->chain,
4794  ut_a(ut_list_node_313->open
4795  || !ut_list_node_313->n_pending));
4796 
4797  fil_node = UT_LIST_GET_FIRST(space->chain);
4798 
4799  while (fil_node != NULL) {
4800  if (fil_node->n_pending > 0) {
4801  ut_a(fil_node->open);
4802  }
4803 
4804  if (fil_node->open) {
4805  n_open++;
4806  }
4807  fil_node = UT_LIST_GET_NEXT(chain, fil_node);
4808  }
4809  space = static_cast<fil_space_t *>(HASH_GET_NEXT(hash, space));
4810  }
4811  }
4812 
4813  ut_a(fil_system->n_open == n_open);
4814 
4815  UT_LIST_VALIDATE(LRU, fil_node_t, fil_system->LRU, (void) 0);
4816 
4817  fil_node = UT_LIST_GET_FIRST(fil_system->LRU);
4818 
4819  while (fil_node != NULL) {
4820  ut_a(fil_node->n_pending == 0);
4821  ut_a(fil_node->open);
4822  ut_a(fil_node->space->purpose == FIL_TABLESPACE);
4823  ut_a(fil_node->space->id != 0);
4824 
4825  fil_node = UT_LIST_GET_NEXT(LRU, fil_node);
4826  }
4827 
4828  mutex_exit(&fil_system->mutex);
4829 
4830  return(TRUE);
4831 }
4832 
4833 /********************************************************************/
4836 UNIV_INTERN
4837 ibool
4838 fil_addr_is_null(
4839 /*=============*/
4840  fil_addr_t addr)
4841 {
4842  return(addr.page == FIL_NULL);
4843 }
4844 
4845 /********************************************************************/
4848 UNIV_INTERN
4849 ulint
4850 fil_page_get_prev(
4851 /*==============*/
4852  const byte* page)
4853 {
4854  return(mach_read_from_4(page + FIL_PAGE_PREV));
4855 }
4856 
4857 /********************************************************************/
4860 UNIV_INTERN
4861 ulint
4862 fil_page_get_next(
4863 /*==============*/
4864  const byte* page)
4865 {
4866  return(mach_read_from_4(page + FIL_PAGE_NEXT));
4867 }
4868 
4869 /*********************************************************************/
4871 UNIV_INTERN
4872 void
4873 fil_page_set_type(
4874 /*==============*/
4875  byte* page,
4876  ulint type)
4877 {
4878  ut_ad(page);
4879 
4880  mach_write_to_2(page + FIL_PAGE_TYPE, type);
4881 }
4882 
4883 /*********************************************************************/
4887 UNIV_INTERN
4888 ulint
4889 fil_page_get_type(
4890 /*==============*/
4891  const byte* page)
4892 {
4893  ut_ad(page);
4894 
4895  return(mach_read_from_2(page + FIL_PAGE_TYPE));
4896 }
4897 
4898 /****************************************************************/
4900 UNIV_INTERN
4901 void
4902 fil_close(void)
4903 /*===========*/
4904 {
4905 #ifndef UNIV_HOTBACKUP
4906  /* The mutex should already have been freed. */
4907  ut_ad(fil_system->mutex.magic_n == 0);
4908 #endif /* !UNIV_HOTBACKUP */
4909 
4910  hash_table_free(fil_system->spaces);
4911 
4912  hash_table_free(fil_system->name_hash);
4913 
4914  ut_a(UT_LIST_GET_LEN(fil_system->LRU) == 0);
4915  ut_a(UT_LIST_GET_LEN(fil_system->unflushed_spaces) == 0);
4916  ut_a(UT_LIST_GET_LEN(fil_system->space_list) == 0);
4917 
4918  mem_free(fil_system);
4919 
4920  fil_system = NULL;
4921 }