Drizzled Public API Documentation

log0log.cc
00001 /*****************************************************************************
00002 
00003 Copyright (C) 1995, 2010, Innobase Oy. All Rights Reserved.
00004 Copyright (C) 2009 Google Inc.
00005 
00006 Portions of this file contain modifications contributed and copyrighted by
00007 Google, Inc. Those modifications are gratefully acknowledged and are described
00008 briefly in the InnoDB documentation. The contributions by Google are
00009 incorporated with their permission, and subject to the conditions contained in
00010 the file COPYING.Google.
00011 
00012 This program is free software; you can redistribute it and/or modify it under
00013 the terms of the GNU General Public License as published by the Free Software
00014 Foundation; version 2 of the License.
00015 
00016 This program is distributed in the hope that it will be useful, but WITHOUT
00017 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00018 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
00019 
00020 You should have received a copy of the GNU General Public License along with
00021 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
00022 St, Fifth Floor, Boston, MA 02110-1301 USA
00023 
00024 *****************************************************************************/
00025 
00026 /**************************************************/
00033 #include "log0log.h"
00034 
00035 #ifdef UNIV_NONINL
00036 #include "log0log.ic"
00037 #endif
00038 
00039 #ifndef UNIV_HOTBACKUP
00040 #include "mem0mem.h"
00041 #include "buf0buf.h"
00042 #include "buf0flu.h"
00043 #include "srv0srv.h"
00044 #include "log0recv.h"
00045 #include "fil0fil.h"
00046 #include "dict0boot.h"
00047 #include "srv0srv.h"
00048 #include "srv0start.h"
00049 #include "trx0sys.h"
00050 #include "trx0trx.h"
00051 
00052 #include <drizzled/errmsg_print.h>
00053 
00054 /*
00055 General philosophy of InnoDB redo-logs:
00056 
00057 1) Every change to the contents of a data page must be done
00058 through mtr, which in mtr_commit() writes log records
00059 to the InnoDB redo log.
00060 
00061 2) Normally these changes are performed using a mlog_write_ulint()
00062 or similar function.
00063 
00064 3) In some page level operations only a code number of a
00065 c-function and its parameters are written to the log to
00066 reduce the size of the log.
00067 
00068   3a) You should not add parameters to these kinds of functions
00069   (e.g. trx_undo_header_create(), trx_undo_insert_header_reuse())
00070 
00071   3b) You should not add functionality that either changes their
00072   behavior compared with the old version or depends on data
00073   outside of the page. These kinds of functions should implement
00074   a self-contained page transformation, which should stay unchanged
00075   unless you have very essential reasons to change the log
00076   semantics or format.
00077 
00078 */
00079 
00080 /* Current free limit of space 0; protected by the log sys mutex; 0 means
00081 uninitialized. It is assigned in
log_fsp_current_free_limit_set_and_checkpoint() while log_sys->mutex is held. */
00082 UNIV_INTERN ulint log_fsp_current_free_limit    = 0;
00083 
00084 /* Global log system variable; NULL until log_init() allocates it */
00085 UNIV_INTERN log_t*  log_sys = NULL;
00086 
/* Instrumentation keys handed to rw_lock_create() in log_init() */
00087 #ifdef UNIV_PFS_RWLOCK
00088 UNIV_INTERN mysql_pfs_key_t checkpoint_lock_key;
00089 # ifdef UNIV_LOG_ARCHIVE
00090 UNIV_INTERN mysql_pfs_key_t archive_lock_key;
00091 # endif
00092 #endif /* UNIV_PFS_RWLOCK */
00093 
/* Instrumentation keys handed to mutex_create() in log_init() */
00094 #ifdef UNIV_PFS_MUTEX
00095 UNIV_INTERN mysql_pfs_key_t log_sys_mutex_key;
00096 UNIV_INTERN mysql_pfs_key_t log_flush_order_mutex_key;
00097 #endif /* UNIV_PFS_MUTEX */
00098 
/* Debug-build flag, initially TRUE. It is not referenced in this part of the
file; NOTE(review): presumably it lets tests suppress log writes - confirm at
its point of use. */
00099 #ifdef UNIV_DEBUG
00100 UNIV_INTERN ibool log_do_write = TRUE;
00101 #endif /* UNIV_DEBUG */
00102 
00103 /* These control how often we print warnings if the last checkpoint is too
00104 old: log_close() prints the first warning unconditionally and later ones
only when more than 15 seconds have passed since log_last_warning_time */
00105 UNIV_INTERN ibool log_has_printed_chkp_warning = FALSE;
00106 UNIV_INTERN time_t  log_last_warning_time;
00107 
00108 #ifdef UNIV_LOG_ARCHIVE
00109 /* Pointer to this variable is used as the i/o-message when we do i/o to an
00110 archive; log_io_complete() compares its argument against this address to
recognize an archive write */
00111 UNIV_INTERN byte  log_archive_io;
00112 #endif /* UNIV_LOG_ARCHIVE */
00113 
00114 /* A margin for free space in the log buffer before a log entry is appended;
log_reserve_and_open() adds it to its upper estimate of the space needed */
00115 #define LOG_BUF_WRITE_MARGIN  (4 * OS_FILE_LOG_BLOCK_SIZE)
00116 
00117 /* Margins for free space in the log buffer after a log entry is appended;
log_init() sets max_buf_free = buf_size / LOG_BUF_FLUSH_RATIO
- LOG_BUF_FLUSH_MARGIN */
00118 #define LOG_BUF_FLUSH_RATIO 2
00119 #define LOG_BUF_FLUSH_MARGIN  (LOG_BUF_WRITE_MARGIN + 4 * UNIV_PAGE_SIZE)
00120 
00121 /* Margin for the free space in the smallest log group, before a new query
00122 step which modifies the database, is started. log_calc_max_ages() reserves
LOG_CHECKPOINT_FREE_PER_THREAD * (10 + srv_thread_concurrency)
+ LOG_CHECKPOINT_EXTRA_FREE. */
00123 
00124 #define LOG_CHECKPOINT_FREE_PER_THREAD  (4 * UNIV_PAGE_SIZE)
00125 #define LOG_CHECKPOINT_EXTRA_FREE (8 * UNIV_PAGE_SIZE)
00126 
00127 /* This parameter controls asynchronous making of a new checkpoint; the value
00128 should be bigger than LOG_POOL_PREFLUSH_RATIO_SYNC. In log_calc_max_ages()
each of the ratios below is applied as margin - margin / RATIO. */
00129 
00130 #define LOG_POOL_CHECKPOINT_RATIO_ASYNC 32
00131 
00132 /* This parameter controls synchronous preflushing of modified buffer pages */
00133 #define LOG_POOL_PREFLUSH_RATIO_SYNC  16
00134 
00135 /* The same ratio for asynchronous preflushing; this value should be less than
00136 the previous */
00137 #define LOG_POOL_PREFLUSH_RATIO_ASYNC 8
00138 
00139 /* Extra margin, in addition to one log file, used in archiving */
00140 #define LOG_ARCHIVE_EXTRA_MARGIN  (4 * UNIV_PAGE_SIZE)
00141 
00142 /* This parameter controls asynchronous writing to the archive */
00143 #define LOG_ARCHIVE_RATIO_ASYNC   16
00144 
00145 /* Codes used in unlocking flush latches; they are distinct bits, and
log_flush_do_unlocks() tests each one with a bitwise AND */
00146 #define LOG_UNLOCK_NONE_FLUSHED_LOCK  1
00147 #define LOG_UNLOCK_FLUSH_LOCK   2
00148 
00149 /* States of an archiving operation */
00150 #define LOG_ARCHIVE_READ  1
00151 #define LOG_ARCHIVE_WRITE 2
00152 
/******************************************************/
/** Forward declaration of a file-local routine; its definition is not
visible in this part of the file.
NOTE(review): judging by the name it finishes a checkpoint i/o - confirm
against the definition. */
static void log_io_complete_checkpoint(void);

#ifdef UNIV_LOG_ARCHIVE
/******************************************************/
/** Forward declaration of the file-local routine that log_io_complete()
calls when the completed i/o was an archive write. */
static void log_io_complete_archive(void);
#endif /* UNIV_LOG_ARCHIVE */
00167 
00168 /****************************************************************/
00172 UNIV_INTERN
00173 void
00174 log_fsp_current_free_limit_set_and_checkpoint(
00175 /*==========================================*/
00176   ulint limit)  
00177 {
00178   ibool success;
00179 
00180   mutex_enter(&(log_sys->mutex));
00181 
00182   log_fsp_current_free_limit = limit;
00183 
00184   mutex_exit(&(log_sys->mutex));
00185 
00186   /* Try to make a synchronous checkpoint */
00187 
00188   success = FALSE;
00189 
00190   while (!success) {
00191     success = log_checkpoint(TRUE, TRUE);
00192   }
00193 }
00194 
00195 /****************************************************************/
00199 static
00200 ib_uint64_t
00201 log_buf_pool_get_oldest_modification(void)
00202 /*======================================*/
00203 {
00204   ib_uint64_t lsn;
00205 
00206   ut_ad(mutex_own(&(log_sys->mutex)));
00207 
00208   lsn = buf_pool_get_oldest_modification();
00209 
00210   if (!lsn) {
00211 
00212     lsn = log_sys->lsn;
00213   }
00214 
00215   return(lsn);
00216 }
00217 
00218 /************************************************************/
00222 UNIV_INTERN
00223 ib_uint64_t
00224 log_reserve_and_open(
00225 /*=================*/
00226   ulint len)  
00227 {
00228   log_t*  log     = log_sys;
00229   ulint len_upper_limit;
00230 #ifdef UNIV_LOG_ARCHIVE
00231   ulint archived_lsn_age;
00232   ulint dummy;
00233 #endif /* UNIV_LOG_ARCHIVE */
00234 #ifdef UNIV_DEBUG
00235   ulint count     = 0;
00236 #endif /* UNIV_DEBUG */
00237 
00238   ut_a(len < log->buf_size / 2);
00239 loop:
00240   mutex_enter(&(log->mutex));
00241   ut_ad(!recv_no_log_write);
00242 
00243   /* Calculate an upper limit for the space the string may take in the
00244   log buffer */
00245 
00246   len_upper_limit = LOG_BUF_WRITE_MARGIN + (5 * len) / 4;
00247 
00248   if (log->buf_free + len_upper_limit > log->buf_size) {
00249 
00250     mutex_exit(&(log->mutex));
00251 
00252     /* Not enough free space, do a syncronous flush of the log
00253     buffer */
00254 
00255     log_buffer_flush_to_disk();
00256 
00257     srv_log_waits++;
00258 
00259     ut_ad(++count < 50);
00260 
00261     goto loop;
00262   }
00263 
00264 #ifdef UNIV_LOG_ARCHIVE
00265   if (log->archiving_state != LOG_ARCH_OFF) {
00266 
00267     archived_lsn_age = log->lsn - log->archived_lsn;
00268     if (archived_lsn_age + len_upper_limit
00269         > log->max_archived_lsn_age) {
00270       /* Not enough free archived space in log groups: do a
00271       synchronous archive write batch: */
00272 
00273       mutex_exit(&(log->mutex));
00274 
00275       ut_ad(len_upper_limit <= log->max_archived_lsn_age);
00276 
00277       log_archive_do(TRUE, &dummy);
00278 
00279       ut_ad(++count < 50);
00280 
00281       goto loop;
00282     }
00283   }
00284 #endif /* UNIV_LOG_ARCHIVE */
00285 
00286 #ifdef UNIV_LOG_DEBUG
00287   log->old_buf_free = log->buf_free;
00288   log->old_lsn = log->lsn;
00289 #endif
00290   return(log->lsn);
00291 }
00292 
00293 /************************************************************/
00296 UNIV_INTERN
00297 void
00298 log_write_low(
00299 /*==========*/
00300   byte* str,    
00301   ulint str_len)  
00302 {
00303   log_t*  log = log_sys;
00304   ulint len;
00305   ulint data_len;
00306   byte* log_block;
00307 
00308   ut_ad(mutex_own(&(log->mutex)));
00309 part_loop:
00310   ut_ad(!recv_no_log_write);
00311   /* Calculate a part length */
00312 
00313   data_len = (log->buf_free % OS_FILE_LOG_BLOCK_SIZE) + str_len;
00314 
00315   if (data_len <= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
00316 
00317     /* The string fits within the current log block */
00318 
00319     len = str_len;
00320   } else {
00321     data_len = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
00322 
00323     len = OS_FILE_LOG_BLOCK_SIZE
00324       - (log->buf_free % OS_FILE_LOG_BLOCK_SIZE)
00325       - LOG_BLOCK_TRL_SIZE;
00326   }
00327 
00328   ut_memcpy(log->buf + log->buf_free, str, len);
00329 
00330   str_len -= len;
00331   str = str + len;
00332 
00333   log_block = static_cast<unsigned char *>(ut_align_down(log->buf + log->buf_free,
00334         OS_FILE_LOG_BLOCK_SIZE));
00335   log_block_set_data_len(log_block, data_len);
00336 
00337   if (data_len == OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
00338     /* This block became full */
00339     log_block_set_data_len(log_block, OS_FILE_LOG_BLOCK_SIZE);
00340     log_block_set_checkpoint_no(log_block,
00341               log_sys->next_checkpoint_no);
00342     len += LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE;
00343 
00344     log->lsn += len;
00345 
00346     /* Initialize the next block header */
00347     log_block_init(log_block + OS_FILE_LOG_BLOCK_SIZE, log->lsn);
00348   } else {
00349     log->lsn += len;
00350   }
00351 
00352   log->buf_free += len;
00353 
00354   ut_ad(log->buf_free <= log->buf_size);
00355 
00356   if (str_len > 0) {
00357     goto part_loop;
00358   }
00359 
00360   srv_log_write_requests++;
00361 }
00362 
00363 /************************************************************/
00366 UNIV_INTERN
00367 ib_uint64_t
00368 log_close(void)
00369 /*===========*/
00370 {
00371   byte*   log_block;
00372   ulint   first_rec_group;
00373   ib_uint64_t oldest_lsn;
00374   ib_uint64_t lsn;
00375   log_t*    log = log_sys;
00376   ib_uint64_t checkpoint_age;
00377 
00378   ut_ad(mutex_own(&(log->mutex)));
00379   ut_ad(!recv_no_log_write);
00380 
00381   lsn = log->lsn;
00382 
00383   log_block = static_cast<unsigned char *>(ut_align_down(log->buf + log->buf_free,
00384         OS_FILE_LOG_BLOCK_SIZE));
00385   first_rec_group = log_block_get_first_rec_group(log_block);
00386 
00387   if (first_rec_group == 0) {
00388     /* We initialized a new log block which was not written
00389     full by the current mtr: the next mtr log record group
00390     will start within this block at the offset data_len */
00391 
00392     log_block_set_first_rec_group(
00393       log_block, log_block_get_data_len(log_block));
00394   }
00395 
00396   if (log->buf_free > log->max_buf_free) {
00397 
00398     log->check_flush_or_checkpoint = TRUE;
00399   }
00400 
00401   checkpoint_age = lsn - log->last_checkpoint_lsn;
00402 
00403   if (checkpoint_age >= log->log_group_capacity) {
00404     /* TODO: split btr_store_big_rec_extern_fields() into small
00405     steps so that we can release all latches in the middle, and
00406     call log_free_check() to ensure we never write over log written
00407     after the latest checkpoint. In principle, we should split all
00408     big_rec operations, but other operations are smaller. */
00409 
00410     if (!log_has_printed_chkp_warning
00411         || difftime(time(NULL), log_last_warning_time) > 15) {
00412 
00413       log_has_printed_chkp_warning = TRUE;
00414       log_last_warning_time = time(NULL);
00415 
00416       ut_print_timestamp(stderr);
00417       fprintf(stderr,
00418         "  InnoDB: ERROR: the age of the last"
00419         " checkpoint is %lu,\n"
00420         "InnoDB: which exceeds the log group"
00421         " capacity %lu.\n"
00422         "InnoDB: If you are using big"
00423         " BLOB or TEXT rows, you must set the\n"
00424         "InnoDB: combined size of log files"
00425         " at least 10 times bigger than the\n"
00426         "InnoDB: largest such row.\n",
00427         (ulong) checkpoint_age,
00428         (ulong) log->log_group_capacity);
00429     }
00430   }
00431 
00432   if (checkpoint_age <= log->max_modified_age_async) {
00433 
00434     goto function_exit;
00435   }
00436 
00437   oldest_lsn = buf_pool_get_oldest_modification();
00438 
00439   if (!oldest_lsn
00440       || lsn - oldest_lsn > log->max_modified_age_async
00441       || checkpoint_age > log->max_checkpoint_age_async) {
00442 
00443     log->check_flush_or_checkpoint = TRUE;
00444   }
00445 function_exit:
00446 
00447 #ifdef UNIV_LOG_DEBUG
00448   log_check_log_recs(log->buf + log->old_buf_free,
00449          log->buf_free - log->old_buf_free, log->old_lsn);
00450 #endif
00451 
00452   return(lsn);
00453 }
00454 
#ifdef UNIV_LOG_ARCHIVE
/******************************************************/
/** Fills the remaining payload of the current log block with
MLOG_DUMMY_RECORD bytes, written one at a time through log_write_low(), so
that afterwards the lsn sits exactly LOG_BLOCK_HDR_SIZE bytes into a block
(asserted at the end). */
static
void
log_pad_current_log_block(void)
/*===========================*/
{
  byte  pad_byte  = MLOG_DUMMY_RECORD;

  /* We retrieve lsn only because otherwise gcc crashed on HP-UX */
  ib_uint64_t lsn = log_reserve_and_open(OS_FILE_LOG_BLOCK_SIZE);

  /* Payload bytes left before the trailer of the current block */
  ulint remaining = OS_FILE_LOG_BLOCK_SIZE
    - (log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE)
    - LOG_BLOCK_TRL_SIZE;

  while (remaining > 0) {
    log_write_low(&pad_byte, 1);
    remaining--;
  }

  lsn = log_sys->lsn;

  log_close();
  log_release();

  ut_a(lsn % OS_FILE_LOG_BLOCK_SIZE == LOG_BLOCK_HDR_SIZE);
}
#endif /* UNIV_LOG_ARCHIVE */
00488 
00489 /******************************************************/
00493 UNIV_INTERN
00494 ulint
00495 log_group_get_capacity(
00496 /*===================*/
00497   const log_group_t*  group)  
00498 {
00499   ut_ad(mutex_own(&(log_sys->mutex)));
00500 
00501   return((group->file_size - LOG_FILE_HDR_SIZE) * group->n_files);
00502 }
00503 
00504 /******************************************************/
00508 UNIV_INLINE
00509 ulint
00510 log_group_calc_size_offset(
00511 /*=======================*/
00512   ulint     offset, 
00514   const log_group_t*  group)  
00515 {
00516   ut_ad(mutex_own(&(log_sys->mutex)));
00517 
00518   return(offset - LOG_FILE_HDR_SIZE * (1 + offset / group->file_size));
00519 }
00520 
00521 /******************************************************/
00525 UNIV_INLINE
00526 ulint
00527 log_group_calc_real_offset(
00528 /*=======================*/
00529   ulint     offset, 
00531   const log_group_t*  group)  
00532 {
00533   ut_ad(mutex_own(&(log_sys->mutex)));
00534 
00535   return(offset + LOG_FILE_HDR_SIZE
00536          * (1 + offset / (group->file_size - LOG_FILE_HDR_SIZE)));
00537 }
00538 
00539 /******************************************************/
00542 static
00543 ulint
00544 log_group_calc_lsn_offset(
00545 /*======================*/
00546   ib_uint64_t   lsn,  
00548   const log_group_t*  group)  
00549 {
00550   ib_uint64_t gr_lsn;
00551   ib_int64_t  gr_lsn_size_offset;
00552   ib_int64_t  difference;
00553   ib_int64_t  group_size;
00554   ib_int64_t  offset;
00555 
00556   ut_ad(mutex_own(&(log_sys->mutex)));
00557 
00558   /* If total log file size is > 2 GB we can easily get overflows
00559   with 32-bit integers. Use 64-bit integers instead. */
00560 
00561   gr_lsn = group->lsn;
00562 
00563   gr_lsn_size_offset = (ib_int64_t)
00564     log_group_calc_size_offset(group->lsn_offset, group);
00565 
00566   group_size = (ib_int64_t) log_group_get_capacity(group);
00567 
00568   if (lsn >= gr_lsn) {
00569 
00570     difference = (ib_int64_t) (lsn - gr_lsn);
00571   } else {
00572     difference = (ib_int64_t) (gr_lsn - lsn);
00573 
00574     difference = difference % group_size;
00575 
00576     difference = group_size - difference;
00577   }
00578 
00579   offset = (gr_lsn_size_offset + difference) % group_size;
00580 
00581   ut_a(offset < (((ib_int64_t) 1) << 32)); /* offset must be < 4 GB */
00582 
00583   /* fprintf(stderr,
00584   "Offset is %lu gr_lsn_offset is %lu difference is %lu\n",
00585   (ulint)offset,(ulint)gr_lsn_size_offset, (ulint)difference);
00586   */
00587 
00588   return(log_group_calc_real_offset((ulint)offset, group));
00589 }
00590 #endif /* !UNIV_HOTBACKUP */
00591 
/* Debug-build switch, initially FALSE. When it is set,
log_group_check_flush_completion() prints a line to stderr each time a
group has no pending writes left. */
00592 #ifdef UNIV_DEBUG
00593 UNIV_INTERN ibool log_debug_writes = FALSE;
00594 #endif /* UNIV_DEBUG */
00595 
00596 /*******************************************************************/
00599 UNIV_INTERN
00600 ulint
00601 log_calc_where_lsn_is(
00602 /*==================*/
00603   ib_int64_t* log_file_offset,  
00605   ib_uint64_t first_header_lsn, 
00607   ib_uint64_t lsn,      
00609   ulint   n_log_files,    
00611   ib_int64_t  log_file_size)    
00613 {
00614   ib_int64_t  capacity  = log_file_size - LOG_FILE_HDR_SIZE;
00615   ulint   file_no;
00616   ib_int64_t  add_this_many;
00617 
00618   if (lsn < first_header_lsn) {
00619     add_this_many = 1 + (first_header_lsn - lsn)
00620       / (capacity * (ib_int64_t)n_log_files);
00621     lsn += add_this_many
00622       * capacity * (ib_int64_t)n_log_files;
00623   }
00624 
00625   ut_a(lsn >= first_header_lsn);
00626 
00627   file_no = ((ulint)((lsn - first_header_lsn) / capacity))
00628     % n_log_files;
00629   *log_file_offset = (lsn - first_header_lsn) % capacity;
00630 
00631   *log_file_offset = *log_file_offset + LOG_FILE_HDR_SIZE;
00632 
00633   return(file_no);
00634 }
00635 
00636 #ifndef UNIV_HOTBACKUP
00637 /********************************************************/
00641 UNIV_INTERN
00642 void
00643 log_group_set_fields(
00644 /*=================*/
00645   log_group_t*  group,  
00646   ib_uint64_t lsn)  
00648 {
00649   group->lsn_offset = log_group_calc_lsn_offset(lsn, group);
00650   group->lsn = lsn;
00651 }
00652 
00653 /*****************************************************************/
00658 static
00659 ibool
00660 log_calc_max_ages(void)
00661 /*===================*/
00662 {
00663   log_group_t*  group;
00664   ulint   margin;
00665   ulint   free;
00666   ibool   success   = TRUE;
00667   ulint   smallest_capacity;
00668   ulint   archive_margin;
00669   ulint   smallest_archive_margin;
00670 
00671   mutex_enter(&(log_sys->mutex));
00672 
00673   group = UT_LIST_GET_FIRST(log_sys->log_groups);
00674 
00675   ut_ad(group);
00676 
00677   smallest_capacity = ULINT_MAX;
00678   smallest_archive_margin = ULINT_MAX;
00679 
00680   while (group) {
00681     if (log_group_get_capacity(group) < smallest_capacity) {
00682 
00683       smallest_capacity = log_group_get_capacity(group);
00684     }
00685 
00686     archive_margin = log_group_get_capacity(group)
00687       - (group->file_size - LOG_FILE_HDR_SIZE)
00688       - LOG_ARCHIVE_EXTRA_MARGIN;
00689 
00690     if (archive_margin < smallest_archive_margin) {
00691 
00692       smallest_archive_margin = archive_margin;
00693     }
00694 
00695     group = UT_LIST_GET_NEXT(log_groups, group);
00696   }
00697 
00698   /* Add extra safety */
00699   smallest_capacity = smallest_capacity - smallest_capacity / 10;
00700 
00701   /* For each OS thread we must reserve so much free space in the
00702   smallest log group that it can accommodate the log entries produced
00703   by single query steps: running out of free log space is a serious
00704   system error which requires rebooting the database. */
00705 
00706   free = LOG_CHECKPOINT_FREE_PER_THREAD * (10 + srv_thread_concurrency)
00707     + LOG_CHECKPOINT_EXTRA_FREE;
00708   if (free >= smallest_capacity / 2) {
00709     success = FALSE;
00710 
00711     goto failure;
00712   } else {
00713     margin = smallest_capacity - free;
00714   }
00715 
00716   margin = ut_min(margin, log_sys->adm_checkpoint_interval);
00717 
00718   margin = margin - margin / 10;  /* Add still some extra safety */
00719 
00720   log_sys->log_group_capacity = smallest_capacity;
00721 
00722   log_sys->max_modified_age_async = margin
00723     - margin / LOG_POOL_PREFLUSH_RATIO_ASYNC;
00724   log_sys->max_modified_age_sync = margin
00725     - margin / LOG_POOL_PREFLUSH_RATIO_SYNC;
00726 
00727   log_sys->max_checkpoint_age_async = margin - margin
00728     / LOG_POOL_CHECKPOINT_RATIO_ASYNC;
00729   log_sys->max_checkpoint_age = margin;
00730 
00731 #ifdef UNIV_LOG_ARCHIVE
00732   log_sys->max_archived_lsn_age = smallest_archive_margin;
00733 
00734   log_sys->max_archived_lsn_age_async = smallest_archive_margin
00735     - smallest_archive_margin / LOG_ARCHIVE_RATIO_ASYNC;
00736 #endif /* UNIV_LOG_ARCHIVE */
00737 failure:
00738   mutex_exit(&(log_sys->mutex));
00739 
00740   if (!success) {
00741     fprintf(stderr,
00742       "InnoDB: Error: ib_logfiles are too small"
00743       " for innodb_thread_concurrency %lu.\n"
00744       "InnoDB: The combined size of ib_logfiles"
00745       " should be bigger than\n"
00746       "InnoDB: 200 kB * innodb_thread_concurrency.\n"
00747       "InnoDB: To get mysqld to start up, set"
00748       " innodb_thread_concurrency in my.cnf\n"
00749       "InnoDB: to a lower value, for example, to 8."
00750       " After an ERROR-FREE shutdown\n"
00751       "InnoDB: of mysqld you can adjust the size of"
00752       " ib_logfiles, as explained in\n"
00753       "InnoDB: " REFMAN "adding-and-removing.html\n"
00754       "InnoDB: Cannot continue operation."
00755       " Calling exit(1).\n",
00756       (ulong)srv_thread_concurrency);
00757 
00758     exit(1);
00759   }
00760 
00761   return(success);
00762 }
00763 
00764 /******************************************************/
00766 UNIV_INTERN
00767 void
00768 log_init(void)
00769 /*==========*/
00770 {
00771   log_sys = static_cast<log_t *>(mem_alloc(sizeof(log_t)));
00772 
00773   mutex_create(log_sys_mutex_key, &log_sys->mutex, SYNC_LOG);
00774 
00775   mutex_create(log_flush_order_mutex_key,
00776          &log_sys->log_flush_order_mutex,
00777          SYNC_LOG_FLUSH_ORDER);
00778 
00779   mutex_enter(&(log_sys->mutex));
00780 
00781   /* Start the lsn from one log block from zero: this way every
00782   log record has a start lsn != zero, a fact which we will use */
00783 
00784   log_sys->lsn = LOG_START_LSN;
00785 
00786   ut_a(LOG_BUFFER_SIZE >= 16 * OS_FILE_LOG_BLOCK_SIZE);
00787   ut_a(LOG_BUFFER_SIZE >= 4 * UNIV_PAGE_SIZE);
00788 
00789   log_sys->buf_ptr = static_cast<unsigned char *>(mem_alloc(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE));
00790   log_sys->buf = static_cast<unsigned char *>(ut_align(log_sys->buf_ptr, OS_FILE_LOG_BLOCK_SIZE));
00791 
00792   log_sys->buf_size = LOG_BUFFER_SIZE;
00793 
00794   memset(log_sys->buf, '\0', LOG_BUFFER_SIZE);
00795 
00796   log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO
00797     - LOG_BUF_FLUSH_MARGIN;
00798   log_sys->check_flush_or_checkpoint = TRUE;
00799   UT_LIST_INIT(log_sys->log_groups);
00800 
00801   log_sys->n_log_ios = 0;
00802 
00803   log_sys->n_log_ios_old = log_sys->n_log_ios;
00804   log_sys->last_printout_time = time(NULL);
00805   /*----------------------------*/
00806 
00807   log_sys->buf_next_to_write = 0;
00808 
00809   log_sys->write_lsn = 0;
00810   log_sys->current_flush_lsn = 0;
00811   log_sys->flushed_to_disk_lsn = 0;
00812 
00813   log_sys->written_to_some_lsn = log_sys->lsn;
00814   log_sys->written_to_all_lsn = log_sys->lsn;
00815 
00816   log_sys->n_pending_writes = 0;
00817 
00818   log_sys->no_flush_event = os_event_create(NULL);
00819 
00820   os_event_set(log_sys->no_flush_event);
00821 
00822   log_sys->one_flushed_event = os_event_create(NULL);
00823 
00824   os_event_set(log_sys->one_flushed_event);
00825 
00826   /*----------------------------*/
00827   log_sys->adm_checkpoint_interval = ULINT_MAX;
00828 
00829   log_sys->next_checkpoint_no = 0;
00830   log_sys->last_checkpoint_lsn = log_sys->lsn;
00831   log_sys->n_pending_checkpoint_writes = 0;
00832 
00833   rw_lock_create(checkpoint_lock_key, &log_sys->checkpoint_lock,
00834            SYNC_NO_ORDER_CHECK);
00835 
00836   log_sys->checkpoint_buf_ptr = static_cast<unsigned char *>(mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE));
00837   log_sys->checkpoint_buf = static_cast<unsigned char *>(ut_align(log_sys->checkpoint_buf_ptr,
00838         OS_FILE_LOG_BLOCK_SIZE));
00839   memset(log_sys->checkpoint_buf, '\0', OS_FILE_LOG_BLOCK_SIZE);
00840   /*----------------------------*/
00841 
00842 #ifdef UNIV_LOG_ARCHIVE
00843   /* Under MySQL, log archiving is always off */
00844   log_sys->archiving_state = LOG_ARCH_OFF;
00845   log_sys->archived_lsn = log_sys->lsn;
00846   log_sys->next_archived_lsn = 0;
00847 
00848   log_sys->n_pending_archive_ios = 0;
00849 
00850   rw_lock_create(archive_lock_key, &log_sys->archive_lock,
00851            SYNC_NO_ORDER_CHECK);
00852 
00853   log_sys->archive_buf = NULL;
00854 
00855   /* ut_align(
00856   ut_malloc(LOG_ARCHIVE_BUF_SIZE
00857   + OS_FILE_LOG_BLOCK_SIZE),
00858   OS_FILE_LOG_BLOCK_SIZE); */
00859   log_sys->archive_buf_size = 0;
00860 
00861   /* memset(log_sys->archive_buf, '\0', LOG_ARCHIVE_BUF_SIZE); */
00862 
00863   log_sys->archiving_on = os_event_create(NULL);
00864 #endif /* UNIV_LOG_ARCHIVE */
00865 
00866   /*----------------------------*/
00867 
00868   log_block_init(log_sys->buf, log_sys->lsn);
00869   log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
00870 
00871   log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
00872   log_sys->lsn = LOG_START_LSN + LOG_BLOCK_HDR_SIZE;
00873 
00874   mutex_exit(&(log_sys->mutex));
00875 
00876 #ifdef UNIV_LOG_DEBUG
00877   recv_sys_create();
00878   recv_sys_init(buf_pool_get_curr_size());
00879 
00880   recv_sys->parse_start_lsn = log_sys->lsn;
00881   recv_sys->scanned_lsn = log_sys->lsn;
00882   recv_sys->scanned_checkpoint_no = 0;
00883   recv_sys->recovered_lsn = log_sys->lsn;
00884   recv_sys->limit_lsn = IB_ULONGLONG_MAX;
00885 #endif
00886 }
00887 
00888 /******************************************************************/
00890 UNIV_INTERN
00891 void
00892 log_group_init(
00893 /*===========*/
00894   ulint id,     
00895   ulint n_files,    
00896   ulint file_size,    
00897   ulint space_id,   
00900   ulint /*archive_space_id __attribute__((unused))*/)
00906 {
00907   ulint i;
00908 
00909   log_group_t*  group;
00910 
00911   group = static_cast<log_group_t *>(mem_alloc(sizeof(log_group_t)));
00912 
00913   group->id = id;
00914   group->n_files = n_files;
00915   group->file_size = file_size;
00916   group->space_id = space_id;
00917   group->state = LOG_GROUP_OK;
00918   group->lsn = LOG_START_LSN;
00919   group->lsn_offset = LOG_FILE_HDR_SIZE;
00920   group->n_pending_writes = 0;
00921 
00922   group->file_header_bufs_ptr = static_cast<unsigned char **>(mem_alloc(sizeof(byte*) * n_files));
00923   group->file_header_bufs = static_cast<unsigned char **>(mem_alloc(sizeof(byte*) * n_files));
00924 #ifdef UNIV_LOG_ARCHIVE
00925   group->archive_file_header_bufs_ptr = mem_alloc(
00926     sizeof(byte*) * n_files);
00927   group->archive_file_header_bufs = mem_alloc(sizeof(byte*) * n_files);
00928 #endif /* UNIV_LOG_ARCHIVE */
00929 
00930   for (i = 0; i < n_files; i++) {
00931     group->file_header_bufs_ptr[i] = static_cast<unsigned char *>(mem_alloc(
00932       LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE));
00933 
00934     group->file_header_bufs[i] = static_cast<unsigned char *>(ut_align(
00935       group->file_header_bufs_ptr[i],
00936       OS_FILE_LOG_BLOCK_SIZE));
00937 
00938     memset(*(group->file_header_bufs + i), '\0',
00939            LOG_FILE_HDR_SIZE);
00940 
00941 #ifdef UNIV_LOG_ARCHIVE
00942     group->archive_file_header_bufs_ptr[i] = mem_alloc(
00943       LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
00944 
00945     group->archive_file_header_bufs[i] = ut_align(
00946       group->archive_file_header_bufs_ptr[i],
00947       OS_FILE_LOG_BLOCK_SIZE);
00948 
00949     memset(*(group->archive_file_header_bufs + i), '\0',
00950            LOG_FILE_HDR_SIZE);
00951 #endif /* UNIV_LOG_ARCHIVE */
00952   }
00953 
00954 #ifdef UNIV_LOG_ARCHIVE
00955   group->archive_space_id = archive_space_id;
00956 
00957   group->archived_file_no = 0;
00958   group->archived_offset = 0;
00959 #endif /* UNIV_LOG_ARCHIVE */
00960 
00961   group->checkpoint_buf_ptr = static_cast<unsigned char *>(mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE));
00962   group->checkpoint_buf = static_cast<unsigned char*>(ut_align(group->checkpoint_buf_ptr,
00963            OS_FILE_LOG_BLOCK_SIZE));
00964 
00965   memset(group->checkpoint_buf, '\0', OS_FILE_LOG_BLOCK_SIZE);
00966 
00967   UT_LIST_ADD_LAST(log_groups, log_sys->log_groups, group);
00968 
00969   ut_a(log_calc_max_ages());
00970 }
00971 
00972 /******************************************************************/
00974 UNIV_INLINE
00975 void
00976 log_flush_do_unlocks(
00977 /*=================*/
00978   ulint code) 
00980 {
00981   ut_ad(mutex_own(&(log_sys->mutex)));
00982 
00983   /* NOTE that we must own the log mutex when doing the setting of the
00984   events: this is because transactions will wait for these events to
00985   be set, and at that moment the log flush they were waiting for must
00986   have ended. If the log mutex were not reserved here, the i/o-thread
00987   calling this function might be preempted for a while, and when it
00988   resumed execution, it might be that a new flush had been started, and
00989   this function would erroneously signal the NEW flush as completed.
00990   Thus, the changes in the state of these events are performed
00991   atomically in conjunction with the changes in the state of
00992   log_sys->n_pending_writes etc. */
00993 
00994   if (code & LOG_UNLOCK_NONE_FLUSHED_LOCK) {
00995     os_event_set(log_sys->one_flushed_event);
00996   }
00997 
00998   if (code & LOG_UNLOCK_FLUSH_LOCK) {
00999     os_event_set(log_sys->no_flush_event);
01000   }
01001 }
01002 
01003 /******************************************************************/
/* Checks whether the writes to a log group have completed and, if this is
the first group found complete during the current write, records that in
log_sys (written_to_some_lsn and one_flushed). The caller must hold
log_sys->mutex.
Returns LOG_UNLOCK_NONE_FLUSHED_LOCK if this call set log_sys->one_flushed,
otherwise 0. */
01007 UNIV_INLINE
01008 ulint
01009 log_group_check_flush_completion(
01010 /*=============================*/
01011   log_group_t*  group)  /* in: log group whose pending write count is
          inspected */
01012 {
01013   ut_ad(mutex_own(&(log_sys->mutex)));
01014 
        /* Only the first group to finish in a given write takes this branch:
        one_flushed is set here and stays set until a new write clears it */
01015   if (!log_sys->one_flushed && group->n_pending_writes == 0) {
01016 #ifdef UNIV_DEBUG
01017     if (log_debug_writes) {
01018       fprintf(stderr,
01019         "Log flushed first to group %lu\n",
01020         (ulong) group->id);
01021     }
01022 #endif /* UNIV_DEBUG */
01023     log_sys->written_to_some_lsn = log_sys->write_lsn;
01024     log_sys->one_flushed = TRUE;
01025 
01026     return(LOG_UNLOCK_NONE_FLUSHED_LOCK);
01027   }
01028 
01029 #ifdef UNIV_DEBUG
01030   if (log_debug_writes && (group->n_pending_writes == 0)) {
01031 
01032     fprintf(stderr, "Log flushed to group %lu\n",
01033       (ulong) group->id);
01034   }
01035 #endif /* UNIV_DEBUG */
        /* Nothing to signal for this group */
01036   return(0);
01037 }
01038 
01039 /******************************************************/
/* Checks whether all log writes have completed (log_sys->n_pending_writes
is 0). If so, advances written_to_all_lsn and buf_next_to_write, and, when
the written part ends beyond half of max_buf_free, moves the remaining
buffer content to the start of the log buffer. The caller must hold
log_sys->mutex.
Returns LOG_UNLOCK_FLUSH_LOCK if no write is pending, otherwise 0. */
01042 static
01043 ulint
01044 log_sys_check_flush_completion(void)
01045 /*================================*/
01046 {
01047   ulint move_start; /* block-aligned start of the region to keep */
01048   ulint move_end;   /* block-aligned end of the region to keep */
01049 
01050   ut_ad(mutex_own(&(log_sys->mutex)));
01051 
01052   if (log_sys->n_pending_writes == 0) {
01053 
01054     log_sys->written_to_all_lsn = log_sys->write_lsn;
01055     log_sys->buf_next_to_write = log_sys->write_end_offset;
01056 
01057     if (log_sys->write_end_offset > log_sys->max_buf_free / 2) {
01058       /* Move the log buffer content to the start of the
01059       buffer */
01060 
            /* Whole blocks are moved: the start is rounded down and
            the end rounded up to OS_FILE_LOG_BLOCK_SIZE */
01061       move_start = ut_calc_align_down(
01062         log_sys->write_end_offset,
01063         OS_FILE_LOG_BLOCK_SIZE);
01064       move_end = ut_calc_align(log_sys->buf_free,
01065              OS_FILE_LOG_BLOCK_SIZE);
01066 
01067       ut_memmove(log_sys->buf, log_sys->buf + move_start,
01068            move_end - move_start);
            /* Shift both buffer offsets by the distance moved */
01069       log_sys->buf_free -= move_start;
01070 
01071       log_sys->buf_next_to_write -= move_start;
01072     }
01073 
01074     return(LOG_UNLOCK_FLUSH_LOCK);
01075   }
01076 
01077   return(0);
01078 }
01079 
01080 /******************************************************/
/* Completes an i/o to a log file. The 'group' argument is decoded as
follows: when UNIV_LOG_ARCHIVE is defined and it equals the address of
log_archive_io, the i/o was an archive write; when its lowest bit is set,
it was a checkpoint write and the real group address is the value minus 1
(log_group_checkpoint() passes the group address plus 1 to fil_io());
otherwise it denotes an ordinary log write. */
01082 UNIV_INTERN
01083 void
01084 log_io_complete(
01085 /*============*/
01086   log_group_t*  group)  /* in: log group, or a tagged pointer as
          described above */
01087 {
01088   ulint unlock;
01089 
01090 #ifdef UNIV_LOG_ARCHIVE
01091   if ((byte*)group == &log_archive_io) {
01092     /* It was an archive write */
01093 
01094     log_io_complete_archive();
01095 
01096     return;
01097   }
01098 #endif /* UNIV_LOG_ARCHIVE */
01099 
01100   if ((ulint)group & 0x1UL) {
01101     /* It was a checkpoint write */
          /* Strip the tag bit to recover the real group pointer */
01102     group = (log_group_t*)((ulint)group - 1);
01103 
          /* Flush the file space unless the configured flush method
          is O_DSYNC or NOSYNC */
01104     if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
01105         && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
01106 
01107       fil_flush(group->space_id);
01108     }
01109 
01110 #ifdef UNIV_DEBUG
01111     if (log_debug_writes) {
            /* group->id is cast to match %lu, as in the other debug
            prints of the group id in this file */
01112       fprintf(stderr,
01113         "Checkpoint info written to group %lu\n",
01114         (ulong) group->id);
01115     }
01116 #endif /* UNIV_DEBUG */
01117     log_io_complete_checkpoint();
01118 
01119     return;
01120   }
01121 
        /* NOTE(review): ut_error is presumably an unconditional failure, so
        the code below would be unreachable; log_group_write_buf() issues
        its log writes with the sync argument TRUE and log_write_up_to()
        does the equivalent completion bookkeeping itself. The tail looks
        like the completion path for an asynchronous log write - confirm
        before relying on it. */
01122   ut_error; 
01125   if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
01126       && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
01127       && srv_flush_log_at_trx_commit != 2) {
01128 
01129     fil_flush(group->space_id);
01130   }
01131 
01132   mutex_enter(&(log_sys->mutex));
01133   ut_ad(!recv_no_log_write);
01134 
01135   ut_a(group->n_pending_writes > 0);
01136   ut_a(log_sys->n_pending_writes > 0);
01137 
01138   group->n_pending_writes--;
01139   log_sys->n_pending_writes--;
01140 
        /* Collect the events to signal and set them while still holding
        the mutex */
01141   unlock = log_group_check_flush_completion(group);
01142   unlock = unlock | log_sys_check_flush_completion();
01143 
01144   log_flush_do_unlocks(unlock);
01145 
01146   mutex_exit(&(log_sys->mutex));
01147 }
01148 
01149 /******************************************************/
/* Fills in the header buffer of one log file of a group (group id, start
lsn, and a blanked hot-backup label) and, if log_do_write is set, writes
that header block to the start of the file. The caller must hold
log_sys->mutex. */
01151 static
01152 void
01153 log_group_file_header_flush(
01154 /*========================*/
01155   log_group_t*  group,    /* in: log group */
01156   ulint   nth_file, /* in: index of the file within the group; must be
          less than group->n_files */
01158   ib_uint64_t start_lsn)  /* in: lsn stored in the header at
          LOG_FILE_START_LSN */
01160 {
01161   byte* buf;
01162   ulint dest_offset;
01163 
01164   ut_ad(mutex_own(&(log_sys->mutex)));
01165   ut_ad(!recv_no_log_write);
01166   ut_a(nth_file < group->n_files);
01167 
        /* Each file of the group has its own header buffer */
01168   buf = *(group->file_header_bufs + nth_file);
01169 
01170   mach_write_to_4(buf + LOG_GROUP_ID, group->id);
01171   mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn);
01172 
01173   /* Wipe over possible label of ibbackup --restore */
01174   memcpy(buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, "    ", 4);
01175 
        /* Byte offset of the file start within the group */
01176   dest_offset = nth_file * group->file_size;
01177 
01178 #ifdef UNIV_DEBUG
01179   if (log_debug_writes) {
01180     fprintf(stderr,
01181       "Writing log file header to group %lu file %lu\n",
01182       (ulong) group->id, (ulong) nth_file);
01183   }
01184 #endif /* UNIV_DEBUG */
01185   if (log_do_write) {
01186     log_sys->n_log_ios++;
01187 
          /* The pending counter is raised only around the fil_io call */
01188     srv_os_log_pending_writes++;
01189 
          /* One OS_FILE_LOG_BLOCK_SIZE block is written; the offset is
          passed as a page number plus a byte offset within the page */
01190     fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id, 0,
01191            dest_offset / UNIV_PAGE_SIZE,
01192            dest_offset % UNIV_PAGE_SIZE,
01193            OS_FILE_LOG_BLOCK_SIZE,
01194            buf, group);
01195 
01196     srv_os_log_pending_writes--;
01197   }
01198 }
01199 
01200 /******************************************************/
/* Computes the checksum of a log block with log_block_calc_checksum() and
stores it in the block with log_block_set_checksum(). */
01204 static
01205 void
01206 log_block_store_checksum(
01207 /*=====================*/
01208   byte* block)  /* in/out: log block whose checksum field is updated */
01209 {
01210   log_block_set_checksum(block, log_block_calc_checksum(block));
01211 }
01212 
01213 /******************************************************/
/* Writes a buffer of whole log blocks to a log group, splitting the write
at log file boundaries. Before each piece is written, the checksum of every
block in it is computed and stored. A file header is written first whenever
a piece starts at offset LOG_FILE_HDR_SIZE of a file and write_header is
set. The caller must hold log_sys->mutex. */
01215 UNIV_INTERN
01216 void
01217 log_group_write_buf(
01218 /*================*/
01219   log_group_t*  group,    /* in: log group */
01220   byte*   buf,    /* in/out: buffer of log blocks; block checksums
          are stored into it */
01221   ulint   len,    /* in: buffer length; must be a multiple of
          OS_FILE_LOG_BLOCK_SIZE */
01223   ib_uint64_t start_lsn,  /* in: lsn of the start of buf; its low
          bits must be aligned to OS_FILE_LOG_BLOCK_SIZE */
01226   ulint   new_data_offset)/* in: if 0, a file header may be written
          for the first piece as well; otherwise only for
          pieces that follow a file boundary */
01230 {
01231   ulint write_len;
01232   ibool write_header;
01233   ulint next_offset;
01234   ulint i;
01235 
01236   ut_ad(mutex_own(&(log_sys->mutex)));
01237   ut_ad(!recv_no_log_write);
01238   ut_a(len % OS_FILE_LOG_BLOCK_SIZE == 0);
01239   ut_a(((ulint) start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
01240 
01241   if (new_data_offset == 0) {
01242     write_header = TRUE;
01243   } else {
01244     write_header = FALSE;
01245   }
01246 loop:
01247   if (len == 0) {
01248 
01249     return;
01250   }
01251 
01252   next_offset = log_group_calc_lsn_offset(start_lsn, group);
01253 
01254   if ((next_offset % group->file_size == LOG_FILE_HDR_SIZE)
01255       && write_header) {
01256     /* We start to write a new log file instance in the group */
01257 
01258     log_group_file_header_flush(group,
01259               next_offset / group->file_size,
01260               start_lsn);
01261     srv_os_log_written+= OS_FILE_LOG_BLOCK_SIZE;
01262     srv_log_writes++;
01263   }
01264 
        /* Clip the piece so that it does not cross the end of the
        current log file */
01265   if ((next_offset % group->file_size) + len > group->file_size) {
01266 
01267     write_len = group->file_size
01268       - (next_offset % group->file_size);
01269   } else {
01270     write_len = len;
01271   }
01272 
01273 #ifdef UNIV_DEBUG
01274   if (log_debug_writes) {
01275 
          /* PRIu64 is separated from the adjacent string literals by
          spaces so that C++11 and later do not parse it as a literal
          suffix; the resulting format string is unchanged */
01276     fprintf(stderr,
01277       "Writing log file segment to group %lu"
01278       " offset %lu len %lu\n"
01279       "start lsn %" PRIu64 "\n"
01280       "First block n:o %lu last block n:o %lu\n",
01281       (ulong) group->id, (ulong) next_offset,
01282       (ulong) write_len,
01283       start_lsn,
01284       (ulong) log_block_get_hdr_no(buf),
01285       (ulong) log_block_get_hdr_no(
01286         buf + write_len - OS_FILE_LOG_BLOCK_SIZE));
01287     ut_a(log_block_get_hdr_no(buf)
01288          == log_block_convert_lsn_to_no(start_lsn));
01289 
          /* The header numbers of the blocks must be consecutive */
01290     for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) {
01291 
01292       ut_a(log_block_get_hdr_no(buf) + i
01293            == log_block_get_hdr_no(
01294              buf + i * OS_FILE_LOG_BLOCK_SIZE));
01295     }
01296   }
01297 #endif /* UNIV_DEBUG */
01298   /* Calculate the checksums for each log block and write them to
01299   the trailer fields of the log blocks */
01300 
01301   for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) {
01302     log_block_store_checksum(buf + i * OS_FILE_LOG_BLOCK_SIZE);
01303   }
01304 
01305   if (log_do_write) {
01306     log_sys->n_log_ios++;
01307 
01308     srv_os_log_pending_writes++;
01309 
01310     fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id, 0,
01311            next_offset / UNIV_PAGE_SIZE,
01312            next_offset % UNIV_PAGE_SIZE, write_len, buf, group);
01313 
01314     srv_os_log_pending_writes--;
01315 
01316     srv_os_log_written+= write_len;
01317     srv_log_writes++;
01318   }
01319 
01320   if (write_len < len) {
          /* The write was clipped at a file boundary: continue with
          the rest, which starts a new file and so needs a header */
01321     start_lsn += write_len;
01322     len -= write_len;
01323     buf += write_len;
01324 
01325     write_header = TRUE;
01326 
01327     goto loop;
01328   }
01329 }
01330 
01331 /******************************************************/
/* Makes sure the log has been written (and, if requested, flushed to disk)
at least up to the given lsn. If no write is in progress, this thread writes
the unwritten part of the log buffer to all log groups itself and returns
when that is done. If a write is already in progress and will cover the
lsn, the function waits according to 'wait'; otherwise it waits for that
write to end and retries. Returns immediately, doing nothing, while
recv_no_ibuf_operations is set. */
01336 UNIV_INTERN
01337 void
01338 log_write_up_to(
01339 /*============*/
01340   ib_uint64_t lsn,  /* in: lsn up to which the log should be written */
01343   ulint   wait, /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP or
          LOG_WAIT_ALL_GROUPS; consulted in the early-exit
          test and when waiting for a write already running */
01345   ibool   flush_to_disk)/* in: TRUE if the written log should also be
          flushed to disk */
01348 {
01349   log_group_t*  group;
01350   ulint   start_offset;
01351   ulint   end_offset;
01352   ulint   area_start;
01353   ulint   area_end;
01354 #ifdef UNIV_DEBUG
01355   ulint   loop_count  = 0;
01356 #endif /* UNIV_DEBUG */
01357   ulint   unlock;
01358 
01359   if (recv_no_ibuf_operations) {
01360     /* Recovery is running and no operations on the log files are
01361     allowed yet (the variable name .._no_ibuf_.. is misleading) */
01362 
01363     return;
01364   }
01365 
01366 loop:
01367 #ifdef UNIV_DEBUG
01368   loop_count++;
01369 
01370   ut_ad(loop_count < 5);
01371 
01372 # if 0
01373   if (loop_count > 2) {
01374     fprintf(stderr, "Log loop count %lu\n", loop_count);
01375   }
01376 # endif
01377 #endif
01378 
01379   mutex_enter(&(log_sys->mutex));
01380   ut_ad(!recv_no_log_write);
01381 
        /* Early exit: the requested lsn is already flushed to disk */
01382   if (flush_to_disk
01383       && log_sys->flushed_to_disk_lsn >= lsn) {
01384 
01385     mutex_exit(&(log_sys->mutex));
01386 
01387     return;
01388   }
01389 
        /* Early exit: no flush requested and the lsn is already written
        to all groups, or to some group when that is enough */
01390   if (!flush_to_disk
01391       && (log_sys->written_to_all_lsn >= lsn
01392     || (log_sys->written_to_some_lsn >= lsn
01393         && wait != LOG_WAIT_ALL_GROUPS))) {
01394 
01395     mutex_exit(&(log_sys->mutex));
01396 
01397     return;
01398   }
01399 
01400   if (log_sys->n_pending_writes > 0) {
01401     /* A write (+ possibly flush to disk) is running */
01402 
01403     if (flush_to_disk
01404         && log_sys->current_flush_lsn >= lsn) {
01405       /* The write + flush will write enough: wait for it to
01406       complete  */
01407 
01408       goto do_waits;
01409     }
01410 
01411     if (!flush_to_disk
01412         && log_sys->write_lsn >= lsn) {
01413       /* The write will write enough: wait for it to
01414       complete  */
01415 
01416       goto do_waits;
01417     }
01418 
01419     mutex_exit(&(log_sys->mutex));
01420 
01421     /* Wait for the write to complete and try to start a new
01422     write */
01423 
01424     os_event_wait(log_sys->no_flush_event);
01425 
01426     goto loop;
01427   }
01428 
01429   if (!flush_to_disk
01430       && log_sys->buf_free == log_sys->buf_next_to_write) {
01431     /* Nothing to write and no flush to disk requested */
01432 
01433     mutex_exit(&(log_sys->mutex));
01434 
01435     return;
01436   }
01437 
01438 #ifdef UNIV_DEBUG
01439   if (log_debug_writes) {
          /* PRIu64 is separated from the adjacent string literals by
          spaces so that C++11 and later do not parse it as a literal
          suffix; the resulting format string is unchanged */
01440     fprintf(stderr,
01441       "Writing log from %" PRIu64 " up to lsn %" PRIu64 "\n",
01442       log_sys->written_to_all_lsn,
01443       log_sys->lsn);
01444   }
01445 #endif /* UNIV_DEBUG */
01446   log_sys->n_pending_writes++;
01447 
        /* Only the first group's pending counter is raised here, and only
        that counter is lowered again after the write below */
01448   group = UT_LIST_GET_FIRST(log_sys->log_groups);
01449   group->n_pending_writes++;  
01452   os_event_reset(log_sys->no_flush_event);
01453   os_event_reset(log_sys->one_flushed_event);
01454 
01455   start_offset = log_sys->buf_next_to_write;
01456   end_offset = log_sys->buf_free;
01457 
        /* The area written consists of whole log blocks */
01458   area_start = ut_calc_align_down(start_offset, OS_FILE_LOG_BLOCK_SIZE);
01459   area_end = ut_calc_align(end_offset, OS_FILE_LOG_BLOCK_SIZE);
01460 
01461   ut_ad(area_end - area_start > 0);
01462 
01463   log_sys->write_lsn = log_sys->lsn;
01464 
01465   if (flush_to_disk) {
01466     log_sys->current_flush_lsn = log_sys->lsn;
01467   }
01468 
01469   log_sys->one_flushed = FALSE;
01470 
        /* Mark the first block of the area with the flush bit and stamp
        the last block with the next checkpoint number */
01471   log_block_set_flush_bit(log_sys->buf + area_start, TRUE);
01472   log_block_set_checkpoint_no(
01473     log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
01474     log_sys->next_checkpoint_no);
01475 
01476   /* Copy the last, incompletely written, log block a log block length
01477   up, so that when the flush operation writes from the log buffer, the
01478   segment to write will not be changed by writers to the log */
01479 
01480   ut_memcpy(log_sys->buf + area_end,
01481       log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
01482       OS_FILE_LOG_BLOCK_SIZE);
01483 
01484   log_sys->buf_free += OS_FILE_LOG_BLOCK_SIZE;
01485   log_sys->write_end_offset = log_sys->buf_free;
01486 
01487   group = UT_LIST_GET_FIRST(log_sys->log_groups);
01488 
01489   /* Do the write to the log files */
01490 
01491   while (group) {
01492     log_group_write_buf(
01493       group, log_sys->buf + area_start,
01494       area_end - area_start,
01495       ut_uint64_align_down(log_sys->written_to_all_lsn,
01496                OS_FILE_LOG_BLOCK_SIZE),
01497       start_offset - area_start);
01498 
01499     log_group_set_fields(group, log_sys->write_lsn);
01500 
01501     group = UT_LIST_GET_NEXT(log_groups, group);
01502   }
01503 
01504   mutex_exit(&(log_sys->mutex));
01505 
        /* NOTE(review): flushed_to_disk_lsn is assigned below without
        holding log_sys->mutex, although it is read under the mutex at the
        top of this function - confirm that this is intentional */
01506   if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
01507     /* O_DSYNC means the OS did not buffer the log file at all:
01508     so we have also flushed to disk what we have written */
01509 
01510     log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
01511 
01512   } else if (flush_to_disk) {
01513 
          /* Only the first group's file space is flushed */
01514     group = UT_LIST_GET_FIRST(log_sys->log_groups);
01515 
01516     fil_flush(group->space_id);
01517     log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
01518   }
01519 
01520   mutex_enter(&(log_sys->mutex));
01521 
01522   group = UT_LIST_GET_FIRST(log_sys->log_groups);
01523 
01524   ut_a(group->n_pending_writes == 1);
01525   ut_a(log_sys->n_pending_writes == 1);
01526 
01527   group->n_pending_writes--;
01528   log_sys->n_pending_writes--;
01529 
        /* Update the written-lsn bookkeeping and signal the waiters while
        holding the mutex */
01530   unlock = log_group_check_flush_completion(group);
01531   unlock = unlock | log_sys_check_flush_completion();
01532 
01533   log_flush_do_unlocks(unlock);
01534 
01535   mutex_exit(&(log_sys->mutex));
01536 
01537   return;
01538 
01539 do_waits:
        /* A write started by another thread covers the request: wait for
        it as requested by the caller */
01540   mutex_exit(&(log_sys->mutex));
01541 
01542   switch (wait) {
01543   case LOG_WAIT_ONE_GROUP:
01544     os_event_wait(log_sys->one_flushed_event);
01545     break;
01546   case LOG_WAIT_ALL_GROUPS:
01547     os_event_wait(log_sys->no_flush_event);
01548     break;
01549 #ifdef UNIV_DEBUG
01550   case LOG_NO_WAIT:
01551     break;
01552   default:
01553     ut_error;
01554 #endif /* UNIV_DEBUG */
01555   }
01556 }
01557 
01558 /****************************************************************/
/* Writes the log up to the current log_sys->lsn and requests a flush to
disk, by calling log_write_up_to() with LOG_WAIT_ALL_GROUPS and
flush_to_disk TRUE. */
01560 UNIV_INTERN
01561 void
01562 log_buffer_flush_to_disk(void)
01563 /*==========================*/
01564 {
01565   ib_uint64_t lsn;
01566 
        /* Sample the current lsn under the mutex; the write itself is
        started after the mutex has been released */
01567   mutex_enter(&(log_sys->mutex));
01568 
01569   lsn = log_sys->lsn;
01570 
01571   mutex_exit(&(log_sys->mutex));
01572 
01573   log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE);
01574 }
01575 
01576 /****************************************************************/
/* Writes the log up to the current log_sys->lsn by calling
log_write_up_to() with LOG_NO_WAIT, so a write that is already in progress
and covers that lsn is not waited for. */
01581 UNIV_INTERN
01582 void
01583 log_buffer_sync_in_background(
01584 /*==========================*/
01585   ibool flush)  /* in: passed on as the flush_to_disk argument of
          log_write_up_to() */
01586 {
01587   ib_uint64_t lsn;
01588 
        /* Sample the current lsn under the mutex */
01589   mutex_enter(&(log_sys->mutex));
01590 
01591   lsn = log_sys->lsn;
01592 
01593   mutex_exit(&(log_sys->mutex));
01594 
01595   log_write_up_to(lsn, LOG_NO_WAIT, flush);
01596 }
01597 
01598 /********************************************************************
01599 
01600 Tries to establish a big enough margin of free space in the log buffer, such
01601 that a new log entry can be catenated without an immediate need for a flush. */
/* Starts a log write (without flush to disk and without waiting) when the
log buffer is filled beyond log->max_buf_free and no write is pending. */
01602 static
01603 void
01604 log_flush_margin(void)
01605 /*==================*/
01606 {
01607   log_t*    log = log_sys;
01608   ib_uint64_t lsn = 0; /* 0 means that no write is needed */
01609 
01610   mutex_enter(&(log->mutex));
01611 
01612   if (log->buf_free > log->max_buf_free) {
01613 
01614     if (log->n_pending_writes > 0) {
01615       /* A flush is running: hope that it will provide enough
01616       free space */
01617     } else {
01618       lsn = log->lsn;
01619     }
01620   }
01621 
01622   mutex_exit(&(log->mutex));
01623 
        /* The write is started only after the mutex has been released */
01624   if (lsn) {
01625     log_write_up_to(lsn, LOG_NO_WAIT, FALSE);
01626   }
01627 }
01628 
01629 /****************************************************************/
/* Starts a buffer pool flush-list batch via buf_flush_list(ULINT_MAX,
new_oldest) and optionally waits for the batch to end. If recovery is on,
the hashed log records are applied first.
Returns FALSE if buf_flush_list() returned ULINT_UNDEFINED, TRUE
otherwise. */
01635 UNIV_INTERN
01636 ibool
01637 log_preflush_pool_modified_pages(
01638 /*=============================*/
01639   ib_uint64_t new_oldest, /* in: lsn limit handed to buf_flush_list() */
01642   ibool   sync)   /* in: TRUE if the function should wait for the
          BUF_FLUSH_LIST batch to end */
01644 {
01645   ulint n_pages;
01646 
01647   if (recv_recovery_on) {
01648     /* If the recovery is running, we must first apply all
01649     log records to their respective file pages to get the
01650     right modify lsn values to these pages: otherwise, there
01651     might be pages on disk which are not yet recovered to the
01652     current lsn, and even after calling this function, we could
01653     not know how up-to-date the disk version of the database is,
01654     and we could not make a new checkpoint on the basis of the
01655     info on the buffer pool only. */
01656 
01657     recv_apply_hashed_log_recs(TRUE);
01658   }
01659 
01660   n_pages = buf_flush_list(ULINT_MAX, new_oldest);
01661 
        /* The wait is done even when the batch reported
        ULINT_UNDEFINED */
01662   if (sync) {
01663     buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
01664   }
01665 
        /* NOTE(review): ULINT_UNDEFINED presumably means that another
        flush batch was already running (see the comment in
        log_checkpoint_margin()) - confirm against buf_flush_list() */
01666   if (n_pages == ULINT_UNDEFINED) {
01667 
01668     return(FALSE);
01669   }
01670 
01671   return(TRUE);
01672 }
01673 
01674 /******************************************************/
/* Completes a checkpoint once no checkpoint writes are pending: increments
the next checkpoint number, publishes next_checkpoint_lsn as
last_checkpoint_lsn and releases the x-lock on log_sys->checkpoint_lock.
The caller must hold log_sys->mutex. */
01676 static
01677 void
01678 log_complete_checkpoint(void)
01679 /*=========================*/
01680 {
01681   ut_ad(mutex_own(&(log_sys->mutex)));
01682   ut_ad(log_sys->n_pending_checkpoint_writes == 0);
01683 
01684   log_sys->next_checkpoint_no++;
01685 
01686   log_sys->last_checkpoint_lsn = log_sys->next_checkpoint_lsn;
01687 
        /* Releases the lock taken in log_group_checkpoint(); threads
        waiting in log_checkpoint() with an s-lock request can proceed */
01688   rw_lock_x_unlock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT);
01689 }
01690 
01691 /******************************************************/
/* Records the completion of one checkpoint write: decrements
log_sys->n_pending_checkpoint_writes under log_sys->mutex and, when the
count reaches zero, completes the checkpoint. */
01693 static
01694 void
01695 log_io_complete_checkpoint(void)
01696 /*============================*/
01697 {
01698   mutex_enter(&(log_sys->mutex));
01699 
01700   ut_ad(log_sys->n_pending_checkpoint_writes > 0);
01701 
01702   log_sys->n_pending_checkpoint_writes--;
01703 
        /* The last pending write finishes the checkpoint */
01704   if (log_sys->n_pending_checkpoint_writes == 0) {
01705     log_complete_checkpoint();
01706   }
01707 
01708   mutex_exit(&(log_sys->mutex));
01709 }
01710 
01711 /*******************************************************************/
/* Writes the archived file number and archived offset of the nth log group
into a checkpoint info buffer. Each group occupies 8 bytes in the array at
LOG_CHECKPOINT_GROUP_ARRAY; both values are stored as 4-byte fields. */
01713 static
01714 void
01715 log_checkpoint_set_nth_group_info(
01716 /*==============================*/
01717   byte* buf,  /* in/out: checkpoint info buffer */
01718   ulint n,  /* in: group index; must be less than
        LOG_MAX_N_GROUPS */
01719   ulint file_no,/* in: value stored at
        LOG_CHECKPOINT_ARCHIVED_FILE_NO */
01720   ulint offset) /* in: value stored at
        LOG_CHECKPOINT_ARCHIVED_OFFSET */
01721 {
01722   ut_ad(n < LOG_MAX_N_GROUPS);
01723 
01724   mach_write_to_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
01725       + 8 * n + LOG_CHECKPOINT_ARCHIVED_FILE_NO, file_no);
01726   mach_write_to_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
01727       + 8 * n + LOG_CHECKPOINT_ARCHIVED_OFFSET, offset);
01728 }
01729 
01730 /*******************************************************************/
/* Reads the archived file number and archived offset of the nth log group
from a checkpoint info buffer, using the same layout as
log_checkpoint_set_nth_group_info(): 8 bytes per group in the array at
LOG_CHECKPOINT_GROUP_ARRAY. */
01732 UNIV_INTERN
01733 void
01734 log_checkpoint_get_nth_group_info(
01735 /*==============================*/
01736   const byte* buf,  /* in: checkpoint info buffer */
01737   ulint   n,  /* in: group index; must be less than
          LOG_MAX_N_GROUPS */
01738   ulint*    file_no,/* out: value read from
          LOG_CHECKPOINT_ARCHIVED_FILE_NO */
01739   ulint*    offset) /* out: value read from
          LOG_CHECKPOINT_ARCHIVED_OFFSET */
01740 {
01741   ut_ad(n < LOG_MAX_N_GROUPS);
01742 
01743   *file_no = mach_read_from_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
01744             + 8 * n + LOG_CHECKPOINT_ARCHIVED_FILE_NO);
01745   *offset = mach_read_from_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
01746            + 8 * n + LOG_CHECKPOINT_ARCHIVED_OFFSET);
01747 }
01748 
01749 /******************************************************/
/* Builds the checkpoint info block for a log group in
group->checkpoint_buf and, if log_do_write is set, issues its write to one
of the two checkpoint slots of the group's first log file. The first
pending checkpoint write takes an x-lock on log_sys->checkpoint_lock. The
caller must hold log_sys->mutex. */
01751 static
01752 void
01753 log_group_checkpoint(
01754 /*=================*/
01755   log_group_t*  group)  /* in: log group */
01756 {
01757   log_group_t*  group2;
01758 #ifdef UNIV_LOG_ARCHIVE
01759   ib_uint64_t archived_lsn;
01760   ib_uint64_t next_archived_lsn;
01761 #endif /* UNIV_LOG_ARCHIVE */
01762   ulint   write_offset;
01763   ulint   fold;
01764   byte*   buf;
01765   ulint   i;
01766 
01767   ut_ad(mutex_own(&(log_sys->mutex)));
        /* Compile-time check: the checkpoint info must fit in the single
        log block that is written below */
01768 #if LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE
01769 # error "LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE"
01770 #endif
01771 
01772   buf = group->checkpoint_buf;
01773 
01774   mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no);
01775   mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys->next_checkpoint_lsn);
01776 
        /* Offset of the checkpoint lsn within this group */
01777   mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET,
01778       log_group_calc_lsn_offset(
01779         log_sys->next_checkpoint_lsn, group));
01780 
01781   mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, log_sys->buf_size);
01782 
01783 #ifdef UNIV_LOG_ARCHIVE
01784   if (log_sys->archiving_state == LOG_ARCH_OFF) {
01785     archived_lsn = IB_ULONGLONG_MAX;
01786   } else {
01787     archived_lsn = log_sys->archived_lsn;
01788 
01789     if (archived_lsn != log_sys->next_archived_lsn) {
            /* next_archived_lsn is assigned but not used anywhere
            else in this function */
01790       next_archived_lsn = log_sys->next_archived_lsn;
01791       /* For debugging only */
01792     }
01793   }
01794 
01795   mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, archived_lsn);
01796 #else /* UNIV_LOG_ARCHIVE */
01797   mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, IB_ULONGLONG_MAX);
01798 #endif /* UNIV_LOG_ARCHIVE */
01799 
        /* Zero the info of every group slot, then fill in the slots of
        the groups that exist */
01800   for (i = 0; i < LOG_MAX_N_GROUPS; i++) {
01801     log_checkpoint_set_nth_group_info(buf, i, 0, 0);
01802   }
01803 
01804   group2 = UT_LIST_GET_FIRST(log_sys->log_groups);
01805 
01806   while (group2) {
01807     log_checkpoint_set_nth_group_info(buf, group2->id,
01808 #ifdef UNIV_LOG_ARCHIVE
01809               group2->archived_file_no,
01810               group2->archived_offset
01811 #else /* UNIV_LOG_ARCHIVE */
01812               0, 0
01813 #endif /* UNIV_LOG_ARCHIVE */
01814               );
01815 
01816     group2 = UT_LIST_GET_NEXT(log_groups, group2);
01817   }
01818 
        /* First checksum: over the bytes preceding
        LOG_CHECKPOINT_CHECKSUM_1 */
01819   fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
01820   mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold);
01821 
        /* Second checksum: from LOG_CHECKPOINT_LSN up to
        LOG_CHECKPOINT_CHECKSUM_2 */
01822   fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
01823             LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
01824   mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold);
01825 
01826   /* Starting from InnoDB-3.23.50, we also write info on allocated
01827   size in the tablespace */
01828 
01829   mach_write_to_4(buf + LOG_CHECKPOINT_FSP_FREE_LIMIT,
01830       log_fsp_current_free_limit);
01831 
01832   mach_write_to_4(buf + LOG_CHECKPOINT_FSP_MAGIC_N,
01833       LOG_CHECKPOINT_FSP_MAGIC_N_VAL);
01834 
01835   /* We alternate the physical place of the checkpoint info in the first
01836   log file */
01837 
        /* Even checkpoint numbers use slot 1, odd ones slot 2 */
01838   if ((log_sys->next_checkpoint_no & 1) == 0) {
01839     write_offset = LOG_CHECKPOINT_1;
01840   } else {
01841     write_offset = LOG_CHECKPOINT_2;
01842   }
01843 
01844   if (log_do_write) {
          /* The x-lock is taken by the first pending write only; it
          is released in log_complete_checkpoint() */
01845     if (log_sys->n_pending_checkpoint_writes == 0) {
01846 
01847       rw_lock_x_lock_gen(&(log_sys->checkpoint_lock),
01848              LOG_CHECKPOINT);
01849     }
01850 
01851     log_sys->n_pending_checkpoint_writes++;
01852 
01853     log_sys->n_log_ios++;
01854 
01855     /* We send as the last parameter the group machine address
01856     added with 1, as we want to distinguish between a normal log
01857     file write and a checkpoint field write */
01858 
01859     fil_io(OS_FILE_WRITE | OS_FILE_LOG, FALSE, group->space_id, 0,
01860            write_offset / UNIV_PAGE_SIZE,
01861            write_offset % UNIV_PAGE_SIZE,
01862            OS_FILE_LOG_BLOCK_SIZE,
01863            buf, ((byte*)group + 1));
01864 
          /* The tagging above relies on the group address having its
          lowest bit clear. NOTE(review): this is asserted only after
          the i/o has been issued */
01865     ut_ad(((ulint)group & 0x1UL) == 0);
01866   }
01867 }
01868 #endif /* !UNIV_HOTBACKUP */
01869 
01870 #ifdef UNIV_HOTBACKUP
01871 /******************************************************/
/* Writes a fresh log file header and the first checkpoint info into a
header buffer (compiled only when UNIV_HOTBACKUP is defined). The header
gets group id 0, the given start lsn and an "ibbackup " label followed by
a timestamp. The checkpoint at LOG_CHECKPOINT_1 gets number 0, lsn
start + LOG_BLOCK_HDR_SIZE and both checksums. */
01874 UNIV_INTERN
01875 void
01876 log_reset_first_header_and_checkpoint(
01877 /*==================================*/
01878   byte*   hdr_buf,/* in/out: buffer receiving the header and the
          checkpoint info; NOTE(review): presumably at
          least LOG_FILE_HDR_SIZE bytes - confirm */
01880   ib_uint64_t start)  /* in: lsn written at LOG_FILE_START_LSN */
01883 {
01884   ulint   fold;
01885   byte*   buf;
01886   ib_uint64_t lsn;
01887 
01888   mach_write_to_4(hdr_buf + LOG_GROUP_ID, 0);
01889   mach_write_to_8(hdr_buf + LOG_FILE_START_LSN, start);
01890 
        /* The checkpoint lsn lies just past the header of the first log
        block */
01891   lsn = start + LOG_BLOCK_HDR_SIZE;
01892 
01893   /* Write the label of ibbackup --restore */
01894   strcpy((char*) hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
01895          "ibbackup ");
        /* The timestamp goes right after the label, over the terminating
        NUL that strcpy wrote */
01896   ut_sprintf_timestamp((char*) hdr_buf
01897            + (LOG_FILE_WAS_CREATED_BY_HOT_BACKUP
01898         + (sizeof "ibbackup ") - 1));
01899   buf = hdr_buf + LOG_CHECKPOINT_1;
01900 
01901   mach_write_to_8(buf + LOG_CHECKPOINT_NO, 0);
01902   mach_write_to_8(buf + LOG_CHECKPOINT_LSN, lsn);
01903 
01904   mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET,
01905       LOG_FILE_HDR_SIZE + LOG_BLOCK_HDR_SIZE);
01906 
        /* A fixed log buffer size of 2 MiB is recorded */
01907   mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, 2 * 1024 * 1024);
01908 
01909   mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, IB_ULONGLONG_MAX);
01910 
        /* Same two checksums as written by log_group_checkpoint() */
01911   fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
01912   mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold);
01913 
01914   fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
01915             LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
01916   mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold);
01917 
01918   /* Starting from InnoDB-3.23.50, we should also write info on
01919   allocated size in the tablespace, but unfortunately we do not
01920   know it here */
01921 }
01922 #endif /* UNIV_HOTBACKUP */
01923 
01924 #ifndef UNIV_HOTBACKUP
01925 /******************************************************/
/* Reads one checkpoint info block of a log group into
log_sys->checkpoint_buf. The caller must hold log_sys->mutex. */
01927 UNIV_INTERN
01928 void
01929 log_group_read_checkpoint_info(
01930 /*===========================*/
01931   log_group_t*  group,  /* in: log group */
01932   ulint   field)  /* in: byte offset of the checkpoint block in the
          group's file space; NOTE(review): presumably
          LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 - confirm
          against callers */
01933 {
01934   ut_ad(mutex_own(&(log_sys->mutex)));
01935 
01936   log_sys->n_log_ios++;
01937 
        /* One OS_FILE_LOG_BLOCK_SIZE block is read; the offset is passed
        as a page number plus a byte offset within the page */
01938   fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->space_id, 0,
01939          field / UNIV_PAGE_SIZE, field % UNIV_PAGE_SIZE,
01940          OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL);
01941 }
01942 
01943 /******************************************************/
/* Calls log_group_checkpoint() for every group in log_sys->log_groups. The
caller must hold log_sys->mutex. */
01945 UNIV_INTERN
01946 void
01947 log_groups_write_checkpoint_info(void)
01948 /*==================================*/
01949 {
01950   log_group_t*  group;
01951 
01952   ut_ad(mutex_own(&(log_sys->mutex)));
01953 
01954   group = UT_LIST_GET_FIRST(log_sys->log_groups);
01955 
        /* Walk the list of log groups */
01956   while (group) {
01957     log_group_checkpoint(group);
01958 
01959     group = UT_LIST_GET_NEXT(log_groups, group);
01960   }
01961 }
01962 
01963 /******************************************************/
/* Makes a checkpoint at the oldest modification lsn reported by
log_buf_pool_get_oldest_modification(). The log is first written and
flushed up to that lsn, then the checkpoint info is written to all log
groups. This function does not flush buffer pool pages itself.
Returns TRUE if the checkpoint write was started, or if no checkpoint was
needed; FALSE if a checkpoint write was already running. */
01969 UNIV_INTERN
01970 ibool
01971 log_checkpoint(
01972 /*===========*/
01973   ibool sync,   /* in: TRUE if the function should wait for the
          checkpoint write to complete */
01975   ibool write_always) /* in: TRUE to write the checkpoint info even
          if last_checkpoint_lsn already covers the oldest
          modification lsn */
01981 {
01982   ib_uint64_t oldest_lsn;
01983 
01984   if (recv_recovery_is_on()) {
01985     recv_apply_hashed_log_recs(TRUE);
01986   }
01987 
01988   if (srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
01989     fil_flush_file_spaces(FIL_TABLESPACE);
01990   }
01991 
01992   mutex_enter(&(log_sys->mutex));
01993 
01994   ut_ad(!recv_no_log_write);
01995   oldest_lsn = log_buf_pool_get_oldest_modification();
01996 
01997   mutex_exit(&(log_sys->mutex));
01998 
01999   /* Because log also contains headers and dummy log records,
02000   if the buffer pool contains no dirty buffers, oldest_lsn
02001   gets the value log_sys->lsn from the previous function,
02002   and we must make sure that the log is flushed up to that
02003   lsn. If there are dirty buffers in the buffer pool, then our
02004   write-ahead-logging algorithm ensures that the log has been flushed
02005   up to oldest_lsn. */
02006 
02007   log_write_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
02008 
02009   mutex_enter(&(log_sys->mutex));
02010 
        /* Nothing to do if the last checkpoint already covers oldest_lsn
        and the caller did not force a write */
02011   if (!write_always
02012       && log_sys->last_checkpoint_lsn >= oldest_lsn) {
02013 
02014     mutex_exit(&(log_sys->mutex));
02015 
02016     return(TRUE);
02017   }
02018 
02019   ut_ad(log_sys->flushed_to_disk_lsn >= oldest_lsn);
02020 
02021   if (log_sys->n_pending_checkpoint_writes > 0) {
02022     /* A checkpoint write is running */
02023 
02024     mutex_exit(&(log_sys->mutex));
02025 
02026     if (sync) {
02027       /* Wait for the checkpoint write to complete */
            /* The s-lock is granted only once the x-lock held for
            the duration of the checkpoint write is released */
02028       rw_lock_s_lock(&(log_sys->checkpoint_lock));
02029       rw_lock_s_unlock(&(log_sys->checkpoint_lock));
02030     }
02031 
02032     return(FALSE);
02033   }
02034 
02035   log_sys->next_checkpoint_lsn = oldest_lsn;
02036 
02037 #ifdef UNIV_DEBUG
02038   if (log_debug_writes) {
          /* PRIu64 is separated from the adjacent string literals by
          spaces so that C++11 and later do not parse it as a literal
          suffix; the resulting format string is unchanged */
02039     fprintf(stderr, "Making checkpoint no %lu at lsn %" PRIu64 "\n",
02040       (ulong) log_sys->next_checkpoint_no,
02041       oldest_lsn);
02042   }
02043 #endif /* UNIV_DEBUG */
02044 
02045   log_groups_write_checkpoint_info();
02046 
02047   mutex_exit(&(log_sys->mutex));
02048 
02049   if (sync) {
02050     /* Wait for the checkpoint write to complete */
02051     rw_lock_s_lock(&(log_sys->checkpoint_lock));
02052     rw_lock_s_unlock(&(log_sys->checkpoint_lock));
02053   }
02054 
02055   return(TRUE);
02056 }
02057 
02058 /****************************************************************/
/* Makes a checkpoint after synchronously preflushing buffer pool pages.
Both steps are retried in a busy loop until they report success:
log_preflush_pool_modified_pages(lsn, TRUE) and then
log_checkpoint(TRUE, write_always). */
02060 UNIV_INTERN
02061 void
02062 log_make_checkpoint_at(
02063 /*===================*/
02064   ib_uint64_t lsn,    /* in: lsn limit passed to
          log_preflush_pool_modified_pages() */
02067   ibool   write_always) /* in: passed on to log_checkpoint() */
02074 {
02075   /* Preflush pages synchronously */
02076 
02077   while (!log_preflush_pool_modified_pages(lsn, TRUE)) {}
02078 
        /* log_checkpoint() returns FALSE while another checkpoint write
        is running, so retry until it returns TRUE */
02079   while (!log_checkpoint(TRUE, write_always)) {}
02080 }
02081 
02082 /****************************************************************/
/* Checks the age of the oldest buffer pool modification and of the last
checkpoint against the limits stored in log_sys, and starts a preflush
and/or a checkpoint when a limit is exceeded. Exceeding a "sync" limit
makes the action synchronous and the whole check is repeated afterwards.
Does nothing if log->check_flush_or_checkpoint is FALSE. */
02087 static
02088 void
02089 log_checkpoint_margin(void)
02090 /*=======================*/
02091 {
02092   log_t*    log   = log_sys;
02093   ib_uint64_t age;    /* lsn minus oldest modification lsn */
02094   ib_uint64_t checkpoint_age; /* lsn minus last checkpoint lsn */
02095   ib_uint64_t advance;  /* how far to advance the oldest
            modification lsn; 0 = no preflush */
02096   ib_uint64_t oldest_lsn;
02097   ibool   sync;
02098   ibool   checkpoint_sync;
02099   ibool   do_checkpoint;
02100   ibool   success;
02101 loop:
02102   sync = FALSE;
02103   checkpoint_sync = FALSE;
02104   do_checkpoint = FALSE;
02105 
02106   mutex_enter(&(log->mutex));
02107   ut_ad(!recv_no_log_write);
02108 
02109   if (log->check_flush_or_checkpoint == FALSE) {
02110     mutex_exit(&(log->mutex));
02111 
02112     return;
02113   }
02114 
02115   oldest_lsn = log_buf_pool_get_oldest_modification();
02116 
02117   age = log->lsn - oldest_lsn;
02118 
02119   if (age > log->max_modified_age_sync) {
02120 
02121     /* A flush is urgent: we have to do a synchronous preflush */
02122 
02123     sync = TRUE;
          /* Advance by twice the amount the sync limit is exceeded */
02124     advance = 2 * (age - log->max_modified_age_sync);
02125   } else if (age > log->max_modified_age_async) {
02126 
02127     /* A flush is not urgent: we do an asynchronous preflush */
02128     advance = age - log->max_modified_age_async;
02129   } else {
02130     advance = 0;
02131   }
02132 
02133   checkpoint_age = log->lsn - log->last_checkpoint_lsn;
02134 
02135   if (checkpoint_age > log->max_checkpoint_age) {
02136     /* A checkpoint is urgent: we do it synchronously */
02137 
          /* check_flush_or_checkpoint stays set here, so the
          re-check after the checkpoint is not skipped */
02138     checkpoint_sync = TRUE;
02139 
02140     do_checkpoint = TRUE;
02141 
02142   } else if (checkpoint_age > log->max_checkpoint_age_async) {
02143     /* A checkpoint is not urgent: do it asynchronously */
02144 
02145     do_checkpoint = TRUE;
02146 
02147     log->check_flush_or_checkpoint = FALSE;
02148   } else {
02149     log->check_flush_or_checkpoint = FALSE;
02150   }
02151 
02152   mutex_exit(&(log->mutex));
02153 
02154   if (advance) {
02155     ib_uint64_t new_oldest = oldest_lsn + advance;
02156 
02157     success = log_preflush_pool_modified_pages(new_oldest, sync);
02158 
02159     /* If the flush succeeded, this thread has done its part
02160     and can proceed. If it did not succeed, there was another
02161     thread doing a flush at the same time. If sync was FALSE,
02162     the flush was not urgent, and we let this thread proceed.
02163     Otherwise, we let it start from the beginning again. */
02164 
02165     if (sync && !success) {
02166       mutex_enter(&(log->mutex));
02167 
            /* Re-arm the flag, which may have been cleared above,
            so that the retry does not return immediately */
02168       log->check_flush_or_checkpoint = TRUE;
02169 
02170       mutex_exit(&(log->mutex));
02171       goto loop;
02172     }
02173   }
02174 
02175   if (do_checkpoint) {
          /* The return value of log_checkpoint() is not checked */
02176     log_checkpoint(checkpoint_sync, FALSE);
02177 
02178     if (checkpoint_sync) {
02179 
02180       goto loop;
02181     }
02182   }
02183 }
02184 
02185 /******************************************************/
02187 UNIV_INTERN
02188 void
02189 log_group_read_log_seg(
02190 /*===================*/
02191   ulint   type,   
02192   byte*   buf,    
02193   log_group_t*  group,    
02194   ib_uint64_t start_lsn,  
02195   ib_uint64_t end_lsn)  
02196 {
02197   ulint len;
02198   ulint source_offset;
02199   ibool sync;
02200 
02201   ut_ad(mutex_own(&(log_sys->mutex)));
02202 
02203   sync = (type == LOG_RECOVER);
02204 loop:
02205   source_offset = log_group_calc_lsn_offset(start_lsn, group);
02206 
02207   len = (ulint) (end_lsn - start_lsn);
02208 
02209   ut_ad(len != 0);
02210 
02211   if ((source_offset % group->file_size) + len > group->file_size) {
02212 
02213     len = group->file_size - (source_offset % group->file_size);
02214   }
02215 
02216 #ifdef UNIV_LOG_ARCHIVE
02217   if (type == LOG_ARCHIVE) {
02218 
02219     log_sys->n_pending_archive_ios++;
02220   }
02221 #endif /* UNIV_LOG_ARCHIVE */
02222 
02223   log_sys->n_log_ios++;
02224 
02225   fil_io(OS_FILE_READ | OS_FILE_LOG, sync, group->space_id, 0,
02226          source_offset / UNIV_PAGE_SIZE, source_offset % UNIV_PAGE_SIZE,
02227          len, buf, NULL);
02228 
02229   start_lsn += len;
02230   buf += len;
02231 
02232   if (start_lsn != end_lsn) {
02233 
02234     goto loop;
02235   }
02236 }
02237 
02238 #ifdef UNIV_LOG_ARCHIVE
02239 /******************************************************/
02241 UNIV_INTERN
02242 void
02243 log_archived_file_name_gen(
02244 /*=======================*/
02245   char* buf,  
02246   ulint /*id __attribute__((unused))*/,
02249   ulint file_no)
02250 {
02251   sprintf(buf, "%sib_arch_log_%010lu", srv_arch_dir, (ulong) file_no);
02252 }
02253 
02254 /******************************************************/
02256 static
02257 void
02258 log_group_archive_file_header_write(
02259 /*================================*/
02260   log_group_t*  group,    
02261   ulint   nth_file, 
02263   ulint   file_no,  
02264   ib_uint64_t start_lsn)  
02266 {
02267   byte* buf;
02268   ulint dest_offset;
02269 
02270   ut_ad(mutex_own(&(log_sys->mutex)));
02271 
02272   ut_a(nth_file < group->n_files);
02273 
02274   buf = *(group->archive_file_header_bufs + nth_file);
02275 
02276   mach_write_to_4(buf + LOG_GROUP_ID, group->id);
02277   mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn);
02278   mach_write_to_4(buf + LOG_FILE_NO, file_no);
02279 
02280   mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, FALSE);
02281 
02282   dest_offset = nth_file * group->file_size;
02283 
02284   log_sys->n_log_ios++;
02285 
02286   fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->archive_space_id,
02287          dest_offset / UNIV_PAGE_SIZE,
02288          dest_offset % UNIV_PAGE_SIZE,
02289          2 * OS_FILE_LOG_BLOCK_SIZE,
02290          buf, &log_archive_io);
02291 }
02292 
02293 /******************************************************/
02295 static
02296 void
02297 log_group_archive_completed_header_write(
02298 /*=====================================*/
02299   log_group_t*  group,    
02300   ulint   nth_file, 
02302   ib_uint64_t end_lsn)  
02303 {
02304   byte* buf;
02305   ulint dest_offset;
02306 
02307   ut_ad(mutex_own(&(log_sys->mutex)));
02308   ut_a(nth_file < group->n_files);
02309 
02310   buf = *(group->archive_file_header_bufs + nth_file);
02311 
02312   mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, TRUE);
02313   mach_write_to_8(buf + LOG_FILE_END_LSN, end_lsn);
02314 
02315   dest_offset = nth_file * group->file_size + LOG_FILE_ARCH_COMPLETED;
02316 
02317   log_sys->n_log_ios++;
02318 
02319   fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->archive_space_id,
02320          dest_offset / UNIV_PAGE_SIZE,
02321          dest_offset % UNIV_PAGE_SIZE,
02322          OS_FILE_LOG_BLOCK_SIZE,
02323          buf + LOG_FILE_ARCH_COMPLETED,
02324          &log_archive_io);
02325 }
02326 
02327 /******************************************************/
02329 static
02330 void
02331 log_group_archive(
02332 /*==============*/
02333   log_group_t*  group)  
02334 {
02335   os_file_t  file_handle;
02336   ib_uint64_t start_lsn;
02337   ib_uint64_t end_lsn;
02338   char    name[1024];
02339   byte*   buf;
02340   ulint   len;
02341   ibool   ret;
02342   ulint   next_offset;
02343   ulint   n_files;
02344   ulint   open_mode;
02345 
02346   ut_ad(mutex_own(&(log_sys->mutex)));
02347 
02348   start_lsn = log_sys->archived_lsn;
02349 
02350   ut_a(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
02351 
02352   end_lsn = log_sys->next_archived_lsn;
02353 
02354   ut_a(end_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
02355 
02356   buf = log_sys->archive_buf;
02357 
02358   n_files = 0;
02359 
02360   next_offset = group->archived_offset;
02361 loop:
02362   if ((next_offset % group->file_size == 0)
02363       || (fil_space_get_size(group->archive_space_id) == 0)) {
02364 
02365     /* Add the file to the archive file space; create or open the
02366     file */
02367 
02368     if (next_offset % group->file_size == 0) {
02369       open_mode = OS_FILE_CREATE;
02370     } else {
02371       open_mode = OS_FILE_OPEN;
02372     }
02373 
02374     log_archived_file_name_gen(name, group->id,
02375              group->archived_file_no + n_files);
02376 
02377     file_handle = os_file_create(innodb_file_log_key,
02378                name, open_mode,
02379                OS_FILE_AIO,
02380                OS_DATA_FILE, &ret);
02381 
02382     if (!ret && (open_mode == OS_FILE_CREATE)) {
02383       file_handle = os_file_create(
02384         innodb_file_log_key, name, OS_FILE_OPEN,
02385         OS_FILE_AIO, OS_DATA_FILE, &ret);
02386     }
02387 
02388     if (!ret) {
02389       fprintf(stderr,
02390         "InnoDB: Cannot create or open"
02391         " archive log file %s.\n"
02392         "InnoDB: Cannot continue operation.\n"
02393         "InnoDB: Check that the log archive"
02394         " directory exists,\n"
02395         "InnoDB: you have access rights to it, and\n"
02396         "InnoDB: there is space available.\n", name);
02397       exit(1);
02398     }
02399 
02400 #ifdef UNIV_DEBUG
02401     if (log_debug_writes) {
02402       fprintf(stderr, "Created archive file %s\n", name);
02403     }
02404 #endif /* UNIV_DEBUG */
02405 
02406     ret = os_file_close(file_handle);
02407 
02408     ut_a(ret);
02409 
02410     /* Add the archive file as a node to the space */
02411 
02412     fil_node_create(name, group->file_size / UNIV_PAGE_SIZE,
02413         group->archive_space_id, FALSE);
02414 
02415     if (next_offset % group->file_size == 0) {
02416       log_group_archive_file_header_write(
02417         group, n_files,
02418         group->archived_file_no + n_files,
02419         start_lsn);
02420 
02421       next_offset += LOG_FILE_HDR_SIZE;
02422     }
02423   }
02424 
02425   len = end_lsn - start_lsn;
02426 
02427   if (group->file_size < (next_offset % group->file_size) + len) {
02428 
02429     len = group->file_size - (next_offset % group->file_size);
02430   }
02431 
02432 #ifdef UNIV_DEBUG
02433   if (log_debug_writes) {
02434     fprintf(stderr,
02435       "Archiving starting at lsn %"PRIu64", len %lu"
02436       " to group %lu\n",
02437       start_lsn,
02438       (ulong) len, (ulong) group->id);
02439   }
02440 #endif /* UNIV_DEBUG */
02441 
02442   log_sys->n_pending_archive_ios++;
02443 
02444   log_sys->n_log_ios++;
02445 
02446   fil_io(OS_FILE_WRITE | OS_FILE_LOG, FALSE, group->archive_space_id,
02447          next_offset / UNIV_PAGE_SIZE, next_offset % UNIV_PAGE_SIZE,
02448          ut_calc_align(len, OS_FILE_LOG_BLOCK_SIZE), buf,
02449          &log_archive_io);
02450 
02451   start_lsn += len;
02452   next_offset += len;
02453   buf += len;
02454 
02455   if (next_offset % group->file_size == 0) {
02456     n_files++;
02457   }
02458 
02459   if (end_lsn != start_lsn) {
02460 
02461     goto loop;
02462   }
02463 
02464   group->next_archived_file_no = group->archived_file_no + n_files;
02465   group->next_archived_offset = next_offset % group->file_size;
02466 
02467   ut_a(group->next_archived_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
02468 }
02469 
02470 /*****************************************************/
02473 static
02474 void
02475 log_archive_groups(void)
02476 /*====================*/
02477 {
02478   log_group_t*  group;
02479 
02480   ut_ad(mutex_own(&(log_sys->mutex)));
02481 
02482   group = UT_LIST_GET_FIRST(log_sys->log_groups);
02483 
02484   log_group_archive(group);
02485 }
02486 
02487 /*****************************************************/
02490 static
02491 void
02492 log_archive_write_complete_groups(void)
02493 /*===================================*/
02494 {
02495   log_group_t*  group;
02496   ulint   end_offset;
02497   ulint   trunc_files;
02498   ulint   n_files;
02499   ib_uint64_t start_lsn;
02500   ib_uint64_t end_lsn;
02501   ulint   i;
02502 
02503   ut_ad(mutex_own(&(log_sys->mutex)));
02504 
02505   group = UT_LIST_GET_FIRST(log_sys->log_groups);
02506 
02507   group->archived_file_no = group->next_archived_file_no;
02508   group->archived_offset = group->next_archived_offset;
02509 
02510   /* Truncate from the archive file space all but the last
02511   file, or if it has been written full, all files */
02512 
02513   n_files = (UNIV_PAGE_SIZE
02514        * fil_space_get_size(group->archive_space_id))
02515     / group->file_size;
02516   ut_ad(n_files > 0);
02517 
02518   end_offset = group->archived_offset;
02519 
02520   if (end_offset % group->file_size == 0) {
02521 
02522     trunc_files = n_files;
02523   } else {
02524     trunc_files = n_files - 1;
02525   }
02526 
02527 #ifdef UNIV_DEBUG
02528   if (log_debug_writes && trunc_files) {
02529     fprintf(stderr,
02530       "Complete file(s) archived to group %lu\n",
02531       (ulong) group->id);
02532   }
02533 #endif /* UNIV_DEBUG */
02534 
02535   /* Calculate the archive file space start lsn */
02536   start_lsn = log_sys->next_archived_lsn
02537     - (end_offset - LOG_FILE_HDR_SIZE + trunc_files
02538        * (group->file_size - LOG_FILE_HDR_SIZE));
02539   end_lsn = start_lsn;
02540 
02541   for (i = 0; i < trunc_files; i++) {
02542 
02543     end_lsn += group->file_size - LOG_FILE_HDR_SIZE;
02544 
02545     /* Write a notice to the headers of archived log
02546     files that the file write has been completed */
02547 
02548     log_group_archive_completed_header_write(group, i, end_lsn);
02549   }
02550 
02551   fil_space_truncate_start(group->archive_space_id,
02552          trunc_files * group->file_size);
02553 
02554 #ifdef UNIV_DEBUG
02555   if (log_debug_writes) {
02556     fputs("Archiving writes completed\n", stderr);
02557   }
02558 #endif /* UNIV_DEBUG */
02559 }
02560 
02561 /******************************************************/
02563 static
02564 void
02565 log_archive_check_completion_low(void)
02566 /*==================================*/
02567 {
02568   ut_ad(mutex_own(&(log_sys->mutex)));
02569 
02570   if (log_sys->n_pending_archive_ios == 0
02571       && log_sys->archiving_phase == LOG_ARCHIVE_READ) {
02572 
02573 #ifdef UNIV_DEBUG
02574     if (log_debug_writes) {
02575       fputs("Archiving read completed\n", stderr);
02576     }
02577 #endif /* UNIV_DEBUG */
02578 
02579     /* Archive buffer has now been read in: start archive writes */
02580 
02581     log_sys->archiving_phase = LOG_ARCHIVE_WRITE;
02582 
02583     log_archive_groups();
02584   }
02585 
02586   if (log_sys->n_pending_archive_ios == 0
02587       && log_sys->archiving_phase == LOG_ARCHIVE_WRITE) {
02588 
02589     log_archive_write_complete_groups();
02590 
02591     log_sys->archived_lsn = log_sys->next_archived_lsn;
02592 
02593     rw_lock_x_unlock_gen(&(log_sys->archive_lock), LOG_ARCHIVE);
02594   }
02595 }
02596 
02597 /******************************************************/
02599 static
02600 void
02601 log_io_complete_archive(void)
02602 /*=========================*/
02603 {
02604   log_group_t*  group;
02605 
02606   mutex_enter(&(log_sys->mutex));
02607 
02608   group = UT_LIST_GET_FIRST(log_sys->log_groups);
02609 
02610   mutex_exit(&(log_sys->mutex));
02611 
02612   fil_flush(group->archive_space_id);
02613 
02614   mutex_enter(&(log_sys->mutex));
02615 
02616   ut_ad(log_sys->n_pending_archive_ios > 0);
02617 
02618   log_sys->n_pending_archive_ios--;
02619 
02620   log_archive_check_completion_low();
02621 
02622   mutex_exit(&(log_sys->mutex));
02623 }
02624 
02625 /********************************************************************/
02628 UNIV_INTERN
02629 ibool
02630 log_archive_do(
02631 /*===========*/
02632   ibool sync, 
02633   ulint*  n_bytes)
02635 {
02636   ibool   calc_new_limit;
02637   ib_uint64_t start_lsn;
02638   ib_uint64_t limit_lsn;
02639 
02640   calc_new_limit = TRUE;
02641 loop:
02642   mutex_enter(&(log_sys->mutex));
02643 
02644   switch (log_sys->archiving_state) {
02645   case LOG_ARCH_OFF:
02646 arch_none:
02647     mutex_exit(&(log_sys->mutex));
02648 
02649     *n_bytes = 0;
02650 
02651     return(TRUE);
02652   case LOG_ARCH_STOPPED:
02653   case LOG_ARCH_STOPPING2:
02654     mutex_exit(&(log_sys->mutex));
02655 
02656     os_event_wait(log_sys->archiving_on);
02657 
02658     goto loop;
02659   }
02660 
02661   start_lsn = log_sys->archived_lsn;
02662 
02663   if (calc_new_limit) {
02664     ut_a(log_sys->archive_buf_size % OS_FILE_LOG_BLOCK_SIZE == 0);
02665     limit_lsn = start_lsn + log_sys->archive_buf_size;
02666 
02667     *n_bytes = log_sys->archive_buf_size;
02668 
02669     if (limit_lsn >= log_sys->lsn) {
02670 
02671       limit_lsn = ut_uint64_align_down(
02672         log_sys->lsn, OS_FILE_LOG_BLOCK_SIZE);
02673     }
02674   }
02675 
02676   if (log_sys->archived_lsn >= limit_lsn) {
02677 
02678     goto arch_none;
02679   }
02680 
02681   if (log_sys->written_to_all_lsn < limit_lsn) {
02682 
02683     mutex_exit(&(log_sys->mutex));
02684 
02685     log_write_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
02686 
02687     calc_new_limit = FALSE;
02688 
02689     goto loop;
02690   }
02691 
02692   if (log_sys->n_pending_archive_ios > 0) {
02693     /* An archiving operation is running */
02694 
02695     mutex_exit(&(log_sys->mutex));
02696 
02697     if (sync) {
02698       rw_lock_s_lock(&(log_sys->archive_lock));
02699       rw_lock_s_unlock(&(log_sys->archive_lock));
02700     }
02701 
02702     *n_bytes = log_sys->archive_buf_size;
02703 
02704     return(FALSE);
02705   }
02706 
02707   rw_lock_x_lock_gen(&(log_sys->archive_lock), LOG_ARCHIVE);
02708 
02709   log_sys->archiving_phase = LOG_ARCHIVE_READ;
02710 
02711   log_sys->next_archived_lsn = limit_lsn;
02712 
02713 #ifdef UNIV_DEBUG
02714   if (log_debug_writes) {
02715     fprintf(stderr,
02716       "Archiving from lsn %"PRIu64" to lsn %"PRIu64"\n",
02717       log_sys->archived_lsn, limit_lsn);
02718   }
02719 #endif /* UNIV_DEBUG */
02720 
02721   /* Read the log segment to the archive buffer */
02722 
02723   log_group_read_log_seg(LOG_ARCHIVE, log_sys->archive_buf,
02724              UT_LIST_GET_FIRST(log_sys->log_groups),
02725              start_lsn, limit_lsn);
02726 
02727   mutex_exit(&(log_sys->mutex));
02728 
02729   if (sync) {
02730     rw_lock_s_lock(&(log_sys->archive_lock));
02731     rw_lock_s_unlock(&(log_sys->archive_lock));
02732   }
02733 
02734   *n_bytes = log_sys->archive_buf_size;
02735 
02736   return(TRUE);
02737 }
02738 
02739 /****************************************************************/
02742 static
02743 void
02744 log_archive_all(void)
02745 /*=================*/
02746 {
02747   ib_uint64_t present_lsn;
02748   ulint   dummy;
02749 
02750   mutex_enter(&(log_sys->mutex));
02751 
02752   if (log_sys->archiving_state == LOG_ARCH_OFF) {
02753     mutex_exit(&(log_sys->mutex));
02754 
02755     return;
02756   }
02757 
02758   present_lsn = log_sys->lsn;
02759 
02760   mutex_exit(&(log_sys->mutex));
02761 
02762   log_pad_current_log_block();
02763 
02764   for (;;) {
02765     mutex_enter(&(log_sys->mutex));
02766 
02767     if (present_lsn <= log_sys->archived_lsn) {
02768 
02769       mutex_exit(&(log_sys->mutex));
02770 
02771       return;
02772     }
02773 
02774     mutex_exit(&(log_sys->mutex));
02775 
02776     log_archive_do(TRUE, &dummy);
02777   }
02778 }
02779 
02780 /*****************************************************/
02783 static
02784 void
02785 log_archive_close_groups(
02786 /*=====================*/
02787   ibool increment_file_count) 
02789 {
02790   log_group_t*  group;
02791   ulint   trunc_len;
02792 
02793   ut_ad(mutex_own(&(log_sys->mutex)));
02794 
02795   if (log_sys->archiving_state == LOG_ARCH_OFF) {
02796 
02797     return;
02798   }
02799 
02800   group = UT_LIST_GET_FIRST(log_sys->log_groups);
02801 
02802   trunc_len = UNIV_PAGE_SIZE
02803     * fil_space_get_size(group->archive_space_id);
02804   if (trunc_len > 0) {
02805     ut_a(trunc_len == group->file_size);
02806 
02807     /* Write a notice to the headers of archived log
02808     files that the file write has been completed */
02809 
02810     log_group_archive_completed_header_write(
02811       group, 0, log_sys->archived_lsn);
02812 
02813     fil_space_truncate_start(group->archive_space_id,
02814            trunc_len);
02815     if (increment_file_count) {
02816       group->archived_offset = 0;
02817       group->archived_file_no += 2;
02818     }
02819 
02820 #ifdef UNIV_DEBUG
02821     if (log_debug_writes) {
02822       fprintf(stderr,
02823         "Incrementing arch file no to %lu"
02824         " in log group %lu\n",
02825         (ulong) group->archived_file_no + 2,
02826         (ulong) group->id);
02827     }
02828 #endif /* UNIV_DEBUG */
02829   }
02830 }
02831 
02832 /****************************************************************/
02838 UNIV_INTERN
02839 ulint
02840 log_archive_stop(void)
02841 /*==================*/
02842 {
02843   ibool success;
02844 
02845   mutex_enter(&(log_sys->mutex));
02846 
02847   if (log_sys->archiving_state != LOG_ARCH_ON) {
02848 
02849     mutex_exit(&(log_sys->mutex));
02850 
02851     return(DB_ERROR);
02852   }
02853 
02854   log_sys->archiving_state = LOG_ARCH_STOPPING;
02855 
02856   mutex_exit(&(log_sys->mutex));
02857 
02858   log_archive_all();
02859 
02860   mutex_enter(&(log_sys->mutex));
02861 
02862   log_sys->archiving_state = LOG_ARCH_STOPPING2;
02863   os_event_reset(log_sys->archiving_on);
02864 
02865   mutex_exit(&(log_sys->mutex));
02866 
02867   /* Wait for a possible archiving operation to end */
02868 
02869   rw_lock_s_lock(&(log_sys->archive_lock));
02870   rw_lock_s_unlock(&(log_sys->archive_lock));
02871 
02872   mutex_enter(&(log_sys->mutex));
02873 
02874   /* Close all archived log files, incrementing the file count by 2,
02875   if appropriate */
02876 
02877   log_archive_close_groups(TRUE);
02878 
02879   mutex_exit(&(log_sys->mutex));
02880 
02881   /* Make a checkpoint, so that if recovery is needed, the file numbers
02882   of new archived log files will start from the right value */
02883 
02884   success = FALSE;
02885 
02886   while (!success) {
02887     success = log_checkpoint(TRUE, TRUE);
02888   }
02889 
02890   mutex_enter(&(log_sys->mutex));
02891 
02892   log_sys->archiving_state = LOG_ARCH_STOPPED;
02893 
02894   mutex_exit(&(log_sys->mutex));
02895 
02896   return(DB_SUCCESS);
02897 }
02898 
02899 /****************************************************************/
02902 UNIV_INTERN
02903 ulint
02904 log_archive_start(void)
02905 /*===================*/
02906 {
02907   mutex_enter(&(log_sys->mutex));
02908 
02909   if (log_sys->archiving_state != LOG_ARCH_STOPPED) {
02910 
02911     mutex_exit(&(log_sys->mutex));
02912 
02913     return(DB_ERROR);
02914   }
02915 
02916   log_sys->archiving_state = LOG_ARCH_ON;
02917 
02918   os_event_set(log_sys->archiving_on);
02919 
02920   mutex_exit(&(log_sys->mutex));
02921 
02922   return(DB_SUCCESS);
02923 }
02924 
02925 /****************************************************************/
02928 UNIV_INTERN
02929 ulint
02930 log_archive_noarchivelog(void)
02931 /*==========================*/
02932 {
02933 loop:
02934   mutex_enter(&(log_sys->mutex));
02935 
02936   if (log_sys->archiving_state == LOG_ARCH_STOPPED
02937       || log_sys->archiving_state == LOG_ARCH_OFF) {
02938 
02939     log_sys->archiving_state = LOG_ARCH_OFF;
02940 
02941     os_event_set(log_sys->archiving_on);
02942 
02943     mutex_exit(&(log_sys->mutex));
02944 
02945     return(DB_SUCCESS);
02946   }
02947 
02948   mutex_exit(&(log_sys->mutex));
02949 
02950   log_archive_stop();
02951 
02952   os_thread_sleep(500000);
02953 
02954   goto loop;
02955 }
02956 
02957 /****************************************************************/
02960 UNIV_INTERN
02961 ulint
02962 log_archive_archivelog(void)
02963 /*========================*/
02964 {
02965   mutex_enter(&(log_sys->mutex));
02966 
02967   if (log_sys->archiving_state == LOG_ARCH_OFF) {
02968 
02969     log_sys->archiving_state = LOG_ARCH_ON;
02970 
02971     log_sys->archived_lsn
02972       = ut_uint64_align_down(log_sys->lsn,
02973                  OS_FILE_LOG_BLOCK_SIZE);
02974     mutex_exit(&(log_sys->mutex));
02975 
02976     return(DB_SUCCESS);
02977   }
02978 
02979   mutex_exit(&(log_sys->mutex));
02980 
02981   return(DB_ERROR);
02982 }
02983 
02984 /****************************************************************/
02988 static
02989 void
02990 log_archive_margin(void)
02991 /*====================*/
02992 {
02993   log_t*  log   = log_sys;
02994   ulint age;
02995   ibool sync;
02996   ulint dummy;
02997 loop:
02998   mutex_enter(&(log->mutex));
02999 
03000   if (log->archiving_state == LOG_ARCH_OFF) {
03001     mutex_exit(&(log->mutex));
03002 
03003     return;
03004   }
03005 
03006   age = log->lsn - log->archived_lsn;
03007 
03008   if (age > log->max_archived_lsn_age) {
03009 
03010     /* An archiving is urgent: we have to do synchronous i/o */
03011 
03012     sync = TRUE;
03013 
03014   } else if (age > log->max_archived_lsn_age_async) {
03015 
03016     /* An archiving is not urgent: we do asynchronous i/o */
03017 
03018     sync = FALSE;
03019   } else {
03020     /* No archiving required yet */
03021 
03022     mutex_exit(&(log->mutex));
03023 
03024     return;
03025   }
03026 
03027   mutex_exit(&(log->mutex));
03028 
03029   log_archive_do(sync, &dummy);
03030 
03031   if (sync == TRUE) {
03032     /* Check again that enough was written to the archive */
03033 
03034     goto loop;
03035   }
03036 }
03037 #endif /* UNIV_LOG_ARCHIVE */
03038 
03039 /********************************************************************/
03044 UNIV_INTERN
03045 void
03046 log_check_margins(void)
03047 /*===================*/
03048 {
03049 loop:
03050   log_flush_margin();
03051 
03052   log_checkpoint_margin();
03053 
03054 #ifdef UNIV_LOG_ARCHIVE
03055   log_archive_margin();
03056 #endif /* UNIV_LOG_ARCHIVE */
03057 
03058   mutex_enter(&(log_sys->mutex));
03059   ut_ad(!recv_no_log_write);
03060 
03061   if (log_sys->check_flush_or_checkpoint) {
03062 
03063     mutex_exit(&(log_sys->mutex));
03064 
03065     goto loop;
03066   }
03067 
03068   mutex_exit(&(log_sys->mutex));
03069 }
03070 
03071 /****************************************************************/
03076 UNIV_INTERN
03077 void
03078 logs_empty_and_mark_files_at_shutdown(void)
03079 /*=======================================*/
03080 {
03081   ib_uint64_t lsn;
03082   ulint   arch_log_no;
03083 
03084   if (srv_print_verbose_log) {
03085     ut_print_timestamp(stderr);
03086     fprintf(stderr, "  InnoDB: Starting shutdown...\n");
03087   }
03088   /* Wait until the master thread and all other operations are idle: our
03089   algorithm only works if the server is idle at shutdown */
03090 
03091   srv_shutdown_state = SRV_SHUTDOWN_CLEANUP;
03092 loop:
03093   os_thread_sleep(100000);
03094 
03095   mutex_enter(&kernel_mutex);
03096 
03097   /* We need the monitor threads to stop before we proceed with a
03098   normal shutdown. In case of very fast shutdown, however, we can
03099   proceed without waiting for monitor threads. */
03100 
03101   if (srv_fast_shutdown < 2
03102      && (srv_error_monitor_active
03103         || srv_lock_timeout_active
03104         || srv_monitor_active)) {
03105 
03106     mutex_exit(&kernel_mutex);
03107 
03108     os_event_set(srv_error_event);
03109     os_event_set(srv_monitor_event);
03110     os_event_set(srv_timeout_event);
03111 
03112     goto loop;
03113   }
03114 
03115   /* Check that there are no longer transactions. We need this wait even
03116   for the 'very fast' shutdown, because the InnoDB layer may have
03117   committed or prepared transactions and we don't want to lose them. */
03118 
03119   if (trx_n_mysql_transactions > 0
03120       || UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
03121 
03122     mutex_exit(&kernel_mutex);
03123 
03124     goto loop;
03125   }
03126 
03127   if (srv_fast_shutdown == 2) {
03128     /* In this fastest shutdown we do not flush the buffer pool:
03129     it is essentially a 'crash' of the InnoDB server. Make sure
03130     that the log is all flushed to disk, so that we can recover
03131     all committed transactions in a crash recovery. We must not
03132     write the lsn stamps to the data files, since at a startup
03133     InnoDB deduces from the stamps if the previous shutdown was
03134     clean. */
03135 
03136     log_buffer_flush_to_disk();
03137 
03138     mutex_exit(&kernel_mutex);
03139 
03140     return; /* We SKIP ALL THE REST !! */
03141   }
03142 
03143   mutex_exit(&kernel_mutex);
03144 
03145   /* Check that the background threads are suspended */
03146 
03147   if (srv_is_any_background_thread_active()) {
03148     goto loop;
03149   }
03150 
03151   mutex_enter(&(log_sys->mutex));
03152 
03153   if (log_sys->n_pending_checkpoint_writes
03154 #ifdef UNIV_LOG_ARCHIVE
03155       || log_sys->n_pending_archive_ios
03156 #endif /* UNIV_LOG_ARCHIVE */
03157       || log_sys->n_pending_writes) {
03158 
03159     mutex_exit(&(log_sys->mutex));
03160 
03161     goto loop;
03162   }
03163 
03164   mutex_exit(&(log_sys->mutex));
03165 
03166   if (!buf_pool_check_no_pending_io()) {
03167 
03168     goto loop;
03169   }
03170 
03171 #ifdef UNIV_LOG_ARCHIVE
03172   log_archive_all();
03173 #endif /* UNIV_LOG_ARCHIVE */
03174 
03175   log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
03176 
03177   mutex_enter(&(log_sys->mutex));
03178 
03179   lsn = log_sys->lsn;
03180 
03181   if (lsn != log_sys->last_checkpoint_lsn
03182 #ifdef UNIV_LOG_ARCHIVE
03183       || (srv_log_archive_on
03184     && lsn != log_sys->archived_lsn + LOG_BLOCK_HDR_SIZE)
03185 #endif /* UNIV_LOG_ARCHIVE */
03186       ) {
03187 
03188     mutex_exit(&(log_sys->mutex));
03189 
03190     goto loop;
03191   }
03192 
03193   arch_log_no = 0;
03194 
03195 #ifdef UNIV_LOG_ARCHIVE
03196   UT_LIST_GET_FIRST(log_sys->log_groups)->archived_file_no;
03197 
03198   if (0 == UT_LIST_GET_FIRST(log_sys->log_groups)->archived_offset) {
03199 
03200     arch_log_no--;
03201   }
03202 
03203   log_archive_close_groups(TRUE);
03204 #endif /* UNIV_LOG_ARCHIVE */
03205 
03206   mutex_exit(&(log_sys->mutex));
03207 
03208   /* Check that the background threads stay suspended */
03209   if (srv_is_any_background_thread_active()) {
03210     fprintf(stderr,
03211       "InnoDB: Warning: some background thread woke up"
03212       " during shutdown\n");
03213 
03214     goto loop;
03215   }
03216 
03217   fil_flush_file_spaces(FIL_TABLESPACE);
03218   fil_flush_file_spaces(FIL_LOG);
03219 
03220   /* The call fil_write_flushed_lsn_to_data_files() will pass the buffer
03221   pool: therefore it is essential that the buffer pool has been
03222   completely flushed to disk! (We do not call fil_write... if the
03223   'very fast' shutdown is enabled.) */
03224 
03225   if (!buf_all_freed()) {
03226 
03227     goto loop;
03228   }
03229 
03230   srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
03231 
03232   /* Make some checks that the server really is quiet */
03233   ut_a(!srv_is_any_background_thread_active());
03234 
03235   ut_a(buf_all_freed());
03236   ut_a(lsn == log_sys->lsn);
03237 
03238   if (lsn < srv_start_lsn) {
03239           drizzled::errmsg_printf(drizzled::error::ERROR,
03240                                   "InnoDB: Error: log sequence number at shutdown %"PRIu64" is lower than at startup %"PRIu64"!",
03241                                   lsn, srv_start_lsn);
03242   }
03243 
03244   srv_shutdown_lsn = lsn;
03245 
03246   fil_write_flushed_lsn_to_data_files(lsn, arch_log_no);
03247 
03248   fil_flush_file_spaces(FIL_TABLESPACE);
03249 
03250   fil_close_all_files();
03251 
03252   /* Make some checks that the server really is quiet */
03253   ut_a(!srv_is_any_background_thread_active());
03254 
03255   ut_a(buf_all_freed());
03256   ut_a(lsn == log_sys->lsn);
03257 }
03258 
03259 #ifdef UNIV_LOG_DEBUG
03260 /******************************************************/
03263 UNIV_INTERN
03264 ibool
03265 log_check_log_recs(
03266 /*===============*/
03267   const byte* buf,    
03270   ulint   len,    
03271   ib_uint64_t buf_start_lsn)  
03272 {
03273   ib_uint64_t contiguous_lsn;
03274   ib_uint64_t scanned_lsn;
03275   const byte* start;
03276   const byte* end;
03277   byte*   buf1;
03278   byte*   scan_buf;
03279 
03280   ut_ad(mutex_own(&(log_sys->mutex)));
03281 
03282   if (len == 0) {
03283 
03284     return(TRUE);
03285   }
03286 
03287   start = ut_align_down(buf, OS_FILE_LOG_BLOCK_SIZE);
03288   end = ut_align(buf + len, OS_FILE_LOG_BLOCK_SIZE);
03289 
03290   buf1 = mem_alloc((end - start) + OS_FILE_LOG_BLOCK_SIZE);
03291   scan_buf = ut_align(buf1, OS_FILE_LOG_BLOCK_SIZE);
03292 
03293   ut_memcpy(scan_buf, start, end - start);
03294 
03295   recv_scan_log_recs((buf_pool_get_n_pages()
03296          - (recv_n_pool_free_frames * srv_buf_pool_instances))
03297          * UNIV_PAGE_SIZE, FALSE, scan_buf, end - start,
03298          ut_uint64_align_down(buf_start_lsn,
03299             OS_FILE_LOG_BLOCK_SIZE),
03300          &contiguous_lsn, &scanned_lsn);
03301 
03302   ut_a(scanned_lsn == buf_start_lsn + len);
03303   ut_a(recv_sys->recovered_lsn == scanned_lsn);
03304 
03305   mem_free(buf1);
03306 
03307   return(TRUE);
03308 }
03309 #endif /* UNIV_LOG_DEBUG */
03310 
03311 /******************************************************/
03314 UNIV_INTERN
03315 ibool
03316 log_peek_lsn(
03317 /*=========*/
03318   ib_uint64_t*  lsn)  
03319 {
03320   if (0 == mutex_enter_nowait(&(log_sys->mutex))) {
03321     *lsn = log_sys->lsn;
03322 
03323     mutex_exit(&(log_sys->mutex));
03324 
03325     return(TRUE);
03326   }
03327 
03328   return(FALSE);
03329 }
03330 
03331 /******************************************************/
03333 UNIV_INTERN
03334 void
03335 log_print(
03336 /*======*/
03337   FILE* file) 
03338 {
03339   double  time_elapsed;
03340   time_t  current_time;
03341 
03342   mutex_enter(&(log_sys->mutex));
03343 
03344   fprintf(file,
03345     "Log sequence number %"PRIu64"\n"
03346     "Log flushed up to   %"PRIu64"\n"
03347     "Last checkpoint at  %"PRIu64"\n",
03348     log_sys->lsn,
03349     log_sys->flushed_to_disk_lsn,
03350     log_sys->last_checkpoint_lsn);
03351 
03352   current_time = time(NULL);
03353 
03354   time_elapsed = 0.001 + difftime(current_time,
03355           log_sys->last_printout_time);
03356   fprintf(file,
03357     "%lu pending log writes, %lu pending chkp writes\n"
03358     "%lu log i/o's done, %.2f log i/o's/second\n",
03359     (ulong) log_sys->n_pending_writes,
03360     (ulong) log_sys->n_pending_checkpoint_writes,
03361     (ulong) log_sys->n_log_ios,
03362     ((log_sys->n_log_ios - log_sys->n_log_ios_old)
03363      / time_elapsed));
03364 
03365   log_sys->n_log_ios_old = log_sys->n_log_ios;
03366   log_sys->last_printout_time = current_time;
03367 
03368   mutex_exit(&(log_sys->mutex));
03369 }
03370 
03371 /**********************************************************************/
03373 UNIV_INTERN
03374 void
03375 log_refresh_stats(void)
03376 /*===================*/
03377 {
03378   log_sys->n_log_ios_old = log_sys->n_log_ios;
03379   log_sys->last_printout_time = time(NULL);
03380 }
03381 
03382 /**********************************************************************
03383 Closes a log group. */
03384 static
03385 void
03386 log_group_close(
03387 /*===========*/
03388   log_group_t*  group)    /* in,own: log group to close */
03389 {
03390   ulint i;
03391 
03392   for (i = 0; i < group->n_files; i++) {
03393     mem_free(group->file_header_bufs_ptr[i]);
03394 #ifdef UNIV_LOG_ARCHIVE
03395     mem_free(group->archive_file_header_bufs_ptr[i]);
03396 #endif /* UNIV_LOG_ARCHIVE */
03397   }
03398 
03399   mem_free(group->file_header_bufs_ptr);
03400   mem_free(group->file_header_bufs);
03401 
03402 #ifdef UNIV_LOG_ARCHIVE
03403   mem_free(group->archive_file_header_bufs_ptr);
03404   mem_free(group->archive_file_header_bufs);
03405 #endif /* UNIV_LOG_ARCHIVE */
03406 
03407   mem_free(group->checkpoint_buf_ptr);
03408 
03409   mem_free(group);
03410 }
03411 
03412 /**********************************************************
03413 Shutdown the log system but do not release all the memory. */
03414 UNIV_INTERN
03415 void
03416 log_shutdown(void)
03417 /*==============*/
03418 {
03419   log_group_t*  group;
03420 
03421   group = UT_LIST_GET_FIRST(log_sys->log_groups);
03422 
03423   while (UT_LIST_GET_LEN(log_sys->log_groups) > 0) {
03424     log_group_t*  prev_group = group;
03425 
03426     group = UT_LIST_GET_NEXT(log_groups, group);
03427     UT_LIST_REMOVE(log_groups, log_sys->log_groups, prev_group);
03428 
03429     log_group_close(prev_group);
03430   }
03431 
03432   mem_free(log_sys->buf_ptr);
03433   log_sys->buf_ptr = NULL;
03434   log_sys->buf = NULL;
03435   mem_free(log_sys->checkpoint_buf_ptr);
03436   log_sys->checkpoint_buf_ptr = NULL;
03437   log_sys->checkpoint_buf = NULL;
03438 
03439   os_event_free(log_sys->no_flush_event);
03440   os_event_free(log_sys->one_flushed_event);
03441 
03442   rw_lock_free(&log_sys->checkpoint_lock);
03443 
03444   mutex_free(&log_sys->mutex);
03445 
03446 #ifdef UNIV_LOG_ARCHIVE
03447   rw_lock_free(&log_sys->archive_lock);
03448   os_event_create(log_sys->archiving_on);
03449 #endif /* UNIV_LOG_ARCHIVE */
03450 
03451 #ifdef UNIV_LOG_DEBUG
03452   recv_sys_debug_free();
03453 #endif
03454 
03455   recv_sys_close();
03456 }
03457 
03458 /**********************************************************
03459 Free the log system data structures. */
03460 UNIV_INTERN
03461 void
03462 log_mem_free(void)
03463 /*==============*/
03464 {
03465   if (log_sys != NULL) {
03466     recv_sys_mem_free();
03467     mem_free(log_sys);
03468 
03469     log_sys = NULL;
03470   }
03471 }
03472 #endif /* !UNIV_HOTBACKUP */