Drizzled Public API Documentation

lock0lock.cc
1 /*****************************************************************************
2 
3 Copyright (C) 1996, 2010, Innobase Oy. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free Software
7 Foundation; version 2 of the License.
8 
9 This program is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12 
13 You should have received a copy of the GNU General Public License along with
14 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
15 St, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 *****************************************************************************/
18 
19 /**************************************************/
26 #define LOCK_MODULE_IMPLEMENTATION
27 
28 #include "lock0lock.h"
29 #include "lock0priv.h"
30 
31 #ifdef UNIV_NONINL
32 #include "lock0lock.ic"
33 #include "lock0priv.ic"
34 #endif
35 
36 #include "ha_prototypes.h"
37 #include "usr0sess.h"
38 #include "trx0purge.h"
39 #include "dict0mem.h"
40 #include "trx0sys.h"
41 
42 /* Restricts the length of search we will do in the waits-for
43 graph of transactions */
44 #define LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK 1000000
45 
46 /* Restricts the recursion depth of the search we will do in the waits-for
47 graph of transactions */
48 #define LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK 200
49 
50 /* When releasing transaction locks, this specifies how often we release
51 the kernel mutex for a moment to give others access to it as well */
52 
53 #define LOCK_RELEASE_KERNEL_INTERVAL 1000
54 
55 /* Safety margin when creating a new record lock: this many extra records
56 can be inserted into the page without the need to create a lock with a bigger
57 bitmap */
58 
59 #define LOCK_PAGE_BITMAP_MARGIN 64
60 
61 /* An explicit record lock affects both the record and the gap before it.
62 An implicit x-lock does not affect the gap, it only locks the index
63 record from read or update.
64 
65 If a transaction has modified or inserted an index record, then
66 it owns an implicit x-lock on the record. On a secondary index record,
67 a transaction has an implicit x-lock also if it has modified the
68 clustered index record, the max trx id of the page where the secondary
69 index record resides is >= trx id of the transaction (or database recovery
70 is running), and there are no explicit non-gap lock requests on the
71 secondary index record.
72 
73 This complicated definition for a secondary index comes from the
74 implementation: we want to be able to determine if a secondary index
75 record has an implicit x-lock, just by looking at the present clustered
76 index record, not at the historical versions of the record. The
77 complicated definition can be explained to the user in this way: there is
78 nondeterminism in the access path when a query is answered; we may,
79 or may not, access the clustered index record and thus may, or may not,
80 bump into an x-lock set there.
81 
82 Different transactions can have conflicting locks set on the gap at the
83 same time. The locks on the gap are purely inhibitive: an insert cannot
84 be made, or a select cursor may have to wait if a different transaction
85 has a conflicting lock on the gap. An x-lock on the gap does not give
86 the right to insert into the gap.
87 
88 An explicit lock can be placed on a user record or the supremum record of
89 a page. The locks on the supremum record are always thought to be of the gap
90 type, though the gap bit is not set. When we perform an update of a record
91 where the size of the record changes, we may temporarily store its explicit
92 locks on the infimum record of the page, though the infimum otherwise never
93 carries locks.
94 
95 A waiting record lock can also be of the gap type. A waiting lock request
96 can be granted when there is no conflicting mode lock request by another
97 transaction ahead of it in the explicit lock queue.
98 
99 In version 4.0.5 we added yet another explicit lock type: LOCK_REC_NOT_GAP.
100 It only locks the record it is placed on, not the gap before the record.
101 This lock type is necessary to emulate an Oracle-like READ COMMITTED isolation
102 level.
103 
104 -------------------------------------------------------------------------
105 RULE 1: If there is an implicit x-lock on a record, and there are non-gap
106 -------
107 lock requests waiting in the queue, then the transaction holding the implicit
108 x-lock also has an explicit non-gap record x-lock. Therefore, as locks are
109 released, we can grant locks to waiting lock requests purely by looking at
110 the explicit lock requests in the queue.
111 
112 RULE 3: Different transactions cannot have conflicting granted non-gap locks
113 -------
114 on a record at the same time. However, they can have conflicting granted gap
115 locks.
116 RULE 4: If there is a waiting lock request in a queue, no lock request,
117 -------
118 gap or not, can be inserted ahead of it in the queue. In record deletes
119 and page splits new gap type locks can be created by the database manager
120 for a transaction, and without rule 4, the waits-for graph of transactions
121 might become cyclic without the database noticing it, as the deadlock check
122 is only performed when a transaction itself requests a lock!
123 -------------------------------------------------------------------------
124 
125 An insert into a gap is allowed if there are no explicit lock requests by
126 other transactions on the next record. It does not matter if these lock
127 requests are granted or waiting, gap bit set or not, with the exception
128 that a gap type request set by another transaction to wait for
129 its turn to do an insert is ignored. On the other hand, an
130 implicit x-lock by another transaction does not prevent an insert, which
131 allows for more concurrency when using an Oracle-style sequence number
132 generator for the primary key with many transactions doing inserts
133 concurrently.
134 
135 A modify of a record is allowed if the transaction has an x-lock on the
136 record, or if other transactions do not have any non-gap lock requests on the
137 record.
138 
139 A read of a single user record with a cursor is allowed if the transaction
140 has a non-gap explicit, or an implicit lock on the record, or if the other
141 transactions have no x-lock requests on the record. At a page supremum a
142 read is always allowed.
143 
144 In summary, an implicit lock is seen as a granted x-lock only on the
145 record, not on the gap. An explicit lock with no gap bit set is a lock
146 both on the record and the gap. If the gap bit is set, the lock is only
147 on the gap. Different transactions cannot own conflicting locks on the
148 record at the same time, but they may own conflicting locks on the gap.
149 Granted locks on a record give an access right to the record, but gap type
150 locks just inhibit operations.
151 
152 NOTE: Finding out if some transaction has an implicit x-lock on a secondary
153 index record can be cumbersome. We may have to look at previous versions of
154 the corresponding clustered index record to find out if a delete marked
155 secondary index record was delete marked by an active transaction, not by
156 a committed one.
157 
158 FACT A: If a transaction has inserted a row, it can delete it any time
159 without need to wait for locks.
160 
161 PROOF: The transaction has an implicit x-lock on every index record inserted
162 for the row, and can thus modify each record without the need to wait. Q.E.D.
163 
164 FACT B: If a transaction has read some result set with a cursor, it can read
165 it again, and retrieve the same result set, if it has not modified the
166 result set in the meantime. Hence, there is no phantom problem. If the
167 biggest record, in the alphabetical order, touched by the cursor is removed,
168 a lock wait may occur, otherwise not.
169 
170 PROOF: When a read cursor proceeds, it sets an s-lock on each user record
171 it passes, and a gap type s-lock on each page supremum. The cursor must
172 wait until it has these locks granted. Then no other transaction can
173 have a granted x-lock on any of the user records, and therefore cannot
174 modify the user records. Neither can any other transaction insert into
175 the gaps which were passed over by the cursor. Page splits and merges,
176 and removal of obsolete versions of records do not affect this, because
177 when a user record or a page supremum is removed, the next record inherits
178 its locks as gap type locks, and therefore blocks inserts to the same gap.
179 Also, if a page supremum is inserted, it inherits its locks from the successor
180 record. When the cursor is positioned again at the start of the result set,
181 the records it will touch on its course are either records it touched
182 during the last pass or newly inserted page supremums. It can immediately
183 access all these records, and when it arrives at the biggest record, it
184 notices that the result set is complete. If the biggest record was removed,
185 lock wait can occur because the next record only inherits a gap type lock,
186 and a wait may be needed. Q.E.D. */
187 
188 /* If an index record should be changed or a new one inserted, we must check
189 the lock on the record or on the next one. When a read cursor starts reading,
190 we will set a record level s-lock on each record it passes, except on the
191 initial record on which the cursor is positioned before we start to fetch
192 records. Our index tree search has the convention that the B-tree
193 cursor is positioned BEFORE the first possibly matching record in
194 the search. Optimizations are possible here: if the record is searched
195 on an equality condition to a unique key, we could actually set a special
196 lock on the record, a lock which would not prevent any insert before
197 this record. In next-key locking, an x-lock set on a record also
198 prevents inserts just before that record.
199  There are special infimum and supremum records on each page.
200 A supremum record can be locked by a read cursor. This record cannot be
201 updated, but the lock prevents the insertion of a user record at the end of
202 the page.
203  Next key locks will prevent the phantom problem where new rows
204 could appear in SELECT result sets after the select operation has been
205 performed. Prevention of phantoms ensures the serializability of
206 transactions.
207  What should we check if an insert of a new record is wanted?
208 Only the lock on the next record on the same page, because also the
209 supremum record can carry a lock. An s-lock prevents insertion, but
210 what about an x-lock? If it was set by a searched update, then there
211 is implicitly an s-lock, too, and the insert should be prevented.
212 What if our transaction owns an x-lock to the next record, but there is
213 a waiting s-lock request on the next record? If this s-lock was placed
214 by a read cursor moving in the ascending order in the index, we cannot
215 do the insert immediately, because when we finally commit our transaction,
216 the read cursor should also see the newly inserted record. So we should
217 move the read cursor backward from the next record for it to pass over
218 the newly inserted record. This move backward may be too cumbersome to
219 implement. If we in this situation just enqueue a second x-lock request
220 for our transaction on the next record, then the deadlock mechanism
221 notices a deadlock between our transaction and the s-lock request
222 transaction. This seems to be an ok solution.
223  We could have the convention that granted explicit record locks
224 lock the corresponding records from changing, and also lock the gaps
225 before them from inserting. A waiting explicit lock request locks the gap
226 before from inserting. Implicit record x-locks, which we derive from the
227 transaction id in the clustered index record, only lock the record itself
228 from modification, not the gap before it from inserting.
229  How should we store update locks? If the search is done by a unique
230 key, we could just modify the record trx id. Otherwise, we could put a record
231 x-lock on the record. If the update changes ordering fields of the
232 clustered index record, the newly inserted record needs no record lock in
233 the lock table; the trx id is enough. The same holds for a secondary index
234 record. Searched delete is similar to update.
235 
236 PROBLEM:
237 What about waiting lock requests? If a transaction is waiting to make an
238 update to a record which another transaction modified, how does the other transaction
239 know to send the end-lock-wait signal to the waiting transaction? If we have
240 the convention that a transaction may wait for just one lock at a time, how
241 do we preserve it if lock wait ends?
242 
243 PROBLEM:
244 Checking the trx id label of a secondary index record. In the case of a
245 modification, not an insert, is this necessary? A secondary index record
246 is modified only by setting or resetting its deleted flag. A secondary index
247 record contains fields to uniquely determine the corresponding clustered
248 index record. A secondary index record is therefore only modified if we
249 also modify the clustered index record, and the trx id checking is done
250 on the clustered index record, before we come to modify the secondary index
251 record. So, in the case of delete marking or unmarking a secondary index
252 record, we do not have to care about trx ids, only the locks in the lock
253 table must be checked. In the case of a select from a secondary index, the
254 trx id is relevant, and in this case we may have to search the clustered
255 index record.
256 
257 PROBLEM: How to update record locks when a page is split or merged, or
258 --------------------------------------------------------------------
259 a record is deleted or updated?
260 If the size of fields in a record changes, we perform the update by
261 a delete followed by an insert. How can we retain the locks set or
262 waiting on the record? Because a record lock is indexed in the bitmap
263 by the heap number of the record, when we remove the record from the
264 record list, it is still possible to keep the lock bits. If the page
265 is reorganized, we could make a table of old and new heap numbers,
266 and permute the bitmaps in the locks accordingly. We can add to the
267 table a row telling where the updated record ended. If the update does
268 not require a reorganization of the page, we can simply move the lock
269 bits for the updated record to the position determined by its new heap
270 number (we may have to allocate a new lock, if we run out of the bitmap
271 in the old one).
272  A more complicated case is the one where the reinsertion of the
273 updated record is done pessimistically, because the structure of the
274 tree may change.
275 
276 PROBLEM: If a supremum record is removed in a page merge, or a record
277 ---------------------------------------------------------------------
278 removed in a purge, what should be done with the waiting lock requests? In a split to
279 the right, we just move the lock requests to the new supremum. If a record
280 is removed, we could move the waiting lock request to its inheritor, the
281 next record in the index. But, the next record may already have lock
282 requests on its own queue. A new deadlock check should be made then. Maybe
283 it is easier just to release the waiting transactions. They can then enqueue
284 new lock requests on appropriate records.
285 
286 PROBLEM: When a record is inserted, what locks should it inherit from the
287 -------------------------------------------------------------------------
288 upper neighbor? An insert of a new supremum record in a page split is
289 always possible, but an insert of a new user record requires that the upper
290 neighbor does not have any lock requests by other transactions, granted or
291 waiting, in its lock queue. Solution: We can copy the locks as gap type
292 locks, so that also the waiting locks are transformed to granted gap type
293 locks on the inserted record. */
294 
295 /* LOCK COMPATIBILITY MATRIX
296  *    IS IX S  X  AI
297  * IS +  +  +  -  +
298  * IX +  +  -  -  +
299  * S  +  -  +  -  -
300  * X  -  -  -  -  -
301  * AI +  +  -  -  -
302  *
303  * Note that for rows, InnoDB only acquires S or X locks.
304  * For tables, InnoDB normally acquires IS or IX locks.
305  * S or X table locks are only acquired for LOCK TABLES.
306  * Auto-increment (AI) locks are needed because of
307  * statement-level MySQL binlog.
308  * See also lock_mode_compatible().
309  */
310 #define LK(a,b) (1 << ((a) * LOCK_NUM + (b)))
311 #define LKS(a,b) LK(a,b) | LK(b,a)
312 
313 /* Define the lock compatibility matrix in a ulint. The first line below
314 defines the diagonal entries. The following lines define the compatibility
315 for LOCK_IX, LOCK_S, and LOCK_AUTO_INC using LKS(), since the matrix
316 is symmetric. */
317 #define LOCK_MODE_COMPATIBILITY 0 \
318  | LK(LOCK_IS, LOCK_IS) | LK(LOCK_IX, LOCK_IX) | LK(LOCK_S, LOCK_S) \
319  | LKS(LOCK_IX, LOCK_IS) | LKS(LOCK_IS, LOCK_AUTO_INC) \
320  | LKS(LOCK_S, LOCK_IS) \
321  | LKS(LOCK_AUTO_INC, LOCK_IS) | LKS(LOCK_AUTO_INC, LOCK_IX)
322 
323 /* STRONGER-OR-EQUAL RELATION (mode1=row, mode2=column)
324  *    IS IX S  X  AI
325  * IS +  -  -  -  -
326  * IX +  +  -  -  -
327  * S  +  -  +  -  -
328  * X  +  +  +  +  +
329  * AI -  -  -  -  +
330  * See lock_mode_stronger_or_eq().
331  */
332 
333 /* Define the stronger-or-equal lock relation in a ulint. This relation
334 contains all pairs LK(mode1, mode2) where mode1 is stronger than or
335 equal to mode2. */
336 #define LOCK_MODE_STRONGER_OR_EQ 0 \
337  | LK(LOCK_IS, LOCK_IS) \
338  | LK(LOCK_IX, LOCK_IS) | LK(LOCK_IX, LOCK_IX) \
339  | LK(LOCK_S, LOCK_IS) | LK(LOCK_S, LOCK_S) \
340  | LK(LOCK_AUTO_INC, LOCK_AUTO_INC) \
341  | LK(LOCK_X, LOCK_IS) | LK(LOCK_X, LOCK_IX) | LK(LOCK_X, LOCK_S) \
342  | LK(LOCK_X, LOCK_AUTO_INC) | LK(LOCK_X, LOCK_X)
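/* A minimal standalone sketch (illustrative only, not the real InnoDB
definitions) of the bit-matrix technique used above: each (mode1, mode2)
pair maps to one bit of an unsigned integer, so a compatibility or
strength test is a single AND against a compile-time constant. The EX_*
names and values below are invented for the example; LOCK_AUTO_INC is
left out for brevity. */

#include <assert.h>

enum { EX_IS = 0, EX_IX = 1, EX_S = 2, EX_X = 3, EX_NUM = 4 };

#define EX_LK(a, b)  (1UL << ((a) * EX_NUM + (b)))
#define EX_LKS(a, b) (EX_LK(a, b) | EX_LK(b, a))

/* IS and IX are mutually compatible; S is compatible with IS and with
itself; X conflicts with everything. */
#define EX_COMPAT (EX_LK(EX_IS, EX_IS) | EX_LK(EX_IX, EX_IX)	\
	| EX_LK(EX_S, EX_S) | EX_LKS(EX_IX, EX_IS)		\
	| EX_LKS(EX_S, EX_IS))

static int ex_compatible(int m1, int m2)
{
	return((EX_COMPAT & EX_LK(m1, m2)) != 0);
}

static void ex_compat_demo(void)
{
	assert(ex_compatible(EX_IX, EX_IS));	/* intention locks coexist */
	assert(!ex_compatible(EX_S, EX_IX));	/* S and IX conflict */
	assert(!ex_compatible(EX_X, EX_X));	/* X conflicts with everything */
}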
343 
344 #ifdef UNIV_DEBUG
345 UNIV_INTERN ibool lock_print_waits = FALSE;
346 
347 /*********************************************************************/
350 static
351 ibool
352 lock_validate(void);
353 /*===============*/
354 
355 /*********************************************************************/
358 static
359 ibool
360 lock_rec_validate_page(
361 /*===================*/
362  ulint space,
363  ulint page_no);
364 #endif /* UNIV_DEBUG */
365 
366 /* The lock system */
367 UNIV_INTERN lock_sys_t* lock_sys = NULL;
368 
369 /* We store info on the latest deadlock error in this buffer. InnoDB
370 Monitor will then fetch it and print it. */
371 UNIV_INTERN ibool lock_deadlock_found = FALSE;
372 UNIV_INTERN FILE* lock_latest_err_file;
373 
374 /* Flags for recursive deadlock search */
375 #define LOCK_VICTIM_IS_START 1
376 #define LOCK_VICTIM_IS_OTHER 2
377 #define LOCK_EXCEED_MAX_DEPTH 3
378 
379 /********************************************************************/
384 static
385 ibool
386 lock_deadlock_occurs(
387 /*=================*/
388  lock_t* lock,
389  trx_t* trx);
390 /********************************************************************/
398 static
399 ulint
400 lock_deadlock_recursive(
401 /*====================*/
402  trx_t* start,
403  trx_t* trx,
404  lock_t* wait_lock,
405  ulint* cost,
408  ulint depth);
412 /*********************************************************************/
415 UNIV_INLINE
416 ibool
417 lock_rec_get_nth_bit(
418 /*=================*/
419  const lock_t* lock,
420  ulint i)
421 {
422  ulint byte_index;
423  ulint bit_index;
424 
425  ut_ad(lock);
426  ut_ad(lock_get_type_low(lock) == LOCK_REC);
427 
428  if (i >= lock->un_member.rec_lock.n_bits) {
429 
430  return(FALSE);
431  }
432 
433  byte_index = i / 8;
434  bit_index = i % 8;
435 
436  return(1 & ((const byte*) &lock[1])[byte_index] >> bit_index);
437 }
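/* A standalone sketch (illustrative only, not the InnoDB API) of the
byte/bit arithmetic used by lock_rec_get_nth_bit() above and by
lock_rec_set_nth_bit() further below: heap number i maps to bit (i % 8)
of byte (i / 8) in a bitmap laid out directly after the owning struct. */

#include <string.h>

typedef unsigned char ex_byte;

static int ex_bitmap_get(const ex_byte* bitmap, unsigned i)
{
	return(1 & (bitmap[i / 8] >> (i % 8)));
}

static void ex_bitmap_set(ex_byte* bitmap, unsigned i)
{
	bitmap[i / 8] |= (ex_byte) (1 << (i % 8));
}

/* Usage: an 8-byte bitmap covers heap numbers 0..63, exactly as a record
lock with n_bits == 64 would. */
static void ex_bitmap_demo(void)
{
	ex_byte	bitmap[8];

	memset(bitmap, 0, sizeof bitmap);
	ex_bitmap_set(bitmap, 37);	/* heap_no 37 -> byte 4, bit 5 */

	/* ex_bitmap_get(bitmap, 37) now returns 1; all other bits stay 0 */
}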
438 
439 /*************************************************************************/
440 
441 #define lock_mutex_enter_kernel() mutex_enter(&kernel_mutex)
442 #define lock_mutex_exit_kernel() mutex_exit(&kernel_mutex)
443 
444 /*********************************************************************/
447 UNIV_INTERN
448 ibool
449 lock_check_trx_id_sanity(
450 /*=====================*/
451  trx_id_t trx_id,
452  const rec_t* rec,
453  dict_index_t* index,
454  const ulint* offsets,
455  ibool has_kernel_mutex)
457 {
458  ibool is_ok = TRUE;
459 
460  ut_ad(rec_offs_validate(rec, index, offsets));
461 
462  if (!has_kernel_mutex) {
463  mutex_enter(&kernel_mutex);
464  }
465 
466  /* A sanity check: the trx_id in rec must be smaller than the global
467  trx id counter */
468 
469  if (UNIV_UNLIKELY(trx_id >= trx_sys->max_trx_id)) {
470  ut_print_timestamp(stderr);
471  fputs(" InnoDB: Error: transaction id associated"
472  " with record\n",
473  stderr);
474  rec_print_new(stderr, rec, offsets);
475  fputs("InnoDB: in ", stderr);
476  dict_index_name_print(stderr, NULL, index);
477  fprintf(stderr, "\n"
478  "InnoDB: is " TRX_ID_FMT " which is higher than the"
479  " global trx id counter " TRX_ID_FMT "!\n"
480  "InnoDB: The table is corrupt. You have to do"
481  " dump + drop + reimport.\n",
482  trx_id, trx_sys->max_trx_id);
483 
484  is_ok = FALSE;
485  }
486 
487  if (!has_kernel_mutex) {
488  mutex_exit(&kernel_mutex);
489  }
490 
491  return(is_ok);
492 }
493 
494 /*********************************************************************/
498 UNIV_INTERN
499 ibool
500 lock_clust_rec_cons_read_sees(
501 /*==========================*/
502  const rec_t* rec,
504  dict_index_t* index,
505  const ulint* offsets,
506  read_view_t* view)
507 {
508  trx_id_t trx_id;
509 
510  ut_ad(dict_index_is_clust(index));
512  ut_ad(rec_offs_validate(rec, index, offsets));
513 
514  /* NOTE that we call this function while holding the search
515  system latch. To obey the latching order we must NOT reserve the
516  kernel mutex here! */
517 
518  trx_id = row_get_rec_trx_id(rec, index, offsets);
519 
520  return(read_view_sees_trx_id(view, trx_id));
521 }
522 
523 /*********************************************************************/
533 UNIV_INTERN
534 ulint
535 lock_sec_rec_cons_read_sees(
536 /*========================*/
537  const rec_t* rec,
540  const read_view_t* view)
541 {
542  trx_id_t max_trx_id;
543 
545 
546  /* NOTE that we might call this function while holding the search
547  system latch. To obey the latching order we must NOT reserve the
548  kernel mutex here! */
549 
550  if (recv_recovery_is_on()) {
551 
552  return(FALSE);
553  }
554 
555  max_trx_id = page_get_max_trx_id(page_align(rec));
556  ut_ad(max_trx_id);
557 
558  return(max_trx_id < view->up_limit_id);
559 }
560 
561 /*********************************************************************/
563 UNIV_INTERN
564 void
565 lock_sys_create(
566 /*============*/
567  ulint n_cells)
568 {
569  lock_sys = static_cast<lock_sys_t *>(mem_alloc(sizeof(lock_sys_t)));
570 
571  lock_sys->rec_hash = hash_create(n_cells);
572 
573  /* hash_create_mutexes(lock_sys->rec_hash, 2, SYNC_REC_LOCK); */
574 
575  lock_latest_err_file = os_file_create_tmpfile();
576  ut_a(lock_latest_err_file);
577 }
578 
579 /*********************************************************************/
581 UNIV_INTERN
582 void
583 lock_sys_close(void)
584 /*================*/
585 {
586  if (lock_latest_err_file != NULL) {
587  fclose(lock_latest_err_file);
588  lock_latest_err_file = NULL;
589  }
590 
591  hash_table_free(lock_sys->rec_hash);
593  lock_sys = NULL;
594 }
595 
596 /*********************************************************************/
599 UNIV_INTERN
600 ulint
601 lock_get_size(void)
602 /*===============*/
603 {
604  return((ulint)sizeof(lock_t));
605 }
606 
607 /*********************************************************************/
610 UNIV_INLINE
611 enum lock_mode
612 lock_get_mode(
613 /*==========*/
614  const lock_t* lock)
615 {
616  ut_ad(lock);
617 
618  return static_cast<lock_mode>(lock->type_mode & LOCK_MODE_MASK);
619 }
620 
621 /*********************************************************************/
624 UNIV_INLINE
625 ibool
626 lock_get_wait(
627 /*==========*/
628  const lock_t* lock)
629 {
630  ut_ad(lock);
631 
632  if (UNIV_UNLIKELY(lock->type_mode & LOCK_WAIT)) {
633 
634  return(TRUE);
635  }
636 
637  return(FALSE);
638 }
639 
640 /*********************************************************************/
647 UNIV_INTERN
648 dict_table_t*
649 lock_get_src_table(
650 /*===============*/
651  trx_t* trx,
652  dict_table_t* dest,
653  enum lock_mode* mode)
654 {
655  dict_table_t* src;
656  lock_t* lock;
657 
658  src = NULL;
659  *mode = LOCK_NONE;
660 
661  for (lock = UT_LIST_GET_FIRST(trx->trx_locks);
662  lock;
663  lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
664  lock_table_t* tab_lock;
665  enum lock_mode lock_mode;
666  if (!(lock_get_type_low(lock) & LOCK_TABLE)) {
667  /* We are only interested in table locks. */
668  continue;
669  }
670  tab_lock = &lock->un_member.tab_lock;
671  if (dest == tab_lock->table) {
672  /* We are not interested in the destination table. */
673  continue;
674  } else if (!src) {
675  /* This presumably is the source table. */
676  src = tab_lock->table;
677  if (UT_LIST_GET_LEN(src->locks) != 1
678  || UT_LIST_GET_FIRST(src->locks) != lock) {
679  /* We only support the case when
680  there is only one lock on this table. */
681  return(NULL);
682  }
683  } else if (src != tab_lock->table) {
684  /* The transaction is locking more than
685  two tables (src and dest): abort */
686  return(NULL);
687  }
688 
689  /* Check that the source table is locked by
690  LOCK_IX or LOCK_IS. */
691  lock_mode = lock_get_mode(lock);
692  if (lock_mode == LOCK_IX || lock_mode == LOCK_IS) {
693  if (*mode != LOCK_NONE && *mode != lock_mode) {
694  /* There are multiple locks on src. */
695  return(NULL);
696  }
697  *mode = lock_mode;
698  }
699  }
700 
701  if (!src) {
702  /* No source table lock found: flag the situation to caller */
703  src = dest;
704  }
705 
706  return(src);
707 }
708 
709 /*********************************************************************/
715 UNIV_INTERN
716 ibool
717 lock_is_table_exclusive(
718 /*====================*/
719  dict_table_t* table,
720  trx_t* trx)
721 {
722  const lock_t* lock;
723  ibool ok = FALSE;
724 
725  ut_ad(table);
726  ut_ad(trx);
727 
728  lock_mutex_enter_kernel();
729 
730  for (lock = UT_LIST_GET_FIRST(table->locks);
731  lock;
732  lock = UT_LIST_GET_NEXT(locks, &lock->un_member.tab_lock)) {
733  if (lock->trx != trx) {
734  /* A lock on the table is held
735  by some other transaction. */
736  goto not_ok;
737  }
738 
739  if (!(lock_get_type_low(lock) & LOCK_TABLE)) {
740  /* We are interested in table locks only. */
741  continue;
742  }
743 
744  switch (lock_get_mode(lock)) {
745  case LOCK_IX:
746  ok = TRUE;
747  break;
748  case LOCK_AUTO_INC:
749  /* It is allowed for trx to hold an
750  auto_increment lock. */
751  break;
752  default:
753 not_ok:
754  /* Other table locks than LOCK_IX are not allowed. */
755  ok = FALSE;
756  goto func_exit;
757  }
758  }
759 
760 func_exit:
761  lock_mutex_exit_kernel();
762 
763  return(ok);
764 }
765 
766 /*********************************************************************/
768 UNIV_INLINE
769 void
770 lock_set_lock_and_trx_wait(
771 /*=======================*/
772  lock_t* lock,
773  trx_t* trx)
774 {
775  ut_ad(lock);
776  ut_ad(trx->wait_lock == NULL);
777 
778  trx->wait_lock = lock;
779  lock->type_mode |= LOCK_WAIT;
780 }
781 
782 /**********************************************************************/
785 UNIV_INLINE
786 void
787 lock_reset_lock_and_trx_wait(
788 /*=========================*/
789  lock_t* lock)
790 {
791  ut_ad((lock->trx)->wait_lock == lock);
792  ut_ad(lock_get_wait(lock));
793 
794  /* Reset the back pointer in trx to this waiting lock request */
795 
796  (lock->trx)->wait_lock = NULL;
797  lock->type_mode &= ~LOCK_WAIT;
798 }
799 
800 /*********************************************************************/
803 UNIV_INLINE
804 ibool
805 lock_rec_get_gap(
806 /*=============*/
807  const lock_t* lock)
808 {
809  ut_ad(lock);
810  ut_ad(lock_get_type_low(lock) == LOCK_REC);
811 
812  if (lock->type_mode & LOCK_GAP) {
813 
814  return(TRUE);
815  }
816 
817  return(FALSE);
818 }
819 
820 /*********************************************************************/
823 UNIV_INLINE
824 ibool
825 lock_rec_get_rec_not_gap(
826 /*=====================*/
827  const lock_t* lock)
828 {
829  ut_ad(lock);
830  ut_ad(lock_get_type_low(lock) == LOCK_REC);
831 
832  if (lock->type_mode & LOCK_REC_NOT_GAP) {
833 
834  return(TRUE);
835  }
836 
837  return(FALSE);
838 }
839 
840 /*********************************************************************/
843 UNIV_INLINE
844 ibool
845 lock_rec_get_insert_intention(
846 /*==========================*/
847  const lock_t* lock)
848 {
849  ut_ad(lock);
850  ut_ad(lock_get_type_low(lock) == LOCK_REC);
851 
852  if (lock->type_mode & LOCK_INSERT_INTENTION) {
853 
854  return(TRUE);
855  }
856 
857  return(FALSE);
858 }
859 
860 /*********************************************************************/
863 UNIV_INLINE
864 ulint
865 lock_mode_stronger_or_eq(
866 /*=====================*/
867  enum lock_mode mode1,
868  enum lock_mode mode2)
869 {
870  ut_ad(mode1 == LOCK_X || mode1 == LOCK_S || mode1 == LOCK_IX
871  || mode1 == LOCK_IS || mode1 == LOCK_AUTO_INC);
872  ut_ad(mode2 == LOCK_X || mode2 == LOCK_S || mode2 == LOCK_IX
873  || mode2 == LOCK_IS || mode2 == LOCK_AUTO_INC);
874 
875  return((LOCK_MODE_STRONGER_OR_EQ) & LK(mode1, mode2));
876 }
877 
878 /*********************************************************************/
881 UNIV_INLINE
882 ulint
883 lock_mode_compatible(
884 /*=================*/
885  enum lock_mode mode1,
886  enum lock_mode mode2)
887 {
888  ut_ad(mode1 == LOCK_X || mode1 == LOCK_S || mode1 == LOCK_IX
889  || mode1 == LOCK_IS || mode1 == LOCK_AUTO_INC);
890  ut_ad(mode2 == LOCK_X || mode2 == LOCK_S || mode2 == LOCK_IX
891  || mode2 == LOCK_IS || mode2 == LOCK_AUTO_INC);
892 
893  return((LOCK_MODE_COMPATIBILITY) & LK(mode1, mode2));
894 }
895 
896 /*********************************************************************/
899 UNIV_INLINE
900 ibool
901 lock_rec_has_to_wait(
902 /*=================*/
903  const trx_t* trx,
904  ulint type_mode,
908  const lock_t* lock2,
912  ibool lock_is_on_supremum)
916 {
917  ut_ad(trx && lock2);
918  ut_ad(lock_get_type_low(lock2) == LOCK_REC);
919 
920  if (trx != lock2->trx
921  && !lock_mode_compatible(static_cast<lock_mode>(LOCK_MODE_MASK & type_mode),
922  lock_get_mode(lock2))) {
923 
924  /* We have somewhat complex rules when gap type record locks
925  cause waits */
926 
927  if ((lock_is_on_supremum || (type_mode & LOCK_GAP))
928  && !(type_mode & LOCK_INSERT_INTENTION)) {
929 
930  /* Gap type locks without LOCK_INSERT_INTENTION flag
931  do not need to wait for anything. This is because
932  different users can have conflicting lock types
933  on gaps. */
934 
935  return(FALSE);
936  }
937 
938  if (!(type_mode & LOCK_INSERT_INTENTION)
939  && lock_rec_get_gap(lock2)) {
940 
941  /* Record lock (LOCK_ORDINARY or LOCK_REC_NOT_GAP)
942  does not need to wait for a gap type lock */
943 
944  return(FALSE);
945  }
946 
947  if ((type_mode & LOCK_GAP)
948  && lock_rec_get_rec_not_gap(lock2)) {
949 
950  /* Lock on gap does not need to wait for
951  a LOCK_REC_NOT_GAP type lock */
952 
953  return(FALSE);
954  }
955 
956  if (lock_rec_get_insert_intention(lock2)) {
957 
958  /* No lock request needs to wait for an insert
959  intention lock to be removed. This is ok since our
960  rules allow conflicting locks on gaps. This eliminates
961  a spurious deadlock caused by a next-key lock waiting
962  for an insert intention lock; when the insert
963  intention lock was granted, the insert deadlocked on
964  the waiting next-key lock.
965 
966  Also, insert intention locks do not disturb each
967  other. */
968 
969  return(FALSE);
970  }
971 
972  return(TRUE);
973  }
974 
975  return(FALSE);
976 }
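/* A standalone sketch (not the InnoDB API) of the gap-related early
returns in lock_rec_has_to_wait() above, reduced to a pure function over
the request and the lock already in the queue. It assumes the two lock
modes conflict and the locks belong to different transactions; only the
gap rules are modeled, and all names are invented for the example. */

#include <assert.h>

struct ex_req  { int gap; int insert_intention; int on_supremum; };
struct ex_held { int gap; int rec_not_gap; int insert_intention; };

static int ex_must_wait(struct ex_req rq, struct ex_held held)
{
	if ((rq.on_supremum || rq.gap) && !rq.insert_intention) {
		return(0);	/* plain gap requests never wait */
	}
	if (!rq.insert_intention && held.gap) {
		return(0);	/* record request ignores a pure gap lock */
	}
	if (rq.gap && held.rec_not_gap) {
		return(0);	/* gap request ignores LOCK_REC_NOT_GAP */
	}
	if (held.insert_intention) {
		return(0);	/* nobody waits for an insert intention lock */
	}
	return(1);
}

static void ex_wait_demo(void)
{
	struct ex_req	next_key = {0, 0, 0};	/* ordinary next-key request */
	struct ex_req	gap_only = {1, 0, 0};	/* pure gap request */
	struct ex_req	ins_int  = {1, 1, 0};	/* insert intention request */
	struct ex_held	held_rec = {0, 0, 0};	/* granted next-key lock */
	struct ex_held	held_gap = {1, 0, 0};	/* granted gap lock */
	struct ex_held	held_ii  = {1, 0, 1};	/* granted insert intention */

	assert(ex_must_wait(next_key, held_rec));	/* conflicting modes: wait */
	assert(!ex_must_wait(gap_only, held_rec));	/* gap request never waits */
	assert(!ex_must_wait(next_key, held_gap));	/* record vs. gap: no wait */
	assert(ex_must_wait(ins_int, held_gap));	/* insert blocked by gap lock */
	assert(!ex_must_wait(ins_int, held_ii));	/* insert intentions coexist */
}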
977 
978 /*********************************************************************/
981 UNIV_INTERN
982 ibool
983 lock_has_to_wait(
984 /*=============*/
985  const lock_t* lock1,
986  const lock_t* lock2)
990 {
991  ut_ad(lock1 && lock2);
992 
993  if (lock1->trx != lock2->trx
994  && !lock_mode_compatible(lock_get_mode(lock1),
995  lock_get_mode(lock2))) {
996  if (lock_get_type_low(lock1) == LOCK_REC) {
997  ut_ad(lock_get_type_low(lock2) == LOCK_REC);
998 
999  /* If this lock request is for a supremum record
1000  then the second bit on the lock bitmap is set */
1001 
1002  return(lock_rec_has_to_wait(lock1->trx,
1003  lock1->type_mode, lock2,
1004  lock_rec_get_nth_bit(
1005  lock1, 1)));
1006  }
1007 
1008  return(TRUE);
1009  }
1010 
1011  return(FALSE);
1012 }
1013 
1014 /*============== RECORD LOCK BASIC FUNCTIONS ============================*/
1015 
1016 /*********************************************************************/
1019 UNIV_INLINE
1020 ulint
1021 lock_rec_get_n_bits(
1022 /*================*/
1023  const lock_t* lock)
1024 {
1025  return(lock->un_member.rec_lock.n_bits);
1026 }
1027 
1028 /**********************************************************************/
1030 UNIV_INLINE
1031 void
1032 lock_rec_set_nth_bit(
1033 /*=================*/
1034  lock_t* lock,
1035  ulint i)
1036 {
1037  ulint byte_index;
1038  ulint bit_index;
1039 
1040  ut_ad(lock);
1041  ut_ad(lock_get_type_low(lock) == LOCK_REC);
1042  ut_ad(i < lock->un_member.rec_lock.n_bits);
1043 
1044  byte_index = i / 8;
1045  bit_index = i % 8;
1046 
1047  ((byte*) &lock[1])[byte_index] |= 1 << bit_index;
1048 }
1049 
1050 /**********************************************************************/
1055 UNIV_INTERN
1056 ulint
1057 lock_rec_find_set_bit(
1058 /*==================*/
1059  const lock_t* lock)
1060 {
1061  ulint i;
1062 
1063  for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
1064 
1065  if (lock_rec_get_nth_bit(lock, i)) {
1066 
1067  return(i);
1068  }
1069  }
1070 
1071  return(ULINT_UNDEFINED);
1072 }
1073 
1074 /**********************************************************************/
1076 UNIV_INLINE
1077 void
1078 lock_rec_reset_nth_bit(
1079 /*===================*/
1080  lock_t* lock,
1081  ulint i)
1083 {
1084  ulint byte_index;
1085  ulint bit_index;
1086 
1087  ut_ad(lock);
1088  ut_ad(lock_get_type_low(lock) == LOCK_REC);
1089  ut_ad(i < lock->un_member.rec_lock.n_bits);
1090 
1091  byte_index = i / 8;
1092  bit_index = i % 8;
1093 
1094  ((byte*) &lock[1])[byte_index] &= ~(1 << bit_index);
1095 }
1096 
1097 /*********************************************************************/
1100 UNIV_INLINE
1101 lock_t*
1102 lock_rec_get_next_on_page(
1103 /*======================*/
1104  lock_t* lock)
1105 {
1106  ulint space;
1107  ulint page_no;
1108 
1109  ut_ad(mutex_own(&kernel_mutex));
1110  ut_ad(lock_get_type_low(lock) == LOCK_REC);
1111 
1112  space = lock->un_member.rec_lock.space;
1113  page_no = lock->un_member.rec_lock.page_no;
1114 
1115  for (;;) {
1116  lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, lock));
1117 
1118  if (!lock) {
1119 
1120  break;
1121  }
1122 
1123  if ((lock->un_member.rec_lock.space == space)
1124  && (lock->un_member.rec_lock.page_no == page_no)) {
1125 
1126  break;
1127  }
1128  }
1129 
1130  return(lock);
1131 }
1132 
1133 /*********************************************************************/
1137 UNIV_INLINE
1138 lock_t*
1139 lock_rec_get_first_on_page_addr(
1140 /*============================*/
1141  ulint space,
1142  ulint page_no)
1143 {
1144  lock_t* lock;
1145 
1146  ut_ad(mutex_own(&kernel_mutex));
1147 
1148  lock = static_cast<lock_t *>(HASH_GET_FIRST(lock_sys->rec_hash,
1149  lock_rec_hash(space, page_no)));
1150  while (lock) {
1151  if ((lock->un_member.rec_lock.space == space)
1152  && (lock->un_member.rec_lock.page_no == page_no)) {
1153 
1154  break;
1155  }
1156 
1157  lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, lock));
1158  }
1159 
1160  return(lock);
1161 }
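/* A standalone sketch (illustrative only) of the lookup pattern above:
record locks hang off a hash table keyed by a fold of (space, page_no),
so one chain can mix locks of several pages and every probe must re-check
both fields, exactly as the loops here and in lock_rec_get_next_on_page()
do. The ex_* names are invented for the example. */

#include <stddef.h>

struct ex_rec_lock {
	unsigned long		space;
	unsigned long		page_no;
	struct ex_rec_lock*	hash_next;	/* next lock in the same cell */
};

static struct ex_rec_lock* ex_first_on_page_addr(
	struct ex_rec_lock*	cell,		/* hash cell the fold maps to */
	unsigned long		space,
	unsigned long		page_no)
{
	struct ex_rec_lock*	lock;

	for (lock = cell; lock != NULL; lock = lock->hash_next) {
		if (lock->space == space && lock->page_no == page_no) {
			return(lock);		/* first lock on that page */
		}
	}

	return(NULL);	/* no record lock on the page */
}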
1162 
1163 /*********************************************************************/
1166 UNIV_INTERN
1167 ibool
1168 lock_rec_expl_exist_on_page(
1169 /*========================*/
1170  ulint space,
1171  ulint page_no)
1172 {
1173  ibool ret;
1174 
1175  mutex_enter(&kernel_mutex);
1176 
1177  if (lock_rec_get_first_on_page_addr(space, page_no)) {
1178  ret = TRUE;
1179  } else {
1180  ret = FALSE;
1181  }
1182 
1183  mutex_exit(&kernel_mutex);
1184 
1185  return(ret);
1186 }
1187 
1188 /*********************************************************************/
1192 UNIV_INLINE
1193 lock_t*
1194 lock_rec_get_first_on_page(
1195 /*=======================*/
1196  const buf_block_t* block)
1197 {
1198  ulint hash;
1199  lock_t* lock;
1200  ulint space = buf_block_get_space(block);
1201  ulint page_no = buf_block_get_page_no(block);
1202 
1203  ut_ad(mutex_own(&kernel_mutex));
1204 
1205  hash = buf_block_get_lock_hash_val(block);
1206 
1207  lock = static_cast<lock_t *>(HASH_GET_FIRST(lock_sys->rec_hash, hash));
1208 
1209  while (lock) {
1210  if ((lock->un_member.rec_lock.space == space)
1211  && (lock->un_member.rec_lock.page_no == page_no)) {
1212 
1213  break;
1214  }
1215 
1216  lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, lock));
1217  }
1218 
1219  return(lock);
1220 }
1221 
1222 /*********************************************************************/
1225 UNIV_INLINE
1226 lock_t*
1227 lock_rec_get_next(
1228 /*==============*/
1229  ulint heap_no,
1230  lock_t* lock)
1231 {
1232  ut_ad(mutex_own(&kernel_mutex));
1233 
1234  do {
1235  ut_ad(lock_get_type_low(lock) == LOCK_REC);
1236  lock = lock_rec_get_next_on_page(lock);
1237  } while (lock && !lock_rec_get_nth_bit(lock, heap_no));
1238 
1239  return(lock);
1240 }
1241 
1242 /*********************************************************************/
1245 UNIV_INLINE
1246 lock_t*
1247 lock_rec_get_first(
1248 /*===============*/
1249  const buf_block_t* block,
1250  ulint heap_no)
1251 {
1252  lock_t* lock;
1253 
1254  ut_ad(mutex_own(&kernel_mutex));
1255 
1256  for (lock = lock_rec_get_first_on_page(block); lock;
1257  lock = lock_rec_get_next_on_page(lock)) {
1258  if (lock_rec_get_nth_bit(lock, heap_no)) {
1259  break;
1260  }
1261  }
1262 
1263  return(lock);
1264 }
1265 
1266 /*********************************************************************/
1270 static
1271 void
1272 lock_rec_bitmap_reset(
1273 /*==================*/
1274  lock_t* lock)
1275 {
1276  ulint n_bytes;
1277 
1278  ut_ad(lock_get_type_low(lock) == LOCK_REC);
1279 
1280  /* Reset to zero the bitmap which resides immediately after the lock
1281  struct */
1282 
1283  n_bytes = lock_rec_get_n_bits(lock) / 8;
1284 
1285  ut_ad((lock_rec_get_n_bits(lock) % 8) == 0);
1286 
1287  memset(&lock[1], 0, n_bytes);
1288 }
1289 
1290 /*********************************************************************/
1293 static
1294 lock_t*
1295 lock_rec_copy(
1296 /*==========*/
1297  const lock_t* lock,
1298  mem_heap_t* heap)
1299 {
1300  ulint size;
1301 
1302  ut_ad(lock_get_type_low(lock) == LOCK_REC);
1303 
1304  size = sizeof(lock_t) + lock_rec_get_n_bits(lock) / 8;
1305 
1306  return static_cast<lock_t *>(mem_heap_dup(heap, lock, size));
1307 }
1308 
1309 /*********************************************************************/
1312 UNIV_INTERN
1313 const lock_t*
1314 lock_rec_get_prev(
1315 /*==============*/
1316  const lock_t* in_lock,
1317  ulint heap_no)
1318 {
1319  lock_t* lock;
1320  ulint space;
1321  ulint page_no;
1322  lock_t* found_lock = NULL;
1323 
1324  ut_ad(mutex_own(&kernel_mutex));
1325  ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
1326 
1327  space = in_lock->un_member.rec_lock.space;
1328  page_no = in_lock->un_member.rec_lock.page_no;
1329 
1330  lock = lock_rec_get_first_on_page_addr(space, page_no);
1331 
1332  for (;;) {
1333  ut_ad(lock);
1334 
1335  if (lock == in_lock) {
1336 
1337  return(found_lock);
1338  }
1339 
1340  if (lock_rec_get_nth_bit(lock, heap_no)) {
1341 
1342  found_lock = lock;
1343  }
1344 
1345  lock = lock_rec_get_next_on_page(lock);
1346  }
1347 }
1348 
1349 /*============= FUNCTIONS FOR ANALYZING TABLE LOCK QUEUE ================*/
1350 
1351 /*********************************************************************/
1354 UNIV_INLINE
1355 lock_t*
1356 lock_table_has(
1357 /*===========*/
1358  trx_t* trx,
1359  dict_table_t* table,
1360  enum lock_mode mode)
1361 {
1362  lock_t* lock;
1363 
1364  ut_ad(mutex_own(&kernel_mutex));
1365 
1366  /* Look for stronger locks the same trx already has on the table */
1367 
1368  lock = UT_LIST_GET_LAST(table->locks);
1369 
1370  while (lock != NULL) {
1371 
1372  if (lock->trx == trx
1373  && lock_mode_stronger_or_eq(lock_get_mode(lock), mode)) {
1374 
1375  /* The same trx already has locked the table in
1376  a mode stronger or equal to the mode given */
1377 
1378  ut_ad(!lock_get_wait(lock));
1379 
1380  return(lock);
1381  }
1382 
1383  lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock);
1384  }
1385 
1386  return(NULL);
1387 }
1388 
1389 /*============= FUNCTIONS FOR ANALYZING RECORD LOCK QUEUE ================*/
1390 
1391 /*********************************************************************/
1395 UNIV_INLINE
1396 lock_t*
1397 lock_rec_has_expl(
1398 /*==============*/
1399  ulint precise_mode,
1404  const buf_block_t* block,
1406  ulint heap_no,
1407  trx_t* trx)
1408 {
1409  lock_t* lock;
1410 
1411  ut_ad(mutex_own(&kernel_mutex));
1412  ut_ad((precise_mode & LOCK_MODE_MASK) == LOCK_S
1413  || (precise_mode & LOCK_MODE_MASK) == LOCK_X);
1414  ut_ad(!(precise_mode & LOCK_INSERT_INTENTION));
1415 
1416  lock = lock_rec_get_first(block, heap_no);
1417 
1418  while (lock) {
1419  if (lock->trx == trx
1420  && lock_mode_stronger_or_eq(lock_get_mode(lock),
1421  static_cast<lock_mode>(precise_mode & LOCK_MODE_MASK))
1422  && !lock_get_wait(lock)
1423  && (!lock_rec_get_rec_not_gap(lock)
1424  || (precise_mode & LOCK_REC_NOT_GAP)
1425  || heap_no == PAGE_HEAP_NO_SUPREMUM)
1426  && (!lock_rec_get_gap(lock)
1427  || (precise_mode & LOCK_GAP)
1428  || heap_no == PAGE_HEAP_NO_SUPREMUM)
1429  && (!lock_rec_get_insert_intention(lock))) {
1430 
1431  return(lock);
1432  }
1433 
1434  lock = lock_rec_get_next(heap_no, lock);
1435  }
1436 
1437  return(NULL);
1438 }
1439 
1440 #ifdef UNIV_DEBUG
1441 /*********************************************************************/
1444 static
1445 lock_t*
1446 lock_rec_other_has_expl_req(
1447 /*========================*/
1448  enum lock_mode mode,
1449  ulint gap,
1452  ulint wait,
1455  const buf_block_t* block,
1457  ulint heap_no,
1458  const trx_t* trx)
1461 {
1462  lock_t* lock;
1463 
1464  ut_ad(mutex_own(&kernel_mutex));
1465  ut_ad(mode == LOCK_X || mode == LOCK_S);
1466  ut_ad(gap == 0 || gap == LOCK_GAP);
1467  ut_ad(wait == 0 || wait == LOCK_WAIT);
1468 
1469  lock = lock_rec_get_first(block, heap_no);
1470 
1471  while (lock) {
1472  if (lock->trx != trx
1473  && (gap
1474  || !(lock_rec_get_gap(lock)
1475  || heap_no == PAGE_HEAP_NO_SUPREMUM))
1476  && (wait || !lock_get_wait(lock))
1477  && lock_mode_stronger_or_eq(lock_get_mode(lock), mode)) {
1478 
1479  return(lock);
1480  }
1481 
1482  lock = lock_rec_get_next(heap_no, lock);
1483  }
1484 
1485  return(NULL);
1486 }
1487 #endif /* UNIV_DEBUG */
1488 
1489 /*********************************************************************/
1493 static
1494 lock_t*
1495 lock_rec_other_has_conflicting(
1496 /*===========================*/
1497  enum lock_mode mode,
1501  const buf_block_t* block,
1503  ulint heap_no,
1504  trx_t* trx)
1505 {
1506  lock_t* lock;
1507 
1508  ut_ad(mutex_own(&kernel_mutex));
1509 
1510  lock = lock_rec_get_first(block, heap_no);
1511 
1512  if (UNIV_LIKELY_NULL(lock)) {
1513  if (UNIV_UNLIKELY(heap_no == PAGE_HEAP_NO_SUPREMUM)) {
1514 
1515  do {
1516  if (lock_rec_has_to_wait(trx, mode, lock,
1517  TRUE)) {
1518  return(lock);
1519  }
1520 
1521  lock = lock_rec_get_next(heap_no, lock);
1522  } while (lock);
1523  } else {
1524 
1525  do {
1526  if (lock_rec_has_to_wait(trx, mode, lock,
1527  FALSE)) {
1528  return(lock);
1529  }
1530 
1531  lock = lock_rec_get_next(heap_no, lock);
1532  } while (lock);
1533  }
1534  }
1535 
1536  return(NULL);
1537 }
1538 
1539 /*********************************************************************/
1544 UNIV_INLINE
1545 lock_t*
1546 lock_rec_find_similar_on_page(
1547 /*==========================*/
1548  ulint type_mode,
1549  ulint heap_no,
1550  lock_t* lock,
1551  const trx_t* trx)
1552 {
1553  ut_ad(mutex_own(&kernel_mutex));
1554 
1555  while (lock != NULL) {
1556  if (lock->trx == trx
1557  && lock->type_mode == type_mode
1558  && lock_rec_get_n_bits(lock) > heap_no) {
1559 
1560  return(lock);
1561  }
1562 
1563  lock = lock_rec_get_next_on_page(lock);
1564  }
1565 
1566  return(NULL);
1567 }
1568 
1569 /*********************************************************************/
1573 static
1574 trx_t*
1575 lock_sec_rec_some_has_impl_off_kernel(
1576 /*==================================*/
1577  const rec_t* rec,
1578  dict_index_t* index,
1579  const ulint* offsets)
1580 {
1581  const page_t* page = page_align(rec);
1582 
1583  ut_ad(mutex_own(&kernel_mutex));
1584  ut_ad(!dict_index_is_clust(index));
1586  ut_ad(rec_offs_validate(rec, index, offsets));
1587 
1588  /* Some transaction may have an implicit x-lock on the record only
1589  if the max trx id for the page >= min trx id for the trx list, or
1590  database recovery is running. We do not write the changes of a page
1591  max trx id to the log, and therefore during recovery, this value
1592  for a page may be incorrect. */
1593 
1595  && !recv_recovery_is_on()) {
1596 
1597  return(NULL);
1598  }
1599 
1600  /* Ok, in this case it is possible that some transaction has an
1601  implicit x-lock. We have to look in the clustered index. */
1602 
1603  if (!lock_check_trx_id_sanity(page_get_max_trx_id(page),
1604  rec, index, offsets, TRUE)) {
1605  buf_page_print(page, 0);
1606 
1607  /* The page is corrupt: try to avoid a crash by returning
1608  NULL */
1609  return(NULL);
1610  }
1611 
1612  return(row_vers_impl_x_locked_off_kernel(rec, index, offsets));
1613 }
1614 
1615 /*********************************************************************/
1619 UNIV_INTERN
1620 ulint
1621 lock_number_of_rows_locked(
1622 /*=======================*/
1623  const trx_t* trx)
1624 {
1625  lock_t* lock;
1626  ulint n_records = 0;
1627  ulint n_bits;
1628  ulint n_bit;
1629 
1630  lock = UT_LIST_GET_FIRST(trx->trx_locks);
1631 
1632  while (lock) {
1633  if (lock_get_type_low(lock) == LOCK_REC) {
1634  n_bits = lock_rec_get_n_bits(lock);
1635 
1636  for (n_bit = 0; n_bit < n_bits; n_bit++) {
1637  if (lock_rec_get_nth_bit(lock, n_bit)) {
1638  n_records++;
1639  }
1640  }
1641  }
1642 
1643  lock = UT_LIST_GET_NEXT(trx_locks, lock);
1644  }
1645 
1646  return (n_records);
1647 }
1648 
1649 /*============== RECORD LOCK CREATION AND QUEUE MANAGEMENT =============*/
1650 
1651 /*********************************************************************/
1655 static
1656 lock_t*
1657 lock_rec_create(
1658 /*============*/
1659  ulint type_mode,
1662  const buf_block_t* block,
1664  ulint heap_no,
1665  dict_index_t* index,
1666  trx_t* trx)
1667 {
1668  lock_t* lock;
1669  ulint page_no;
1670  ulint space;
1671  ulint n_bits;
1672  ulint n_bytes;
1673  const page_t* page;
1674 
1675  ut_ad(mutex_own(&kernel_mutex));
1676 
1677  space = buf_block_get_space(block);
1678  page_no = buf_block_get_page_no(block);
1679  page = block->frame;
1680 
1681  ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
1682 
1683  /* If rec is the supremum record, then we reset the gap and
1684  LOCK_REC_NOT_GAP bits, as all locks on the supremum are
1685  automatically of the gap type */
1686 
1687  if (UNIV_UNLIKELY(heap_no == PAGE_HEAP_NO_SUPREMUM)) {
1688  ut_ad(!(type_mode & LOCK_REC_NOT_GAP));
1689 
1690  type_mode = type_mode & ~(LOCK_GAP | LOCK_REC_NOT_GAP);
1691  }
1692 
1693  /* Make lock bitmap bigger by a safety margin */
1694  n_bits = page_dir_get_n_heap(page) + LOCK_PAGE_BITMAP_MARGIN;
1695  n_bytes = 1 + n_bits / 8;
1696 
1697  lock = static_cast<lock_t *>(mem_heap_alloc(trx->lock_heap, sizeof(lock_t) + n_bytes));
1698 
1699  UT_LIST_ADD_LAST(trx_locks, trx->trx_locks, lock);
1700 
1701  lock->trx = trx;
1702 
1703  lock->type_mode = (type_mode & ~LOCK_TYPE_MASK) | LOCK_REC;
1704  lock->index = index;
1705 
1706  lock->un_member.rec_lock.space = space;
1707  lock->un_member.rec_lock.page_no = page_no;
1708  lock->un_member.rec_lock.n_bits = n_bytes * 8;
1709 
1710  /* Reset to zero the bitmap which resides immediately after the
1711  lock struct */
1712 
1713  lock_rec_bitmap_reset(lock);
1714 
1715  /* Set the bit corresponding to rec */
1716  lock_rec_set_nth_bit(lock, heap_no);
1717 
1718  HASH_INSERT(lock_t, hash, lock_sys->rec_hash,
1719  lock_rec_fold(space, page_no), lock);
1720  if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
1721 
1722  lock_set_lock_and_trx_wait(lock, trx);
1723  }
1724 
1725  return(lock);
1726 }
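/* A standalone sketch (illustrative only) of the bitmap sizing done in
lock_rec_create() above: the bitmap is allocated for the current number
of heap slots on the page plus LOCK_PAGE_BITMAP_MARGIN, rounded up to
whole bytes, so later inserts on the same page can usually reuse the lock
struct by just setting another bit. */

static unsigned long ex_lock_bitmap_bytes(unsigned long n_heap)
{
	unsigned long	n_bits = n_heap + 64;	/* LOCK_PAGE_BITMAP_MARGIN */

	return(1 + n_bits / 8);
}

/* For a page with 100 heap slots: n_bits = 164, so 21 bitmap bytes are
allocated and the resulting lock covers heap numbers 0..167. */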
1727 
1728 /*********************************************************************/
1735 static
1736 enum db_err
1737 lock_rec_enqueue_waiting(
1738 /*=====================*/
1739  ulint type_mode,
1748  const buf_block_t* block,
1750  ulint heap_no,
1751  dict_index_t* index,
1752  que_thr_t* thr)
1753 {
1754  lock_t* lock;
1755  trx_t* trx;
1756 
1757  ut_ad(mutex_own(&kernel_mutex));
1758 
1759  /* Test if there already is some other reason to suspend the thread:
1760  we do not enqueue a lock request if the query thread should be
1761  stopped anyway */
1762 
1763  if (UNIV_UNLIKELY(que_thr_stop(thr))) {
1764 
1765  ut_error;
1766 
1767  return(DB_QUE_THR_SUSPENDED);
1768  }
1769 
1770  trx = thr_get_trx(thr);
1771 
1772  switch (trx_get_dict_operation(trx)) {
1773  case TRX_DICT_OP_NONE:
1774  break;
1775  case TRX_DICT_OP_TABLE:
1776  case TRX_DICT_OP_INDEX:
1777  ut_print_timestamp(stderr);
1778  fputs(" InnoDB: Error: a record lock wait happens"
1779  " in a dictionary operation!\n"
1780  "InnoDB: ", stderr);
1781  dict_index_name_print(stderr, trx, index);
1782  fputs(".\n"
1783  "InnoDB: Submit a detailed bug report"
1784  " to http://bugs.mysql.com\n",
1785  stderr);
1786  }
1787 
1788  /* Enqueue the lock request that will wait to be granted */
1789  lock = lock_rec_create(type_mode | LOCK_WAIT,
1790  block, heap_no, index, trx);
1791 
1792  /* Check if a deadlock occurs: if yes, remove the lock request and
1793  return an error code */
1794 
1795  if (UNIV_UNLIKELY(lock_deadlock_occurs(lock, trx))) {
1796 
1797  lock_reset_lock_and_trx_wait(lock);
1798  lock_rec_reset_nth_bit(lock, heap_no);
1799 
1800  return(DB_DEADLOCK);
1801  }
1802 
1803  /* If there was a deadlock but we chose another transaction as a
1804  victim, it is possible that we already have the lock now granted! */
1805 
1806  if (trx->wait_lock == NULL) {
1807 
1808  return(DB_SUCCESS_LOCKED_REC);
1809  }
1810 
1811  trx->que_state = TRX_QUE_LOCK_WAIT;
1812  trx->was_chosen_as_deadlock_victim = FALSE;
1813  trx->wait_started = time(NULL);
1814 
1815  ut_a(que_thr_stop(thr));
1816 
1817 #ifdef UNIV_DEBUG
1818  if (lock_print_waits) {
1819  fprintf(stderr, "Lock wait for trx " TRX_ID_FMT " in index ",
1820  trx->id);
1821  ut_print_name(stderr, trx, FALSE, index->name);
1822  }
1823 #endif /* UNIV_DEBUG */
1824 
1825  return(DB_LOCK_WAIT);
1826 }
1827 
1828 /*********************************************************************/
1836 static
1837 lock_t*
1838 lock_rec_add_to_queue(
1839 /*==================*/
1840  ulint type_mode,
1843  const buf_block_t* block,
1845  ulint heap_no,
1846  dict_index_t* index,
1847  trx_t* trx)
1848 {
1849  lock_t* lock;
1850 
1851  ut_ad(mutex_own(&kernel_mutex));
1852 #ifdef UNIV_DEBUG
1853  switch (type_mode & LOCK_MODE_MASK) {
1854  case LOCK_X:
1855  case LOCK_S:
1856  break;
1857  default:
1858  ut_error;
1859  }
1860 
1861  if (!(type_mode & (LOCK_WAIT | LOCK_GAP))) {
1862  enum lock_mode mode = (type_mode & LOCK_MODE_MASK) == LOCK_S
1863  ? LOCK_X
1864  : LOCK_S;
1865  lock_t* other_lock
1866  = lock_rec_other_has_expl_req(mode, 0, LOCK_WAIT,
1867  block, heap_no, trx);
1868  ut_a(!other_lock);
1869  }
1870 #endif /* UNIV_DEBUG */
1871 
1872  type_mode |= LOCK_REC;
1873 
1874  /* If rec is the supremum record, then we can reset the gap bit, as
1875  all locks on the supremum are automatically of the gap type, and we
1876  try to avoid unnecessary memory consumption of a new record lock
1877  struct for a gap type lock */
1878 
1879  if (UNIV_UNLIKELY(heap_no == PAGE_HEAP_NO_SUPREMUM)) {
1880  ut_ad(!(type_mode & LOCK_REC_NOT_GAP));
1881 
1882  /* There should never be LOCK_REC_NOT_GAP on a supremum
1883  record, but let us play safe */
1884 
1885  type_mode = type_mode & ~(LOCK_GAP | LOCK_REC_NOT_GAP);
1886  }
1887 
1888  /* Look for a waiting lock request on the same record or on a gap */
1889 
1890  lock = lock_rec_get_first_on_page(block);
1891 
1892  while (lock != NULL) {
1893  if (lock_get_wait(lock)
1894  && (lock_rec_get_nth_bit(lock, heap_no))) {
1895 
1896  goto somebody_waits;
1897  }
1898 
1899  lock = lock_rec_get_next_on_page(lock);
1900  }
1901 
1902  if (UNIV_LIKELY(!(type_mode & LOCK_WAIT))) {
1903 
1904  /* Look for a similar record lock on the same page:
1905  if one is found and there are no waiting lock requests,
1906  we can just set the bit */
1907 
1908  lock = lock_rec_find_similar_on_page(
1909  type_mode, heap_no,
1910  lock_rec_get_first_on_page(block), trx);
1911 
1912  if (lock) {
1913 
1914  lock_rec_set_nth_bit(lock, heap_no);
1915 
1916  return(lock);
1917  }
1918  }
1919 
1920 somebody_waits:
1921  return(lock_rec_create(type_mode, block, heap_no, index, trx));
1922 }
1923 
1925 enum lock_rec_req_status {
1927  LOCK_REC_FAIL,
1929  LOCK_REC_SUCCESS,
1931  LOCK_REC_SUCCESS_CREATED
1932 };
1933 
1934 /*********************************************************************/
1942 UNIV_INLINE
1943 enum lock_rec_req_status
1944 lock_rec_lock_fast(
1945 /*===============*/
1946  ibool impl,
1950  ulint mode,
1953  const buf_block_t* block,
1955  ulint heap_no,
1956  dict_index_t* index,
1957  que_thr_t* thr)
1958 {
1959  lock_t* lock;
1960  trx_t* trx;
1961 
1962  ut_ad(mutex_own(&kernel_mutex));
1963  ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
1964  || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
1965  ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
1966  || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
1967  ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
1968  || (LOCK_MODE_MASK & mode) == LOCK_X);
1969  ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
1970  || mode - (LOCK_MODE_MASK & mode) == 0
1971  || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
1972 
1973  lock = lock_rec_get_first_on_page(block);
1974 
1975  trx = thr_get_trx(thr);
1976 
1977  if (lock == NULL) {
1978  if (!impl) {
1979  lock_rec_create(mode, block, heap_no, index, trx);
1980  }
1981 
1982  return(LOCK_REC_SUCCESS_CREATED);
1983  }
1984 
1985  if (lock_rec_get_next_on_page(lock)) {
1986 
1987  return(LOCK_REC_FAIL);
1988  }
1989 
1990  if (lock->trx != trx
1991  || lock->type_mode != (mode | LOCK_REC)
1992  || lock_rec_get_n_bits(lock) <= heap_no) {
1993 
1994  return(LOCK_REC_FAIL);
1995  }
1996 
1997  if (!impl) {
1998  /* If the nth bit of the record lock is already set then we
1999  do not set a new lock bit, otherwise we do set */
2000 
2001  if (!lock_rec_get_nth_bit(lock, heap_no)) {
2002  lock_rec_set_nth_bit(lock, heap_no);
2003  return(LOCK_REC_SUCCESS_CREATED);
2004  }
2005  }
2006 
2007  return(LOCK_REC_SUCCESS);
2008 }
2009 
2010 /*********************************************************************/
2017 static
2018 enum db_err
2019 lock_rec_lock_slow(
2020 /*===============*/
2021  ibool impl,
2025  ulint mode,
2028  const buf_block_t* block,
2030  ulint heap_no,
2031  dict_index_t* index,
2032  que_thr_t* thr)
2033 {
2034  trx_t* trx;
2035 
2036  ut_ad(mutex_own(&kernel_mutex));
2037  ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
2038  || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
2039  ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
2040  || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
2041  ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
2042  || (LOCK_MODE_MASK & mode) == LOCK_X);
2043  ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
2044  || mode - (LOCK_MODE_MASK & mode) == 0
2045  || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
2046 
2047  trx = thr_get_trx(thr);
2048 
2049  if (lock_rec_has_expl(mode, block, heap_no, trx)) {
2050  /* The trx already has a strong enough lock on rec: do
2051  nothing */
2052 
2053  } else if (lock_rec_other_has_conflicting(static_cast<lock_mode>(mode), block, heap_no, trx)) {
2054 
2055  /* If another transaction has a non-gap conflicting request in
2056  the queue, as this transaction does not have a lock strong
2057  enough already granted on the record, we have to wait. */
2058 
2059  return(lock_rec_enqueue_waiting(mode, block, heap_no,
2060  index, thr));
2061  } else if (!impl) {
2062  /* Set the requested lock on the record */
2063 
2064  lock_rec_add_to_queue(LOCK_REC | mode, block,
2065  heap_no, index, trx);
2066  return(DB_SUCCESS_LOCKED_REC);
2067  }
2068 
2069  return(DB_SUCCESS);
2070 }
2071 
2072 /*********************************************************************/
2080 static
2081 enum db_err
2082 lock_rec_lock(
2083 /*==========*/
2084  ibool impl,
2088  ulint mode,
2091  const buf_block_t* block,
2093  ulint heap_no,
2094  dict_index_t* index,
2095  que_thr_t* thr)
2096 {
2097  ut_ad(mutex_own(&kernel_mutex));
2098  ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
2099  || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
2100  ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
2101  || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
2102  ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
2103  || (LOCK_MODE_MASK & mode) == LOCK_X);
2104  ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
2105  || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP
2106  || mode - (LOCK_MODE_MASK & mode) == 0);
2107 
2108  /* We try a simplified and faster subroutine for the most
2109  common cases */
2110  switch (lock_rec_lock_fast(impl, mode, block, heap_no, index, thr)) {
2111  case LOCK_REC_SUCCESS:
2112  return(DB_SUCCESS);
2113  case LOCK_REC_SUCCESS_CREATED:
2114  return(DB_SUCCESS_LOCKED_REC);
2115  case LOCK_REC_FAIL:
2116  return(lock_rec_lock_slow(impl, mode, block,
2117  heap_no, index, thr));
2118  }
2119 
2120  ut_error;
2121  return(DB_ERROR);
2122 }
2123 
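The fast path above works because every record lock struct carries a bitmap with one bit per heap number on its page; lock_rec_set_nth_bit(), lock_rec_get_nth_bit() and lock_rec_reset_nth_bit() manipulate that bitmap. The following standalone sketch models only that bitmap bookkeeping; the rec_bitmap_* names and the fixed 128-bit size are illustrative and not part of lock0lock.cc.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* One bit per record heap number on the page, modelled after the bitmap
   that follows every record lock struct. */
typedef struct { uint8_t bits[16]; } rec_bitmap_t;   /* room for 128 heap numbers */

static void rec_bitmap_set(rec_bitmap_t* b, unsigned heap_no)
{ b->bits[heap_no / 8] |= (uint8_t)(1u << (heap_no % 8)); }

static int rec_bitmap_test(const rec_bitmap_t* b, unsigned heap_no)
{ return (b->bits[heap_no / 8] >> (heap_no % 8)) & 1; }

static void rec_bitmap_reset(rec_bitmap_t* b, unsigned heap_no)
{ b->bits[heap_no / 8] &= (uint8_t)~(1u << (heap_no % 8)); }

int main(void)
{
    rec_bitmap_t bm;
    memset(&bm, 0, sizeof(bm));

    rec_bitmap_set(&bm, 5);                    /* lock the record at heap_no 5 */
    printf("heap_no 5 locked: %d\n", rec_bitmap_test(&bm, 5));   /* 1 */

    rec_bitmap_reset(&bm, 5);                  /* release it again */
    printf("heap_no 5 locked: %d\n", rec_bitmap_test(&bm, 5));   /* 0 */
    return 0;
}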
2124 /*********************************************************************/
2127 static
2128 ibool
2129 lock_rec_has_to_wait_in_queue(
2130 /*==========================*/
2131  lock_t* wait_lock)
2132 {
2133  lock_t* lock;
2134  ulint space;
2135  ulint page_no;
2136  ulint heap_no;
2137 
2138  ut_ad(mutex_own(&kernel_mutex));
2139  ut_ad(lock_get_wait(wait_lock));
2140  ut_ad(lock_get_type_low(wait_lock) == LOCK_REC);
2141 
2142  space = wait_lock->un_member.rec_lock.space;
2143  page_no = wait_lock->un_member.rec_lock.page_no;
2144  heap_no = lock_rec_find_set_bit(wait_lock);
2145 
2146  lock = lock_rec_get_first_on_page_addr(space, page_no);
2147 
2148  while (lock != wait_lock) {
2149 
2150  if (lock_rec_get_nth_bit(lock, heap_no)
2151  && lock_has_to_wait(wait_lock, lock)) {
2152 
2153  return(TRUE);
2154  }
2155 
2156  lock = lock_rec_get_next_on_page(lock);
2157  }
2158 
2159  return(FALSE);
2160 }
2161 
2162 /*************************************************************/
2165 static
2166 void
2167 lock_grant(
2168 /*=======*/
2169  lock_t* lock)
2170 {
2171  ut_ad(mutex_own(&kernel_mutex));
2172 
2173  lock_reset_lock_and_trx_wait(lock);
2174 
2175  if (lock_get_mode(lock) == LOCK_AUTO_INC) {
2176  trx_t* trx = lock->trx;
2177  dict_table_t* table = lock->un_member.tab_lock.table;
2178 
2179  if (table->autoinc_trx == trx) {
2180  fprintf(stderr,
2181  "InnoDB: Error: trx already had"
2182  " an AUTO-INC lock!\n");
2183  } else {
2184  table->autoinc_trx = trx;
2185 
2186  ib_vector_push(trx->autoinc_locks, lock);
2187  }
2188  }
2189 
2190 #ifdef UNIV_DEBUG
2191  if (lock_print_waits) {
2192  fprintf(stderr, "Lock wait for trx " TRX_ID_FMT " ends\n",
2193  lock->trx->id);
2194  }
2195 #endif /* UNIV_DEBUG */
2196 
2197  /* If we are resolving a deadlock by choosing another transaction
2198  as a victim, then our original transaction may not be in the
2199  TRX_QUE_LOCK_WAIT state, and there is no need to end the lock wait
2200  for it */
2201 
2202  if (lock->trx->que_state == TRX_QUE_LOCK_WAIT) {
2203  trx_end_lock_wait(lock->trx);
2204  }
2205 }
2206 
2207 /*************************************************************/
2211 static
2212 void
2213 lock_rec_cancel(
2214 /*============*/
2215  lock_t* lock)
2216 {
2217  ut_ad(mutex_own(&kernel_mutex));
2218  ut_ad(lock_get_type_low(lock) == LOCK_REC);
2219 
2220  /* Reset the bit (there can be only one set bit) in the lock bitmap */
2221  lock_rec_reset_nth_bit(lock, lock_rec_find_set_bit(lock));
2222 
2223  /* Reset the wait flag and the back pointer to lock in trx */
2224 
2225  lock_reset_lock_and_trx_wait(lock);
2226 
2227  /* The following function releases the trx from lock wait */
2228 
2229  trx_end_lock_wait(lock->trx);
2230 }
2231 
2232 /*************************************************************/
2236 static
2237 void
2238 lock_rec_dequeue_from_page(
2239 /*=======================*/
2240  lock_t* in_lock)
2244 {
2245  ulint space;
2246  ulint page_no;
2247  lock_t* lock;
2248  trx_t* trx;
2249 
2250  ut_ad(mutex_own(&kernel_mutex));
2251  ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
2252 
2253  trx = in_lock->trx;
2254 
2255  space = in_lock->un_member.rec_lock.space;
2256  page_no = in_lock->un_member.rec_lock.page_no;
2257 
2258  HASH_DELETE(lock_t, hash, lock_sys->rec_hash,
2259  lock_rec_fold(space, page_no), in_lock);
2260 
2261  UT_LIST_REMOVE(trx_locks, trx->trx_locks, in_lock);
2262 
2263  /* Check if waiting locks in the queue can now be granted: grant
2264  locks if there are no conflicting locks ahead. */
2265 
2266  lock = lock_rec_get_first_on_page_addr(space, page_no);
2267 
2268  while (lock != NULL) {
2269  if (lock_get_wait(lock)
2270  && !lock_rec_has_to_wait_in_queue(lock)) {
2271 
2272  /* Grant the lock */
2273  lock_grant(lock);
2274  }
2275 
2276  lock = lock_rec_get_next_on_page(lock);
2277  }
2278 }
2279 
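lock_rec_dequeue_from_page() above, and lock_table_dequeue() further below, share one pattern: after a lock leaves the queue, every remaining waiter is granted as soon as nothing ahead of it in FIFO order conflicts with it. A minimal standalone sketch of that pattern follows; the req struct, conflicts() and grant_waiters() are invented for the example and only approximate lock_has_to_wait().

#include <stdbool.h>
#include <stdio.h>

enum mode { MODE_S, MODE_X };

struct req { enum mode mode; bool granted; };

/* Stand-in for lock_has_to_wait(): only S is compatible with S. */
static bool conflicts(const struct req* a, const struct req* b)
{ return a->mode == MODE_X || b->mode == MODE_X; }

/* Grant every waiter that has no conflicting request ahead of it in the queue. */
static void grant_waiters(struct req* q, int n)
{
    for (int i = 0; i < n; i++) {
        bool must_wait = false;

        if (q[i].granted) continue;

        for (int j = 0; j < i; j++) {
            if (conflicts(&q[i], &q[j])) { must_wait = true; break; }
        }
        if (!must_wait) q[i].granted = true;
    }
}

int main(void)
{
    /* Two waiters left after the lock at the head of the queue was released:
       an X request followed by an S request. */
    struct req q[] = { { MODE_X, false }, { MODE_S, false } };

    grant_waiters(q, 2);

    /* The X at the front is granted; the S behind it keeps waiting. */
    printf("X granted: %d, S granted: %d\n", q[0].granted, q[1].granted);
    return 0;
}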
2280 /*************************************************************/
2282 static
2283 void
2284 lock_rec_discard(
2285 /*=============*/
2286  lock_t* in_lock)
2288 {
2289  ulint space;
2290  ulint page_no;
2291  trx_t* trx;
2292 
2293  ut_ad(mutex_own(&kernel_mutex));
2294  ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
2295 
2296  trx = in_lock->trx;
2297 
2298  space = in_lock->un_member.rec_lock.space;
2299  page_no = in_lock->un_member.rec_lock.page_no;
2300 
2301  HASH_DELETE(lock_t, hash, lock_sys->rec_hash,
2302  lock_rec_fold(space, page_no), in_lock);
2303 
2304  UT_LIST_REMOVE(trx_locks, trx->trx_locks, in_lock);
2305 }
2306 
2307 /*************************************************************/
2311 static
2312 void
2313 lock_rec_free_all_from_discard_page(
2314 /*================================*/
2315  const buf_block_t* block)
2316 {
2317  ulint space;
2318  ulint page_no;
2319  lock_t* lock;
2320  lock_t* next_lock;
2321 
2322  ut_ad(mutex_own(&kernel_mutex));
2323 
2324  space = buf_block_get_space(block);
2325  page_no = buf_block_get_page_no(block);
2326 
2327  lock = lock_rec_get_first_on_page_addr(space, page_no);
2328 
2329  while (lock != NULL) {
2330  ut_ad(lock_rec_find_set_bit(lock) == ULINT_UNDEFINED);
2331  ut_ad(!lock_get_wait(lock));
2332 
2333  next_lock = lock_rec_get_next_on_page(lock);
2334 
2335  lock_rec_discard(lock);
2336 
2337  lock = next_lock;
2338  }
2339 }
2340 
2341 /*============= RECORD LOCK MOVING AND INHERITING ===================*/
2342 
2343 /*************************************************************/
2346 static
2347 void
2348 lock_rec_reset_and_release_wait(
2349 /*============================*/
2350  const buf_block_t* block,
2352  ulint heap_no)
2353 {
2354  lock_t* lock;
2355 
2356  ut_ad(mutex_own(&kernel_mutex));
2357 
2358  lock = lock_rec_get_first(block, heap_no);
2359 
2360  while (lock != NULL) {
2361  if (lock_get_wait(lock)) {
2362  lock_rec_cancel(lock);
2363  } else {
2364  lock_rec_reset_nth_bit(lock, heap_no);
2365  }
2366 
2367  lock = lock_rec_get_next(heap_no, lock);
2368  }
2369 }
2370 
2371 /*************************************************************/
2376 static
2377 void
2378 lock_rec_inherit_to_gap(
2379 /*====================*/
2380  const buf_block_t* heir_block,
2382  const buf_block_t* block,
2386  ulint heir_heap_no,
2388  ulint heap_no)
2390 {
2391  lock_t* lock;
2392 
2393  ut_ad(mutex_own(&kernel_mutex));
2394 
2395  lock = lock_rec_get_first(block, heap_no);
2396 
2397  /* If srv_locks_unsafe_for_binlog is TRUE or the session is using
2398  the READ COMMITTED isolation level, we do not want locks set
2399  by an UPDATE or a DELETE to be inherited as gap type locks. But we
2400  DO want S-locks set by a consistency constraint to be inherited
2401  even in that case. */
2402 
2403  while (lock != NULL) {
2404  if (!lock_rec_get_insert_intention(lock)
2405  && !((srv_locks_unsafe_for_binlog
2406  || lock->trx->isolation_level
2407  <= TRX_ISO_READ_COMMITTED)
2408  && lock_get_mode(lock) == LOCK_X)) {
2409 
2410  lock_rec_add_to_queue(LOCK_REC | LOCK_GAP
2411  | lock_get_mode(lock),
2412  heir_block, heir_heap_no,
2413  lock->index, lock->trx);
2414  }
2415 
2416  lock = lock_rec_get_next(heap_no, lock);
2417  }
2418 }
2419 
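lock_rec_inherit_to_gap() keeps the lock mode but attaches it to the gap before the heir record. A toy illustration of that type_mode composition follows; the MY_LOCK_* values are made-up stand-ins, not the real LOCK_* constants from lock0lock.h.

#include <stdio.h>

/* Illustrative flag layout only; the real LOCK_* values live in lock0lock.h. */
#define MY_LOCK_X            3
#define MY_LOCK_MODE_MASK    0xF
#define MY_LOCK_GAP          512
#define MY_LOCK_REC_NOT_GAP  1024

/* Inheriting a lock to the gap keeps its mode but forces gap semantics. */
static unsigned inherit_to_gap(unsigned type_mode)
{ return (type_mode & MY_LOCK_MODE_MASK) | MY_LOCK_GAP; }

int main(void)
{
    unsigned rec_only_x = MY_LOCK_X | MY_LOCK_REC_NOT_GAP;   /* X, record only */
    unsigned inherited  = inherit_to_gap(rec_only_x);        /* becomes an X gap lock */

    printf("mode still X: %d, gap flag set: %d, rec-not-gap cleared: %d\n",
           (inherited & MY_LOCK_MODE_MASK) == MY_LOCK_X,
           (inherited & MY_LOCK_GAP) != 0,
           (inherited & MY_LOCK_REC_NOT_GAP) == 0);
    return 0;
}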
2420 /*************************************************************/
2424 static
2425 void
2426 lock_rec_inherit_to_gap_if_gap_lock(
2427 /*================================*/
2428  const buf_block_t* block,
2429  ulint heir_heap_no,
2431  ulint heap_no)
2435 {
2436  lock_t* lock;
2437 
2438  ut_ad(mutex_own(&kernel_mutex));
2439 
2440  lock = lock_rec_get_first(block, heap_no);
2441 
2442  while (lock != NULL) {
2443  if (!lock_rec_get_insert_intention(lock)
2444  && (heap_no == PAGE_HEAP_NO_SUPREMUM
2445  || !lock_rec_get_rec_not_gap(lock))) {
2446 
2447  lock_rec_add_to_queue(LOCK_REC | LOCK_GAP
2448  | lock_get_mode(lock),
2449  block, heir_heap_no,
2450  lock->index, lock->trx);
2451  }
2452 
2453  lock = lock_rec_get_next(heap_no, lock);
2454  }
2455 }
2456 
2457 /*************************************************************/
2460 static
2461 void
2462 lock_rec_move(
2463 /*==========*/
2464  const buf_block_t* receiver,
2466  const buf_block_t* donator,
2468  ulint receiver_heap_no,
2472  ulint donator_heap_no)
2474 {
2475  lock_t* lock;
2476 
2477  ut_ad(mutex_own(&kernel_mutex));
2478 
2479  lock = lock_rec_get_first(donator, donator_heap_no);
2480 
2481  ut_ad(lock_rec_get_first(receiver, receiver_heap_no) == NULL);
2482 
2483  while (lock != NULL) {
2484  const ulint type_mode = lock->type_mode;
2485 
2486  lock_rec_reset_nth_bit(lock, donator_heap_no);
2487 
2488  if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
2489  lock_reset_lock_and_trx_wait(lock);
2490  }
2491 
2492  /* Note that we FIRST reset the bit, and then set the lock:
2493  this way the function also works when donator == receiver */
2494 
2495  lock_rec_add_to_queue(type_mode, receiver, receiver_heap_no,
2496  lock->index, lock->trx);
2497  lock = lock_rec_get_next(donator_heap_no, lock);
2498  }
2499 
2500  ut_ad(lock_rec_get_first(donator, donator_heap_no) == NULL);
2501 }
2502 
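The reset-first-then-set order noted in lock_rec_move() above is what makes a self-move (donator == receiver) harmless. A tiny standalone illustration of that ordering, using a plain flag array instead of real lock bitmaps:

#include <stdio.h>

#define N_HEAP 8

static int locked[N_HEAP];      /* toy per-heap-number lock flags */

/* Move a lock from one heap number to another.  Resetting FIRST and setting
   afterwards keeps the move correct even when from == to. */
static void move_lock(int from, int to)
{
    locked[from] = 0;
    locked[to]   = 1;
}

int main(void)
{
    locked[2] = 1;
    move_lock(2, 6);                         /* donator -> receiver */
    printf("heap 2: %d, heap 6: %d\n", locked[2], locked[6]);   /* 0, 1 */

    move_lock(6, 6);                         /* degenerate self-move keeps the lock */
    printf("heap 6 after self-move: %d\n", locked[6]);          /* 1 */
    return 0;
}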
2503 /*************************************************************/
2508 UNIV_INTERN
2509 void
2510 lock_move_reorganize_page(
2511 /*======================*/
2512  const buf_block_t* block,
2514  const buf_block_t* oblock)
2516 {
2517  lock_t* lock;
2518  UT_LIST_BASE_NODE_T(lock_t) old_locks;
2519  mem_heap_t* heap = NULL;
2520  ulint comp;
2521 
2522  lock_mutex_enter_kernel();
2523 
2524  lock = lock_rec_get_first_on_page(block);
2525 
2526  if (lock == NULL) {
2527  lock_mutex_exit_kernel();
2528 
2529  return;
2530  }
2531 
2532  heap = mem_heap_create(256);
2533 
2534  /* First copy all the locks on the page to the heap and reset the
2535  bitmaps in the original locks; chain the copies of the locks
2536  using the trx_locks field in them. */
2537 
2538  UT_LIST_INIT(old_locks);
2539 
2540  do {
2541  /* Make a copy of the lock */
2542  lock_t* old_lock = lock_rec_copy(lock, heap);
2543 
2544  UT_LIST_ADD_LAST(trx_locks, old_locks, old_lock);
2545 
2546  /* Reset bitmap of lock */
2547  lock_rec_bitmap_reset(lock);
2548 
2549  if (lock_get_wait(lock)) {
2550  lock_reset_lock_and_trx_wait(lock);
2551  }
2552 
2553  lock = lock_rec_get_next_on_page(lock);
2554  } while (lock != NULL);
2555 
2556  comp = page_is_comp(block->frame);
2557  ut_ad(comp == page_is_comp(oblock->frame));
2558 
2559  for (lock = UT_LIST_GET_FIRST(old_locks); lock;
2560  lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
2561  /* NOTE: we copy also the locks set on the infimum and
2562  supremum of the page; the infimum may carry locks if an
2563  update of a record is occurring on the page, and its locks
2564  were temporarily stored on the infimum */
2565  page_cur_t cur1;
2566  page_cur_t cur2;
2567 
2568  page_cur_set_before_first(block, &cur1);
2569  page_cur_set_before_first(oblock, &cur2);
2570 
2571  /* Set locks according to old locks */
2572  for (;;) {
2573  ulint old_heap_no;
2574  ulint new_heap_no;
2575 
2576  ut_ad(comp || !memcmp(page_cur_get_rec(&cur1),
2577  page_cur_get_rec(&cur2),
2578  rec_get_data_size_old(
2579  page_cur_get_rec(
2580  &cur2))));
2581  if (UNIV_LIKELY(comp)) {
2582  old_heap_no = rec_get_heap_no_new(
2583  page_cur_get_rec(&cur2));
2584  new_heap_no = rec_get_heap_no_new(
2585  page_cur_get_rec(&cur1));
2586  } else {
2587  old_heap_no = rec_get_heap_no_old(
2588  page_cur_get_rec(&cur2));
2589  new_heap_no = rec_get_heap_no_old(
2590  page_cur_get_rec(&cur1));
2591  }
2592 
2593  if (lock_rec_get_nth_bit(lock, old_heap_no)) {
2594 
2595  /* Clear the bit in old_lock. */
2596  ut_d(lock_rec_reset_nth_bit(lock,
2597  old_heap_no));
2598 
2599  /* NOTE that the old lock bitmap could be too
2600  small for the new heap number! */
2601 
2602  lock_rec_add_to_queue(lock->type_mode, block,
2603  new_heap_no,
2604  lock->index, lock->trx);
2605 
2606  /* if (new_heap_no == PAGE_HEAP_NO_SUPREMUM
2607  && lock_get_wait(lock)) {
2608  fprintf(stderr,
2609  "---\n--\n!!!Lock reorg: supr type %lu\n",
2610  lock->type_mode);
2611  } */
2612  }
2613 
2614  if (UNIV_UNLIKELY
2615  (new_heap_no == PAGE_HEAP_NO_SUPREMUM)) {
2616 
2617  ut_ad(old_heap_no == PAGE_HEAP_NO_SUPREMUM);
2618  break;
2619  }
2620 
2621  page_cur_move_to_next(&cur1);
2622  page_cur_move_to_next(&cur2);
2623  }
2624 
2625 #ifdef UNIV_DEBUG
2626  {
2627  ulint i = lock_rec_find_set_bit(lock);
2628 
2629  /* Check that all locks were moved. */
2630  if (UNIV_UNLIKELY(i != ULINT_UNDEFINED)) {
2631  fprintf(stderr,
2632  "lock_move_reorganize_page():"
2633  " %lu not moved in %p\n",
2634  (ulong) i, (void*) lock);
2635  ut_error;
2636  }
2637  }
2638 #endif /* UNIV_DEBUG */
2639  }
2640 
2641  lock_mutex_exit_kernel();
2642 
2643  mem_heap_free(heap);
2644 
2645 #ifdef UNIV_DEBUG_LOCK_VALIDATE
2646  ut_ad(lock_rec_validate_page(buf_block_get_space(block),
2647  buf_block_get_page_no(block)));
2648 #endif
2649 }
2650 
2651 /*************************************************************/
2654 UNIV_INTERN
2655 void
2656 lock_move_rec_list_end(
2657 /*===================*/
2658  const buf_block_t* new_block,
2659  const buf_block_t* block,
2660  const rec_t* rec)
2662 {
2663  lock_t* lock;
2664  const ulint comp = page_rec_is_comp(rec);
2665 
2666  lock_mutex_enter_kernel();
2667 
2668  /* Note: when we move locks from record to record, waiting locks
2669  and possible granted gap type locks behind them are enqueued in
2670  the original order, because new elements are inserted to a hash
2671  table to the end of the hash chain, and lock_rec_add_to_queue
2672  does not reuse locks if there are waiters in the queue. */
2673 
2674  for (lock = lock_rec_get_first_on_page(block); lock;
2675  lock = lock_rec_get_next_on_page(lock)) {
2676  page_cur_t cur1;
2677  page_cur_t cur2;
2678  const ulint type_mode = lock->type_mode;
2679 
2680  page_cur_position(rec, block, &cur1);
2681 
2682  if (page_cur_is_before_first(&cur1)) {
2683  page_cur_move_to_next(&cur1);
2684  }
2685 
2686  page_cur_set_before_first(new_block, &cur2);
2687  page_cur_move_to_next(&cur2);
2688 
2689  /* Copy lock requests on user records to new page and
2690  reset the lock bits on the old */
2691 
2692  while (!page_cur_is_after_last(&cur1)) {
2693  ulint heap_no;
2694 
2695  if (comp) {
2696  heap_no = rec_get_heap_no_new(
2697  page_cur_get_rec(&cur1));
2698  } else {
2699  heap_no = rec_get_heap_no_old(
2700  page_cur_get_rec(&cur1));
2701  ut_ad(!memcmp(page_cur_get_rec(&cur1),
2702  page_cur_get_rec(&cur2),
2703  rec_get_data_size_old(
2704  page_cur_get_rec(&cur2))));
2705  }
2706 
2707  if (lock_rec_get_nth_bit(lock, heap_no)) {
2708  lock_rec_reset_nth_bit(lock, heap_no);
2709 
2710  if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
2711  lock_reset_lock_and_trx_wait(lock);
2712  }
2713 
2714  if (comp) {
2715  heap_no = rec_get_heap_no_new(
2716  page_cur_get_rec(&cur2));
2717  } else {
2718  heap_no = rec_get_heap_no_old(
2719  page_cur_get_rec(&cur2));
2720  }
2721 
2722  lock_rec_add_to_queue(type_mode,
2723  new_block, heap_no,
2724  lock->index, lock->trx);
2725  }
2726 
2727  page_cur_move_to_next(&cur1);
2728  page_cur_move_to_next(&cur2);
2729  }
2730  }
2731 
2732  lock_mutex_exit_kernel();
2733 
2734 #ifdef UNIV_DEBUG_LOCK_VALIDATE
2735  ut_ad(lock_rec_validate_page(buf_block_get_space(block),
2736  buf_block_get_page_no(block)));
2737  ut_ad(lock_rec_validate_page(buf_block_get_space(new_block),
2738  buf_block_get_page_no(new_block)));
2739 #endif
2740 }
2741 
2742 /*************************************************************/
2745 UNIV_INTERN
2746 void
2747 lock_move_rec_list_start(
2748 /*=====================*/
2749  const buf_block_t* new_block,
2750  const buf_block_t* block,
2751  const rec_t* rec,
2754  const rec_t* old_end)
2759 {
2760  lock_t* lock;
2761  const ulint comp = page_rec_is_comp(rec);
2762 
2763  ut_ad(block->frame == page_align(rec));
2764  ut_ad(new_block->frame == page_align(old_end));
2765 
2766  lock_mutex_enter_kernel();
2767 
2768  for (lock = lock_rec_get_first_on_page(block); lock;
2769  lock = lock_rec_get_next_on_page(lock)) {
2770  page_cur_t cur1;
2771  page_cur_t cur2;
2772  const ulint type_mode = lock->type_mode;
2773 
2774  page_cur_set_before_first(block, &cur1);
2775  page_cur_move_to_next(&cur1);
2776 
2777  page_cur_position(old_end, new_block, &cur2);
2778  page_cur_move_to_next(&cur2);
2779 
2780  /* Copy lock requests on user records to new page and
2781  reset the lock bits on the old */
2782 
2783  while (page_cur_get_rec(&cur1) != rec) {
2784  ulint heap_no;
2785 
2786  if (comp) {
2787  heap_no = rec_get_heap_no_new(
2788  page_cur_get_rec(&cur1));
2789  } else {
2790  heap_no = rec_get_heap_no_old(
2791  page_cur_get_rec(&cur1));
2792  ut_ad(!memcmp(page_cur_get_rec(&cur1),
2793  page_cur_get_rec(&cur2),
2794  rec_get_data_size_old(
2795  page_cur_get_rec(
2796  &cur2))));
2797  }
2798 
2799  if (lock_rec_get_nth_bit(lock, heap_no)) {
2800  lock_rec_reset_nth_bit(lock, heap_no);
2801 
2802  if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
2803  lock_reset_lock_and_trx_wait(lock);
2804  }
2805 
2806  if (comp) {
2807  heap_no = rec_get_heap_no_new(
2808  page_cur_get_rec(&cur2));
2809  } else {
2810  heap_no = rec_get_heap_no_old(
2811  page_cur_get_rec(&cur2));
2812  }
2813 
2814  lock_rec_add_to_queue(type_mode,
2815  new_block, heap_no,
2816  lock->index, lock->trx);
2817  }
2818 
2819  page_cur_move_to_next(&cur1);
2820  page_cur_move_to_next(&cur2);
2821  }
2822 
2823 #ifdef UNIV_DEBUG
2824  if (page_rec_is_supremum(rec)) {
2825  ulint i;
2826 
2827  for (i = PAGE_HEAP_NO_USER_LOW;
2828  i < lock_rec_get_n_bits(lock); i++) {
2829  if (UNIV_UNLIKELY
2830  (lock_rec_get_nth_bit(lock, i))) {
2831 
2832  fprintf(stderr,
2833  "lock_move_rec_list_start():"
2834  " %lu not moved in %p\n",
2835  (ulong) i, (void*) lock);
2836  ut_error;
2837  }
2838  }
2839  }
2840 #endif /* UNIV_DEBUG */
2841  }
2842 
2843  lock_mutex_exit_kernel();
2844 
2845 #ifdef UNIV_DEBUG_LOCK_VALIDATE
2846  ut_ad(lock_rec_validate_page(buf_block_get_space(block),
2847  buf_block_get_page_no(block)));
2848 #endif
2849 }
2850 
2851 /*************************************************************/
2853 UNIV_INTERN
2854 void
2855 lock_update_split_right(
2856 /*====================*/
2857  const buf_block_t* right_block,
2858  const buf_block_t* left_block)
2859 {
2860  ulint heap_no = lock_get_min_heap_no(right_block);
2861 
2862  lock_mutex_enter_kernel();
2863 
2864  /* Move the locks on the supremum of the left page to the supremum
2865  of the right page */
2866 
2867  lock_rec_move(right_block, left_block,
2868  PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
2869 
2870  /* Inherit the locks to the supremum of left page from the successor
2871  of the infimum on right page */
2872 
2873  lock_rec_inherit_to_gap(left_block, right_block,
2874  PAGE_HEAP_NO_SUPREMUM, heap_no);
2875 
2876  lock_mutex_exit_kernel();
2877 }
2878 
2879 /*************************************************************/
2881 UNIV_INTERN
2882 void
2883 lock_update_merge_right(
2884 /*====================*/
2885  const buf_block_t* right_block,
2887  const rec_t* orig_succ,
2891  const buf_block_t* left_block)
2894 {
2895  lock_mutex_enter_kernel();
2896 
2897  /* Inherit the locks from the supremum of the left page to the
2898  original successor of infimum on the right page, to which the left
2899  page was merged */
2900 
2901  lock_rec_inherit_to_gap(right_block, left_block,
2902  page_rec_get_heap_no(orig_succ),
2903  PAGE_HEAP_NO_SUPREMUM);
2904 
2905  /* Reset the locks on the supremum of the left page, releasing
2906  waiting transactions */
2907 
2908  lock_rec_reset_and_release_wait(left_block,
2909  PAGE_HEAP_NO_SUPREMUM);
2910 
2911  lock_rec_free_all_from_discard_page(left_block);
2912 
2913  lock_mutex_exit_kernel();
2914 }
2915 
2916 /*************************************************************/
2923 UNIV_INTERN
2924 void
2925 lock_update_root_raise(
2926 /*===================*/
2927  const buf_block_t* block,
2928  const buf_block_t* root)
2929 {
2930  lock_mutex_enter_kernel();
2931 
2932  /* Move the locks on the supremum of the root to the supremum
2933  of block */
2934 
2935  lock_rec_move(block, root,
2936  PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
2937  lock_mutex_exit_kernel();
2938 }
2939 
2940 /*************************************************************/
2943 UNIV_INTERN
2944 void
2945 lock_update_copy_and_discard(
2946 /*=========================*/
2947  const buf_block_t* new_block,
2949  const buf_block_t* block)
2951 {
2952  lock_mutex_enter_kernel();
2953 
2954  /* Move the locks on the supremum of the old page to the supremum
2955  of new_page */
2956 
2957  lock_rec_move(new_block, block,
2958  PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
2959  lock_rec_free_all_from_discard_page(block);
2960 
2961  lock_mutex_exit_kernel();
2962 }
2963 
2964 /*************************************************************/
2966 UNIV_INTERN
2967 void
2968 lock_update_split_left(
2969 /*===================*/
2970  const buf_block_t* right_block,
2971  const buf_block_t* left_block)
2972 {
2973  ulint heap_no = lock_get_min_heap_no(right_block);
2974 
2975  lock_mutex_enter_kernel();
2976 
2977  /* Inherit the locks to the supremum of the left page from the
2978  successor of the infimum on the right page */
2979 
2980  lock_rec_inherit_to_gap(left_block, right_block,
2981  PAGE_HEAP_NO_SUPREMUM, heap_no);
2982 
2983  lock_mutex_exit_kernel();
2984 }
2985 
2986 /*************************************************************/
2988 UNIV_INTERN
2989 void
2990 lock_update_merge_left(
2991 /*===================*/
2992  const buf_block_t* left_block,
2994  const rec_t* orig_pred,
2997  const buf_block_t* right_block)
2999 {
3000  const rec_t* left_next_rec;
3001 
3002  ut_ad(left_block->frame == page_align(orig_pred));
3003 
3004  lock_mutex_enter_kernel();
3005 
3006  left_next_rec = page_rec_get_next_const(orig_pred);
3007 
3008  if (!page_rec_is_supremum(left_next_rec)) {
3009 
3010  /* Inherit the locks on the supremum of the left page to the
3011  first record which was moved from the right page */
3012 
3013  lock_rec_inherit_to_gap(left_block, left_block,
3014  page_rec_get_heap_no(left_next_rec),
3015  PAGE_HEAP_NO_SUPREMUM);
3016 
3017  /* Reset the locks on the supremum of the left page,
3018  releasing waiting transactions */
3019 
3020  lock_rec_reset_and_release_wait(left_block,
3021  PAGE_HEAP_NO_SUPREMUM);
3022  }
3023 
3024  /* Move the locks from the supremum of right page to the supremum
3025  of the left page */
3026 
3027  lock_rec_move(left_block, right_block,
3028  PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
3029 
3030  lock_rec_free_all_from_discard_page(right_block);
3031 
3032  lock_mutex_exit_kernel();
3033 }
3034 
3035 /*************************************************************/
3038 UNIV_INTERN
3039 void
3040 lock_rec_reset_and_inherit_gap_locks(
3041 /*=================================*/
3042  const buf_block_t* heir_block,
3044  const buf_block_t* block,
3048  ulint heir_heap_no,
3050  ulint heap_no)
3052 {
3053  mutex_enter(&kernel_mutex);
3054 
3055  lock_rec_reset_and_release_wait(heir_block, heir_heap_no);
3056 
3057  lock_rec_inherit_to_gap(heir_block, block, heir_heap_no, heap_no);
3058 
3059  mutex_exit(&kernel_mutex);
3060 }
3061 
3062 /*************************************************************/
3064 UNIV_INTERN
3065 void
3066 lock_update_discard(
3067 /*================*/
3068  const buf_block_t* heir_block,
3070  ulint heir_heap_no,
3072  const buf_block_t* block)
3074 {
3075  const page_t* page = block->frame;
3076  const rec_t* rec;
3077  ulint heap_no;
3078 
3079  lock_mutex_enter_kernel();
3080 
3081  if (!lock_rec_get_first_on_page(block)) {
3082  /* No locks exist on page, nothing to do */
3083 
3084  lock_mutex_exit_kernel();
3085 
3086  return;
3087  }
3088 
3089  /* Inherit all the locks on the page to the record and reset all
3090  the locks on the page */
3091 
3092  if (page_is_comp(page)) {
3093  rec = page + PAGE_NEW_INFIMUM;
3094 
3095  do {
3096  heap_no = rec_get_heap_no_new(rec);
3097 
3098  lock_rec_inherit_to_gap(heir_block, block,
3099  heir_heap_no, heap_no);
3100 
3101  lock_rec_reset_and_release_wait(block, heap_no);
3102 
3103  rec = page + rec_get_next_offs(rec, TRUE);
3104  } while (heap_no != PAGE_HEAP_NO_SUPREMUM);
3105  } else {
3106  rec = page + PAGE_OLD_INFIMUM;
3107 
3108  do {
3109  heap_no = rec_get_heap_no_old(rec);
3110 
3111  lock_rec_inherit_to_gap(heir_block, block,
3112  heir_heap_no, heap_no);
3113 
3114  lock_rec_reset_and_release_wait(block, heap_no);
3115 
3116  rec = page + rec_get_next_offs(rec, FALSE);
3117  } while (heap_no != PAGE_HEAP_NO_SUPREMUM);
3118  }
3119 
3120  lock_rec_free_all_from_discard_page(block);
3121 
3122  lock_mutex_exit_kernel();
3123 }
3124 
3125 /*************************************************************/
3127 UNIV_INTERN
3128 void
3129 lock_update_insert(
3130 /*===============*/
3131  const buf_block_t* block,
3132  const rec_t* rec)
3133 {
3134  ulint receiver_heap_no;
3135  ulint donator_heap_no;
3136 
3137  ut_ad(block->frame == page_align(rec));
3138 
3139  /* Inherit the gap-locking locks for rec, in gap mode, from the next
3140  record */
3141 
3142  if (page_rec_is_comp(rec)) {
3143  receiver_heap_no = rec_get_heap_no_new(rec);
3144  donator_heap_no = rec_get_heap_no_new(
3145  page_rec_get_next_low(rec, TRUE));
3146  } else {
3147  receiver_heap_no = rec_get_heap_no_old(rec);
3148  donator_heap_no = rec_get_heap_no_old(
3149  page_rec_get_next_low(rec, FALSE));
3150  }
3151 
3152  lock_mutex_enter_kernel();
3153  lock_rec_inherit_to_gap_if_gap_lock(block,
3154  receiver_heap_no, donator_heap_no);
3155  lock_mutex_exit_kernel();
3156 }
3157 
3158 /*************************************************************/
3160 UNIV_INTERN
3161 void
3162 lock_update_delete(
3163 /*===============*/
3164  const buf_block_t* block,
3165  const rec_t* rec)
3166 {
3167  const page_t* page = block->frame;
3168  ulint heap_no;
3169  ulint next_heap_no;
3170 
3171  ut_ad(page == page_align(rec));
3172 
3173  if (page_is_comp(page)) {
3174  heap_no = rec_get_heap_no_new(rec);
3175  next_heap_no = rec_get_heap_no_new(page
3176  + rec_get_next_offs(rec,
3177  TRUE));
3178  } else {
3179  heap_no = rec_get_heap_no_old(rec);
3180  next_heap_no = rec_get_heap_no_old(page
3181  + rec_get_next_offs(rec,
3182  FALSE));
3183  }
3184 
3185  lock_mutex_enter_kernel();
3186 
3187  /* Let the next record inherit the locks from rec, in gap mode */
3188 
3189  lock_rec_inherit_to_gap(block, block, next_heap_no, heap_no);
3190 
3191  /* Reset the lock bits on rec and release waiting transactions */
3192 
3193  lock_rec_reset_and_release_wait(block, heap_no);
3194 
3195  lock_mutex_exit_kernel();
3196 }
3197 
3198 /*********************************************************************/
3205 UNIV_INTERN
3206 void
3207 lock_rec_store_on_page_infimum(
3208 /*===========================*/
3209  const buf_block_t* block,
3210  const rec_t* rec)
3215 {
3216  ulint heap_no = page_rec_get_heap_no(rec);
3217 
3218  ut_ad(block->frame == page_align(rec));
3219 
3220  lock_mutex_enter_kernel();
3221 
3222  lock_rec_move(block, block, PAGE_HEAP_NO_INFIMUM, heap_no);
3223 
3224  lock_mutex_exit_kernel();
3225 }
3226 
3227 /*********************************************************************/
3230 UNIV_INTERN
3231 void
3232 lock_rec_restore_from_page_infimum(
3233 /*===============================*/
3234  const buf_block_t* block,
3235  const rec_t* rec,
3237  const buf_block_t* donator)
3242 {
3243  ulint heap_no = page_rec_get_heap_no(rec);
3244 
3245  lock_mutex_enter_kernel();
3246 
3247  lock_rec_move(block, donator, heap_no, PAGE_HEAP_NO_INFIMUM);
3248 
3249  lock_mutex_exit_kernel();
3250 }
3251 
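The two functions above use the page infimum as a temporary parking place for a record's locks while the record itself is deleted and re-inserted during an update. A simplified standalone model of that store/restore round trip (one lock-mode character per heap number instead of real lock structs):

#include <stdio.h>

#define INFIMUM 0
#define N_HEAP  8

static char page_locks[N_HEAP];   /* toy page: one lock mode char per heap number */

/* Park the locks of a record on the infimum while the record is relocated. */
static void store_on_infimum(int heap_no)
{ page_locks[INFIMUM] = page_locks[heap_no]; page_locks[heap_no] = 0; }

/* Give the parked locks back to the record at its new heap number. */
static void restore_from_infimum(int heap_no)
{ page_locks[heap_no] = page_locks[INFIMUM]; page_locks[INFIMUM] = 0; }

int main(void)
{
    page_locks[3] = 'X';          /* the record at heap_no 3 holds an X lock */
    store_on_infimum(3);          /* record is about to be deleted and re-inserted */
    restore_from_infimum(5);      /* it reappears at heap_no 5 */
    printf("heap_no 5 lock mode: %c\n", page_locks[5]);   /* X */
    return 0;
}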
3252 /*=========== DEADLOCK CHECKING ======================================*/
3253 
3254 /********************************************************************/
3259 static
3260 ibool
3261 lock_deadlock_occurs(
3262 /*=================*/
3263  lock_t* lock,
3264  trx_t* trx)
3265 {
3266  trx_t* mark_trx;
3267  ulint ret;
3268  ulint cost = 0;
3269 
3270  ut_ad(trx);
3271  ut_ad(lock);
3272  ut_ad(mutex_own(&kernel_mutex));
3273 retry:
3274  /* We check that adding this trx to the waits-for graph
3275  does not produce a cycle. First mark all active transactions
3276  with 0: */
3277 
3278  mark_trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
3279 
3280  while (mark_trx) {
3281  mark_trx->deadlock_mark = 0;
3282  mark_trx = UT_LIST_GET_NEXT(trx_list, mark_trx);
3283  }
3284 
3285  ret = lock_deadlock_recursive(trx, trx, lock, &cost, 0);
3286 
3287  switch (ret) {
3288  case LOCK_VICTIM_IS_OTHER:
3289  /* We chose some other trx as a victim: retry if there still
3290  is a deadlock */
3291  goto retry;
3292 
3293  case LOCK_EXCEED_MAX_DEPTH:
3294  /* If the lock search exceeds the max number of steps
3295  or the max depth, the current trx will be
3296  the victim. Print its information. */
3297  rewind(lock_latest_err_file);
3298  ut_print_timestamp(lock_latest_err_file);
3299 
3300  fputs("TOO DEEP OR LONG SEARCH IN THE LOCK TABLE"
3301  " WAITS-FOR GRAPH, WE WILL ROLL BACK"
3302  " FOLLOWING TRANSACTION \n",
3303  lock_latest_err_file);
3304 
3305  fputs("\n*** TRANSACTION:\n", lock_latest_err_file);
3306  trx_print(lock_latest_err_file, trx, 3000);
3307 
3308  fputs("*** WAITING FOR THIS LOCK TO BE GRANTED:\n",
3309  lock_latest_err_file);
3310 
3311  if (lock_get_type(lock) == LOCK_REC) {
3312  lock_rec_print(lock_latest_err_file, lock);
3313  } else {
3314  lock_table_print(lock_latest_err_file, lock);
3315  }
3316  break;
3317 
3318  case LOCK_VICTIM_IS_START:
3319  fputs("*** WE ROLL BACK TRANSACTION (2)\n",
3320  lock_latest_err_file);
3321  break;
3322 
3323  default:
3324  /* No deadlock detected */
3325  return(FALSE);
3326  }
3327 
3328  lock_deadlock_found = TRUE;
3329 
3330  return(TRUE);
3331 }
3332 
3333 /********************************************************************/
3341 static
3342 ulint
3343 lock_deadlock_recursive(
3344 /*====================*/
3345  trx_t* start,
3346  trx_t* trx,
3347  lock_t* wait_lock,
3348  ulint* cost,
3351  ulint depth)
3354 {
3355  ulint ret;
3356  lock_t* lock;
3357  trx_t* lock_trx;
3358  ulint heap_no = ULINT_UNDEFINED;
3359 
3360  ut_a(trx);
3361  ut_a(start);
3362  ut_a(wait_lock);
3363  ut_ad(mutex_own(&kernel_mutex));
3364 
3365  if (trx->deadlock_mark == 1) {
3366  /* We have already exhaustively searched the subtree starting
3367  from this trx */
3368 
3369  return(0);
3370  }
3371 
3372  *cost = *cost + 1;
3373 
3374  if (lock_get_type_low(wait_lock) == LOCK_REC) {
3375  ulint space;
3376  ulint page_no;
3377 
3378  heap_no = lock_rec_find_set_bit(wait_lock);
3379  ut_a(heap_no != ULINT_UNDEFINED);
3380 
3381  space = wait_lock->un_member.rec_lock.space;
3382  page_no = wait_lock->un_member.rec_lock.page_no;
3383 
3384  lock = lock_rec_get_first_on_page_addr(space, page_no);
3385 
3386  /* Position the iterator on the first matching record lock. */
3387  while (lock != NULL
3388  && lock != wait_lock
3389  && !lock_rec_get_nth_bit(lock, heap_no)) {
3390 
3391  lock = lock_rec_get_next_on_page(lock);
3392  }
3393 
3394  if (lock == wait_lock) {
3395  lock = NULL;
3396  }
3397 
3398  ut_ad(lock == NULL || lock_rec_get_nth_bit(lock, heap_no));
3399 
3400  } else {
3401  lock = wait_lock;
3402  }
3403 
3404  /* Look at the locks ahead of wait_lock in the lock queue */
3405 
3406  for (;;) {
3407  /* Get previous table lock. */
3408  if (heap_no == ULINT_UNDEFINED) {
3409 
3410  lock = UT_LIST_GET_PREV(
3411  un_member.tab_lock.locks, lock);
3412  }
3413 
3414  if (lock == NULL) {
3415  /* We can mark this subtree as searched */
3416  trx->deadlock_mark = 1;
3417 
3418  return(FALSE);
3419  }
3420 
3421  if (lock_has_to_wait(wait_lock, lock)) {
3422 
3423  ibool too_far
3424  = depth > LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK
3425  || *cost > LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK;
3426 
3427  lock_trx = lock->trx;
3428 
3429  if (lock_trx == start) {
3430 
3431  /* We came back to the recursion starting
3432  point: a deadlock detected; or we have
3433  searched the waits-for graph too long */
3434 
3435  FILE* ef = lock_latest_err_file;
3436 
3437  rewind(ef);
3438  ut_print_timestamp(ef);
3439 
3440  fputs("\n*** (1) TRANSACTION:\n", ef);
3441 
3442  trx_print(ef, wait_lock->trx, 3000);
3443 
3444  fputs("*** (1) WAITING FOR THIS LOCK"
3445  " TO BE GRANTED:\n", ef);
3446 
3447  if (lock_get_type_low(wait_lock) == LOCK_REC) {
3448  lock_rec_print(ef, wait_lock);
3449  } else {
3450  lock_table_print(ef, wait_lock);
3451  }
3452 
3453  fputs("*** (2) TRANSACTION:\n", ef);
3454 
3455  trx_print(ef, lock->trx, 3000);
3456 
3457  fputs("*** (2) HOLDS THE LOCK(S):\n", ef);
3458 
3459  if (lock_get_type_low(lock) == LOCK_REC) {
3460  lock_rec_print(ef, lock);
3461  } else {
3462  lock_table_print(ef, lock);
3463  }
3464 
3465  fputs("*** (2) WAITING FOR THIS LOCK"
3466  " TO BE GRANTED:\n", ef);
3467 
3468  if (lock_get_type_low(start->wait_lock)
3469  == LOCK_REC) {
3470  lock_rec_print(ef, start->wait_lock);
3471  } else {
3472  lock_table_print(ef, start->wait_lock);
3473  }
3474 #ifdef UNIV_DEBUG
3475  if (lock_print_waits) {
3476  fputs("Deadlock detected\n",
3477  stderr);
3478  }
3479 #endif /* UNIV_DEBUG */
3480 
3481  if (trx_weight_ge(wait_lock->trx, start)) {
3482  /* Our recursion starting point
3483  transaction is 'smaller', let us
3484  choose 'start' as the victim and roll
3485  it back */
3486 
3487  return(LOCK_VICTIM_IS_START);
3488  }
3489 
3490  lock_deadlock_found = TRUE;
3491 
3492  /* Let us choose the transaction of wait_lock
3493  as a victim to try to avoid deadlocking our
3494  recursion starting point transaction */
3495 
3496  fputs("*** WE ROLL BACK TRANSACTION (1)\n",
3497  ef);
3498 
3499  wait_lock->trx->was_chosen_as_deadlock_victim
3500  = TRUE;
3501 
3503 
3504  /* Since trx and wait_lock are no longer
3505  in the waits-for graph, we can return FALSE;
3506  note that our selective algorithm can choose
3507  several transactions as victims, but still
3508  we may end up rolling back also the recursion
3509  starting point transaction! */
3510 
3511  return(LOCK_VICTIM_IS_OTHER);
3512  }
3513 
3514  if (too_far) {
3515 
3516 #ifdef UNIV_DEBUG
3517  if (lock_print_waits) {
3518  fputs("Deadlock search exceeds"
3519  " max steps or depth.\n",
3520  stderr);
3521  }
3522 #endif /* UNIV_DEBUG */
3523  /* The information about transaction/lock
3524  to be rolled back is available in the top
3525  level. Do not print anything here. */
3526  return(LOCK_EXCEED_MAX_DEPTH);
3527  }
3528 
3529  if (lock_trx->que_state == TRX_QUE_LOCK_WAIT) {
3530 
3531  /* Another trx ahead has requested a lock in an
3532  incompatible mode, and is itself waiting for
3533  a lock */
3534 
3535  ret = lock_deadlock_recursive(
3536  start, lock_trx,
3537  lock_trx->wait_lock, cost, depth + 1);
3538 
3539  if (ret != 0) {
3540 
3541  return(ret);
3542  }
3543  }
3544  }
3545  /* Get the next record lock to check. */
3546  if (heap_no != ULINT_UNDEFINED) {
3547 
3548  ut_a(lock != NULL);
3549 
3550  do {
3551  lock = lock_rec_get_next_on_page(lock);
3552  } while (lock != NULL
3553  && lock != wait_lock
3554  && !lock_rec_get_nth_bit(lock, heap_no));
3555 
3556  if (lock == wait_lock) {
3557  lock = NULL;
3558  }
3559  }
3560  }/* end of the 'for (;;)'-loop */
3561 }
3562 
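lock_deadlock_recursive() is a depth-first search of the waits-for graph, bounded by LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK and LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK. The standalone sketch below shows the same cycle check on a toy adjacency matrix; it omits victim selection and the deadlock_mark optimisation used above, and none of its names come from lock0lock.cc.

#include <stdbool.h>
#include <stdio.h>

#define N_TRX 4

/* waits_for[a][b] == true means transaction a waits for a lock held by b. */
static bool waits_for[N_TRX][N_TRX];

/* Depth-first search: does trx (directly or transitively) wait for start?
   The real code additionally marks exhausted subtrees and counts total
   steps; only the depth bound is kept here. */
static bool leads_back_to(int trx, int start, int depth)
{
    if (depth > N_TRX) {
        return false;                  /* cf. LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK */
    }
    for (int other = 0; other < N_TRX; other++) {
        if (!waits_for[trx][other]) continue;
        if (other == start) return true;                 /* cycle: deadlock */
        if (leads_back_to(other, start, depth + 1)) return true;
    }
    return false;
}

int main(void)
{
    /* 0 -> 1 -> 2 -> 0 is a cycle; 3 waits for 2 but is outside the cycle. */
    waits_for[0][1] = waits_for[1][2] = waits_for[2][0] = waits_for[3][2] = true;

    printf("trx 0 deadlocked: %d\n", leads_back_to(0, 0, 0));   /* 1 */
    printf("trx 3 deadlocked: %d\n", leads_back_to(3, 3, 0));   /* 0 */
    return 0;
}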
3563 /*========================= TABLE LOCKS ==============================*/
3564 
3565 /*********************************************************************/
3569 UNIV_INLINE
3570 lock_t*
3571 lock_table_create(
3572 /*==============*/
3573  dict_table_t* table,
3574  ulint type_mode,
3576  trx_t* trx)
3577 {
3578  lock_t* lock;
3579 
3580  ut_ad(table && trx);
3581  ut_ad(mutex_own(&kernel_mutex));
3582 
3583  if ((type_mode & LOCK_MODE_MASK) == LOCK_AUTO_INC) {
3584  ++table->n_waiting_or_granted_auto_inc_locks;
3585  }
3586 
3587  /* For AUTOINC locking we reuse the lock instance only if
3588  there is no wait involved; otherwise we allocate the waiting lock
3589  from the transaction lock heap. */
3590  if (type_mode == LOCK_AUTO_INC) {
3591 
3592  lock = table->autoinc_lock;
3593 
3594  table->autoinc_trx = trx;
3595 
3596  ib_vector_push(trx->autoinc_locks, lock);
3597  } else {
3598  lock = static_cast<lock_t *>(mem_heap_alloc(trx->lock_heap, sizeof(lock_t)));
3599  }
3600 
3601  UT_LIST_ADD_LAST(trx_locks, trx->trx_locks, lock);
3602 
3603  lock->type_mode = type_mode | LOCK_TABLE;
3604  lock->trx = trx;
3605 
3606  lock->un_member.tab_lock.table = table;
3607 
3608  UT_LIST_ADD_LAST(un_member.tab_lock.locks, table->locks, lock);
3609 
3610  if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
3611 
3612  lock_set_lock_and_trx_wait(lock, trx);
3613  }
3614 
3615  return(lock);
3616 }
3617 
3618 /*************************************************************/
3622 UNIV_INLINE
3623 void
3624 lock_table_remove_low(
3625 /*==================*/
3626  lock_t* lock)
3627 {
3628  trx_t* trx;
3629  dict_table_t* table;
3630 
3631  ut_ad(mutex_own(&kernel_mutex));
3632 
3633  trx = lock->trx;
3634  table = lock->un_member.tab_lock.table;
3635 
3636  /* Remove the table from the transaction's AUTOINC vector, if
3637  the lock that is being released is an AUTOINC lock. */
3638  if (lock_get_mode(lock) == LOCK_AUTO_INC) {
3639 
3640  /* The table's AUTOINC lock can get transferred to
3641  another transaction before we get here. */
3642  if (table->autoinc_trx == trx) {
3643  table->autoinc_trx = NULL;
3644  }
3645 
3646  /* The locks must be freed in the reverse order from
3647  the one in which they were acquired. This is to avoid
3648  traversing the AUTOINC lock vector unnecessarily.
3649 
3650  We only store locks that were granted in the
3651  trx->autoinc_locks vector (see lock_table_create()
3652  and lock_grant()). Therefore it can be empty and we
3653  need to check for that. */
3654 
3655  if (!lock_get_wait(lock)
3656  && !ib_vector_is_empty(trx->autoinc_locks)) {
3657  lock_t* autoinc_lock;
3658 
3659  autoinc_lock = static_cast<lock_t *>(ib_vector_pop(trx->autoinc_locks));
3660  ut_a(autoinc_lock == lock);
3661  }
3662 
3665  }
3666 
3667  UT_LIST_REMOVE(trx_locks, trx->trx_locks, lock);
3668  UT_LIST_REMOVE(un_member.tab_lock.locks, table->locks, lock);
3669 }
3670 
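The comments above require AUTO-INC locks to be released in the reverse order of acquisition, so trx->autoinc_locks is effectively a stack: ib_vector_push() on grant, ib_vector_pop() on release. A minimal sketch of that LIFO discipline with a plain array; the autoinc_* helpers are invented for the example.

#include <assert.h>
#include <stdio.h>

#define MAX_AUTOINC 8

/* Tiny LIFO of granted AUTO-INC "locks", identified here by a table id. */
static int autoinc_stack[MAX_AUTOINC];
static int autoinc_top;

static void autoinc_push(int table_id) { autoinc_stack[autoinc_top++] = table_id; }
static int  autoinc_pop(void)          { return autoinc_stack[--autoinc_top]; }

int main(void)
{
    autoinc_push(10);      /* AUTO-INC lock granted on table 10 */
    autoinc_push(42);      /* ... then on table 42 */

    /* Release in reverse order of acquisition: each pop must return the
       most recently granted lock, exactly as ut_a(autoinc_lock == lock)
       verifies above. */
    assert(autoinc_pop() == 42);
    assert(autoinc_pop() == 10);

    printf("AUTO-INC locks released in LIFO order\n");
    return 0;
}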
3671 /*********************************************************************/
3678 static
3679 ulint
3680 lock_table_enqueue_waiting(
3681 /*=======================*/
3682  ulint mode,
3684  dict_table_t* table,
3685  que_thr_t* thr)
3686 {
3687  lock_t* lock;
3688  trx_t* trx;
3689 
3690  ut_ad(mutex_own(&kernel_mutex));
3691 
3692  /* Test if there already is some other reason to suspend thread:
3693  we do not enqueue a lock request if the query thread should be
3694  stopped anyway */
3695 
3696  if (que_thr_stop(thr)) {
3697  ut_error;
3698 
3699  return(DB_QUE_THR_SUSPENDED);
3700  }
3701 
3702  trx = thr_get_trx(thr);
3703 
3704  switch (trx_get_dict_operation(trx)) {
3705  case TRX_DICT_OP_NONE:
3706  break;
3707  case TRX_DICT_OP_TABLE:
3708  case TRX_DICT_OP_INDEX:
3709  ut_print_timestamp(stderr);
3710  fputs(" InnoDB: Error: a table lock wait happens"
3711  " in a dictionary operation!\n"
3712  "InnoDB: Table name ", stderr);
3713  ut_print_name(stderr, trx, TRUE, table->name);
3714  fputs(".\n"
3715  "InnoDB: Submit a detailed bug report"
3716  " to http://bugs.mysql.com\n",
3717  stderr);
3718  }
3719 
3720  /* Enqueue the lock request that will wait to be granted */
3721 
3722  lock = lock_table_create(table, mode | LOCK_WAIT, trx);
3723 
3724  /* Check if a deadlock occurs: if yes, remove the lock request and
3725  return an error code */
3726 
3727  if (lock_deadlock_occurs(lock, trx)) {
3728 
3729  /* The order here is important, we don't want to
3730  lose the state of the lock before calling remove. */
3731  lock_table_remove_low(lock);
3732  lock_reset_lock_and_trx_wait(lock);
3733 
3734  return(DB_DEADLOCK);
3735  }
3736 
3737  if (trx->wait_lock == NULL) {
3738  /* Deadlock resolution chose another transaction as a victim,
3739  and we accidentally got our lock granted! */
3740 
3741  return(DB_SUCCESS_LOCKED_REC);
3742  }
3743 
3744  trx->que_state = TRX_QUE_LOCK_WAIT;
3745  trx->was_chosen_as_deadlock_victim = FALSE;
3746  trx->wait_started = time(NULL);
3747 
3748  ut_a(que_thr_stop(thr));
3749 
3750  return(DB_LOCK_WAIT);
3751 }
3752 
3753 /*********************************************************************/
3757 UNIV_INLINE
3758 lock_t*
3759 lock_table_other_has_incompatible(
3760 /*==============================*/
3761  trx_t* trx,
3763  ulint wait,
3765  dict_table_t* table,
3766  enum lock_mode mode)
3767 {
3768  lock_t* lock;
3769 
3770  ut_ad(mutex_own(&kernel_mutex));
3771 
3772  lock = UT_LIST_GET_LAST(table->locks);
3773 
3774  while (lock != NULL) {
3775 
3776  if ((lock->trx != trx)
3777  && (!lock_mode_compatible(lock_get_mode(lock), mode))
3778  && (wait || !(lock_get_wait(lock)))) {
3779 
3780  return(lock);
3781  }
3782 
3783  lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock);
3784  }
3785 
3786  return(NULL);
3787 }
3788 
3789 /*********************************************************************/
3793 UNIV_INTERN
3794 ulint
3795 lock_table(
3796 /*=======*/
3797  ulint flags,
3799  dict_table_t* table,
3800  enum lock_mode mode,
3801  que_thr_t* thr)
3802 {
3803  trx_t* trx;
3804  ulint err;
3805 
3806  ut_ad(table && thr);
3807 
3808  if (flags & BTR_NO_LOCKING_FLAG) {
3809 
3810  return(DB_SUCCESS_LOCKED_REC);
3811  }
3812 
3813  ut_a(flags == 0);
3814 
3815  trx = thr_get_trx(thr);
3816 
3817  lock_mutex_enter_kernel();
3818 
3819  /* Look for stronger locks the same trx already has on the table */
3820 
3821  if (lock_table_has(trx, table, mode)) {
3822 
3823  lock_mutex_exit_kernel();
3824 
3825  return(DB_SUCCESS);
3826  }
3827 
3828  /* We have to check if the new lock is compatible with any locks
3829  other transactions have in the table lock queue. */
3830 
3831  if (lock_table_other_has_incompatible(trx, LOCK_WAIT, table, mode)) {
3832 
3833  /* Another trx has a request on the table in an incompatible
3834  mode: this trx may have to wait */
3835 
3836  err = lock_table_enqueue_waiting(mode | flags, table, thr);
3837 
3838  lock_mutex_exit_kernel();
3839 
3840  return(err);
3841  }
3842 
3843  lock_table_create(table, mode | flags, trx);
3844 
3845  ut_a(!flags || mode == LOCK_S || mode == LOCK_X);
3846 
3847  lock_mutex_exit_kernel();
3848 
3849  return(DB_SUCCESS);
3850 }
3851 
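lock_table() grants immediately when lock_mode_compatible() holds against every other transaction's request on the table, and enqueues a waiting lock otherwise. The sketch below hard-codes the usual InnoDB table-lock compatibility matrix (IS, IX, S, X, AUTO-INC) as commonly documented; it is illustrative only and does not use the engine's own structures.

#include <stdio.h>

enum { M_IS, M_IX, M_S, M_X, M_AI, N_MODES };

/* compat[requested][held]: 1 if the requested mode can coexist with the held one. */
static const int compat[N_MODES][N_MODES] = {
    /*          IS IX  S  X AI */
    /* IS */  {  1, 1, 1, 0, 1 },
    /* IX */  {  1, 1, 0, 0, 1 },
    /* S  */  {  1, 0, 1, 0, 0 },
    /* X  */  {  0, 0, 0, 0, 0 },
    /* AI */  {  1, 1, 0, 0, 0 },
};

int main(void)
{
    printf("IX vs IX: %d\n", compat[M_IX][M_IX]);   /* many writers may intend-lock */
    printf("IX vs S : %d\n", compat[M_IX][M_S]);    /* but IX conflicts with table S */
    printf("AI vs AI: %d\n", compat[M_AI][M_AI]);   /* only one AUTO-INC lock at a time */
    return 0;
}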
3852 /*********************************************************************/
3855 static
3856 ibool
3857 lock_table_has_to_wait_in_queue(
3858 /*============================*/
3859  lock_t* wait_lock)
3860 {
3861  dict_table_t* table;
3862  lock_t* lock;
3863 
3864  ut_ad(mutex_own(&kernel_mutex));
3865  ut_ad(lock_get_wait(wait_lock));
3866 
3867  table = wait_lock->un_member.tab_lock.table;
3868 
3869  lock = UT_LIST_GET_FIRST(table->locks);
3870 
3871  while (lock != wait_lock) {
3872 
3873  if (lock_has_to_wait(wait_lock, lock)) {
3874 
3875  return(TRUE);
3876  }
3877 
3878  lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock);
3879  }
3880 
3881  return(FALSE);
3882 }
3883 
3884 /*************************************************************/
3888 static
3889 void
3890 lock_table_dequeue(
3891 /*===============*/
3892  lock_t* in_lock)
3895 {
3896  lock_t* lock;
3897 
3898  ut_ad(mutex_own(&kernel_mutex));
3899  ut_a(lock_get_type_low(in_lock) == LOCK_TABLE);
3900 
3901  lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, in_lock);
3902 
3903  lock_table_remove_low(in_lock);
3904 
3905  /* Check if waiting locks in the queue can now be granted: grant
3906  locks if there are no conflicting locks ahead. */
3907 
3908  while (lock != NULL) {
3909 
3910  if (lock_get_wait(lock)
3911  && !lock_table_has_to_wait_in_queue(lock)) {
3912 
3913  /* Grant the lock */
3914  lock_grant(lock);
3915  }
3916 
3917  lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock);
3918  }
3919 }
3920 
3921 /*=========================== LOCK RELEASE ==============================*/
3922 
3923 /*************************************************************/
3927 UNIV_INTERN
3928 void
3929 lock_rec_unlock(
3930 /*============*/
3931  trx_t* trx,
3933  const buf_block_t* block,
3934  const rec_t* rec,
3935  enum lock_mode lock_mode)
3936 {
3937  lock_t* first_lock;
3938  lock_t* lock;
3939  ulint heap_no;
3940 
3941  ut_ad(trx && rec);
3942  ut_ad(block->frame == page_align(rec));
3943 
3944  heap_no = page_rec_get_heap_no(rec);
3945 
3946  mutex_enter(&kernel_mutex);
3947 
3948  first_lock = lock_rec_get_first(block, heap_no);
3949 
3950  /* Find a lock held by this transaction on the record with the
3951  given lock_mode. */
3952 
3953  for (lock = first_lock; lock != NULL;
3954  lock = lock_rec_get_next(heap_no, lock)) {
3955  if (lock->trx == trx && lock_get_mode(lock) == lock_mode) {
3956  ut_a(!lock_get_wait(lock));
3957  lock_rec_reset_nth_bit(lock, heap_no);
3958  goto released;
3959  }
3960  }
3961 
3962  mutex_exit(&kernel_mutex);
3963  ut_print_timestamp(stderr);
3964  fprintf(stderr,
3965  " InnoDB: Error: unlock row could not"
3966  " find a %lu mode lock on the record\n",
3967  (ulong) lock_mode);
3968 
3969  return;
3970 
3971 released:
3972  /* Check if we can now grant waiting lock requests */
3973 
3974  for (lock = first_lock; lock != NULL;
3975  lock = lock_rec_get_next(heap_no, lock)) {
3976  if (lock_get_wait(lock)
3977  && !lock_rec_has_to_wait_in_queue(lock)) {
3978 
3979  /* Grant the lock */
3980  lock_grant(lock);
3981  }
3982  }
3983 
3984  mutex_exit(&kernel_mutex);
3985 }
3986 
3987 /*********************************************************************/
3990 UNIV_INTERN
3991 void
3992 lock_release_off_kernel(
3993 /*====================*/
3994  trx_t* trx)
3995 {
3996  dict_table_t* table;
3997  ulint count;
3998  lock_t* lock;
3999 
4000  ut_ad(mutex_own(&kernel_mutex));
4001 
4002  lock = UT_LIST_GET_LAST(trx->trx_locks);
4003 
4004  count = 0;
4005 
4006  while (lock != NULL) {
4007 
4008  count++;
4009 
4010  if (lock_get_type_low(lock) == LOCK_REC) {
4011 
4012  lock_rec_dequeue_from_page(lock);
4013  } else {
4014  ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
4015 
4016  if (lock_get_mode(lock) != LOCK_IS
4017  && trx->undo_no != 0) {
4018 
4019  /* The trx may have modified the table. We
4020  block the use of the MySQL query cache for
4021  all currently active transactions. */
4022 
4023  table = lock->un_member.tab_lock.table;
4024 
4025  table->query_cache_inv_trx_id
4026  = trx_sys->max_trx_id;
4027  }
4028 
4029  lock_table_dequeue(lock);
4030  }
4031 
4032  if (count == LOCK_RELEASE_KERNEL_INTERVAL) {
4033  /* Release the kernel mutex for a while, so that we
4034  do not monopolize it */
4035 
4036  lock_mutex_exit_kernel();
4037 
4038  lock_mutex_enter_kernel();
4039 
4040  count = 0;
4041  }
4042 
4043  lock = UT_LIST_GET_LAST(trx->trx_locks);
4044  }
4045 
4046  ut_a(ib_vector_size(trx->autoinc_locks) == 0);
4047 
4048  mem_heap_empty(trx->lock_heap);
4049 }
4050 
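Releasing all locks of a big transaction would otherwise monopolise the kernel mutex, so the loop above yields it every LOCK_RELEASE_KERNEL_INTERVAL iterations. The pattern in isolation, using a pthread mutex as a stand-in for the kernel mutex:

/* build: cc demo.c -pthread */
#include <pthread.h>
#include <stdio.h>

#define RELEASE_INTERVAL 1000           /* cf. LOCK_RELEASE_KERNEL_INTERVAL */

static pthread_mutex_t kernel_mutex = PTHREAD_MUTEX_INITIALIZER;

static void release_many_locks(int n_locks)
{
    int count = 0;

    pthread_mutex_lock(&kernel_mutex);

    for (int i = 0; i < n_locks; i++) {
        /* ... release one lock here ... */

        if (++count == RELEASE_INTERVAL) {
            /* Briefly yield the mutex so other threads are not starved. */
            pthread_mutex_unlock(&kernel_mutex);
            pthread_mutex_lock(&kernel_mutex);
            count = 0;
        }
    }

    pthread_mutex_unlock(&kernel_mutex);
}

int main(void)
{
    release_many_locks(2500);
    printf("released\n");
    return 0;
}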
4051 /*********************************************************************/
4054 UNIV_INTERN
4055 void
4056 lock_cancel_waiting_and_release(
4057 /*============================*/
4058  lock_t* lock)
4059 {
4060  ut_ad(mutex_own(&kernel_mutex));
4061 
4062  if (lock_get_type_low(lock) == LOCK_REC) {
4063 
4064  lock_rec_dequeue_from_page(lock);
4065  } else {
4066  ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
4067 
4068  if (lock->trx->autoinc_locks != NULL) {
4069  /* Release the transaction's AUTOINC locks. */
4070  lock_release_autoinc_locks(lock->trx);
4071  }
4072 
4073  lock_table_dequeue(lock);
4074  }
4075 
4076  /* Reset the wait flag and the back pointer to lock in trx */
4077 
4078  lock_reset_lock_and_trx_wait(lock);
4079 
4080  /* The following function releases the trx from lock wait */
4081 
4082  trx_end_lock_wait(lock->trx);
4083 }
4084 
4085 /* True if a lock mode is S or X */
4086 #define IS_LOCK_S_OR_X(lock) \
4087  (lock_get_mode(lock) == LOCK_S \
4088  || lock_get_mode(lock) == LOCK_X)
4089 
4090 
4091 /*********************************************************************/
4096 static
4097 void
4098 lock_remove_all_on_table_for_trx(
4099 /*=============================*/
4100  dict_table_t* table,
4101  trx_t* trx,
4102  ibool remove_also_table_sx_locks)
4104 {
4105  lock_t* lock;
4106  lock_t* prev_lock;
4107 
4108  ut_ad(mutex_own(&kernel_mutex));
4109 
4110  lock = UT_LIST_GET_LAST(trx->trx_locks);
4111 
4112  while (lock != NULL) {
4113  prev_lock = UT_LIST_GET_PREV(trx_locks, lock);
4114 
4115  if (lock_get_type_low(lock) == LOCK_REC
4116  && lock->index->table == table) {
4117  ut_a(!lock_get_wait(lock));
4118 
4119  lock_rec_discard(lock);
4120  } else if (lock_get_type_low(lock) & LOCK_TABLE
4121  && lock->un_member.tab_lock.table == table
4122  && (remove_also_table_sx_locks
4123  || !IS_LOCK_S_OR_X(lock))) {
4124 
4125  ut_a(!lock_get_wait(lock));
4126 
4127  lock_table_remove_low(lock);
4128  }
4129 
4130  lock = prev_lock;
4131  }
4132 }
4133 
4134 /*********************************************************************/
4139 UNIV_INTERN
4140 void
4141 lock_remove_all_on_table(
4142 /*=====================*/
4143  dict_table_t* table,
4145  ibool remove_also_table_sx_locks)
4147 {
4148  lock_t* lock;
4149  lock_t* prev_lock;
4150 
4151  mutex_enter(&kernel_mutex);
4152 
4153  lock = UT_LIST_GET_FIRST(table->locks);
4154 
4155  while (lock != NULL) {
4156 
4157  prev_lock = UT_LIST_GET_PREV(un_member.tab_lock.locks,
4158  lock);
4159 
4160  /* If we should remove all locks (remove_also_table_sx_locks
4161  is TRUE), or if the lock is not a table-level S or X lock,
4162  then check that we are not going to remove a waiting lock. */
4163  if (remove_also_table_sx_locks
4164  || !(lock_get_type(lock) == LOCK_TABLE
4165  && IS_LOCK_S_OR_X(lock))) {
4166 
4167  ut_a(!lock_get_wait(lock));
4168  }
4169 
4170  lock_remove_all_on_table_for_trx(table, lock->trx,
4171  remove_also_table_sx_locks);
4172 
4173  if (prev_lock == NULL) {
4174  if (lock == UT_LIST_GET_FIRST(table->locks)) {
4175  /* lock was not removed, pick its successor */
4176  lock = UT_LIST_GET_NEXT(
4177  un_member.tab_lock.locks, lock);
4178  } else {
4179  /* lock was removed, pick the first one */
4180  lock = UT_LIST_GET_FIRST(table->locks);
4181  }
4182  } else if (UT_LIST_GET_NEXT(un_member.tab_lock.locks,
4183  prev_lock) != lock) {
4184  /* If lock was removed by
4185  lock_remove_all_on_table_for_trx() then pick the
4186  successor of prev_lock ... */
4187  lock = UT_LIST_GET_NEXT(
4188  un_member.tab_lock.locks, prev_lock);
4189  } else {
4190  /* ... otherwise pick the successor of lock. */
4191  lock = UT_LIST_GET_NEXT(
4192  un_member.tab_lock.locks, lock);
4193  }
4194  }
4195 
4196  mutex_exit(&kernel_mutex);
4197 }
4198 
4199 /*===================== VALIDATION AND DEBUGGING ====================*/
4200 
4201 /*********************************************************************/
4203 UNIV_INTERN
4204 void
4205 lock_table_print(
4206 /*=============*/
4207  FILE* file,
4208  const lock_t* lock)
4209 {
4210  ut_ad(mutex_own(&kernel_mutex));
4211  ut_a(lock_get_type_low(lock) == LOCK_TABLE);
4212 
4213  fputs("TABLE LOCK table ", file);
4214  ut_print_name(file, lock->trx, TRUE,
4215  lock->un_member.tab_lock.table->name);
4216  fprintf(file, " trx id " TRX_ID_FMT, lock->trx->id);
4217 
4218  if (lock_get_mode(lock) == LOCK_S) {
4219  fputs(" lock mode S", file);
4220  } else if (lock_get_mode(lock) == LOCK_X) {
4221  fputs(" lock mode X", file);
4222  } else if (lock_get_mode(lock) == LOCK_IS) {
4223  fputs(" lock mode IS", file);
4224  } else if (lock_get_mode(lock) == LOCK_IX) {
4225  fputs(" lock mode IX", file);
4226  } else if (lock_get_mode(lock) == LOCK_AUTO_INC) {
4227  fputs(" lock mode AUTO-INC", file);
4228  } else {
4229  fprintf(file, " unknown lock mode %lu",
4230  (ulong) lock_get_mode(lock));
4231  }
4232 
4233  if (lock_get_wait(lock)) {
4234  fputs(" waiting", file);
4235  }
4236 
4237  putc('\n', file);
4238 }
4239 
4240 /*********************************************************************/
4242 UNIV_INTERN
4243 void
4244 lock_rec_print(
4245 /*===========*/
4246  FILE* file,
4247  const lock_t* lock)
4248 {
4249  const buf_block_t* block;
4250  ulint space;
4251  ulint page_no;
4252  ulint i;
4253  mtr_t mtr;
4254  mem_heap_t* heap = NULL;
4255  ulint offsets_[REC_OFFS_NORMAL_SIZE];
4256  ulint* offsets = offsets_;
4257  rec_offs_init(offsets_);
4258 
4259  ut_ad(mutex_own(&kernel_mutex));
4260  ut_a(lock_get_type_low(lock) == LOCK_REC);
4261 
4262  space = lock->un_member.rec_lock.space;
4263  page_no = lock->un_member.rec_lock.page_no;
4264 
4265  fprintf(file, "RECORD LOCKS space id %lu page no %lu n bits %lu ",
4266  (ulong) space, (ulong) page_no,
4267  (ulong) lock_rec_get_n_bits(lock));
4268  dict_index_name_print(file, lock->trx, lock->index);
4269  fprintf(file, " trx id " TRX_ID_FMT, lock->trx->id);
4270 
4271  if (lock_get_mode(lock) == LOCK_S) {
4272  fputs(" lock mode S", file);
4273  } else if (lock_get_mode(lock) == LOCK_X) {
4274  fputs(" lock_mode X", file);
4275  } else {
4276  ut_error;
4277  }
4278 
4279  if (lock_rec_get_gap(lock)) {
4280  fputs(" locks gap before rec", file);
4281  }
4282 
4283  if (lock_rec_get_rec_not_gap(lock)) {
4284  fputs(" locks rec but not gap", file);
4285  }
4286 
4287  if (lock_rec_get_insert_intention(lock)) {
4288  fputs(" insert intention", file);
4289  }
4290 
4291  if (lock_get_wait(lock)) {
4292  fputs(" waiting", file);
4293  }
4294 
4295  mtr_start(&mtr);
4296 
4297  putc('\n', file);
4298 
4299  block = buf_page_try_get(space, page_no, &mtr);
4300 
4301  for (i = 0; i < lock_rec_get_n_bits(lock); ++i) {
4302 
4303  if (!lock_rec_get_nth_bit(lock, i)) {
4304  continue;
4305  }
4306 
4307  fprintf(file, "Record lock, heap no %lu", (ulong) i);
4308 
4309  if (block) {
4310  const rec_t* rec;
4311 
4313  buf_block_get_frame(block), i);
4314 
4315  offsets = rec_get_offsets(
4316  rec, lock->index, offsets,
4317  ULINT_UNDEFINED, &heap);
4318 
4319  putc(' ', file);
4320  rec_print_new(file, rec, offsets);
4321  }
4322 
4323  putc('\n', file);
4324  }
4325 
4326  mtr_commit(&mtr);
4327  if (UNIV_LIKELY_NULL(heap)) {
4328  mem_heap_free(heap);
4329  }
4330 }
4331 
4332 #ifdef UNIV_DEBUG
4333 /* Print the number of lock structs from lock_print_info_summary() only
4334 in non-production builds for performance reasons, see
4335 http://bugs.mysql.com/36942 */
4336 #define PRINT_NUM_OF_LOCK_STRUCTS
4337 #endif /* UNIV_DEBUG */
4338 
4339 #ifdef PRINT_NUM_OF_LOCK_STRUCTS
4340 /*********************************************************************/
4343 static
4344 ulint
4345 lock_get_n_rec_locks(void)
4346 /*======================*/
4347 {
4348  lock_t* lock;
4349  ulint n_locks = 0;
4350  ulint i;
4351 
4352  ut_ad(mutex_own(&kernel_mutex));
4353 
4354  for (i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) {
4355 
4356  lock = HASH_GET_FIRST(lock_sys->rec_hash, i);
4357 
4358  while (lock) {
4359  n_locks++;
4360 
4361  lock = HASH_GET_NEXT(hash, lock);
4362  }
4363  }
4364 
4365  return(n_locks);
4366 }
4367 #endif /* PRINT_NUM_OF_LOCK_STRUCTS */
4368 
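lock_get_n_rec_locks() below visits every cell of lock_sys->rec_hash and walks each hash chain, so its cost grows with the total number of record lock structs; that is why it is compiled only under PRINT_NUM_OF_LOCK_STRUCTS. A standalone sketch of the same traversal over a toy chained hash table:

#include <stdio.h>

#define N_CELLS 4

struct node { struct node* next; };

/* Count all entries of a chained hash table, cell by cell. */
static unsigned count_all(struct node* cells[N_CELLS])
{
    unsigned n = 0;

    for (int i = 0; i < N_CELLS; i++) {
        for (struct node* p = cells[i]; p != NULL; p = p->next) {
            n++;
        }
    }
    return n;
}

int main(void)
{
    struct node a = { NULL };
    struct node b = { &a };
    struct node c = { NULL };
    struct node* cells[N_CELLS] = { &b, NULL, &c, NULL };

    printf("lock structs: %u\n", count_all(cells));   /* 3 */
    return 0;
}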
4369 /*********************************************************************/
4373 UNIV_INTERN
4374 ibool
4375 lock_print_info_summary(
4376 /*====================*/
4377  FILE* file,
4378  ibool nowait)
4379 {
4380  /* If nowait is FALSE, wait on the kernel mutex;
4381  otherwise return immediately if we fail to obtain
4382  the mutex. */
4383  if (!nowait) {
4384  lock_mutex_enter_kernel();
4385  } else if (mutex_enter_nowait(&kernel_mutex)) {
4386  fputs("FAIL TO OBTAIN KERNEL MUTEX, "
4387  "SKIP LOCK INFO PRINTING\n", file);
4388  return(FALSE);
4389  }
4390 
4391  if (lock_deadlock_found) {
4392  fputs("------------------------\n"
4393  "LATEST DETECTED DEADLOCK\n"
4394  "------------------------\n", file);
4395 
4396  ut_copy_file(file, lock_latest_err_file);
4397  }
4398 
4399  fputs("------------\n"
4400  "TRANSACTIONS\n"
4401  "------------\n", file);
4402 
4403  fprintf(file, "Trx id counter " TRX_ID_FMT "\n",
4404  trx_sys->max_trx_id);
4405 
4406  fprintf(file,
4407  "Purge done for trx's n:o < " TRX_ID_FMT
4408  " undo n:o < " TRX_ID_FMT "\n",
4409  purge_sys->purged_trx_no,
4410  purge_sys->purged_undo_no);
4411 
4412  fprintf(file,
4413  "History list length %lu\n",
4414  (ulong) trx_sys->rseg_history_len);
4415 
4416 #ifdef PRINT_NUM_OF_LOCK_STRUCTS
4417  fprintf(file,
4418  "Total number of lock structs in row lock hash table %lu\n",
4419  (ulong) lock_get_n_rec_locks());
4420 #endif /* PRINT_NUM_OF_LOCK_STRUCTS */
4421  return(TRUE);
4422 }
4423 
4424 /*********************************************************************/
4426 UNIV_INTERN
4427 void
4428 lock_print_info_all_transactions(
4429 /*=============================*/
4430  FILE* file)
4431 {
4432  lock_t* lock;
4433  ibool load_page_first = TRUE;
4434  ulint nth_trx = 0;
4435  ulint nth_lock = 0;
4436  ulint i;
4437  mtr_t mtr;
4438  trx_t* trx;
4439 
4440  fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n");
4441 
4442  /* First print info on non-active transactions */
4443 
4444  trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
4445 
4446  while (trx) {
4447  if (trx->conc_state == TRX_NOT_STARTED) {
4448  fputs("---", file);
4449  trx_print(file, trx, 600);
4450  }
4451 
4452  trx = UT_LIST_GET_NEXT(mysql_trx_list, trx);
4453  }
4454 
4455 loop:
4456  trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
4457 
4458  i = 0;
4459 
4460  /* Since we temporarily release the kernel mutex when
4461  reading a database page below, the variable trx may now
4462  be obsolete: we must loop through the trx list to get
4463  back to (probably) the same trx, or to some other trx. */
4464 
4465  while (trx && (i < nth_trx)) {
4466  trx = UT_LIST_GET_NEXT(trx_list, trx);
4467  i++;
4468  }
4469 
4470  if (trx == NULL) {
4471  lock_mutex_exit_kernel();
4472 
4473  ut_ad(lock_validate());
4474 
4475  return;
4476  }
4477 
4478  if (nth_lock == 0) {
4479  fputs("---", file);
4480  trx_print(file, trx, 600);
4481 
4482  if (trx->read_view) {
4483  fprintf(file,
4484  "Trx read view will not see trx with"
4485  " id >= " TRX_ID_FMT
4486  ", sees < " TRX_ID_FMT "\n",
4487  trx->read_view->low_limit_id,
4488  trx->read_view->up_limit_id);
4489  }
4490 
4491  if (trx->que_state == TRX_QUE_LOCK_WAIT) {
4492  fprintf(file,
4493  "------- TRX HAS BEEN WAITING %lu SEC"
4494  " FOR THIS LOCK TO BE GRANTED:\n",
4495  (ulong) difftime(time(NULL),
4496  trx->wait_started));
4497 
4498  if (lock_get_type_low(trx->wait_lock) == LOCK_REC) {
4499  lock_rec_print(file, trx->wait_lock);
4500  } else {
4501  lock_table_print(file, trx->wait_lock);
4502  }
4503 
4504  fputs("------------------\n", file);
4505  }
4506  }
4507 
4508  if (!srv_print_innodb_lock_monitor) {
4509  nth_trx++;
4510  goto loop;
4511  }
4512 
4513  i = 0;
4514 
4515  /* See the note about the trx loop above for why we loop here:
4516  lock may be an obsolete pointer now. */
4517 
4518  lock = UT_LIST_GET_FIRST(trx->trx_locks);
4519 
4520  while (lock && (i < nth_lock)) {
4521  lock = UT_LIST_GET_NEXT(trx_locks, lock);
4522  i++;
4523  }
4524 
4525  if (lock == NULL) {
4526  nth_trx++;
4527  nth_lock = 0;
4528 
4529  goto loop;
4530  }
4531 
4532  if (lock_get_type_low(lock) == LOCK_REC) {
4533  if (load_page_first) {
4534  ulint space = lock->un_member.rec_lock.space;
4535  ulint zip_size= fil_space_get_zip_size(space);
4536  ulint page_no = lock->un_member.rec_lock.page_no;
4537 
4538  if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
4539 
4540  /* It is a single table tablespace and
4541  the .ibd file is missing (TRUNCATE
4542  TABLE probably stole the locks): just
4543  print the lock without attempting to
4544  load the page in the buffer pool. */
4545 
4546  fprintf(file, "RECORD LOCKS on"
4547  " non-existing space %lu\n",
4548  (ulong) space);
4549  goto print_rec;
4550  }
4551 
4552  lock_mutex_exit_kernel();
4553 
4554  mtr_start(&mtr);
4555 
4556  buf_page_get_with_no_latch(space, zip_size,
4557  page_no, &mtr);
4558 
4559  mtr_commit(&mtr);
4560 
4561  load_page_first = FALSE;
4562 
4563  lock_mutex_enter_kernel();
4564 
4565  goto loop;
4566  }
4567 
4568 print_rec:
4569  lock_rec_print(file, lock);
4570  } else {
4571  ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
4572 
4573  lock_table_print(file, lock);
4574  }
4575 
4576  load_page_first = TRUE;
4577 
4578  nth_lock++;
4579 
4580  if (nth_lock >= 10) {
4581  fputs("10 LOCKS PRINTED FOR THIS TRX:"
4582  " SUPPRESSING FURTHER PRINTS\n",
4583  file);
4584 
4585  nth_trx++;
4586  nth_lock = 0;
4587 
4588  goto loop;
4589  }
4590 
4591  goto loop;
4592 }
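The two printing functions above are meant to be used as a pair: lock_print_info_summary() acquires (or, with nowait, tries to acquire) the kernel mutex and returns TRUE while still holding it, and lock_print_info_all_transactions() releases the mutex before it returns. A minimal caller sketch, assuming the usual monitor-output context; the wrapper name is illustrative and not part of this file:

static void
print_lock_monitor_section(
/*=======================*/
	FILE*	file)	/* in: output stream, e.g. the monitor file */
{
	/* Try to take the kernel mutex without blocking; on failure
	the summary prints a notice and returns FALSE, and there is
	nothing more to print. */
	if (lock_print_info_summary(file, TRUE)) {

		/* The kernel mutex is held here; this call prints the
		per-transaction lock lists and releases the mutex
		before returning. */
		lock_print_info_all_transactions(file);
	}
}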
4593 
4594 #ifdef UNIV_DEBUG
4595 /*********************************************************************/
4598 static
4599 ibool
4600 lock_table_queue_validate(
4601 /*======================*/
4602  dict_table_t* table)
4603 {
4604  lock_t* lock;
4605 
4606  ut_ad(mutex_own(&kernel_mutex));
4607 
4608  lock = UT_LIST_GET_FIRST(table->locks);
4609 
4610  while (lock) {
4611  ut_a(((lock->trx)->conc_state == TRX_ACTIVE)
4612  || ((lock->trx)->conc_state == TRX_PREPARED)
4613  || ((lock->trx)->conc_state == TRX_COMMITTED_IN_MEMORY));
4614 
4615  if (!lock_get_wait(lock)) {
4616 
4617  ut_a(!lock_table_other_has_incompatible(
4618  lock->trx, 0, table,
4619  lock_get_mode(lock)));
4620  } else {
4621 
4622  ut_a(lock_table_has_to_wait_in_queue(lock));
4623  }
4624 
4625  lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock);
4626  }
4627 
4628  return(TRUE);
4629 }
4630 
4631 /*********************************************************************/
4634 static
4635 ibool
4636 lock_rec_queue_validate(
4637 /*====================*/
4638  const buf_block_t* block,
4639  const rec_t* rec,
4640  dict_index_t* index,
4641  const ulint* offsets)
4642 {
4643  trx_t* impl_trx;
4644  lock_t* lock;
4645  ulint heap_no;
4646 
4647  ut_a(rec);
4648  ut_a(block->frame == page_align(rec));
4649  ut_ad(rec_offs_validate(rec, index, offsets));
4650  ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
4651 
4652  heap_no = page_rec_get_heap_no(rec);
4653 
4654  lock_mutex_enter_kernel();
4655 
4656  if (!page_rec_is_user_rec(rec)) {
4657 
4658  lock = lock_rec_get_first(block, heap_no);
4659 
4660  while (lock) {
4661  switch(lock->trx->conc_state) {
4662  case TRX_ACTIVE:
4663  case TRX_PREPARED:
4664  case TRX_COMMITTED_IN_MEMORY:
4665  break;
4666  default:
4667  ut_error;
4668  }
4669 
4670  ut_a(trx_in_trx_list(lock->trx));
4671 
4672  if (lock_get_wait(lock)) {
4673  ut_a(lock_rec_has_to_wait_in_queue(lock));
4674  }
4675 
4676  if (index) {
4677  ut_a(lock->index == index);
4678  }
4679 
4680  lock = lock_rec_get_next(heap_no, lock);
4681  }
4682 
4683  lock_mutex_exit_kernel();
4684 
4685  return(TRUE);
4686  }
4687 
4688  if (!index);
4689  else if (dict_index_is_clust(index)) {
4690 
4691  impl_trx = lock_clust_rec_some_has_impl(rec, index, offsets);
4692 
4693  if (impl_trx
4694  && lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT,
4695  block, heap_no, impl_trx)) {
4696 
4697  ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
4698  block, heap_no, impl_trx));
4699  }
4700 #if 0
4701  } else {
4702 
4703  /* The kernel mutex may get released temporarily in the
4704  next function call: we have to release lock table mutex
4705  to obey the latching order */
4706 
4707  /* If this thread is holding the file space latch
4708  (fil_space_t::latch), the following check WILL break
4709  latching order and may cause a deadlock of threads. */
4710 
4711  /* NOTE: This is a bogus check that would fail in the
4712  following case: Our transaction is updating a
4713  row. After it has updated the clustered index record,
4714  it goes to a secondary index record and finds someone
4715  else holding an explicit S- or X-lock on that
4716  secondary index record, presumably from a locking
4717  read. Our transaction cannot update the secondary
4718  index immediately, but places a waiting X-lock request
4719  on the secondary index record. There is nothing
4720  illegal in this. The assertion is simply too strong. */
4721 
4722  /* From the locking point of view, each secondary
4723  index is a separate table. A lock that is held on
4724  secondary index rec does not give any rights to modify
4725  or read the clustered index rec. Therefore, we can
4726  think of the sec index as a separate 'table' from the
4727  clust index 'table'. Conversely, a transaction that
4728  has acquired a lock on and modified a clustered index
4729  record may need to wait for a lock on the
4730  corresponding record in a secondary index. */
4731 
4732  impl_trx = lock_sec_rec_some_has_impl_off_kernel(
4733  rec, index, offsets);
4734 
4735  if (impl_trx
4736  && lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT,
4737  block, heap_no, impl_trx)) {
4738 
4739  ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
4740  block, heap_no, impl_trx));
4741  }
4742 #endif
4743  }
4744 
4745  lock = lock_rec_get_first(block, heap_no);
4746 
4747  while (lock) {
4748  ut_a(lock->trx->conc_state == TRX_ACTIVE
4749  || lock->trx->conc_state == TRX_PREPARED
4750  || lock->trx->conc_state == TRX_COMMITTED_IN_MEMORY);
4751  ut_a(trx_in_trx_list(lock->trx));
4752 
4753  if (index) {
4754  ut_a(lock->index == index);
4755  }
4756 
4757  if (!lock_rec_get_gap(lock) && !lock_get_wait(lock)) {
4758 
4759  enum lock_mode mode;
4760 
4761  if (lock_get_mode(lock) == LOCK_S) {
4762  mode = LOCK_X;
4763  } else {
4764  mode = LOCK_S;
4765  }
4766  ut_a(!lock_rec_other_has_expl_req(
4767  mode, 0, 0, block, heap_no, lock->trx));
4768 
4769  } else if (lock_get_wait(lock) && !lock_rec_get_gap(lock)) {
4770 
4771  ut_a(lock_rec_has_to_wait_in_queue(lock));
4772  }
4773 
4774  lock = lock_rec_get_next(heap_no, lock);
4775  }
4776 
4777  lock_mutex_exit_kernel();
4778 
4779  return(TRUE);
4780 }
4781 
4782 /*********************************************************************/
4785 static
4786 ibool
4787 lock_rec_validate_page(
4788 /*===================*/
4789  ulint space,
4790  ulint page_no)
4791 {
4792  dict_index_t* index;
4793  buf_block_t* block;
4794  const page_t* page;
4795  lock_t* lock;
4796  const rec_t* rec;
4797  ulint nth_lock = 0;
4798  ulint nth_bit = 0;
4799  ulint i;
4800  ulint zip_size;
4801  mtr_t mtr;
4802  mem_heap_t* heap = NULL;
4803  ulint offsets_[REC_OFFS_NORMAL_SIZE];
4804  ulint* offsets = offsets_;
4805  rec_offs_init(offsets_);
4806 
4807  ut_ad(!mutex_own(&kernel_mutex));
4808 
4809  mtr_start(&mtr);
4810 
4811  zip_size = fil_space_get_zip_size(space);
4812  ut_ad(zip_size != ULINT_UNDEFINED);
4813  block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, &mtr);
4814  buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
4815 
4816  page = block->frame;
4817 
4818  lock_mutex_enter_kernel();
4819 loop:
4820  lock = lock_rec_get_first_on_page_addr(space, page_no);
4821 
4822  if (!lock) {
4823  goto function_exit;
4824  }
4825 
4826  for (i = 0; i < nth_lock; i++) {
4827 
4828  lock = lock_rec_get_next_on_page(lock);
4829 
4830  if (!lock) {
4831  goto function_exit;
4832  }
4833  }
4834 
4835  ut_a(trx_in_trx_list(lock->trx));
4836  ut_a(lock->trx->conc_state == TRX_ACTIVE
4837  || lock->trx->conc_state == TRX_PREPARED
4838  || lock->trx->conc_state == TRX_COMMITTED_IN_MEMORY);
4839 
4840 # ifdef UNIV_SYNC_DEBUG
4841  /* Only validate the record queues when this thread is not
4842  holding a space->latch. Deadlocks are possible due to
4843  latching order violation when UNIV_DEBUG is defined while
4844  UNIV_SYNC_DEBUG is not. */
4845  if (!sync_thread_levels_contains(SYNC_FSP))
4846 # endif /* UNIV_SYNC_DEBUG */
4847  for (i = nth_bit; i < lock_rec_get_n_bits(lock); i++) {
4848 
4849  if (i == 1 || lock_rec_get_nth_bit(lock, i)) {
4850 
4851  index = lock->index;
4852  rec = page_find_rec_with_heap_no(page, i);
4853  ut_a(rec);
4854  offsets = rec_get_offsets(rec, index, offsets,
4855  ULINT_UNDEFINED, &heap);
4856 #if 0
4857  fprintf(stderr,
4858  "Validating %lu %lu\n",
4859  (ulong) space, (ulong) page_no);
4860 #endif
4861  lock_mutex_exit_kernel();
4862 
4863  /* If this thread is holding the file space
4864  latch (fil_space_t::latch), the following
4865  check WILL break the latching order and may
4866  cause a deadlock of threads. */
4867 
4868  lock_rec_queue_validate(block, rec, index, offsets);
4869 
4870  lock_mutex_enter_kernel();
4871 
4872  nth_bit = i + 1;
4873 
4874  goto loop;
4875  }
4876  }
4877 
4878  nth_bit = 0;
4879  nth_lock++;
4880 
4881  goto loop;
4882 
4883 function_exit:
4884  lock_mutex_exit_kernel();
4885 
4886  mtr_commit(&mtr);
4887 
4888  if (UNIV_LIKELY_NULL(heap)) {
4889  mem_heap_free(heap);
4890  }
4891  return(TRUE);
4892 }
4893 
4894 /*********************************************************************/
4897 static
4898 ibool
4899 lock_validate(void)
4900 /*===============*/
4901 {
4902  lock_t* lock;
4903  trx_t* trx;
4904  ib_uint64_t limit;
4905  ulint space;
4906  ulint page_no;
4907  ulint i;
4908 
4909  lock_mutex_enter_kernel();
4910 
4911  trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
4912 
4913  while (trx) {
4914  lock = UT_LIST_GET_FIRST(trx->trx_locks);
4915 
4916  while (lock) {
4917  if (lock_get_type_low(lock) & LOCK_TABLE) {
4918 
4919  lock_table_queue_validate(
4920  lock->un_member.tab_lock.table);
4921  }
4922 
4923  lock = UT_LIST_GET_NEXT(trx_locks, lock);
4924  }
4925 
4926  trx = UT_LIST_GET_NEXT(trx_list, trx);
4927  }
4928 
4929  for (i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) {
4930 
4931  limit = 0;
4932 
4933  for (;;) {
4934  lock = HASH_GET_FIRST(lock_sys->rec_hash, i);
4935 
4936  while (lock) {
4937  ib_uint64_t space_page;
4938  ut_a(trx_in_trx_list(lock->trx));
4939 
4940  space = lock->un_member.rec_lock.space;
4941  page_no = lock->un_member.rec_lock.page_no;
4942 
4943  space_page = ut_ull_create(space, page_no);
4944 
4945  if (space_page >= limit) {
4946  break;
4947  }
4948 
4949  lock = HASH_GET_NEXT(hash, lock);
4950  }
4951 
4952  if (!lock) {
4953 
4954  break;
4955  }
4956 
4957  lock_mutex_exit_kernel();
4958 
4959  lock_rec_validate_page(space, page_no);
4960 
4961  lock_mutex_enter_kernel();
4962 
4963  limit = ut_ull_create(space, page_no + 1);
4964  }
4965  }
4966 
4967  lock_mutex_exit_kernel();
4968 
4969  return(TRUE);
4970 }
4971 #endif /* UNIV_DEBUG */
4972 /*============ RECORD LOCK CHECKS FOR ROW OPERATIONS ====================*/
4973 
4974 /*********************************************************************/
4981 UNIV_INTERN
4982 ulint
4983 lock_rec_insert_check_and_lock(
4984 /*===========================*/
4985  ulint flags,
4987  const rec_t* rec,
4988  buf_block_t* block,
4989  dict_index_t* index,
4990  que_thr_t* thr,
4991  mtr_t* mtr,
4992  ibool* inherit)
4996 {
4997  const rec_t* next_rec;
4998  trx_t* trx;
4999  lock_t* lock;
5000  ulint err;
5001  ulint next_rec_heap_no;
5002 
5003  ut_ad(block->frame == page_align(rec));
5004 
5005  if (flags & BTR_NO_LOCKING_FLAG) {
5006 
5007  return(DB_SUCCESS);
5008  }
5009 
5010  trx = thr_get_trx(thr);
5011  next_rec = page_rec_get_next_const(rec);
5012  next_rec_heap_no = page_rec_get_heap_no(next_rec);
5013 
5014  lock_mutex_enter_kernel();
5015 
5016  /* When inserting a record into an index, the table must be at
5017  least IX-locked or we must be building an index, in which case
5018  the table must be at least S-locked. */
5019  ut_ad(lock_table_has(trx, index->table, LOCK_IX)
5020  || (*index->name == TEMP_INDEX_PREFIX
5021  && lock_table_has(trx, index->table, LOCK_S)));
5022 
5023  lock = lock_rec_get_first(block, next_rec_heap_no);
5024 
5025  if (UNIV_LIKELY(lock == NULL)) {
5026  /* We optimize CPU time usage in the simplest case */
5027 
5028  lock_mutex_exit_kernel();
5029 
5030  if (!dict_index_is_clust(index)) {
5031  /* Update the page max trx id field */
5032  page_update_max_trx_id(block,
5033  buf_block_get_page_zip(block),
5034  trx->id, mtr);
5035  }
5036 
5037  *inherit = FALSE;
5038 
5039  return(DB_SUCCESS);
5040  }
5041 
5042  *inherit = TRUE;
5043 
5044  /* If another transaction has an explicit lock request which locks
5045  the gap, waiting or granted, on the successor, the insert has to wait.
5046 
5047  An exception is the case where the lock by the other transaction
5048  is a gap type lock which it placed to wait for its turn to insert. We
5049  do not consider that kind of lock conflicting with our insert. This
5050  eliminates an unnecessary deadlock which used to result when two
5051  transactions both had to wait for their insert, each holding a waiting
5052  gap type lock request on the successor. */
5053 
5054  if (lock_rec_other_has_conflicting(
5055  static_cast<lock_mode>(LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION),
5056  block, next_rec_heap_no, trx)) {
5057 
5058  /* Note that we may get DB_SUCCESS also here! */
5059  err = lock_rec_enqueue_waiting(LOCK_X | LOCK_GAP
5060  | LOCK_INSERT_INTENTION,
5061  block, next_rec_heap_no,
5062  index, thr);
5063  } else {
5064  err = DB_SUCCESS;
5065  }
5066 
5067  lock_mutex_exit_kernel();
5068 
5069  switch (err) {
5070  case DB_SUCCESS_LOCKED_REC:
5071  err = DB_SUCCESS;
5072  /* fall through */
5073  case DB_SUCCESS:
5074  if (dict_index_is_clust(index)) {
5075  break;
5076  }
5077  /* Update the page max trx id field */
5078  page_update_max_trx_id(block,
5079  buf_block_get_page_zip(block),
5080  trx->id, mtr);
5081  }
5082 
5083 #ifdef UNIV_DEBUG
5084  {
5085  mem_heap_t* heap = NULL;
5086  ulint offsets_[REC_OFFS_NORMAL_SIZE];
5087  const ulint* offsets;
5088  rec_offs_init(offsets_);
5089 
5090  offsets = rec_get_offsets(next_rec, index, offsets_,
5091  ULINT_UNDEFINED, &heap);
5092  ut_ad(lock_rec_queue_validate(block,
5093  next_rec, index, offsets));
5094  if (UNIV_LIKELY_NULL(heap)) {
5095  mem_heap_free(heap);
5096  }
5097  }
5098 #endif /* UNIV_DEBUG */
5099 
5100  return(err);
5101 }
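A hedged sketch of how a row-insert path might call lock_rec_insert_check_and_lock(); the helper name and the handling around it are assumptions for illustration, and flags == 0 simply means BTR_NO_LOCKING_FLAG is not set:

static ulint
insert_gap_check(
/*=============*/
	const rec_t*	rec,	/* in: record after which to insert */
	buf_block_t*	block,	/* in/out: buffer block of rec */
	dict_index_t*	index,	/* in: index of rec */
	que_thr_t*	thr,	/* in: query thread */
	mtr_t*		mtr)	/* in/out: mini-transaction */
{
	ibool	inherit;
	ulint	err;

	err = lock_rec_insert_check_and_lock(0, rec, block, index,
					     thr, mtr, &inherit);

	if (err == DB_SUCCESS && inherit) {
		/* The successor record carries explicit gap locks:
		after the physical insert the new record should
		inherit them (the caller's insert code handles that). */
	}

	return(err);	/* DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, ... */
}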
5102 
5103 /*********************************************************************/
5107 static
5108 void
5109 lock_rec_convert_impl_to_expl(
5110 /*==========================*/
5111  const buf_block_t* block,
5112  const rec_t* rec,
5113  dict_index_t* index,
5114  const ulint* offsets)
5115 {
5116  trx_t* impl_trx;
5117 
5118  ut_ad(mutex_own(&kernel_mutex));
5120  ut_ad(rec_offs_validate(rec, index, offsets));
5121  ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
5122 
5123  if (dict_index_is_clust(index)) {
5124  impl_trx = lock_clust_rec_some_has_impl(rec, index, offsets);
5125  } else {
5126  impl_trx = lock_sec_rec_some_has_impl_off_kernel(
5127  rec, index, offsets);
5128  }
5129 
5130  if (impl_trx) {
5131  ulint heap_no = page_rec_get_heap_no(rec);
5132 
5133  /* If the transaction has no explicit x-lock set on the
5134  record, set one for it */
5135 
5136  if (!lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block,
5137  heap_no, impl_trx)) {
5138 
5139  lock_rec_add_to_queue(
5140  LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP,
5141  block, heap_no, index, impl_trx);
5142  }
5143  }
5144 }
5145 
5146 /*********************************************************************/
5154 UNIV_INTERN
5155 ulint
5156 lock_clust_rec_modify_check_and_lock(
5157 /*=================================*/
5158  ulint flags,
5160  const buf_block_t* block,
5161  const rec_t* rec,
5163  dict_index_t* index,
5164  const ulint* offsets,
5165  que_thr_t* thr)
5166 {
5167  ulint err;
5168  ulint heap_no;
5169 
5170  ut_ad(rec_offs_validate(rec, index, offsets));
5171  ut_ad(dict_index_is_clust(index));
5172  ut_ad(block->frame == page_align(rec));
5173 
5174  if (flags & BTR_NO_LOCKING_FLAG) {
5175 
5176  return(DB_SUCCESS);
5177  }
5178 
5179  heap_no = rec_offs_comp(offsets)
5180  ? rec_get_heap_no_new(rec)
5181  : rec_get_heap_no_old(rec);
5182 
5183  lock_mutex_enter_kernel();
5184 
5185  ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
5186 
5187  /* If a transaction has no explicit x-lock set on the record, set one
5188  for it */
5189 
5190  lock_rec_convert_impl_to_expl(block, rec, index, offsets);
5191 
5192  err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
5193  block, heap_no, index, thr);
5194 
5195  lock_mutex_exit_kernel();
5196 
5197  ut_ad(lock_rec_queue_validate(block, rec, index, offsets));
5198 
5199  if (UNIV_UNLIKELY(err == DB_SUCCESS_LOCKED_REC)) {
5200  err = DB_SUCCESS;
5201  }
5202 
5203  return(err);
5204 }
5205 
5206 /*********************************************************************/
5210 UNIV_INTERN
5211 ulint
5212 lock_sec_rec_modify_check_and_lock(
5213 /*===============================*/
5214  ulint flags,
5216  buf_block_t* block,
5217  const rec_t* rec,
5222  dict_index_t* index,
5223  que_thr_t* thr,
5224  mtr_t* mtr)
5225 {
5226  ulint err;
5227  ulint heap_no;
5228 
5229  ut_ad(!dict_index_is_clust(index));
5230  ut_ad(block->frame == page_align(rec));
5231 
5232  if (flags & BTR_NO_LOCKING_FLAG) {
5233 
5234  return(DB_SUCCESS);
5235  }
5236 
5237  heap_no = page_rec_get_heap_no(rec);
5238 
5239  /* Another transaction cannot have an implicit lock on the record,
5240  because when we come here, we already have modified the clustered
5241  index record, and this would not have been possible if another active
5242  transaction had modified this secondary index record. */
5243 
5244  lock_mutex_enter_kernel();
5245 
5246  ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
5247 
5248  err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
5249  block, heap_no, index, thr);
5250 
5251  lock_mutex_exit_kernel();
5252 
5253 #ifdef UNIV_DEBUG
5254  {
5255  mem_heap_t* heap = NULL;
5256  ulint offsets_[REC_OFFS_NORMAL_SIZE];
5257  const ulint* offsets;
5258  rec_offs_init(offsets_);
5259 
5260  offsets = rec_get_offsets(rec, index, offsets_,
5261  ULINT_UNDEFINED, &heap);
5262  ut_ad(lock_rec_queue_validate(block, rec, index, offsets));
5263  if (UNIV_LIKELY_NULL(heap)) {
5264  mem_heap_free(heap);
5265  }
5266  }
5267 #endif /* UNIV_DEBUG */
5268 
5269  if (err == DB_SUCCESS || err == DB_SUCCESS_LOCKED_REC) {
5270  /* Update the page max trx id field */
5271  /* It might not be necessary to do this if
5272  err == DB_SUCCESS (no new lock created),
5273  but it should not cost too much performance. */
5274  page_update_max_trx_id(block,
5275  buf_block_get_page_zip(block),
5276  thr_get_trx(thr)->id, mtr);
5277  err = DB_SUCCESS;
5278  }
5279 
5280  return(err);
5281 }
5282 
5283 /*********************************************************************/
5288 UNIV_INTERN
5289 enum db_err
5290 lock_sec_rec_read_check_and_lock(
5291 /*=============================*/
5292  ulint flags,
5294  const buf_block_t* block,
5295  const rec_t* rec,
5299  dict_index_t* index,
5300  const ulint* offsets,
5301  enum lock_mode mode,
5306  ulint gap_mode,
5308  que_thr_t* thr)
5309 {
5310  enum db_err err;
5311  ulint heap_no;
5312 
5313  ut_ad(!dict_index_is_clust(index));
5314  ut_ad(block->frame == page_align(rec));
5316  ut_ad(rec_offs_validate(rec, index, offsets));
5317  ut_ad(mode == LOCK_X || mode == LOCK_S);
5318 
5319  if (flags & BTR_NO_LOCKING_FLAG) {
5320 
5321  return(DB_SUCCESS);
5322  }
5323 
5324  heap_no = page_rec_get_heap_no(rec);
5325 
5326  lock_mutex_enter_kernel();
5327 
5328  ut_ad(mode != LOCK_X
5329  || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
5330  ut_ad(mode != LOCK_S
5331  || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
5332 
5333  /* Some transaction may have an implicit x-lock on the record only
5334  if the max trx id for the page >= min trx id for the trx list or a
5335  database recovery is running. */
5336 
5337  if ((page_get_max_trx_id(block->frame) >= trx_list_get_min_trx_id()
5338  || recv_recovery_is_on())
5339  && !page_rec_is_supremum(rec)) {
5340 
5341  lock_rec_convert_impl_to_expl(block, rec, index, offsets);
5342  }
5343 
5344  err = lock_rec_lock(FALSE, mode | gap_mode,
5345  block, heap_no, index, thr);
5346 
5347  lock_mutex_exit_kernel();
5348 
5349  ut_ad(lock_rec_queue_validate(block, rec, index, offsets));
5350 
5351  if (UNIV_UNLIKELY(err == DB_SUCCESS_LOCKED_REC)) {
5352  err = DB_SUCCESS;
5353  }
5354 
5355  return(err);
5356 }
5357 
5358 /*********************************************************************/
5367 UNIV_INTERN
5368 enum db_err
5369 lock_clust_rec_read_check_and_lock(
5370 /*===============================*/
5371  ulint flags,
5373  const buf_block_t* block,
5374  const rec_t* rec,
5378  dict_index_t* index,
5379  const ulint* offsets,
5380  enum lock_mode mode,
5385  ulint gap_mode,
5387  que_thr_t* thr)
5388 {
5389  enum db_err err;
5390  ulint heap_no;
5391 
5392  ut_ad(dict_index_is_clust(index));
5393  ut_ad(block->frame == page_align(rec));
5395  ut_ad(gap_mode == LOCK_ORDINARY || gap_mode == LOCK_GAP
5396  || gap_mode == LOCK_REC_NOT_GAP);
5397  ut_ad(rec_offs_validate(rec, index, offsets));
5398 
5399  if (flags & BTR_NO_LOCKING_FLAG) {
5400 
5401  return(DB_SUCCESS);
5402  }
5403 
5404  heap_no = page_rec_get_heap_no(rec);
5405 
5406  lock_mutex_enter_kernel();
5407 
5408  ut_ad(mode != LOCK_X
5409  || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
5410  ut_ad(mode != LOCK_S
5411  || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
5412 
5413  if (UNIV_LIKELY(heap_no != PAGE_HEAP_NO_SUPREMUM)) {
5414 
5415  lock_rec_convert_impl_to_expl(block, rec, index, offsets);
5416  }
5417 
5418  err = lock_rec_lock(FALSE, mode | gap_mode,
5419  block, heap_no, index, thr);
5420 
5421  lock_mutex_exit_kernel();
5422 
5423  ut_ad(lock_rec_queue_validate(block, rec, index, offsets));
5424 
5425  return(err);
5426 }
5427 /*********************************************************************/
5437 UNIV_INTERN
5438 ulint
5439 lock_clust_rec_read_check_and_lock_alt(
5440 /*===================================*/
5441  ulint flags,
5443  const buf_block_t* block,
5444  const rec_t* rec,
5448  dict_index_t* index,
5449  enum lock_mode mode,
5454  ulint gap_mode,
5456  que_thr_t* thr)
5457 {
5458  mem_heap_t* tmp_heap = NULL;
5459  ulint offsets_[REC_OFFS_NORMAL_SIZE];
5460  ulint* offsets = offsets_;
5461  ulint err;
5462  rec_offs_init(offsets_);
5463 
5464  offsets = rec_get_offsets(rec, index, offsets,
5465  ULINT_UNDEFINED, &tmp_heap);
5466  err = lock_clust_rec_read_check_and_lock(flags, block, rec, index,
5467  offsets, mode, gap_mode, thr);
5468  if (tmp_heap) {
5469  mem_heap_free(tmp_heap);
5470  }
5471 
5472  if (UNIV_UNLIKELY(err == DB_SUCCESS_LOCKED_REC)) {
5473  err = DB_SUCCESS;
5474  }
5475 
5476  return(err);
5477 }
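The _alt variant above is a convenience for callers that have not yet computed the record offsets: it calls rec_get_offsets() internally before delegating. A hedged usage sketch; the wrapper is hypothetical, and LOCK_ORDINARY requests a next-key lock (the record and the gap before it):

static ulint
lock_row_exclusive(
/*===============*/
	const buf_block_t*	block,	/* in: buffer block of rec */
	const rec_t*		rec,	/* in: user record to lock */
	dict_index_t*		index,	/* in: clustered index */
	que_thr_t*		thr)	/* in: query thread */
{
	/* No offsets are needed here; the _alt variant computes
	them itself and then calls
	lock_clust_rec_read_check_and_lock(). */
	return(lock_clust_rec_read_check_and_lock_alt(
		       0, block, rec, index, LOCK_X, LOCK_ORDINARY, thr));
}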
5478 
5479 /*******************************************************************/
5481 UNIV_INLINE
5482 void
5483 lock_release_autoinc_last_lock(
5484 /*===========================*/
5485  ib_vector_t* autoinc_locks)
5486 {
5487  ulint last;
5488  lock_t* lock;
5489 
5490  ut_ad(mutex_own(&kernel_mutex));
5491  ut_a(!ib_vector_is_empty(autoinc_locks));
5492 
5493  /* The lock to be released must be the last lock acquired. */
5494  last = ib_vector_size(autoinc_locks) - 1;
5495  lock = static_cast<lock_t *>(ib_vector_get(autoinc_locks, last));
5496 
5497  /* Should have only AUTOINC locks in the vector. */
5498  ut_a(lock_get_mode(lock) == LOCK_AUTO_INC);
5499  ut_a(lock_get_type(lock) == LOCK_TABLE);
5500 
5501  ut_a(lock->un_member.tab_lock.table != NULL);
5502 
5503  /* This will remove the lock from the trx autoinc_locks too. */
5504  lock_table_dequeue(lock);
5505 }
5506 
5507 /*******************************************************************/
5510 UNIV_INTERN
5511 ibool
5512 lock_trx_holds_autoinc_locks(
5513 /*=========================*/
5514  const trx_t* trx)
5515 {
5516  ut_a(trx->autoinc_locks != NULL);
5517 
5518  return(!ib_vector_is_empty(trx->autoinc_locks));
5519 }
5520 
5521 /*******************************************************************/
5523 UNIV_INTERN
5524 void
5525 lock_release_autoinc_locks(
5526 /*=======================*/
5527  trx_t* trx)
5528 {
5529  ut_ad(mutex_own(&kernel_mutex));
5530 
5531  ut_a(trx->autoinc_locks != NULL);
5532 
5533  /* We release the locks in reverse order. This is to
5534  avoid searching the vector for the element to delete at
5535  the lower level. See lock_table_remove_low() for details. */
5536  while (!ib_vector_is_empty(trx->autoinc_locks)) {
5537 
5538  /* lock_table_remove_low() will also remove the lock from
5539  the transaction's autoinc_locks vector. */
5540  lock_release_autoinc_last_lock(trx->autoinc_locks);
5541  }
5542 
5543  /* Should release all locks. */
5544  ut_a(ib_vector_is_empty(trx->autoinc_locks));
5545 }
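A hedged sketch of the release pattern a statement-end or commit path might use for AUTO-INC locks; the wrapper is hypothetical, and the only firm requirement visible above is that the kernel mutex is held while the locks are dequeued:

static void
release_autoinc_locks_if_any(
/*=========================*/
	trx_t*	trx)	/* in/out: transaction owning the locks */
{
	lock_mutex_enter_kernel();

	if (lock_trx_holds_autoinc_locks(trx)) {
		/* Dequeues the AUTO-INC table locks in reverse order
		of acquisition, emptying trx->autoinc_locks. */
		lock_release_autoinc_locks(trx);
	}

	lock_mutex_exit_kernel();
}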
5546 
5547 /*******************************************************************/
5551 UNIV_INTERN
5552 ulint
5553 lock_get_type(
5554 /*==========*/
5555  const lock_t* lock)
5556 {
5557  return(lock_get_type_low(lock));
5558 }
5559 
5560 /*******************************************************************/
5563 UNIV_INTERN
5564 trx_id_t
5565 lock_get_trx_id(
5566 /*============*/
5567  const lock_t* lock)
5568 {
5569  return(lock->trx->id);
5570 }
5571 
5572 /*******************************************************************/
5576 UNIV_INTERN
5577 const char*
5578 lock_get_mode_str(
5579 /*==============*/
5580  const lock_t* lock)
5581 {
5582  ibool is_gap_lock;
5583 
5584  is_gap_lock = lock_get_type_low(lock) == LOCK_REC
5585  && lock_rec_get_gap(lock);
5586 
5587  switch (lock_get_mode(lock)) {
5588  case LOCK_S:
5589  if (is_gap_lock) {
5590  return("S,GAP");
5591  } else {
5592  return("S");
5593  }
5594  case LOCK_X:
5595  if (is_gap_lock) {
5596  return("X,GAP");
5597  } else {
5598  return("X");
5599  }
5600  case LOCK_IS:
5601  if (is_gap_lock) {
5602  return("IS,GAP");
5603  } else {
5604  return("IS");
5605  }
5606  case LOCK_IX:
5607  if (is_gap_lock) {
5608  return("IX,GAP");
5609  } else {
5610  return("IX");
5611  }
5612  case LOCK_AUTO_INC:
5613  return("AUTO_INC");
5614  default:
5615  return("UNKNOWN");
5616  }
5617 }
5618 
5619 /*******************************************************************/
5623 UNIV_INTERN
5624 const char*
5625 lock_get_type_str(
5626 /*==============*/
5627  const lock_t* lock)
5628 {
5629  switch (lock_get_type_low(lock)) {
5630  case LOCK_REC:
5631  return("RECORD");
5632  case LOCK_TABLE:
5633  return("TABLE");
5634  default:
5635  return("UNKNOWN");
5636  }
5637 }
5638 
5639 /*******************************************************************/
5642 UNIV_INLINE
5643 dict_table_t*
5644 lock_get_table(
5645 /*===========*/
5646  const lock_t* lock)
5647 {
5648  switch (lock_get_type_low(lock)) {
5649  case LOCK_REC:
5650  return(lock->index->table);
5651  case LOCK_TABLE:
5652  return(lock->un_member.tab_lock.table);
5653  default:
5654  ut_error;
5655  return(NULL);
5656  }
5657 }
5658 
5659 /*******************************************************************/
5662 UNIV_INTERN
5663 table_id_t
5664 lock_get_table_id(
5665 /*==============*/
5666  const lock_t* lock)
5667 {
5668  dict_table_t* table;
5669 
5670  table = lock_get_table(lock);
5671 
5672  return(table->id);
5673 }
5674 
5675 /*******************************************************************/
5679 UNIV_INTERN
5680 const char*
5681 lock_get_table_name(
5682 /*================*/
5683  const lock_t* lock)
5684 {
5685  dict_table_t* table;
5686 
5687  table = lock_get_table(lock);
5688 
5689  return(table->name);
5690 }
5691 
5692 /*******************************************************************/
5695 UNIV_INTERN
5696 const dict_index_t*
5697 lock_rec_get_index(
5698 /*===============*/
5699  const lock_t* lock)
5700 {
5701  ut_a(lock_get_type_low(lock) == LOCK_REC);
5702 
5703  return(lock->index);
5704 }
5705 
5706 /*******************************************************************/
5710 UNIV_INTERN
5711 const char*
5712 lock_rec_get_index_name(
5713 /*====================*/
5714  const lock_t* lock)
5715 {
5716  ut_a(lock_get_type_low(lock) == LOCK_REC);
5717 
5718  return(lock->index->name);
5719 }
5720 
5721 /*******************************************************************/
5724 UNIV_INTERN
5725 ulint
5726 lock_rec_get_space_id(
5727 /*==================*/
5728  const lock_t* lock)
5729 {
5730  ut_a(lock_get_type_low(lock) == LOCK_REC);
5731 
5732  return(lock->un_member.rec_lock.space);
5733 }
5734 
5735 /*******************************************************************/
5738 UNIV_INTERN
5739 ulint
5740 lock_rec_get_page_no(
5741 /*=================*/
5742  const lock_t* lock)
5743 {
5744  ut_a(lock_get_type_low(lock) == LOCK_REC);
5745 
5746  return(lock->un_member.rec_lock.page_no);
5747 }
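Taken together, the accessors above let a consumer (for example an INFORMATION_SCHEMA-style plugin) describe a lock without reaching into lock_t. A hedged sketch; the formatting helper is hypothetical, and the record-only accessors must be called only when lock_get_type() reports LOCK_REC:

static void
describe_lock(
/*==========*/
	FILE*		file,	/* in: output stream */
	const lock_t*	lock)	/* in: lock to describe */
{
	fprintf(file, "trx " TRX_ID_FMT " holds %s lock (%s) on table %s",
		lock_get_trx_id(lock),
		lock_get_type_str(lock),	/* "RECORD" or "TABLE" */
		lock_get_mode_str(lock),	/* e.g. "X", "S,GAP", "IX" */
		lock_get_table_name(lock));

	if (lock_get_type(lock) == LOCK_REC) {
		/* Record lock: add the index name and page address. */
		fprintf(file, ", index %s, space %lu, page %lu",
			lock_rec_get_index_name(lock),
			(ulong) lock_rec_get_space_id(lock),
			(ulong) lock_rec_get_page_no(lock));
	}

	putc('\n', file);
}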