Drizzled Public API Documentation

row0purge.cc
1 /*****************************************************************************
2 
3 Copyright (C) 1997, 2010, Innobase Oy. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free Software
7 Foundation; version 2 of the License.
8 
9 This program is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12 
13 You should have received a copy of the GNU General Public License along with
14 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
15 St, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 *****************************************************************************/
18 
19 /**************************************************/
26 #include "row0purge.h"
27 
28 #ifdef UNIV_NONINL
29 #include "row0purge.ic"
30 #endif
31 
32 #include "fsp0fsp.h"
33 #include "mach0data.h"
34 #include "trx0rseg.h"
35 #include "trx0trx.h"
36 #include "trx0roll.h"
37 #include "trx0undo.h"
38 #include "trx0purge.h"
39 #include "trx0rec.h"
40 #include "que0que.h"
41 #include "row0row.h"
42 #include "row0upd.h"
43 #include "row0vers.h"
44 #include "row0mysql.h"
45 #include "log0log.h"
46 
47 /*************************************************************************
48 IMPORTANT NOTE: Any operation that generates redo MUST check that there
49 is enough space in the redo log before starting that operation. This is
50 done by calling log_free_check(). The reason for checking the
51 availability of the redo log space before the start of the operation is
52 that we MUST NOT hold any synchronization objects when performing the
53 check.
54 If you make a change in this module, make sure that no code path is
55 introduced where a call to log_free_check() is bypassed. */
56 
57 /*************************************************************************
58 IMPORTANT NOTE: Any operation that generates redo MUST check that there
59 is enough space in the redo log before starting that operation. This is
60 done by calling log_free_check(). The reason for checking the
61 availability of the redo log space before the start of the operation is
62 that we MUST NOT hold any synchronization objects when performing the
63 check.
64 If you make a change in this module, make sure that no code path is
65 introduced where a call to log_free_check() is bypassed. */
66 
67 /********************************************************************/
/* Creates a purge node: allocates a purge_node_t from `heap`, tags it as
QUE_NODE_PURGE, links it to `parent`, and gives it its own memory heap
(node->heap, created with mem_heap_create(256)).
Only common.type, common.parent and heap are assigned here; no other
field of the node is set by this function.
NOTE(review): the return-type and function-name lines (listing lines
71-72) are missing from this listing. The body returns a purge_node_t*;
the name is presumably row_purge_node_create -- confirm against the
header.
@param parent	parent node, stored in node->common.parent; must be non-NULL
@param heap	heap from which the node struct itself is allocated; must be
		non-NULL
@return the newly allocated purge node */
70 UNIV_INTERN
73 /*==================*/
74  que_thr_t* parent,
75  mem_heap_t* heap)
76 {
77  purge_node_t* node;
78 
 /* Debug-only check that both arguments are non-NULL. */
79  ut_ad(parent && heap);
80 
 /* The node struct lives in the caller-supplied heap ... */
81  node = static_cast<purge_node_t *>(mem_heap_alloc(heap, sizeof(purge_node_t)));
82 
83  node->common.type = QUE_NODE_PURGE;
84  node->common.parent = parent;
85 
 /* ... while node->heap is a separate heap owned by the node. */
86  node->heap = mem_heap_create(256);
87 
88  return(node);
89 }
90 
91 /***********************************************************/
95 static
96 ibool
97 row_purge_reposition_pcur(
98 /*======================*/
99  ulint mode,
100  purge_node_t* node,
101  mtr_t* mtr)
102 {
103  ibool found;
104 
105  if (node->found_clust) {
106  found = btr_pcur_restore_position(mode, &(node->pcur), mtr);
107 
108  return(found);
109  }
110 
111  found = row_search_on_row_ref(&(node->pcur), mode, node->table,
112  node->ref, mtr);
113  node->found_clust = found;
114 
115  if (found) {
116  btr_pcur_store_position(&(node->pcur), mtr);
117  }
118 
119  return(found);
120 }
121 
122 /***********************************************************/
126 static
127 ibool
128 row_purge_remove_clust_if_poss_low(
129 /*===============================*/
130  purge_node_t* node,
131  ulint mode)
132 {
133  dict_index_t* index;
134  btr_pcur_t* pcur;
135  btr_cur_t* btr_cur;
136  ibool success;
137  ulint err;
138  mtr_t mtr;
139  rec_t* rec;
140  mem_heap_t* heap = NULL;
141  ulint offsets_[REC_OFFS_NORMAL_SIZE];
142  rec_offs_init(offsets_);
143 
144  index = dict_table_get_first_index(node->table);
145 
146  pcur = &(node->pcur);
147  btr_cur = btr_pcur_get_btr_cur(pcur);
148 
149  log_free_check();
150  mtr_start(&mtr);
151 
152  success = row_purge_reposition_pcur(mode, node, &mtr);
153 
154  if (!success) {
155  /* The record is already removed */
156 
157  btr_pcur_commit_specify_mtr(pcur, &mtr);
158 
159  return(TRUE);
160  }
161 
162  rec = btr_pcur_get_rec(pcur);
163 
164  if (node->roll_ptr != row_get_rec_roll_ptr(
165  rec, index, rec_get_offsets(rec, index, offsets_,
166  ULINT_UNDEFINED, &heap))) {
167  if (UNIV_LIKELY_NULL(heap)) {
168  mem_heap_free(heap);
169  }
170  /* Someone else has modified the record later: do not remove */
171  btr_pcur_commit_specify_mtr(pcur, &mtr);
172 
173  return(TRUE);
174  }
175 
176  if (UNIV_LIKELY_NULL(heap)) {
177  mem_heap_free(heap);
178  }
179 
180  if (mode == BTR_MODIFY_LEAF) {
181  success = btr_cur_optimistic_delete(btr_cur, &mtr);
182  } else {
183  ut_ad(mode == BTR_MODIFY_TREE);
184  btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
185  RB_NONE, &mtr);
186 
187  if (err == DB_SUCCESS) {
188  success = TRUE;
189  } else if (err == DB_OUT_OF_FILE_SPACE) {
190  success = FALSE;
191  } else {
192  ut_error;
193  }
194  }
195 
196  btr_pcur_commit_specify_mtr(pcur, &mtr);
197 
198  return(success);
199 }
200 
201 /***********************************************************/
/* Removes the clustered index record of a purge node if possible.
First makes one attempt in BTR_MODIFY_LEAF mode; if that fails, attempts
in BTR_MODIFY_TREE mode, repeating while the attempt fails and fewer than
BTR_CUR_RETRY_DELETE_N_TIMES retries have been made. A final failure
trips the ut_a() assertion.
NOTE(review): the listing skips number 229 between `n_tries++` and
`goto retry`, so a statement of the original file may be missing from
the retry branch here -- confirm against the real source.
@param node	purge node describing the record to remove */
204 static
205 void
206 row_purge_remove_clust_if_poss(
207 /*===========================*/
208  purge_node_t* node)
209 {
210  ibool success;
211  ulint n_tries = 0;
212 
213  /* fputs("Purge: Removing clustered record\n", stderr); */
214 
 /* Cheap attempt first: leaf-level modification only. */
215  success = row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_LEAF);
216  if (success) {
217 
218  return;
219  }
220 retry:
 /* Fall back to an attempt that may modify the tree. */
221  success = row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_TREE);
222  /* The delete operation may fail if we have little
223  file space left: TODO: easiest to crash the database
224  and restart with more file space */
225 
226  if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
227  n_tries++;
228 
230 
231  goto retry;
232  }
233 
 /* Unconditional assertion: giving up is treated as fatal. */
234  ut_a(success);
235 }
236 
237 /***********************************************************/
/* Decides whether a secondary index entry may be deleted by purge.
NOTE(review): the function-name line (listing line 254) is missing from
this listing; the other functions in this file call
row_purge_poss_sec(node, index, entry) with this exact parameter list,
so that is presumably this function -- confirm against the header.
NOTE(review): listing line 267 is also missing, which leaves the
`can_delete` expression below syntactically incomplete: the operator and
the call whose arguments appear on listing lines 268-269 are not visible.
From what is visible, can_delete is at least TRUE-capable when the
clustered index record cannot be found by row_purge_reposition_pcur().
@param node	purge node; its pcur is (re)positioned and then committed
@param index	secondary index (debug-asserted not to be clustered)
@param entry	secondary index entry under consideration
@return the computed can_delete flag */
252 UNIV_INTERN
253 ibool
255 /*===============*/
256  purge_node_t* node,
257  dict_index_t* index,
258  const dtuple_t* entry)
259 {
260  ibool can_delete;
261  mtr_t mtr;
262 
263  ut_ad(!dict_index_is_clust(index));
264  mtr_start(&mtr);
265 
 /* NOTE(review): a line of the original expression is missing
 between the next two lines of this listing. */
266  can_delete = !row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, &mtr)
268  btr_pcur_get_rec(&node->pcur),
269  &mtr, index, entry);
270 
 /* Commit the mini-transaction started above via the node's cursor. */
271  btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
272 
273  return(can_delete);
274 }
275 
276 /***************************************************************
277 Removes a secondary index entry if possible, by modifying the
278 index tree. Does not try to buffer the delete.
The entry is searched for in BTR_MODIFY_TREE mode. If it is found and
row_purge_poss_sec() allows it, the record is removed with
btr_cur_pessimistic_delete().
@param node	purge node, passed on to row_purge_poss_sec()
@param index	secondary index to remove the entry from
@param entry	index entry to remove
279 @return TRUE if success or if not found */
/* Per the code below, TRUE is also returned when the entry is found but
row_purge_poss_sec() says it must stay; FALSE is returned only when the
pessimistic delete reports DB_OUT_OF_FILE_SPACE. */
280 static
281 ibool
282 row_purge_remove_sec_if_poss_tree(
283 /*==============================*/
284  purge_node_t* node,
285  dict_index_t* index,
286  const dtuple_t* entry)
287 {
288  btr_pcur_t pcur;
289  btr_cur_t* btr_cur;
290  ibool success = TRUE;
291  ulint err;
292  mtr_t mtr;
293  enum row_search_result search_result;
294 
 /* Redo space is checked before the mini-transaction is started. */
295  log_free_check();
296  mtr_start(&mtr);
297 
298  search_result = row_search_index_entry(index, entry, BTR_MODIFY_TREE,
299  &pcur, &mtr);
300 
301  switch (search_result) {
302  case ROW_NOT_FOUND:
303  /* Not found. This is a legitimate condition. In a
304  rollback, InnoDB will remove secondary recs that would
305  be purged anyway. Then the actual purge will not find
306  the secondary index record. Also, the purge itself is
307  eager: if it comes to consider a secondary index
308  record, and notices it does not need to exist in the
309  index, it will remove it. Then if/when the purge
310  comes to consider the secondary index record a second
311  time, it will not exist any more in the index. */
312 
313  /* fputs("PURGE:........sec entry not found\n", stderr); */
314  /* dtuple_print(stderr, entry); */
315  goto func_exit;
316  case ROW_FOUND:
317  break;
318  case ROW_BUFFERED:
319  case ROW_NOT_DELETED_REF:
320  /* These are invalid outcomes, because the mode passed
321  to row_search_index_entry() did not include any of the
322  flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
323  ut_error;
324  }
325 
326  btr_cur = btr_pcur_get_btr_cur(&pcur);
327 
328  /* We should remove the index record if no later version of the row,
329  which cannot be purged yet, requires its existence. If some requires,
330  we should do nothing. */
331 
332  if (row_purge_poss_sec(node, index, entry)) {
333  /* Remove the index record, which should have been
334  marked for deletion. */
 /* NOTE(review): listing line 336 is missing, so the debug
 assertion below is incomplete as shown. */
335  ut_ad(REC_INFO_DELETED_FLAG
337  dict_table_is_comp(index->table)));
338 
339  btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
340  RB_NONE, &mtr);
341  switch (UNIV_EXPECT(err, DB_SUCCESS)) {
342  case DB_SUCCESS:
343  break;
344  case DB_OUT_OF_FILE_SPACE:
345  success = FALSE;
346  break;
347  default:
348  ut_error;
349  }
350  }
351 
352 func_exit:
 /* Common exit: close the local cursor, then commit the mtr. */
353  btr_pcur_close(&pcur);
354  mtr_commit(&mtr);
355 
356  return(success);
357 }
358 
359 /***************************************************************
360 Removes a secondary index entry without modifying the index tree,
361 if possible.
The entry is searched for with BTR_MODIFY_LEAF | BTR_DELETE. If it is
found and row_purge_poss_sec() allows it, an optimistic delete is tried.
@param node	purge node; also stored in the local cursor before the search
@param index	secondary index to remove the entry from
@param entry	index entry to remove
362 @return TRUE if success or if not found */
/* Per the code below, FALSE is returned only when
btr_cur_optimistic_delete() fails; every other search outcome
(entry still needed, delete buffered, entry not found) yields TRUE. */
363 static
364 ibool
365 row_purge_remove_sec_if_poss_leaf(
366 /*==============================*/
367  purge_node_t* node,
368  dict_index_t* index,
369  const dtuple_t* entry)
370 {
371  mtr_t mtr;
372  btr_pcur_t pcur;
373  enum row_search_result search_result;
374 
375  log_free_check();
376 
377  mtr_start(&mtr);
378 
379  /* Set the purge node for the call to row_purge_poss_sec(). */
380  pcur.btr_cur.purge_node = node;
381  /* Set the query thread, so that ibuf_insert_low() will be
382  able to invoke thd_get_trx(). */
383  pcur.btr_cur.thr = static_cast<que_thr_t *>(que_node_get_parent(node));
384 
385  search_result = row_search_index_entry(
386  index, entry, BTR_MODIFY_LEAF | BTR_DELETE, &pcur, &mtr);
387 
388  switch (search_result) {
 /* `success` is scoped to the switch; it is assigned on every
 path that reaches func_exit. */
389  ibool success;
390  case ROW_FOUND:
391  /* Before attempting to purge a record, check
392  if it is safe to do so. */
393  if (row_purge_poss_sec(node, index, entry)) {
394  btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur);
395 
396  /* Only delete-marked records should be purged. */
 /* NOTE(review): listing line 398 is missing, so the debug
 assertion below is incomplete as shown. */
397  ut_ad(REC_INFO_DELETED_FLAG
399  btr_cur_get_rec(btr_cur),
400  dict_table_is_comp(index->table)));
401 
402  if (!btr_cur_optimistic_delete(btr_cur, &mtr)) {
403 
404  /* The index entry could not be deleted. */
405  success = FALSE;
406  goto func_exit;
407  }
408  }
409  /* fall through (the index entry is still needed,
410  or the deletion succeeded) */
411  case ROW_NOT_DELETED_REF:
412  /* The index entry is still needed. */
413  case ROW_BUFFERED:
414  /* The deletion was buffered. */
415  case ROW_NOT_FOUND:
416  /* The index entry does not exist, nothing to do. */
417  success = TRUE;
418  func_exit:
419  btr_pcur_close(&pcur);
420  mtr_commit(&mtr);
421  return(success);
422  }
423 
 /* Reached only if search_result matched none of the cases above. */
424  ut_error;
425  return(FALSE);
426 }
427 
428 /***********************************************************/
/* Removes a secondary index entry if possible.
First tries row_purge_remove_sec_if_poss_leaf(); if that returns FALSE,
tries row_purge_remove_sec_if_poss_tree(), repeating while it fails and
fewer than BTR_CUR_RETRY_DELETE_N_TIMES retries have been made. A final
failure trips the ut_a() assertion.
NOTE(review): the listing skips number 457 between `n_tries++` and
`goto retry`, so a statement of the original file may be missing from
the retry branch here -- confirm against the real source.
@param node	purge node
@param index	secondary index
@param entry	index entry to remove */
430 UNIV_INLINE
431 void
432 row_purge_remove_sec_if_poss(
433 /*=========================*/
434  purge_node_t* node,
435  dict_index_t* index,
436  dtuple_t* entry)
437 {
438  ibool success;
439  ulint n_tries = 0;
440 
441  /* fputs("Purge: Removing secondary record\n", stderr); */
442 
 /* Leaf-only attempt first; done if it reports TRUE. */
443  if (row_purge_remove_sec_if_poss_leaf(node, index, entry)) {
444 
445  return;
446  }
447 retry:
448  success = row_purge_remove_sec_if_poss_tree(node, index, entry);
449  /* The delete operation may fail if we have little
450  file space left: TODO: easiest to crash the database
451  and restart with more file space */
452 
453  if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
454 
455  n_tries++;
456 
458 
459  goto retry;
460  }
461 
 /* Unconditional assertion: giving up is treated as fatal. */
462  ut_a(success);
463 }
464 
465 /***********************************************************/
467 static
468 void
469 row_purge_del_mark(
470 /*===============*/
471  purge_node_t* node)
472 {
473  mem_heap_t* heap;
474  dtuple_t* entry;
475  dict_index_t* index;
476 
477  ut_ad(node);
478 
479  heap = mem_heap_create(1024);
480 
481  while (node->index != NULL) {
482  index = node->index;
483 
484  /* Build the index entry */
485  entry = row_build_index_entry(node->row, NULL, index, heap);
486  ut_a(entry);
487  row_purge_remove_sec_if_poss(node, index, entry);
488 
489  node->index = dict_table_get_next_index(node->index);
490  }
491 
492  mem_heap_free(heap);
493 
494  row_purge_remove_clust_if_poss(node);
495 }
496 
497 /***********************************************************/
/* Purges an update of an existing record and/or frees externally stored
fields referenced by the update vector.
Unless node->rec_type is TRX_UNDO_UPD_DEL_REC, walks the index list from
node->index and, for each index where row_upd_changes_ord_field_binary()
reports a change, builds the older index entry from node->row and removes
it where possible. Then, for every update field whose new value is
flagged external (dfield_is_ext), locates that field's data inside the
undo log page and calls btr_free_externally_stored_field().
NOTE(review): listing lines 603 and 607 are missing, so the ut_a() check
and the second argument of btr_free_externally_stored_field() near the
end of the loop are incomplete as shown.
NOTE(review): `thr` exists as a parameter only under UNIV_DEBUG but is
named unconditionally in the row_upd_changes_ord_field_binary() call;
presumably that name is a macro that drops the argument in non-debug
builds -- confirm.
@param thr	query thread (UNIV_DEBUG builds only)
@param node	purge node; rec_type, index, update, row, undo_rec,
		roll_ptr and table are read */
500 static
501 void
502 row_purge_upd_exist_or_extern_func(
503 /*===============================*/
504 #ifdef UNIV_DEBUG
505  const que_thr_t*thr,
506 #endif /* UNIV_DEBUG */
507  purge_node_t* node)
508 {
509  mem_heap_t* heap;
510  dtuple_t* entry;
511  dict_index_t* index;
512  ibool is_insert;
513  ulint rseg_id;
514  ulint page_no;
515  ulint offset;
516  ulint i;
517  mtr_t mtr;
518 
519  ut_ad(node);
520 
 /* For this record type only the external-field pass is run. */
521  if (node->rec_type == TRX_UNDO_UPD_DEL_REC) {
522 
523  goto skip_secondaries;
524  }
525 
 /* `heap` is created and freed only on this path; it is not
 touched after skip_secondaries. */
526  heap = mem_heap_create(1024);
527 
528  while (node->index != NULL) {
529  index = node->index;
530 
531  if (row_upd_changes_ord_field_binary(node->index, node->update,
532  thr, NULL, NULL)) {
533  /* Build the older version of the index entry */
534  entry = row_build_index_entry(node->row, NULL,
535  index, heap);
536  ut_a(entry);
537  row_purge_remove_sec_if_poss(node, index, entry);
538  }
539 
540  node->index = dict_table_get_next_index(node->index);
541  }
542 
543  mem_heap_free(heap);
544 
545 skip_secondaries:
546  /* Free possible externally stored fields */
547  for (i = 0; i < upd_get_n_fields(node->update); i++) {
548 
549  const upd_field_t* ufield
550  = upd_get_nth_field(node->update, i);
551 
552  if (dfield_is_ext(&ufield->new_val)) {
553  buf_block_t* block;
554  ulint internal_offset;
555  byte* data_field;
556 
557  /* We use the fact that new_val points to
558  node->undo_rec and get thus the offset of
559  dfield data inside the undo record. Then we
560  can calculate from node->roll_ptr the file
561  address of the new_val data */
562 
563  internal_offset
564  = ((const byte*)
565  dfield_get_data(&ufield->new_val))
566  - node->undo_rec;
567 
 /* Sanity check: the offset must lie within one page. */
568  ut_a(internal_offset < UNIV_PAGE_SIZE);
569 
570  trx_undo_decode_roll_ptr(node->roll_ptr,
571  &is_insert, &rseg_id,
572  &page_no, &offset);
 /* Each external field is freed in its own mini-transaction. */
573  mtr_start(&mtr);
574 
575  /* We have to acquire an X-latch to the clustered
576  index tree */
577 
578  index = dict_table_get_first_index(node->table);
579 
580  mtr_x_lock(dict_index_get_lock(index), &mtr);
581 
582  /* NOTE: we must also acquire an X-latch to the
583  root page of the tree. We will need it when we
584  free pages from the tree. If the tree is of height 1,
585  the tree X-latch does NOT protect the root page,
586  because it is also a leaf page. Since we will have a
587  latch on an undo log page, we would break the
588  latching order if we would only later latch the
589  root page of such a tree! */
590 
591  btr_root_get(index, &mtr);
592 
593  /* We assume in purge of externally stored fields
594  that the space id of the undo log record is 0! */
595 
596  block = buf_page_get(0, 0, page_no, RW_X_LATCH, &mtr);
597  buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
598 
 /* Address of the field data inside the latched page frame:
 page frame + decoded roll-pointer offset + offset of the
 field within the undo record. */
599  data_field = buf_block_get_frame(block)
600  + offset + internal_offset;
601 
 /* NOTE(review): listing lines 603 and 607 are missing; the
 two statements below are incomplete as shown. */
602  ut_a(dfield_get_len(&ufield->new_val)
604  btr_free_externally_stored_field(
605  index,
606  data_field + dfield_get_len(&ufield->new_val)
608  NULL, NULL, NULL, 0, RB_NONE, &mtr);
609  mtr_commit(&mtr);
610  }
611  }
612 }
613 
/* Call wrapper for row_purge_upd_exist_or_extern_func(). That function
takes the query thread argument only when UNIV_DEBUG is defined, so the
wrapper forwards `thr` in debug builds and drops it otherwise. */
#ifndef UNIV_DEBUG
# define row_purge_upd_exist_or_extern(thr,node)	\
	row_purge_upd_exist_or_extern_func(node)
#else /* !UNIV_DEBUG */
# define row_purge_upd_exist_or_extern(thr,node)	\
	row_purge_upd_exist_or_extern_func(thr,node)
#endif /* !UNIV_DEBUG */
621 
622 /***********************************************************/
/* Parses node->undo_rec into the purge node and decides whether any
purge work is needed for it.
Sets node->rec_type and node->table, and, when work is needed,
node->ref, node->update and (if the update may change ordering fields)
node->row, all allocated from node->heap.
Returns FALSE without further parsing when:
 - the record is TRX_UNDO_UPD_DEL_REC and no externally stored field
   was updated;
 - the record is TRX_UNDO_UPD_EXIST_REC, cmpl_info has
   UPD_NODE_NO_ORD_CHANGE set, and no externally stored field was
   updated;
 - the table is not found by id, its ibd_file_missing flag is set, or it
   has no first (clustered) index.
NOTE(review): this listing skips numbers 632-633 (in the parameter
list), 686 (directly after the err_exit label) and 716 (the start of the
statement inside the final `if`). As shown, nothing releases the freeze
taken by row_mysql_freeze_data_dictionary(trx) on the err_exit path, and
the last `if` body is incomplete; presumably the missing lines cover
both -- confirm against the real source. On the TRUE path the visible
code returns with the data dictionary still frozen.
@param node		purge node; undo_rec must be set
@param updated_extern	out: filled by trx_undo_rec_get_pars(); tested
			here as "an externally stored field was updated"
@param thr		query thread; its transaction is used
@return TRUE if the caller should go on to purge, FALSE otherwise */
626 static
627 ibool
628 row_purge_parse_undo_rec(
629 /*=====================*/
630  purge_node_t* node,
631  ibool* updated_extern,
634  que_thr_t* thr)
635 {
636  dict_index_t* clust_index;
637  byte* ptr;
638  trx_t* trx;
639  undo_no_t undo_no;
640  table_id_t table_id;
641  trx_id_t trx_id;
642  roll_ptr_t roll_ptr;
643  ulint info_bits;
644  ulint type;
645  ulint cmpl_info;
646 
647  ut_ad(node && thr);
648 
649  trx = thr_get_trx(thr);
650 
 /* Each parsing call returns the position where the next one
 continues, carried in `ptr`. */
651  ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info,
652  updated_extern, &undo_no, &table_id);
653  node->rec_type = type;
654 
655  if (type == TRX_UNDO_UPD_DEL_REC && !(*updated_extern)) {
656 
 /* Note: node->table has not been assigned on this path. */
657  return(FALSE);
658  }
659 
660  ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
661  &info_bits);
662  node->table = NULL;
663 
664  if (type == TRX_UNDO_UPD_EXIST_REC
665  && cmpl_info & UPD_NODE_NO_ORD_CHANGE && !(*updated_extern)) {
666 
667  /* Purge requires no changes to indexes: we may return */
668 
669  return(FALSE);
670  }
671 
672  /* Prevent DROP TABLE etc. from running when we are doing the purge
673  for this row */
674 
675  row_mysql_freeze_data_dictionary(trx);
676 
 /* The table lookup by id is done under dict_sys->mutex. */
677  mutex_enter(&(dict_sys->mutex));
678 
679  node->table = dict_table_get_on_id_low(table_id);
680 
681  mutex_exit(&(dict_sys->mutex));
682 
683  if (node->table == NULL) {
684  /* The table has been dropped: no need to do purge */
685 err_exit:
 /* NOTE(review): listing line 686 is missing here. */
687  return(FALSE);
688  }
689 
690  if (node->table->ibd_file_missing) {
691  /* We skip purge of missing .ibd files */
692 
693  node->table = NULL;
694 
695  goto err_exit;
696  }
697 
698  clust_index = dict_table_get_first_index(node->table);
699 
700  if (clust_index == NULL) {
701  /* The table was corrupt in the data dictionary */
702 
 /* Note: node->table is left non-NULL on this path. */
703  goto err_exit;
704  }
705 
706  ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
707  node->heap);
708 
709  ptr = trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
710  roll_ptr, info_bits, trx,
711  node->heap, &(node->update));
712 
713  /* Read to the partial row the fields that occur in indexes */
714 
715  if (!(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
 /* NOTE(review): listing line 716 (the start of this call) is
 missing; only its trailing arguments are visible. */
717  ptr, clust_index, &node->row,
718  type == TRX_UNDO_UPD_DEL_REC,
719  node->heap);
720  }
721 
722  return(TRUE);
723 }
724 
725 /***********************************************************/
/* Fetches the next undo log record and purges it.
If trx_purge_fetch_next_rec() returns NULL, control is handed to the
node's parent (thr->run_node) and nothing else is done. Otherwise, unless
the record is the dummy record (&trx_purge_dummy_rec), it is parsed and,
when row_purge_parse_undo_rec() returns TRUE, processed according to
node->rec_type:
 - TRX_UNDO_DEL_MARK_REC: row_purge_del_mark();
 - externally stored field updated, or TRX_UNDO_UPD_EXIST_REC:
   row_purge_upd_exist_or_extern().
Afterwards the reservation is released, node->heap is emptied and
thr->run_node is set back to this node, so the same node runs next.
NOTE(review): the listing skips number 772, immediately before the
closing brace of the main `if`; a statement of the original file may be
missing there (row_purge_parse_undo_rec() returns TRUE with the data
dictionary still frozen, and no matching release is visible here) --
confirm against the real source.
@param node	purge node
@param thr	query thread */
729 static __attribute__((nonnull))
730 void
731 row_purge(
732 /*======*/
733  purge_node_t* node,
734  que_thr_t* thr)
735 {
736  ibool updated_extern;
737 
738  ut_ad(node);
739  ut_ad(thr);
740 
 /* Also fills in node->roll_ptr and node->reservation. */
741  node->undo_rec = trx_purge_fetch_next_rec(&node->roll_ptr,
742  &node->reservation,
743  node->heap);
744  if (!node->undo_rec) {
745  /* Purge completed for this query thread */
746 
747  thr->run_node = que_node_get_parent(node);
748 
749  return;
750  }
751 
752  if (node->undo_rec != &trx_purge_dummy_rec
753  && row_purge_parse_undo_rec(node, &updated_extern, thr)) {
 /* Reset so that the first row_purge_reposition_pcur() call
 for this record performs a fresh search. */
754  node->found_clust = FALSE;
755 
 /* Start the index walk at the index after the first one. */
756  node->index = dict_table_get_next_index(
757  dict_table_get_first_index(node->table));
758 
759  if (node->rec_type == TRX_UNDO_DEL_MARK_REC) {
760  row_purge_del_mark(node);
761 
762  } else if (updated_extern
763  || node->rec_type == TRX_UNDO_UPD_EXIST_REC) {
764 
765  row_purge_upd_exist_or_extern(thr, node);
766  }
767 
 /* The persistent cursor is closed only if a clustered record
 was located while processing this undo record. */
768  if (node->found_clust) {
769  btr_pcur_close(&(node->pcur));
770  }
771 
773  }
774 
775  /* Do some cleanup */
776  trx_purge_rec_release(node->reservation);
777  mem_heap_empty(node->heap);
778 
779  thr->run_node = node;
780 }
781 
782 /***********************************************************/
/* Executes one step of a purge node in a query thread: takes the purge
node from thr->run_node, debug-asserts that it is of type QUE_NODE_PURGE,
and runs row_purge() on it.
NOTE(review): the function-name line (listing line 788) is missing from
this listing; the name is presumably row_purge_step -- confirm against
the header.
@param thr	query thread whose run_node is the purge node
@return the same query thread pointer that was passed in */
786 UNIV_INTERN
787 que_thr_t*
789 /*===========*/
790  que_thr_t* thr)
791 {
792  purge_node_t* node;
793 
794  ut_ad(thr);
795 
796  node = static_cast<purge_node_t *>(thr->run_node);
797 
798  ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
799 
800  row_purge(node, thr);
801 
802  return(thr);
803 }