Skip to content

Commit

Permalink
BUG#27135742 CLONE CREATED IBDATA1 IS OF A DIFFERENT SIZE
Browse files Browse the repository at this point in the history
BUG#27245214 LATCHDEBUG::CHECK_ORDER

Problem :
---------
1. When a file is extended in the page copy stage, we always extend
the first node/file of the tablespace. For a multi-node system tablespace
this results in extending the first file beyond its configured length.

2. For persisting dynamic metadata, checkpoint needs to insert data into
tables. A recent patch caused this regression, where checkpoint is
called before releasing archiver mutex. This triggers the debug mode
assert checking mutex order.

3. Issues when the clone LSN falls on a redo block boundary
    - when the trailer chunk is the first block in the next redo file
    - when the trailer chunk ends at both a file and a block boundary

4. Non-Linux platforms are not using clone_buffer_size

Solution :
----------
1. Evaluate the right size and extend the last file after PAGE COPY
2. Call checkpoint after releasing archiver mutex
3. Avoid overwriting trailer if archived log ends at block boundary
4. Add check if zero-copy is supported and use clone_buffer_size

5. Tests for scenarios 1-4
5A. Test shutdown while clone is in progress
5B. Test coverage for clone with read/write when not using O_DIRECT

Reviewed-by: Satya Bodapati <satya.bodapati@oracle.com>

RB: 18427
  • Loading branch information
Debarun Banerjee committed Jan 31, 2018
1 parent e2cea82 commit 80013ff
Show file tree
Hide file tree
Showing 12 changed files with 207 additions and 63 deletions.
38 changes: 20 additions & 18 deletions storage/innobase/arch/arch0arch.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2018, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License, version 2.0, as published by the
Expand Down Expand Up @@ -215,26 +215,14 @@ Arch_Group::write_to_file(
}
}

while (length > 0) {

ib_uint64_t len_copy;
ib_uint64_t len_left;

len_copy = static_cast<ib_uint64_t>(length);

len_left = m_file_ctx.bytes_left();
auto len_left = m_file_ctx.bytes_left();

/* Current file is over, switch to next file. */
if (len_left == 0) {
/* New file is immediately opened when current file is over. */
ut_ad(len_left != 0);

err = m_file_ctx.open_new(m_begin_lsn, m_header_len);
if (err != DB_SUCCESS) {

return(err);
}
while (length > 0) {

len_left = m_file_ctx.bytes_left();
}
auto len_copy = static_cast<uint64_t>(length);

/* Write as much as possible in current file. */
if (len_left < len_copy) {
Expand All @@ -254,6 +242,20 @@ Arch_Group::write_to_file(

ut_ad(length >= write_size);
length -= write_size;

len_left = m_file_ctx.bytes_left();

/* Current file is over, switch to next file. */
if (len_left == 0) {

err = m_file_ctx.open_new(m_begin_lsn, m_header_len);
if (err != DB_SUCCESS) {

return(err);
}

len_left = m_file_ctx.bytes_left();
}
}

return(DB_SUCCESS);
Expand Down
62 changes: 52 additions & 10 deletions storage/innobase/arch/arch0log.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2018, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License, version 2.0, as published by the
Expand Down Expand Up @@ -78,16 +78,17 @@ Log_Arch_Client_Ctx::start(
return(err);
}

/** Stop redo log archiving
/** Stop redo log archiving. Exact trailer length is returned as out
parameter which could be less than the redo block size.
@param[out] trailer redo trailer. Caller must allocate buffer.
@param[in] len buffer length
@param[in,out] len trailer length
@param[out] offset trailer block offset
@return error code */
dberr_t
Log_Arch_Client_Ctx::stop(
byte* trailer,
uint len,
ib_uint64_t& offset)
uint32_t& len,
uint64_t& offset)
{
dberr_t err;

Expand All @@ -97,7 +98,7 @@ Log_Arch_Client_Ctx::stop(
ut_ad(m_state == ARCH_CLIENT_STATE_STARTED);
ut_ad(trailer == nullptr || len >= OS_FILE_LOG_BLOCK_SIZE);

err = arch_log_sys->stop(m_group, m_end_lsn, trailer);
err = arch_log_sys->stop(m_group, m_end_lsn, trailer, len);

start_lsn = m_group->get_begin_lsn();

Expand Down Expand Up @@ -218,10 +219,11 @@ Log_Arch_Client_Ctx::release()

if (m_state == ARCH_CLIENT_STATE_STARTED) {

ib_uint64_t dummy_offset;
uint64_t dummy_offset;
uint32_t dummy_len = 0;

/* This is for cleanup in error cases. */
stop(nullptr, 0, dummy_offset);
stop(nullptr, dummy_len, dummy_offset);
}

ut_ad(m_state == ARCH_CLIENT_STATE_STOPPED);
Expand Down Expand Up @@ -442,24 +444,60 @@ Arch_Log_Sys::start(
return(DB_SUCCESS);
}

#ifdef UNIV_DEBUG
/** Debug-only helper: override the stop LSN and trailer length so that
archiving appears to stop at the end of an archived file. It is invoked
under the "clone_arch_log_stop_file_end" debug point.
@param[out]	stop_lsn	block-aligned begin LSN of the group plus
				the file size without the log file header
@param[out]	blk_len		trailer length in bytes: 0, or 64 when the
				"clone_arch_log_extra_bytes" debug point
				is also set */
void
Arch_Group::adjust_end_lsn(lsn_t& stop_lsn, uint32_t& blk_len)
{
	/* Round the group's begin LSN down to a redo block boundary. */
	const lsn_t	aligned_begin = ut_uint64_align_down(
		get_begin_lsn(), OS_FILE_LOG_BLOCK_SIZE);

	/* Bytes in one file that are not part of the log file header. */
	const auto	payload_size = get_file_size() - LOG_FILE_HDR_SIZE;

	stop_lsn = aligned_begin + payload_size;
	blk_len = 0;

	/* Optionally move the stop LSN 64 bytes past that point; the
	value is chosen so that it does not exceed the redo block size. */
	DBUG_EXECUTE_IF("clone_arch_log_extra_bytes",
			blk_len = 64;
			stop_lsn += blk_len;);
}

/** Debug-only helper: clamp a copy length so that it does not exceed
the number of bytes left in the current archive file. It is invoked
under the "clone_arch_log_stop_file_end" debug point.
@param[in,out]	length	requested length in bytes; reduced to the bytes
			left in the current file when it is larger */
void
Arch_Group::adjust_copy_length(uint32_t& length)
{
	const auto	requested = static_cast<uint64_t>(length);

	/* The request already fits in the current file: leave it alone. */
	if (requested <= m_file_ctx.bytes_left()) {

		return;
	}

	const uint64_t	available = m_file_ctx.bytes_left();

	length = static_cast<uint32_t>(available);
}
#endif /* UNIV_DEBUG */

/** Stop redo log archiving.
If other clients are there, the client is detached from
the current group.
@param[out] group log archive group
@param[out] stop_lsn stop lsn for client
@param[out] log_blk redo log trailer block
@param[in,out] blk_len length in bytes
@return error code */
dberr_t
Arch_Log_Sys::stop(
Arch_Group* group,
lsn_t& stop_lsn,
byte* log_blk)
byte* log_blk,
uint32_t& blk_len)
{
dberr_t err = DB_SUCCESS;
uint count_active;

/* Get the current LSN and trailer block. */
log_get_last_block(stop_lsn, log_blk);
log_get_last_block(stop_lsn, log_blk, blk_len);

DBUG_EXECUTE_IF("clone_arch_log_stop_file_end",
group->adjust_end_lsn(stop_lsn, blk_len););

/* Will throw error, if shutdown. We still continue
with detach but return the error. */
Expand Down Expand Up @@ -843,6 +881,10 @@ Arch_Log_Sys::archive(

} else {

DBUG_EXECUTE_IF("clone_arch_log_stop_file_end",
auto curr_group = get_arch_group();
curr_group->adjust_copy_length(arch_len););

ut_ad(curr_state == ARCH_STATE_ACTIVE);

if (arch_len == 0) {
Expand Down
8 changes: 4 additions & 4 deletions storage/innobase/arch/arch0page.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2018, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License, version 2.0, as published by the
Expand Down Expand Up @@ -849,9 +849,6 @@ Arch_Page_Sys::track_initial_pages()
buf_flush_list_mutex_exit(buf_pool);
mutex_exit(&buf_pool->flush_state_mutex);
}

/* Make sure all written pages are flushed to disk. */
log_checkpoint(false, false);
}

/** Enable tracking pages in all buffer pools.
Expand Down Expand Up @@ -1068,6 +1065,9 @@ Arch_Page_Sys::start(

arch_mutex_exit();

/* Make sure all written pages are synced to disk. */
log_checkpoint(false, false);

return(DB_SUCCESS);
}

Expand Down
41 changes: 39 additions & 2 deletions storage/innobase/clone/clone0apply.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2018, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License, version 2.0, as published by the
Expand Down Expand Up @@ -330,7 +330,8 @@ Clone_Handle::receive_data(
&& offset == 0
&& file_meta->m_file_size < file_size) {

file_meta->m_file_size = file_size;
snapshot->update_file_size(task->m_current_file_index,
file_size);
}

/* Open destination file for first block. */
Expand Down Expand Up @@ -463,6 +464,42 @@ Clone_Handle::apply(
return(err);
}

/** Update the recorded file size when a file is extended during page
copy. Starting at the given file, the size of every node that is followed
by another node of the same tablespace is subtracted from the requested
size, and whatever remains is stored as the size of the last node.
Nothing is changed if the requested size does not exceed the size already
recorded for the file.
@param[in]	file_index	index of the file the new size applies to
@param[in]	file_size	new size in bytes; it is apportioned across
				consecutive files sharing the same space ID */
void
Clone_Snapshot::update_file_size(uint32_t file_index, uint64_t file_size)
{
	/* File sizes are only expected to grow in the page copy state. */
	ut_ad(m_snapshot_state == CLONE_SNAPSHOT_PAGE_COPY);

	auto	node = get_file_by_index(file_index);

	while (file_size > node->m_file_size) {

		++file_index;

		if (file_index < m_num_data_files) {

			auto	successor = get_file_by_index(file_index);

			if (successor->m_space_id == node->m_space_id) {

				/* Only system tablespace can have
				multiple nodes. */
				ut_ad(node->m_space_id == 0);

				/* This node keeps its size; carry the
				excess over to the next node. */
				file_size -= node->m_file_size;
				node = successor;
				continue;
			}
		}

		/* No further node belongs to this tablespace: the last
		node takes the remaining size. */
		node->m_file_size = file_size;
		return;
	}
}

/** Extend files after copying pages, if needed
@return error code */
dberr_t
Expand Down
4 changes: 3 additions & 1 deletion storage/innobase/clone/clone0clone.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2018, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License, version 2.0, as published by the
Expand Down Expand Up @@ -1096,6 +1096,8 @@ Clone_Handle::open_file(
task->m_file_cache = false;
}

DBUG_EXECUTE_IF("clone_no_zero_copy", task->m_file_cache = false;);

task->m_current_file_index = file_meta->m_file_index;

return(DB_SUCCESS);
Expand Down
9 changes: 6 additions & 3 deletions storage/innobase/clone/clone0copy.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2018, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License, version 2.0, as published by the
Expand Down Expand Up @@ -150,7 +150,7 @@ Clone_Snapshot::init_file_copy()
m_redo_ctx.get_header_size(m_redo_file_size, m_redo_header_size,
m_redo_trailer_size);

m_redo_header = static_cast<byte*>(mem_heap_alloc(
m_redo_header = static_cast<byte*>(mem_heap_zalloc(
m_snapshot_heap,
m_redo_header_size + m_redo_trailer_size));

Expand Down Expand Up @@ -314,7 +314,10 @@ Clone_Snapshot::init_redo_copy()
/* Add another chunk for the redo log trailer. */
++m_num_redo_chunks;
#ifdef HAVE_PSI_STAGE_INTERFACE
m_monitor.add_estimate(m_redo_trailer_size);
if (m_redo_trailer_size != 0) {

m_monitor.add_estimate(m_redo_trailer_size);
}
#endif

m_num_current_chunks = m_num_redo_chunks;
Expand Down
17 changes: 15 additions & 2 deletions storage/innobase/clone/clone0snapshot.cc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2018, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License, version 2.0, as published by the
Expand Down Expand Up @@ -448,7 +448,7 @@ Clone_Snapshot::get_next_block(
} else if (chunk_num == m_num_current_chunks) {
/* Last chunk is the redo trailer. */

if (block_num != 0) {
if (block_num != 0 || m_redo_trailer_size == 0) {

block_num = 0;
return(DB_SUCCESS);
Expand Down Expand Up @@ -573,6 +573,19 @@ Clone_Snapshot::get_next_block(
current_file->m_file_size - data_offset);
}

#ifdef UNIV_DEBUG
if (m_snapshot_state == CLONE_SNAPSHOT_REDO_COPY) {

/* Current file is the last redo file */
if (current_file == m_redo_file_vector.back()
&& m_redo_trailer_size != 0) {

/* Should not exceed/overwrite the trailer */
ut_ad(data_offset + data_size <= m_redo_trailer_offset);
}
}
#endif /* UNIV_DEBUG */

return(DB_SUCCESS);
}

Expand Down
Loading

0 comments on commit 80013ff

Please sign in to comment.