Skip to content

Commit b6d88dd

Browse files
author
Shlomi Noach
authored
Merge pull request #77 from github/cut-over-lock-table-names
Solved cut-over stall; change of table names
2 parents dc8d274 + 96e8419 commit b6d88dd

File tree

4 files changed

+47
-15
lines changed

4 files changed

+47
-15
lines changed

build.sh

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
#!/bin/bash
22
#
33
#
4-
RELEASE_VERSION="0.9.6"
4+
RELEASE_VERSION="0.9.7"
55

66
buildpath=/tmp/gh-ost
77
target=gh-ost

go/base/context.go

+9 -6
Original file line number | Diff line number | Diff line change
@@ -164,20 +164,23 @@ func GetMigrationContext() *MigrationContext {
164164

165165
// GetGhostTableName generates the name of ghost table, based on original table name
166166
func (this *MigrationContext) GetGhostTableName() string {
167-
return fmt.Sprintf("_%s_gst", this.OriginalTableName)
167+
return fmt.Sprintf("_%s_gho", this.OriginalTableName)
168168
}
169169

170170
// GetOldTableName generates the name of the "old" table, into which the original table is renamed.
171171
func (this *MigrationContext) GetOldTableName() string {
172-
// if this.TestOnReplica {
173-
// return fmt.Sprintf("_%s_tst", this.OriginalTableName)
174-
// }
175-
return fmt.Sprintf("_%s_old", this.OriginalTableName)
172+
if this.TestOnReplica {
173+
return fmt.Sprintf("_%s_ght", this.OriginalTableName)
174+
}
175+
if this.MigrateOnReplica {
176+
return fmt.Sprintf("_%s_ghr", this.OriginalTableName)
177+
}
178+
return fmt.Sprintf("_%s_del", this.OriginalTableName)
176179
}
177180

178181
// GetChangelogTableName generates the name of changelog table, based on original table name
179182
func (this *MigrationContext) GetChangelogTableName() string {
180-
return fmt.Sprintf("_%s_osc", this.OriginalTableName)
183+
return fmt.Sprintf("_%s_ghc", this.OriginalTableName)
181184
}
182185

183186
// GetVoluntaryLockName returns a name of a voluntary lock to be used throughout

go/logic/applier.go

+12 -1
Original file line number | Diff line number | Diff line change
@@ -607,6 +607,7 @@ func (this *Applier) LockOriginalTableAndWait(sessionIdChan chan int64, tableLoc
607607

608608
var sessionId int64
609609
if err := tx.QueryRow(`select connection_id()`).Scan(&sessionId); err != nil {
610+
tableLocked <- err
610611
return err
611612
}
612613
sessionIdChan <- sessionId
@@ -616,7 +617,17 @@ func (this *Applier) LockOriginalTableAndWait(sessionIdChan chan int64, tableLoc
616617
lockName := this.GetSessionLockName(sessionId)
617618
log.Infof("Grabbing voluntary lock: %s", lockName)
618619
if err := tx.QueryRow(query, lockName).Scan(&lockResult); err != nil || lockResult != 1 {
619-
return fmt.Errorf("Unable to acquire lock %s", lockName)
620+
err := fmt.Errorf("Unable to acquire lock %s", lockName)
621+
tableLocked <- err
622+
return err
623+
}
624+
625+
tableLockTimeoutSeconds := this.migrationContext.SwapTablesTimeoutSeconds * 2
626+
log.Infof("Setting LOCK timeout as %d seconds", tableLockTimeoutSeconds)
627+
query = fmt.Sprintf(`set session lock_wait_timeout:=%d`, tableLockTimeoutSeconds)
628+
if _, err := tx.Exec(query); err != nil {
629+
tableLocked <- err
630+
return err
620631
}
621632

622633
query = fmt.Sprintf(`lock /* gh-ost */ tables %s.%s write`,

go/logic/migrator.go

+25 -7
Original file line number | Diff line number | Diff line change
@@ -415,7 +415,6 @@ func (this *Migrator) Migrate() (err error) {
415415
this.consumeRowCopyComplete()
416416
log.Infof("Row copy complete")
417417
this.printStatus(ForcePrintStatusRule)
418-
this.migrationContext.MarkPointOfInterest()
419418

420419
if err := this.cutOver(); err != nil {
421420
return err
@@ -435,10 +434,12 @@ func (this *Migrator) cutOver() (err error) {
435434
log.Debugf("Noop operation; not really swapping tables")
436435
return nil
437436
}
437+
this.migrationContext.MarkPointOfInterest()
438438
this.throttle(func() {
439439
log.Debugf("throttling before swapping tables")
440440
})
441441

442+
this.migrationContext.MarkPointOfInterest()
442443
this.sleepWhileTrue(
443444
func() (bool, error) {
444445
if this.migrationContext.PostponeCutOverFlagFile == "" {
@@ -454,6 +455,7 @@ func (this *Migrator) cutOver() (err error) {
454455
},
455456
)
456457
atomic.StoreInt64(&this.migrationContext.IsPostponingCutOver, 0)
458+
this.migrationContext.MarkPointOfInterest()
457459

458460
if this.migrationContext.TestOnReplica {
459461
// With `--test-on-replica` we stop replication thread, and then proceed to use
@@ -478,15 +480,20 @@ func (this *Migrator) cutOver() (err error) {
478480
return err
479481
}
480482
if this.migrationContext.CutOverType == base.CutOverTwoStep {
481-
err := this.retryOperation(this.cutOverTwoStep)
483+
err := this.retryOperation(
484+
func() error {
485+
return this.executeAndThrottleOnError(this.cutOverTwoStep)
486+
},
487+
)
482488
return err
483489
}
484-
return nil
490+
return log.Fatalf("Unknown cut-over type: %d; should never get here!", this.migrationContext.CutOverType)
485491
}
486492

487493
// Inject the "AllEventsUpToLockProcessed" state hint, wait for it to appear in the binary logs,
488494
// make sure the queue is drained.
489495
func (this *Migrator) waitForEventsUpToLock() (err error) {
496+
this.migrationContext.MarkPointOfInterest()
490497
waitForEventsUpToLockStartTime := time.Now()
491498

492499
log.Infof("Writing changelog state: %+v", AllEventsUpToLockProcessed)
@@ -541,19 +548,28 @@ func (this *Migrator) safeCutOver() (err error) {
541548

542549
okToUnlockTable := make(chan bool, 2)
543550
originalTableRenamed := make(chan error, 1)
551+
var originalTableRenameIntended int64
544552
defer func() {
553+
log.Infof("Checking to see if we need to roll back")
545554
// The following is to make sure we unlock the table no-matter-what!
546555
// There's enough buffer in the channel to support a redundant write here.
547556
okToUnlockTable <- true
548-
// We need to make sure we wait for the original-rename, successful or not,
549-
// so as to be able to rollback in case the ghost-rename fails.
550-
<-originalTableRenamed
551-
557+
if atomic.LoadInt64(&originalTableRenameIntended) == 1 {
558+
log.Infof("Waiting for original table rename result")
559+
// We need to make sure we wait for the original-rename, successful or not,
560+
// so as to be able to rollback in case the ghost-rename fails.
561+
// But we only wait on this queue if there's actually going to be a rename.
562+
// As an example, what happens should the initial `lock tables` fail? We would
563+
// never proceed to rename the table, hence this queue is never written to.
564+
<-originalTableRenamed
565+
}
552566
// Rollback operation
553567
if !this.applier.tableExists(this.migrationContext.OriginalTableName) {
554568
log.Infof("Cannot find %s, rolling back", this.migrationContext.OriginalTableName)
555569
err := this.applier.RenameTable(this.migrationContext.GetOldTableName(), this.migrationContext.OriginalTableName)
556570
log.Errore(err)
571+
} else {
572+
log.Info("No need for rollback")
557573
}
558574
}()
559575
lockOriginalSessionIdChan := make(chan int64, 1)
@@ -577,6 +593,8 @@ func (this *Migrator) safeCutOver() (err error) {
577593
// We now attempt a RENAME on the original table, and expect it to block
578594
renameOriginalSessionIdChan := make(chan int64, 1)
579595
this.migrationContext.RenameTablesStartTime = time.Now()
596+
atomic.StoreInt64(&originalTableRenameIntended, 1)
597+
580598
go func() {
581599
this.applier.RenameOriginalTable(renameOriginalSessionIdChan, originalTableRenamed)
582600
}()

0 commit comments

Comments
 (0)