DMS - Ongoing replication causing duplicate records due to Redshift cluster maintenance
Hi,
I'm facing an issue with DMS ongoing replication producing a small number of duplicate records because the target Redshift cluster goes down for maintenance every week. One of the error messages in the CloudWatch logs is `Failed to start rollback apply transaction`, so it looks like DMS fails to roll back a batch, and with `RecoverableErrorCount` set to -1, the task ends up retrying the ongoing replication from a batch it has already applied, resulting in the duplicates.
Is there a way via the DMS settings to handle this rollback failure issue caused by the cluster maintenance? Or maybe a way to work around the cluster going down for maintenance?
Thanks!
Below are my settings:
Source: MySQL
Target: Redshift
`{
"ErrorBehavior": {
"FailOnNoTablesCaptured": true,
"ApplyErrorUpdatePolicy": "SUSPEND_TABLE",
"FailOnTransactionConsistencyBreached": true,
"RecoverableErrorThrottlingMax": 1800,
"DataErrorEscalationPolicy": "SUSPEND_TABLE",
"ApplyErrorEscalationCount": 5,
"RecoverableErrorStopRetryAfterThrottlingMax": true,
"RecoverableErrorThrottling": true,
"ApplyErrorFailOnTruncationDdl": false,
"DataTruncationErrorPolicy": "SUSPEND_TABLE",
"ApplyErrorInsertPolicy": "SUSPEND_TABLE",
"EventErrorPolicy": "IGNORE",
"ApplyErrorEscalationPolicy": "SUSPEND_TABLE",
"RecoverableErrorCount": -1,
"DataErrorEscalationCount": 5,
"TableErrorEscalationPolicy": "SUSPEND_TABLE",
"RecoverableErrorInterval": 5,
"ApplyErrorDeletePolicy": "SUSPEND_TABLE",
"TableErrorEscalationCount": 5,
"FullLoadIgnoreConflicts": true,
"DataErrorPolicy": "SUSPEND_TABLE",
"TableErrorPolicy": "SUSPEND_TABLE"
},
"TTSettings": {
"TTS3Settings": null,
"TTRecordSettings": null,
"EnableTT": false
},
"FullLoadSettings": {
"CommitRate": 10000,
"StopTaskCachedChangesApplied": false,
"StopTaskCachedChangesNotApplied": false,
"MaxFullLoadSubTasks": 8,
"TransactionConsistencyTimeout": 1200,
"CreatePkAfterFullLoad": false,
"TargetTablePrepMode": "DROP_AND_CREATE"
},
"TargetMetadata": {
"ParallelApplyBufferSize": 10000,
"ParallelApplyQueuesPerThread": 0,
"ParallelApplyThreads": 32,
"TargetSchema": "",
"InlineLobMaxSize": 0,
"ParallelLoadQueuesPerThread": 0,
"SupportLobs": true,
"LobChunkSize": 0,
"TaskRecoveryTableEnabled": true,
"ParallelLoadThreads": 32,
"LobMaxSize": 63,
"BatchApplyEnabled": true,
"FullLobMode": false,
"LimitedSizeLobMode": true,
"LoadMaxFileSize": 0,
"ParallelLoadBufferSize": 1000
},
"BeforeImageSettings": null,
"ControlTablesSettings": {
"historyTimeslotInMinutes": 5,
"HistoryTimeslotInMinutes": 5,
"StatusTableEnabled": true,
"SuspendedTablesTableEnabled": true,
"HistoryTableEnabled": true,
"ControlSchema": "dms_control",
"FullLoadExceptionTableEnabled": true
},
"LoopbackPreventionSettings": null,
"CharacterSetSettings": null,
"FailTaskWhenCleanTaskResourceFailed": false,
"ChangeProcessingTuning": {
"StatementCacheSize": 50,
"CommitTimeout": 1,
"BatchApplyPreserveTransaction": true,
"BatchApplyTimeoutMin": 1,
"BatchSplitSize": 0,
"BatchApplyTimeoutMax": 30,
"MinTransactionSize": 1000,
"MemoryKeepTime": 60,
"BatchApplyMemoryLimit": 500,
"MemoryLimitTotal": 1024
},
"ChangeProcessingDdlHandlingPolicy": {
"HandleSourceTableDropped": true,
"HandleSourceTableTruncated": true,
"HandleSourceTableAltered": true
},
"PostProcessingRules": null
}`