Hi FDB Community,
I’m trying to run FDB backup with the aws_sdk flag enabled, because we run FDB in K8s pods that authenticate via IAM Roles for Service Accounts. However, my efforts to get it running have failed so far. I’m using 7.3.43 for servers.
I’ve built the FDB image with the WITH_AWS_BACKUP flag enabled. Here is the fdbbackup start command run on a pod, followed by the trace log.
I see that I get a 403 error from S3, but I’ve double- and triple-checked my policies. I even passed the secret and tokens to fdbbackup directly, and they worked.
Command:
fdbbackup start -C /var/dynamic-conf/fdb.cluster -w -z -s 3600 --log --tls_verify_peers=Check.Valid=0 --logdir . -t c2 -d "blobstore://s3.us-east-1.amazonaws.com:443/test-backups?region=us-east-1&sdk_auth=1&bucket=super-private-bucket-name"
Log:
<Event Severity="10" Time="1727560032.194388" DateTime="2024-09-28T21:47:12Z" Type="FBA_SubmitBackup" ID="0000000000000000" TagName="c2" StopWhenDone="0" UsePartitionedLog="0" OutContainer="blobstore://s3.us-east-1.amazonaws.com:443/test-backups?region=us-east-1&sdk_auth=1&bucket=super-private-bucket-name" ThreadID="10261369439410137752" Machine="10.10.69.117:57" LogGroup="default" ClientDescription="primary-7.3.43-13056699628119360000" />
<Event Severity="10" Time="1727560032.194388" DateTime="2024-09-28T21:47:12Z" Type="CodeCoverage" ID="0000000000000000" File="fdbclient/ReadYourWrites.actor.cpp" Line="1632" Condition="true" Covered="1" Comment="ReadYourWritesTransaction::get" ThreadID="10261369439410137752" Machine="10.10.69.117:57" LogGroup="default" ClientDescription="primary-7.3.43-13056699628119360000" />
<Event Severity="10" Time="1727560032.194388" DateTime="2024-09-28T21:47:12Z" Type="StartingRunLoopProfilingThread" ID="0000000000000000" Interval="0.125" ThreadID="10261369439410137752" Machine="10.10.69.117:57" LogGroup="default" ClientDescription="primary-7.3.43-13056699628119360000" />
<Event Severity="10" Time="1727560032.194388" DateTime="2024-09-28T21:47:12Z" Type="Net2Running" ID="0000000000000000" ThreadID="10261369439410137752" Machine="10.10.69.117:57" LogGroup="default" ClientDescription="primary-7.3.43-13056699628119360000" />
<Event Severity="10" Time="1727560032.194388" DateTime="2024-09-28T21:47:12Z" Type="ProcessTimeOffset" ID="0000000000000000" ProcessTime="1727560032.201446" SystemTime="1727560032.201447" OffsetFromSystemTime="-0.000000" ThreadID="10261369439410137752" Machine="10.10.69.117:57" LogGroup="default" ClientDescription="primary-7.3.43-13056699628119360000" />
<Event Severity="10" Time="1727560032.201496" DateTime="2024-09-28T21:47:12Z" Type="ConnectingTo" ID="0000000000000000" SuppressedEventCount="0" PeerAddr="172.20.247.149:4501" PeerReferences="3" FailureStatus="OK" ThreadID="10261369439410137752" Machine="10.10.69.117:57" LogGroup="default" ClientDescription="primary-7.3.43-13056699628119360000" />
<Event Severity="10" Time="1727560032.201496" DateTime="2024-09-28T21:47:12Z" Type="BlobStoreMetrics" ID="df17f05a0101c4d4" Elapsed="0" RequestsSuccessful="0 -1 0" RequestsFailed="0 -1 0" NewConnections="0 -1 0" ExpiredConnections="0 -1 0" ReusedConnections="0 -1 0" FastRetries="0 -1 0" GlobalConnectionPoolCount="0" GlobalConnectionPoolSize="0" ThreadID="10261369439410137752" Machine="10.10.69.117:57" LogGroup="default" ClientDescription="primary-7.3.43-13056699628119360000" TrackLatestType="Original" />
<Event Severity="10" Time="1727560032.201496" DateTime="2024-09-28T21:47:12Z" Type="ConnectionExchangingConnectPacket" ID="51d7a3fa8bfa5c6e" SuppressedEventCount="0" PeerAddr="172.20.247.149:4501" ThreadID="10261369439410137752" Machine="10.10.69.117:57" LogGroup="default" ClientDescription="primary-7.3.43-13056699628119360000" />
<Event Severity="10" Time="1727560032.204250" DateTime="2024-09-28T21:47:12Z" Type="ConnectionEstablished" ID="51d7a3fa8bfa5c6e" SuppressedEventCount="0" Peer="172.20.247.149:4501" ConnectionId="1" ThreadID="10261369439410137752" Machine="10.10.69.117:57" LogGroup="default" ClientDescription="primary-7.3.43-13056699628119360000" />
<Event Severity="10" Time="1727560032.204250" DateTime="2024-09-28T21:47:12Z" Type="ConnectedOutgoing" ID="0000000000000000" SuppressedEventCount="0" PeerAddr="172.20.247.149:4501" ThreadID="10261369439410137752" Machine="10.10.69.117:57" LogGroup="default" ClientDescription="primary-7.3.43-13056699628119360000" />
<Event Severity="10" Time="1727560032.232761" DateTime="2024-09-28T21:47:12Z" Type="S3BlobStoreEndpointNewConnectionSuccess" ID="0000000000000000" SuppressedEventCount="0" RemoteEndpoint="54.231.236.80:443:tls(fromHostname)" ExpiresIn="120" Proxy="" ThreadID="10261369439410137752" Machine="10.10.69.117:57" LogGroup="default" ClientDescription="primary-7.3.43-13056699628119360000" />
<Event Severity="10" Time="1727560032.232761" DateTime="2024-09-28T21:47:12Z" Type="AWSSDKInitSuccessful" ID="0000000000000000" ThreadID="10261369439410137752" Machine="10.10.69.117:57" LogGroup="default" ClientDescription="primary-7.3.43-13056699628119360000" />
<Event Severity="10" Time="1727560032.232761" DateTime="2024-09-28T21:47:12Z" Type="S3BlobStoreGotSdkCredentials" ID="0000000000000000" SuppressedEventCount="0" Duration="0.158162" ThreadID="10261369439410137752" Machine="10.10.69.117:57" LogGroup="default" ClientDescription="primary-7.3.43-13056699628119360000" />
<Event Severity="10" Time="1727560032.232761" DateTime="2024-09-28T21:47:12Z" Type="SlowTask" ID="0000000000000000" TaskID="8900" MClocks="348.959" Duration="0.158619" SampleRate="1" NumYields="1" ThreadID="10261369439410137752" Machine="10.10.69.117:57" LogGroup="default" ClientDescription="primary-7.3.43-13056699628119360000" />
<Event Severity="20" Time="1727560032.442914" DateTime="2024-09-28T21:47:12Z" Type="ParseS3XMLResponseNoError" ID="0000000000000000" Response="" ThreadID="10261369439410137752" Machine="10.10.69.117:57" LogGroup="default" ClientDescription="primary-7.3.43-13056699628119360000" />
<Event Severity="40" ErrorKind="Unset" Time="1727560032.442914" DateTime="2024-09-28T21:47:12Z" Type="StopAfterError" ID="0000000000000000" Error="backup_error" ErrorDescription="Backup error" ErrorCode="2300" ThreadID="10261369439410137752" Backtrace="addr2line -e fdbbackup.debug -p -C -f -i 0x14da7f7 0x14d9dd1 0x14d9f6d 0x6d5707 0x6d58bf 0x70f260 0x6f12fc 0x6f17bf 0x70f260 0x6f6742 0x6f70a1 0x70f260 0xa432a1 0xac7c3f 0x70f260 0x6cafd9 0x70f260 0x7f0e04 0x70f260 0xfd05fe 0xfc7eb0 0xfd018e 0xfdb1f0 0xff6519 0xff6974 0xff6baa 0xfdb0e0 0x1281638 0x128cbd8 0x7134c8 0x1290dd7 0x128d998 0x128db0d 0x7134c8 0x79a310 0x128c558 0x145ea78 0xcc43e2 0x68af46 0x7f5d0ab6f590" Machine="10.10.69.117:57" LogGroup="default" ClientDescription="primary-7.3.43-13056699628119360000" />
<Event Severity="20" Time="1727560032.442914" DateTime="2024-09-28T21:47:12Z" Type="S3BlobStoreEndpointRequestFailed" ID="0000000000000000" SuppressedEventCount="0" ResponseCode="403" S3ErrorCode="" ConnectionEstablished="1" ReusingConn="0" ConnID="82f5cf0fb20cb4f4" ConnectDuration="0.17573" ReqDuration="0.0518951" RemoteEndpoint="54.231.236.80:443:tls(fromHostname)" Verb="HEAD" Resource="/super-private-bucket-name" ThisTry="1" URI="/super-private-bucket-name" Proxy="" ThreadID="10261369439410137752" Machine="10.10.69.117:57" LogGroup="default" ClientDescription="primary-7.3.43-13056699628119360000" />
<Event Severity="10" Time="1727560032.442914" DateTime="2024-09-28T21:47:12Z" Type="MachineLoadDetail" ID="0000000000000000" User="144358039" Nice="50762" System="35703444" Idle="2035142928" IOWait="25796772" IRQ="0" SoftIRQ="3505726" Steal="178503" Guest="0" ThreadID="10261369439410137752" Machine="10.10.69.117:57" LogGroup="default" ClientDescription="primary-7.3.43-13056699628119360000" />
<Event Severity="10" Time="1727560032.442914" DateTime="2024-09-28T21:47:12Z" Type="ProcessMetrics" ID="0000000000000000" Elapsed="0.247243" CPUSeconds="0.021114" MainThreadCPUSeconds="0.01532" UptimeSeconds="0.248525" Memory="1512628224" ResidentMemory="25993216" UnusedAllocatedMemory="0" MbpsSent="0.0859397" MbpsReceived="0.114996" DiskTotalBytes="0" DiskFreeBytes="0" DiskQueueDepth="0" DiskIdleSeconds="0" DiskReads="0" DiskReadSeconds="0" DiskWrites="0" DiskWriteSeconds="0" DiskReadsCount="0" DiskWritesCount="0" DiskWriteSectors="0" DiskReadSectors="0" FileWrites="0" FileReads="0" CacheReadBytes="0" CacheFinds="0" CacheWritesBlocked="0" CacheReadsBlocked="0" CachePageReadsMerged="0" CacheWrites="0" CacheReads="0" CacheHits="0" CacheMisses="0" CacheEvictions="0" DCID="[not set]" ZoneID="[not set]" MachineID="[not set]" Version="[not set]" AIOSubmitCount="0" AIOCollectCount="0" AIOSubmitLag="0" AIODiskStall="0" CurrentConnections="5" ConnectionsEstablished="20.223" ConnectionsClosed="0" ConnectionErrors="0" TLSPolicyFailures="0" ThreadID="10261369439410137752" Machine="10.10.69.117:57" LogGroup="default" ClientDescription="primary-7.3.43-13056699628119360000" TrackLatestType="Original" />
<Event Severity="10" Time="1727560032.442914" DateTime="2024-09-28T21:47:12Z" Type="MemoryMetrics" ID="0000000000000000" TotalMemory16="0" ApproximateUnusedMemory16="0" ActiveThreads16="0" TotalMemory32="131072" ApproximateUnusedMemory32="0" ActiveThreads32="1" TotalMemory64="262144" ApproximateUnusedMemory64="0" ActiveThreads64="2" TotalMemory96="131040" ApproximateUnusedMemory96="0" ActiveThreads96="1" TotalMemory128="131072" ApproximateUnusedMemory128="0" ActiveThreads128="1" TotalMemory256="262144" ApproximateUnusedMemory256="0" ActiveThreads256="2" TotalMemory512="0" ApproximateUnusedMemory512="0" ActiveThreads512="0" TotalMemory1024="0" ApproximateUnusedMemory1024="0" ActiveThreads1024="0" TotalMemory2048="0" ApproximateUnusedMemory2048="0" ActiveThreads2048="0" TotalMemory4096="0" ApproximateUnusedMemory4096="0" ActiveThreads4096="0" TotalMemory8192="0" ApproximateUnusedMemory8192="0" ActiveThreads8192="0" TotalMemory16384="0" ApproximateUnusedMemory16384="0" ActiveThreads16384="0" HugeArenaMemory="0" DCID="[not set]" ZoneID="[not set]" MachineID="[not set]" ThreadID="10261369439410137752" Machine="10.10.69.117:57" LogGroup="default" ClientDescription="primary-7.3.43-13056699628119360000" />
<Event Severity="10" Time="1727560032.442914" DateTime="2024-09-28T21:47:12Z" Type="FastAllocMemoryUsage" ID="0000000000000000" TotalMemory="917472" UnusedMemory="0" Utilization="100.000000%" ThreadID="10261369439410137752" Machine="10.10.69.117:57" LogGroup="default" ClientDescription="primary-7.3.43-13056699628119360000" />