Hi,
I am "using boto3 copying an S3 object to itself" in order to change the last modified date. While
response = input_s3_client.copy_object(
Bucket=bucket,
CopySource={'Bucket': bucket, 'Key': key},
Key=key
)
has been very successful, for the file larger than 5GB, I have to use multipart upload to "upload a object to itself" in order to achieve the same goal. Apparently, AWS only gave Java and .net example for multipart upload : https://docs.aws.amazon.com/AmazonS3/latest/userguide/CopyingObjectsMPUapi.html
and I have wrote a little program based on python boto3.
import pytz
import boto3
import botocore
from boto3.s3.transfer import TransferConfig
part_size = 5 * 1024 * 1024
GB = 1024 ** 3
config = TransferConfig(multipart_threshold=5 * GB)
class S3Util:
def update_last_modified_date(self, input_s3_client, bucket: str, key: str) -> bool:
try:
start_time = datetime.now(tz=pytz.UTC)
# input_s3_client.upload_file(bucket, key, bucket, key, ExtraArgs={'ACL': 'bucket-owner-full-control'})
# Get object metadata (used for progress tracking)
head_object = input_s3_client.head_object(Bucket=bucket, Key=key)
object_size = head_object['ContentLength']
# Initiate multipart upload
upload_id = input_s3_client.create_multipart_upload(Bucket=bucket, Key=key)[
'UploadId']
uploaded_parts = []
# Upload in parts
part_number = 1
offset = 0
while offset < object_size:
# Determine part size for this iteration
remaining_size = object_size - offset
current_part_size = min(part_size, remaining_size)
# Get object data for this part
get_object_response = input_s3_client.get_object(Bucket=bucket, Key=key,
Range=f"bytes={offset}-{offset + current_part_size - 1}")
self.logger.info(f'body stream type: {type(get_object_response)}')
# Upload the part
upload_part_response = input_s3_client.upload_part(Bucket='podaac-dev-tst', Key='temp.nc',
UploadId=upload_id, PartNumber=part_number,
Body=get_object_response['Body'])
uploaded_parts.append({'PartNumber': part_number, 'ETag': upload_part_response['ETag']})
# Print progress (optional)
self.logger.info(
f"Uploaded part {part_number} of {int(object_size / part_size) + 1} ({offset + current_part_size}/{object_size} bytes)")
offset += current_part_size
part_number += 1
# Complete the multipart upload
input_s3_client.complete_multipart_upload(Bucket=bucket, Key=key,
UploadId=upload_id, Parts=uploaded_parts)
self.logger.info(f"Large object '{bucket}{key}' copied successfully")
while running the program against a 44GB file, I got
UnsupportOperation(seek) error at the line while calling the input_s3_client.uploadpart and I have been struggling with it. Can somebody please help? Any suggestion would be very much appreciated.
please accept the answer if it was useful