def parse_s3_uri(s3_uri):
    """
    Parse an S3 URI (e.g., 's3://bucket-name/folder/path') into its parts.

    Args:
        s3_uri (str): The S3 URI string.

    Returns:
        tuple: A tuple containing (bucket_name, folder_path). The path has
            its leading slashes removed and is '' when the URI names only
            a bucket.

    Raises:
        ValueError: If the URI scheme is not 's3', or if the URI does not
            contain a bucket name (e.g. 's3:///folder/path').
    """
    parsed_uri = urlparse(s3_uri)
    if parsed_uri.scheme != 's3':
        raise ValueError("Invalid S3 URI. Must start with 's3://'")

    # An empty network location means there is no bucket; fail here rather
    # than handing an empty bucket name to the caller.
    bucket_name = parsed_uri.netloc
    if not bucket_name:
        raise ValueError("Invalid S3 URI. Bucket name is missing")

    return bucket_name, parsed_uri.path.lstrip('/')
def download_and_upload_to_s3(zip_url, s3_uri, timeout=60):
    """
    Downloads a zip file from a URL using requests, extracts its contents,
    and uploads each file to an S3 bucket specified by an S3 URI.

    The whole archive and each extracted member are held in memory; nothing
    is written to local disk. Failures are reported with ``print`` and are
    not re-raised, so the function returns ``None`` whether or not the
    transfer succeeded.

    Args:
        zip_url (str): The URL of the zip file to download.
        s3_uri (str): The S3 URI (e.g., 's3://bucket-name/folder/path')
            where extracted files will be uploaded. A trailing '/' on the
            path is ignored.
        timeout (float or tuple): Timeout in seconds passed to
            ``requests.get`` so a stalled server cannot block forever.
    """
    try:
        # Ignore the InsecureRequestWarning when verify=False.
        # NOTE: this silences the warning for the whole process.
        requests.packages.urllib3.disable_warnings(
            requests.packages.urllib3.exceptions.InsecureRequestWarning
        )

        # 1. Parse the S3 URI
        s3_bucket, s3_path_prefix = parse_s3_uri(s3_uri)
        # Drop trailing slashes so 's3://bucket/folder/' does not produce
        # keys such as 'folder//file'.
        s3_path_prefix = s3_path_prefix.rstrip('/')

        # 2. Download the zip file into memory, ignoring SSL certificate errors
        # SECURITY: verify=False disables TLS certificate validation, so the
        # download can be intercepted. Kept because it is the existing,
        # deliberate behavior; prefer a trusted CA bundle where possible.
        print("Downloading zip file...")
        response = requests.get(zip_url, verify=False, timeout=timeout)
        response.raise_for_status()

        # 3. Extract and upload each file to S3
        zip_buffer = io.BytesIO(response.content)
        s3_client = boto3.client('s3')
        with zipfile.ZipFile(zip_buffer, 'r') as zip_file:
            file_list = zip_file.namelist()
            print(f"Found {len(file_list)} files in the zip.")
            for filename in file_list:
                # Names ending in '/' are directory entries; nothing to upload.
                if filename.endswith('/'):
                    continue
                with zip_file.open(filename, 'r') as file_in_zip:
                    file_buffer = io.BytesIO(file_in_zip.read())
                # lstrip handles an empty prefix (bucket-root upload).
                s3_key = f"{s3_path_prefix}/{filename}".lstrip('/')
                # Upload the file from memory to S3
                print(f"Uploading {s3_key} to {s3_bucket}...")
                s3_client.upload_fileobj(file_buffer, s3_bucket, s3_key)
        print("All files extracted and uploaded to S3 successfully!")
    except requests.exceptions.RequestException as e:
        print(f"HTTP Request failed: {e}")
    except zipfile.BadZipFile:
        print("The downloaded file is not a valid zip file.")
    except ValueError as e:
        print(f"Input error: {e}")
    except Exception as e:
        # Last-resort boundary: covers S3 client errors and anything else.
        print(f"An error occurred: {e}")