import argparse
import datetime
import hashlib
import os

from google.cloud import storage

#!/usr/bin/env python3
"""
Cloud Backup Automation Tool (Basic Version)
A simple script that automatically backs up files and directories to Google Cloud Storage.
"""


# Command-line interface: every option is declared as a (flag, settings) pair
# and registered in declaration order, then the process arguments are parsed
# into the module-level ``args`` namespace.
parser = argparse.ArgumentParser(description="Backup files to Google Cloud Storage")
for _flag, _settings in (
    (
        "--source",
        {"required": True, "help": "Source directory or file to backup"},
    ),
    (
        "--bucket",
        {"required": True, "help": "GCS bucket name"},
    ),
    (
        "--prefix",
        {"default": "backup", "help": "Prefix for backup objects in GCS"},
    ),
    (
        "--project",
        {"help": "GCP Project ID (if not using default credentials)"},
    ),
    (
        "--exclude",
        {"nargs": "*", "default": [], "help": "Patterns to exclude"},
    ),
    (
        "--retention",
        {"type": int, "default": 30, "help": "Number of days to retain backups"},
    ),
):
    parser.add_argument(_flag, **_settings)
args = parser.parse_args()


def calculate_md5(file_path):
    """Return the hexadecimal MD5 digest of the file at *file_path*.

    The file is opened in binary mode and consumed in 4096-byte chunks, so
    its full contents are never held in memory at once.
    """
    digest = hashlib.md5()
    with open(file_path, "rb") as stream:
        while True:
            block = stream.read(4096)
            if not block:
                # An empty read signals end of file.
                break
            digest.update(block)
    return digest.hexdigest()


def should_exclude(file_path, exclude_patterns):
    """Return True when any exclude pattern occurs inside *file_path*.

    Patterns are matched as plain substrings of the path (no glob or regex
    interpretation). An empty pattern collection never excludes anything.
    """
    return any(fragment in file_path for fragment in exclude_patterns)


def backup_file(client, source_path, bucket_name, destination_blob_name):
    """Upload a local file to a GCS bucket unless an identical object exists.

    Args:
        client: Storage client used to obtain the bucket handle.
        source_path: Path of the local file to upload.
        bucket_name: Name of the destination bucket.
        destination_blob_name: Object name to write inside the bucket.

    Returns:
        True if the file was uploaded, False if the upload was skipped because
        the existing object already has the same MD5 as the local file.
    """
    import base64  # local import: only this function needs it

    bucket = client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    if blob.exists():
        blob.reload()  # Fetch current metadata, including md5_hash

        # Some objects carry no MD5; in that case fall through and re-upload.
        if blob.md5_hash:
            # GCS reports md5_hash as the base64 encoding of the raw digest,
            # whereas calculate_md5 returns a hex string. Convert the local
            # digest to the same representation before comparing; comparing
            # hex against base64 directly can never match.
            local_md5 = base64.b64encode(
                bytes.fromhex(calculate_md5(source_path))
            ).decode("ascii")
            if blob.md5_hash == local_md5:
                print(f"File {source_path} unchanged, skipping upload")
                return False

    # Upload the file (new object, changed content, or no remote MD5)
    blob.upload_from_filename(source_path)
    print(f"File {source_path} uploaded to gs://{bucket_name}/{destination_blob_name}")
    return True


def backup_directory(client, source_dir, bucket_name, prefix, exclude_patterns):
    """Recursively back up a directory tree to a GCS bucket.

    Every file under *source_dir* is uploaded to
    ``{prefix}/{timestamp}/{path relative to source_dir}``, where the
    timestamp (local time, ``%Y%m%d_%H%M%S``) is generated once per call.

    Args:
        client: Storage client passed through to ``backup_file``.
        source_dir: Root directory to walk.
        bucket_name: Name of the destination bucket.
        prefix: Object-name prefix under which the backup is stored.
        exclude_patterns: Patterns handed to ``should_exclude``; matching
            files are not uploaded.

    Returns:
        A tuple ``(timestamp, uploaded_count, skipped_count, error_count)``.
        ``skipped_count`` covers both excluded files and files that
        ``backup_file`` reported as not uploaded.
    """
    uploaded_count = 0
    skipped_count = 0
    error_count = 0

    # Generate timestamp for this backup
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_prefix = f"{prefix}/{timestamp}"

    for root, _dirs, files in os.walk(source_dir):
        for file_name in files:
            source_path = os.path.join(root, file_name)

            # Skip files matching exclude patterns
            if should_exclude(source_path, exclude_patterns):
                print(f"Skipping excluded file: {source_path}")
                skipped_count += 1
                continue

            # Preserve the directory structure in the object name. Object
            # names use "/" as the hierarchy separator, so convert the
            # OS-native separator (a backslash on Windows) to "/".
            rel_path = os.path.relpath(source_path, source_dir).replace(os.sep, "/")
            destination_blob_name = f"{backup_prefix}/{rel_path}"

            try:
                result = backup_file(
                    client, source_path, bucket_name, destination_blob_name
                )
                if result:
                    uploaded_count += 1
                else:
                    skipped_count += 1
            except Exception as e:
                # Best effort: record the failure and continue with the
                # remaining files instead of aborting the whole backup.
                print(f"Error uploading {source_path}: {e}")
                error_count += 1

    print("\nBackup Summary:")
    print(f"  Timestamp: {timestamp}")
    print(f"  Files uploaded: {uploaded_count}")
    print(f"  Files skipped: {skipped_count}")
    print(f"  Errors: {error_count}")

    return timestamp, uploaded_count, skipped_count, error_count


def cleanup_old_backups(client, bucket_name, prefix, retention_days):
    """Delete backups older than the retention period.

    Backups are expected under ``{prefix}/{timestamp}/...``. The timestamp is
    the first path component after the prefix, so prefixes that themselves
    contain ``/`` are handled. Components that do not parse as a backup
    timestamp are left untouched.

    Args:
        client: Storage client providing ``list_blobs``.
        bucket_name: Name of the bucket holding the backups.
        prefix: Object-name prefix the backups were written under.
        retention_days: Backups whose timestamp is more than this many days
            before the current local time are deleted.
    """
    retention_date = datetime.datetime.now() - datetime.timedelta(days=retention_days)

    # Trailing slash keeps sibling prefixes (e.g. "backup2/") out of the listing.
    base = f"{prefix}/"

    # Directory backups use the underscore form; the compact form is also
    # accepted so backups written without the underscore are still pruned.
    timestamp_formats = ("%Y%m%d_%H%M%S", "%Y%m%d%H%M%S")

    # Collect the unique first path component below the prefix.
    timestamps = set()
    for blob in client.list_blobs(bucket_name, prefix=base):
        candidate, separator, _rest = blob.name[len(base):].partition("/")
        if separator:  # objects directly under the prefix are not backups
            timestamps.add(candidate)

    deleted_count = 0
    for ts in sorted(timestamps):
        backup_date = None
        for fmt in timestamp_formats:
            try:
                backup_date = datetime.datetime.strptime(ts, fmt)
                break
            except ValueError:
                continue

        # Skip components that are not timestamps and backups still in retention.
        if backup_date is None or backup_date >= retention_date:
            continue

        print(f"Deleting old backup from {backup_date}")

        # Trailing slash restricts deletion to exactly this backup's objects.
        for blob in client.list_blobs(bucket_name, prefix=f"{base}{ts}/"):
            blob.delete()
            deleted_count += 1

    print(
        f"Deleted {deleted_count} files from backups older than {retention_days} days"
    )


def main():
    """Run one backup using the parsed command-line arguments.

    Reads the module-level ``args`` namespace. Creates the bucket if it does
    not exist, backs up ``args.source`` (a directory tree or a single file)
    under ``{args.prefix}/{timestamp}/``, and, after a fully successful
    backup, prunes backups older than ``args.retention`` days.

    Returns:
        0 when the backup completed without errors; 1 when the source is
        neither a file nor a directory, when any file failed to upload, or
        when an exception was raised.
    """
    try:
        # Create GCS client
        if args.project:
            client = storage.Client(project=args.project)
        else:
            client = storage.Client()

        # Ensure bucket exists
        bucket = client.bucket(args.bucket)
        if not bucket.exists():
            print(f"Creating bucket {args.bucket}")
            bucket.create()

        errors = 0
        if os.path.isdir(args.source):
            timestamp, uploaded, skipped, errors = backup_directory(
                client, args.source, args.bucket, args.prefix, args.exclude
            )

            # Store a small summary next to the backed-up files
            metadata = {
                "timestamp": timestamp,
                "source": args.source,
                "files_uploaded": uploaded,
                "files_skipped": skipped,
                "errors": errors,
            }
            metadata_blob = bucket.blob(
                f"{args.prefix}/{timestamp}/backup_metadata.txt"
            )
            metadata_blob.upload_from_string(
                "\n".join(f"{k}: {v}" for k, v in metadata.items())
            )

        elif os.path.isfile(args.source):
            file_name = os.path.basename(args.source)
            # Same timestamp format as directory backups, so that
            # cleanup_old_backups recognises single-file backups too.
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

            # Upload single file
            destination_blob_name = f"{args.prefix}/{timestamp}/{file_name}"
            bucket.blob(destination_blob_name).upload_from_filename(args.source)
            print(f"File {args.source} uploaded to {destination_blob_name}")
        else:
            print(f"Error: {args.source} is neither a file nor a directory")
            return 1

        if errors:
            # Report partial failure and keep older backups: the new backup
            # is incomplete, so nothing is pruned.
            print(f"Backup completed with {errors} error(s)")
            return 1

        # Enforce --retention. Non-positive values are ignored, because a
        # zero or negative window would also cover the backup just written.
        if args.retention > 0:
            cleanup_old_backups(client, args.bucket, args.prefix, args.retention)

        print("Backup completed successfully")
        return 0

    except Exception as e:
        # Top-level boundary: report the failure and signal it via exit code.
        print(f"Error during backup: {e}")
        return 1
