import argparse
import csv
import datetime
import os
import re
import sys
from collections import Counter, defaultdict

#!/usr/bin/env python3
"""
Log Analysis Security Scanner (Basic Version)
A tool to analyze system and application logs for security threats.
"""


# Set up argument parser
# NOTE: the parser is built and run at module level, so the command line is
# parsed as soon as this module is imported (argparse exits if --log is
# missing). The resulting module-level `args` namespace is read by the
# detection functions and by main().
parser = argparse.ArgumentParser(description="Analyze logs for security threats")
parser.add_argument("--log", required=True, help="Path to log file")
# --output is only used for the text report; CSV mode writes fixed file names.
parser.add_argument(
    "--output", default="security_report.txt", help="Output report file"
)
parser.add_argument(
    "--format",
    choices=["syslog", "apache", "nginx", "custom"],
    default="syslog",
    help="Log format",
)
# --pattern is only consulted when --format=custom; main() rejects the
# custom format when no pattern is given.
parser.add_argument(
    "--pattern", help="Custom regex pattern for parsing (if format=custom)"
)
# An IP is reported once its request count is >= this value.
parser.add_argument(
    "--ip-threshold",
    type=int,
    default=10,
    help="Threshold for suspicious IP access count",
)
# A process is reported once it has >= this many error-like entries.
parser.add_argument(
    "--error-threshold",
    type=int,
    default=5,
    help="Threshold for suspicious error count",
)
# With --csv, one CSV file per finding type is written (relative file names,
# i.e. into the current working directory) instead of the text report.
parser.add_argument("--csv", action="store_true", help="Output results in CSV format")
args = parser.parse_args()


def parse_syslog(line):
    """Split one syslog line into its timestamp, host, process and message.

    Returns a dict holding those four fields plus the untouched input under
    ``"raw"``, or ``None`` when the line does not have the expected
    ``<Mon> <day> <hh:mm:ss> <host> <process>: <message>`` layout.
    """
    found = re.match(
        r"(\w{3}\s+\d+\s+\d+:\d+:\d+)\s+(\S+)\s+([^:]+):\s+(.*)", line
    )
    if found is None:
        return None

    # Pair the capture groups with their field names, in pattern order.
    fields = dict(zip(("timestamp", "host", "process", "message"), found.groups()))
    fields["raw"] = line
    return fields


def parse_apache(line):
    """Parse one line of an Apache combined-format access log.

    Returns a dict of string fields (``ip``, ``timestamp``, ``method``,
    ``path``, ``protocol``, ``status``, ``bytes_sent``, ``referer``,
    ``user_agent``, ``raw``), or ``None`` if the line does not match.
    A byte count logged as ``-`` is reported as ``"0"``.
    """
    found = re.match(
        r'(\S+) \S+ \S+ \[([^]]+)\] "(\S+) (\S+) (\S+)" (\d+) (\d+|-) "([^"]*)" "([^"]*)"',
        line,
    )
    if found is None:
        return None

    field_names = (
        "ip",
        "timestamp",
        "method",
        "path",
        "protocol",
        "status",
        "bytes_sent",
        "referer",
        "user_agent",
    )
    record = dict(zip(field_names, found.groups()))
    # Apache writes "-" when no response body was sent.
    if record["bytes_sent"] == "-":
        record["bytes_sent"] = "0"
    record["raw"] = line
    return record


def parse_nginx(line):
    """Parse one line of an Nginx access log in the default format.

    Returns a dict of string fields (``ip``, ``timestamp``, ``method``,
    ``path``, ``protocol``, ``status``, ``bytes_sent``, ``referer``,
    ``user_agent``, ``raw``), or ``None`` if the line does not match.
    """
    found = re.match(
        r'(\S+) - \S+ \[([^]]+)\] "(\S+) (\S+) (\S+)" (\d+) (\d+) "([^"]*)" "([^"]*)"',
        line,
    )
    if found is None:
        return None

    field_names = (
        "ip",
        "timestamp",
        "method",
        "path",
        "protocol",
        "status",
        "bytes_sent",
        "referer",
        "user_agent",
    )
    # Capture groups appear in the same order as the field names above.
    record = dict(zip(field_names, found.groups()))
    record["raw"] = line
    return record


def parse_custom(line, pattern):
    """Parse a log line using a custom regex pattern.

    Args:
        line: The log line to parse.
        pattern: A regular expression (string or compiled) that is matched
            against the start of *line*.

    Returns:
        ``None`` if the pattern does not match. Otherwise a dict with:

        * ``"raw"``: the original line;
        * ``"1"``, ``"2"``, ...: every capture group by position (``None``
          for groups that did not participate in the match);
        * one entry per *named* group that participated in the match, keyed
          by the group name. This lets a custom pattern feed the detectors,
          which look for keys such as ``ip``, ``method``, ``path``,
          ``status``, ``host``, ``process`` and ``message``.

    Raises:
        re.error: If *pattern* is not a valid regular expression.
    """
    match = re.match(pattern, line)
    if match is None:
        return None

    entry = {str(i): value for i, value in enumerate(match.groups(), 1)}
    # Skip named groups that did not match so detectors never see a None
    # "ip"/"path"/... value.
    entry.update(
        (name, value)
        for name, value in match.groupdict().items()
        if value is not None
    )
    # Set last so a group that happens to be named "raw" cannot replace it.
    entry["raw"] = line
    return entry


def parse_log_line(line, format_type, custom_pattern=None):
    """Route *line* to the parser that handles *format_type*.

    Returns whatever the selected parser returns (a dict, or ``None`` when
    the line does not match). ``None`` is also returned for an unknown
    format and for the custom format when no pattern was supplied.
    """
    if format_type == "custom":
        return parse_custom(line, custom_pattern) if custom_pattern else None

    # Built-in formats; the lambdas defer the call until one is selected.
    builtin_parsers = {
        "syslog": lambda: parse_syslog(line),
        "apache": lambda: parse_apache(line),
        "nginx": lambda: parse_nginx(line),
    }
    handler = builtin_parsers.get(format_type)
    if handler is None:
        return None
    return handler()


def detect_suspicious_ips(log_entries, threshold=None):
    """Detect suspicious IPs based on access frequency.

    Args:
        log_entries: Iterable of parsed entry dicts. Entries without an
            ``"ip"`` key are ignored.
        threshold: Minimum number of requests for an IP to be reported.
            When ``None`` (the default) the ``--ip-threshold`` command-line
            value from the module-level ``args`` is used, so existing
            callers behave as before.

    Returns:
        A list with one dict per IP whose request count is >= *threshold*,
        in order of first appearance. Each dict has the keys ``ip``,
        ``count``, ``methods``, ``paths`` and ``statuses``; the last three
        map each observed value to how often that IP produced it.
    """
    if threshold is None:
        threshold = args.ip_threshold

    ip_counts = Counter()
    method_counts = defaultdict(Counter)
    path_counts = defaultdict(Counter)
    status_counts = defaultdict(Counter)

    for entry in log_entries:
        if "ip" not in entry:
            continue
        ip = entry["ip"]
        ip_counts[ip] += 1

        if "method" in entry:
            method_counts[ip][entry["method"]] += 1
        if "path" in entry:
            path_counts[ip][entry["path"]] += 1
        if "status" in entry:
            status_counts[ip][entry["status"]] += 1

    return [
        {
            "ip": ip,
            "count": count,
            "methods": dict(method_counts[ip]),
            "paths": dict(path_counts[ip]),
            "statuses": dict(status_counts[ip]),
        }
        for ip, count in ip_counts.items()
        if count >= threshold
    ]


def detect_authentication_failures(log_entries):
    """Collect the entries whose text indicates a failed login attempt.

    The entry's ``"message"`` is inspected (falling back to ``"raw"``, then
    to an empty string) with a case-insensitive search for a fixed set of
    phrases. Matching entries are returned in their original order.
    """
    failure_markers = (
        r"failed password",
        r"authentication failure",
        r"failed login",
        r"invalid user",
        r"failed auth",
        r"login failed",
    )

    def mentions_failure(entry):
        text = entry.get("message", entry.get("raw", ""))
        return any(
            re.search(marker, text, re.IGNORECASE) for marker in failure_markers
        )

    return [entry for entry in log_entries if mentions_failure(entry)]


def detect_error_patterns(log_entries, threshold=None):
    """Group error-like log entries by process.

    An entry counts as an error when its ``"message"`` (falling back to
    ``"raw"``) contains one of a fixed set of keywords, case-insensitively.

    Args:
        log_entries: Iterable of parsed entry dicts.
        threshold: Minimum number of error entries a process needs to be
            reported. When ``None`` (the default) the ``--error-threshold``
            command-line value from the module-level ``args`` is used, so
            existing callers behave as before.

    Returns:
        A dict mapping process name (``"unknown"`` when the entry has no
        ``"process"`` key) to the list of its error entries, restricted to
        processes with at least *threshold* such entries.
    """
    if threshold is None:
        threshold = args.error_threshold

    error_patterns = [
        r"error",
        r"exception",
        r"fail",
        r"critical",
        r"warn",
        r"denied",
        r"refused",
        r"unauthorized",
    ]

    errors = defaultdict(list)
    for entry in log_entries:
        message = entry.get("message", entry.get("raw", ""))
        if any(re.search(p, message, re.IGNORECASE) for p in error_patterns):
            errors[entry.get("process", "unknown")].append(entry)

    # Drop processes with fewer errors than the threshold.
    return {
        process: entries
        for process, entries in errors.items()
        if len(entries) >= threshold
    }


def detect_unusual_activity(log_entries):
    """Bucket entries that show uncommon methods, sensitive paths or statuses.

    Returns a dict with three ``defaultdict(list)`` values:

    * ``"unusual_methods"``: entries grouped by HTTP method, for methods
      outside GET/POST/HEAD/OPTIONS/PUT/DELETE;
    * ``"sensitive_paths"``: entries grouped by path, for paths matching one
      of the sensitive-looking patterns (case-insensitive);
    * ``"unusual_status"``: entries grouped by status, for a fixed list of
      4xx/5xx codes.

    Entries lacking the relevant key are skipped for that check.
    """
    expected_methods = {"GET", "POST", "HEAD", "OPTIONS", "PUT", "DELETE"}
    sensitive_markers = (
        r"\.php$",
        r"\.config$",
        r"\.ini$",
        r"\.env$",
        r"wp-admin",
        r"admin",
        r"login",
        r"passwd",
        r"\.git",
        r"\.htaccess",
        r"backup",
        r"/etc/",
        r"/var/log",
    )
    # Status codes that might indicate probing or vulnerabilities.
    flagged_statuses = (
        "500",
        "501",
        "502",
        "503",
        "504",
        "400",
        "401",
        "403",
        "405",
        "406",
        "407",
        "408",
        "444",
    )

    by_method = defaultdict(list)
    by_path = defaultdict(list)
    by_status = defaultdict(list)

    for record in log_entries:
        if "method" in record:
            verb = record["method"]
            if verb not in expected_methods:
                by_method[verb].append(record)

        if "path" in record:
            target = record["path"]
            if any(
                re.search(marker, target, re.IGNORECASE)
                for marker in sensitive_markers
            ):
                by_path[target].append(record)

        if "status" in record and record["status"] in flagged_statuses:
            by_status[record["status"]].append(record)

    return {
        "unusual_methods": by_method,
        "sensitive_paths": by_path,
        "unusual_status": by_status,
    }


def _truncate(text, limit):
    """Return *text* cut to *limit* characters, adding "..." only when cut."""
    if len(text) <= limit:
        return text
    return f"{text[:limit]}..."


def _sample_values(entries, key, limit=5):
    """Describe the distinct *key* values found in *entries* as one string.

    Values are sorted so the report does not depend on set iteration order,
    which changes between runs for strings. At most *limit* values are
    listed; any remainder is summarized as "and N more".
    """
    values = sorted({str(entry.get(key, "unknown")) for entry in entries})
    text = ", ".join(values[:limit])
    if len(values) > limit:
        text += f" and {len(values) - limit} more"
    return text


def generate_text_report(
    suspicious_ips,
    auth_failures,
    error_patterns,
    unusual_activity,
    log_file,
    format_type,
):
    """Generate a text-based security report.

    Args:
        suspicious_ips: List of dicts with the keys ``ip``, ``count``,
            ``methods``, ``paths`` and ``statuses``.
        auth_failures: List of entry dicts flagged as failed authentication.
        error_patterns: Dict mapping process name to its error entries.
        unusual_activity: Dict with the keys ``unusual_methods``,
            ``sensitive_paths`` and ``unusual_status``, each mapping a value
            to the list of entries that produced it.
        log_file: Path of the analyzed log, shown in the header.
        format_type: Log format name, shown in the header.

    Returns:
        The complete report as a single newline-joined string. Sections
        without findings are omitted. Recommendations are numbered
        consecutively from 1, and a placeholder line is emitted when no
        recommendation applies.
    """
    now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    banner = "=" * 80
    rule = "-" * 80

    unusual_methods = unusual_activity["unusual_methods"]
    sensitive_paths = unusual_activity["sensitive_paths"]
    unusual_status = unusual_activity["unusual_status"]

    report = [
        banner,
        "SECURITY LOG ANALYSIS REPORT",
        f"Generated: {now}",
        f"Log File: {log_file}",
        f"Format: {format_type}",
        banner,
        "",
    ]

    # Summary section. Each suspicious IP and each error-prone process is
    # one issue; auth failures and each unusual-activity category count once.
    total_issues = (
        len(suspicious_ips)
        + bool(auth_failures)
        + len(error_patterns)
        + sum(len(v) > 0 for v in unusual_activity.values())
    )
    report += [
        "SUMMARY",
        rule,
        f"Total Potential Security Issues: {total_issues}",
        f"Suspicious IP Addresses: {len(suspicious_ips)}",
        f"Authentication Failures: {len(auth_failures)}",
        f"Error Patterns: {len(error_patterns)}",
        f"Unusual HTTP Methods: {len(unusual_methods)}",
        f"Sensitive Path Access: {len(sensitive_paths)}",
        f"Unusual Status Codes: {len(unusual_status)}",
        "",
    ]

    # Suspicious IPs section
    if suspicious_ips:
        report.append("SUSPICIOUS IP ADDRESSES")
        report.append(rule)
        for i, ip_data in enumerate(suspicious_ips, 1):
            report.append(f"[{i}] IP: {ip_data['ip']}")
            report.append(f"    Access Count: {ip_data['count']}")

            if ip_data["methods"]:
                methods_str = ", ".join(
                    f"{m}: {c}" for m, c in ip_data["methods"].items()
                )
                report.append(f"    HTTP Methods: {methods_str}")

            if ip_data["statuses"]:
                status_str = ", ".join(
                    f"{s}: {c}" for s, c in ip_data["statuses"].items()
                )
                report.append(f"    Status Codes: {status_str}")

            # Show top 5 accessed paths
            if ip_data["paths"]:
                top_paths = sorted(
                    ip_data["paths"].items(), key=lambda x: x[1], reverse=True
                )[:5]
                report.append("    Top Paths:")
                for path, hits in top_paths:
                    report.append(f"      - {path}: {hits}")

            report.append("")

    # Authentication failures section
    if auth_failures:
        report.append("AUTHENTICATION FAILURES")
        report.append(rule)
        report.append(f"Total Authentication Failures: {len(auth_failures)}")

        # Group by host if available
        auth_by_host = defaultdict(list)
        for entry in auth_failures:
            auth_by_host[entry.get("host", "unknown")].append(entry)

        report.append(f"Hosts with Authentication Failures: {len(auth_by_host)}")
        for host, entries in auth_by_host.items():
            report.append(f"  - {host}: {len(entries)} failures")

        report.append("\nSample Authentication Failure Messages:")
        for i, entry in enumerate(auth_failures[:5], 1):
            timestamp = entry.get("timestamp", "unknown")
            message = entry.get("message", entry.get("raw", "unknown"))
            report.append(f"[{i}] {timestamp}: {_truncate(message, 100)}")

        report.append("")

    # Error patterns section
    if error_patterns:
        report.append("ERROR PATTERNS")
        report.append(rule)
        for process, entries in error_patterns.items():
            report.append(f"Process: {process}")
            report.append(f"  Error Count: {len(entries)}")

            # Group similar errors by masking the variable parts of each
            # message: numbers, long hex strings, then file names.
            error_types = Counter()
            for entry in entries:
                message = entry.get("message", entry.get("raw", ""))
                simplified = re.sub(r"\b\d+\b", "XXX", message)
                simplified = re.sub(r"\b[a-f0-9]{8,}\b", "HASH", simplified)
                simplified = re.sub(
                    r"\b\w+\.[a-zA-Z0-9]{3,4}\b", "FILE.EXT", simplified
                )
                error_types[simplified] += 1

            # Show top error types
            report.append("  Top Error Types:")
            for error_type, occurrences in sorted(
                error_types.items(), key=lambda x: x[1], reverse=True
            )[:5]:
                report.append(
                    f"    - {_truncate(error_type, 80)} ({occurrences} occurrences)"
                )

            report.append("")

    # Unusual activity section
    if any(len(v) > 0 for v in unusual_activity.values()):
        report.append("UNUSUAL ACTIVITY")
        report.append(rule)

        if unusual_methods:
            report.append("Unusual HTTP Methods:")
            for method, entries in unusual_methods.items():
                report.append(f"  - {method}: {len(entries)} requests")
                report.append(f"    From IPs: {_sample_values(entries, 'ip')}")
            report.append("")

        if sensitive_paths:
            report.append("Sensitive Path Access:")
            # Only the ten most requested sensitive paths are listed.
            for path, entries in sorted(
                sensitive_paths.items(),
                key=lambda x: len(x[1]),
                reverse=True,
            )[:10]:
                report.append(f"  - {path}: {len(entries)} requests")
                report.append(f"    From IPs: {_sample_values(entries, 'ip')}")
            report.append("")

        if unusual_status:
            report.append("Unusual Status Codes:")
            for status, entries in unusual_status.items():
                report.append(f"  - {status}: {len(entries)} responses")
                report.append(f"    For paths: {_sample_values(entries, 'path')}")
            report.append("")

    # Recommendations section. Collect (title, detail lines) pairs first so
    # the numbering stays consecutive whichever findings are present.
    recommendations = []

    if suspicious_ips:
        details = [
            f"   - {ip_data['ip']} ({ip_data['count']} requests)"
            for ip_data in suspicious_ips[:5]
        ]
        if len(suspicious_ips) > 5:
            details.append(
                f"   - Plus {len(suspicious_ips) - 5} more IPs (see full report)"
            )
        recommendations.append(
            ("Monitor or block the following suspicious IP addresses:", details)
        )

    if auth_failures:
        recommendations.append(
            (
                "Review authentication security:",
                [
                    "   - Check for brute force attacks",
                    "   - Consider implementing account lockouts",
                    "   - Enable two-factor authentication if available",
                ],
            )
        )

    if sensitive_paths:
        recommendations.append(
            (
                "Secure sensitive paths:",
                [
                    "   - Restrict access to admin areas",
                    "   - Ensure proper permissions on sensitive files",
                    "   - Consider implementing a web application firewall",
                ],
            )
        )

    if error_patterns:
        recommendations.append(
            (
                "Investigate recurring errors:",
                [
                    "   - Address application errors that might indicate vulnerabilities",
                    "   - Check for failed exploitation attempts",
                ],
            )
        )

    report.append("RECOMMENDATIONS")
    report.append(rule)
    if recommendations:
        for number, (title, details) in enumerate(recommendations, 1):
            report.append(f"{number}. {title}")
            report.extend(details)
    else:
        report.append("No specific recommendations for this log.")

    report.append("")
    report.append(banner)
    report.append(
        "End of Security Report | Generated by Log Analysis Security Scanner"
    )
    report.append(banner)

    return "\n".join(report)


def generate_csv_report(
    suspicious_ips, auth_failures, error_patterns, unusual_activity
):
    """Generate CSV reports for security findings.

    Args:
        suspicious_ips: List of dicts with ``ip`` and ``count`` and,
            optionally, ``methods``, ``statuses`` and ``paths`` counters.
        auth_failures: List of entry dicts flagged as failed authentication.
        error_patterns: Dict mapping process name to its error entries.
        unusual_activity: Dict with the keys ``unusual_methods`` and
            ``sensitive_paths`` (required) and ``unusual_status``
            (optional), each mapping a value to the entries that produced it.

    Returns:
        A dict mapping a CSV file name to its rows (a list of lists of
        strings whose first row is the header). A file is only included
        when the corresponding finding is non-empty. Sampled IPs and paths
        are sorted so the output is the same on every run.
    """

    def sample(entries, key):
        # Up to five distinct values, sorted for reproducible output.
        distinct = sorted({str(entry.get(key, "unknown")) for entry in entries})
        return "; ".join(distinct[:5])

    reports = {}

    # Suspicious IPs report
    if suspicious_ips:
        csv_data = [["IP", "Request Count", "Methods", "Status Codes", "Top Paths"]]

        for ip_data in suspicious_ips:
            methods = "; ".join(
                f"{m}: {c}" for m, c in ip_data.get("methods", {}).items()
            )
            statuses = "; ".join(
                f"{s}: {c}" for s, c in ip_data.get("statuses", {}).items()
            )

            # Top 3 paths by request count
            top_paths = sorted(
                (ip_data.get("paths") or {}).items(),
                key=lambda x: x[1],
                reverse=True,
            )[:3]
            paths_str = "; ".join(f"{path}: {count}" for path, count in top_paths)

            csv_data.append(
                [ip_data["ip"], str(ip_data["count"]), methods, statuses, paths_str]
            )

        reports["suspicious_ips.csv"] = csv_data

    # Authentication failures report
    if auth_failures:
        csv_data = [["Timestamp", "Host", "Process", "Message"]]
        csv_data.extend(
            [
                entry.get("timestamp", ""),
                entry.get("host", ""),
                entry.get("process", ""),
                entry.get("message", entry.get("raw", "")),
            ]
            for entry in auth_failures
        )
        reports["auth_failures.csv"] = csv_data

    # Error patterns report
    if error_patterns:
        csv_data = [["Process", "Error Count", "Sample Error"]]

        for process, entries in error_patterns.items():
            sample_error = (
                entries[0].get("message", entries[0].get("raw", "")) if entries else ""
            )
            csv_data.append([process, str(len(entries)), sample_error])

        reports["error_patterns.csv"] = csv_data

    # Unusual activity reports
    if unusual_activity["unusual_methods"]:
        csv_data = [["Method", "Count", "Sample IPs"]]
        for method, entries in unusual_activity["unusual_methods"].items():
            csv_data.append([method, str(len(entries)), sample(entries, "ip")])
        reports["unusual_methods.csv"] = csv_data

    if unusual_activity["sensitive_paths"]:
        csv_data = [["Path", "Access Count", "Sample IPs"]]
        for path, entries in unusual_activity["sensitive_paths"].items():
            csv_data.append([path, str(len(entries)), sample(entries, "ip")])
        reports["sensitive_paths.csv"] = csv_data

    # Unusual status codes are part of the text report and the summary, so
    # export them too. `.get` keeps callers that omit the key working.
    unusual_status = unusual_activity.get("unusual_status")
    if unusual_status:
        csv_data = [["Status", "Count", "Sample Paths"]]
        for status, entries in unusual_status.items():
            csv_data.append([status, str(len(entries)), sample(entries, "path")])
        reports["unusual_status.csv"] = csv_data

    return reports


def write_csv_file(filename, data):
    """Write rows to a CSV file, replacing any existing file.

    Args:
        filename: Path of the file to create.
        data: Iterable of rows, each an iterable of cell values.

    Raises:
        OSError: If the file cannot be created or written.
    """
    # UTF-8 is set explicitly because log text may contain non-ASCII
    # characters (including U+FFFD from lossy decoding) that the platform's
    # default encoding cannot represent. newline="" lets the csv module
    # control line endings.
    with open(filename, "w", newline="", encoding="utf-8") as f:
        csv.writer(f).writerows(data)


def main():
    """Run the scanner: validate options, parse the log, analyze, report.

    Reads its configuration from the module-level ``args``. Progress and the
    final summary go to stdout; error messages go to stderr. The process
    exits with status 1 when the options are invalid, the log cannot be
    read, no line could be parsed, or the report cannot be written.
    """
    # Validate arguments
    if args.format == "custom":
        if not args.pattern:
            print("Error: Custom format requires a regex pattern", file=sys.stderr)
            sys.exit(1)
        # Reject a broken pattern up front so it is not misreported later
        # as a problem with reading the log file.
        try:
            re.compile(args.pattern)
        except re.error as e:
            print(f"Error: Invalid custom regex pattern: {e}", file=sys.stderr)
            sys.exit(1)

    if not os.path.exists(args.log):
        print(f"Error: Log file '{args.log}' not found", file=sys.stderr)
        sys.exit(1)

    print(f"Analyzing log file: {args.log}")
    print(f"Format: {args.format}")

    # Parse log file
    log_entries = []
    line_count = 0

    try:
        # Undecodable bytes are replaced rather than aborting the scan.
        with open(args.log, "r", encoding="utf-8", errors="replace") as f:
            for line in f:
                line_count += 1
                line = line.strip()
                if not line:
                    continue

                entry = parse_log_line(line, args.format, args.pattern)
                if entry:
                    log_entries.append(entry)
    except OSError as e:
        print(f"Error reading log file: {e}", file=sys.stderr)
        sys.exit(1)

    parsed_count = len(log_entries)
    print(f"Processed {line_count} lines, successfully parsed {parsed_count} entries")

    if not log_entries:
        print(
            "No valid log entries found. Check the format specification.",
            file=sys.stderr,
        )
        sys.exit(1)

    # Analyze log entries
    print("Analyzing log entries for security issues...")
    suspicious_ips = detect_suspicious_ips(log_entries)
    auth_failures = detect_authentication_failures(log_entries)
    error_patterns = detect_error_patterns(log_entries)
    unusual_activity = detect_unusual_activity(log_entries)

    # Generate reports
    if args.csv:
        print("Generating CSV reports...")
        csv_reports = generate_csv_report(
            suspicious_ips, auth_failures, error_patterns, unusual_activity
        )

        try:
            for filename, data in csv_reports.items():
                write_csv_file(filename, data)
                print(f"- Wrote {filename}")
        except OSError as e:
            print(f"Error writing report: {e}", file=sys.stderr)
            sys.exit(1)
    else:
        print("Generating text report...")
        report = generate_text_report(
            suspicious_ips,
            auth_failures,
            error_patterns,
            unusual_activity,
            args.log,
            args.format,
        )

        try:
            # UTF-8 so that any non-ASCII log text in the report can
            # always be written, whatever the platform default encoding.
            with open(args.output, "w", encoding="utf-8") as f:
                f.write(report)
        except OSError as e:
            print(f"Error writing report: {e}", file=sys.stderr)
            sys.exit(1)

        print(f"Report saved to {args.output}")

    # Print summary
    print("\nAnalysis Summary:")
    print(f"- Suspicious IP addresses: {len(suspicious_ips)}")
    print(f"- Authentication failures: {len(auth_failures)}")
    print(f"- Error patterns detected: {len(error_patterns)}")

    unusual_count = sum(1 for v in unusual_activity.values() if v)
    print(f"- Unusual activity types: {unusual_count}")

    if suspicious_ips or auth_failures or error_patterns or unusual_count:
        print("\nPotential security issues detected! Review the report for details.")
    else:
        print("\nNo significant security issues detected.")


if __name__ == "__main__":
    main()
