#!/usr/bin/env python3
import os
import re
import subprocess
import sys
import concurrent.futures
import tempfile
import shutil

# Regex to capture PN and PV from an ebuild filename.
# Example: ollama-bin-0.10.1.ebuild -> PN=ollama-bin, PV=0.10.1
# Example: g2-bin-0.0.2.ebuild -> PN=g2-bin, PV=0.0.2
# A simplification of the full PMS version grammar, but it covers most
# cases in this overlay. The optional (-r\d+) tail captures the Gentoo
# revision so revisioned filenames still match.
EBUILD_FILENAME_PATTERN = re.compile(r'^(?P<pn>.+)-(?P<pv>\d+(\.\d+)*([a-z]|_p\d+|_rc\d+|_beta\d+|_alpha\d+)?(-r\d+)?)\.ebuild$')

def parse_ebuild_variables(filename):
    """Derive the Gentoo-style P/PN/PV variables from an ebuild filename.

    Args:
        filename: Path or bare name of an ebuild file.

    Returns:
        A dict with keys 'P', 'PN' and 'PV', or None when the filename
        does not match the expected ``<name>-<version>.ebuild`` shape.
    """
    basename = os.path.basename(filename)

    match = EBUILD_FILENAME_PATTERN.match(basename)
    if not match:
        return None

    pn = match.group('pn')
    # The filename may carry a revision suffix (e.g. "-r1"). Per PMS,
    # PV and P never include the revision (that is PVR/PF), so strip it;
    # otherwise ${PV}/${P} substitution in SRC_URI yields wrong URLs.
    pv = re.sub(r'-r\d+$', '', match.group('pv'))

    return {
        'P': f"{pn}-{pv}",
        'PN': pn,
        'PV': pv,
    }

def resolve_variables(text, variables):
    """Expand ``${VAR}`` and ``$VAR`` references in *text* using *variables*.

    A single regex pass replaces the old sequential ``str.replace`` loop,
    which was broken: replacing ``$P`` first corrupted ``$PN``/``$PV``
    (``$P`` is a prefix of both, so ``$PN`` became ``<P-value>N``).
    Unknown variables are left untouched, and substituted values are
    never re-expanded.

    Args:
        text: The string possibly containing variable references.
        variables: Mapping of variable name -> replacement value.

    Returns:
        *text* with all known variable references expanded.
    """
    def _expand(match):
        # Group 1 is the braced form ${VAR}; group 2 the bare $VAR form.
        name = match.group(1) or match.group(2)
        return variables.get(name, match.group(0))

    return re.sub(r'\$\{(\w+)\}|\$(\w+)', _expand, text)

def extract_uris(content, variables):
    """Pull ``(url, filename)`` pairs out of an ebuild's SRC_URI assignment.

    Args:
        content: Full text of an ebuild file.
        variables: Mapping used to expand ${P}/${PN}/${PV} style references.

    Returns:
        List of (resolved_url, resolved_filename) tuples; empty when no
        SRC_URI assignment is found.
    """
    # Drop everything after '#' on each line so commented-out URIs are ignored.
    stripped = '\n'.join(ln.split('#', 1)[0] for ln in content.splitlines())

    # Locate the SRC_URI="..." body; fall back to single-quoted form.
    body = None
    for pattern in (r'SRC_URI\s*=\s*"([^"]*)"', r"SRC_URI\s*=\s*'([^']*)'"):
        found = re.search(pattern, stripped, re.DOTALL)
        if found:
            body = found.group(1)
            break
    if body is None:
        return []

    tokens = body.split()
    total = len(tokens)
    pairs = []
    idx = 0
    while idx < total:
        candidate = tokens[idx]
        if '://' not in candidate:
            # Not a URL (e.g. USE-conditional or stray keyword) -- skip it.
            idx += 1
            continue

        # Default distfile name is the URL's last path component, unless a
        # "url -> name" rename arrow overrides it.
        target = os.path.basename(candidate)
        if idx + 2 < total and tokens[idx + 1] == '->':
            target = tokens[idx + 2]
            idx += 3
        else:
            idx += 1

        pairs.append((resolve_variables(candidate, variables),
                      resolve_variables(target, variables)))

    return pairs

def upsert_worker(url, filename):
    """Run ``g2 manifest upsert-from-url`` for one URL in a scratch Manifest.

    Each worker gets its own temporary Manifest so concurrent threads never
    contend over the real file; the caller merges the resulting lines.

    Args:
        url: Source URL to fetch/hash.
        filename: Distfile name to record in the manifest.

    Returns:
        The manifest lines the tool produced (newline-terminated strings),
        or an empty list on failure.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        temp_path = os.path.join(tmpdir, 'Manifest')
        # The tool expects the manifest file to exist; create it empty.
        open(temp_path, 'a').close()

        try:
            subprocess.run(
                ['g2', 'manifest', 'upsert-from-url', url, filename, temp_path],
                check=True,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.PIPE,
                text=True,
            )

            with open(temp_path, 'r') as f:
                return f.readlines()
        except subprocess.CalledProcessError as e:
            # Surface the tool's stderr: it was captured via PIPE before but
            # silently discarded, which hid the actual failure reason.
            detail = (e.stderr or '').strip()
            message = f"    Error updating manifest for {url}: {e}"
            if detail:
                message += f"\n      stderr: {detail}"
            print(message)
            return []

def process_directory(directory):
    """Refresh the Manifest in *directory* from its ebuilds' SRC_URI entries.

    Phases: collect (url, filename) tasks from every ebuild, upsert them in
    parallel via the external tool, then merge the produced DIST lines into
    the existing Manifest.
    """
    print(f"Processing directory: {directory}")
    manifest_path = os.path.join(directory, 'Manifest')

    ebuilds = [f for f in os.listdir(directory) if f.endswith('.ebuild')]
    if not ebuilds:
        print("No ebuilds found.")
        return

    tasks = _collect_tasks(directory, ebuilds)
    if not tasks:
        return

    # Deduplicate BEFORE reporting and scheduling: previously the count was
    # printed first, so it over-reported when ebuilds shared URIs.
    tasks = list(set(tasks))
    print(f"  Upserting {len(tasks)} URIs in parallel...")

    new_entries = _run_upserts(tasks)
    _merge_manifest(manifest_path, new_entries)


def _collect_tasks(directory, ebuilds):
    """Parse each ebuild and return the combined list of (url, filename)."""
    tasks = []
    for ebuild in ebuilds:
        print(f"  Parsing {ebuild}...")

        # The filename alone carries PN/PV; the path is only needed to read.
        variables = parse_ebuild_variables(ebuild)
        if not variables:
            print(f"  Skipping {ebuild}: Could not parse version/name.")
            continue

        with open(os.path.join(directory, ebuild), 'r') as f:
            content = f.read()

        tasks.extend(extract_uris(content, variables))
    return tasks


def _run_upserts(tasks):
    """Run upsert_worker over *tasks* in a thread pool; return manifest lines."""
    new_entries = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=min(32, len(tasks) + 1)) as executor:
        future_to_url = {
            executor.submit(upsert_worker, url, filename): (url, filename)
            for url, filename in tasks
        }

        for future in concurrent.futures.as_completed(future_to_url):
            url, _filename = future_to_url[future]
            try:
                lines = future.result()
            except Exception as e:
                print(f"    Exception processing {url}: {e}")
                continue
            if lines:
                new_entries.extend(lines)
                print(f"    Upserted: {url} -> (unknown)")
            else:
                print(f"    Failed to upsert: {url}")
    return new_entries


def _merge_manifest(manifest_path, new_entries):
    """Merge new DIST lines into the Manifest, preserving non-DIST lines.

    DIST lines are keyed by distfile name so newer entries replace stale
    ones; the result is written with DIST lines sorted by filename.
    """
    header_lines = []
    dist_lines_map = {}

    if os.path.exists(manifest_path):
        with open(manifest_path, 'r') as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) > 1 and parts[0] == 'DIST':
                    dist_lines_map[parts[1]] = line
                else:
                    header_lines.append(line)

    # Entries from this run win over existing ones for the same distfile.
    for line in new_entries:
        parts = line.strip().split()
        if len(parts) > 1 and parts[0] == 'DIST':
            dist_lines_map[parts[1]] = line

    with open(manifest_path, 'w') as f:
        f.writelines(header_lines)
        for filename in sorted(dist_lines_map):
            f.write(dist_lines_map[filename])

def main():
    """Entry point: validate CLI arguments and process each directory given."""
    directories = sys.argv[1:]
    if not directories:
        print("Usage: verify_manifest.py <directory1> [directory2 ...]")
        sys.exit(1)

    for directory in directories:
        if not os.path.isdir(directory):
            print(f"Directory not found: {directory}")
            continue
        process_directory(directory)

# Standard entry guard: run only when executed directly, not on import.
if __name__ == "__main__":
    main()
