#!/usr/bin/env python3
import sys
import os
import json
import subprocess
import fnmatch

# --- Configuration ---
# Git finds hooks in <repo_root>/hooks/ or in the directory named by
# core.hooksPath. This script is assumed to live in OpenGit/hooks/, so the
# policy file is looked up in the same directory as the script itself.
HOOKS_DIR = os.path.split(os.path.abspath(__file__))[0]
POLICY_FILE = os.path.join(HOOKS_DIR, 'server_git_policy.json')

def load_policy():
    """Load the push policy from POLICY_FILE.

    Returns:
        A dict with two keys:
          * 'max_upload_size_mb' (int): largest allowed blob size, in MB.
          * 'global_gitignore' (str): newline-separated banned patterns.

        The built-in defaults (10 MB, no patterns) are returned when the
        policy file does not exist, cannot be read or parsed, is not a JSON
        object, or holds a size that cannot be converted to an int. Every
        case except "file does not exist" prints a WARNING line.
    """
    defaults = {'max_upload_size_mb': 10, 'global_gitignore': ''}

    # EAFP: open directly instead of exists()+open(), which races with
    # concurrent edits of the policy file.
    try:
        with open(POLICY_FILE, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        # No policy file is a normal configuration; stay quiet.
        return defaults
    except (OSError, ValueError) as e:
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        print(f"WARNING: Failed to load git policy: {e}")
        return defaults

    if not isinstance(data, dict):
        print("WARNING: Failed to load git policy: "
              f"expected a JSON object, got {type(data).__name__}")
        return defaults

    try:
        max_mb = int(data.get('max_upload_size_mb', defaults['max_upload_size_mb']))
    except (TypeError, ValueError, OverflowError) as e:
        print(f"WARNING: Failed to load git policy: {e}")
        return defaults

    # Normalize the pattern list to a single string so callers can always
    # call .splitlines() on it: JSON null becomes '', a JSON array of
    # patterns is joined with newlines.
    raw_ignore = data.get('global_gitignore', '')
    if raw_ignore is None:
        ignore_text = ''
    elif isinstance(raw_ignore, str):
        ignore_text = raw_ignore
    elif isinstance(raw_ignore, (list, tuple)):
        ignore_text = '\n'.join(str(item) for item in raw_ignore)
    else:
        print("WARNING: Failed to load git policy: 'global_gitignore' must be "
              f"a string or a list, got {type(raw_ignore).__name__}")
        ignore_text = ''

    return {
        'max_upload_size_mb': max_mb,
        'global_gitignore': ignore_text,
    }

def _is_null_sha(sha):
    """Return True if *sha* is git's all-zero "no object" id (any hash length)."""
    return bool(sha) and sha.strip('0') == ''


def _parse_ignore_patterns(raw):
    """Return the usable patterns from a newline-separated string (or a list).

    Blank entries and '#' comment lines are dropped.
    """
    if raw is None:
        return []
    entries = raw.splitlines() if isinstance(raw, str) else [str(item) for item in raw]
    stripped = (entry.strip() for entry in entries)
    return [entry for entry in stripped if entry and not entry.startswith('#')]


def _find_banned_pattern(fpath, patterns):
    """Return the first pattern in *patterns* that *fpath* matches, else None."""
    # Only the components before the last one are directories; the last one
    # is the file itself and must not satisfy a "dir/" pattern.
    parent_dirs = fpath.split('/')[:-1]
    basename = os.path.basename(fpath)
    for pattern in patterns:
        if pattern.endswith('/'):
            dir_pattern = pattern.rstrip('/')
            if any(fnmatch.fnmatch(part, dir_pattern) for part in parent_dirs):
                return pattern
        elif fnmatch.fnmatch(basename, pattern) or fnmatch.fnmatch(fpath, pattern):
            # Match basename or full path.
            return pattern
    return None


def _check_filenames(commits, patterns):
    """Exit with status 1 if any commit adds or changes a banned path."""
    if not patterns:
        return
    for commit in commits:
        try:
            # --root: also list the files of a parentless (initial) commit.
            # --diff-filter=d: ignore deletions so banned files can be removed.
            # -z: NUL-separated raw paths; without it git C-quotes unusual
            #     names and the quotes defeat the pattern match.
            raw = subprocess.check_output(
                ['git', 'diff-tree', '--no-commit-id', '--name-only', '-r',
                 '--root', '--diff-filter=d', '-z', commit])
        except subprocess.CalledProcessError as exc:
            # Best effort, as before: an unlistable commit is not a rejection.
            print(f"WARNING: Could not list files of commit {commit}: {exc}")
            continue
        for fpath in raw.decode('utf-8', errors='replace').split('\0'):
            if not fpath:
                continue
            pattern = _find_banned_pattern(fpath, patterns)
            if pattern is not None:
                print(f"ERROR: File '{fpath}' matches banned pattern '{pattern}'.")
                sys.exit(1)


def _check_blob_sizes(new_sha, max_bytes, limit_mb):
    """Exit with status 1 if the push introduces a blob larger than *max_bytes*."""
    try:
        # Objects reachable from new_sha that no existing ref already has.
        listing = subprocess.check_output(
            ['git', 'rev-list', '--objects', new_sha, '--not', '--all']
        ).decode('utf-8', errors='replace').splitlines()
    except subprocess.CalledProcessError as exc:
        print(f"WARNING: Could not list new objects for {new_sha}: {exc}")
        return

    # rev-list --objects prints "<sha>" or "<sha> <path>"; keep the path so a
    # violation can be reported by file name rather than by object id.
    path_by_sha = {}
    for entry in listing:
        sha, _, path = entry.partition(' ')
        if sha:
            path_by_sha.setdefault(sha, path)
    if not path_by_sha:
        return

    blobs = []
    try:
        # One cat-file process for all objects instead of one per object.
        proc = subprocess.run(
            ['git', 'cat-file', '--batch-check=%(objectname) %(objecttype) %(objectsize)'],
            input='\n'.join(path_by_sha) + '\n',
            stdout=subprocess.PIPE, text=True, check=True)
        for check_line in proc.stdout.splitlines():
            # A "<sha> missing" line has two fields and raises ValueError,
            # which rejects the push below.
            c_sha, c_type, c_size = check_line.split()
            if c_type == 'blob':
                blobs.append((c_sha, int(c_size)))
    except (OSError, ValueError, subprocess.SubprocessError) as e:
        print(f"Error checking file sizes: {e}")
        sys.exit(1)

    for c_sha, size in blobs:
        if size > max_bytes:
            file_path = path_by_sha.get(c_sha) or "unknown file"
            print(f"ERROR: File '{file_path}' ({size/1024/1024:.2f} MB) exceeds limit of {limit_mb} MB.")
            sys.exit(1)


def check_push():
    """Validate every ref update on stdin against the server push policy.

    Each stdin line is expected to be "<old_sha> <new_sha> <ref_name>". For
    each update that is not a deletion, the commits it introduces are checked
    for paths matching a banned pattern, and the new objects are checked for
    blobs above the configured size limit.

    Returns None when every update passes. Prints an ERROR line and exits
    with status 1 on the first violation or on a malformed input line.
    """
    policy = load_policy()
    limit_mb = policy['max_upload_size_mb']
    max_bytes = limit_mb * 1024 * 1024
    ignore_patterns = _parse_ignore_patterns(policy['global_gitignore'])

    # Read all of stdin up front so the sender is never left writing to a
    # closed pipe when the hook exits early.
    lines = sys.stdin.readlines()

    for line in lines:
        fields = line.split()
        if not fields:
            continue  # tolerate blank lines
        if len(fields) != 3:
            print(f"ERROR: Malformed ref update line: {line.strip()!r}")
            sys.exit(1)
        old_sha, new_sha, _ref_name = fields

        # Skip deletions
        if _is_null_sha(new_sha):
            continue

        # Find new commits introduced by this push. For a brand-new ref,
        # --not --all excludes everything already reachable from any ref on
        # the server, which avoids scanning the whole history.
        if _is_null_sha(old_sha):
            rev_list_args = ['git', 'rev-list', new_sha, '--not', '--all']
        else:
            rev_list_args = ['git', 'rev-list', f'{old_sha}..{new_sha}']

        try:
            new_commits = subprocess.check_output(rev_list_args).decode().splitlines()
        except subprocess.CalledProcessError as exc:
            # Best effort, as before: skip this ref but leave a trace.
            print(f"WARNING: Could not list new commits for {new_sha}: {exc}")
            continue

        if not new_commits:
            continue

        # 1. Check filenames in new commits
        _check_filenames(new_commits, ignore_patterns)

        # 2. Check blob sizes (new objects)
        _check_blob_sizes(new_sha, max_bytes, limit_mb)

# Run the checks only when executed as a script (which is how git invokes a
# hook), so that importing this module does not block reading stdin.
if __name__ == "__main__":
    check_push()
