#!/usr/bin/env python3
"""
Update only the sold count for eBay products listed in a JSON file.
"""

import json
import argparse
import time
import re
import random
from playwright.sync_api import sync_playwright
from bs4 import BeautifulSoup
import os
import shutil

def extract_sold_count_from_page(page):
    """Extract sold count from eBay product page using Playwright"""
    try:
        # Wait for the qtyAvailability element to be present
        print("🔍 Waiting for #qtyAvailability element...")
        page.wait_for_selector("#qtyAvailability", timeout=10000)

        # Find the last span inside #qtyAvailability
        qty_element = page.locator("#qtyAvailability")
        spans = qty_element.locator("span")
        span_count = spans.count()

        if span_count > 0:
            # Get the last span text
            last_span = spans.nth(span_count - 1)
            text = last_span.text_content()
            print(f"📊 Found #qtyAvailability last span text: {text}")

            if text:
                # Strip thousands separators (commas/dots) so e.g. "1,234" parses as 1234
                cleaned_text = re.sub(r'[,.]', '', text)
                numbers = re.findall(r'\d+', cleaned_text)
                if numbers:
                    return int(numbers[-1])  # Return the last number found

        print("⚠️ No spans found in #qtyAvailability")
        return 0

    except Exception as e:
        print(f"❌ Error finding #qtyAvailability: {e}")
        # Fallback to HTML parsing method
        return extract_sold_count_fallback(page.content())

def extract_sold_count_fallback(html):
    """Fallback method using BeautifulSoup"""
    soup = BeautifulSoup(html, "lxml")

    # Scan spans from the end; use the last number in the first span that mentions "sold"
    spans = soup.find_all('span')

    for span in reversed(spans):  # Start from the last span
        text = span.get_text(strip=True)
        if 'sold' in text.lower():
            # Extract the last number from this span
            cleaned_text = re.sub(r'[,.]', '', text)
            numbers = re.findall(r'\d+', cleaned_text)
            if numbers:
                print(f"📊 Fallback found sold text: {text} → {numbers[-1]}")
                return int(numbers[-1])  # Return the last number found

    return 0

def update_sold_only(input_file, output_file=None, delay=1.0):
    """Update sold count for all products in JSON file"""

    # Random User Agents
    USER_AGENTS = [
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15",
        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:124.0) Gecko/20100101 Firefox/124.0",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:123.0) Gecko/20100101 Firefox/123.0",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 11_2_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Edge/120.0.0.0",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15"
    ]

    if not output_file:
        # str.replace would silently return the input path unchanged (and later
        # overwrite the input file) if it doesn't end in ".json"
        base, _ = os.path.splitext(input_file)
        output_file = base + '_updated_sold.json'

    print(f"📂 Loading products from: {input_file}")

    # Load input JSON file
    try:
        with open(input_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"❌ File {input_file} not found!")
        return
    except json.JSONDecodeError as e:
        print(f"❌ Error parsing JSON: {e}")
        return

    products = data.get('products', [])
    total_products = len(products)

    print(f"🔍 Found {total_products} products to update")

    if total_products == 0:
        print("⚠️ No products found in the file")
        return

    # Launch browser with random user agent
    with sync_playwright() as p:
        # Select random user agent
        user_agent = random.choice(USER_AGENTS)
        print(f"🕵️ Using User Agent: {user_agent[:50]}...")

        try:
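            # A persistent context stores cookies and session state in
            # ./browser-data, so logins and bot checks can survive between runs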
            browser = p.chromium.launch_persistent_context(
                './browser-data',
                headless=False,
                user_agent=user_agent,
                args=[
                    "--disable-gpu",
                    "--no-sandbox",
                    "--disable-dev-shm-usage",
                    "--disable-blink-features=AutomationControlled",
                    "--disable-web-security"
                ]
            )
            page = browser.new_page()
        except Exception as e:
            print(f"⚠️ Failed to launch persistent context: {e}")
            print("🔄 Trying regular browser launch...")
            browser = p.chromium.launch(
                headless=False,
                args=[
                    "--disable-gpu",
                    "--no-sandbox",
                    "--disable-dev-shm-usage",
                    "--disable-blink-features=AutomationControlled",
                    "--disable-web-security"
                ]
            )
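            # Non-persistent fallback: session state is discarded when the run ends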
            context = browser.new_context(user_agent=user_agent)
            page = context.new_page()

        updated_count = 0
        removed_count = 0
        products_to_keep = []

        for i, product in enumerate(products, 1):
            product_url = product.get('url', '')
            current_sold = product.get('sold', 0)

            if not product_url:
                print(f"⚠️ Product {i}/{total_products}: No URL found - keeping product")
                products_to_keep.append(product)
                continue

            print(f"\n🔄 Processing Product {i}/{total_products}")
            print(f"📦 {product.get('name', 'Unknown')[:50]}...")
            print(f"🔗 URL: {product_url}")
            print(f"📊 Current sold: {current_sold}")

            try:
                # Navigate to the product page; "networkidle" waits until the
                # network has been quiet for ~500 ms, i.e. the page has fully loaded
                print("🌐 Navigating to product page...")
                page.goto(product_url, timeout=60000, wait_until="networkidle")

                # Additional wait to ensure page is fully loaded
                print(f"⏳ Waiting for page to fully load...")
                page.wait_for_timeout(3000)

                # Scroll to bottom to trigger any lazy loading
                page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
                page.wait_for_timeout(2000)

                # Extract sold count using the page directly
                new_sold = extract_sold_count_from_page(page)

                # A result of 0 removes the product; note that extraction
                # failures also return 0, so unreadable pages are dropped too
                if new_sold == 0:
                    print(f"🗑️ Removing product with 0 sold: {current_sold} → {new_sold}")
                    removed_count += 1
                else:
                    # Update the product and keep it
                    product['sold'] = new_sold
                    products_to_keep.append(product)
                    updated_count += 1
                    print(f"✅ Updated sold count: {current_sold} → {new_sold}")

                # Update the products list and persist progress after every
                # product, so an interrupted run keeps its partial results
                data['products'] = products_to_keep
                with open(output_file, 'w', encoding='utf-8') as f:
                    json.dump(data, f, indent=2, ensure_ascii=False)

            except Exception as e:
                print(f"❌ Error processing product {i}: {e}")
                # Keep the product unchanged on error; it will be persisted by
                # the next successful save. Fall through so the delay below
                # still applies and failed requests don't hammer the server.
                products_to_keep.append(product)

            # Add delay between requests
            print(f"😴 Waiting {delay} seconds...")
            time.sleep(delay)

        try:
            browser.close()
        except Exception as e:
            print(f"⚠️ Warning during browser close: {e}")

    # Final save
    try:
        # Update metadata if it exists
        if 'metadata' in data:
            data['metadata']['total_products'] = len(products_to_keep)

        # Ensure final file is saved in the script's folder
        script_dir = os.path.dirname(os.path.abspath(__file__))
        out_path = os.path.join(script_dir, os.path.basename(output_file))
        output_file = out_path  # normalize for messages below

        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False)

        print(f"\n✅ Updated file saved to: {output_file}")
        print(f"📊 Successfully updated {updated_count} products")
        print(f"🗑️ Removed {removed_count} products with 0 sold")
        print(f"📋 Total products remaining: {len(products_to_keep)}/{total_products}")

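        # NOTE: this destination is relative to the current working directory,
        # not to the script's folder, so run the script from its own directory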
        shutil.move(output_file, '../../public/h/ebay_products.json')
        print(f"📁 Moved updated file to: ../../public/h/ebay_products.json")

    except Exception as e:
        print(f"❌ Error saving final file: {e}")

def main():
    """Main function"""
    parser = argparse.ArgumentParser(description="Update sold count only for eBay products")
    parser.add_argument("input_file", help="Input JSON file path")
    parser.add_argument("--output", "-o", help="Output JSON file path (optional)")
    parser.add_argument("--delay", "-d", type=float, default=1.0, help="Delay between requests in seconds (default: 1.0)")

    args = parser.parse_args()

    print("🚀 Starting Sold Count Update")
    print("=" * 50)
    print(f"📂 Input file: {args.input_file}")
    print(f"⏱️ Delay: {args.delay} seconds")

    update_sold_only(args.input_file, args.output, args.delay)

if __name__ == "__main__":
    main()
