"""Scrape eBay listings for a search term.

Stage 1 collects listing IDs from the search results page; stage 2 visits
each listing and extracts title, price, sold count, seller, and image.

Usage: python ebay_data.py <product name>
"""
import json
import random
import re
import sys
import time

from bs4 import BeautifulSoup
from playwright.sync_api import sync_playwright

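# Pool of desktop and mobile user agents; one is picked at random per browser
# context so repeated runs don't all present the same fingerprint.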
user_agents = [
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:124.0) Gecko/20100101 Firefox/124.0",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:123.0) Gecko/20100101 Firefox/123.0",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 11_2_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Edge/120.0.0.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15",
    "Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1",
    "Mozilla/5.0 (iPad; CPU OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1",
    "Mozilla/5.0 (Linux; Android 13; SM-G991B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.6099.216 Mobile Safari/537.36",
    "Mozilla/5.0 (Linux; Android 12; Pixel 6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.6045.134 Mobile Safari/537.36"
]

def scrape_ebay_product_ids(search_term):
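    """Search eBay for `search_term`, scroll through the results page, and
    save the listing IDs found there to `<search_term>.json`.

    Returns the name of the JSON file that was written.
    """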
    with sync_playwright() as p:
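        # Headful Firefox: headless browsers are easier for sites to flag as bots.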
        browser = p.firefox.launch(headless=False)
        context = browser.new_context(user_agent=random.choice(user_agents))
        page = context.new_page()

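        # Naive query encoding; urllib.parse.quote_plus would be safer for
        # search terms containing special characters.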
        url = f"https://www.ebay.com/sch/i.html?_nkw={search_term.replace(' ', '+')}"

        page.goto(url)
        page.wait_for_selector("li.s-card", timeout=10000)

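        # Scroll several times so lazily loaded results render before we
        # capture the page HTML.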
        for _ in range(6):
            page.mouse.wheel(0, 30000)
            time.sleep(1.5)

        html = page.content()
        browser.close()

    soup = BeautifulSoup(html, "lxml")
    product_ids = []

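    # Each result <li> carries its numeric eBay item ID in data-listingid;
    # keep only values that look like real item IDs (digits, 12 chars max).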
    for li in soup.select("li.s-card[data-listingid]"):
        pid = li["data-listingid"].strip()
        if pid.isdigit() and len(pid) <= 12:
            product_ids.append(pid)

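    # dict.fromkeys() de-duplicates while preserving first-seen order.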
    product_ids = list(dict.fromkeys(product_ids))

    file_name = f"{search_term.replace(' ', '_')}.json"
    with open(file_name, "w", encoding="utf-8") as f:
        json.dump(product_ids, f, indent=4, ensure_ascii=False)

    print(f"✅ done save {len(product_ids)} ids in the file: {file_name}")
    return file_name


def scrape_ebay_product_details(search_term, batch_size=5):
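    """Collect listing IDs for `search_term`, then visit each listing page in
    batches of `batch_size`, extracting title, price, sold count, seller, and
    image. Results are written incrementally to `<search_term>_data.json`.
    """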
    # Step 1: run the ID scraper to collect the listing IDs
    json_file = scrape_ebay_product_ids(search_term)

    # Read the IDs back from the file
    with open(json_file, "r", encoding="utf-8") as f:
        product_ids = json.load(f)

    results = []
    output_file = json_file.replace(".json", "_data.json")

    coins = "$€£¥₹₽₺₩₮₦₲₪₵₡₱₳₫₭₴ƒ₠₡₢₣₤₥₦₧₨₩₫₭₮₯₰₲₳₴₵ℳ"

    with sync_playwright() as p:
        browser = p.firefox.launch(headless=False)
        context = browser.new_context(user_agent=random.choice(user_agents))

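        # Walk the ID list in slices of batch_size; batching mainly gives
        # readable progress output, pages are still fetched one at a time.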
        for i in range(0, len(product_ids), batch_size):
            batch = product_ids[i:i + batch_size]
            print(f"\n🔹 Starting batch {i//batch_size + 1} ({len(batch)} items)...")

            # Process each listing ID in the batch one at a time
            for pid in batch:
                # skip immediately if already saved (safety)
                if any(r.get("id") == pid for r in results):
                    print(f"🔁 Already have {pid}, skipping duplicate.")
                    continue

                success = False

                for attempt in range(2):  # two attempts max
                    page = None
                    try:
                        page = context.new_page()
                        url = f"https://www.ebay.com/itm/{pid}"
                        print(f"🔍 Trying {pid} (Attempt {attempt+1}/2)")
                        # goto + wait: keep slightly larger timeout for reliability
                        page.goto(url, timeout=30000)
                        # Wait for title or load state (prefer selector for dynamic content)
                        try:
                            page.wait_for_selector("h1.x-item-title__mainTitle span.ux-textspans--BOLD", timeout=15000)
                        except Exception:
                            # fallback to a short load wait if selector not found quickly
                            page.wait_for_load_state("domcontentloaded", timeout=10000)

                        html = page.content()
                        soup = BeautifulSoup(html, "lxml")

                        # Title
                        title_tag = soup.select_one("h1.x-item-title__mainTitle span.ux-textspans--BOLD")
                        title = title_tag.get_text(strip=True) if title_tag else None

                        # Price handling (split value + currency)
                        price_tag = soup.select_one("div.x-price-primary span.ux-textspans")
                        price_raw = price_tag.get_text(strip=True) if price_tag else None
                        price_value = None
                        currency_symbol = None

                        if price_raw:
                            # Pick out the first recognized currency symbol
                            for c in coins:
                                if c in price_raw:
                                    currency_symbol = c
                                    break
                            # Strip everything but digits and separators.
                            # NOTE: assumes US-style "1,234.56" formatting;
                            # "1.234,56" locales would parse incorrectly.
                            cleaned_price = re.sub(r"[^\d.,]", "", price_raw)
                            try:
                                price_value = float(cleaned_price.replace(",", ""))
                            except ValueError:
                                price_value = None
                        # Sold count (e.g. "27 sold"), when eBay displays one
                        sold_tag = soup.select_one(
                            ".vim.x-quantity-evo .ux-textspans.ux-textspans--SECONDARY")
                        sold = sold_tag.get_text(strip=True) if sold_tag else None

                        # Seller
                        seller = None
                        seller_selectors = [
                            ".x-sellercard-atf_info.about-seller a span.ux-textspans",
                            ".x-sellercard-atf__info a.ux-action span.ux-textspans",
                            "div.x-sellercard-atf__about-seller a span.ux-textspans",
                            "a.ux-action span.ux-textspans--BOLD"
                        ]
                        for sel in seller_selectors:
                            tag = soup.select_one(sel)
                            if tag and tag.get_text(strip=True):
                                seller = tag.get_text(strip=True)
                                break

                        # Image
                        img_tag = soup.select_one("div.ux-image-carousel-item img")
                        image = (img_tag.get("data-zoom-src") or img_tag.get("src")) if img_tag else None

                        # If we have the essential fields, save
                        if title and price_raw:
                            # final duplicate check before append
                            if not any(r.get("id") == pid for r in results):
                                product_data = {
                                    "id": pid,
                                    "url": url,
                                    "title": title,
                                    "price_value": price_value,
                                    "currency": currency_symbol,
                                    "sold": sold,
                                    "image": image,
                                    "Seller_Name": seller
                                }
                                results.append(product_data)
                                with open(output_file, "w", encoding="utf-8") as f:
                                    json.dump(results, f, indent=4, ensure_ascii=False)
                                print(f"✅ Saved {pid} | {title} | {price_raw}")
                            else:
                                print(f"🔁 Duplicate detected for {pid}, not saving.")
                            success = True
                            break  # exit attempts loop

                    except Exception as e:
                        print(f"⚠️ Error fetching {pid} on attempt {attempt+1}: {e}")

                    finally:
                        if page:
                            try:
                                page.close()
                            except Exception:
                                pass

                    time.sleep(1)  # brief pause before the next attempt

                if not success:
                    print(f"⏭️ Skipped {pid} after 2 failed attempts.")

        browser.close()

    print(f"\n🎉 Save Done {output_file}")


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("❌ Usage: python ebay_data.py <product name>")
        sys.exit(1)

    product = " ".join(sys.argv[1:])
    scrape_ebay_product_details(product, batch_size=5)
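
# A minimal sketch of consuming the output afterwards (hypothetical file name,
# assuming the script was run as `python ebay_data.py "usb hub"`):
#
#     with open("usb_hub_data.json", encoding="utf-8") as f:
#         items = json.load(f)
#     priced = [i for i in items if i["price_value"] is not None]
#     cheapest = min(priced, key=lambda i: i["price_value"])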
