#!/usr/bin/env python3
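"""Scrape eBay item pages with Playwright and emit seller info as TSV.

For each item ID given on the command line, the script loads
https://www.ebay.com/itm/<item_id>, reads the seller card, and writes one
row of item_id, seller_name, and seller_link (the store slug) either to
stdout or, with --save, to files/ebay_sellers_<timestamp>.tsv.
"""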

from playwright.sync_api import sync_playwright
import sys
import random
import csv
import re
import os
from datetime import datetime

def user_agent_rotator():
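    """Return a random desktop user-agent string so contexts don't all match."""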
    user_agents = [
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15",
        "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:124.0) Gecko/20100101 Firefox/124.0",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:123.0) Gecko/20100101 Firefox/123.0"
    ]
    return random.choice(user_agents)

def store_link(link):
    """Extract the store slug from an eBay /str/ URL,
    e.g. '.../str/somestore?_ssn=x' -> 'somestore'. Returns None
    for a missing link or one without a /str/ segment."""
    if not link:
        return None
    match = re.search(r'/str/([^/?]+)', link)
    if match:
        return match.group(1)
    return None


def main():
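    """Parse CLI flags, scrape each item's seller card, and write TSV output."""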
    # Check if user provided ID
    if len(sys.argv) < 2:
        print("Usage: python ebay_seller_csv.py <item_id1,item_id2,...> [--save] [--debug] [--headless]")
        print("Example: python ebay_seller_csv.py 123456789,987654321 --save")
        print("Options:")
        print("  --save       Save output to TSV file")
        print("  --debug      Show debug information")
        print("  --headless   Run in headless mode (default)")
        print("  --no-headless Run with visible browser")
        sys.exit(1)

    # Check for flags
    save_to_file = False
    headless_mode = True  # Default to headless for CSV extraction
    debug_mode = False
    args = sys.argv[1:]

    if '--save' in args:
        save_to_file = True
        args.remove('--save')

    if '--debug' in args:
        debug_mode = True
        args.remove('--debug')

    if '--headless' in args:
        headless_mode = True
        args.remove('--headless')

    if '--no-headless' in args:
        headless_mode = False
        args.remove('--no-headless')

    if len(args) == 0:
        print("Please provide the item ID(s)")
        sys.exit(1)

    item_ids = [item.strip() for item in args[0].split(",") if item.strip()]
    if debug_mode:
        print(f"Processing {len(item_ids)} item(s)...", file=sys.stderr)

    sellers_data = []

    with sync_playwright() as p:
        # Chromium-style switches (--no-sandbox, --disable-dev-shm-usage,
        # --disable-gpu, ...) are not valid Firefox options, so Firefox is
        # started without extra arguments.
        browser = p.firefox.launch(headless=headless_mode)
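        # Desktop-browser request headers; the Sec-Fetch-* values imitate a
        # user-initiated, top-level navigation.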
        context = browser.new_context(
            user_agent=user_agent_rotator(),
            ignore_https_errors=True,
            bypass_csp=True,
            java_script_enabled=True,
            extra_http_headers={
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                'Accept-Language': 'en-US,en;q=0.5',
                'Accept-Encoding': 'gzip, deflate',
                'DNT': '1',
                'Connection': 'keep-alive',
                'Upgrade-Insecure-Requests': '1',
                'Sec-Fetch-Dest': 'document',
                'Sec-Fetch-Mode': 'navigate',
                'Sec-Fetch-Site': 'none',
                'Sec-Fetch-User': '?1'
            }
        )
        page = context.new_page()

        # Basic stealth: hide the navigator.webdriver flag that automated
        # browsers expose and that simple bot checks inspect.
        page.add_init_script("""
            Object.defineProperty(navigator, 'webdriver', {
                get: () => undefined,
            });
        """)

        try:
            for i, item_id in enumerate(item_ids):
                url = f"https://www.ebay.com/itm/{item_id}"
                if debug_mode:
                    print(f"Processing item {i+1}/{len(item_ids)}: {item_id}", file=sys.stderr)

                # Add delay between requests
                if i > 0:
                    page.wait_for_timeout(500)  # 0.5 seconds
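                    # A jittered pause, e.g. page.wait_for_timeout(random.randint(500, 1500)),
                    # would space requests less uniformly if eBay starts throttling.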

                try:
                    # goto() follows redirects, and wait_until="networkidle"
                    # returns only after the DOM is loaded and network
                    # activity has settled.
                    page.goto(
                        url,
                        wait_until="networkidle",
                        timeout=30000
                    )

                    # Give client-side scripts time to render the seller card.
                    page.wait_for_timeout(3000)

                    # Extract seller info from the rendered seller card.
                    seller_info = {}

                    # The "about seller" block contains an anchor tagged with
                    # data-clientpresentationmetadata; its text is the seller
                    # name and its href points at the seller's store.
                    meta_anchor = page.query_selector(".x-sellercard-atf__info__about-seller>a[data-clientpresentationmetadata]")

                    seller_info['item_id'] = item_id
                    seller_info['seller_name'] = meta_anchor.inner_text().strip() if meta_anchor else None
                    seller_info['seller_link'] = store_link(meta_anchor.get_attribute('href') if meta_anchor else None)

                    # Debug output (only if debug mode is enabled)
                    if debug_mode:
                        print(f"Item {item_id}: seller_name='{seller_info['seller_name']}', seller_link='{seller_info['seller_link']}'", file=sys.stderr)
                    sellers_data.append(seller_info)

                except Exception as e:
                    if debug_mode:
                        print(f"Error processing item {item_id}: {e}", file=sys.stderr)
                    # Add empty entry for failed items
                    sellers_data.append({
                        'item_id': item_id,
                        'seller_name': None,
                        'seller_link': None
                    })

        finally:
            browser.close()

    # Output TSV data (to a file with --save, otherwise to stdout)
    if save_to_file:
        # Create files directory if it doesn't exist
        files_dir = os.path.join(os.getcwd(), 'files')
        os.makedirs(files_dir, exist_ok=True)

        # Generate filename with timestamp
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"ebay_sellers_{timestamp}.tsv"
        file_path = os.path.join(files_dir, filename)

        # Save TSV to file (tab-separated)
        with open(file_path, 'w', newline='', encoding='utf-8') as csvfile:
            fieldnames = ['item_id', 'seller_name', 'seller_link']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames, delimiter='\t')

            writer.writeheader()
            for seller in sellers_data:
                writer.writerow(seller)

        if debug_mode:
            print(f"TSV saved to: {file_path}", file=sys.stderr)
            print(f"Processed {len(sellers_data)} items", file=sys.stderr)
    else:
        # Print TSV to stdout (tab-separated)
        fieldnames = ['item_id', 'seller_name', 'seller_link']
        writer = csv.DictWriter(sys.stdout, fieldnames=fieldnames, delimiter='\t')

        writer.writeheader()
        for seller in sellers_data:
            writer.writerow(seller)

if __name__ == "__main__":
    main()
