#!/usr/bin/env python3
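"""Scrape eBay search results with Playwright and print them as JSON to stdout.

Each result page is rendered in Chromium, item data is read from the page DOM,
and a handful of items are then visited individually to fill in missing seller
details. Diagnostic messages go to stderr so stdout stays machine-readable.
"""
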
import argparse
import urllib.parse
import json
import sys
import time
import re
from playwright.sync_api import sync_playwright

parser = argparse.ArgumentParser(description="Get results from an eBay search using Playwright")
parser.add_argument("search_term", type=str, help="Search term to query on eBay")
parser.add_argument("--shipping_location", type=str, help="Shipping location filter (LH_PrefLoc value)", default="")
parser.add_argument("--price_min", type=int, help="Minimum price (_udlo)", default=1)
parser.add_argument("--price_max", type=int, help="Maximum price (_udhi)")
parser.add_argument("--listing_type", type=int, help="Listing type: 1=all, 2=buy it now, 3=best offer, 4=auction", default=1)
parser.add_argument("--condition", type=str, help="Item condition: 'new' or 'used'", default="")
parser.add_argument("--category", type=int, help="eBay category ID (_sacat)", default=0)
parser.add_argument("--exclude", type=str, help="Keywords to exclude from the search (_ex_kw)", default="")
parser.add_argument("--page_count", type=int, help="Results per page (_ipg)", default=100)
parser.add_argument("--days", type=str, help="Number of days (not currently applied to the URL)", default="30")
parser.add_argument("--site", type=str, help="eBay site to search, e.g. ebay.com", default="ebay.com")
parser.add_argument("--pages", type=int, help="Number of result pages to scrape", default=1)
parser.add_argument("--timeout", type=int, help="Request timeout in seconds", default=30)
parser.add_argument("--headless", action="store_true", help="Run browser in headless mode")
parser.add_argument("--debug", action="store_true", help="Save screenshots for debugging")

args = parser.parse_args()
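
# Example invocation (the script file name below is a placeholder):
#   python ebay_search.py "mechanical keyboard" --pages 2 --condition used --headless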

def build_url(page=1):
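    """Build an eBay search-results URL from the parsed command-line arguments.

    Filters are appended as eBay query parameters (_nkw, _udlo, _udhi, _sacat,
    _ex_kw, _ipg, _pgn, ...); only filters the user actually supplied are added.
    """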
    url = f"https://www.{args.site}/sch/i.html?"
    params = ""

    params += "_nkw=" + urllib.parse.quote(args.search_term)

    if args.shipping_location:
        params += "&LH_PrefLoc=" + str(args.shipping_location)

    if args.price_min:
        params += "&_udlo=" + str(args.price_min)

    if args.price_max:
        params += "&_udhi=" + str(args.price_max)

    if args.listing_type:
        if args.listing_type == 1:  # all listings
            params += "&LH_All=1"
        elif args.listing_type == 2:  # buy it now
            params += "&LH_BIN=1"
        elif args.listing_type == 3:  # best offer
            params += "&LH_BO=1"
        elif args.listing_type == 4:  # auction
            params += "&LH_Auction=1"

    if args.condition:
        if args.condition == "new":
            params += "&LH_ItemCondition=3"
        elif args.condition == "used":
            params += "&LH_ItemCondition=4"

    if args.category:
        params += "&_sacat=" + str(args.category)

    if args.exclude:
        params += "&_ex_kw=" + urllib.parse.quote(args.exclude)

    if args.page_count:
        params += "&_ipg=" + str(args.page_count)

    if page > 1:
        params += f"&_pgn={page}"

    params += "&_dmd=1"
    return url + params
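
# For example, build_url(2) with search_term="laptop" and the default options
# yields something like:
#   https://www.ebay.com/sch/i.html?_nkw=laptop&_udlo=1&LH_All=1&_ipg=100&_pgn=2&_dmd=1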

def scrape_search_results(browser, page_num):
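    """Scrape one page of search results and return a list of item dicts.

    A fresh browser context is created for the page and closed before
    returning, so state never leaks between requests.
    """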
    start_time = time.time()
    url = build_url(page_num)
    # print(f"Scraping page {page_num}: {url}")

    # Launch a new context for each page
    context = browser.new_context(
        viewport={"width": 1920, "height": 1080},
        user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36"
    )
    page = context.new_page()

    # Set timeout for navigation
    page.set_default_timeout(args.timeout * 1000)  # Convert to milliseconds

    try:
        # Navigate to the URL
        response = page.goto(url)

        if not response:
            print(f"Failed to navigate to {url}", file=sys.stderr)
            context.close()
            return []

        if response.status >= 400:
            print(f"Error response: {response.status} for {url}", file=sys.stderr)
            context.close()
            return []

        # Wait for the search results to load
        try:
            page.wait_for_selector("ul.srp-results li.s-item", timeout=args.timeout * 1000)
        except Exception:
            # print("Couldn't find search results list items")
            if args.debug:
                page.screenshot(path=f"debug_page_{page_num}_no_results.png")
            context.close()
            return []

        # Take screenshot if debug mode is on
        if args.debug:
            page.screenshot(path=f"debug_page_{page_num}.png")

        # Extract data from items
        items = []

        # Get all items from the search results
        item_elements = page.query_selector_all("ul.srp-results li.s-item")

        if not item_elements:
            # print("No item elements found")
            context.close()
            return []

        # print(f"Found {len(item_elements)} item elements")

        for item_element in item_elements:
            # Skip "Shop on eBay" placeholder tiles (normally the first result)
            if "Shop on eBay" in item_element.text_content():
                continue

            item = {}

            # Extract item ID and URL
            url_element = item_element.query_selector("a.s-item__link")
            if url_element:
                href = url_element.get_attribute("href")
                if href:
                    item["url"] = href
                    # Extract item ID from URL
                    if "/itm/" in href:
                        item_id = href.split("/itm/")[1].split("?")[0]
                        item["item_id"] = item_id

            # Extract title
            title_element = item_element.query_selector("div.s-item__title span")
            if title_element:
                item["title"] = title_element.text_content()

            # Extract image
            img_element = item_element.query_selector(".s-item__image img")
            if img_element:
                item["img"] = img_element.get_attribute("src") or ""

            # Extract price
            price_element = item_element.query_selector("span.s-item__price")
            if price_element:
                price_text = price_element.text_content()
                item["price"] = price_text

            # Extract shipping cost
            shipping_element = item_element.query_selector("span.s-item__shipping")
            if shipping_element:
                shipping_text = shipping_element.text_content()
                item["shipping"] = shipping_text

            # Extract location
            location_element = item_element.query_selector("span.s-item__location")
            if location_element:
                location_text = location_element.text_content()
                if "from " in location_text:
                    location_text = location_text.split("from ")[1]
                item["location"] = location_text

            # Extract seller info
            seller_info_element = item_element.query_selector("span.s-item__seller-info-text")
            if seller_info_element:
                seller_info = seller_info_element.text_content()
                item["seller_info"] = seller_info

                # Parse seller name, feedback score, etc.
                seller_parts = seller_info.split()
                if len(seller_parts) >= 1:
                    item["seller"] = seller_parts[0]

                # Try to extract feedback percentage
                feedback_match = re.search(r'(\d+\.?\d*)%', seller_info)
                if feedback_match:
                    item["positive_feedback"] = feedback_match.group(0)

            # Extract quantity sold
            sold_element = item_element.query_selector("span.s-item__quantitySold")
            if sold_element:
                sold_text = sold_element.text_content()
                # Extract numbers from text like "123 sold"
                sold_match = re.search(r'(\d+)', sold_text)
                if sold_match:
                    item["quantity_sold"] = sold_match.group(1)

            # Add the item to our list if we extracted at least some data
            if item.get("title") and item.get("url"):
                items.append(item)

        # print(f"Page {page_num} scraped in {time.time() - start_time:.2f} seconds, found {len(items)} items")
        context.close()
        return items

    except Exception as e:
        # print(f"Error scraping page {page_num}: {e}")
        if args.debug:
            try:
                page.screenshot(path=f"error_page_{page_num}.png")
                # print(f"Error screenshot saved as error_page_{page_num}.png")
            except Exception:
                pass
        context.close()
        return []

def get_item_details(browser, item_id):
    """Get additional details for a specific item"""
    url = f"https://www.ebay.com/itm/{item_id}"
    # print(f"Getting details for item {item_id}")

    context = browser.new_context(
        viewport={"width": 1920, "height": 1080},
        user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36"
    )
    page = context.new_page()

    # Set timeout for navigation
    page.set_default_timeout(args.timeout * 1000)

    details = {}

    try:
        # Navigate to the URL
        response = page.goto(url)

        if not response or response.status >= 400:
            # print(f"Failed to load item page (status: {response.status if response else 'N/A'})")
            context.close()
            return details

        # Take screenshot if debug mode is on
        if args.debug:
            page.screenshot(path=f"item_{item_id}_details.png")

        # Extract seller information
        try:
            seller_element = page.query_selector("div.x-sellercard-atf__info__about-seller a")
            if seller_element:
                details["seller"] = seller_element.text_content()
                details["seller_url"] = seller_element.get_attribute("href")
        except Exception:
            pass  # Failed to extract seller info

        # Extract rating/feedback info
        try:
            feedback_element = page.query_selector("div.x-sellercard-atf__feedback")
            if feedback_element:
                feedback_text = feedback_element.text_content()
                # Extract feedback percentage
                feedback_match = re.search(r'(\d+\.?\d*)%', feedback_text)
                if feedback_match:
                    details["positive_feedback"] = feedback_match.group(0)

                # Extract feedback count
                count_match = re.search(r'\((\d+)\)', feedback_text)
                if count_match:
                    details["feedback_count"] = count_match.group(1)
        except Exception:
            print("Failed to extract feedback info", file=sys.stderr)

        # Extract total sold count if available
        try:
            sold_element = page.query_selector("span.d-quantity__availability span.ux-textspans--BOLD")
            if sold_element:
                sold_text = sold_element.text_content()
                sold_match = re.search(r'(\d+)', sold_text)
                if sold_match:
                    details["quantity_sold"] = sold_match.group(1)
        except Exception:
            print("Failed to extract sold count", file=sys.stderr)

    except Exception as e:
        print(f"Error getting item details: {e}")
    finally:
        context.close()

    return details

def main():
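    """Scrape the requested pages, enrich a few items with per-item details,
    and print the combined results as compact JSON on stdout."""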
    overall_start_time = time.time()
    all_items = []

    with sync_playwright() as playwright:
        # Launch a single browser instance and reuse it for every page
        browser = playwright.chromium.launch(headless=args.headless)

        try:
            # Scrape search results
            for page_num in range(1, args.pages + 1):
                items = scrape_search_results(browser, page_num)
                all_items.extend(items)

                # If no items on first page, try one more time
                if page_num == 1 and not items:
                    # print("No items found on first attempt. Retrying...")
                    items = scrape_search_results(browser, page_num)
                    all_items.extend(items)

                # If still no items, give up (the finally block closes the browser)
                if page_num == 1 and not all_items:
                    # print("No items could be found. Exiting.")
                    return

            # Get additional details for items with missing information
            # Only process a limited number to avoid long runtime
            items_to_process = min(10, len(all_items))
            # print(f"Getting additional details for {items_to_process} items")

            for i in range(items_to_process):
                item = all_items[i]

                # Only get details if necessary fields are missing
                if not item.get("seller") or not item.get("positive_feedback"):
                    item_id = item.get("item_id")
                    if item_id:
                        details = get_item_details(browser, item_id)
                        # Update the item with additional details
                        for key, value in details.items():
                            if value:  # Only update if we got a value
                                item[key] = value

        except Exception as e:
            # print(f"Error during execution: {e}")
            import traceback
            traceback.print_exc()
        finally:
            browser.close()

    # Format the data for output
    formatted_data = {"data": []}

    for item in all_items:
        formatted_item = {
            "title": item.get("title", ""),
            "url": item.get("url", ""),
            "price": item.get("price", ""),
            "shipping": item.get("shipping", ""),
            "location": item.get("location", ""),
            "image": item.get("img", ""),
            "seller": item.get("seller", ""),
            "positive_feedback": item.get("positive_feedback", ""),
            "quantity_sold": item.get("quantity_sold", ""),
            "seller_url": item.get("seller_url", ""),
            "country_code": "US",  # Default
            "item_id": item.get("item_id", ""),
        }

        formatted_data["data"].append(formatted_item)

    # print(f"Total scraping completed in {time.time() - overall_start_time:.2f} seconds")
    # print(f"Found {len(formatted_data['data'])} items")

    # Output the data
    print(json.dumps(formatted_data, separators=(',', ':'), ensure_ascii=False))

if __name__ == "__main__":
    main()
