import os
from aliexpress_extractors import extract_all_product_info
from playwright.sync_api import sync_playwright
from bs4 import BeautifulSoup
import json
import sys
import time
import argparse
from typing import List, Dict
import subprocess
import re

class AliExpressTrendyScraper:
    """Scraper for trending/popular product listings on AliExpress."""

    # Chromium flags shared by every persistent browser session
    BROWSER_ARGS = [
        "--disable-gpu",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-blink-features=AutomationControlled",
        "--disable-crash-reporter",
        "--start-maximized"
    ]

    # Product cards appear under these classes in both gallery and list layouts
    ITEM_SELECTOR = '.search-item-card-wrapper-gallery, .search-item-card-wrapper-list, .search-card-item'

    def __init__(self, headless: bool = False):
        self.base_url = "https://www.aliexpress.us"
        # Persistent Chrome profile so cookies and sessions survive between runs
        self.user_data_dir = 'chrome-data'
        self.headless = headless

    def is_valid_product_link(self, link: str) -> bool:
        """Check if the product link contains /item/ using regex"""
        if not link:
            return False
        # Use regex to match /item/ pattern in the URL
        pattern = r'/item/\d+'
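        # Illustrative examples (IDs hypothetical):
        #   "https://www.aliexpress.us/item/1005001234567890.html" -> True
        #   "https://www.aliexpress.us/store/1234567" -> False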
        return bool(re.search(pattern, link))

    def get_trendy_categories_urls(self) -> List[str]:
        """Get URLs for trending/popular categories on AliExpress"""
        trendy_urls = [
            f"{self.base_url}/w/wholesale-trending.html",
            f"{self.base_url}/w/wholesale-hot-deals.html",
            f"{self.base_url}/w/wholesale-popular.html",
            f"{self.base_url}/w/wholesale-best-selling.html",
            f"{self.base_url}/w/wholesale-top-rated.html",
            f"{self.base_url}/w/wholesale-hot-products.html",
            f"{self.base_url}/w/wholesale-viral-products.html",
            f"{self.base_url}/w/wholesale-bestseller.html",
        ]
        return trendy_urls

    def get_trending_search_terms(self) -> List[str]:
        """Get trending search terms for popular products"""
        return [
            "trending products",
            "viral products",
            "hot deals",
            "best sellers",
            "popular items",
            "top rated",
            "must have",
            "gadgets",
            "electronics",
            "fashion trends",
            "home decor trending",
            "beauty trending",
            "phone accessories",
            "smart watch",
            "bluetooth earphones",
            "led lights",
            "car accessories",
            "kitchen gadgets",
            "fitness equipment",
            "gaming accessories"
        ]

    def scrape_trending_by_search(self, search_term: str, total_pages: int = 3, include_store_details: bool = False) -> List[Dict]:
        """Scrape trending products by search term"""
        products_data = []

        with sync_playwright() as p:
            browser = p.chromium.launch_persistent_context(
                self.user_data_dir,
                headless=self.headless,
                args=self.BROWSER_ARGS
            )
            page = browser.new_page()

            for page_number in range(1, total_pages + 1):
                time.sleep(2)  # Small delay to avoid overwhelming the server
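                # sortType=total_tranpro_desc appears to rank results by order
                # count (undocumented AliExpress query parameter)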
                search_url = f"{self.base_url}/w/wholesale-{search_term.replace(' ', '-')}.html?page={page_number}&sortType=total_tranpro_desc"
                print(f"Scraping: {search_url}")

                try:
                    page.goto(search_url, timeout=60000)
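                    # '#card-list' is the results container on current AliExpress
                    # search pages; update the selector if the site markup changes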
                    page.wait_for_selector('#card-list', timeout=30000)

                    # Scroll to load more products
                    for _ in range(5):
                        page.evaluate("window.scrollBy(0, window.innerHeight)")
                        page.wait_for_timeout(1000)

                    soup = BeautifulSoup(page.inner_html("#card-list"), "lxml")
                    items = soup.select(self.ITEM_SELECTOR)

                    for item in items:
                        product_info = extract_all_product_info(item)

                        if (product_info and
                            product_info.get('name') and
                            self.is_valid_product_link(product_info.get('link')) and
                            product_info.get('price') and
                            product_info.get('price').strip()):  # Only add if we have valid data, valid link, and non-empty price
                            product_info['search_term'] = search_term
                            product_info['page'] = page_number
                            products_data.append(product_info)

                except Exception as e:
                    print(f"Error scraping {search_url}: {e}")
                    continue

            browser.close()

        return products_data

    def scrape_trending_categories(self, total_pages: int = 2, include_store_details: bool = False) -> List[Dict]:
        """Scrape trending products from category URLs"""
        products_data = []

        with sync_playwright() as p:
            browser = p.chromium.launch_persistent_context(
                self.user_data_dir,
                headless=self.headless,
                args=self.BROWSER_ARGS
            )
            page = browser.new_page()

            for url in self.get_trendy_categories_urls():
                for page_number in range(1, total_pages + 1):
                    page_url = f"{url}&page={page_number}"
                    print(f"Scraping category: {page_url}")

                    try:
                        page.goto(page_url, timeout=60000)
                        page.wait_for_selector('#card-list', timeout=30000)

                        # Scroll to load more products
                        for _ in range(5):
                            page.evaluate("window.scrollBy(0, window.innerHeight)")
                            page.wait_for_timeout(1000)

                        soup = BeautifulSoup(page.inner_html("#card-list"), "lxml")
                        items = soup.select(self.ITEM_SELECTOR)

                        for item in items:
                            product_info = extract_all_product_info(item)

                            if (product_info and
                                product_info.get('name') and
                                self.is_valid_product_link(product_info.get('link')) and
                                product_info.get('price') and
                                product_info.get('price').strip()):
                                product_info['category_url'] = url
                                product_info['page'] = page_number
                                products_data.append(product_info)

                    except Exception as e:
                        print(f"Error scraping {page_url}: {e}")
                        continue

            browser.close()

        return products_data

    def scrape_demo_trending(self, total_pages: int = 1, include_store_details: bool = False) -> List[Dict]:
        """Demo mode: Scrape only from wholesale-trending.html for fast testing"""
        products_data = []

        with sync_playwright() as p:
            browser = p.chromium.launch_persistent_context(
                self.user_data_dir,
                headless=self.headless,
                args=self.BROWSER_ARGS
            )
            page = browser.new_page()

            # Only scrape from wholesale-trending.html
            demo_url = f"{self.base_url}/w/wholesale-trending.html"

            for page_number in range(1, total_pages + 1):
                page_url = f"{demo_url}?page={page_number}&sortType=total_tranpro_desc"
                print(f"🚀 Demo Mode - Scraping: {page_url}")

                try:
                    page.goto(page_url, timeout=60000)
                    page.wait_for_selector('#card-list', timeout=30000)

                    # Light scrolling for demo mode
                    for _ in range(3):
                        page.evaluate("window.scrollBy(0, window.innerHeight)")
                        page.wait_for_timeout(1000)

                    soup = BeautifulSoup(page.inner_html("#card-list"), "lxml")
                    items = soup.select(self.ITEM_SELECTOR)

                    print(f"📦 Found {len(items)} items on page {page_number}")

                    for item in items:
                        product_info = extract_all_product_info(item)

                        if (product_info and
                            product_info.get('name') and
                            self.is_valid_product_link(product_info.get('link')) and
                            product_info.get('price') and
                            product_info.get('price').strip()):
                            product_info['source'] = 'demo-trending'
                            product_info['demo_url'] = demo_url
                            product_info['page'] = page_number
                            products_data.append(product_info)

                except Exception as e:
                    print(f"❌ Error scraping {page_url}: {e}")
                    continue

            browser.close()

        print(f"✅ Demo completed! Found {len(products_data)} products from trending page.")
        return products_data

    def get_trending_products(self, method: str = "search", pages: int = 3, include_store_details: bool = False, demo_mode: bool = False) -> List[Dict]:
        """Get trending products using the specified method.

        include_store_details is threaded through for CLI compatibility but is
        not used during scraping itself (store data is presumably filled in by
        fix_store_info.py, run at the end of main()).
        """
        all_products = []

        if demo_mode:
            print("🚀 Demo Mode: Scraping only wholesale-trending.html for fast testing...")
            all_products = self.scrape_demo_trending(pages, include_store_details)

        elif method == "search":
            print("Scraping trending products by search terms...")
            for search_term in self.get_trending_search_terms()[:5]:  # Limit to first 5 terms
                print(f"Searching for: {search_term}")
                products = self.scrape_trending_by_search(search_term, pages, include_store_details)
                all_products.extend(products)

        elif method == "categories":
            print("Scraping trending products from categories...")
            all_products = self.scrape_trending_categories(pages, include_store_details)

        else:
            print("Using both methods...")
            # Search method
            for search_term in self.get_trending_search_terms()[:3]:
                products = self.scrape_trending_by_search(search_term, pages, include_store_details)
                all_products.extend(products)
            # Category method
            category_products = self.scrape_trending_categories(pages, include_store_details)
            all_products.extend(category_products)

        # Remove duplicates based on product link
        unique_products = []
        seen_links = set()

        for product in all_products:
            link = product.get('link', '')
            if link and link not in seen_links:
                seen_links.add(link)
                unique_products.append(product)

        # Sort by sales count (sold) and rating
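        # NOTE: assumes the extractor returns numeric 'sold' and 'rate' values;
        # convert them first if they arrive as strings (e.g. "1,000+ sold")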
        sorted_products = sorted(
            unique_products,
            key=lambda x: (x.get('sold', 0), x.get('rate', 0)),
            reverse=True
        )

        return sorted_products

    def save_to_json(self, products: List[Dict], filename: str = "aliexpress_trending_products.json"):
        """Save products data to JSON file"""
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(products, f, indent=2, ensure_ascii=False)
        print(f"Saved {len(products)} products to {filename}")

    def print_summary(self, products: List[Dict]):
        """Print a summary of scraped products"""
        print(f"\n=== AliExpress Trending Products Summary ===")
        print(f"Total products found: {len(products)}")

        if products:
            # Average only over prices that actually parse as numbers
            prices = []
            for p in products:
                raw = p.get('price', '').replace('$', '').replace('€', '').replace('£', '').replace(',', '').strip()
                try:
                    prices.append(float(raw))
                except ValueError:
                    continue
            avg_price = sum(prices) / len(prices) if prices else 0.0
            avg_sold = sum(p.get('sold', 0) for p in products) / len(products)
            avg_rating = sum(p.get('rate', 0) for p in products) / len(products)

            print(f"Average price: ${avg_price:.2f}")
            print(f"Average sold count: {avg_sold:.0f}")
            print(f"Average rating: {avg_rating:.1f}")

            print(f"\nTop 5 trending products:")
            for i, product in enumerate(products[:5], 1):
                print(f"{i}. {product.get('name', 'N/A')[:50]}...")
                print(f"   Price: {product.get('price', 'N/A')} | Sold: {product.get('sold', 0)} | Rating: {product.get('rate', 0)}")


def main():
    parser = argparse.ArgumentParser(description="Scrape AliExpress trending products")
    parser.add_argument('--method', choices=['search', 'categories', 'both'], default='search',
                       help='Method to use for scraping (default: search)')
    parser.add_argument('--pages', type=int, default=3,
                       help='Number of pages to scrape per search/category (default: 3)')
    parser.add_argument('--output', type=str, default='aliexpress_trending_products.json',
                       help='Output JSON filename (default: aliexpress_trending_products.json)')
    parser.add_argument('--store-details', action='store_true',
                       help='Include detailed store information (slower but more complete)')
    parser.add_argument('--demo', action='store_true',
                       help='Demo mode: Only scrape wholesale-trending.html for fast testing')
    parser.add_argument('--count', type=int, default=None,
                       help='Limit the number of products to fetch store info for (default: all)')
    parser.add_argument('--headless', action='store_true',
                       help='Run browser in headless mode (default: visible)')

    args = parser.parse_args()

    scraper = AliExpressTrendyScraper(headless=args.headless)

    if args.demo:
        print("🚀 Starting AliExpress Demo Mode (Fast Testing)...")
        print("📍 Only scraping wholesale-trending.html")
        if args.store_details:
            print("⚠️  Store details mode enabled - this will be slower but more comprehensive")

        products = scraper.get_trending_products(
            pages=args.pages,
            include_store_details=args.store_details,
            demo_mode=True
        )
    else:
        print("Starting AliExpress trending products scraping...")
        if args.store_details:
            print("⚠️  Store details mode enabled - this will be slower but more comprehensive")

        products = scraper.get_trending_products(
            method=args.method,
            pages=args.pages,
            include_store_details=args.store_details,
            demo_mode=False
        )

    # When --store-details and --count are both set, keep only the first N
    # products so store info is fetched for just that subset
    if args.store_details and args.count is not None:
        products = products[:args.count]

    scraper.save_to_json(products, args.output)
    scraper.print_summary(products)

    print(f"\nScraping completed! Check {args.output} for detailed results.")
    # Run the companion fix_store_info.py script from this script's directory,
    # reusing the interpreter that launched this script
    current_dir = os.path.dirname(os.path.abspath(__file__))
    fix_script_path = os.path.join(current_dir, "fix_store_info.py")
    subprocess.run([sys.executable, fix_script_path])


if __name__ == "__main__":
    main()
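
# Example invocations (script filename illustrative):
#   python trendy_scraper.py --demo --pages 1
#   python trendy_scraper.py --method categories --pages 2 --headless
#   python trendy_scraper.py --method both --store-details --count 50 --output trending.json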
