import asyncio
from playwright.async_api import async_playwright
import os
import json
import re
import html
import configparser
import time
import argparse
from datetime import datetime


class EbayLogin:
    def __init__(self, debug=False):
        self.playwright = None
        self.context = None  # Will be the persistent context
        self.page = None
        self.user_data_dir = "./browser-data"  # Keep browser data separate
        self.files_dir = "./files"  # New directory for output files
        self.cookies_file = f"{self.user_data_dir}/cookies.json"
        self.session_file = f"{self.user_data_dir}/session.json"
        self.storage_state_file = f"{self.user_data_dir}/ebay_state.json"
        self.start_time = None
        self.operation_times = {}
        self.login_status_cache = None  # Cache login status to avoid repeated checks
        self.cache_timeout = 300  # Cache for 5 minutes
        self.debug = debug  # Debug mode flag
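
    # Resulting on-disk layout (paths taken from the attributes above):
    #   ./browser-data/                 - persistent Firefox profile (user_data_dir)
    #   ./browser-data/cookies.json     - cookies written by save_cookies()
    #   ./browser-data/session.json     - session metadata written by save_session_data()
    #   ./browser-data/ebay_state.json  - Playwright storage_state snapshot
    #   ./files/                        - research output files (JSON/HTML)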

    def ensure_files_directory(self):
        """Ensure the files directory exists"""
        os.makedirs(self.files_dir, exist_ok=True)

    def debug_print(self, message):
        """Print message only if debug mode is enabled"""
        if self.debug:
            print(message)

    def log_time(self, operation_name, start_time=None):
        """Record how long an operation took; also serves as a timer start when no start_time is given"""
        current_time = time.time()
        if start_time is not None:
            duration = current_time - start_time
            self.operation_times[operation_name] = duration
            self.debug_print(f"⏱️ {operation_name}: {duration:.2f} seconds")
        return current_time

    def get_runtime_summary(self):
        """Get a summary of all operation times"""
        if not self.operation_times:
            return "No operations timed"

        total_time = sum(self.operation_times.values())
        summary = f"\n{'='*50}\n📊 RUNTIME SUMMARY\n{'='*50}\n"

        for operation, duration in self.operation_times.items():
            percentage = (duration / total_time) * 100 if total_time > 0 else 0
            summary += f"{operation:<35}: {duration:>8.2f}s ({percentage:>5.1f}%)\n"

        summary += f"{'─'*50}\n"
        summary += f"{'TOTAL RUNTIME':<35}: {total_time:>8.2f}s (100.0%)\n"
        summary += f"{'='*50}\n"

        return summary

    def load_credentials(self, config_file="config.info"):
        """Load email and password from config file"""
        start_time = self.log_time("Loading credentials")
        try:
            config = configparser.ConfigParser()

            # If config_file is just a filename, use the script's directory
            if not os.path.isabs(config_file):
                script_dir = os.path.dirname(os.path.abspath(__file__))
                config_file = os.path.join(script_dir, config_file)

            self.debug_print(f"Looking for config file at: {config_file}")
            config.read(config_file)

            if not config.has_section('settings'):
                raise configparser.NoSectionError('settings')

            email = config.get('settings', 'username')
            password = config.get('settings', 'password')

            self.debug_print(f"Loaded credentials for user: {email}")
            self.log_time("Load credentials", start_time)
            return email, password
        except Exception as e:
            print(f"Error loading credentials from {config_file}: {str(e)}")  # Keep error messages always visible
            self.log_time("Load credentials (failed)", start_time)
            return None, None
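
    # Illustrative config.info layout expected by load_credentials() above; the
    # section and key names come from the code, the values are placeholders:
    #
    #   [settings]
    #   username = your-ebay-email@example.com
    #   password = your-ebay-password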

    def clean_html_wrapper(self, content):
        """Remove HTML wrapper tags from response content to extract clean JSON"""

        # Remove common HTML wrapper patterns
        patterns = [
            # Firefox plain text wrapper
            r'<html><head><link rel="stylesheet" href="resource://content-accessible/plaintext\.css"></head><body><pre>(.*?)</pre></body></html>',
            # Generic HTML wrappers
            r'<html[^>]*>.*?<body[^>]*>.*?<pre[^>]*>(.*?)</pre>.*?</body>.*?</html>',
            r'<pre[^>]*>(.*?)</pre>',
            # Basic HTML tags
            r'<html[^>]*>|</html>',
            r'<head[^>]*>.*?</head>',
            r'<body[^>]*>|</body>',
            r'<link[^>]*>',
        ]

        cleaned_content = content

        for pattern in patterns:
            match = re.search(pattern, cleaned_content, re.DOTALL | re.IGNORECASE)
            if match and len(match.groups()) > 0:
                # If there's a capture group, use it (the content inside tags)
                cleaned_content = match.group(1)
                break
            else:
                # Remove the tags without capture groups
                cleaned_content = re.sub(pattern, '', cleaned_content, flags=re.DOTALL | re.IGNORECASE)

        # Decode any remaining HTML entities
        cleaned_content = html.unescape(cleaned_content)

        # Drop empty lines, then drop the leading line (assumed to be a non-JSON
        # header left over from the browser's response viewer)
        cleaned_content = "\n".join(line for line in cleaned_content.splitlines() if line.strip() != "")
        cleaned_content = "\n".join(cleaned_content.splitlines()[1:])
        return cleaned_content.strip()

    async def save_cookies(self):
        """Save browser cookies to file"""
        try:
            if not self.context:
                return False
            cookies = await self.context.cookies()
            with open(self.cookies_file, 'w') as f:
                json.dump(cookies, f, indent=2)
            self.debug_print(f"Cookies saved to {self.cookies_file}")
            return True
        except Exception as e:
            print(f"Error saving cookies: {str(e)}")  # Keep error messages visible
            return False

    async def load_cookies(self):
        """Load browser cookies from file"""
        try:
            if not os.path.exists(self.cookies_file):
                return False
            with open(self.cookies_file, 'r') as f:
                cookies = json.load(f)
            if self.context and cookies:
                await self.context.add_cookies(cookies)
                self.debug_print(f"Cookies loaded from {self.cookies_file}")
                return True
            return False
        except Exception as e:
            print(f"Error loading cookies: {str(e)}")  # Keep error messages visible
            return False

    async def save_session_data(self):
        """Save complete session state including storage, cookies, and local storage"""
        try:
            if not self.context:
                return False

            # Save storage state (includes cookies, local storage, session storage)
            await self.context.storage_state(path=self.storage_state_file)

            # Also save cookies separately for easy access
            await self.save_cookies()

            # Save additional session info.
            # Note: _impl_obj._options is a private Playwright attribute and may change
            # between versions; fall back to empty values if it is unavailable.
            context_options = getattr(getattr(self.context, "_impl_obj", None), "_options", None) or {}
            session_info = {
                "timestamp": time.time(),
                "user_agent": context_options.get("userAgent", ""),
                "viewport": context_options.get("viewport", {}),
                "url": self.page.url if self.page else None
            }

            with open(self.session_file, 'w') as f:
                json.dump(session_info, f, indent=2)

            self.debug_print("Complete session data saved successfully")
            return True
        except Exception as e:
            print(f"Error saving session data: {str(e)}")  # Keep error messages visible
            return False

    async def restore_session_data(self):
        """Restore complete session state"""
        try:
            if os.path.exists(self.storage_state_file):
                self.debug_print("Session data will be restored by persistent context...")
                return True
            elif os.path.exists(self.cookies_file):
                self.debug_print("Restoring cookies only...")
                await self.load_cookies()
                return True
            else:
                self.debug_print("No session data found, starting fresh session")
                return False
        except Exception as e:
            print(f"Error restoring session data: {str(e)}")
            return False

    async def start_browser(self, headless=False):
        """Start Firefox browser with persistent context"""
        start_time = self.log_time("Starting browser")
        self.playwright = await async_playwright().start()

        # Create user data directory if it doesn't exist
        os.makedirs(self.user_data_dir, exist_ok=True)

        # Launch Firefox with persistent context - this automatically handles session persistence
        self.context = await self.playwright.firefox.launch_persistent_context(
            user_data_dir=self.user_data_dir,
            headless=headless,
            slow_mo=0,  # Remove artificial delays for faster execution
            viewport={'width': 1136, 'height': 500},
            user_agent='Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/115.0'
        )

        # Get the first page or create a new one
        if self.context.pages:
            self.page = self.context.pages[0]
        else:
            self.page = await self.context.new_page()

        self.debug_print("Browser started with persistent context")
        self.log_time("Browser startup", start_time)

    async def check_login_status(self):
        """Check if user is currently logged in to eBay"""
        start_time = self.log_time("Checking login status")

        # Use cached result if available and recent
        if self.login_status_cache is not None:
            cache_age = time.time() - self.login_status_cache.get('timestamp', 0)
            if cache_age < self.cache_timeout:
                self.debug_print(f"Using cached login status: {'✅ Logged in' if self.login_status_cache['status'] else '❌ Not logged in'}")
                self.log_time("Login status check (cached)", start_time)
                return self.login_status_cache['status']

        try:
            await self.page.goto('https://www.ebay.com/sh/ovw', wait_until='domcontentloaded', timeout=10000)

            # Quick check for login inputs
            user_input = await self.page.query_selector("input#userid")
            pass_input = await self.page.query_selector("input#pass")
            send_button = await self.page.query_selector("button#send-button")
            is_logged_in = not (user_input and pass_input and send_button)

            # Cache the result
            self.login_status_cache = {
                'status': is_logged_in,
                'timestamp': time.time()
            }

            if is_logged_in:
                self.debug_print("Login inputs not found on the page. User appears to be logged in.")
                self.log_time("Login status check (logged in)", start_time)
                return True
            else:
                self.debug_print("❌ Not logged in to eBay")
                self.log_time("Login status check (not logged in)", start_time)
                return False

        except Exception as e:
            print(f"Error checking login status: {str(e)}")
            self.log_time("Login status check (error)", start_time)
            return False

    async def login_to_ebay(self, email, password):
        """Login to eBay with provided credentials"""
        start_time = self.log_time("eBay login process")
        try:
            # Navigate to eBay login page with faster loading
            await self.page.goto('https://www.ebay.com/sh/ovw', wait_until='domcontentloaded', timeout=10000)

            # If "switch account" anchor exists on the page, click it to reveal the login form
            switch_anchor = await self.page.query_selector("#switch-account-anchor")
            user_input = await self.page.query_selector("input#userid")
            pass_input = await self.page.query_selector("input#pass")
            continue_btn = await self.page.query_selector("#signin-continue-btn")

            # If the login inputs are not found, attempt to detect whether we're already logged in
            if not user_input or not pass_input:
                print("Login inputs not found on the page. Checking login indicators as a fallback...")

                login_indicators = [
                    'text=usspan',
                    'css=span.textbox.search-input-panel__inputBox',
                    'css=.textbox.search-input-panel__inputBox',
                    'xpath=//span[contains(@class,"textbox") and contains(@class,"search-input-panel__inputBox")]',
                    'css=.shui-header__user-profile',
                    'xpath=//*[contains(@class,"shui-header__user-profile")]',
                    'text=My eBay',
                    '[data-testid="x-header-my-ebay"]',
                    '.gh-ua span:has-text("Hi")',
                    '#gh-uo a[title*="My eBay"]'
                ]

                for indicator in login_indicators:
                    try:
                        print(f"Checking for login indicator: {indicator}")
                        element = await self.page.locator(indicator).first
                        if await element.is_visible():
                            print("✅ Already logged in to eBay (fallback detected)")
                            return True
                    except Exception:
                        continue

                # Check for CAPTCHA on the first page before redirecting
                captcha_selectors = [
                    "iframe[src*='captcha']",
                    ".captcha",
                    "#captcha",
                    "[id*='captcha']",
                    "[class*='captcha']",
                    "iframe[src*='recaptcha']",
                    ".g-recaptcha",
                    "#g-recaptcha"
                ]

                captcha_on_first_page = False
                for selector in captcha_selectors:
                    if await self.page.query_selector(selector):
                        captcha_on_first_page = True
                        print("CAPTCHA detected on first page! Not redirecting to signin page.")
                        break

                if not captcha_on_first_page:
                    # If we reach here and no CAPTCHA, try opening the signin page explicitly and re-query inputs.
                    print("Login indicators not found. Navigating to explicit signin page and retrying inputs...")
                    await self.page.goto('https://signin.ebay.com/', wait_until='domcontentloaded', timeout=10000)
                    await self.page.wait_for_timeout(1000)  # Reduced from 2000ms

                    # Re-query elements after navigating to the signin page (previous handles are stale)
                    user_input = await self.page.query_selector("input#userid")
                    pass_input = await self.page.query_selector("input#pass")
                    continue_btn = await self.page.query_selector("#signin-continue-btn")
                    switch_anchor = await self.page.query_selector("#switch-account-anchor")

            # Check for CAPTCHA before proceeding
            captcha_selectors = [
                "iframe[src*='captcha']",
                ".captcha",
                "#captcha",
                "[id*='captcha']",
                "[class*='captcha']",
                "iframe[src*='recaptcha']",
                ".g-recaptcha",
                "#g-recaptcha"
            ]

            captcha_present = False
            for selector in captcha_selectors:
                if await self.page.query_selector(selector):
                    captcha_present = True
                    break

            if captcha_present:
                print("CAPTCHA detected! Waiting 5 seconds before retrying...")
                await self.page.wait_for_timeout(5000)  # Reduced from 10 seconds
                # Re-check elements after waiting
                switch_anchor = await self.page.query_selector("#switch-account-anchor")
                pass_input = await self.page.query_selector("input#pass")

            if switch_anchor and pass_input:
                await switch_anchor.click()
                await self.page.wait_for_timeout(500)  # Reduced from 1000ms

            if user_input:
                await user_input.fill(email)
                if continue_btn:
                    await continue_btn.click()
                    await self.page.wait_for_timeout(1000)
                    # The password field is often only rendered after the email step,
                    # so re-query it once the continue click has been processed
                    pass_input = await self.page.query_selector("input#pass")

            if pass_input:
                await pass_input.fill(password)

            # Click sign in / submit button if present
            sign_in_btn = await self.page.query_selector("button#sgnBt, button#signin-btn, button[type='submit']")
            if sign_in_btn:
                await sign_in_btn.click()
                await self.page.wait_for_load_state("domcontentloaded", timeout=10000)  # Faster load state
                await self.page.wait_for_timeout(1000)  # Reduced from 2000ms

            # Save session data after login attempt
            await self.save_session_data()

            # Check if login was successful
            if await self.check_login_status():
                self.debug_print("✅ Login successful!")
                self.log_time("eBay login (successful)", start_time)
                return True
            else:
                print("❌ Login may have failed or requires additional verification")  # Keep important status visible
                self.log_time("eBay login (failed)", start_time)
                return False

        except Exception as e:
            print(f"Error during login: {str(e)}")
            self.log_time("eBay login (error)", start_time)
            return False

    async def manual_login_assistance(self, email, password):
        """Open login page, populate credentials, and wait for user to complete login manually"""
        start_time = self.log_time("Manual login assistance")
        try:
            print("🔧 Starting manual login assistance...")

            # Navigate to eBay login page
            await self.page.goto('https://signin.ebay.com/', wait_until='domcontentloaded', timeout=15000)
            await self.page.wait_for_timeout(2000)  # Wait for page to fully load

            # Try to find and populate email field
            user_input = await self.page.query_selector("input#userid")
            if user_input:
                await user_input.fill(email)
                print(f"✅ Email populated: {email}")

                # Look for continue button and click it
                continue_btn = await self.page.query_selector("#signin-continue-btn")
                if continue_btn:
                    await continue_btn.click()
                    await self.page.wait_for_timeout(2000)
                    print("✅ Clicked continue button")
            else:
                print("⚠️ Email field not found, you may need to enter it manually")

            # Try to find and populate password field
            pass_input = await self.page.query_selector("input#pass")
            if pass_input:
                await pass_input.fill(password)
                print("✅ Password populated")
            else:
                print("⚠️ Password field not found, you may need to enter it manually")

            # Check for switch account option
            switch_anchor = await self.page.query_selector("#switch-account-anchor")
            if switch_anchor:
                print("ℹ️ Switch account option available if needed")

            # Don't automatically submit - let user complete manually
            print("\n" + "="*60)
            print("🚀 LOGIN PAGE PREPARED!")
            print("="*60)
            print("✅ Browser opened to eBay login page")
            print("✅ Email and password have been populated")
            print("")
            print("📋 Next steps:")
            print("   1. Review the populated credentials")
            print("   2. Complete any CAPTCHA if present")
            print("   3. Click 'Sign In' button")
            print("   4. Complete any 2FA/verification if required")
            print("   5. Wait until you see your eBay dashboard/homepage")
            print("   6. Press Enter here to continue...")
            print("="*60)

            input("Press Enter once you have successfully logged in...")

            # Verify login was successful
            if await self.check_login_status():
                print("✅ Login verification successful!")
                await self.save_session_data()
                self.log_time("Manual login assistance (successful)", start_time)
                return True
            else:
                print("❌ Login verification failed. Please check your login status.")
                self.log_time("Manual login assistance (failed)", start_time)
                return False

        except Exception as e:
            print(f"Error during manual login assistance: {str(e)}")
            self.log_time("Manual login assistance (error)", start_time)
            return False

    async def fetch_ebay_research_data(self, keywords="iphone case", day_range=90, offset=0, limit=50, category_id=0, skip_login_check=False,
                                     minPrice=None, maxPrice=None, topRatedProducts=None, marketplace="EBAY-US",
                                     tabName="SOLD", tz="Africa%2FCairo", modules="searchResults",
                                     endDate=None, startDate=None, format=None, BuyerLocation=None):
        """Fetch data from eBay research API and save response"""
        start_time = self.log_time(f"Fetching data: '{keywords}' (offset: {offset})")
        try:
            # Ensure we're logged in first (unless skipping check for performance)
            if not skip_login_check:
                if not await self.check_login_status():
                    print("❌ Not logged in. Cannot fetch research data.")
                    print("🔄 Please log in manually in the browser window and press Enter to continue...")
                    input("Press Enter after logging in manually...")

                    # Check again after manual login
                    if not await self.check_login_status():
                        print("❌ Still not logged in. Stopping execution.")
                        self.log_time(f"Fetch data failed: not logged in", start_time)
                        return None
                    else:
                        print("✅ Login detected! Continuing with data fetch...")
            else:
                self.debug_print("⚡ Skipping login check for faster execution...")

            # Calculate timestamps for the date range (use provided dates or calculate from day_range)
            if endDate is None:
                end_timestamp = int(time.time() * 1000)  # Current time in milliseconds
            else:
                end_timestamp = endDate

            if startDate is None:
                start_timestamp = end_timestamp - (day_range * 24 * 60 * 60 * 1000)  # X days ago
            else:
                start_timestamp = startDate

            # Construct the API URL with parameters
            api_url = (
                f"https://www.ebay.com/sh/research/api/search?"
                f"marketplace={marketplace}&"
                f"keywords={keywords.replace(' ', '+')}&"
                f"dayRange={day_range}&"
                f"endDate={end_timestamp}&"
                f"startDate={start_timestamp}&"
                f"categoryId={category_id}&"
                f"offset={offset}&"
                f"limit={limit}&"
                f"tabName={tabName}&"
                f"tz={tz}&"
                f"modules={modules}"
            )
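
            # For illustration only (placeholder values), the base URL built above looks like:
            #   https://www.ebay.com/sh/research/api/search?marketplace=EBAY-US&keywords=iphone+case
            #     &dayRange=90&endDate=1700000000000&startDate=1692224000000&categoryId=0
            #     &offset=0&limit=50&tabName=SOLD&tz=Africa%2FCairo&modules=searchResults
            # Optional filters such as minPrice/maxPrice are appended below only when provided.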

            # Add optional parameters only if they are provided
            if minPrice is not None and minPrice != "":
                api_url += f"&minPrice={minPrice}"

            if maxPrice is not None and maxPrice != "":
                api_url += f"&maxPrice={maxPrice}"

            if topRatedProducts is not None and topRatedProducts != "":
                api_url += f"&topRatedProducts={topRatedProducts}"

            if format is not None and format != "":
                api_url += f"&format={format}"

            if BuyerLocation is not None and BuyerLocation != "":
                api_url += f"&BuyerLocation={BuyerLocation}"

            self.debug_print(f"🔍 Fetching eBay research data for: {keywords}")
            self.debug_print(f"📊 Parameters: {day_range} days, offset: {offset}, limit: {limit}")
            self.debug_print(f"🔗 API URL: {api_url}")

            # Navigate to the API endpoint with optimized timeout
            response = await self.page.goto(api_url, wait_until='domcontentloaded', timeout=15000)

            # Wait for the JSON response to load with shorter timeout
            try:
                # Wait for the body to contain data or timeout after 5 seconds
                await self.page.wait_for_function(
                    "document.body.textContent && document.body.textContent.length > 100",
                    timeout=5000
                )
            except Exception:
                print("⚠️ Quick response check timed out, proceeding anyway...")

            # Get the response content
            content = await self.page.content()

            # Try to extract JSON from the page
            try:
                # Check if the page contains JSON data
                page_text = await self.page.evaluate("document.body.textContent")

                # Clean HTML wrapper tags if present
                cleaned_text = self.clean_html_wrapper(page_text)
                self.debug_print(f"🧹 Cleaned response length: {len(cleaned_text)} chars")

                # Quick validation - check if response looks like JSON
                if not cleaned_text.strip().startswith('{'):
                    raise json.JSONDecodeError("Response doesn't appear to be JSON", cleaned_text, 0)

                # Try to parse as JSON
                data = json.loads(cleaned_text)

                # Quick validation of expected structure
                if 'results' not in data:
                    print("⚠️ Warning: No 'results' key found in response")
                    return {"content": cleaned_text, "url": api_url}

                # Optimized data extraction - only process needed fields
                ar = []
                results = data.get('results', [])
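
                # Assumed shape of each result item (inferred from the field accesses
                # below, not from any official eBay API documentation):
                #   {
                #     "listing": {"image": {"URL": ...}, "itemId": {"value": ...},
                #                 "title": {"textSpans": [{"text": ...}]}},
                #     "avgsalesprice": {"avgsalesprice": {"textSpans": [{"text": ...}]}},
                #     "itemssold":    {"textSpans": [{"text": ...}]},
                #     "totalsales":   {"textSpans": [{"text": ...}]},
                #     "datelastsold": {"textSpans": [{"text": ...}]},
                #     "dsaNotification": ...   # items carrying this key are skipped
                #   }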

                for item in results:
                    # Skip items with dsaNotification to save processing time
                    if item.get('dsaNotification'):
                        continue

                    # Use get with defaults to avoid multiple nested get calls
                    listing = item.get('listing', {})
                    image = listing.get('image', {}).get('URL')

                    # Extract nested text and price fields safely with fallbacks
                    def safe_extract_text(obj, path, index=0):
                        try:
                            return obj.get(path, {}).get('textSpans', [])[index].get('text', '')
                        except (IndexError, AttributeError):
                            return None

                    def safe_extract_price(obj):
                        try:
                            return obj.get('avgsalesprice', {}).get('avgsalesprice', {}).get('textSpans', [])[0].get('text', '')
                        except (IndexError, AttributeError):
                            return None

                    tmp = {
                        "image": image,
                        "itemId": listing.get('itemId', {}).get('value'),
                        "title": safe_extract_text(listing, 'title'),
                        "price": safe_extract_price(item),
                        "itemsSold": safe_extract_text(item, 'itemssold', 0).replace(',', '') if safe_extract_text(item, 'itemssold', 0) else '',
                        "totalsales": safe_extract_text(item, 'totalsales', 0).replace(',', '') if safe_extract_text(item, 'totalsales', 0) else '',
                        "datelastsold": safe_extract_text(item, 'datelastsold', 0),
                    }
                    ar.append(tmp)
                # Create filename with timestamp and keywords
                timestamp = int(time.time())
                safe_keywords = keywords.replace(' ', '_').replace('+', '_')
                filename = f"ebay_research_{safe_keywords}_{timestamp}.json"

                # Ensure files directory exists and save to files folder
                self.ensure_files_directory()
                filepath = f"{self.files_dir}/{filename}"

                # Save the response data with optimized JSON writing
                with open(filepath, 'w', encoding='utf-8') as f:
                    json.dump(ar, f, separators=(',', ':'), ensure_ascii=False)  # Compact JSON for faster writing

                self.debug_print(f"✅ Research data saved to: {filepath}")
                self.debug_print(f"📈 Found {len(ar)} items processed from {len(results)} total results")

                self.log_time(f"Fetch data successful: '{keywords}'", start_time)
                return data

            except json.JSONDecodeError as e:
                print(f"❌ JSON parsing failed: {str(e)}")
                # If not JSON, save cleaned content as text/HTML
                timestamp = int(time.time())
                safe_keywords = keywords.replace(' ', '_').replace('+', '_')

                # Try to save cleaned content first
                cleaned_content = self.clean_html_wrapper(page_text)
                if cleaned_content and cleaned_content != page_text:
                    filename = f"ebay_research_{safe_keywords}_{timestamp}_cleaned.txt"
                    self.ensure_files_directory()
                    filepath = f"{self.files_dir}/{filename}"

                    with open(filepath, 'w', encoding='utf-8') as f:
                        f.write(cleaned_content)

                    print(f"✅ Cleaned response saved to: {filepath}")
                else:
                    # Save original HTML if cleaning didn't help
                    filename = f"ebay_research_{safe_keywords}_{timestamp}.html"
                    self.ensure_files_directory()
                    filepath = f"{self.files_dir}/{filename}"

                    with open(filepath, 'w', encoding='utf-8') as f:
                        f.write(content)

                    print(f"✅ Original response saved as HTML to: {filepath}")

                self.log_time(f"Fetch data (non-JSON): '{keywords}'", start_time)
                return {"content": cleaned_content or content, "url": api_url}

        except Exception as e:
            print(f"❌ Error fetching eBay research data: {str(e)}")
            self.log_time(f"Fetch data error: '{keywords}'", start_time)
            return None

    async def fetch_multiple_pages(self, keywords="iphone case", day_range=90, total_items=200, items_per_page=50):
        """Fetch multiple pages of eBay research data"""
        start_time = self.log_time(f"Multi-page fetch: '{keywords}' ({total_items} items)")
        try:
            # Check login status before starting the fetch process
            if not await self.check_login_status():
                print("❌ Not logged in. Cannot fetch research data.")
                print("🔄 Please log in manually in the browser window and press Enter to continue...")
                input("Press Enter after logging in manually...")

                # Check again after manual login
                if not await self.check_login_status():
                    print("❌ Still not logged in. Stopping execution.")
                    self.log_time(f"Multi-page fetch failed: not logged in", start_time)
                    return None
                else:
                    print("✅ Login detected! Continuing with multi-page fetch...")

            all_data = []
            pages_fetched = 0
            offset = 0

            print(f"🔄 Starting multi-page fetch for: {keywords}")
            print(f"📊 Target: {total_items} items, {items_per_page} per page")

            while len(all_data) < total_items:
                print(f"\n📄 Fetching page {pages_fetched + 1} (offset: {offset})...")

                page_data = await self.fetch_ebay_research_data(
                    keywords=keywords,
                    day_range=day_range,
                    offset=offset,
                    limit=items_per_page
                )

                if page_data and 'results' in page_data:
                    # Use the top-level 'results' key, matching how responses are
                    # parsed elsewhere in this file
                    items = page_data.get('results', [])
                    all_data.extend(items)

                    print(f"✅ Page {pages_fetched + 1}: Found {len(items)} items")
                    print(f"📈 Total collected: {len(all_data)} items")

                    # If we got fewer items than requested, we've reached the end
                    if len(items) < items_per_page:
                        print("📄 Reached end of results")
                        break

                else:
                    print(f"❌ Failed to fetch page {pages_fetched + 1}")
                    break

                pages_fetched += 1
                offset += items_per_page

                # Minimal delay between requests to be respectful but faster
                await asyncio.sleep(0.5)  # Reduced from 1 second

            # Save combined results
            if all_data:
                timestamp = int(time.time())
                safe_keywords = keywords.replace(' ', '_').replace('+', '_')
                filename = f"ebay_research_combined_{safe_keywords}_{timestamp}.json"
                self.ensure_files_directory()
                filepath = f"{self.files_dir}/{filename}"

                combined_data = {
                    "search_parameters": {
                        "keywords": keywords,
                        "day_range": day_range,
                        "total_items_requested": total_items,
                        "items_per_page": items_per_page,
                        "pages_fetched": pages_fetched
                    },
                    "total_items": len(all_data),
                    "items": all_data
                }

                with open(filepath, 'w', encoding='utf-8') as f:
                    json.dump(combined_data, f, indent=2, ensure_ascii=False)

                print(f"\n🎉 Combined results saved to: {filepath}")
                print(f"📊 Total items collected: {len(all_data)}")
                print(f"📄 Pages fetched: {pages_fetched}")

                self.log_time(f"Multi-page fetch successful: '{keywords}'", start_time)
                return combined_data

            self.log_time(f"Multi-page fetch (no data): '{keywords}'", start_time)
            return None

        except Exception as e:
            print(f"❌ Error in multi-page fetch: {str(e)}")
            self.log_time(f"Multi-page fetch error: '{keywords}'", start_time)
            return None

    async def fetch_multiple_offsets(self, keywords="iphone case", day_range=90, limit=50, offsets=[0, 50],
                                    minPrice=None, maxPrice=None, topRatedProducts=None, marketplace="EBAY-US",
                                    category_id=0, tabName="SOLD", tz="Africa%2FCairo", modules="searchResults",
                                    endDate=None, startDate=None, format=None, BuyerLocation=None):
        """Fetch data from multiple offsets and combine results"""
        start_time = self.log_time(f"Multi-offset fetch: '{keywords}' (offsets: {offsets})")

        try:
            # Check login status before starting the fetch process
            if not await self.check_login_status():
                print("❌ Not logged in. Cannot fetch research data.")
                print("🔄 Please log in manually in the browser window and press Enter to continue...")
                input("Press Enter after logging in manually...")

                # Check again after manual login
                if not await self.check_login_status():
                    print("❌ Still not logged in. Stopping execution.")
                    self.log_time(f"Multi-offset fetch failed: not logged in", start_time)
                    return None
                else:
                    print("✅ Login detected! Continuing with multi-offset fetch...")

            all_items = []
            successful_fetches = 0

            self.debug_print(f"🔄 Starting multi-offset fetch for: {keywords}")
            self.debug_print(f"📊 Offsets to fetch: {offsets}")

            for i, offset in enumerate(offsets):
                self.debug_print(f"\n📄 Fetching offset {offset} ({i+1}/{len(offsets)})...")

                # Skip login check after first successful fetch (we're already logged in)
                if successful_fetches > 0:
                    self.debug_print("⚡ Skipping login check - already authenticated")

                # Fetch data for this offset
                page_data = await self.fetch_ebay_research_data(
                    keywords=keywords,
                    day_range=day_range,
                    offset=offset,
                    limit=limit,
                    category_id=category_id,
                    skip_login_check=(successful_fetches > 0),  # Skip login check after first successful fetch
                    minPrice=minPrice,
                    maxPrice=maxPrice,
                    topRatedProducts=topRatedProducts,
                    marketplace=marketplace,
                    tabName=tabName,
                    tz=tz,
                    modules=modules,
                    endDate=endDate,
                    startDate=startDate,
                    format=format,
                    BuyerLocation=BuyerLocation
                )

                if page_data and isinstance(page_data, dict):
                    # Check if we got valid JSON data with results
                    if 'results' in page_data:
                        # Extract the processed items
                        results = page_data.get('results', [])
                        processed_count = 0

                        for item in results:
                            # Skip items with dsaNotification
                            if item.get('dsaNotification'):
                                continue
                            processed_count += 1

                        self.debug_print(f"✅ Offset {offset}: Found {processed_count} valid items from {len(results)} total")
                        all_items.extend(results)
                        successful_fetches += 1

                        # If no results found, assume we've reached the end
                        if len(results) == 0:
                            self.debug_print(f"📄 No more results at offset {offset}, stopping...")
                            break

                    else:
                        self.debug_print(f"⚠️ Offset {offset}: No 'results' in response")
                else:
                    self.debug_print(f"❌ Offset {offset}: Failed to fetch data or invalid response")
                    # Continue to next offset instead of breaking
                    continue

                # Small delay between requests
                if i < len(offsets) - 1:  # Don't wait after the last request
                    await asyncio.sleep(0.5)

            # Process and save combined results
            if all_items:
                # Process all items into the final format
                combined_processed = []
                for item in all_items:
                    if item.get('dsaNotification'):
                        continue

                    listing = item.get('listing', {})
                    image = listing.get('image', {}).get('URL')

                    # Extract text safely with fallbacks
                    def safe_extract_text(obj, path, index=0):
                        try:
                            return obj.get(path, {}).get('textSpans', [])[index].get('text', '')
                        except (IndexError, AttributeError):
                            return None

                    def safe_extract_price(obj):
                        try:
                            return obj.get('avgsalesprice', {}).get('avgsalesprice', {}).get('textSpans', [])[0].get('text', '')
                        except (IndexError, AttributeError):
                            return None
                    tmp = {
                        "image": image,
                        "itemId": listing.get('itemId', {}).get('value'),
                        "price": safe_extract_price(item),
                        "title": safe_extract_text(listing, 'title'),
                        "itemsSold": safe_extract_text(item, 'itemssold', 0).replace(',', '') if safe_extract_text(item, 'itemssold', 0) else '',
                        "totalsales": safe_extract_text(item, 'totalsales', 0).replace(',', '') if safe_extract_text(item, 'totalsales', 0) else '',
                        "datelastsold": safe_extract_text(item, 'datelastsold', 0),
                    }
                    if image is not None:
                        combined_processed.append(tmp)

                # Save combined results
                timestamp = int(time.time())
                safe_keywords = keywords.replace(' ', '_').replace('+', '_')
                filename = f"ebay_research_combined_offsets_{safe_keywords}_{timestamp}.json"
                self.ensure_files_directory()
                filepath = f"{self.files_dir}/{filename}"

                combined_data = {
                    "search_parameters": {
                        "keywords": keywords,
                        "day_range": day_range,
                        "limit_per_offset": limit,
                        "offsets_fetched": offsets[:successful_fetches] if successful_fetches < len(offsets) else offsets,
                        "successful_fetches": successful_fetches
                    },
                    "total_items": len(combined_processed),
                    "items": combined_processed
                }

                with open(filepath, 'w', encoding='utf-8') as f:
                    json.dump(combined_data, f, separators=(',', ':'), ensure_ascii=False)

                # In debug mode: show detailed messages
                self.debug_print(f"\n🎉 Combined results saved to: {filepath}")
                self.debug_print(f"📊 Total items collected: {len(combined_processed)}")
                self.debug_print(f"📄 Successful fetches: {successful_fetches}/{len(offsets)}")

                # In normal mode: show only the file path in JSON format
                if not self.debug:
                    full_path = os.path.abspath(filepath)
                    print(json.dumps({"file": full_path}))

                self.log_time(f"Multi-offset fetch successful: '{keywords}'", start_time)
                return combined_data
            else:
                print("❌ No valid data collected from any offset")
                self.log_time(f"Multi-offset fetch (no data): '{keywords}'", start_time)
                return None

        except Exception as e:
            print(f"❌ Error in multi-offset fetch: {str(e)}")
            self.log_time(f"Multi-offset fetch error: '{keywords}'", start_time)
            return None

    async def close_browser(self):
        """Close the browser and cleanup"""
        try:
            # Save session data before closing
            await self.save_session_data()

            if self.page:
                await self.page.close()
                self.page = None

            if self.context:
                await self.context.close()
                self.context = None

            if self.playwright:
                await self.playwright.stop()
                self.playwright = None

            # Small delay to ensure cleanup completes
            await asyncio.sleep(0.1)

            self.debug_print("Browser closed and session data saved")
        except Exception as e:
            print(f"Warning: Error during browser cleanup: {str(e)}")  # Keep warnings visible
        return


async def main():
    """Example usage"""
    # Parse command line arguments
    parser = argparse.ArgumentParser(description='eBay Research Data Fetcher')

    # Core search parameters
    parser.add_argument('keywords', nargs='?', default='iphone case', help='Search keywords (default: "iphone case")')
    parser.add_argument('--marketplace', default='EBAY-US', help='eBay marketplace (default: EBAY-US)')
    parser.add_argument('--categoryId', type=int, default=0, help='Category ID (default: 0 for all categories)')

    # Price and filtering parameters
    parser.add_argument('--minPrice', type=float, default=None, help='Minimum price filter')
    parser.add_argument('--maxPrice', type=float, default=None, help='Maximum price filter')
    parser.add_argument('--topRatedProducts', choices=['SHOW', 'EXCLUDE'], default=None,
                       help='Top-rated products filter: SHOW (only top-rated) or EXCLUDE (exclude top-rated)')
    parser.add_argument('--format', choices=['FIXED_PRICE', 'AUCTION'], default=None,
                       help='Listing format: FIXED_PRICE or AUCTION')
    parser.add_argument('--BuyerLocation', type=str, default=None, help='Buyer location filter (e.g., "US", "CA", "UK")')

    # Time and pagination parameters
    parser.add_argument('--dayRange', type=int, default=90, help='Day range for search (default: 90)')
    parser.add_argument('--offset', type=int, default=0, help='Search offset (default: 0)')
    parser.add_argument('--limit', type=int, default=50, help='Results limit per request (default: 50)')
    parser.add_argument('--endDate', type=int, default=None, help='End date timestamp (milliseconds)')
    parser.add_argument('--startDate', type=int, default=None, help='Start date timestamp (milliseconds)')

    # Technical parameters
    parser.add_argument('--tabName', default='SOLD', help='Tab name (default: SOLD)')
    parser.add_argument('--tz', default='Africa%2FCairo', help='URL-encoded timezone (default: Africa/Cairo)')
    parser.add_argument('--modules', default='searchResults', help='API modules (default: searchResults)')

    # System parameters
    parser.add_argument('--debug', action='store_true', help='Enable debug output')
    parser.add_argument('--headless', action='store_true', help='Run browser in headless mode (no GUI)')
    parser.add_argument('--manual-login', action='store_true',
                       help='Open login page, populate credentials, and wait for manual completion (useful for CAPTCHA/2FA)')

    args = parser.parse_args()
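
    # Illustrative invocations (the filename "ebay_research.py" is a placeholder for
    # whatever this script is saved as):
    #   python ebay_research.py "iphone case" --dayRange 30 --limit 50 --debug
    #   python ebay_research.py "laptop stand" --minPrice 10 --maxPrice 50 --headless
    #   python ebay_research.py --manual-login   # prepare the login page for manual CAPTCHA/2FA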

    total_start_time = time.time()
    ebay_login = EbayLogin(debug=args.debug)

    try:
        ebay_login.debug_print(f"🚀 Starting eBay research script at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
        if args.debug:
            print("🐛 Debug mode enabled - showing detailed output")

        # Load credentials from config file
        email, password = ebay_login.load_credentials()

        if not email or not password:
            print("Failed to load credentials from config.info file")  # Keep critical errors visible
            return

        # Start browser (force non-headless if manual login is requested)
        browser_headless = args.headless and not args.manual_login
        await ebay_login.start_browser(headless=browser_headless)

        # Handle different login scenarios
        if args.manual_login:
            # Manual login assistance mode
            print("🔧 Manual login mode activated")
            success = await ebay_login.manual_login_assistance(email, password)
            if not success:
                print("❌ Manual login assistance failed. Stopping execution.")
                return
        else:
            # Automatic login mode
            # Check if already logged in before attempting login
            if await ebay_login.check_login_status():
                ebay_login.debug_print("🎉 Already logged in! Using saved session.")
            else:
                ebay_login.debug_print("🔐 Not logged in, attempting login...")
                success = await ebay_login.login_to_ebay(email, password)
                if not success:
                    print("❌ Automated login failed!")
                    print("🔄 Please log in manually in the browser window and press Enter to continue...")
                    input("Press Enter after logging in manually...")

                    # Final check after manual login attempt
                    if not await ebay_login.check_login_status():
                        print("❌ Still not logged in after manual attempt. Stopping execution.")
                        return
                    else:
                        print("✅ Manual login successful! Continuing...")

        # Example: Fetch eBay research data with multiple offsets
        ebay_login.debug_print("\n" + "="*50)
        ebay_login.debug_print("🔬 Starting eBay Research Data Collection")
        ebay_login.debug_print("="*50)

        # Multi-offset fetch with automatic retry and combination
        combined_data = await ebay_login.fetch_multiple_offsets(
            keywords=args.keywords,
            day_range=args.dayRange,
            limit=args.limit,
            offsets=[args.offset, args.offset + 50],  # Fetch the requested offset plus the following page of 50
            minPrice=args.minPrice,
            maxPrice=args.maxPrice,
            topRatedProducts=args.topRatedProducts,
            marketplace=args.marketplace,
            category_id=args.categoryId,
            tabName=args.tabName,
            tz=args.tz,
            modules=args.modules,
            endDate=args.endDate,
            startDate=args.startDate,
            format=args.format,
            BuyerLocation=args.BuyerLocation
        )

        if combined_data:
            ebay_login.debug_print("✅ Multi-offset data fetch completed!")
            ebay_login.debug_print(f"📊 Total items collected: {combined_data.get('total_items', 0)}")
        else:
            print("❌ No data was collected from any offset")  # Keep critical errors visible

        # Optional: Single page fetch example (uncomment to use instead)
        # data = await ebay_login.fetch_ebay_research_data(
        #     keywords="iphone case",
        #     day_range=90,
        #     offset=0,
        #     limit=50
        # )
        # if data:
        #     print("✅ Single page data fetch completed!")

        # Optional: Multi-page fetch example (uncomment to use)
        # combined_data = await ebay_login.fetch_multiple_pages(
        #     keywords="iphone case",
        #     day_range=90,
        #     total_items=200,
        #     items_per_page=50
        # )

        # Keep browser open for testing (remove this in production)
        # await asyncio.sleep(5)

    except Exception as e:
        print(f"Error: {str(e)}")
    finally:
        # Calculate total runtime
        total_runtime = time.time() - total_start_time
        ebay_login.operation_times['TOTAL SCRIPT RUNTIME'] = total_runtime

        # Print runtime summary only in debug mode
        if ebay_login.debug:
            print(ebay_login.get_runtime_summary())
        ebay_login.debug_print(f"🏁 Script completed at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

        # Proper cleanup
        try:
            await ebay_login.close_browser()
        except Exception as e:
            print(f"Cleanup warning: {str(e)}")

        # Give event loop time to complete cleanup
        await asyncio.sleep(0.2)


if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        print("\n⚠️ Script interrupted by user")
    except Exception as e:
        print(f"⚠️ Script error: {str(e)}")
    finally:
        # Best-effort cleanup: asyncio.run() already closes its own event loop, so this
        # only cancels stray tasks if an open loop is still available.
        try:
            loop = asyncio.get_event_loop()
            if not loop.is_closed():
                for task in asyncio.all_tasks(loop):
                    task.cancel()
        except Exception:
            pass  # Ignore cleanup errors
