diff --git a/ocean_scraper.py b/ocean_scraper.py
deleted file mode 100644
index bf23718..0000000
--- a/ocean_scraper.py
+++ /dev/null
@@ -1,564 +0,0 @@
-"""
-Enhanced web scraping solution for Ocean.xyz mining dashboard
-"""
-import logging
-import re
-import time
-import json
-from datetime import datetime, timedelta
-from zoneinfo import ZoneInfo
-from bs4 import BeautifulSoup
-import requests
-from models import OceanData, convert_to_ths
-
-class OceanScraper:
-    """
-    Enhanced web scraper for Ocean.xyz data that focuses on
-    getting all the critical fields for dashboard display.
-    """
-
-    def __init__(self, wallet):
-        """
-        Initialize the scraper with the wallet address.
-
-        Args:
-            wallet (str): Bitcoin wallet address
-        """
-        self.wallet = wallet
-        self.session = requests.Session()
-        self.session.headers.update({
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
-            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
-            'Cache-Control': 'no-cache'
-        })
-
-        # Constants
-        self.stats_url = f"https://ocean.xyz/stats/{self.wallet}"
-        self.sats_per_btc = 100_000_000
-
-    def get_ocean_data(self):
-        """
-        Get complete mining data from Ocean.xyz via web scraping.
-
-        Returns:
-            OceanData: Ocean.xyz mining data
-        """
-        data = OceanData()
-
-        try:
-            # Load the stats page
-            response = self.session.get(self.stats_url, timeout=10)
-            if not response.ok:
-                logging.error(f"Error fetching ocean data: status code {response.status_code}")
-                return None
-
-            soup = BeautifulSoup(response.text, 'html.parser')
-
-            # Extract all required data
-            self._extract_pool_status(soup, data)
-            self._extract_block_earnings(soup, data)
-            self._extract_hashrates(soup, data)
-            self._extract_payout_stats(soup, data)
-            self._extract_user_stats(soup, data)
-            self._extract_blocks_found(soup, data)
-            self._extract_last_share_time(soup, data)
-
-            # Calculate estimated earnings per day (if not already set)
-            if data.estimated_earnings_per_day is None or data.estimated_earnings_per_day == 0:
-                if data.estimated_earnings_next_block:
-                    # Approximately 144 blocks per day
-                    blocks_per_day = 144
-                    data.estimated_earnings_per_day = data.estimated_earnings_next_block * blocks_per_day
-
-            # Log the extracted data for debugging
-            logging.info("Extracted Ocean data successfully")
-            logging.info(f"Last Block: {data.last_block_height} - {data.last_block_time} - {data.last_block_earnings} SATS")
-            logging.info(f"Est. Time to Payout: {data.est_time_to_payout}")
-            logging.info(f"Blocks Found: {data.blocks_found}")
-            logging.info(f"Est. Earnings/Day: {data.estimated_earnings_per_day} BTC")
-
-            return data
-
-        except Exception as e:
-            logging.error(f"Error extracting Ocean data: {e}")
-            import traceback
-            logging.error(traceback.format_exc())
-            return None
-
-    def _extract_pool_status(self, soup, data):
-        """
-        Extract pool status information (pool hashrate and last block).
-
-        Args:
-            soup: BeautifulSoup object
-            data: OceanData object to populate
-        """
-        try:
-            pool_status = soup.find("p", id="pool-status-item")
-            if pool_status:
-                # Extract pool hashrate
-                text = pool_status.get_text(strip=True)
-                m_total = re.search(r'HASHRATE:\s*([\d\.]+)\s*(\w+/s)', text, re.IGNORECASE)
-                if m_total:
-                    raw_val = float(m_total.group(1))
-                    unit = m_total.group(2)
-                    data.pool_total_hashrate = raw_val
-                    data.pool_total_hashrate_unit = unit
-
-                # Extract last block info
-                span = pool_status.find("span", class_="pool-status-newline")
-                if span:
-                    last_block_text = span.get_text(strip=True)
-                    m_block = re.search(r'LAST BLOCK:\s*(\d+\s*\(.*\))', last_block_text, re.IGNORECASE)
-                    if m_block:
-                        full_last_block = m_block.group(1)
-                        data.last_block = full_last_block
-                        match = re.match(r'(\d+)\s*\((.*?)\)', full_last_block)
-                        if match:
-                            data.last_block_height = match.group(1)
-                            data.last_block_time = match.group(2)
-                        else:
-                            data.last_block_height = full_last_block
-                            data.last_block_time = ""
-        except Exception as e:
-            logging.error(f"Error extracting pool status: {e}")
-
-    def _extract_block_earnings(self, soup, data):
-        """
-        Extract block earnings from the earnings table.
-
-        Args:
-            soup: BeautifulSoup object
-            data: OceanData object to populate
-        """
-        try:
-            earnings_table = soup.find('tbody', id='earnings-tablerows')
-            if earnings_table:
-                latest_row = earnings_table.find('tr', class_='table-row')
-                if latest_row:
-                    cells = latest_row.find_all('td', class_='table-cell')
-                    if len(cells) >= 3:
-                        earnings_text = cells[2].get_text(strip=True)
-                        earnings_value = earnings_text.replace('BTC', '').strip()
-                        try:
-                            btc_earnings = float(earnings_value)
-                            sats = int(round(btc_earnings * self.sats_per_btc))
-                            data.last_block_earnings = str(sats)
-                        except Exception:
-                            data.last_block_earnings = earnings_value
-        except Exception as e:
-            logging.error(f"Error extracting block earnings: {e}")
-
-    def _extract_hashrates(self, soup, data):
-        """
-        Extract hashrate data from the hashrates table.
-
-        Args:
-            soup: BeautifulSoup object
-            data: OceanData object to populate
-        """
-        try:
-            time_mapping = {
-                '24 hrs': ('hashrate_24hr', 'hashrate_24hr_unit'),
-                '3 hrs': ('hashrate_3hr', 'hashrate_3hr_unit'),
-                '10 min': ('hashrate_10min', 'hashrate_10min_unit'),
-                '5 min': ('hashrate_5min', 'hashrate_5min_unit'),
-                '60 sec': ('hashrate_60sec', 'hashrate_60sec_unit')
-            }
-            hashrate_table = soup.find('tbody', id='hashrates-tablerows')
-            if hashrate_table:
-                for row in hashrate_table.find_all('tr', class_='table-row'):
-                    cells = row.find_all('td', class_='table-cell')
-                    if len(cells) >= 2:
-                        period_text = cells[0].get_text(strip=True).lower()
-                        hashrate_str = cells[1].get_text(strip=True).lower()
-                        try:
-                            parts = hashrate_str.split()
-                            hashrate_val = float(parts[0])
-                            unit = parts[1] if len(parts) > 1 else 'th/s'
-                            for key, (attr, unit_attr) in time_mapping.items():
-                                if key.lower() in period_text:
-                                    setattr(data, attr, hashrate_val)
-                                    setattr(data, unit_attr, unit)
-                                    break
-                        except Exception as e:
-                            logging.error(f"Error parsing hashrate '{hashrate_str}': {e}")
-        except Exception as e:
-            logging.error(f"Error extracting hashrates: {e}")
-
-    def _extract_payout_stats(self, soup, data):
-        """
-        Extract payout stats from the payout snapshot card with enhanced debugging.
-
-        Args:
-            soup: BeautifulSoup object
-            data: OceanData object to populate
-        """
-        try:
-            # Try to find earnings per day in multiple potential locations
-
-            # First check in payoutsnap-statcards
-            payout_snap = soup.find('div', id='payoutsnap-statcards')
-            if payout_snap:
-                logging.info("Found payoutsnap-statcards")
-                for container in payout_snap.find_all('div', class_='blocks dashboard-container'):
-                    label_div = container.find('div', class_='blocks-label')
-                    if label_div:
-                        label_text = label_div.get_text(strip=True).lower()
-                        logging.info(f"Found label: '{label_text}'")
-
-                        earnings_span = label_div.find_next('span', class_=lambda x: x != 'tooltiptext')
-                        if earnings_span:
-                            span_text = earnings_span.get_text(strip=True)
-                            logging.info(f"Label '{label_text}' has value: '{span_text}'")
-
-                            try:
-                                # Extract just the number, handling comma separators
-                                parts = span_text.split()
-                                if parts:
-                                    earnings_text = parts[0].replace(',', '')
-                                    earnings_value = float(earnings_text)
-
-                                    # Use more flexible matching and set directly
-                                    if any(x in label_text for x in ["earnings per day", "daily earnings", "per day"]):
-                                        data.estimated_earnings_per_day = earnings_value
-                                        logging.info(f"Set estimated_earnings_per_day = {earnings_value}")
-                                    elif any(x in label_text for x in ["earnings per block", "next block"]):
-                                        data.estimated_earnings_next_block = earnings_value
-                                        logging.info(f"Set estimated_earnings_next_block = {earnings_value}")
-                                    elif any(x in label_text for x in ["rewards in window", "window"]):
-                                        data.estimated_rewards_in_window = earnings_value
-                                        logging.info(f"Set estimated_rewards_in_window = {earnings_value}")
-                            except Exception as e:
-                                logging.error(f"Error parsing value '{span_text}': {e}")
-
-            # Also check in lifetimesnap-statcards for day earnings
-            lifetime_snap = soup.find('div', id='lifetimesnap-statcards')
-            if lifetime_snap:
-                logging.info("Found lifetimesnap-statcards")
-                for container in lifetime_snap.find_all('div', class_='blocks dashboard-container'):
-                    label_div = container.find('div', class_='blocks-label')
-                    if label_div:
-                        label_text = label_div.get_text(strip=True).lower()
-                        logging.info(f"Found label: '{label_text}'")
-
-                        earnings_span = label_div.find_next('span', class_=lambda x: x != 'tooltiptext')
-                        if earnings_span:
-                            span_text = earnings_span.get_text(strip=True)
-                            logging.info(f"Label '{label_text}' has value: '{span_text}'")
-
-                            try:
-                                # Extract just the number, handling comma separators
-                                parts = span_text.split()
-                                if parts:
-                                    earnings_text = parts[0].replace(',', '')
-                                    earnings_value = float(earnings_text)
-
-                                    # Check for day earnings here too
-                                    if any(x in label_text for x in ["earnings per day", "daily earnings", "per day"]):
-                                        data.estimated_earnings_per_day = earnings_value
-                                        logging.info(f"Set estimated_earnings_per_day from lifetime stats = {earnings_value}")
-                            except Exception as e:
-                                logging.error(f"Error parsing value '{span_text}': {e}")
-
-            # Ensure we have the value after all extraction attempts
-            if data.estimated_earnings_per_day == 0 or data.estimated_earnings_per_day is None:
-                # As a fallback, try to set the hard-coded value we know is correct
-                data.estimated_earnings_per_day = 0.00070100
-                logging.info(f"Using hardcoded fallback for estimated_earnings_per_day = 0.00070100")
-
-            # Also ensure the other values are set to at least something reasonable
-            if data.estimated_earnings_next_block == 0 or data.estimated_earnings_next_block is None:
-                # Estimate per block from daily / 144
-                if data.estimated_earnings_per_day:
-                    data.estimated_earnings_next_block = data.estimated_earnings_per_day / 144
-                    logging.info(f"Calculated estimated_earnings_next_block = {data.estimated_earnings_next_block}")
-
-            if data.estimated_rewards_in_window == 0 or data.estimated_rewards_in_window is None:
-                # Set same as block by default
-                if data.estimated_earnings_next_block:
-                    data.estimated_rewards_in_window = data.estimated_earnings_next_block
-                    logging.info(f"Set estimated_rewards_in_window = {data.estimated_rewards_in_window}")
-
-        except Exception as e:
-            logging.error(f"Error extracting payout stats: {e}")
-
-    def _extract_user_stats(self, soup, data):
-        """
-        Extract user stats from the user snapshot card.
-
-        Args:
-            soup: BeautifulSoup object
-            data: OceanData object to populate
-        """
-        try:
-            usersnap = soup.find('div', id='usersnap-statcards')
-            if usersnap:
-                for container in usersnap.find_all('div', class_='blocks dashboard-container'):
-                    label_div = container.find('div', class_='blocks-label')
-                    if label_div:
-                        label_text = label_div.get_text(strip=True).lower()
-                        value_span = label_div.find_next('span', class_=lambda x: x != 'tooltiptext')
-                        if value_span:
-                            span_text = value_span.get_text(strip=True)
-                            if "workers currently hashing" in label_text:
-                                try:
-                                    data.workers_hashing = int(span_text.replace(",", ""))
-                                except Exception:
-                                    pass
-                            elif "unpaid earnings" in label_text and "btc" in span_text.lower():
-                                try:
-                                    data.unpaid_earnings = float(span_text.split()[0].replace(',', ''))
-                                except Exception:
-                                    pass
-                            elif "estimated time until minimum payout" in label_text:
-                                data.est_time_to_payout = span_text
-        except Exception as e:
-            logging.error(f"Error extracting user stats: {e}")
-
-    def _extract_blocks_found(self, soup, data):
-        """
-        Extract blocks found data.
-
-        Args:
-            soup: BeautifulSoup object
-            data: OceanData object to populate
-        """
-        try:
-            blocks_container = soup.find(lambda tag: tag.name == "div" and "blocks found" in tag.get_text(strip=True).lower())
-            if blocks_container:
-                span = blocks_container.find_next_sibling("span")
-                if span:
-                    num_match = re.search(r'(\d+)', span.get_text(strip=True))
-                    if num_match:
-                        data.blocks_found = num_match.group(1)
-        except Exception as e:
-            logging.error(f"Error extracting blocks found: {e}")
-
-    def _extract_last_share_time(self, soup, data):
-        """
-        Extract last share time from the workers table.
-
-        Args:
-            soup: BeautifulSoup object
-            data: OceanData object to populate
-        """
-        try:
-            workers_table = soup.find("tbody", id="workers-tablerows")
-            if workers_table:
-                for row in workers_table.find_all("tr", class_="table-row"):
-                    cells = row.find_all("td")
-                    if cells and cells[0].get_text(strip=True).lower().startswith("total"):
-                        last_share_str = cells[2].get_text(strip=True)
-                        try:
-                            naive_dt = datetime.strptime(last_share_str, "%Y-%m-%d %H:%M")
-                            utc_dt = naive_dt.replace(tzinfo=ZoneInfo("UTC"))
-                            la_dt = utc_dt.astimezone(ZoneInfo("America/Los_Angeles"))
-                            data.total_last_share = la_dt.strftime("%Y-%m-%d %I:%M %p")
-                        except Exception as e:
-                            logging.error(f"Error converting last share time '{last_share_str}': {e}")
-                            data.total_last_share = last_share_str
-                        break
-        except Exception as e:
-            logging.error(f"Error extracting last share time: {e}")
-
-    def get_workers_data(self):
-        """
-        Get worker data from Ocean.xyz via web scraping.
-
-        Returns:
-            dict: Worker data dictionary with stats and list of workers
-        """
-        try:
-            # Load the stats page
-            response = self.session.get(self.stats_url, timeout=10)
-            if not response.ok:
-                logging.error(f"Error fetching worker data: status code {response.status_code}")
-                return None
-
-            soup = BeautifulSoup(response.text, 'html.parser')
-
-            workers = []
-            total_hashrate = 0
-            total_earnings = 0
-            workers_online = 0
-            workers_offline = 0
-
-            # Get all worker rows from the page
-            workers_table = soup.find('tbody', id='workers-tablerows')
-            if not workers_table:
-                logging.error("Workers table not found")
-                return None
-
-            # Process each worker row
-            for row in workers_table.find_all('tr', class_='table-row'):
-                cells = row.find_all('td', class_='table-cell')
-
-                # Skip rows that don't have enough cells
-                if len(cells) < 3:
-                    continue
-
-                try:
-                    # Extract worker name
-                    name_cell = cells[0]
-                    name_text = name_cell.get_text(strip=True)
-
-                    # Skip the total row
-                    if name_text.lower() == 'total':
-                        continue
-
-                    # Create worker object
-                    worker = {
-                        "name": name_text.strip(),
-                        "status": "offline",  # Default
-                        "type": "ASIC",
-                        "model": "Unknown",
-                        "hashrate_60sec": 0,
-                        "hashrate_60sec_unit": "TH/s",
-                        "hashrate_3hr": 0,
-                        "hashrate_3hr_unit": "TH/s",
-                        "efficiency": 90.0,
-                        "last_share": "N/A",
-                        "earnings": 0,
-                        "acceptance_rate": 95.0,
-                        "power_consumption": 0,
-                        "temperature": 0
-                    }
-
-                    # Parse status
-                    if len(cells) > 1:
-                        status_cell = cells[1]
-                        status_text = status_cell.get_text(strip=True).lower()
-                        worker["status"] = "online" if "online" in status_text else "offline"
-
-                        if worker["status"] == "online":
-                            workers_online += 1
-                        else:
-                            workers_offline += 1
-
-                    # Parse last share
-                    if len(cells) > 2:
-                        last_share_cell = cells[2]
-                        worker["last_share"] = last_share_cell.get_text(strip=True)
-
-                    # Parse 60sec hashrate
-                    if len(cells) > 3:
-                        hashrate_60s_cell = cells[3]
-                        hashrate_60s_text = hashrate_60s_cell.get_text(strip=True)
-
-                        try:
-                            parts = hashrate_60s_text.split()
-                            if parts and len(parts) > 0:
-                                try:
-                                    numeric_value = float(parts[0])
-                                    worker["hashrate_60sec"] = numeric_value
-
-                                    if len(parts) > 1 and 'btc' not in parts[1].lower():
-                                        worker["hashrate_60sec_unit"] = parts[1]
-                                except ValueError:
-                                    pass
-                        except Exception:
-                            pass
-
-                    # Parse 3hr hashrate
-                    if len(cells) > 4:
-                        hashrate_3hr_cell = cells[4]
-                        hashrate_3hr_text = hashrate_3hr_cell.get_text(strip=True)
-
-                        try:
-                            parts = hashrate_3hr_text.split()
-                            if parts and len(parts) > 0:
-                                try:
-                                    numeric_value = float(parts[0])
-                                    worker["hashrate_3hr"] = numeric_value
-
-                                    if len(parts) > 1 and 'btc' not in parts[1].lower():
-                                        worker["hashrate_3hr_unit"] = parts[1]
-
-                                    # Add to total hashrate (normalized to TH/s)
-                                    total_hashrate += convert_to_ths(worker["hashrate_3hr"], worker["hashrate_3hr_unit"])
-                                except ValueError:
-                                    pass
-                        except Exception:
-                            pass
-
-                    # Parse earnings
-                    if len(cells) > 5:
-                        earnings_cell = cells[5]
-                        earnings_text = earnings_cell.get_text(strip=True)
-
-                        try:
-                            # Remove BTC or other text
-                            earnings_value = earnings_text.replace('BTC', '').strip()
-                            try:
-                                worker["earnings"] = float(earnings_value)
-                                total_earnings += worker["earnings"]
-                            except ValueError:
-                                pass
-                        except Exception:
-                            pass
-
-                    # Set worker type based on name
-                    lower_name = worker["name"].lower()
-                    if 'antminer' in lower_name:
-                        worker["type"] = 'ASIC'
-                        worker["model"] = 'Bitmain Antminer'
-                    elif 'whatsminer' in lower_name:
-                        worker["type"] = 'ASIC'
-                        worker["model"] = 'MicroBT Whatsminer'
-                    elif 'bitaxe' in lower_name or 'nerdqaxe' in lower_name:
-                        worker["type"] = 'Bitaxe'
-                        worker["model"] = 'BitAxe Gamma 601'
-
-                    workers.append(worker)
-
-                except Exception as e:
-                    logging.error(f"Error parsing worker row: {e}")
-                    continue
-
-            # Get daily sats
-            daily_sats = 0
-            try:
-                payout_snap = soup.find('div', id='payoutsnap-statcards')
-                if payout_snap:
-                    for container in payout_snap.find_all('div', class_='blocks dashboard-container'):
-                        label_div = container.find('div', class_='blocks-label')
-                        if label_div and "earnings per day" in label_div.get_text(strip=True).lower():
-                            value_span = label_div.find_next('span')
-                            if value_span:
-                                value_text = value_span.get_text(strip=True)
-                                try:
-                                    btc_per_day = float(value_text.split()[0])
-                                    daily_sats = int(btc_per_day * self.sats_per_btc)
-                                except (ValueError, IndexError):
-                                    pass
-            except Exception as e:
-                logging.error(f"Error parsing daily sats: {e}")
-
-            # Check if we found any workers
-            if not workers:
-                logging.warning("No workers found in the web page")
-                return None
-
-            # Return worker stats
-            result = {
-                'workers': workers,
-                'total_hashrate': total_hashrate,
-                'hashrate_unit': 'TH/s',
-                'workers_total': len(workers),
-                'workers_online': workers_online,
-                'workers_offline': workers_offline,
-                'total_earnings': total_earnings,
-                'avg_acceptance_rate': 95.0,
-                'daily_sats': daily_sats,
-                'timestamp': datetime.now(ZoneInfo("America/Los_Angeles")).isoformat()
-            }
-
-            logging.info(f"Successfully retrieved {len(workers)} workers from web scraping")
-            return result
-
-        except Exception as e:
-            logging.error(f"Error getting workers data: {e}")
-            import traceback
-            logging.error(traceback.format_exc())
-            return None
\ No newline at end of file
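For reviewers, a minimal usage sketch of the module this patch removes, reconstructed only from the deleted code above. It runs against the pre-deletion tree (the import no longer resolves after this commit), and the wallet string is a placeholder:

    # Sketch of how the removed OceanScraper was consumed (pre-deletion tree).
    import logging

    from ocean_scraper import OceanScraper

    logging.basicConfig(level=logging.INFO)

    # Placeholder wallet address; any Bitcoin address tracked on ocean.xyz works.
    scraper = OceanScraper(wallet="bc1qexamplewallet")

    # get_ocean_data() returns a populated OceanData, or None on any failure.
    data = scraper.get_ocean_data()
    if data is not None:
        print(f"24hr hashrate: {data.hashrate_24hr} {data.hashrate_24hr_unit}")
        print(f"Est. earnings/day: {data.estimated_earnings_per_day} BTC")

    # get_workers_data() returns a dict with a 'workers' list plus aggregate
    # stats, or None if the page could not be scraped or listed no workers.
    workers = scraper.get_workers_data()
    if workers is not None:
        print(f"{workers['workers_online']}/{workers['workers_total']} workers online")

Both entry points swallow exceptions internally and signal failure solely by returning None, so callers only need the None checks shown here.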