Update App.py

Author: DJObleezy (committed via GitHub)
Date: 2025-03-28 19:09:59 -07:00
Commit: 1a9b09afae (parent: b651320bfb)

App.py (8 additions)

@@ -91,6 +91,8 @@ def update_metrics_job(force=False):
     """
     global cached_metrics, last_metrics_update_time, scheduler, scheduler_last_successful_run
+    logging.info("Starting update_metrics_job")
     try:
         # Check scheduler health - enhanced logic to detect failed executors
         if not scheduler or not hasattr(scheduler, 'running'):
@@ -147,6 +149,7 @@ def update_metrics_job(force=False):
         # Set last update time to now
         last_metrics_update_time = current_time
+        logging.info(f"Updated last_metrics_update_time: {last_metrics_update_time}")
 
         # Add timeout handling with a timer
         job_timeout = 45  # seconds
@@ -165,6 +168,7 @@ def update_metrics_job(force=False):
         # Use the dashboard service to fetch metrics
         metrics = dashboard_service.fetch_metrics()
         if metrics:
+            logging.info("Fetched metrics successfully")
             # Update cached metrics
             cached_metrics = metrics
@@ -183,16 +187,19 @@ def update_metrics_job(force=False):
             # Mark successful run time for watchdog
             scheduler_last_successful_run = time.time()
+            logging.info(f"Updated scheduler_last_successful_run: {scheduler_last_successful_run}")
 
             # Persist critical state
             state_manager.persist_critical_state(cached_metrics, scheduler_last_successful_run, last_metrics_update_time)
 
             # Periodically check and prune data to prevent memory growth
             if current_time % 300 < 60:  # Every ~5 minutes
+                logging.info("Pruning old data")
                 state_manager.prune_old_data()
 
             # Only save state to Redis on a similar schedule, not every update
             if current_time % 300 < 60:  # Every ~5 minutes
+                logging.info("Saving graph state")
                 state_manager.save_graph_state()
 
             # Periodic full memory cleanup (every 2 hours)
@@ -213,6 +220,7 @@ def update_metrics_job(force=False):
         logging.error(f"Background job: Unhandled exception: {e}")
         import traceback
         logging.error(traceback.format_exc())
+    logging.info("Completed update_metrics_job")
 
 # --- SchedulerWatchdog to monitor and recover ---
 def scheduler_watchdog():
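
Note: the new logging.info() calls are only emitted if App.py configures its logger at INFO level or below; that configuration is outside this diff. A minimal sketch of a setup that would make the messages visible, with an assumed format string (not taken from the repository):

import logging

# Assumed root-logger setup; App.py's actual logging configuration is not shown in this diff.
# With Python's default WARNING level, every logging.info() call added above would be dropped.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
)

The timestamps in such a format make it possible to correlate the "Starting"/"Completed" pair with the scheduler watchdog's view of the last successful run.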