Update App.py

This commit is contained in:
DJObleezy 2025-03-28 19:09:59 -07:00 committed by GitHub
parent b651320bfb
commit 1a9b09afae
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

8
App.py
View File

@@ -91,6 +91,8 @@ def update_metrics_job(force=False):
"""
global cached_metrics, last_metrics_update_time, scheduler, scheduler_last_successful_run
logging.info("Starting update_metrics_job")
try:
# Check scheduler health - enhanced logic to detect failed executors
if not scheduler or not hasattr(scheduler, 'running'):
@@ -147,6 +149,7 @@ def update_metrics_job(force=False):
# Set last update time to now
last_metrics_update_time = current_time
logging.info(f"Updated last_metrics_update_time: {last_metrics_update_time}")
# Add timeout handling with a timer
job_timeout = 45 # seconds
@@ -165,6 +168,7 @@ def update_metrics_job(force=False):
# Use the dashboard service to fetch metrics
metrics = dashboard_service.fetch_metrics()
if metrics:
logging.info("Fetched metrics successfully")
# Update cached metrics
cached_metrics = metrics
@@ -183,16 +187,19 @@ def update_metrics_job(force=False):
# Mark successful run time for watchdog
scheduler_last_successful_run = time.time()
logging.info(f"Updated scheduler_last_successful_run: {scheduler_last_successful_run}")
# Persist critical state
state_manager.persist_critical_state(cached_metrics, scheduler_last_successful_run, last_metrics_update_time)
# Periodically check and prune data to prevent memory growth
if current_time % 300 < 60: # Every ~5 minutes
logging.info("Pruning old data")
state_manager.prune_old_data()
# Only save state to Redis on a similar schedule, not every update
if current_time % 300 < 60: # Every ~5 minutes
logging.info("Saving graph state")
state_manager.save_graph_state()
# Periodic full memory cleanup (every 2 hours)
@@ -213,6 +220,7 @@ def update_metrics_job(force=False):
logging.error(f"Background job: Unhandled exception: {e}")
import traceback
logging.error(traceback.format_exc())
logging.info("Completed update_metrics_job")
# --- SchedulerWatchdog to monitor and recover ---
def scheduler_watchdog():