mirror of
https://github.com/dptech-corp/Uni-Lab-OS.git
synced 2026-02-06 23:15:10 +00:00
Merge branch 'dev' into prcix9320
This commit is contained in:
@@ -19,6 +19,11 @@ if unilabos_dir not in sys.path:
|
||||
|
||||
from unilabos.utils.banner_print import print_status, print_unilab_banner
|
||||
from unilabos.config.config import load_config, BasicConfig, HTTPConfig
|
||||
from unilabos.app.utils import cleanup_for_restart
|
||||
|
||||
# Global restart flags (used by ws_client and web/server)
|
||||
_restart_requested: bool = False
|
||||
_restart_reason: str = ""
|
||||
|
||||
|
||||
def load_config_from_file(config_path):
|
||||
@@ -156,6 +161,11 @@ def parse_args():
|
||||
default=False,
|
||||
help="Complete registry information",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no_update_feedback",
|
||||
action="store_true",
|
||||
help="Disable sending update feedback to server",
|
||||
)
|
||||
# workflow upload subcommand
|
||||
workflow_parser = subparsers.add_parser(
|
||||
"workflow_upload",
|
||||
@@ -297,6 +307,7 @@ def main():
|
||||
BasicConfig.is_host_mode = not args_dict.get("is_slave", False)
|
||||
BasicConfig.slave_no_host = args_dict.get("slave_no_host", False)
|
||||
BasicConfig.upload_registry = args_dict.get("upload_registry", False)
|
||||
BasicConfig.no_update_feedback = args_dict.get("no_update_feedback", False)
|
||||
BasicConfig.communication_protocol = "websocket"
|
||||
machine_name = os.popen("hostname").read().strip()
|
||||
machine_name = "".join([c if c.isalnum() or c == "_" else "_" for c in machine_name])
|
||||
@@ -497,13 +508,19 @@ def main():
|
||||
time.sleep(1)
|
||||
else:
|
||||
start_backend(**args_dict)
|
||||
start_server(
|
||||
restart_requested = start_server(
|
||||
open_browser=not args_dict["disable_browser"],
|
||||
port=BasicConfig.port,
|
||||
)
|
||||
if restart_requested:
|
||||
print_status("[Main] Restart requested, cleaning up...", "info")
|
||||
cleanup_for_restart()
|
||||
return
|
||||
else:
|
||||
start_backend(**args_dict)
|
||||
start_server(
|
||||
|
||||
# 启动服务器(默认支持WebSocket触发重启)
|
||||
restart_requested = start_server(
|
||||
open_browser=not args_dict["disable_browser"],
|
||||
port=BasicConfig.port,
|
||||
)
|
||||
|
||||
144
unilabos/app/utils.py
Normal file
144
unilabos/app/utils.py
Normal file
@@ -0,0 +1,144 @@
|
||||
"""
|
||||
UniLabOS 应用工具函数
|
||||
|
||||
提供清理、重启等工具函数
|
||||
"""
|
||||
|
||||
import gc
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
|
||||
from unilabos.utils.banner_print import print_status
|
||||
|
||||
|
||||
def cleanup_for_restart() -> bool:
|
||||
"""
|
||||
Clean up all resources for restart without exiting the process.
|
||||
|
||||
This function prepares the system for re-initialization by:
|
||||
1. Stopping all communication clients
|
||||
2. Destroying ROS nodes
|
||||
3. Resetting singletons
|
||||
4. Waiting for threads to finish
|
||||
|
||||
Returns:
|
||||
bool: True if cleanup was successful, False otherwise
|
||||
"""
|
||||
print_status("[Restart] Starting cleanup for restart...", "info")
|
||||
|
||||
# Step 1: Stop WebSocket communication client
|
||||
print_status("[Restart] Step 1: Stopping WebSocket client...", "info")
|
||||
try:
|
||||
from unilabos.app.communication import get_communication_client
|
||||
|
||||
comm_client = get_communication_client()
|
||||
if comm_client is not None:
|
||||
comm_client.stop()
|
||||
print_status("[Restart] WebSocket client stopped", "info")
|
||||
except Exception as e:
|
||||
print_status(f"[Restart] Error stopping WebSocket: {e}", "warning")
|
||||
|
||||
# Step 2: Get HostNode and cleanup ROS
|
||||
print_status("[Restart] Step 2: Cleaning up ROS nodes...", "info")
|
||||
try:
|
||||
from unilabos.ros.nodes.presets.host_node import HostNode
|
||||
import rclpy
|
||||
from rclpy.timer import Timer
|
||||
|
||||
host_instance = HostNode.get_instance(timeout=5)
|
||||
if host_instance is not None:
|
||||
print_status(f"[Restart] Found HostNode: {host_instance.device_id}", "info")
|
||||
|
||||
# Gracefully shutdown background threads
|
||||
print_status("[Restart] Shutting down background threads...", "info")
|
||||
HostNode.shutdown_background_threads(timeout=5.0)
|
||||
print_status("[Restart] Background threads shutdown complete", "info")
|
||||
|
||||
# Stop discovery timer
|
||||
if hasattr(host_instance, "_discovery_timer") and isinstance(host_instance._discovery_timer, Timer):
|
||||
host_instance._discovery_timer.cancel()
|
||||
print_status("[Restart] Discovery timer cancelled", "info")
|
||||
|
||||
# Destroy device nodes
|
||||
device_count = len(host_instance.devices_instances)
|
||||
print_status(f"[Restart] Destroying {device_count} device instances...", "info")
|
||||
for device_id, device_node in list(host_instance.devices_instances.items()):
|
||||
try:
|
||||
if hasattr(device_node, "ros_node_instance") and device_node.ros_node_instance is not None:
|
||||
device_node.ros_node_instance.destroy_node()
|
||||
print_status(f"[Restart] Device {device_id} destroyed", "info")
|
||||
except Exception as e:
|
||||
print_status(f"[Restart] Error destroying device {device_id}: {e}", "warning")
|
||||
|
||||
# Clear devices instances
|
||||
host_instance.devices_instances.clear()
|
||||
host_instance.devices_names.clear()
|
||||
|
||||
# Destroy host node
|
||||
try:
|
||||
host_instance.destroy_node()
|
||||
print_status("[Restart] HostNode destroyed", "info")
|
||||
except Exception as e:
|
||||
print_status(f"[Restart] Error destroying HostNode: {e}", "warning")
|
||||
|
||||
# Reset HostNode state
|
||||
HostNode.reset_state()
|
||||
print_status("[Restart] HostNode state reset", "info")
|
||||
|
||||
# Shutdown executor first (to stop executor.spin() gracefully)
|
||||
if hasattr(rclpy, "__executor") and rclpy.__executor is not None:
|
||||
try:
|
||||
rclpy.__executor.shutdown()
|
||||
rclpy.__executor = None # Clear for restart
|
||||
print_status("[Restart] ROS executor shutdown complete", "info")
|
||||
except Exception as e:
|
||||
print_status(f"[Restart] Error shutting down executor: {e}", "warning")
|
||||
|
||||
# Shutdown rclpy
|
||||
if rclpy.ok():
|
||||
rclpy.shutdown()
|
||||
print_status("[Restart] rclpy shutdown complete", "info")
|
||||
|
||||
except ImportError as e:
|
||||
print_status(f"[Restart] ROS modules not available: {e}", "warning")
|
||||
except Exception as e:
|
||||
print_status(f"[Restart] Error in ROS cleanup: {e}", "warning")
|
||||
return False
|
||||
|
||||
# Step 3: Reset communication client singleton
|
||||
print_status("[Restart] Step 3: Resetting singletons...", "info")
|
||||
try:
|
||||
from unilabos.app import communication
|
||||
|
||||
if hasattr(communication, "_communication_client"):
|
||||
communication._communication_client = None
|
||||
print_status("[Restart] Communication client singleton reset", "info")
|
||||
except Exception as e:
|
||||
print_status(f"[Restart] Error resetting communication singleton: {e}", "warning")
|
||||
|
||||
# Step 4: Wait for threads to finish
|
||||
print_status("[Restart] Step 4: Waiting for threads to finish...", "info")
|
||||
time.sleep(3) # Give threads time to finish
|
||||
|
||||
# Check remaining threads
|
||||
remaining_threads = []
|
||||
for t in threading.enumerate():
|
||||
if t.name != "MainThread" and t.is_alive():
|
||||
remaining_threads.append(t.name)
|
||||
|
||||
if remaining_threads:
|
||||
print_status(
|
||||
f"[Restart] Warning: {len(remaining_threads)} threads still running: {remaining_threads}", "warning"
|
||||
)
|
||||
else:
|
||||
print_status("[Restart] All threads stopped", "info")
|
||||
|
||||
# Step 5: Force garbage collection
|
||||
print_status("[Restart] Step 5: Running garbage collection...", "info")
|
||||
gc.collect()
|
||||
gc.collect() # Run twice for weak references
|
||||
print_status("[Restart] Garbage collection complete", "info")
|
||||
|
||||
print_status("[Restart] Cleanup complete. Ready for re-initialization.", "info")
|
||||
return True
|
||||
@@ -6,7 +6,6 @@ Web服务器模块
|
||||
|
||||
import webbrowser
|
||||
|
||||
import uvicorn
|
||||
from fastapi import FastAPI, Request
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from starlette.responses import Response
|
||||
@@ -96,7 +95,7 @@ def setup_server() -> FastAPI:
|
||||
return app
|
||||
|
||||
|
||||
def start_server(host: str = "0.0.0.0", port: int = 8002, open_browser: bool = True) -> None:
|
||||
def start_server(host: str = "0.0.0.0", port: int = 8002, open_browser: bool = True) -> bool:
|
||||
"""
|
||||
启动服务器
|
||||
|
||||
@@ -104,7 +103,14 @@ def start_server(host: str = "0.0.0.0", port: int = 8002, open_browser: bool = T
|
||||
host: 服务器主机
|
||||
port: 服务器端口
|
||||
open_browser: 是否自动打开浏览器
|
||||
|
||||
Returns:
|
||||
bool: True if restart was requested, False otherwise
|
||||
"""
|
||||
import threading
|
||||
import time
|
||||
from uvicorn import Config, Server
|
||||
|
||||
# 设置服务器
|
||||
setup_server()
|
||||
|
||||
@@ -123,7 +129,37 @@ def start_server(host: str = "0.0.0.0", port: int = 8002, open_browser: bool = T
|
||||
|
||||
# 启动服务器
|
||||
info(f"[Web] 启动FastAPI服务器: {host}:{port}")
|
||||
uvicorn.run(app, host=host, port=port, log_config=log_config)
|
||||
|
||||
# 使用支持重启的模式
|
||||
config = Config(app=app, host=host, port=port, log_config=log_config)
|
||||
server = Server(config)
|
||||
|
||||
# 启动服务器线程
|
||||
server_thread = threading.Thread(target=server.run, daemon=True, name="uvicorn_server")
|
||||
server_thread.start()
|
||||
|
||||
info("[Web] Server started, monitoring for restart requests...")
|
||||
|
||||
# 监控重启标志
|
||||
import unilabos.app.main as main_module
|
||||
|
||||
while server_thread.is_alive():
|
||||
if hasattr(main_module, "_restart_requested") and main_module._restart_requested:
|
||||
info(
|
||||
f"[Web] Restart requested via WebSocket, reason: {getattr(main_module, '_restart_reason', 'unknown')}"
|
||||
)
|
||||
main_module._restart_requested = False
|
||||
|
||||
# 停止服务器
|
||||
server.should_exit = True
|
||||
server_thread.join(timeout=5)
|
||||
|
||||
info("[Web] Server stopped, ready for restart")
|
||||
return True
|
||||
|
||||
time.sleep(1)
|
||||
|
||||
return False
|
||||
|
||||
|
||||
# 当脚本直接运行时启动服务器
|
||||
|
||||
@@ -359,7 +359,7 @@ class MessageProcessor:
|
||||
self.device_manager = device_manager
|
||||
self.queue_processor = None # 延迟设置
|
||||
self.websocket_client = None # 延迟设置
|
||||
self.session_id = ""
|
||||
self.session_id = str(uuid.uuid4())[:6] # 产生一个随机的session_id
|
||||
|
||||
# WebSocket连接
|
||||
self.websocket = None
|
||||
@@ -488,7 +488,16 @@ class MessageProcessor:
|
||||
async for message in self.websocket:
|
||||
try:
|
||||
data = json.loads(message)
|
||||
await self._process_message(data)
|
||||
message_type = data.get("action", "")
|
||||
message_data = data.get("data")
|
||||
if self.session_id and self.session_id == data.get("edge_session"):
|
||||
await self._process_message(message_type, message_data)
|
||||
else:
|
||||
if message_type.endswith("_material"):
|
||||
logger.trace(f"[MessageProcessor] 收到一条归属 {data.get('edge_session')} 的旧消息:{data}")
|
||||
logger.debug(f"[MessageProcessor] 跳过了一条归属 {data.get('edge_session')} 的旧消息: {data.get('action')}")
|
||||
else:
|
||||
await self._process_message(message_type, message_data)
|
||||
except json.JSONDecodeError:
|
||||
logger.error(f"[MessageProcessor] Invalid JSON received: {message}")
|
||||
except Exception as e:
|
||||
@@ -554,11 +563,8 @@ class MessageProcessor:
|
||||
finally:
|
||||
logger.debug("[MessageProcessor] Send handler stopped")
|
||||
|
||||
async def _process_message(self, data: Dict[str, Any]):
|
||||
async def _process_message(self, message_type: str, message_data: Dict[str, Any]):
|
||||
"""处理收到的消息"""
|
||||
message_type = data.get("action", "")
|
||||
message_data = data.get("data")
|
||||
|
||||
logger.debug(f"[MessageProcessor] Processing message: {message_type}")
|
||||
|
||||
try:
|
||||
@@ -571,16 +577,19 @@ class MessageProcessor:
|
||||
elif message_type == "cancel_action" or message_type == "cancel_task":
|
||||
await self._handle_cancel_action(message_data)
|
||||
elif message_type == "add_material":
|
||||
# noinspection PyTypeChecker
|
||||
await self._handle_resource_tree_update(message_data, "add")
|
||||
elif message_type == "update_material":
|
||||
# noinspection PyTypeChecker
|
||||
await self._handle_resource_tree_update(message_data, "update")
|
||||
elif message_type == "remove_material":
|
||||
# noinspection PyTypeChecker
|
||||
await self._handle_resource_tree_update(message_data, "remove")
|
||||
elif message_type == "session_id":
|
||||
self.session_id = message_data.get("session_id")
|
||||
logger.info(f"[MessageProcessor] Session ID: {self.session_id}")
|
||||
elif message_type == "request_reload":
|
||||
await self._handle_request_reload(message_data)
|
||||
# elif message_type == "session_id":
|
||||
# self.session_id = message_data.get("session_id")
|
||||
# logger.info(f"[MessageProcessor] Session ID: {self.session_id}")
|
||||
elif message_type == "request_restart":
|
||||
await self._handle_request_restart(message_data)
|
||||
else:
|
||||
logger.debug(f"[MessageProcessor] Unknown message type: {message_type}")
|
||||
|
||||
@@ -890,19 +899,48 @@ class MessageProcessor:
|
||||
)
|
||||
thread.start()
|
||||
|
||||
async def _handle_request_reload(self, data: Dict[str, Any]):
|
||||
async def _handle_request_restart(self, data: Dict[str, Any]):
|
||||
"""
|
||||
处理重载请求
|
||||
处理重启请求
|
||||
|
||||
当LabGo发送request_reload时,重新发送设备注册信息
|
||||
当LabGo发送request_restart时,执行清理并触发重启
|
||||
"""
|
||||
reason = data.get("reason", "unknown")
|
||||
logger.info(f"[MessageProcessor] Received reload request, reason: {reason}")
|
||||
delay = data.get("delay", 2) # 默认延迟2秒
|
||||
logger.info(f"[MessageProcessor] Received restart request, reason: {reason}, delay: {delay}s")
|
||||
|
||||
# 重新发送host_node_ready信息
|
||||
# 发送确认消息
|
||||
if self.websocket_client:
|
||||
self.websocket_client.publish_host_ready()
|
||||
logger.info("[MessageProcessor] Re-sent host_node_ready after reload request")
|
||||
await self.websocket_client.send_message({
|
||||
"action": "restart_acknowledged",
|
||||
"data": {"reason": reason, "delay": delay}
|
||||
})
|
||||
|
||||
# 设置全局重启标志
|
||||
import unilabos.app.main as main_module
|
||||
main_module._restart_requested = True
|
||||
main_module._restart_reason = reason
|
||||
|
||||
# 延迟后执行清理
|
||||
await asyncio.sleep(delay)
|
||||
|
||||
# 在新线程中执行清理,避免阻塞当前事件循环
|
||||
def do_cleanup():
|
||||
import time
|
||||
time.sleep(0.5) # 给当前消息处理完成的时间
|
||||
logger.info(f"[MessageProcessor] Starting cleanup for restart, reason: {reason}")
|
||||
try:
|
||||
from unilabos.app.utils import cleanup_for_restart
|
||||
if cleanup_for_restart():
|
||||
logger.info("[MessageProcessor] Cleanup successful, main() will restart")
|
||||
else:
|
||||
logger.error("[MessageProcessor] Cleanup failed")
|
||||
except Exception as e:
|
||||
logger.error(f"[MessageProcessor] Error during cleanup: {e}")
|
||||
|
||||
cleanup_thread = threading.Thread(target=do_cleanup, name="RestartCleanupThread", daemon=True)
|
||||
cleanup_thread.start()
|
||||
logger.info(f"[MessageProcessor] Restart cleanup scheduled")
|
||||
|
||||
async def _send_action_state_response(
|
||||
self, device_id: str, action_name: str, task_id: str, job_id: str, typ: str, free: bool, need_more: int
|
||||
|
||||
Reference in New Issue
Block a user