Resource update & asyncio fix

correct bioyond config prcxi example fix append_resource fix regularcontainer fix cancel error fix resource_get param fix json dumps support name change during materials change enable slave mode change uuid logger to trace level correct remove_resource stats disable slave connect websocket adjust with_children param modify devices to use correct executor (sleep, create_task) support sleep and create_task in node fix run async execution error
2026-02-04 21:35:09 +00:00 · 2025-10-31 21:43:25 +08:00
parent 8807865649
commit b6dfe2b944
32 changed files with 1456 additions and 943 deletions
--- a/unilabos/app/backend.py
+++ b/unilabos/app/backend.py
@@ -13,7 +13,7 @@ def start_backend(
    graph=None,
    controllers_config: dict = {},
    bridges=[],
-    without_host: bool = False,
+    is_slave: bool = False,
    visual: str = "None",
    resources_mesh_config: dict = {},
    **kwargs,
@@ -32,7 +32,7 @@ def start_backend(
        raise ValueError(f"Unsupported backend: {backend}")

    backend_thread = threading.Thread(
-        target=main if not without_host else slave,
+        target=main if not is_slave else slave,
        args=(
            devices_config,
            resources_config,
--- a/unilabos/app/main.py
+++ b/unilabos/app/main.py
@@ -375,22 +375,23 @@ def main():

    args_dict["bridges"] = []

-    # 获取通信客户端（仅支持WebSocket）
-    comm_client = get_communication_client()
-
-    if "websocket" in args_dict["app_bridges"]:
-        args_dict["bridges"].append(comm_client)
    if "fastapi" in args_dict["app_bridges"]:
        args_dict["bridges"].append(http_client)
-    if "websocket" in args_dict["app_bridges"]:
+    # 获取通信客户端（仅支持WebSocket）
+    if BasicConfig.is_host_mode:
+        comm_client = get_communication_client()
+        if "websocket" in args_dict["app_bridges"]:
+            args_dict["bridges"].append(comm_client)
+            def _exit(signum, frame):
+                comm_client.stop()
+                sys.exit(0)

-        def _exit(signum, frame):
-            comm_client.stop()
-            sys.exit(0)
+            signal.signal(signal.SIGINT, _exit)
+            signal.signal(signal.SIGTERM, _exit)
+            comm_client.start()
+    else:
+        print_status("SlaveMode跳过Websocket连接")

-        signal.signal(signal.SIGINT, _exit)
-        signal.signal(signal.SIGTERM, _exit)
-        comm_client.start()
    args_dict["resources_mesh_config"] = {}
    args_dict["resources_edge_config"] = resource_edge_info
    # web visiualize 2D
--- a/unilabos/app/register.py
+++ b/unilabos/app/register.py
@@ -1,11 +1,12 @@
 import json
 import time
+from typing import Optional, Tuple, Dict, Any

 from unilabos.utils.log import logger
 from unilabos.utils.type_check import TypeEncoder


-def register_devices_and_resources(lab_registry):
+def register_devices_and_resources(lab_registry, gather_only=False) -> Optional[Tuple[Dict[str, Any], Dict[str, Any]]]:
    """
    注册设备和资源到服务器（仅支持HTTP）
    """
@@ -28,6 +29,8 @@ def register_devices_and_resources(lab_registry):
        resources_to_register[resource_info["id"]] = resource_info
        logger.debug(f"[UniLab Register] 收集资源: {resource_info['id']}")

+    if gather_only:
+        return devices_to_register, resources_to_register
    # 注册设备
    if devices_to_register:
        try:
--- a/unilabos/app/ws_client.py
+++ b/unilabos/app/ws_client.py
@@ -261,29 +261,28 @@ class DeviceActionManager:
            device_key = job_info.device_action_key

            # 如果是正在执行的任务
-            if (
-                device_key in self.active_jobs and self.active_jobs[device_key].job_id == job_id
-            ):  # 后面需要和cancel_goal进行联动，而不是在这里进行处理，现在默认等待这个job结束
-                # del self.active_jobs[device_key]
-                # job_info.status = JobStatus.ENDED
-                # # 从all_jobs中移除
-                # del self.all_jobs[job_id]
-                # job_log = format_job_log(job_info.job_id, job_info.task_id, job_info.device_id, job_info.action_name)
-                # logger.info(f"[DeviceActionManager] Active job {job_log} cancelled for {device_key}")
+            if device_key in self.active_jobs and self.active_jobs[device_key].job_id == job_id:
+                # 清理active job状态
+                del self.active_jobs[device_key]
+                job_info.status = JobStatus.ENDED
+                # 从all_jobs中移除
+                del self.all_jobs[job_id]
+                job_log = format_job_log(job_info.job_id, job_info.task_id, job_info.device_id, job_info.action_name)
+                logger.info(f"[DeviceActionManager] Active job {job_log} cancelled for {device_key}")

-                # # 启动下一个任务
-                # if device_key in self.device_queues and self.device_queues[device_key]:
-                #     next_job = self.device_queues[device_key].pop(0)
-                #     # 将下一个job设置为READY状态并放入active_jobs
-                #     next_job.status = JobStatus.READY
-                #     next_job.update_timestamp()
-                #     next_job.set_ready_timeout(10)
-                #     self.active_jobs[device_key] = next_job
-                #     next_job_log = format_job_log(next_job.job_id, next_job.task_id,
-                #                                   next_job.device_id, next_job.action_name)
-                #     logger.info(f"[DeviceActionManager] Next job {next_job_log} can start after cancel")
-                #     return True
-                pass
+                # 启动下一个任务
+                if device_key in self.device_queues and self.device_queues[device_key]:
+                    next_job = self.device_queues[device_key].pop(0)
+                    # 将下一个job设置为READY状态并放入active_jobs
+                    next_job.status = JobStatus.READY
+                    next_job.update_timestamp()
+                    next_job.set_ready_timeout(10)
+                    self.active_jobs[device_key] = next_job
+                    next_job_log = format_job_log(
+                        next_job.job_id, next_job.task_id, next_job.device_id, next_job.action_name
+                    )
+                    logger.info(f"[DeviceActionManager] Next job {next_job_log} can start after cancel")
+                return True

            # 如果是排队中的任务
            elif device_key in self.device_queues:
@@ -741,31 +740,51 @@ class MessageProcessor:
                job_info.action_name if job_info else "",
            )

-            # 按job_id取消单个job
+            # 先通知HostNode取消ROS2 action（如果存在）
+            host_node = HostNode.get_instance(0)
+            ros_cancel_success = False
+            if host_node:
+                ros_cancel_success = host_node.cancel_goal(job_id)
+                if ros_cancel_success:
+                    logger.info(f"[MessageProcessor] ROS2 cancel request sent for job {job_log}")
+                else:
+                    logger.debug(
+                        f"[MessageProcessor] Job {job_log} not in ROS2 goals " "(may be queued or already finished)"
+                    )
+
+            # 按job_id取消单个job（清理状态机）
            success = self.device_manager.cancel_job(job_id)
            if success:
-                # 通知HostNode取消
-                host_node = HostNode.get_instance(0)
-                if host_node:
-                    host_node.cancel_goal(job_id)
-                logger.info(f"[MessageProcessor] Job {job_log} cancelled")
+                logger.info(f"[MessageProcessor] Job {job_log} cancelled from queue/active list")

                # 通知QueueProcessor有队列更新
                if self.queue_processor:
                    self.queue_processor.notify_queue_update()
            else:
-                logger.warning(f"[MessageProcessor] Failed to cancel job {job_log}")
+                logger.warning(f"[MessageProcessor] Failed to cancel job {job_log} from queue")

        elif task_id:
-            # 按task_id取消所有相关job
+            # 先通知HostNode取消所有ROS2 actions
+            # 需要先获取所有相关job_ids
+            jobs_to_cancel = []
+            with self.device_manager.lock:
+                jobs_to_cancel = [
+                    job_info for job_info in self.device_manager.all_jobs.values() if job_info.task_id == task_id
+                ]
+
+            host_node = HostNode.get_instance(0)
+            if host_node and jobs_to_cancel:
+                ros_cancelled_count = 0
+                for job_info in jobs_to_cancel:
+                    if host_node.cancel_goal(job_info.job_id):
+                        ros_cancelled_count += 1
+                logger.info(
+                    f"[MessageProcessor] Sent ROS2 cancel for " f"{ros_cancelled_count}/{len(jobs_to_cancel)} jobs"
+                )
+
+            # 按task_id取消所有相关job（清理状态机）
            cancelled_job_ids = self.device_manager.cancel_jobs_by_task_id(task_id)
            if cancelled_job_ids:
-                # 通知HostNode取消所有job
-                host_node = HostNode.get_instance(0)
-                if host_node:
-                    for cancelled_job_id in cancelled_job_ids:
-                        host_node.cancel_goal(cancelled_job_id)
-
                logger.info(f"[MessageProcessor] Cancelled {len(cancelled_job_ids)} jobs for task_id: {task_id}")

                # 通知QueueProcessor有队列更新
@@ -1056,11 +1075,19 @@ class QueueProcessor:
        """处理任务完成"""
        # 获取job信息用于日志
        job_info = self.device_manager.get_job_info(job_id)
+
+        # 如果job不存在，说明可能已被手动取消
+        if not job_info:
+            logger.debug(
+                f"[QueueProcessor] Job {job_id[:8]} not found in manager " "(may have been cancelled manually)"
+            )
+            return
+
        job_log = format_job_log(
            job_id,
-            job_info.task_id if job_info else "",
-            job_info.device_id if job_info else "",
-            job_info.action_name if job_info else "",
+            job_info.task_id,
+            job_info.device_id,
+            job_info.action_name,
        )

        logger.info(f"[QueueProcessor] Job {job_log} completed with status: {status}")