Resource update & asyncio fix

correct bioyond config

prcxi example

fix append_resource

fix regularcontainer

fix cancel error

fix resource_get param

fix json dumps

support name change during materials change

enable slave mode

change uuid logger to trace level

correct remove_resource stats

disable slave connect websocket

adjust with_children param

modify devices to use correct executor (sleep, create_task)

support sleep and create_task in node

fix run async execution error
Xuwznln
2025-10-31 21:43:25 +08:00
parent 8807865649
commit b6dfe2b944
32 changed files with 1456 additions and 943 deletions
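
The commit title and the "sleep, create_task" items refer to scheduling coroutines on the node's own executor. None of the hunks shown below contain that change, so the following is only a generic illustration of the pattern, using the standard asyncio API; the names node_loop and do_sleep_step are hypothetical and not taken from this commit.

import asyncio
import threading

# Hypothetical sketch: give the node its own event loop and always schedule
# coroutines onto it, instead of calling asyncio.create_task()/asyncio.sleep()
# on whichever loop happens to be current in the calling thread.
node_loop = asyncio.new_event_loop()
threading.Thread(target=node_loop.run_forever, daemon=True).start()

async def do_sleep_step():
    await asyncio.sleep(1)  # runs on node_loop, not on the caller's loop
    return "done"

# From a worker thread, hand the coroutine to the node's loop and wait for it.
future = asyncio.run_coroutine_threadsafe(do_sleep_step(), node_loop)
print(future.result())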

View File

@@ -13,7 +13,7 @@ def start_backend(
     graph=None,
     controllers_config: dict = {},
     bridges=[],
-    without_host: bool = False,
+    is_slave: bool = False,
     visual: str = "None",
     resources_mesh_config: dict = {},
     **kwargs,
@@ -32,7 +32,7 @@ def start_backend(
         raise ValueError(f"Unsupported backend: {backend}")
     backend_thread = threading.Thread(
-        target=main if not without_host else slave,
+        target=main if not is_slave else slave,
         args=(
             devices_config,
             resources_config,
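
Callers that previously passed without_host=True must switch to is_slave=True. A minimal sketch of the assumed call site follows; the keyword names other than is_slave, and the shape of the config dicts, are assumptions based on the parameters visible above.

# Hypothetical call: run the slave() entry point in the backend thread
# instead of the host main().
start_backend(
    devices_config=devices_config,
    resources_config=resources_config,
    is_slave=True,
)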

View File

@@ -375,22 +375,23 @@ def main():
args_dict["bridges"] = []
# Get the communication client (WebSocket only)
comm_client = get_communication_client()
if "websocket" in args_dict["app_bridges"]:
args_dict["bridges"].append(comm_client)
if "fastapi" in args_dict["app_bridges"]:
args_dict["bridges"].append(http_client)
if "websocket" in args_dict["app_bridges"]:
# Get the communication client (WebSocket only)
if BasicConfig.is_host_mode:
comm_client = get_communication_client()
if "websocket" in args_dict["app_bridges"]:
args_dict["bridges"].append(comm_client)
def _exit(signum, frame):
comm_client.stop()
sys.exit(0)
def _exit(signum, frame):
comm_client.stop()
sys.exit(0)
signal.signal(signal.SIGINT, _exit)
signal.signal(signal.SIGTERM, _exit)
comm_client.start()
else:
print_status("SlaveMode: skipping WebSocket connection")
signal.signal(signal.SIGINT, _exit)
signal.signal(signal.SIGTERM, _exit)
comm_client.start()
args_dict["resources_mesh_config"] = {}
args_dict["resources_edge_config"] = resource_edge_info
# web visualize 2D
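
The flattened hunk above interleaves the removed and the added lines. Reconstructed from the added lines only, the new bridge setup likely reads roughly as follows; the indentation and the exact nesting of the two checks (websocket bridge vs. BasicConfig.is_host_mode) are assumptions.

if "fastapi" in args_dict["app_bridges"]:
    args_dict["bridges"].append(http_client)
if "websocket" in args_dict["app_bridges"]:
    # Get the communication client (WebSocket only)
    if BasicConfig.is_host_mode:
        comm_client = get_communication_client()
        args_dict["bridges"].append(comm_client)

        def _exit(signum, frame):
            comm_client.stop()
            sys.exit(0)

        signal.signal(signal.SIGINT, _exit)
        signal.signal(signal.SIGTERM, _exit)
        comm_client.start()
    else:
        print_status("SlaveMode: skipping WebSocket connection")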

View File

@@ -1,11 +1,12 @@
import json
import time
from typing import Optional, Tuple, Dict, Any
from unilabos.utils.log import logger
from unilabos.utils.type_check import TypeEncoder
def register_devices_and_resources(lab_registry):
def register_devices_and_resources(lab_registry, gather_only=False) -> Optional[Tuple[Dict[str, Any], Dict[str, Any]]]:
"""
Register devices and resources to the server (HTTP only)
"""
@@ -28,6 +29,8 @@ def register_devices_and_resources(lab_registry):
resources_to_register[resource_info["id"]] = resource_info
logger.debug(f"[UniLab Register] Collected resource: {resource_info['id']}")
if gather_only:
return devices_to_register, resources_to_register
# Register devices
if devices_to_register:
try:
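
With the new gather_only flag the function can be used to collect the registration payloads without performing the HTTP calls. A minimal usage sketch, assuming the normal (registering) path returns None:

# Gather payloads only; skip the HTTP registration requests.
result = register_devices_and_resources(lab_registry, gather_only=True)
if result is not None:
    devices_to_register, resources_to_register = result
    logger.info(f"collected {len(devices_to_register)} devices and {len(resources_to_register)} resources")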

View File

@@ -261,29 +261,28 @@ class DeviceActionManager:
device_key = job_info.device_action_key
# If this is the currently executing job
if (
device_key in self.active_jobs and self.active_jobs[device_key].job_id == job_id
): # Later this should be coordinated with cancel_goal rather than handled here; for now we default to waiting for the job to finish
# del self.active_jobs[device_key]
# job_info.status = JobStatus.ENDED
# # 从all_jobs中移除
# del self.all_jobs[job_id]
# job_log = format_job_log(job_info.job_id, job_info.task_id, job_info.device_id, job_info.action_name)
# logger.info(f"[DeviceActionManager] Active job {job_log} cancelled for {device_key}")
if device_key in self.active_jobs and self.active_jobs[device_key].job_id == job_id:
# Clean up the active job state
del self.active_jobs[device_key]
job_info.status = JobStatus.ENDED
# Remove from all_jobs
del self.all_jobs[job_id]
job_log = format_job_log(job_info.job_id, job_info.task_id, job_info.device_id, job_info.action_name)
logger.info(f"[DeviceActionManager] Active job {job_log} cancelled for {device_key}")
# # Start the next job
# if device_key in self.device_queues and self.device_queues[device_key]:
# next_job = self.device_queues[device_key].pop(0)
# # Set the next job to READY and put it into active_jobs
# next_job.status = JobStatus.READY
# next_job.update_timestamp()
# next_job.set_ready_timeout(10)
# self.active_jobs[device_key] = next_job
# next_job_log = format_job_log(next_job.job_id, next_job.task_id,
# next_job.device_id, next_job.action_name)
# logger.info(f"[DeviceActionManager] Next job {next_job_log} can start after cancel")
# return True
pass
# Start the next job
if device_key in self.device_queues and self.device_queues[device_key]:
next_job = self.device_queues[device_key].pop(0)
# Set the next job to READY and put it into active_jobs
next_job.status = JobStatus.READY
next_job.update_timestamp()
next_job.set_ready_timeout(10)
self.active_jobs[device_key] = next_job
next_job_log = format_job_log(
next_job.job_id, next_job.task_id, next_job.device_id, next_job.action_name
)
logger.info(f"[DeviceActionManager] Next job {next_job_log} can start after cancel")
return True
# If the job is still queued
elif device_key in self.device_queues:
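
The hunk above interleaves the previously commented-out code with the newly enabled lines, which makes it hard to follow. Reconstructed from the added lines, the active-job branch of the cancel handler now likely reads as follows (indentation assumed):

if device_key in self.active_jobs and self.active_jobs[device_key].job_id == job_id:
    # Clean up the active job state
    del self.active_jobs[device_key]
    job_info.status = JobStatus.ENDED
    # Remove from all_jobs
    del self.all_jobs[job_id]
    job_log = format_job_log(job_info.job_id, job_info.task_id, job_info.device_id, job_info.action_name)
    logger.info(f"[DeviceActionManager] Active job {job_log} cancelled for {device_key}")
    # Start the next job
    if device_key in self.device_queues and self.device_queues[device_key]:
        next_job = self.device_queues[device_key].pop(0)
        # Set the next job to READY and put it into active_jobs
        next_job.status = JobStatus.READY
        next_job.update_timestamp()
        next_job.set_ready_timeout(10)
        self.active_jobs[device_key] = next_job
        next_job_log = format_job_log(
            next_job.job_id, next_job.task_id, next_job.device_id, next_job.action_name
        )
        logger.info(f"[DeviceActionManager] Next job {next_job_log} can start after cancel")
    return True
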
@@ -741,31 +740,51 @@ class MessageProcessor:
job_info.action_name if job_info else "",
)
# Cancel a single job by job_id
# First notify HostNode to cancel the ROS2 action, if one exists
host_node = HostNode.get_instance(0)
ros_cancel_success = False
if host_node:
ros_cancel_success = host_node.cancel_goal(job_id)
if ros_cancel_success:
logger.info(f"[MessageProcessor] ROS2 cancel request sent for job {job_log}")
else:
logger.debug(
f"[MessageProcessor] Job {job_log} not in ROS2 goals " "(may be queued or already finished)"
)
# Cancel the single job by job_id and clean up the state machine
success = self.device_manager.cancel_job(job_id)
if success:
# Notify HostNode to cancel
host_node = HostNode.get_instance(0)
if host_node:
host_node.cancel_goal(job_id)
logger.info(f"[MessageProcessor] Job {job_log} cancelled")
logger.info(f"[MessageProcessor] Job {job_log} cancelled from queue/active list")
# Notify QueueProcessor that the queue has been updated
if self.queue_processor:
self.queue_processor.notify_queue_update()
else:
logger.warning(f"[MessageProcessor] Failed to cancel job {job_log}")
logger.warning(f"[MessageProcessor] Failed to cancel job {job_log} from queue")
elif task_id:
# Cancel all related jobs by task_id
# First notify HostNode to cancel all ROS2 actions
# All related job_ids have to be collected first
jobs_to_cancel = []
with self.device_manager.lock:
jobs_to_cancel = [
job_info for job_info in self.device_manager.all_jobs.values() if job_info.task_id == task_id
]
host_node = HostNode.get_instance(0)
if host_node and jobs_to_cancel:
ros_cancelled_count = 0
for job_info in jobs_to_cancel:
if host_node.cancel_goal(job_info.job_id):
ros_cancelled_count += 1
logger.info(
f"[MessageProcessor] Sent ROS2 cancel for " f"{ros_cancelled_count}/{len(jobs_to_cancel)} jobs"
)
# Cancel all related jobs by task_id and clean up the state machine
cancelled_job_ids = self.device_manager.cancel_jobs_by_task_id(task_id)
if cancelled_job_ids:
# Notify HostNode to cancel all jobs
host_node = HostNode.get_instance(0)
if host_node:
for cancelled_job_id in cancelled_job_ids:
host_node.cancel_goal(cancelled_job_id)
logger.info(f"[MessageProcessor] Cancelled {len(cancelled_job_ids)} jobs for task_id: {task_id}")
# Notify QueueProcessor that the queue has been updated
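
Condensed from the added lines, the reworked by-job_id path now sends the ROS2 cancel before clearing the local state machine; a sketch of the assumed flow:

# Sketch of the new cancel-by-job_id flow (reconstructed, indentation assumed).
host_node = HostNode.get_instance(0)
ros_cancel_success = host_node.cancel_goal(job_id) if host_node else False
if ros_cancel_success:
    logger.info(f"[MessageProcessor] ROS2 cancel request sent for job {job_log}")
else:
    logger.debug(f"[MessageProcessor] Job {job_log} not in ROS2 goals (may be queued or already finished)")

# Then clean up the local queue/active state and wake the queue processor.
if self.device_manager.cancel_job(job_id):
    logger.info(f"[MessageProcessor] Job {job_log} cancelled from queue/active list")
    if self.queue_processor:
        self.queue_processor.notify_queue_update()
else:
    logger.warning(f"[MessageProcessor] Failed to cancel job {job_log} from queue")
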
@@ -1056,11 +1075,19 @@ class QueueProcessor:
"""处理任务完成"""
# Get job info for logging
job_info = self.device_manager.get_job_info(job_id)
# If the job no longer exists, it may already have been cancelled manually
if not job_info:
logger.debug(
f"[QueueProcessor] Job {job_id[:8]} not found in manager " "(may have been cancelled manually)"
)
return
job_log = format_job_log(
job_id,
job_info.task_id if job_info else "",
job_info.device_id if job_info else "",
job_info.action_name if job_info else "",
job_info.task_id,
job_info.device_id,
job_info.action_name,
)
logger.info(f"[QueueProcessor] Job {job_log} completed with status: {status}")