Dev backward (#228)

* Workbench example, adjust log level, and ci check (#220)

* TestLatency Return Value Example & gitignore update

* Adjust log level & Add workbench virtual example & Add not action decorator & Add check_mode &

* Add CI Check

* CI Check Fix 1

* CI Check Fix 2

* CI Check Fix 3

* CI Check Fix 4

* CI Check Fix 5

* Upgrade to py 3.11.14; ros 0.7; unilabos 0.10.16

* Update to ROS2 Humble 0.7

* Fix Build 1

* Fix Build 2

* Fix Build 3

* Fix Build 4

* Fix Build 5

* Fix Build 6

* Fix Build 7

* ci(deps): bump actions/configure-pages from 4 to 5 (#222)

Bumps [actions/configure-pages](https://github.com/actions/configure-pages) from 4 to 5.
- [Release notes](https://github.com/actions/configure-pages/releases)
- [Commits](https://github.com/actions/configure-pages/compare/v4...v5)

---
updated-dependencies:
- dependency-name: actions/configure-pages
  dependency-version: '5'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

* ci(deps): bump actions/upload-artifact from 4 to 6 (#224)

Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4 to 6.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](https://github.com/actions/upload-artifact/compare/v4...v6)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-version: '6'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

* ci(deps): bump actions/upload-pages-artifact from 3 to 4 (#225)

Bumps [actions/upload-pages-artifact](https://github.com/actions/upload-pages-artifact) from 3 to 4.
- [Release notes](https://github.com/actions/upload-pages-artifact/releases)
- [Commits](https://github.com/actions/upload-pages-artifact/compare/v3...v4)

---
updated-dependencies:
- dependency-name: actions/upload-pages-artifact
  dependency-version: '4'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

* ci(deps): bump actions/checkout from 4 to 6 (#223)

Bumps [actions/checkout](https://github.com/actions/checkout) from 4 to 6.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/v4...v6)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-version: '6'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>

* Fix Build 8

* Fix Build 9

* Fix Build 10

* Fix Build 11

* Fix Build 12

* Fix Build 13

* v0.10.17

(cherry picked from commit 176de521b4)

* CI Check use production mode

* Fix OT2 & ReAdd Virtual Devices

* add msg goal

* transfer liquid handles

* gather query

* add unilabos_class

* Support root node change pos

* save class name when deserialize & protocol execute test

* fix upload workflow json

* workflow upload & set liquid fix & add set liquid with plate

* speed up registry load

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: hanhua@dp.tech <2509856570@qq.com>
This commit is contained in:
Xuwznln
2026-02-02 23:57:13 +08:00
committed by GitHub
parent c4a3be1498
commit e30c01d54e
65 changed files with 4603 additions and 1655 deletions

View File

@@ -7,7 +7,6 @@ import sys
import threading
import time
from typing import Dict, Any, List
import networkx as nx
import yaml
@@ -17,9 +16,9 @@ unilabos_dir = os.path.dirname(os.path.dirname(current_dir))
if unilabos_dir not in sys.path:
sys.path.append(unilabos_dir)
from unilabos.app.utils import cleanup_for_restart
from unilabos.utils.banner_print import print_status, print_unilab_banner
from unilabos.config.config import load_config, BasicConfig, HTTPConfig
from unilabos.app.utils import cleanup_for_restart
# Global restart flags (used by ws_client and web/server)
_restart_requested: bool = False
@@ -161,6 +160,12 @@ def parse_args():
default=False,
help="Complete registry information",
)
parser.add_argument(
"--check_mode",
action="store_true",
default=False,
help="Run in check mode for CI: validates registry imports and ensures no file changes",
)
parser.add_argument(
"--no_update_feedback",
action="store_true",
@@ -211,7 +216,10 @@ def main():
args_dict = vars(args)
# 环境检查 - 检查并自动安装必需的包 (可选)
if not args_dict.get("skip_env_check", False):
skip_env_check = args_dict.get("skip_env_check", False)
check_mode = args_dict.get("check_mode", False)
if not skip_env_check:
from unilabos.utils.environment_check import check_environment
if not check_environment(auto_install=True):
@@ -222,7 +230,21 @@ def main():
# 加载配置文件优先加载config然后从env读取
config_path = args_dict.get("config")
if os.getcwd().endswith("unilabos_data"):
if check_mode:
args_dict["working_dir"] = os.path.abspath(os.getcwd())
# 当 skip_env_check 时,默认使用当前目录作为 working_dir
if skip_env_check and not args_dict.get("working_dir") and not config_path:
working_dir = os.path.abspath(os.getcwd())
print_status(f"跳过环境检查模式:使用当前目录作为工作目录 {working_dir}", "info")
# 检查当前目录是否有 local_config.py
local_config_in_cwd = os.path.join(working_dir, "local_config.py")
if os.path.exists(local_config_in_cwd):
config_path = local_config_in_cwd
print_status(f"发现本地配置文件: {config_path}", "info")
else:
print_status(f"未指定config路径可通过 --config 传入 local_config.py 文件路径", "info")
elif os.getcwd().endswith("unilabos_data"):
working_dir = os.path.abspath(os.getcwd())
else:
working_dir = os.path.abspath(os.path.join(os.getcwd(), "unilabos_data"))
@@ -241,7 +263,7 @@ def main():
working_dir = os.path.dirname(config_path)
elif os.path.exists(working_dir) and os.path.exists(os.path.join(working_dir, "local_config.py")):
config_path = os.path.join(working_dir, "local_config.py")
elif not config_path and (
elif not skip_env_check and not config_path and (
not os.path.exists(working_dir) or not os.path.exists(os.path.join(working_dir, "local_config.py"))
):
print_status(f"未指定config路径可通过 --config 传入 local_config.py 文件路径", "info")
@@ -255,9 +277,11 @@ def main():
print_status(f"已创建 local_config.py 路径: {config_path}", "info")
else:
os._exit(1)
# 加载配置文件
# 加载配置文件 (check_mode 跳过)
print_status(f"当前工作目录为 {working_dir}", "info")
load_config_from_file(config_path)
if not check_mode:
load_config_from_file(config_path)
# 根据配置重新设置日志级别
from unilabos.utils.log import configure_logger, logger
@@ -313,6 +337,7 @@ def main():
machine_name = "".join([c if c.isalnum() or c == "_" else "_" for c in machine_name])
BasicConfig.machine_name = machine_name
BasicConfig.vis_2d_enable = args_dict["2d_vis"]
BasicConfig.check_mode = check_mode
from unilabos.resources.graphio import (
read_node_link_json,
@@ -331,10 +356,14 @@ def main():
# 显示启动横幅
print_unilab_banner(args_dict)
# 注册表
lab_registry = build_registry(
args_dict["registry_path"], args_dict.get("complete_registry", False), BasicConfig.upload_registry
)
# 注册表 - check_mode 时强制启用 complete_registry
complete_registry = args_dict.get("complete_registry", False) or check_mode
lab_registry = build_registry(args_dict["registry_path"], complete_registry, BasicConfig.upload_registry)
# Check mode: complete_registry 完成后直接退出git diff 检测由 CI workflow 执行
if check_mode:
print_status("Check mode: complete_registry 完成,退出", "info")
os._exit(0)
if BasicConfig.upload_registry:
# 设备注册到服务端 - 需要 ak 和 sk

View File

@@ -4,8 +4,40 @@ UniLabOS 应用工具函数
提供清理、重启等工具函数
"""
import gc
import glob
import os
import shutil
import sys
def patch_rclpy_dll_windows():
"""在 Windows + conda 环境下为 rclpy 打 DLL 加载补丁"""
if sys.platform != "win32" or not os.environ.get("CONDA_PREFIX"):
return
try:
import rclpy
return
except ImportError as e:
if not str(e).startswith("DLL load failed"):
return
cp = os.environ["CONDA_PREFIX"]
impl = os.path.join(cp, "Lib", "site-packages", "rclpy", "impl", "implementation_singleton.py")
pyd = glob.glob(os.path.join(cp, "Lib", "site-packages", "rclpy", "_rclpy_pybind11*.pyd"))
if not os.path.exists(impl) or not pyd:
return
with open(impl, "r", encoding="utf-8") as f:
content = f.read()
lib_bin = os.path.join(cp, "Library", "bin").replace("\\", "/")
patch = f'# UniLabOS DLL Patch\nimport os,ctypes\nos.add_dll_directory("{lib_bin}") if hasattr(os,"add_dll_directory") else None\ntry: ctypes.CDLL("{pyd[0].replace(chr(92),"/")}")\nexcept: pass\n# End Patch\n'
shutil.copy2(impl, impl + ".bak")
with open(impl, "w", encoding="utf-8") as f:
f.write(patch + content)
patch_rclpy_dll_windows()
import gc
import threading
import time

View File

@@ -359,9 +359,7 @@ class HTTPClient:
Returns:
Dict: API响应数据包含 code 和 data (uuid, name)
"""
# target_lab_uuid 暂时使用默认值,后续由后端根据 ak/sk 获取
payload = {
"target_lab_uuid": "28c38bb0-63f6-4352-b0d8-b5b8eb1766d5",
"name": name,
"data": {
"workflow_uuid": workflow_uuid,

View File

@@ -58,14 +58,14 @@ class JobResultStore:
feedback=feedback or {},
timestamp=time.time(),
)
logger.debug(f"[JobResultStore] Stored result for job {job_id[:8]}, status={status}")
logger.trace(f"[JobResultStore] Stored result for job {job_id[:8]}, status={status}")
def get_and_remove(self, job_id: str) -> Optional[JobResult]:
"""获取并删除任务结果"""
with self._results_lock:
result = self._results.pop(job_id, None)
if result:
logger.debug(f"[JobResultStore] Retrieved and removed result for job {job_id[:8]}")
logger.trace(f"[JobResultStore] Retrieved and removed result for job {job_id[:8]}")
return result
def get_result(self, job_id: str) -> Optional[JobResult]:

View File

@@ -23,7 +23,7 @@ from typing import Optional, Dict, Any, List
from urllib.parse import urlparse
from enum import Enum
from jedi.inference.gradual.typing import TypedDict
from typing_extensions import TypedDict
from unilabos.app.model import JobAddReq
from unilabos.ros.nodes.presets.host_node import HostNode
@@ -154,7 +154,7 @@ class DeviceActionManager:
job_info.set_ready_timeout(10) # 设置10秒超时
self.active_jobs[device_key] = job_info
job_log = format_job_log(job_info.job_id, job_info.task_id, job_info.device_id, job_info.action_name)
logger.info(f"[DeviceActionManager] Job {job_log} can start immediately for {device_key}")
logger.trace(f"[DeviceActionManager] Job {job_log} can start immediately for {device_key}")
return True
def start_job(self, job_id: str) -> bool:
@@ -210,8 +210,9 @@ class DeviceActionManager:
job_info.update_timestamp()
# 从all_jobs中移除已结束的job
del self.all_jobs[job_id]
job_log = format_job_log(job_info.job_id, job_info.task_id, job_info.device_id, job_info.action_name)
logger.info(f"[DeviceActionManager] Job {job_log} ended for {device_key}")
# job_log = format_job_log(job_info.job_id, job_info.task_id, job_info.device_id, job_info.action_name)
# logger.debug(f"[DeviceActionManager] Job {job_log} ended for {device_key}")
pass
else:
job_log = format_job_log(job_info.job_id, job_info.task_id, job_info.device_id, job_info.action_name)
logger.warning(f"[DeviceActionManager] Job {job_log} was not active for {device_key}")
@@ -227,7 +228,7 @@ class DeviceActionManager:
next_job_log = format_job_log(
next_job.job_id, next_job.task_id, next_job.device_id, next_job.action_name
)
logger.info(f"[DeviceActionManager] Next job {next_job_log} can start for {device_key}")
logger.trace(f"[DeviceActionManager] Next job {next_job_log} can start for {device_key}")
return next_job
return None
@@ -268,7 +269,7 @@ class DeviceActionManager:
# 从all_jobs中移除
del self.all_jobs[job_id]
job_log = format_job_log(job_info.job_id, job_info.task_id, job_info.device_id, job_info.action_name)
logger.info(f"[DeviceActionManager] Active job {job_log} cancelled for {device_key}")
logger.trace(f"[DeviceActionManager] Active job {job_log} cancelled for {device_key}")
# 启动下一个任务
if device_key in self.device_queues and self.device_queues[device_key]:
@@ -281,7 +282,7 @@ class DeviceActionManager:
next_job_log = format_job_log(
next_job.job_id, next_job.task_id, next_job.device_id, next_job.action_name
)
logger.info(f"[DeviceActionManager] Next job {next_job_log} can start after cancel")
logger.trace(f"[DeviceActionManager] Next job {next_job_log} can start after cancel")
return True
# 如果是排队中的任务
@@ -295,7 +296,7 @@ class DeviceActionManager:
job_log = format_job_log(
job_info.job_id, job_info.task_id, job_info.device_id, job_info.action_name
)
logger.info(f"[DeviceActionManager] Queued job {job_log} cancelled for {device_key}")
logger.trace(f"[DeviceActionManager] Queued job {job_log} cancelled for {device_key}")
return True
job_log = format_job_log(job_info.job_id, job_info.task_id, job_info.device_id, job_info.action_name)
@@ -494,8 +495,12 @@ class MessageProcessor:
await self._process_message(message_type, message_data)
else:
if message_type.endswith("_material"):
logger.trace(f"[MessageProcessor] 收到一条归属 {data.get('edge_session')} 的旧消息:{data}")
logger.debug(f"[MessageProcessor] 跳过了一条归属 {data.get('edge_session')} 的旧消息: {data.get('action')}")
logger.trace(
f"[MessageProcessor] 收到一条归属 {data.get('edge_session')} 的旧消息{data}"
)
logger.debug(
f"[MessageProcessor] 跳过了一条归属 {data.get('edge_session')} 的旧消息: {data.get('action')}"
)
else:
await self._process_message(message_type, message_data)
except json.JSONDecodeError:
@@ -565,7 +570,7 @@ class MessageProcessor:
async def _process_message(self, message_type: str, message_data: Dict[str, Any]):
"""处理收到的消息"""
logger.debug(f"[MessageProcessor] Processing message: {message_type}")
logger.trace(f"[MessageProcessor] Processing message: {message_type}")
try:
if message_type == "pong":
@@ -637,13 +642,13 @@ class MessageProcessor:
await self._send_action_state_response(
device_id, action_name, task_id, job_id, "query_action_status", True, 0
)
logger.info(f"[MessageProcessor] Job {job_log} can start immediately")
logger.trace(f"[MessageProcessor] Job {job_log} can start immediately")
else:
# 需要排队
await self._send_action_state_response(
device_id, action_name, task_id, job_id, "query_action_status", False, 10
)
logger.info(f"[MessageProcessor] Job {job_log} queued")
logger.trace(f"[MessageProcessor] Job {job_log} queued")
# 通知QueueProcessor有新的队列更新
if self.queue_processor:
@@ -847,9 +852,7 @@ class MessageProcessor:
device_action_groups[key_add] = []
device_action_groups[key_add].append(item["uuid"])
logger.info(
f"[资源同步] 跨站Transfer: {item['uuid'][:8]} from {device_old_id} to {device_id}"
)
logger.info(f"[资源同步] 跨站Transfer: {item['uuid'][:8]} from {device_old_id} to {device_id}")
else:
# 正常update
key = (device_id, "update")
@@ -863,7 +866,9 @@ class MessageProcessor:
device_action_groups[key] = []
device_action_groups[key].append(item["uuid"])
logger.trace(f"[资源同步] 动作 {action} 分组数量: {len(device_action_groups)}, 总数量: {len(resource_uuid_list)}")
logger.trace(
f"[资源同步] 动作 {action} 分组数量: {len(device_action_groups)}, 总数量: {len(resource_uuid_list)}"
)
# 为每个(device_id, action)创建独立的更新线程
for (device_id, actual_action), items in device_action_groups.items():
@@ -911,13 +916,13 @@ class MessageProcessor:
# 发送确认消息
if self.websocket_client:
await self.websocket_client.send_message({
"action": "restart_acknowledged",
"data": {"reason": reason, "delay": delay}
})
await self.websocket_client.send_message(
{"action": "restart_acknowledged", "data": {"reason": reason, "delay": delay}}
)
# 设置全局重启标志
import unilabos.app.main as main_module
main_module._restart_requested = True
main_module._restart_reason = reason
@@ -927,10 +932,12 @@ class MessageProcessor:
# 在新线程中执行清理,避免阻塞当前事件循环
def do_cleanup():
import time
time.sleep(0.5) # 给当前消息处理完成的时间
logger.info(f"[MessageProcessor] Starting cleanup for restart, reason: {reason}")
try:
from unilabos.app.utils import cleanup_for_restart
if cleanup_for_restart():
logger.info("[MessageProcessor] Cleanup successful, main() will restart")
else:
@@ -1128,7 +1135,7 @@ class QueueProcessor:
success = self.message_processor.send_message(message)
job_log = format_job_log(job_info.job_id, job_info.task_id, job_info.device_id, job_info.action_name)
if success:
logger.debug(f"[QueueProcessor] Sent busy/need_more for queued job {job_log}")
logger.trace(f"[QueueProcessor] Sent busy/need_more for queued job {job_log}")
else:
logger.warning(f"[QueueProcessor] Failed to send busy status for job {job_log}")
@@ -1151,7 +1158,7 @@ class QueueProcessor:
job_info.action_name,
)
logger.info(f"[QueueProcessor] Job {job_log} completed with status: {status}")
logger.trace(f"[QueueProcessor] Job {job_log} completed with status: {status}")
# 结束任务,获取下一个可执行的任务
next_job = self.device_manager.end_job(job_id)
@@ -1171,8 +1178,8 @@ class QueueProcessor:
},
}
self.message_processor.send_message(message)
next_job_log = format_job_log(next_job.job_id, next_job.task_id, next_job.device_id, next_job.action_name)
logger.info(f"[QueueProcessor] Notified next job {next_job_log} can start")
# next_job_log = format_job_log(next_job.job_id, next_job.task_id, next_job.device_id, next_job.action_name)
# logger.debug(f"[QueueProcessor] Notified next job {next_job_log} can start")
# 立即触发下一轮状态检查
self.notify_queue_update()
@@ -1314,7 +1321,7 @@ class WebSocketClient(BaseCommunicationClient):
except (KeyError, AttributeError):
logger.warning(f"[WebSocketClient] Failed to remove job {item.job_id} from HostNode status")
logger.info(f"[WebSocketClient] Intercepting final status for job_id: {item.job_id} - {status}")
# logger.debug(f"[WebSocketClient] Intercepting final status for job_id: {item.job_id} - {status}")
# 通知队列处理器job完成包括timeout的job
self.queue_processor.handle_job_completed(item.job_id, status)
@@ -1381,7 +1388,9 @@ class WebSocketClient(BaseCommunicationClient):
if host_node:
# 获取设备信息
for device_id, namespace in host_node.devices_names.items():
device_key = f"{namespace}/{device_id}" if namespace.startswith("/") else f"/{namespace}/{device_id}"
device_key = (
f"{namespace}/{device_id}" if namespace.startswith("/") else f"/{namespace}/{device_id}"
)
is_online = device_key in host_node._online_devices
# 获取设备的动作信息
@@ -1395,14 +1404,16 @@ class WebSocketClient(BaseCommunicationClient):
"action_type": str(type(client).__name__),
}
devices.append({
"device_id": device_id,
"namespace": namespace,
"device_key": device_key,
"is_online": is_online,
"machine_name": host_node.device_machine_names.get(device_id, machine_name),
"actions": actions,
})
devices.append(
{
"device_id": device_id,
"namespace": namespace,
"device_key": device_key,
"is_online": is_online,
"machine_name": host_node.device_machine_names.get(device_id, machine_name),
"actions": actions,
}
)
logger.info(f"[WebSocketClient] Collected {len(devices)} devices for host_ready")
except Exception as e: