# Uni-Lab-OS/unilabos/workflow/from_xdl.py

from typing import List, Any, Dict
import xml.etree.ElementTree as ET


# Ordered (suffix, separator, divisor) rules used by convert_to_type.
# The first rule whose suffix matches wins, so "mmol" must precede "mol".
# The numeric text is whatever comes before the first occurrence of
# `separator`; the parsed number is then divided by `divisor`.
# Note: "mg" values are returned as-is (not rescaled), whereas "mmol" values
# are divided by 1000.
_UNIT_RULES = (
    (" g", " ", 1),
    ("mg", "mg", 1),
    ("mmol", "mmol", 1000),
    ("mol", "mol", 1),
    ("ml", "ml", 1),
    ("RPM", "RPM", 1),
    (" °C", " ", 1),
    (" %", " ", 1),
)


def convert_to_type(val: str) -> Any:
    """Convert an XDL attribute string into a suitable Python value.

    Conversion rules, applied in order:
      * ``"True"`` / ``"False"`` become the corresponding ``bool``.
      * ``"?"`` becomes ``None``.
      * A string ending in one of the recognised unit suffixes (`` g``,
        ``mg``, ``mmol``, ``mol``, ``ml``, ``RPM``, `` °C``, `` %``) becomes
        a ``float`` parsed from its leading numeric text; ``mmol`` values
        are additionally divided by 1000.
      * Anything else is returned unchanged.

    Args:
        val: Raw attribute value.

    Returns:
        ``bool``, ``None``, ``float`` or the original string.
    """
    if val == "True":
        return True
    if val == "False":
        return False
    if val == "?":
        return None

    for suffix, separator, divisor in _UNIT_RULES:
        if not val.endswith(suffix):
            continue
        try:
            number = float(val.split(separator)[0])
        except ValueError:
            # The value merely looks like a quantity (e.g. a name such as
            # "paracetamol" ends with "mol"); keep it as plain text instead
            # of raising.
            return val
        return number / divisor if divisor != 1 else number

    return val


def flatten_xdl_procedure(procedure_elem: ET.Element) -> List[ET.Element]:
    """Flatten a nested XDL procedure into a flat list of operation elements.

    Every descendant of ``procedure_elem`` is visited in document order.
    Grouping elements (``Prep``, ``Reaction``, ``Workup``, ``Purification``,
    ``Procedure``) and the temporarily unsupported operations (``Purge``,
    ``Wait``, ``Stir``, ``ResetHandling``) are left out of the result, but
    their children are still visited.

    Args:
        procedure_elem: Element whose descendants are flattened; the element
            itself is never included.

    Returns:
        The retained descendant elements, in document order.
    """
    grouping_tags = ("Prep", "Reaction", "Workup", "Purification", "Procedure")
    unsupported_tags = ("Purge", "Wait", "Stir", "ResetHandling")

    descendants = procedure_elem.iter()
    next(descendants)  # iter() yields procedure_elem itself first; skip it

    return [
        node
        for node in descendants
        if node.tag not in grouping_tags and node.tag not in unsupported_tags
    ]


def parse_xdl_content(xdl_content: str) -> tuple:
    """Parse an XDL document into hardware, reagents and flattened operations.

    Args:
        xdl_content: XDL document as an XML string.

    Returns:
        A ``(hardware, reagents, operations)`` tuple where ``hardware`` is a
        list of ``{"id", "type"}`` dicts, ``reagents`` is a list of
        ``{"name", "role"}`` dicts and ``operations`` is the list of elements
        returned by ``flatten_xdl_procedure``. ``(None, None, None)`` is
        returned when the root has no ``Synthesis`` child or the synthesis
        has no ``Procedure`` child.

    Raises:
        ValueError: If the content is not well-formed XML.
    """
    # Strip non-printable characters that would make the XML parser fail, but
    # keep tab / newline / carriage return: they are legal XML whitespace and
    # may be the only separator between a tag name and its attributes.
    xdl_content_cleaned = "".join(
        c for c in xdl_content if c.isprintable() or c in "\t\n\r"
    )

    try:
        root = ET.fromstring(xdl_content_cleaned)
    except ET.ParseError as e:
        raise ValueError(f"Invalid XDL format: {e}") from e

    synthesis_elem = root.find("Synthesis")
    if synthesis_elem is None:
        return None, None, None

    # Hardware components
    hardware_elem = synthesis_elem.find("Hardware")
    hardware = []
    if hardware_elem is not None:
        hardware = [{"id": c.get("id"), "type": c.get("type")} for c in hardware_elem.findall("Component")]

    # Reagents
    reagents_elem = synthesis_elem.find("Reagents")
    reagents = []
    if reagents_elem is not None:
        reagents = [{"name": r.get("name"), "role": r.get("role", "")} for r in reagents_elem.findall("Reagent")]

    # Procedure
    procedure_elem = synthesis_elem.find("Procedure")
    if procedure_elem is None:
        return None, None, None

    flattened_operations = flatten_xdl_procedure(procedure_elem)
    return hardware, reagents, flattened_operations


def convert_xdl_to_dict(xdl_content: str) -> Dict[str, Any]:
    """
    Convert XDL XML content into a plain dictionary description.

    Args:
        xdl_content: XDL document as an XML string.

    Returns:
        On success, a dict with ``"success": True``, a ``"steps"`` list (one
        entry per flattened operation, holding its tag, converted parameters
        and description), a ``"labware"`` list (hardware entries followed by
        reagent entries) and a summary ``"message"``. On failure, a dict with
        ``"success": False`` and an ``"error"`` message; exceptions are not
        propagated to the caller.
    """
    try:
        hardware, reagents, operations = parse_xdl_content(xdl_content)
        if hardware is None:
            return {"error": "Failed to parse XDL content", "success": False}

        # One step dict per operation element.
        steps_data = []
        for op in operations:
            converted = ((name, convert_to_type(raw)) for name, raw in op.attrib.items())
            steps_data.append(
                {
                    "operation": op.tag,
                    # Attributes that convert to None are omitted.
                    "parameters": {name: value for name, value in converted if value is not None},
                    "description": op.get("purpose", f"Operation: {op.tag}"),
                }
            )

        # Hardware and reagents share a single labware list.
        # NOTE(review): because ``**hw`` is spread last, a "type" key present
        # in a hardware entry replaces the "hardware" marker - confirm this
        # is intended.
        hardware_items = [{"id": hw["id"], "type": "hardware", **hw} for hw in hardware]
        reagent_items = [{"name": reagent["name"], "type": "reagent", **reagent} for reagent in reagents]
        labware_data = hardware_items + reagent_items

        return {
            "success": True,
            "steps": steps_data,
            "labware": labware_data,
            "message": f"Successfully converted XDL to dict format. Found {len(steps_data)} steps and {len(labware_data)} labware items.",
        }

    except Exception as e:
        return {"error": f"XDL conversion failed: {e!s}", "success": False}