diff --git a/other/materials_designer/workflows/total_energy.ipynb b/other/materials_designer/workflows/total_energy.ipynb new file mode 100644 index 00000000..8e7d3197 --- /dev/null +++ b/other/materials_designer/workflows/total_energy.ipynb @@ -0,0 +1,560 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": {}, + "source": [ + "# Total Energy\n", + "\n", + "This notebook demonstrates how to run a workflow calculation using the Mat3ra API.\n", + "\n", + "## Process Overview\n", + "1. Set up environment and parameters\n", + "2. Authenticate and initialize API client\n", + "3. Select account and project\n", + "4. Load and save materials\n", + "5. Configure workflow\n", + "6. Set up compute resources\n", + "7. Create and submit job\n", + "8. Monitor execution\n", + "9. Retrieve and visualize results" + ] + }, + { + "cell_type": "markdown", + "id": "1", + "metadata": {}, + "source": [ + "## 1. Set up the environment and parameters\n", + "### 1.1. Install packages (JupyterLite)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": {}, + "outputs": [], + "source": [ + "import sys\n", + "\n", + "if sys.platform == \"emscripten\":\n", + " import micropip\n", + "\n", + " await micropip.install(\"mat3ra-api-examples\", deps=False)\n", + " await micropip.install(\"mat3ra-utils\")\n", + " from mat3ra.utils.jupyterlite.packages import install_packages\n", + "\n", + " await install_packages(\"api_examples\")\n" + ] + }, + { + "cell_type": "markdown", + "id": "3", + "metadata": {}, + "source": [ + "### 1.1. Set parameters and configurations for the workflow and job" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4", + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import datetime\n", + "from mat3ra.ide.compute import QueueName\n", + "\n", + "# 2. 
Auth and organization parameters\n", + "# Set organization name to use it as the owner, otherwise your personal account is used\n", + "ORGANIZATION_NAME = None\n", + "\n", + "# 3. Material parameters\n", + "FOLDER = \"../uploads\"\n", + "MATERIAL_NAME = \"Silicon\" # Name of the material to load from local file or Standata\n", + "\n", + "# 4. Workflow parameters\n", + "WORKFLOW_SEARCH_TERM = \"total_energy.json\"\n", + "MY_WORKFLOW_NAME = \"Total Energy\"\n", + "APPLICATION_NAME = \"espresso\" # Specify application name (e.g., \"espresso\", \"vasp\", \"nwchem\")\n", + "ADD_RELAXATION = True # Whether to add relaxation subworkflow\n", + "SAVE_WF_TO_COLLECTION = True # If True, workflow is saved to collection\n", + "\n", + "# 5. Compute parameters\n", + "CLUSTER_NAME = None # specify i.e. \"cluster-001\" to use that cluster\n", + "QUEUE_NAME = QueueName.D\n", + "PPN = 2\n", + "\n", + "# 6. Job parameters\n", + "timestamp = datetime.now().strftime(\"%Y-%m-%d %H:%M\")\n", + "JOB_NAME = f\"Total Energy {timestamp}\"\n", + "POLL_INTERVAL = 30 # seconds\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "5", + "metadata": {}, + "source": [ + "### 1.2. API Configuration (optional, for local development)" + ] + }, + { + "cell_type": "markdown", + "id": "6", + "metadata": {}, + "source": [ + "## 2. Authenticate and initialize API client\n", + "### 2.1. Authenticate" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": {}, + "outputs": [], + "source": [ + "from utils.auth import authenticate\n", + "\n", + "# Authenticate in the browser and have credentials stored in environment variables\n", + "await authenticate()" + ] + }, + { + "cell_type": "markdown", + "id": "8", + "metadata": {}, + "source": [ + "### 2.2. 
Initialize API Client" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9", + "metadata": {}, + "outputs": [], + "source": [ + "from mat3ra.api_client import APIClient\n", + "\n", + "client = APIClient.authenticate()\n", + "client" + ] + }, + { + "cell_type": "markdown", + "id": "10", + "metadata": {}, + "source": [ + "### 2.3. Select account to work under" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11", + "metadata": {}, + "outputs": [], + "source": [ + "client.list_accounts()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12", + "metadata": {}, + "outputs": [], + "source": [ + "selected_account = client.my_account\n", + "\n", + "if ORGANIZATION_NAME:\n", + " selected_account = client.get_account(name=ORGANIZATION_NAME)\n", + "\n", + "ACCOUNT_ID = selected_account.id\n", + "print(f\"✅ Selected account ID: {ACCOUNT_ID}, name: {selected_account.name}\")" + ] + }, + { + "cell_type": "markdown", + "id": "13", + "metadata": {}, + "source": [ + "### 2.4. Select project" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14", + "metadata": {}, + "outputs": [], + "source": [ + "projects = client.projects.list({\"isDefault\": True, \"owner._id\": ACCOUNT_ID})\n", + "project_id = projects[0][\"_id\"]\n", + "print(f\"✅ Using project: {projects[0]['name']} ({project_id})\")" + ] + }, + { + "cell_type": "markdown", + "id": "15", + "metadata": {}, + "source": [ + "## 3. Create material\n", + "### 3.1. 
Load material from local file (or Standata)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16", + "metadata": {}, + "outputs": [], + "source": [ + "from mat3ra.made.material import Material\n", + "from mat3ra.standata.materials import Materials\n", + "from utils.visualize import visualize_materials as visualize\n", + "from utils.jupyterlite import load_material_from_folder\n", + "\n", + "material = load_material_from_folder(FOLDER, MATERIAL_NAME) or Material.create(\n", + " Materials.get_by_name_first_match(MATERIAL_NAME))\n", + "\n", + "visualize(material)" + ] + }, + { + "cell_type": "markdown", + "id": "17", + "metadata": {}, + "source": [ + "### 3.2. Save material to the platform" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18", + "metadata": {}, + "outputs": [], + "source": [ + "from utils.api import get_or_create_material\n", + "from utils.generic import dict_to_namespace\n", + "\n", + "saved_material_response = get_or_create_material(client.materials, material, ACCOUNT_ID)\n", + "saved_material = dict_to_namespace(saved_material_response)" + ] + }, + { + "cell_type": "markdown", + "id": "19", + "metadata": {}, + "source": [ + "## 4. Create workflow and set its parameters\n", + "### 4.1. Get list of applications and select one" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20", + "metadata": {}, + "outputs": [], + "source": [ + "from mat3ra.standata.applications import ApplicationStandata\n", + "from mat3ra.ade.application import Application\n", + "\n", + "app_config = ApplicationStandata.get_by_name_first_match(APPLICATION_NAME)\n", + "app = Application(**app_config)\n", + "print(f\"Using application: {app.name}\")" + ] + }, + { + "cell_type": "markdown", + "id": "21", + "metadata": {}, + "source": [ + "### 4.2. 
Create workflow from standard workflows and preview it" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22", + "metadata": {}, + "outputs": [], + "source": [ + "from mat3ra.standata.workflows import WorkflowStandata\n", + "from mat3ra.wode.workflows import Workflow\n", + "from utils.visualize import visualize_workflow\n", + "\n", + "workflow_config = WorkflowStandata.filter_by_application(app.name).get_by_name_first_match(WORKFLOW_SEARCH_TERM)\n", + "workflow = Workflow.create(workflow_config)\n", + "workflow.name = MY_WORKFLOW_NAME\n", + "\n", + "visualize_workflow(workflow)" + ] + }, + { + "cell_type": "markdown", + "id": "23", + "metadata": {}, + "source": [ + "### 4.3. Modify workflow (Optional)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "24", + "metadata": {}, + "outputs": [], + "source": [ + "# TODO: Add workflow modifications here\n", + "\n", + "# Example: Add relaxation subworkflow\n", + "# if ADD_RELAXATION:\n", + "# workflow.add_relaxation()\n", + "\n", + "# Example: Change model parameters\n", + "# from mat3ra.mode import Model\n", + "# from mat3ra.standata.model_tree import ModelTreeStandata\n", + "#\n", + "# model_config = ModelTreeStandata.get_model_by_parameters(\n", + "# type=MODEL_TYPE,\n", + "# subtype=MODEL_SUBTYPE,\n", + "# functional={\"slug\": MODEL_FUNCTIONAL},\n", + "# )\n", + "# model_config[\"method\"] = {\"type\": \"pseudopotential\", \"subtype\": \"us\"}\n", + "# model = Model.create(model_config)\n", + "#\n", + "# for subworkflow in workflow.subworkflows:\n", + "# subworkflow.model = model\n", + "\n", + "# Example: Modify k-grids\n", + "# from mat3ra.wode.context.providers import PointsGridDataProvider\n", + "#\n", + "# new_context = PointsGridDataProvider(dimensions=KGRID, isEdited=True).yield_data()\n", + "# subworkflow = workflow.subworkflows[0]\n", + "# unit = subworkflow.get_unit_by_name(name=\"pw_scf\") # Adjust unit name as needed\n", + "# unit.add_context(new_context)\n", + "# 
subworkflow.set_unit(unit)\n", + "\n", + "# Preview modified workflow\n", + "# visualize_workflow(workflow)\n" + ] + }, + { + "cell_type": "markdown", + "id": "25", + "metadata": {}, + "source": [ + "### 4.4. Save workflow to collection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "26", + "metadata": {}, + "outputs": [], + "source": [ + "from utils.generic import dict_to_namespace\n", + "\n", + "workflow_id_or_dict = None\n", + "\n", + "if SAVE_WF_TO_COLLECTION:\n", + " from utils.api import get_or_create_workflow\n", + "\n", + " saved_workflow_response = get_or_create_workflow(client.workflows, workflow, ACCOUNT_ID)\n", + " saved_workflow = dict_to_namespace(saved_workflow_response)\n", + " workflow_id_or_dict = saved_workflow._id\n", + " print(f\"Workflow ID: {saved_workflow._id}\")\n", + "else:\n", + " workflow_id_or_dict = workflow.to_dict()\n", + " print(\"Workflow will be embedded into job (not saved to collection)\")" + ] + }, + { + "cell_type": "markdown", + "id": "27", + "metadata": {}, + "source": [ + "## 5. Create the compute configuration\n", + "### 5.1. Get list of clusters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "28", + "metadata": {}, + "outputs": [], + "source": [ + "clusters = client.clusters.list()\n", + "print(f\"Available clusters: {[c for c in clusters]}\")" + ] + }, + { + "cell_type": "markdown", + "id": "29", + "metadata": {}, + "source": [ + "### 5.2. 
Create compute configuration for the job\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30", + "metadata": {}, + "outputs": [], + "source": [ + "from mat3ra.ide.compute import Compute\n", + "\n", + "# Select first available cluster or use specified name\n", + "cluster = next((c for c in clusters if c[\"hostname\"] == CLUSTER_NAME), clusters[0] if clusters else None)\n", + "\n", + "compute = Compute(\n", + " cluster=cluster,\n", + " queue=QUEUE_NAME,\n", + " ppn=PPN\n", + ")\n", + "compute.to_dict()" + ] + }, + { + "cell_type": "markdown", + "id": "31", + "metadata": {}, + "source": [ + "## 6. Create the job with material and workflow configuration\n", + "### 6.1. Create job" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "32", + "metadata": {}, + "outputs": [], + "source": [ + "from utils.api import create_job\n", + "from utils.visualize import display_JSON\n", + "\n", + "print(f\"Material: {saved_material._id}\")\n", + "print(f\"Workflow: {workflow_id_or_dict if SAVE_WF_TO_COLLECTION else '(embedded)'}\")\n", + "print(f\"Project: {project_id}\")\n", + "\n", + "job_response = create_job(\n", + " jobs_endpoint=client.jobs,\n", + " materials=[vars(saved_material)],\n", + " workflow_id_or_dict=workflow_id_or_dict,\n", + " project_id=project_id,\n", + " owner_id=ACCOUNT_ID,\n", + " prefix=JOB_NAME,\n", + " compute=compute.to_dict(),\n", + " save_to_collection=SAVE_WF_TO_COLLECTION,\n", + ")\n", + "\n", + "job_dict = job_response[0]\n", + "job = dict_to_namespace(job_dict)\n", + "job_id = job._id\n", + "print(\"✅ Job created successfully!\")\n", + "print(f\"Job ID: {job_id}\")\n", + "display_JSON(job_response)" + ] + }, + { + "cell_type": "markdown", + "id": "33", + "metadata": {}, + "source": [ + "## 7. 
Submit the job and monitor the status" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34", + "metadata": {}, + "outputs": [], + "source": [ + "client.jobs.submit(job_id)\n", + "print(f\"✅ Job {job_id} submitted successfully!\")" + ] + }, + { + "cell_type": "markdown", + "id": "35", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36", + "metadata": {}, + "outputs": [], + "source": [ + "from utils.api import wait_for_jobs_to_finish\n", + "\n", + "wait_for_jobs_to_finish(client.jobs, [job_id], poll_interval=POLL_INTERVAL)" + ] + }, + { + "cell_type": "markdown", + "id": "37", + "metadata": {}, + "source": [ + "## 8. Retrieve results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38", + "metadata": {}, + "outputs": [], + "source": [ + "from mat3ra.prode import PropertyName\n", + "from utils.visualize import visualize_properties\n", + "\n", + "property_data = client.properties.get_for_job(job_id, property_name=PropertyName.scalar.total_energy)\n", + "visualize_properties(property_data, title=\"Total Energy\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "39", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/utils/api.py b/utils/api.py index a42a2944..2ee049bb 100644 --- a/utils/api.py +++ b/utils/api.py @@ -3,11 +3,13 @@ import os import time import urllib.request -from typing import List +from typing import List, Optional from mat3ra.api_client.endpoints.bank_workflows import BankWorkflowEndpoints from 
mat3ra.api_client.endpoints.jobs import JobEndpoints
+from mat3ra.api_client.endpoints.materials import MaterialEndpoints
 from mat3ra.api_client.endpoints.properties import PropertiesEndpoints
+from mat3ra.api_client.endpoints.workflows import WorkflowEndpoints
 from tabulate import tabulate
 
 
@@ -123,3 +125,102 @@ def get_property_by_subworkflow_and_unit_indicies(
 def get_cluster_name(name: str = "cluster-001") -> str:
     clusters = json.loads(os.environ.get("CLUSTERS", "[]") or "[]")
     return clusters[0] if clusters else name
+
+
+def get_or_create_material(endpoint: MaterialEndpoints, material, owner_id: str) -> dict:
+    """
+    Returns an existing material from the collection if one with the same structural hash
+    exists under the given owner, otherwise creates a new one.
+    Uses the client-side hash (mat3ra-made Material.hash) to avoid unnecessary DB writes.
+
+    Args:
+        endpoint (MaterialEndpoints): Material endpoint from the API client.
+        material: mat3ra-made Material object (must have a .hash property).
+        owner_id (str): Account ID under which to search and create.
+
+    Returns:
+        dict: The material dict (existing or newly created).
+    """
+    existing = endpoint.list({"hash": material.hash, "owner._id": owner_id})
+    if existing:
+        print(f"♻️ Reusing already existing Material: {existing[0]['_id']}")
+        return existing[0]
+    created = endpoint.create(material.to_dict(), owner_id=owner_id)
+    print(f"✅ Material created: {created['_id']}")
+    return created
+
+
+def get_or_create_workflow(endpoint: WorkflowEndpoints, workflow, owner_id: str) -> dict:
+    """
+    Returns an existing workflow from the collection if one with the same hash exists
+    under the given owner, otherwise creates a new one. Uses the client-side hash
+    (Workflow.hash) to avoid creating duplicate entries, mirroring get_or_create_material.
+
+    Args:
+        endpoint (WorkflowEndpoints): Workflow endpoint from the API client.
+        workflow: mat3ra-wode Workflow object with a .to_dict() method.
+ owner_id (str): Account ID under which to search and create. + + Returns: + dict: The workflow dict (existing or newly created). + """ + existing = endpoint.list({"hash": workflow.hash, "owner._id": owner_id}) + if existing: + print(f"♻️ Reusing already existing Workflow: {existing[0]['_id']}") + return existing[0] + created = endpoint.create(workflow.to_dict(), owner_id=owner_id) + print(f"✅ Workflow created: {created['_id']}") + return created + + +def create_job( + jobs_endpoint: JobEndpoints, + materials: List[dict], + workflow_id_or_dict, + project_id: str, + owner_id: str, + prefix: str, + compute: Optional[dict] = None, + save_to_collection: bool = True, +) -> List[dict]: + """ + Creates jobs for each material using either collection references or an embedded workflow. + + Args: + jobs_endpoint (JobEndpoints): Job endpoint from the API client. + materials (list[dict]): List of material dicts (must include _id and formula). + workflow_id_or_dict: Workflow _id (str) if save_to_collection=True, + or full workflow dict if save_to_collection=False. + project_id (str): Project ID. + owner_id (str): Account ID. + prefix (str): Job name prefix. + compute (dict, optional): Compute configuration dict. + save_to_collection (bool): If True, uses create_by_ids; otherwise embeds the workflow. + + Returns: + list[dict]: List of created job dicts. 
+ """ + if save_to_collection: + return jobs_endpoint.create_by_ids( + materials=materials, + workflow_id=workflow_id_or_dict, + project_id=project_id, + prefix=prefix, + owner_id=owner_id, + compute=compute, + ) + jobs = [] + for material in materials: + job_name = " ".join((prefix, material["formula"])) + embedded_workflow = {k: v for k, v in workflow_id_or_dict.items() if k != "_id"} + config = { + "_project": {"_id": project_id}, + "workflow": embedded_workflow, + "_material": {"_id": material["_id"]}, + "owner": {"_id": owner_id}, + "name": job_name, + } + if compute: + config["compute"] = compute + jobs.append(jobs_endpoint.create(config)) + return jobs