Skip to content

Commit ef89a32

Browse files
committed
remove use_stdin code paths for simplicity
1 parent 3646062 commit ef89a32

8 files changed

Lines changed: 32 additions & 44 deletions

File tree

README.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,6 @@ profiles:
116116
queue: normal
117117
memory: "8 GB"
118118
walltime: "04:00"
119-
use_stdin: true
120119
script_prologue:
121120
- "module load java/11"
122121

@@ -142,7 +141,6 @@ profiles:
142141
| `extra_directives` | `[]` | Additional scheduler flags (directive prefix added automatically) |
143142
| `directives_skip` | `[]` | Substrings to filter out of directives |
144143
| `extra_args` | `[]` | Extra CLI args appended to the submit command (e.g. `bsub`) |
145-
| `use_stdin` | `false` | Submit via stdin (`bsub < script.sh`) |
146144
| `lsf_units` | `"MB"` | LSF memory units (`KB`, `MB`, `GB`) |
147145
| `suppress_job_email` | `true` | Set `LSB_JOB_REPORT_MAIL=N` |
148146
| `command_timeout` | `100.0` | Timeout in seconds for scheduler commands |

cluster_api/config.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@ class ClusterConfig:
5353
directives_skip: list[str] = field(default_factory=list)
5454
extra_args: list[str] = field(default_factory=list)
5555
lsf_units: str = "MB"
56-
use_stdin: bool = False
5756
job_name_prefix: str | None = None
5857
zombie_timeout_minutes: float = 30.0
5958
completed_retention_minutes: float = 10.0

cluster_api/core.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,6 @@ async def _call(
268268
shell: bool = False,
269269
timeout: float = 100.0,
270270
env: dict[str, str] | None = None,
271-
stdin_data: str | None = None,
272271
) -> str:
273272
"""Run a subprocess and return stdout.
274273
@@ -291,12 +290,11 @@ async def _call(
291290
stdout=asyncio.subprocess.PIPE,
292291
stderr=asyncio.subprocess.PIPE,
293292
env=full_env,
294-
stdin=asyncio.subprocess.PIPE if stdin_data else None,
295293
)
296294

297295
try:
298296
stdout, stderr = await asyncio.wait_for(
299-
proc.communicate(stdin_data.encode() if stdin_data else None),
297+
proc.communicate(),
300298
timeout=timeout,
301299
)
302300
except asyncio.TimeoutError:

cluster_api/executors/lsf.py

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -160,27 +160,15 @@ def _collect_extra_args(self, resources: ResourceSpec | None = None) -> list[str
160160
return args
161161

162162
async def _bsub(
163-
self, script_path: str, content: str | None, env: dict[str, str] | None,
163+
self, script_path: str, env: dict[str, str] | None,
164164
extra_args: list[str] | None = None,
165165
) -> str:
166-
"""Run bsub via stdin or file and return raw output."""
166+
"""Run bsub with a script file and return raw output."""
167167
submit_env = self._build_submit_env(env)
168-
cmd = [self.submit_command, *(extra_args or [])]
169-
if self.config.use_stdin:
170-
if content is None:
171-
with open(script_path) as f:
172-
content = f.read()
173-
logger.debug("Running: %s (via stdin)", " ".join(cmd))
174-
return await self._call(
175-
cmd,
176-
env=submit_env,
177-
timeout=self.config.command_timeout,
178-
stdin_data=content,
179-
)
180-
full_cmd = [*cmd, script_path]
181-
logger.debug("Running: %s", " ".join(full_cmd))
168+
cmd = [self.submit_command, *(extra_args or []), script_path]
169+
logger.debug("Running: %s", " ".join(cmd))
182170
return await self._call(
183-
full_cmd,
171+
cmd,
184172
env=submit_env,
185173
timeout=self.config.command_timeout,
186174
)
@@ -202,7 +190,7 @@ async def _submit_job(
202190
script_path = write_script(resources.work_dir, script, name, next(self._script_counter))
203191

204192
extra_args = self._collect_extra_args(resources)
205-
out = await self._bsub(script_path, None, env, extra_args)
193+
out = await self._bsub(script_path, env, extra_args)
206194
return self._job_id_from_submit_output(out), script_path
207195

208196
async def _submit_array_job(
@@ -245,7 +233,7 @@ async def _submit_array_job(
245233
f.write(content)
246234

247235
extra_args = self._collect_extra_args(resources)
248-
out = await self._bsub(script_path, content, env, extra_args)
236+
out = await self._bsub(script_path, env, extra_args)
249237
return self._job_id_from_submit_output(out), script_path
250238

251239
def _build_status_args(self) -> list[str]:

docs/Development.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ Terminal jobs are purged from memory after `completed_retention_minutes` (once a
133133
### Key design decisions
134134

135135
- **Poll-based monitoring** — unlike dask-jobqueue (which relies on workers phoning home), this library actively polls the scheduler. This means it works with any executable, not just Python workers.
136-
- **Stdin submission** — LSF's `bsub < script.sh` mode avoids filesystem race conditions on shared storage. Controlled by `use_stdin` config.
136+
- **File-based submission** — jobs are submitted via `bsub script.sh`, passing the script file path directly. The script is always written to disk before submission.
137137
- **Job name prefixing** — all jobs get a `{prefix}-{name}` name. The prefix is either configured (`job_name_prefix`) or randomly generated, so concurrent sessions don't collide when polling by name.
138138
- **Array status aggregation** — parent array job status is computed from element statuses. Only transitions to terminal when ALL expected elements are terminal.
139139

tests/cluster_config.example.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
queue: normal
66
memory: "1 GB"
7-
use_stdin: true
87
lsf_units: MB
98
suppress_job_email: true
109

tests/conftest.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ def lsf_config():
3737
walltime="04:00",
3838
poll_interval=0.5,
3939
command_timeout=10.0,
40-
use_stdin=True,
4140
suppress_job_email=True,
4241
lsf_units="MB",
4342
)

tests/test_lsf.py

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,7 @@ def test_status_args(self, lsf_config):
270270

271271
class TestSubmission:
272272

273-
async def test_submit_stdin(self, lsf_config, work_dir):
273+
async def test_submit(self, lsf_config, work_dir):
274274
executor = LSFExecutor(lsf_config)
275275
with patch.object(
276276
executor, "_call",
@@ -285,9 +285,9 @@ async def test_submit_stdin(self, lsf_config, work_dir):
285285
assert job.job_id == "12345"
286286
assert job.name == "test-my-job"
287287
assert job.status == JobStatus.PENDING
288-
# Verify stdin submission was used
289-
call_args = mock_call.call_args
290-
assert call_args.kwargs.get("stdin_data") is not None
288+
# Verify file-based submission (script path in cmd args)
289+
cmd = mock_call.call_args[0][0]
290+
assert cmd[-1].endswith(".sh")
291291

292292

293293
async def test_submit_email_suppression(self, lsf_config, work_dir):
@@ -322,10 +322,11 @@ async def test_submit_array(self, lsf_config, work_dir):
322322
)
323323
assert job.job_id == "12345"
324324
assert job.metadata["array_range"] == (1, 50)
325-
# Verify stdin submission included array name
326-
call_args = mock_call.call_args
327-
stdin = call_args.kwargs.get("stdin_data", "")
328-
assert "[1-50]" in stdin
325+
# Verify script file contains array name
326+
script_path = mock_call.call_args[0][0][-1]
327+
with open(script_path) as f:
328+
script = f.read()
329+
assert "[1-50]" in script
329330

330331

331332
class TestArrayScriptRewriting:
@@ -343,9 +344,11 @@ async def test_percent_i_substitution(self, lsf_config, work_dir):
343344
array_range=(1, 10),
344345
resources=ResourceSpec(work_dir=work_dir),
345346
)
346-
stdin = mock_call.call_args.kwargs.get("stdin_data", "")
347-
assert "stdout.%J.%I.log" in stdin
348-
assert "stderr.%J.%I.log" in stdin
347+
script_path = mock_call.call_args[0][0][-1]
348+
with open(script_path) as f:
349+
script = f.read()
350+
assert "stdout.%J.%I.log" in script
351+
assert "stderr.%J.%I.log" in script
349352

350353

351354
class TestCancelByName:
@@ -412,8 +415,10 @@ async def test_with_max_concurrent(self, lsf_config, work_dir):
412415
)
413416
assert job.job_id == "12345"
414417
assert job.metadata["max_concurrent"] == 15
415-
stdin = mock_call.call_args.kwargs.get("stdin_data", "")
416-
assert "[1-100%15]" in stdin
418+
script_path = mock_call.call_args[0][0][-1]
419+
with open(script_path) as f:
420+
script = f.read()
421+
assert "[1-100%15]" in script
417422

418423

419424
async def test_without_max_concurrent(self, lsf_config, work_dir):
@@ -429,9 +434,11 @@ async def test_without_max_concurrent(self, lsf_config, work_dir):
429434
array_range=(1, 100),
430435
resources=ResourceSpec(work_dir=work_dir),
431436
)
432-
stdin = mock_call.call_args.kwargs.get("stdin_data", "")
433-
assert "[1-100]" in stdin
434-
j_line = [line for line in stdin.splitlines() if "-J " in line][0]
437+
script_path = mock_call.call_args[0][0][-1]
438+
with open(script_path) as f:
439+
script = f.read()
440+
assert "[1-100]" in script
441+
j_line = [line for line in script.splitlines() if "-J " in line][0]
435442
assert "%" not in j_line
436443
assert "max_concurrent" not in job.metadata
437444

0 commit comments

Comments
 (0)