diff --git a/cytetype/__init__.py b/cytetype/__init__.py index 132e1d5..3cb2877 100644 --- a/cytetype/__init__.py +++ b/cytetype/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.16.1" +__version__ = "0.17.0" import requests diff --git a/cytetype/api/client.py b/cytetype/api/client.py index 751e20d..52fd4d1 100644 --- a/cytetype/api/client.py +++ b/cytetype/api/client.py @@ -176,7 +176,7 @@ def submit_annotation_job( transport = HTTPTransport(base_url, auth_token) try: - status_code, response = transport.post("annotate", payload, timeout=60) + status_code, response = transport.post("annotate", payload, timeout=180) job_id = response.get("job_id") if not job_id: diff --git a/cytetype/core/artifacts.py b/cytetype/core/artifacts.py index 13fd3ba..c916e1f 100644 --- a/cytetype/core/artifacts.py +++ b/cytetype/core/artifacts.py @@ -197,6 +197,8 @@ def save_features_matrix( def save_obs_duckdb( out_file: str, obs_df: pd.DataFrame, + obsm_coordinates: np.ndarray | None = None, + coordinates_key: str | None = None, table_name: str = "obs", threads: int = 4, memory_limit: str = "4GB", @@ -208,6 +210,11 @@ def save_obs_duckdb( "Invalid table_name. Use letters, numbers, and underscores only." ) + if obsm_coordinates is not None and coordinates_key is not None: + obs_df = obs_df.copy() + obs_df[f"__vis_coordinates_{coordinates_key}_1"] = obsm_coordinates[:, 0] + obs_df[f"__vis_coordinates_{coordinates_key}_2"] = obsm_coordinates[:, 1] + dd_config: dict[str, Any] = { "threads": threads, "memory_limit": memory_limit, diff --git a/cytetype/main.py b/cytetype/main.py index 8df667c..993a0f2 100644 --- a/cytetype/main.py +++ b/cytetype/main.py @@ -132,9 +132,14 @@ def __init__( adata, group_key, rank_key, gene_symbols_column, coordinates_key ) + # Use original labels as IDs if all are short (<=3 chars), otherwise enumerate + _unique_group_categories: list[str | int] = natsorted( + adata.obs[group_key].unique().tolist() + ) + _short_ids = all(len(str(x)) <= 3 for x in _unique_group_categories) self.cluster_map = { - str(x): str(n + 1) - for n, x in enumerate(natsorted(adata.obs[group_key].unique().tolist())) + str(x): str(x) if _short_ids else str(n) + for n, x in enumerate(_unique_group_categories) } self.clusters = [ self.cluster_map[str(x)] for x in adata.obs[group_key].values.tolist() @@ -199,6 +204,7 @@ def _build_and_upload_artifacts( obs_duckdb_path: str, upload_timeout_seconds: int, upload_max_workers: int = 4, + coordinates_key: str | None = None, ) -> tuple[dict[str, str], list[tuple[str, Exception]]]: """Build and upload each artifact as an independent unit. @@ -240,9 +246,16 @@ def _build_and_upload_artifacts( # --- obs.duckdb (save then upload) --- try: logger.info("Saving obs.duckdb artifact from observation metadata...") + obsm_coordinates = ( + self.adata.obsm[coordinates_key] + if coordinates_key and coordinates_key in self.adata.obsm + else None + ) save_obs_duckdb_file( out_file=obs_duckdb_path, obs_df=self.adata.obs, + obsm_coordinates=obsm_coordinates, + coordinates_key=coordinates_key, ) logger.info("Uploading obs.duckdb artifact...") obs_upload = upload_obs_duckdb_file( @@ -394,6 +407,7 @@ def run( obs_duckdb_path=obs_duckdb_path, upload_timeout_seconds=upload_timeout_seconds, upload_max_workers=upload_max_workers, + coordinates_key=self.coordinates_key, ) if uploaded_file_refs: payload["uploaded_files"] = uploaded_file_refs