Skip to content

Commit 2de222a

Browse files
Simplify type system: only core types and AttributeTypes
Major simplification of the type system to two categories:

1. Core DataJoint types (no brackets): float32, uuid, bool, json, blob, etc.
2. AttributeTypes (angle brackets): <djblob>, <object>, <attach>, etc.

Changes:

- declare.py: Remove EXTERNAL_TYPES, BINARY_TYPES; simplify to CORE_TYPE_ALIASES + ADAPTED
- heading.py: Remove is_attachment, is_filepath, is_object, is_external flags
- fetch.py: Simplify _get() to only handle uuid, json, blob, and adapters
- table.py: Simplify __make_placeholder() to only handle uuid, json, blob, numeric
- preview.py: Remove special object field handling (will be AttributeType)
- staged_insert.py: Update object type check to use adapter

All special handling (attach, filepath, object, external storage) will be implemented as built-in AttributeTypes in subsequent phases.

Co-authored-by: dimitri-yatsenko <dimitri@datajoint.com>
1 parent 97bc162 commit 2de222a

File tree

6 files changed

+140
-252
lines changed

6 files changed

+140
-252
lines changed

src/datajoint/declare.py

Lines changed: 39 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,13 @@
1111

1212
from .attribute_adapter import get_adapter
1313
from .condition import translate_attribute
14-
from .errors import FILEPATH_FEATURE_SWITCH, DataJointError, _support_filepath_types
14+
from .errors import DataJointError
1515
from .settings import config
1616

17-
UUID_DATA_TYPE = "binary(16)"
18-
19-
# Type aliases for numeric types
20-
SQL_TYPE_ALIASES = {
17+
# Core DataJoint type aliases - scientist-friendly names mapped to native SQL types
18+
# These types can be used without angle brackets in table definitions
19+
CORE_TYPE_ALIASES = {
20+
# Numeric types
2121
"FLOAT32": "float",
2222
"FLOAT64": "double",
2323
"INT64": "bigint",
@@ -29,18 +29,22 @@
2929
"INT8": "tinyint",
3030
"UINT8": "tinyint unsigned",
3131
"BOOL": "tinyint",
32+
# UUID type
33+
"UUID": "binary(16)",
3234
}
35+
3336
MAX_TABLE_NAME_LENGTH = 64
3437
CONSTANT_LITERALS = {
3538
"CURRENT_TIMESTAMP",
3639
"NULL",
3740
} # SQL literals to be used without quotes (case insensitive)
38-
EXTERNAL_TABLE_ROOT = "~external"
3941

42+
# Type patterns for declaration parsing
43+
# Three categories: core type aliases, native passthrough types, and AttributeTypes (angle brackets)
4044
TYPE_PATTERN = {
4145
k: re.compile(v, re.I)
4246
for k, v in dict(
43-
# Type aliases must come before INTEGER and FLOAT patterns to avoid prefix matching
47+
# Core DataJoint type aliases (scientist-friendly names)
4448
FLOAT32=r"float32$",
4549
FLOAT64=r"float64$",
4650
INT64=r"int64$",
@@ -51,51 +55,29 @@
5155
UINT16=r"uint16$",
5256
INT8=r"int8$",
5357
UINT8=r"uint8$",
54-
BOOL=r"bool$", # aliased to tinyint
55-
# Native MySQL types
58+
BOOL=r"bool$",
59+
UUID=r"uuid$",
60+
# Native SQL types (passthrough)
5661
INTEGER=r"((tiny|small|medium|big|)int|integer)(\s*\(.+\))?(\s+unsigned)?(\s+auto_increment)?|serial$",
5762
DECIMAL=r"(decimal|numeric)(\s*\(.+\))?(\s+unsigned)?$",
5863
FLOAT=r"(double|float|real)(\s*\(.+\))?(\s+unsigned)?$",
5964
STRING=r"(var)?char\s*\(.+\)$",
6065
JSON=r"json$",
6166
ENUM=r"enum\s*\(.+\)$",
6267
TEMPORAL=r"(date|datetime|time|timestamp|year)(\s*\(.+\))?$",
63-
INTERNAL_BLOB=r"(tiny|small|medium|long|)blob$",
64-
EXTERNAL_BLOB=r"blob@(?P<store>[a-z][\-\w]*)$",
65-
INTERNAL_ATTACH=r"attach$",
66-
EXTERNAL_ATTACH=r"attach@(?P<store>[a-z][\-\w]*)$",
67-
FILEPATH=r"filepath@(?P<store>[a-z][\-\w]*)$",
68-
OBJECT=r"object(@(?P<store>[a-z][\-\w]*))?$", # managed object storage (files/folders)
69-
UUID=r"uuid$",
68+
BLOB=r"(tiny|small|medium|long|)blob$",
69+
# AttributeTypes use angle brackets
7070
ADAPTED=r"<.+>$",
7171
).items()
7272
}
7373

74-
# custom types are stored in attribute comment
75-
SPECIAL_TYPES = {
76-
"UUID",
77-
"INTERNAL_ATTACH",
78-
"EXTERNAL_ATTACH",
79-
"EXTERNAL_BLOB",
80-
"FILEPATH",
81-
"OBJECT",
82-
"ADAPTED",
83-
} | set(SQL_TYPE_ALIASES)
74+
# Types that require special handling (stored in attribute comment for reconstruction)
75+
SPECIAL_TYPES = {"ADAPTED"} | set(CORE_TYPE_ALIASES)
76+
77+
# Native SQL types that pass through without modification
8478
NATIVE_TYPES = set(TYPE_PATTERN) - SPECIAL_TYPES
85-
EXTERNAL_TYPES = {
86-
"EXTERNAL_ATTACH",
87-
"EXTERNAL_BLOB",
88-
"FILEPATH",
89-
} # data referenced by a UUID in external tables
90-
# Blob and attachment types cannot have SQL default values (other than NULL)
91-
BINARY_TYPES = {
92-
"EXTERNAL_ATTACH",
93-
"INTERNAL_ATTACH",
94-
"EXTERNAL_BLOB",
95-
"INTERNAL_BLOB",
96-
}
9779

98-
assert set().union(SPECIAL_TYPES, EXTERNAL_TYPES, BINARY_TYPES) <= set(TYPE_PATTERN)
80+
assert SPECIAL_TYPES <= set(TYPE_PATTERN)
9981

10082

10183
def match_type(attribute_type):
@@ -459,50 +441,32 @@ def format_attribute(attr):
459441

460442
def substitute_special_type(match, category, foreign_key_sql, context):
461443
"""
444+
Substitute special types with their native SQL equivalents.
445+
446+
Special types are:
447+
- Core type aliases (float32 → float, uuid → binary(16), etc.)
448+
- ADAPTED types (AttributeTypes in angle brackets)
449+
462450
:param match: dict containing the keys "type" and "comment" -- will be modified in place
463451
:param category: attribute type category from TYPE_PATTERN
464452
:param foreign_key_sql: list of foreign key declarations to add to
465453
:param context: context for looking up user-defined attribute_type adapters
466454
"""
467-
if category == "UUID":
468-
match["type"] = UUID_DATA_TYPE
469-
elif category == "INTERNAL_ATTACH":
470-
match["type"] = "LONGBLOB"
471-
elif category == "OBJECT":
472-
# Object type stores metadata as JSON - no foreign key to external table
473-
# Extract store name if present (object@store_name syntax)
474-
if "@" in match["type"]:
475-
match["store"] = match["type"].split("@", 1)[1]
476-
match["type"] = "JSON"
477-
elif category in EXTERNAL_TYPES:
478-
if category == "FILEPATH" and not _support_filepath_types():
479-
raise DataJointError(
480-
"""
481-
The filepath data type is disabled until complete validation.
482-
To turn it on as experimental feature, set the environment variable
483-
{env} = TRUE or upgrade datajoint.
484-
""".format(env=FILEPATH_FEATURE_SWITCH)
485-
)
486-
match["store"] = match["type"].split("@", 1)[1]
487-
match["type"] = UUID_DATA_TYPE
488-
foreign_key_sql.append(
489-
"FOREIGN KEY (`{name}`) REFERENCES `{{database}}`.`{external_table_root}_{store}` (`hash`) "
490-
"ON UPDATE RESTRICT ON DELETE RESTRICT".format(external_table_root=EXTERNAL_TABLE_ROOT, **match)
491-
)
492-
elif category == "ADAPTED":
455+
if category == "ADAPTED":
456+
# AttributeType - resolve to underlying dtype
493457
attr_type, store_name = get_adapter(context, match["type"])
494-
# Store the store parameter if present
495458
if store_name is not None:
496459
match["store"] = store_name
497460
match["type"] = attr_type.dtype
461+
# Recursively resolve if dtype is also a special type
498462
category = match_type(match["type"])
499463
if category in SPECIAL_TYPES:
500-
# recursive redefinition from user-defined datatypes.
501464
substitute_special_type(match, category, foreign_key_sql, context)
502-
elif category in SQL_TYPE_ALIASES:
503-
match["type"] = SQL_TYPE_ALIASES[category]
465+
elif category in CORE_TYPE_ALIASES:
466+
# Core type alias - substitute with native SQL type
467+
match["type"] = CORE_TYPE_ALIASES[category]
504468
else:
505-
assert False, "Unknown special type"
469+
assert False, f"Unknown special type: {category}"
506470

507471

508472
def compile_attribute(line, in_key, foreign_key_sql, context):
@@ -513,7 +477,7 @@ def compile_attribute(line, in_key, foreign_key_sql, context):
513477
:param in_key: set to True if attribute is in primary key set
514478
:param foreign_key_sql: the list of foreign key declarations to add to
515479
:param context: context in which to look up user-defined attribute type adapters
516-
:returns: (name, sql, is_external) -- attribute name and sql code for its declaration
480+
:returns: (name, sql, store) -- attribute name, sql code for its declaration, and optional store name
517481
"""
518482
try:
519483
match = attribute_parser.parseString(line + "#", parseAll=True)
@@ -550,13 +514,10 @@ def compile_attribute(line, in_key, foreign_key_sql, context):
550514
match["comment"] = ":{type}:{comment}".format(**match) # insert custom type into comment
551515
substitute_special_type(match, category, foreign_key_sql, context)
552516

553-
if category in BINARY_TYPES and match["default"] not in {
554-
"DEFAULT NULL",
555-
"NOT NULL",
556-
}:
557-
raise DataJointError(
558-
"The default value for blob or attachment attributes can only be NULL in:\n{line}".format(line=line)
559-
)
517+
# Check for invalid default values on blob types (after type substitution)
518+
final_category = match_type(match["type"])
519+
if final_category == "BLOB" and match["default"] not in {"DEFAULT NULL", "NOT NULL"}:
520+
raise DataJointError("The default value for blob attributes can only be NULL in:\n{line}".format(line=line))
560521

561522
sql = ("`{name}` {type} {default}" + (' COMMENT "{comment}"' if match["comment"] else "")).format(**match)
562523
return match["name"], sql, match.get("store")

src/datajoint/fetch.py

Lines changed: 38 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,15 @@
1-
import itertools
21
import json
32
import numbers
4-
import uuid
3+
import uuid as uuid_module
54
from functools import partial
6-
from pathlib import Path
75

86
import numpy as np
97
import pandas
108

119
from datajoint.condition import Top
1210

13-
from . import hash
1411
from .errors import DataJointError
15-
from .objectref import ObjectRef
1612
from .settings import config
17-
from .storage import StorageBackend
18-
from .utils import safe_write
1913

2014

2115
class key:
@@ -39,79 +33,51 @@ def to_dicts(recarray):
3933

4034
def _get(connection, attr, data, squeeze, download_path):
4135
"""
42-
This function is called for every attribute
36+
Retrieve and decode attribute data from the database.
37+
38+
In the simplified type system:
39+
- Native types pass through unchanged
40+
- JSON types are parsed
41+
- UUID types are converted from bytes
42+
- Blob types return raw bytes (unless an adapter handles them)
43+
- Adapters (AttributeTypes) handle all custom encoding/decoding
4344
4445
:param connection: a dj.Connection object
45-
:param attr: attribute name from the table's heading
46-
:param data: literal value fetched from the table
47-
:param squeeze: if True squeeze blobs
48-
:param download_path: for fetches that download data, e.g. attachments
49-
:return: unpacked data
46+
:param attr: attribute from the table's heading
47+
:param data: raw value fetched from the database
48+
:param squeeze: if True squeeze blobs (legacy, unused)
49+
:param download_path: for fetches that download data (legacy, unused in simplified model)
50+
:return: decoded data
5051
"""
5152
if data is None:
52-
return
53-
if attr.is_object:
54-
# Object type - return ObjectRef handle
55-
json_data = json.loads(data) if isinstance(data, str) else data
56-
# Get the correct backend based on store name in metadata
57-
store_name = json_data.get("store") # None for default store
58-
try:
59-
spec = config.get_object_store_spec(store_name)
60-
backend = StorageBackend(spec)
61-
except DataJointError:
62-
backend = None
63-
return ObjectRef.from_json(json_data, backend=backend)
53+
return None
54+
55+
# JSON type - parse and optionally decode via adapter
6456
if attr.json:
65-
return json.loads(data)
66-
67-
extern = connection.schemas[attr.database].external[attr.store] if attr.is_external else None
68-
69-
# apply custom attribute type decoder if present
70-
def adapt(x):
71-
return attr.adapter.decode(x, key=None) if attr.adapter else x
72-
73-
if attr.is_filepath:
74-
return adapt(extern.download_filepath(uuid.UUID(bytes=data))[0])
75-
if attr.is_attachment:
76-
# Steps:
77-
# 1. get the attachment filename
78-
# 2. check if the file already exists at download_path, verify checksum
79-
# 3. if exists and checksum passes then return the local filepath
80-
# 4. Otherwise, download the remote file and return the new filepath
81-
_uuid = uuid.UUID(bytes=data) if attr.is_external else None
82-
attachment_name = extern.get_attachment_name(_uuid) if attr.is_external else data.split(b"\0", 1)[0].decode()
83-
local_filepath = Path(download_path) / attachment_name
84-
if local_filepath.is_file():
85-
attachment_checksum = _uuid if attr.is_external else hash.uuid_from_buffer(data)
86-
if attachment_checksum == hash.uuid_from_file(local_filepath, init_string=attachment_name + "\0"):
87-
return adapt(str(local_filepath)) # checksum passed, no need to download again
88-
# generate the next available alias filename
89-
for n in itertools.count():
90-
f = local_filepath.parent / (local_filepath.stem + "_%04x" % n + local_filepath.suffix)
91-
if not f.is_file():
92-
local_filepath = f
93-
break
94-
if attachment_checksum == hash.uuid_from_file(f, init_string=attachment_name + "\0"):
95-
return adapt(str(f)) # checksum passed, no need to download again
96-
# Save attachment
97-
if attr.is_external:
98-
extern.download_attachment(_uuid, attachment_name, local_filepath)
99-
else:
100-
# write from buffer
101-
safe_write(local_filepath, data.split(b"\0", 1)[1])
102-
return adapt(str(local_filepath)) # download file from remote store
57+
parsed = json.loads(data)
58+
if attr.adapter:
59+
return attr.adapter.decode(parsed, key=None)
60+
return parsed
10361

62+
# UUID type - convert bytes to UUID object
10463
if attr.uuid:
105-
return adapt(uuid.UUID(bytes=data))
106-
elif attr.is_blob:
107-
blob_data = extern.get(uuid.UUID(bytes=data)) if attr.is_external else data
108-
# Adapters (like <djblob>) handle deserialization in decode()
109-
# Without adapter, blob columns return raw bytes (no deserialization)
64+
result = uuid_module.UUID(bytes=data)
11065
if attr.adapter:
111-
return attr.adapter.decode(blob_data, key=None)
112-
return blob_data # raw bytes
113-
else:
114-
return adapt(data)
66+
return attr.adapter.decode(result, key=None)
67+
return result
68+
69+
# Blob type - return raw bytes or decode via adapter
70+
if attr.is_blob:
71+
if attr.adapter:
72+
return attr.adapter.decode(data, key=None)
73+
return data # raw bytes
74+
75+
# Other types with adapter
76+
if attr.adapter:
77+
return attr.adapter.decode(data, key=None)
78+
79+
# Native types - pass through unchanged
80+
return data
11581

11682

11783
class Fetch:

0 commit comments

Comments
 (0)