
trying out encoding changes #2559

Open — zachmu wants to merge 14 commits into `main` from `zachmu/encoding`

Conversation

@zachmu (Member) commented Apr 8, 2026

github-actions bot (Contributor) commented Apr 8, 2026

| Benchmark | Main | PR | Change |
| --- | --- | --- | --- |
| covering_index_scan_postgres | 1094.03/s | ${\color{lightgreen}1273.97/s}$ | ${\color{lightgreen}+16.4\%}$ |
| index_join_postgres | 159.61/s | ${\color{lightgreen}199.89/s}$ | ${\color{lightgreen}+25.2\%}$ |
| index_join_scan_postgres | 206.38/s | 210.83/s | +2.1% |
| index_scan_postgres | 12.12/s | 12.11/s | -0.1% |
| oltp_point_select | 2367.62/s | 2395.34/s | +1.1% |
| oltp_read_only | 1859.88/s | 1882.08/s | +1.1% |
| select_random_points | 129.59/s | 134.13/s | +3.5% |
| select_random_ranges | 815.35/s | ${\color{lightgreen}1061.86/s}$ | ${\color{lightgreen}+30.2\%}$ |
| table_scan_postgres | 11.76/s | 11.85/s | +0.7% |
| types_table_scan_postgres | 5.47/s | 5.43/s | -0.8% |

github-actions bot (Contributor) commented Apr 8, 2026

| | Main | PR |
| --- | --- | --- |
| Total | 42090 | 42090 |
| Successful | 17922 | 17966 |
| Failures | 24168 | 24124 |
| Partial Successes¹ | 5627 | 5639 |

| | Main | PR |
| --- | --- | --- |
| Successful | 42.5802% | 42.6847% |
| Failures | 57.4198% | 57.3153% |

${\color{red}Regressions (16)}$

oid

QUERY:          INSERT INTO OID_TBL(f1) VALUES ('1234');
RECEIVED ERROR: panic: impossible conversion: id.Id cannot be converted to int
QUERY:          INSERT INTO OID_TBL(f1) VALUES ('1235');
RECEIVED ERROR: panic: impossible conversion: id.Id cannot be converted to int
QUERY:          INSERT INTO OID_TBL(f1) VALUES ('987');
RECEIVED ERROR: panic: impossible conversion: id.Id cannot be converted to int
QUERY:          INSERT INTO OID_TBL(f1) VALUES ('-1040');
RECEIVED ERROR: panic: impossible conversion: id.Id cannot be converted to int
QUERY:          INSERT INTO OID_TBL(f1) VALUES ('99999999');
RECEIVED ERROR: panic: impossible conversion: id.Id cannot be converted to int
QUERY:          INSERT INTO OID_TBL(f1) VALUES ('5     ');
RECEIVED ERROR: panic: impossible conversion: id.Id cannot be converted to int
QUERY:          INSERT INTO OID_TBL(f1) VALUES ('   10  ');
RECEIVED ERROR: panic: impossible conversion: id.Id cannot be converted to int
QUERY:          INSERT INTO OID_TBL(f1) VALUES ('	  15 	  ');
RECEIVED ERROR: panic: impossible conversion: id.Id cannot be converted to int
QUERY:          SELECT * FROM OID_TBL;
RECEIVED ERROR: expected row count 8 but received 0
QUERY:          SELECT o.* FROM OID_TBL o WHERE o.f1 = 1234;
RECEIVED ERROR: expected row count 1 but received 0
QUERY:          SELECT o.* FROM OID_TBL o WHERE o.f1 <> '1234';
RECEIVED ERROR: expected row count 7 but received 0
QUERY:          SELECT o.* FROM OID_TBL o WHERE o.f1 <= '1234';
RECEIVED ERROR: expected row count 5 but received 0
QUERY:          SELECT o.* FROM OID_TBL o WHERE o.f1 < '1234';
RECEIVED ERROR: expected row count 4 but received 0
QUERY:          SELECT o.* FROM OID_TBL o WHERE o.f1 >= '1234';
RECEIVED ERROR: expected row count 4 but received 0
QUERY:          SELECT o.* FROM OID_TBL o WHERE o.f1 > '1234';
RECEIVED ERROR: expected row count 3 but received 0

subselect

QUERY:          select * from
    int4_tbl i4,
    lateral (
        select i4.f1 > 1 as b, 1 as id
        from (select random() order by 1) as t1
      union all
        select true as b, 2 as id
    ) as t2
where b and f1 >= 0;
RECEIVED ERROR: rows differ
    Postgres:
        {0, true, 2}
    Doltgres:
        {123456, true, 1}

${\color{lightgreen}Progressions (61)}$

join

QUERY: select count(*) from
  tenk1 a join tenk1 b on a.unique1 = b.unique2
  left join tenk1 c on a.unique2 = b.unique1 and c.thousand = a.thousand
  join int4_tbl on b.thousand = f1;
QUERY: select * from
(
  select unique1, q1, coalesce(unique1, -1) + q1 as fault
  from int8_tbl left join tenk1 on (q2 = unique2)
) ss
where fault = 122
order by fault;
QUERY: select q1, unique2, thousand, hundred
  from int8_tbl a left join tenk1 b on q1 = unique2
  where coalesce(thousand,123) = q1 and q1 = coalesce(hundred,123);
QUERY: select f1, unique2, case when unique2 is null then f1 else 0 end
  from int4_tbl a left join tenk1 b on f1 = unique2
  where (case when unique2 is null then f1 else 0 end) = 0;
QUERY: select a.unique1, b.unique1, c.unique1, coalesce(b.twothousand, a.twothousand)
  from tenk1 a left join tenk1 b on b.thousand = a.unique1                        left join tenk1 c on c.unique2 = coalesce(b.twothousand, a.twothousand)
  where a.unique2 < 10 and coalesce(b.twothousand, a.twothousand) = 44;

numeric

QUERY: SELECT t1.id1, t1.result, t2.expected
    FROM num_result t1, num_exp_sqrt t2
    WHERE t1.id1 = t2.id
    AND t1.result != t2.expected;
QUERY: SELECT t1.id1, t1.result, t2.expected
    FROM num_result t1, num_exp_ln t2
    WHERE t1.id1 = t2.id
    AND t1.result != t2.expected;
QUERY: SELECT t1.id1, t1.result, t2.expected
    FROM num_result t1, num_exp_log10 t2
    WHERE t1.id1 = t2.id
    AND t1.result != t2.expected;

rowtypes

QUERY: create temp table quadtable(f1 int, q quad);
QUERY: create temp table people (fn fullname, bd date);
QUERY: create temp table pp (f1 text);
QUERY: insert into pp values (repeat('abcdefghijkl', 100000));
QUERY: select ROW(1,2) < ROW(1,3) as true;
QUERY: select ROW(1,2) < ROW(1,NULL) as null;
QUERY: select ROW(1,2,3) < ROW(1,3,NULL) as true;
QUERY: select ROW(11,'ABC') < ROW(11,'DEF') as true;
QUERY: select ROW(12,'ABC') > ROW(11,'DEF') as true;
QUERY: select ROW(1,2,3) < ROW(1,NULL,4) as null;
QUERY: select ROW(1,2,3) <> ROW(1,NULL,4) as true;
QUERY: select ROW(1,2) = ROW(1,2::int8);
QUERY: select ROW(1,2) in (ROW(3,4), ROW(1,2));
QUERY: select ROW(1,2) in (ROW(3,4), ROW(1,2::int8));
QUERY: select thousand, tenthous from tenk1
where (thousand, tenthous) >= (997, 5000)
order by thousand, tenthous;
QUERY: create temp table test_table (a text, b text);
QUERY: insert into test_table values ('a', 'b');
QUERY: insert into test_table select 'a', null from generate_series(1,1000);
QUERY: insert into test_table values ('b', 'a');
QUERY: create index on test_table (a,b);
QUERY: set enable_sort = off;
QUERY: select a,b from test_table where (a,b) > ('a','a') order by a,b;
QUERY: reset enable_sort;
QUERY: select ROW();
QUERY: select ROW() IS NULL;
QUERY: select array[ row(1,2), row(3,4), row(5,6) ];
QUERY: select row(1,1.1) = any (array[ row(7,7.7), row(1,1.1), row(0,0.0) ]);
QUERY: select row(1,1.1) = any (array[ row(7,7.7), row(1,1.0), row(0,0.0) ]);
QUERY: create type testtype1 as (a int, b int);
QUERY: create type testtype3 as (a int, b text);
QUERY: create type testtype5 as (a int);
QUERY: create type testtype2 as (a smallint, b bool);

Footnotes

  1. These are tests that we mark as Successful even though they do not exactly match the expected output. The differences are small, such as different wording in error messages, or incorrect column names while the data itself is correct.

@zachmu zachmu requested a review from Hydrocharged April 15, 2026 23:58
@Hydrocharged (Collaborator) left a comment


Conceptually I don't mind the change as long as the internal encodings match the expected behavior of Postgres, and this isn't always true for seemingly equivalent types. I mentioned the decimal.Decimal to pgtype.Numeric conversion as an example in the DM, as val.DecimalEnc will surely use the wrong internal representation (being built for decimal.Decimal). There are other things one may not think about as well, such as infinity and NaN ordering for floating-point values: those are invalid in MySQL (AFAICT) but valid in Postgres, so are we handling that properly?

This is ignoring stuff regarding how index ordering behaves (although we don't yet support index ordering and maybe we never will and just always take the speed hit, but worth mentioning). Maybe we implement this stuff using special Doltgres-only variations of built-in encodings for types where we need something different. For example, val.Int16Enc for the "default" case, val.Int16NFDescEnc if we're storing in descending order with nulls first.

Technically we could do that for all built-in types, and then user-defined types take the slower extended type path since they'll be less used in the average case (or we even specialize known stable encodings like pgvector).

Comment thread testing/go/select_test.go
```go
Query: "SELECT DISTINCT ON(v3) v1 FROM test2;",
Expected: []sql.Row{
	{1},
	{2},
```
Collaborator:

Why was this changed? I just ran this against a Postgres instance and got the original result of 1.

Comment thread server/types/type.go
```go
// case "json", "jsonb":
// 	return val.JSONAddrEnc
case "oid", "regclass", "regproc", "regtype":
	return val.Int32Enc
```
Collaborator:

These use id.Id, which is a string, so that's probably why some of the tests are failing.
