# Patch summary (review notes; everything above the first "diff --git" line is commentary).
#
# Layout: the one-line-per-source-line structure of this patch was restored from a
# whitespace-collapsed copy. Indentation uses Go-style tabs and was inferred, not
# recovered -- confirm against the target tree before applying strictly.
#
# arrow/array/dictionary.go
#   * IndexBuilder gains an UnsafeAppend func(int) field. createIndexBuilder sets it
#     for each of the eight integer index types; every closure narrows idx with a
#     plain conversion (int8(idx), uint8(idx), ...) and calls UnsafeAppend on the
#     concrete typed builder.
#   * dictionaryBuilder.UnsafeAppendBoolToBitmap(v) increments b.nulls when v is
#     false, increments b.length, and forwards v to the index builder.
#   * dictionaryBuilder.unsafeAppendValue(val) calls memoTable.GetOrInsert, passes
#     the returned index to idxBuilder.UnsafeAppend, increments b.length, and
#     returns the GetOrInsert error.
#     NOTE(review): the index is appended and length bumped even when err != nil;
#     the appendValue context lines show the same ordering -- confirm intended.
#   * dictBuilder[T].UnsafeAppend(v) converts Duration/Timestamp/Time64/Date64 to
#     int64 and Time32/Date32/MonthInterval to int32 before delegating to
#     unsafeAppendValue; any other T is passed through unchanged.
#     NOTE(review): this hunk adds two consecutive blank lines ahead of the new
#     method; gofmt would collapse them.
#   * NOTE(review): presumably callers must Reserve capacity before any Unsafe*
#     call (the new test calls Reserve(7) before seven unsafe appends) -- confirm
#     and state it in doc comments on the exported methods.
#
# arrow/array/dictionary_test.go
#   * Adds TestDictionaryBuilderReserveAndAppend, which looks up UnsafeAppend and
#     UnsafeAppendBoolToBitmap via reflect.MethodByName.
#     NOTE(review): neither lookup result is checked before Call.
#
# arrow/array/encoded.go
#   * RunEndEncodedBuilder.UnsafeAppendBoolToBitmap always panics.
#
diff --git a/arrow/array/dictionary.go b/arrow/array/dictionary.go
index 109d2a97..c4b8aae2 100644
--- a/arrow/array/dictionary.go
+++ b/arrow/array/dictionary.go
@@ -315,6 +315,7 @@ func arrayApproxEqualDict(l, r *Dictionary, opt equalOption) bool {
 type IndexBuilder struct {
 	Builder
 	Append func(int)
+	UnsafeAppend func(int)
 }
 
 func createIndexBuilder(mem memory.Allocator, dt arrow.FixedWidthDataType) (ret IndexBuilder, err error) {
@@ -324,34 +325,58 @@ func createIndexBuilder(mem memory.Allocator, dt arrow.FixedWidthDataType) (ret
 		ret.Append = func(idx int) {
 			ret.Builder.(*Int8Builder).Append(int8(idx))
 		}
+		ret.UnsafeAppend = func(idx int) {
+			ret.Builder.(*Int8Builder).UnsafeAppend(int8(idx))
+		}
 	case arrow.UINT8:
 		ret.Append = func(idx int) {
 			ret.Builder.(*Uint8Builder).Append(uint8(idx))
 		}
+		ret.UnsafeAppend = func(idx int) {
+			ret.Builder.(*Uint8Builder).UnsafeAppend(uint8(idx))
+		}
 	case arrow.INT16:
 		ret.Append = func(idx int) {
 			ret.Builder.(*Int16Builder).Append(int16(idx))
 		}
+		ret.UnsafeAppend = func(idx int) {
+			ret.Builder.(*Int16Builder).UnsafeAppend(int16(idx))
+		}
 	case arrow.UINT16:
 		ret.Append = func(idx int) {
 			ret.Builder.(*Uint16Builder).Append(uint16(idx))
 		}
+		ret.UnsafeAppend = func(idx int) {
+			ret.Builder.(*Uint16Builder).UnsafeAppend(uint16(idx))
+		}
 	case arrow.INT32:
 		ret.Append = func(idx int) {
 			ret.Builder.(*Int32Builder).Append(int32(idx))
 		}
+		ret.UnsafeAppend = func(idx int) {
+			ret.Builder.(*Int32Builder).UnsafeAppend(int32(idx))
+		}
 	case arrow.UINT32:
 		ret.Append = func(idx int) {
 			ret.Builder.(*Uint32Builder).Append(uint32(idx))
 		}
+		ret.UnsafeAppend = func(idx int) {
+			ret.Builder.(*Uint32Builder).UnsafeAppend(uint32(idx))
+		}
 	case arrow.INT64:
 		ret.Append = func(idx int) {
 			ret.Builder.(*Int64Builder).Append(int64(idx))
 		}
+		ret.UnsafeAppend = func(idx int) {
+			ret.Builder.(*Int64Builder).UnsafeAppend(int64(idx))
+		}
 	case arrow.UINT64:
 		ret.Append = func(idx int) {
 			ret.Builder.(*Uint64Builder).Append(uint64(idx))
 		}
+		ret.UnsafeAppend = func(idx int) {
+			ret.Builder.(*Uint64Builder).UnsafeAppend(uint64(idx))
+		}
 	default:
 		debug.Assert(false, "dictionary index type must be integral")
 		err = fmt.Errorf("dictionary index type must be integral, not %s", dt)
@@ -646,6 +671,14 @@ func (b *dictionaryBuilder) AppendEmptyValues(n int) {
 	}
 }
 
+func (b *dictionaryBuilder) UnsafeAppendBoolToBitmap(v bool) {
+	if !v {
+		b.nulls += 1
+	}
+	b.length += 1
+	b.idxBuilder.UnsafeAppendBoolToBitmap(v)
+}
+
 func (b *dictionaryBuilder) Reserve(n int) {
 	b.idxBuilder.Reserve(n)
 }
@@ -781,6 +814,13 @@ func (b *dictionaryBuilder) insertDictBytes(val []byte) error {
 	return err
 }
 
+func (b *dictionaryBuilder) unsafeAppendValue(val interface{}) error {
+	idx, _, err := b.memoTable.GetOrInsert(val)
+	b.idxBuilder.UnsafeAppend(idx)
+	b.length += 1
+	return err
+}
+
 func (b *dictionaryBuilder) appendValue(val interface{}) error {
 	idx, _, err := b.memoTable.GetOrInsert(val)
 	b.idxBuilder.Append(idx)
@@ -990,6 +1030,27 @@ type dictBuilder[T arrow.ValueType] struct {
 	dictionaryBuilder
 }
 
+
+func (b *dictBuilder[T]) UnsafeAppend(v T) error {
+	switch val := any(v).(type) {
+	case arrow.Duration:
+		return b.unsafeAppendValue(int64(val))
+	case arrow.Timestamp:
+		return b.unsafeAppendValue(int64(val))
+	case arrow.Time32:
+		return b.unsafeAppendValue(int32(val))
+	case arrow.Time64:
+		return b.unsafeAppendValue(int64(val))
+	case arrow.Date32:
+		return b.unsafeAppendValue(int32(val))
+	case arrow.Date64:
+		return b.unsafeAppendValue(int64(val))
+	case arrow.MonthInterval:
+		return b.unsafeAppendValue(int32(val))
+	}
+	return b.unsafeAppendValue(v)
+}
+
 func (b *dictBuilder[T]) Append(v T) error {
 	switch val := any(v).(type) {
 	case arrow.Duration:
diff --git a/arrow/array/dictionary_test.go b/arrow/array/dictionary_test.go
index 9b9d3b1f..24aab674 100644
--- a/arrow/array/dictionary_test.go
+++ b/arrow/array/dictionary_test.go
@@ -145,6 +145,47 @@ func (p *PrimitiveDictionaryTestSuite) TestDictionaryBuilderInit() {
 	p.True(array.Equal(expected, arr))
 }
 
+func (p *PrimitiveDictionaryTestSuite) TestDictionaryBuilderReserveAndAppend() {
+	expectedType := &arrow.DictionaryType{IndexType: &arrow.Int8Type{}, ValueType: p.typ}
+	bldr := array.NewDictionaryBuilder(p.mem, expectedType)
+	defer bldr.Release()
+
+	builder := reflect.ValueOf(bldr)
+	appendFn := builder.MethodByName("UnsafeAppend")
+	validFn := builder.MethodByName("UnsafeAppendBoolToBitmap")
+
+	bldr.Reserve(7)
+	validFn.Call([]reflect.Value{reflect.ValueOf(true)})
+	validFn.Call([]reflect.Value{reflect.ValueOf(false)})
+	appendFn.Call([]reflect.Value{reflect.ValueOf(0).Convert(p.reftyp)})
+	appendFn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})
+	validFn.Call([]reflect.Value{reflect.ValueOf(false)})
+	appendFn.Call([]reflect.Value{reflect.ValueOf(1).Convert(p.reftyp)})
+	appendFn.Call([]reflect.Value{reflect.ValueOf(2).Convert(p.reftyp)})
+
+	p.EqualValues(7, bldr.Len())
+	p.EqualValues(2, bldr.NullN())
+
+	p.EqualValues(3, bldr.DictionarySize())
+
+	arr := bldr.NewArray().(*array.Dictionary)
+	defer arr.Release()
+
+	p.True(arrow.TypeEqual(expectedType, arr.DataType()))
+	expectedDict, _, err := array.FromJSON(p.mem, expectedType.ValueType, strings.NewReader("[0, 1, 2]"))
+	p.NoError(err)
+	defer expectedDict.Release()
+
+	expectedIndices, _, err := array.FromJSON(p.mem, expectedType.IndexType, strings.NewReader("[0, null, 0, 1, null, 1, 2]"))
+	p.NoError(err)
+	defer expectedIndices.Release()
+
+	expected := array.NewDictionaryArray(expectedType, expectedIndices, expectedDict)
+	defer expected.Release()
+
+	p.True(array.Equal(expected, arr))
+}
+
 func (p *PrimitiveDictionaryTestSuite) TestDictionaryNewBuilder() {
 	valueType := p.typ
 	dictArr, _, err := array.FromJSON(p.mem, valueType, strings.NewReader("[1, 2]"))
diff --git a/arrow/array/encoded.go b/arrow/array/encoded.go
index 08800d4b..85432a13 100644
--- a/arrow/array/encoded.go
+++ b/arrow/array/encoded.go
@@ -398,6 +398,10 @@ func (b *RunEndEncodedBuilder) AppendNulls(n int) {
 	}
 }
 
+func (b *RunEndEncodedBuilder) UnsafeAppendBoolToBitmap(v bool) {
+	panic("Calling UnsafeAppendBoolToBitmap on a run-end encoded array is semantically undefined.")
+}
+
 func (b *RunEndEncodedBuilder) NullN() int {
 	return UnknownNullCount
 }