diff --git a/go/parquet/file/column_writer.go b/go/parquet/file/column_writer.go index 36663b10b89dd..4d603c547ca6a 100755 --- a/go/parquet/file/column_writer.go +++ b/go/parquet/file/column_writer.go @@ -660,7 +660,10 @@ func (w *columnWriter) maybeReplaceValidity(values arrow.Array, newNullCount int if values.Data().Offset() > 0 { data := values.Data() - buffers[1] = memory.NewBufferBytes(data.Buffers()[1].Bytes()[data.Offset()*arrow.Int32SizeBytes : data.Len()*arrow.Int32SizeBytes]) + elemSize := data.DataType().(arrow.FixedWidthDataType).Bytes() + start := data.Offset() * elemSize + end := start + data.Len()*elemSize + buffers[1] = memory.NewBufferBytes(data.Buffers()[1].Bytes()[start:end]) } data := array.NewData(values.DataType(), values.Len(), buffers, nil, int(newNullCount), 0) diff --git a/go/parquet/file/column_writer_test.go b/go/parquet/file/column_writer_test.go index 321e7b730d165..dd597e280b850 100755 --- a/go/parquet/file/column_writer_test.go +++ b/go/parquet/file/column_writer_test.go @@ -24,6 +24,8 @@ import ( "sync" "testing" + "github.com/apache/arrow/go/v16/arrow" + "github.com/apache/arrow/go/v16/arrow/array" "github.com/apache/arrow/go/v16/arrow/bitutil" "github.com/apache/arrow/go/v16/arrow/memory" arrutils "github.com/apache/arrow/go/v16/internal/utils" @@ -36,6 +38,7 @@ import ( "github.com/apache/arrow/go/v16/parquet/internal/testutils" "github.com/apache/arrow/go/v16/parquet/internal/utils" "github.com/apache/arrow/go/v16/parquet/metadata" + "github.com/apache/arrow/go/v16/parquet/pqarrow" "github.com/apache/arrow/go/v16/parquet/schema" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" @@ -736,3 +739,38 @@ func (b *BooleanValueWriterSuite) TestAlternateBooleanValues() { b.Equal(i%2 == 0, b.ValuesOut.([]bool)[i]) } } + +func TestDictionaryReslice(t *testing.T) { + pts := []arrow.DataType{ + arrow.PrimitiveTypes.Int8, + arrow.PrimitiveTypes.Int16, + arrow.PrimitiveTypes.Int32, + arrow.PrimitiveTypes.Int64, + arrow.PrimitiveTypes.Uint8, + arrow.PrimitiveTypes.Uint16, + arrow.PrimitiveTypes.Uint32, + arrow.PrimitiveTypes.Uint64, + } + for _, pt := range pts { + t.Run(pt.String(), func(t *testing.T) { + mem := memory.NewGoAllocator() + dt := &arrow.DictionaryType{ + IndexType: pt, + ValueType: &arrow.StringType{}, + } + field := arrow.Field{Name: "test_field", Type: dt, Nullable: true} + schema := arrow.NewSchema([]arrow.Field{field}, nil) + b := array.NewRecordBuilder(mem, schema) + for i := 0; i < 2000; i++ { + b.Field(0).(*array.BinaryDictionaryBuilder).AppendString("test_value") + } + rec := b.NewRecord() + out := &bytes.Buffer{} + pqw, err := pqarrow.NewFileWriter(rec.Schema(), out, nil, pqarrow.NewArrowWriterProperties()) + assert.NoError(t, err) + err = pqw.WriteBuffered(rec) + assert.NoError(t, err) + + }) + } +}