aboutsummaryrefslogtreecommitdiffstats
path: root/vendor/github.com/apache/arrow/go/v14/internal
diff options
context:
space:
mode:
authorTaras Madan <tarasmadan@google.com>2024-09-10 12:16:33 +0200
committerTaras Madan <tarasmadan@google.com>2024-09-10 14:05:26 +0000
commitc97c816133b42257d0bcf1ee4bd178bb2a7a2b9e (patch)
tree0bcbc2e540bbf8f62f6c17887cdd53b8c2cee637 /vendor/github.com/apache/arrow/go/v14/internal
parent54e657429ab892ad06c90cd7c1a4eb33ba93a3dc (diff)
vendor: update
Diffstat (limited to 'vendor/github.com/apache/arrow/go/v14/internal')
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/bitutils/bit_block_counter.go452
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/bitutils/bit_run_reader.go151
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/bitutils/bit_set_run_reader.go361
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/bitutils/bitmap_generate.go109
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/hashing/hash_funcs.go90
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/hashing/hash_string.go26
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/hashing/hash_string_go1.19.go37
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/hashing/types.tmpldata42
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/hashing/xxh3_memo_table.gen.go2833
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/hashing/xxh3_memo_table.gen.go.tmpl349
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/hashing/xxh3_memo_table.go443
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/json/json.go51
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/json/json_tinygo.go51
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/Makefile80
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/buf_reader.go212
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/endians_default.go30
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/endians_s390x.go33
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/math.go49
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/min_max.go212
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_amd64.go55
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_arm64.go65
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_avx2_amd64.go90
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_avx2_amd64.s927
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_neon_arm64.go56
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_neon_arm64.s324
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_noasm.go31
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_ppc64le.go30
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_s390x.go30
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_sse4_amd64.go88
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_sse4_amd64.s1044
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints.go407
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints.go.tmpl34
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints.tmpldata34
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_amd64.go325
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_amd64.go.tmpl75
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_arm64.go96
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_avx2_amd64.go473
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_avx2_amd64.s3074
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_def.go227
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_noasm.go96
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_noasm.go.tmpl34
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_ppc64le.go96
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_s390x.go96
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_s390x.go.tmpl34
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_simd.go.tmpl42
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_sse4_amd64.go473
-rw-r--r--vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_sse4_amd64.s3074
47 files changed, 0 insertions, 17041 deletions
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/bitutils/bit_block_counter.go b/vendor/github.com/apache/arrow/go/v14/internal/bitutils/bit_block_counter.go
deleted file mode 100644
index 86818bfd4..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/bitutils/bit_block_counter.go
+++ /dev/null
@@ -1,452 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package bitutils
-
-import (
- "math"
- "math/bits"
- "unsafe"
-
- "github.com/apache/arrow/go/v14/arrow/bitutil"
- "github.com/apache/arrow/go/v14/internal/utils"
-)
-
-func loadWord(byt []byte) uint64 {
- return utils.ToLEUint64(*(*uint64)(unsafe.Pointer(&byt[0])))
-}
-
-func shiftWord(current, next uint64, shift int64) uint64 {
- if shift == 0 {
- return current
- }
- return (current >> shift) | (next << (64 - shift))
-}
-
-// BitBlockCount is returned by the various bit block counter utilities
-// in order to return a length of bits and the population count of that
-// slice of bits.
-type BitBlockCount struct {
- Len int16
- Popcnt int16
-}
-
-// NoneSet returns true if ALL the bits were 0 in this set, ie: Popcnt == 0
-func (b BitBlockCount) NoneSet() bool {
- return b.Popcnt == 0
-}
-
-// AllSet returns true if ALL the bits were 1 in this set, ie: Popcnt == Len
-func (b BitBlockCount) AllSet() bool {
- return b.Len == b.Popcnt
-}
-
-// BitBlockCounter is a utility for grabbing chunks of a bitmap at a time and efficiently
-// counting the number of bits which are 1.
-type BitBlockCounter struct {
- bitmap []byte
- bitsRemaining int64
- bitOffset int8
-}
-
-const (
- wordBits int64 = 64
- fourWordsBits int64 = wordBits * 4
-)
-
-// NewBitBlockCounter returns a BitBlockCounter for the passed bitmap starting at startOffset
-// of length nbits.
-func NewBitBlockCounter(bitmap []byte, startOffset, nbits int64) *BitBlockCounter {
- return &BitBlockCounter{
- bitmap: bitmap[startOffset/8:],
- bitsRemaining: nbits,
- bitOffset: int8(startOffset % 8),
- }
-}
-
-// getBlockSlow is for returning a block of the requested size when there aren't
-// enough bits remaining to do a full word computation.
-func (b *BitBlockCounter) getBlockSlow(blockSize int64) BitBlockCount {
- runlen := int16(utils.Min(b.bitsRemaining, blockSize))
- popcnt := int16(bitutil.CountSetBits(b.bitmap, int(b.bitOffset), int(runlen)))
- b.bitsRemaining -= int64(runlen)
- b.bitmap = b.bitmap[runlen/8:]
- return BitBlockCount{runlen, popcnt}
-}
-
-// NextFourWords returns the next run of available bits, usually 256. The
-// returned pair contains the size of run and the number of true values.
-// The last block will have a length less than 256 if the bitmap length
-// is not a multiple of 256, and will return 0-length blocks in subsequent
-// invocations.
-func (b *BitBlockCounter) NextFourWords() BitBlockCount {
- if b.bitsRemaining == 0 {
- return BitBlockCount{0, 0}
- }
-
- totalPopcnt := 0
- if b.bitOffset == 0 {
- // if we're aligned at 0 bitoffset, then we can easily just jump from
- // word to word nice and easy.
- if b.bitsRemaining < fourWordsBits {
- return b.getBlockSlow(fourWordsBits)
- }
- totalPopcnt += bits.OnesCount64(loadWord(b.bitmap))
- totalPopcnt += bits.OnesCount64(loadWord(b.bitmap[8:]))
- totalPopcnt += bits.OnesCount64(loadWord(b.bitmap[16:]))
- totalPopcnt += bits.OnesCount64(loadWord(b.bitmap[24:]))
- } else {
- // When the offset is > 0, we need there to be a word beyond the last
- // aligned word in the bitmap for the bit shifting logic.
- if b.bitsRemaining < 5*fourWordsBits-int64(b.bitOffset) {
- return b.getBlockSlow(fourWordsBits)
- }
-
- current := loadWord(b.bitmap)
- next := loadWord(b.bitmap[8:])
- totalPopcnt += bits.OnesCount64(shiftWord(current, next, int64(b.bitOffset)))
-
- current = next
- next = loadWord(b.bitmap[16:])
- totalPopcnt += bits.OnesCount64(shiftWord(current, next, int64(b.bitOffset)))
-
- current = next
- next = loadWord(b.bitmap[24:])
- totalPopcnt += bits.OnesCount64(shiftWord(current, next, int64(b.bitOffset)))
-
- current = next
- next = loadWord(b.bitmap[32:])
- totalPopcnt += bits.OnesCount64(shiftWord(current, next, int64(b.bitOffset)))
- }
- b.bitmap = b.bitmap[bitutil.BytesForBits(fourWordsBits):]
- b.bitsRemaining -= fourWordsBits
- return BitBlockCount{256, int16(totalPopcnt)}
-}
-
-// NextWord returns the next run of available bits, usually 64. The returned
-// pair contains the size of run and the number of true values. The last
-// block will have a length less than 64 if the bitmap length is not a
-// multiple of 64, and will return 0-length blocks in subsequent
-// invocations.
-func (b *BitBlockCounter) NextWord() BitBlockCount {
- if b.bitsRemaining == 0 {
- return BitBlockCount{0, 0}
- }
- popcnt := 0
- if b.bitOffset == 0 {
- if b.bitsRemaining < wordBits {
- return b.getBlockSlow(wordBits)
- }
- popcnt = bits.OnesCount64(loadWord(b.bitmap))
- } else {
- // When the offset is > 0, we need there to be a word beyond the last
- // aligned word in the bitmap for the bit shifting logic.
- if b.bitsRemaining < (2*wordBits - int64(b.bitOffset)) {
- return b.getBlockSlow(wordBits)
- }
- popcnt = bits.OnesCount64(shiftWord(loadWord(b.bitmap), loadWord(b.bitmap[8:]), int64(b.bitOffset)))
- }
- b.bitmap = b.bitmap[wordBits/8:]
- b.bitsRemaining -= wordBits
- return BitBlockCount{64, int16(popcnt)}
-}
-
-// OptionalBitBlockCounter is a useful counter to iterate through a possibly
-// non-existent validity bitmap to allow us to write one code path for both
-// the with-nulls and no-nulls cases without giving up a lot of performance.
-type OptionalBitBlockCounter struct {
- hasBitmap bool
- pos int64
- len int64
- counter *BitBlockCounter
-}
-
-// NewOptionalBitBlockCounter constructs and returns a new bit block counter that
-// can properly handle the case when a bitmap is null, if it is guaranteed that the
-// the bitmap is not nil, then prefer NewBitBlockCounter here.
-func NewOptionalBitBlockCounter(bitmap []byte, offset, length int64) *OptionalBitBlockCounter {
- var counter *BitBlockCounter
- if bitmap != nil {
- counter = NewBitBlockCounter(bitmap, offset, length)
- }
- return &OptionalBitBlockCounter{
- hasBitmap: bitmap != nil,
- pos: 0,
- len: length,
- counter: counter,
- }
-}
-
-// NextBlock returns block count for next word when the bitmap is available otherwise
-// return a block with length up to INT16_MAX when there is no validity
-// bitmap (so all the referenced values are not null).
-func (obc *OptionalBitBlockCounter) NextBlock() BitBlockCount {
- const maxBlockSize = math.MaxInt16
- if obc.hasBitmap {
- block := obc.counter.NextWord()
- obc.pos += int64(block.Len)
- return block
- }
-
- blockSize := int16(utils.Min(maxBlockSize, obc.len-obc.pos))
- obc.pos += int64(blockSize)
- // all values are non-null
- return BitBlockCount{blockSize, blockSize}
-}
-
-// NextWord is like NextBlock, but returns a word-sized block even when there is no
-// validity bitmap
-func (obc *OptionalBitBlockCounter) NextWord() BitBlockCount {
- const wordsize = 64
- if obc.hasBitmap {
- block := obc.counter.NextWord()
- obc.pos += int64(block.Len)
- return block
- }
- blockSize := int16(utils.Min(wordsize, obc.len-obc.pos))
- obc.pos += int64(blockSize)
- // all values are non-null
- return BitBlockCount{blockSize, blockSize}
-}
-
-// VisitBitBlocks is a utility for easily iterating through the blocks of bits in a bitmap,
-// calling the appropriate visitValid/visitInvalid function as we iterate through the bits.
-// visitValid is called with the bitoffset of the valid bit. Don't use this inside a tight
-// loop when performance is needed and instead prefer manually constructing these loops
-// in that scenario.
-func VisitBitBlocks(bitmap []byte, offset, length int64, visitValid func(pos int64), visitInvalid func()) {
- counter := NewOptionalBitBlockCounter(bitmap, offset, length)
- pos := int64(0)
- for pos < length {
- block := counter.NextBlock()
- if block.AllSet() {
- for i := 0; i < int(block.Len); i, pos = i+1, pos+1 {
- visitValid(pos)
- }
- } else if block.NoneSet() {
- for i := 0; i < int(block.Len); i, pos = i+1, pos+1 {
- visitInvalid()
- }
- } else {
- for i := 0; i < int(block.Len); i, pos = i+1, pos+1 {
- if bitutil.BitIsSet(bitmap, int(offset+pos)) {
- visitValid(pos)
- } else {
- visitInvalid()
- }
- }
- }
- }
-}
-
-// VisitBitBlocks is a utility for easily iterating through the blocks of bits in a bitmap,
-// calling the appropriate visitValid/visitInvalid function as we iterate through the bits.
-// visitValid is called with the bitoffset of the valid bit. Don't use this inside a tight
-// loop when performance is needed and instead prefer manually constructing these loops
-// in that scenario.
-func VisitBitBlocksShort(bitmap []byte, offset, length int64, visitValid func(pos int64) error, visitInvalid func() error) error {
- counter := NewOptionalBitBlockCounter(bitmap, offset, length)
- pos := int64(0)
- for pos < length {
- block := counter.NextBlock()
- if block.AllSet() {
- for i := 0; i < int(block.Len); i, pos = i+1, pos+1 {
- if err := visitValid(pos); err != nil {
- return err
- }
- }
- } else if block.NoneSet() {
- for i := 0; i < int(block.Len); i, pos = i+1, pos+1 {
- if err := visitInvalid(); err != nil {
- return err
- }
- }
- } else {
- for i := 0; i < int(block.Len); i, pos = i+1, pos+1 {
- if bitutil.BitIsSet(bitmap, int(offset+pos)) {
- if err := visitValid(pos); err != nil {
- return err
- }
- } else {
- if err := visitInvalid(); err != nil {
- return err
- }
- }
- }
- }
- }
- return nil
-}
-
-func VisitTwoBitBlocks(leftBitmap, rightBitmap []byte, leftOffset, rightOffset int64, len int64, visitValid func(pos int64), visitNull func()) {
- if leftBitmap == nil || rightBitmap == nil {
- // at most one is present
- if leftBitmap == nil {
- VisitBitBlocks(rightBitmap, rightOffset, len, visitValid, visitNull)
- } else {
- VisitBitBlocks(leftBitmap, leftOffset, len, visitValid, visitNull)
- }
- return
- }
-
- bitCounter := NewBinaryBitBlockCounter(leftBitmap, rightBitmap, leftOffset, rightOffset, len)
- var pos int64
- for pos < len {
- block := bitCounter.NextAndWord()
- if block.AllSet() {
- for i := 0; i < int(block.Len); i, pos = i+1, pos+1 {
- visitValid(pos)
- }
- } else if block.NoneSet() {
- for i := 0; i < int(block.Len); i, pos = i+1, pos+1 {
- visitNull()
- }
- } else {
- for i := 0; i < int(block.Len); i, pos = i+1, pos+1 {
- if bitutil.BitIsSet(leftBitmap, int(leftOffset+pos)) && bitutil.BitIsSet(rightBitmap, int(rightOffset+pos)) {
- visitValid(pos)
- } else {
- visitNull()
- }
- }
- }
- }
-}
-
-type bitOp struct {
- bit func(bool, bool) bool
- word func(uint64, uint64) uint64
-}
-
-var (
- bitBlockAnd = bitOp{
- bit: func(a, b bool) bool { return a && b },
- word: func(a, b uint64) uint64 { return a & b },
- }
- bitBlockAndNot = bitOp{
- bit: func(a, b bool) bool { return a && !b },
- word: func(a, b uint64) uint64 { return a &^ b },
- }
- bitBlockOr = bitOp{
- bit: func(a, b bool) bool { return a || b },
- word: func(a, b uint64) uint64 { return a | b },
- }
- bitBlockOrNot = bitOp{
- bit: func(a, b bool) bool { return a || !b },
- word: func(a, b uint64) uint64 { return a | ^b },
- }
-)
-
-// BinaryBitBlockCounter computes popcounts on the result of bitwise
-// operations between two bitmaps, 64 bits at a time. A 64-bit word
-// is loaded from each bitmap, then the popcount is computed on
-// e.g. the bitwise-and of the two words
-type BinaryBitBlockCounter struct {
- left []byte
- right []byte
- bitsRemaining int64
- leftOffset, rightOffset int64
-
- bitsRequiredForWords int64
-}
-
-// NewBinaryBitBlockCounter constructs a binary bit block counter for
-// computing the popcounts on the results of operations between
-// the passed in bitmaps, with their respective offsets.
-func NewBinaryBitBlockCounter(left, right []byte, leftOffset, rightOffset int64, length int64) *BinaryBitBlockCounter {
- ret := &BinaryBitBlockCounter{
- left: left[leftOffset/8:],
- right: right[rightOffset/8:],
- leftOffset: leftOffset % 8,
- rightOffset: rightOffset % 8,
- bitsRemaining: length,
- }
-
- leftBitsReq := int64(64)
- if ret.leftOffset != 0 {
- leftBitsReq = 64 + (64 - ret.leftOffset)
- }
- rightBitsReq := int64(64)
- if ret.rightOffset != 0 {
- rightBitsReq = 64 + (64 - ret.rightOffset)
- }
-
- if leftBitsReq > rightBitsReq {
- ret.bitsRequiredForWords = leftBitsReq
- } else {
- ret.bitsRequiredForWords = rightBitsReq
- }
-
- return ret
-}
-
-// NextAndWord returns the popcount of the bitwise-and of the next run
-// of available bits, up to 64. The returned pair contains the size of
-// the run and the number of true values. the last block will have a
-// length less than 64 if the bitmap length is not a multiple of 64,
-// and will return 0-length blocks in subsequent invocations
-func (b *BinaryBitBlockCounter) NextAndWord() BitBlockCount { return b.nextWord(bitBlockAnd) }
-
-// NextAndNotWord is like NextAndWord but performs x &^ y on each run
-func (b *BinaryBitBlockCounter) NextAndNotWord() BitBlockCount { return b.nextWord(bitBlockAndNot) }
-
-// NextOrWord is like NextAndWord but performs x | y on each run
-func (b *BinaryBitBlockCounter) NextOrWord() BitBlockCount { return b.nextWord(bitBlockOr) }
-
-// NextOrWord is like NextAndWord but performs x | ^y on each run
-func (b *BinaryBitBlockCounter) NextOrNotWord() BitBlockCount { return b.nextWord(bitBlockOrNot) }
-
-func (b *BinaryBitBlockCounter) nextWord(op bitOp) BitBlockCount {
- if b.bitsRemaining == 0 {
- return BitBlockCount{}
- }
-
- // when offset is >0, we need there to be a word beyond the last
- // aligned word in the bitmap for the bit shifting logic
- if b.bitsRemaining < b.bitsRequiredForWords {
- runLength := int16(b.bitsRemaining)
- if runLength > int16(wordBits) {
- runLength = int16(wordBits)
- }
-
- var popcount int16
- for i := int16(0); i < runLength; i++ {
- if op.bit(bitutil.BitIsSet(b.left, int(b.leftOffset)+int(i)),
- bitutil.BitIsSet(b.right, int(b.rightOffset)+int(i))) {
- popcount++
- }
- }
- // this code path should trigger _at most_ 2 times. in the "two times"
- // case, the first time the run length will be a multiple of 8.
- b.left = b.left[runLength/8:]
- b.right = b.right[runLength/8:]
- b.bitsRemaining -= int64(runLength)
- return BitBlockCount{Len: runLength, Popcnt: popcount}
- }
-
- var popcount int
- if b.leftOffset == 0 && b.rightOffset == 0 {
- popcount = bits.OnesCount64(op.word(loadWord(b.left), loadWord(b.right)))
- } else {
- leftWord := shiftWord(loadWord(b.left), loadWord(b.left[8:]), b.leftOffset)
- rightWord := shiftWord(loadWord(b.right), loadWord(b.right[8:]), b.rightOffset)
- popcount = bits.OnesCount64(op.word(leftWord, rightWord))
- }
- b.left = b.left[wordBits/8:]
- b.right = b.right[wordBits/8:]
- b.bitsRemaining -= wordBits
- return BitBlockCount{Len: int16(wordBits), Popcnt: int16(popcount)}
-}
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/bitutils/bit_run_reader.go b/vendor/github.com/apache/arrow/go/v14/internal/bitutils/bit_run_reader.go
deleted file mode 100644
index a1686a490..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/bitutils/bit_run_reader.go
+++ /dev/null
@@ -1,151 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package bitutils
-
-import (
- "encoding/binary"
- "fmt"
- "math/bits"
- "unsafe"
-
- "github.com/apache/arrow/go/v14/arrow"
- "github.com/apache/arrow/go/v14/arrow/bitutil"
- "github.com/apache/arrow/go/v14/internal/utils"
-)
-
-// BitRun represents a run of bits with the same value of length Len
-// with Set representing if the group of bits were 1 or 0.
-type BitRun struct {
- Len int64
- Set bool
-}
-
-// BitRunReader is an interface that is usable by multiple callers to provide
-// multiple types of bit run readers such as a reverse reader and so on.
-//
-// It's a convenience interface for counting contiguous set/unset bits in a bitmap.
-// In places where BitBlockCounter can be used, then it would be preferred to use that
-// as it would be faster than using BitRunReader.
-type BitRunReader interface {
- NextRun() BitRun
-}
-
-func (b BitRun) String() string {
- return fmt.Sprintf("{Length: %d, set=%t}", b.Len, b.Set)
-}
-
-type bitRunReader struct {
- bitmap []byte
- pos int64
- length int64
- word uint64
- curRunBitSet bool
-}
-
-// NewBitRunReader returns a reader for the given bitmap, offset and length that
-// grabs runs of the same value bit at a time for easy iteration.
-func NewBitRunReader(bitmap []byte, offset int64, length int64) BitRunReader {
- ret := &bitRunReader{
- bitmap: bitmap[offset/8:],
- pos: offset % 8,
- length: (offset % 8) + length,
- }
-
- if length == 0 {
- return ret
- }
-
- ret.curRunBitSet = bitutil.BitIsNotSet(bitmap, int(offset))
- bitsRemaining := length + ret.pos
- ret.loadWord(bitsRemaining)
- ret.word = ret.word &^ LeastSignificantBitMask(ret.pos)
- return ret
-}
-
-// NextRun returns a new BitRun containing the number of contiguous bits with the
-// same value. Len == 0 indicates the end of the bitmap.
-func (b *bitRunReader) NextRun() BitRun {
- if b.pos >= b.length {
- return BitRun{0, false}
- }
-
- // This implementation relies on a efficient implementations of
- // CountTrailingZeros and assumes that runs are more often then
- // not. The logic is to incrementally find the next bit change
- // from the current position. This is done by zeroing all
- // bits in word_ up to position_ and using the TrailingZeroCount
- // to find the index of the next set bit.
-
- // The runs alternate on each call, so flip the bit.
- b.curRunBitSet = !b.curRunBitSet
-
- start := b.pos
- startOffset := start & 63
-
- // Invert the word for proper use of CountTrailingZeros and
- // clear bits so CountTrailingZeros can do it magic.
- b.word = ^b.word &^ LeastSignificantBitMask(startOffset)
-
- // Go forward until the next change from unset to set.
- newbits := int64(bits.TrailingZeros64(b.word)) - startOffset
- b.pos += newbits
-
- if IsMultipleOf64(b.pos) && b.pos < b.length {
- b.advanceUntilChange()
- }
- return BitRun{b.pos - start, b.curRunBitSet}
-}
-
-func (b *bitRunReader) advanceUntilChange() {
- newbits := int64(0)
- for {
- b.bitmap = b.bitmap[arrow.Uint64SizeBytes:]
- b.loadNextWord()
- newbits = int64(bits.TrailingZeros64(b.word))
- b.pos += newbits
- if !IsMultipleOf64(b.pos) || b.pos >= b.length || newbits <= 0 {
- break
- }
- }
-}
-
-func (b *bitRunReader) loadNextWord() {
- b.loadWord(b.length - b.pos)
-}
-
-func (b *bitRunReader) loadWord(bitsRemaining int64) {
- b.word = 0
- if bitsRemaining >= 64 {
- b.word = binary.LittleEndian.Uint64(b.bitmap)
- } else {
- nbytes := bitutil.BytesForBits(bitsRemaining)
- wordptr := (*(*[8]byte)(unsafe.Pointer(&b.word)))[:]
- copy(wordptr, b.bitmap[:nbytes])
-
- bitutil.SetBitTo(wordptr, int(bitsRemaining), bitutil.BitIsNotSet(wordptr, int(bitsRemaining-1)))
- // reset the value to little endian for big endian architectures
- b.word = utils.ToLEUint64(b.word)
- }
-
- // Two cases:
- // 1. For unset, CountTrailingZeros works naturally so we don't
- // invert the word.
- // 2. Otherwise invert so we can use CountTrailingZeros.
- if b.curRunBitSet {
- b.word = ^b.word
- }
-}
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/bitutils/bit_set_run_reader.go b/vendor/github.com/apache/arrow/go/v14/internal/bitutils/bit_set_run_reader.go
deleted file mode 100644
index a2269ffec..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/bitutils/bit_set_run_reader.go
+++ /dev/null
@@ -1,361 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package bitutils
-
-import (
- "encoding/binary"
- "math/bits"
-
- "github.com/apache/arrow/go/v14/arrow/bitutil"
- "github.com/apache/arrow/go/v14/internal/utils"
-)
-
-// IsMultipleOf64 returns whether v is a multiple of 64.
-func IsMultipleOf64(v int64) bool { return v&63 == 0 }
-
-// LeastSignificantBitMask returns a bit mask to return the least significant
-// bits for a value starting from the bit index passed in. ie: if you want a
-// mask for the 4 least significant bits, you call LeastSignificantBitMask(4)
-func LeastSignificantBitMask(index int64) uint64 {
- return (uint64(1) << index) - 1
-}
-
-// SetBitRun describes a run of contiguous set bits in a bitmap with Pos being
-// the starting position of the run and Length being the number of bits.
-type SetBitRun struct {
- Pos int64
- Length int64
-}
-
-// AtEnd returns true if this bit run is the end of the set by checking
-// that the length is 0.
-func (s SetBitRun) AtEnd() bool {
- return s.Length == 0
-}
-
-// Equal returns whether rhs is the same run as s
-func (s SetBitRun) Equal(rhs SetBitRun) bool {
- return s.Pos == rhs.Pos && s.Length == rhs.Length
-}
-
-// SetBitRunReader is an interface for reading groups of contiguous set bits
-// from a bitmap. The interface allows us to create different reader implementations
-// that share the same interface easily such as a reverse set reader.
-type SetBitRunReader interface {
- // NextRun will return the next run of contiguous set bits in the bitmap
- NextRun() SetBitRun
- // Reset allows re-using the reader by providing a new bitmap, offset and length. The arguments
- // match the New function for the reader being used.
- Reset([]byte, int64, int64)
- // VisitSetBitRuns calls visitFn for each set in a loop starting from the current position
- // it's roughly equivalent to simply looping, calling NextRun and calling visitFn on the run
- // for each run.
- VisitSetBitRuns(visitFn VisitFn) error
-}
-
-type baseSetBitRunReader struct {
- bitmap []byte
- pos int64
- length int64
- remaining int64
- curWord uint64
- curNumBits int32
- reversed bool
-
- firstBit uint64
-}
-
-// NewSetBitRunReader returns a SetBitRunReader for the bitmap starting at startOffset which will read
-// numvalues bits.
-func NewSetBitRunReader(validBits []byte, startOffset, numValues int64) SetBitRunReader {
- return newBaseSetBitRunReader(validBits, startOffset, numValues, false)
-}
-
-// NewReverseSetBitRunReader returns a SetBitRunReader like NewSetBitRunReader, except it will
-// return runs starting from the end of the bitmap until it reaches startOffset rather than starting
-// at startOffset and reading from there. The SetBitRuns will still operate the same, so Pos
-// will still be the position of the "left-most" bit of the run or the "start" of the run. It
-// just returns runs starting from the end instead of starting from the beginning.
-func NewReverseSetBitRunReader(validBits []byte, startOffset, numValues int64) SetBitRunReader {
- return newBaseSetBitRunReader(validBits, startOffset, numValues, true)
-}
-
-func newBaseSetBitRunReader(bitmap []byte, startOffset, length int64, reverse bool) *baseSetBitRunReader {
- ret := &baseSetBitRunReader{reversed: reverse}
- ret.Reset(bitmap, startOffset, length)
- return ret
-}
-
-func (br *baseSetBitRunReader) Reset(bitmap []byte, startOffset, length int64) {
- br.bitmap = bitmap
- br.length = length
- br.remaining = length
- br.curNumBits = 0
- br.curWord = 0
-
- if !br.reversed {
- br.pos = startOffset / 8
- br.firstBit = 1
-
- bitOffset := int8(startOffset % 8)
- if length > 0 && bitOffset != 0 {
- br.curNumBits = int32(utils.MinInt(int(length), int(8-bitOffset)))
- br.curWord = br.loadPartial(bitOffset, int64(br.curNumBits))
- }
- return
- }
-
- br.pos = (startOffset + length) / 8
- br.firstBit = uint64(0x8000000000000000)
- endBitOffset := int8((startOffset + length) % 8)
- if length > 0 && endBitOffset != 0 {
- br.pos++
- br.curNumBits = int32(utils.MinInt(int(length), int(endBitOffset)))
- br.curWord = br.loadPartial(8-endBitOffset, int64(br.curNumBits))
- }
-}
-
-func (br *baseSetBitRunReader) consumeBits(word uint64, nbits int32) uint64 {
- if br.reversed {
- return word << nbits
- }
- return word >> nbits
-}
-
-func (br *baseSetBitRunReader) countFirstZeros(word uint64) int32 {
- if br.reversed {
- return int32(bits.LeadingZeros64(word))
- }
- return int32(bits.TrailingZeros64(word))
-}
-
-func (br *baseSetBitRunReader) loadPartial(bitOffset int8, numBits int64) uint64 {
- var word [8]byte
- nbytes := bitutil.BytesForBits(numBits)
- if br.reversed {
- br.pos -= nbytes
- copy(word[8-nbytes:], br.bitmap[br.pos:br.pos+nbytes])
- return (binary.LittleEndian.Uint64(word[:]) << bitOffset) &^ LeastSignificantBitMask(64-numBits)
- }
-
- copy(word[:], br.bitmap[br.pos:br.pos+nbytes])
- br.pos += nbytes
- return (binary.LittleEndian.Uint64(word[:]) >> bitOffset) & LeastSignificantBitMask(numBits)
-}
-
-func (br *baseSetBitRunReader) findCurrentRun() SetBitRun {
- nzeros := br.countFirstZeros(br.curWord)
- if nzeros >= br.curNumBits {
- br.remaining -= int64(br.curNumBits)
- br.curWord = 0
- br.curNumBits = 0
- return SetBitRun{0, 0}
- }
-
- br.curWord = br.consumeBits(br.curWord, nzeros)
- br.curNumBits -= nzeros
- br.remaining -= int64(nzeros)
- pos := br.position()
-
- numOnes := br.countFirstZeros(^br.curWord)
- br.curWord = br.consumeBits(br.curWord, numOnes)
- br.curNumBits -= numOnes
- br.remaining -= int64(numOnes)
- return SetBitRun{pos, int64(numOnes)}
-}
-
-func (br *baseSetBitRunReader) position() int64 {
- if br.reversed {
- return br.remaining
- }
- return br.length - br.remaining
-}
-
-func (br *baseSetBitRunReader) adjustRun(run SetBitRun) SetBitRun {
- if br.reversed {
- run.Pos -= run.Length
- }
- return run
-}
-
-func (br *baseSetBitRunReader) loadFull() (ret uint64) {
- if br.reversed {
- br.pos -= 8
- }
- ret = binary.LittleEndian.Uint64(br.bitmap[br.pos : br.pos+8])
- if !br.reversed {
- br.pos += 8
- }
- return
-}
-
-func (br *baseSetBitRunReader) skipNextZeros() {
- for br.remaining >= 64 {
- br.curWord = br.loadFull()
- nzeros := br.countFirstZeros(br.curWord)
- if nzeros < 64 {
- br.curWord = br.consumeBits(br.curWord, nzeros)
- br.curNumBits = 64 - nzeros
- br.remaining -= int64(nzeros)
- return
- }
- br.remaining -= 64
- }
- // run of zeros continues in last bitmap word
- if br.remaining > 0 {
- br.curWord = br.loadPartial(0, br.remaining)
- br.curNumBits = int32(br.remaining)
- nzeros := int32(utils.MinInt(int(br.curNumBits), int(br.countFirstZeros(br.curWord))))
- br.curWord = br.consumeBits(br.curWord, nzeros)
- br.curNumBits -= nzeros
- br.remaining -= int64(nzeros)
- }
-}
-
-func (br *baseSetBitRunReader) countNextOnes() int64 {
- var length int64
- if ^br.curWord != 0 {
- numOnes := br.countFirstZeros(^br.curWord)
- br.remaining -= int64(numOnes)
- br.curWord = br.consumeBits(br.curWord, numOnes)
- br.curNumBits -= numOnes
- if br.curNumBits != 0 {
- return int64(numOnes)
- }
- length = int64(numOnes)
- } else {
- br.remaining -= 64
- br.curNumBits = 0
- length = 64
- }
-
- for br.remaining >= 64 {
- br.curWord = br.loadFull()
- numOnes := br.countFirstZeros(^br.curWord)
- length += int64(numOnes)
- br.remaining -= int64(numOnes)
- if numOnes < 64 {
- br.curWord = br.consumeBits(br.curWord, numOnes)
- br.curNumBits = 64 - numOnes
- return length
- }
- }
-
- if br.remaining > 0 {
- br.curWord = br.loadPartial(0, br.remaining)
- br.curNumBits = int32(br.remaining)
- numOnes := br.countFirstZeros(^br.curWord)
- br.curWord = br.consumeBits(br.curWord, numOnes)
- br.curNumBits -= numOnes
- br.remaining -= int64(numOnes)
- length += int64(numOnes)
- }
- return length
-}
-
-func (br *baseSetBitRunReader) NextRun() SetBitRun {
- var (
- pos int64 = 0
- length int64 = 0
- )
-
- if br.curNumBits != 0 {
- run := br.findCurrentRun()
- if run.Length != 0 && br.curNumBits != 0 {
- return br.adjustRun(run)
- }
- pos = run.Pos
- length = run.Length
- }
-
- if length == 0 {
- // we didn't get any ones in curWord, so we can skip any zeros
- // in the following words
- br.skipNextZeros()
- if br.remaining == 0 {
- return SetBitRun{0, 0}
- }
- pos = br.position()
- } else if br.curNumBits == 0 {
- if br.remaining >= 64 {
- br.curWord = br.loadFull()
- br.curNumBits = 64
- } else if br.remaining > 0 {
- br.curWord = br.loadPartial(0, br.remaining)
- br.curNumBits = int32(br.remaining)
- } else {
- return br.adjustRun(SetBitRun{pos, length})
- }
- if (br.curWord & br.firstBit) == 0 {
- return br.adjustRun(SetBitRun{pos, length})
- }
- }
-
- length += br.countNextOnes()
- return br.adjustRun(SetBitRun{pos, length})
-}
-
-// VisitFn is a callback function for visiting runs of contiguous bits
-type VisitFn func(pos int64, length int64) error
-
-func (br *baseSetBitRunReader) VisitSetBitRuns(visitFn VisitFn) error {
- for {
- run := br.NextRun()
- if run.Length == 0 {
- break
- }
-
- if err := visitFn(run.Pos, run.Length); err != nil {
- return err
- }
- }
- return nil
-}
-
-// VisitSetBitRuns is just a convenience function for calling NewSetBitRunReader and then VisitSetBitRuns
-func VisitSetBitRuns(bitmap []byte, bitmapOffset int64, length int64, visitFn VisitFn) error {
- if bitmap == nil {
- return visitFn(0, length)
- }
- rdr := NewSetBitRunReader(bitmap, bitmapOffset, length)
- for {
- run := rdr.NextRun()
- if run.Length == 0 {
- break
- }
-
- if err := visitFn(run.Pos, run.Length); err != nil {
- return err
- }
- }
- return nil
-}
-
-func VisitSetBitRunsNoErr(bitmap []byte, bitmapOffset int64, length int64, visitFn func(pos, length int64)) {
- if bitmap == nil {
- visitFn(0, length)
- return
- }
- rdr := NewSetBitRunReader(bitmap, bitmapOffset, length)
- for {
- run := rdr.NextRun()
- if run.Length == 0 {
- break
- }
- visitFn(run.Pos, run.Length)
- }
-}
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/bitutils/bitmap_generate.go b/vendor/github.com/apache/arrow/go/v14/internal/bitutils/bitmap_generate.go
deleted file mode 100644
index 78219d812..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/bitutils/bitmap_generate.go
+++ /dev/null
@@ -1,109 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package bitutils
-
-import "github.com/apache/arrow/go/v14/arrow/bitutil"
-
-// GenerateBits writes sequential bits to a bitmap. Bits preceding the
-// initial start offset are preserved, bits following the bitmap may
-// get clobbered.
-func GenerateBits(bitmap []byte, start, length int64, g func() bool) {
- if length == 0 {
- return
- }
-
- cur := bitmap[start/8:]
- mask := bitutil.BitMask[start%8]
- curbyte := cur[0] & bitutil.PrecedingBitmask[start%8]
-
- for i := int64(0); i < length; i++ {
- bit := g()
- if bit {
- curbyte = curbyte | mask
- }
- mask <<= 1
- if mask == 0 {
- mask = 1
- cur[0] = curbyte
- cur = cur[1:]
- curbyte = 0
- }
- }
-
- if mask != 1 {
- cur[0] = curbyte
- }
-}
-
-// GenerateBitsUnrolled is like GenerateBits but unrolls its main loop for
-// higher performance.
-//
-// See the benchmarks for evidence.
-func GenerateBitsUnrolled(bitmap []byte, start, length int64, g func() bool) {
- if length == 0 {
- return
- }
-
- var (
- curbyte byte
- cur = bitmap[start/8:]
- startBitOffset uint64 = uint64(start % 8)
- mask = bitutil.BitMask[startBitOffset]
- remaining = length
- )
-
- if mask != 0x01 {
- curbyte = cur[0] & bitutil.PrecedingBitmask[startBitOffset]
- for mask != 0 && remaining > 0 {
- if g() {
- curbyte |= mask
- }
- mask <<= 1
- remaining--
- }
- cur[0] = curbyte
- cur = cur[1:]
- }
-
- var outResults [8]byte
- for remainingBytes := remaining / 8; remainingBytes > 0; remainingBytes-- {
- for i := 0; i < 8; i++ {
- if g() {
- outResults[i] = 1
- } else {
- outResults[i] = 0
- }
- }
- cur[0] = (outResults[0] | outResults[1]<<1 | outResults[2]<<2 |
- outResults[3]<<3 | outResults[4]<<4 | outResults[5]<<5 |
- outResults[6]<<6 | outResults[7]<<7)
- cur = cur[1:]
- }
-
- remainingBits := remaining % 8
- if remainingBits > 0 {
- curbyte = 0
- mask = 0x01
- for ; remainingBits > 0; remainingBits-- {
- if g() {
- curbyte |= mask
- }
- mask <<= 1
- }
- cur[0] = curbyte
- }
-}
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/hashing/hash_funcs.go b/vendor/github.com/apache/arrow/go/v14/internal/hashing/hash_funcs.go
deleted file mode 100644
index c1bdfeb6d..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/hashing/hash_funcs.go
+++ /dev/null
@@ -1,90 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package hashing
-
-import (
- "math/bits"
- "unsafe"
-
- "github.com/zeebo/xxh3"
-)
-
-func hashInt(val uint64, alg uint64) uint64 {
- // Two of xxhash's prime multipliers (which are chosen for their
- // bit dispersion properties)
- var multipliers = [2]uint64{11400714785074694791, 14029467366897019727}
- // Multiplying by the prime number mixes the low bits into the high bits,
- // then byte-swapping (which is a single CPU instruction) allows the
- // combined high and low bits to participate in the initial hash table index.
- return bits.ReverseBytes64(multipliers[alg] * val)
-}
-
-func hashFloat32(val float32, alg uint64) uint64 {
- // grab the raw byte pattern of the
- bt := *(*[4]byte)(unsafe.Pointer(&val))
- x := uint64(*(*uint32)(unsafe.Pointer(&bt[0])))
- hx := hashInt(x, alg)
- hy := hashInt(x, alg^1)
- return 4 ^ hx ^ hy
-}
-
-func hashFloat64(val float64, alg uint64) uint64 {
- bt := *(*[8]byte)(unsafe.Pointer(&val))
- hx := hashInt(uint64(*(*uint32)(unsafe.Pointer(&bt[4]))), alg)
- hy := hashInt(uint64(*(*uint32)(unsafe.Pointer(&bt[0]))), alg^1)
- return 8 ^ hx ^ hy
-}
-
-// prime constants used for slightly increasing the hash quality further
-var exprimes = [2]uint64{1609587929392839161, 9650029242287828579}
-
-// for smaller amounts of bytes this is faster than even calling into
-// xxh3 to do the Hash, so we specialize in order to get the benefits
-// of that performance.
-func Hash(b []byte, alg uint64) uint64 {
- n := uint32(len(b))
- if n <= 16 {
- switch {
- case n > 8:
- // 8 < length <= 16
- // apply same principle as above, but as two 64-bit ints
- x := *(*uint64)(unsafe.Pointer(&b[n-8]))
- y := *(*uint64)(unsafe.Pointer(&b[0]))
- hx := hashInt(x, alg)
- hy := hashInt(y, alg^1)
- return uint64(n) ^ hx ^ hy
- case n >= 4:
- // 4 < length <= 8
- // we can read the bytes as two overlapping 32-bit ints, apply different
- // hash functions to each in parallel
- // then xor the results
- x := *(*uint32)(unsafe.Pointer(&b[n-4]))
- y := *(*uint32)(unsafe.Pointer(&b[0]))
- hx := hashInt(uint64(x), alg)
- hy := hashInt(uint64(y), alg^1)
- return uint64(n) ^ hx ^ hy
- case n > 0:
- x := uint32((n << 24) ^ (uint32(b[0]) << 16) ^ (uint32(b[n/2]) << 8) ^ uint32(b[n-1]))
- return hashInt(uint64(x), alg)
- case n == 0:
- return 1
- }
- }
-
- // increase differentiation enough to improve hash quality
- return xxh3.Hash(b) + exprimes[alg]
-}
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/hashing/hash_string.go b/vendor/github.com/apache/arrow/go/v14/internal/hashing/hash_string.go
deleted file mode 100644
index b772c7d7f..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/hashing/hash_string.go
+++ /dev/null
@@ -1,26 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//go:build go1.20 || tinygo
-
-package hashing
-
-import "unsafe"
-
-func hashString(val string, alg uint64) uint64 {
- buf := unsafe.Slice(unsafe.StringData(val), len(val))
- return Hash(buf, alg)
-}
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/hashing/hash_string_go1.19.go b/vendor/github.com/apache/arrow/go/v14/internal/hashing/hash_string_go1.19.go
deleted file mode 100644
index f38eb5c52..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/hashing/hash_string_go1.19.go
+++ /dev/null
@@ -1,37 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//go:build !go1.20 && !tinygo
-
-package hashing
-
-import (
- "reflect"
- "unsafe"
-)
-
-func hashString(val string, alg uint64) uint64 {
- if val == "" {
- return Hash([]byte{}, alg)
- }
- // highly efficient way to get byte slice without copy before
- // the introduction of unsafe.StringData in go1.20
- // (https://stackoverflow.com/questions/59209493/how-to-use-unsafe-get-a-byte-slice-from-a-string-without-memory-copy)
- const MaxInt32 = 1<<31 - 1
- buf := (*[MaxInt32]byte)(unsafe.Pointer((*reflect.StringHeader)(
- unsafe.Pointer(&val)).Data))[: len(val)&MaxInt32 : len(val)&MaxInt32]
- return Hash(buf, alg)
-}
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/hashing/types.tmpldata b/vendor/github.com/apache/arrow/go/v14/internal/hashing/types.tmpldata
deleted file mode 100644
index 0ba6f765d..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/hashing/types.tmpldata
+++ /dev/null
@@ -1,42 +0,0 @@
-[
- {
- "Name": "Int8",
- "name": "int8"
- },
- {
- "Name": "Uint8",
- "name": "uint8"
- },
- {
- "Name": "Int16",
- "name": "int16"
- },
- {
- "Name": "Uint16",
- "name": "uint16"
- },
- {
- "Name": "Int32",
- "name": "int32"
- },
- {
- "Name": "Int64",
- "name": "int64"
- },
- {
- "Name": "Uint32",
- "name": "uint32"
- },
- {
- "Name": "Uint64",
- "name": "uint64"
- },
- {
- "Name": "Float32",
- "name": "float32"
- },
- {
- "Name": "Float64",
- "name": "float64"
- }
-]
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/hashing/xxh3_memo_table.gen.go b/vendor/github.com/apache/arrow/go/v14/internal/hashing/xxh3_memo_table.gen.go
deleted file mode 100644
index cc996552b..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/hashing/xxh3_memo_table.gen.go
+++ /dev/null
@@ -1,2833 +0,0 @@
-// Code generated by xxh3_memo_table.gen.go.tmpl. DO NOT EDIT.
-
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package hashing
-
-import (
- "math"
-
- "github.com/apache/arrow/go/v14/arrow"
- "github.com/apache/arrow/go/v14/arrow/bitutil"
- "github.com/apache/arrow/go/v14/internal/utils"
-)
-
-type payloadInt8 struct {
- val int8
- memoIdx int32
-}
-
-type entryInt8 struct {
- h uint64
- payload payloadInt8
-}
-
-func (e entryInt8) Valid() bool { return e.h != sentinel }
-
-// Int8HashTable is a hashtable specifically for int8 that
-// is utilized with the MemoTable to generalize interactions for easier
-// implementation of dictionaries without losing performance.
-type Int8HashTable struct {
- cap uint64
- capMask uint64
- size uint64
-
- entries []entryInt8
-}
-
-// NewInt8HashTable returns a new hash table for int8 values
-// initialized with the passed in capacity or 32 whichever is larger.
-func NewInt8HashTable(cap uint64) *Int8HashTable {
- initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
- ret := &Int8HashTable{cap: initCap, capMask: initCap - 1, size: 0}
- ret.entries = make([]entryInt8, initCap)
- return ret
-}
-
-// Reset drops all of the values in this hash table and re-initializes it
-// with the specified initial capacity as if by calling New, but without having
-// to reallocate the object.
-func (h *Int8HashTable) Reset(cap uint64) {
- h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
- h.capMask = h.cap - 1
- h.size = 0
- h.entries = make([]entryInt8, h.cap)
-}
-
-// CopyValues is used for copying the values out of the hash table into the
-// passed in slice, in the order that they were first inserted
-func (h *Int8HashTable) CopyValues(out []int8) {
- h.CopyValuesSubset(0, out)
-}
-
-// CopyValuesSubset copies a subset of the values in the hashtable out, starting
-// with the value at start, in the order that they were inserted.
-func (h *Int8HashTable) CopyValuesSubset(start int, out []int8) {
- h.VisitEntries(func(e *entryInt8) {
- idx := e.payload.memoIdx - int32(start)
- if idx >= 0 {
- out[idx] = e.payload.val
- }
- })
-}
-
-func (h *Int8HashTable) WriteOut(out []byte) {
- h.WriteOutSubset(0, out)
-}
-
-func (h *Int8HashTable) WriteOutSubset(start int, out []byte) {
- data := arrow.Int8Traits.CastFromBytes(out)
- h.VisitEntries(func(e *entryInt8) {
- idx := e.payload.memoIdx - int32(start)
- if idx >= 0 {
- data[idx] = e.payload.val
- }
- })
-}
-
-func (h *Int8HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
-
-func (Int8HashTable) fixHash(v uint64) uint64 {
- if v == sentinel {
- return 42
- }
- return v
-}
-
-// Lookup retrieves the entry for a given hash value assuming it's payload value returns
-// true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
-// and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
-func (h *Int8HashTable) Lookup(v uint64, cmp func(int8) bool) (*entryInt8, bool) {
- idx, ok := h.lookup(v, h.capMask, cmp)
- return &h.entries[idx], ok
-}
-
-func (h *Int8HashTable) lookup(v uint64, szMask uint64, cmp func(int8) bool) (uint64, bool) {
- const perturbShift uint8 = 5
-
- var (
- idx uint64
- perturb uint64
- e *entryInt8
- )
-
- v = h.fixHash(v)
- idx = v & szMask
- perturb = (v >> uint64(perturbShift)) + 1
-
- for {
- e = &h.entries[idx]
- if e.h == v && cmp(e.payload.val) {
- return idx, true
- }
-
- if e.h == sentinel {
- return idx, false
- }
-
- // perturbation logic inspired from CPython's set/dict object
- // the goal is that all 64 bits of unmasked hash value eventually
- // participate int he probing sequence, to minimize clustering
- idx = (idx + perturb) & szMask
- perturb = (perturb >> uint64(perturbShift)) + 1
- }
-}
-
-func (h *Int8HashTable) upsize(newcap uint64) error {
- newMask := newcap - 1
-
- oldEntries := h.entries
- h.entries = make([]entryInt8, newcap)
- for _, e := range oldEntries {
- if e.Valid() {
- idx, _ := h.lookup(e.h, newMask, func(int8) bool { return false })
- h.entries[idx] = e
- }
- }
- h.cap = newcap
- h.capMask = newMask
- return nil
-}
-
-// Insert updates the given entry with the provided hash value, payload value and memo index.
-// The entry pointer must have been retrieved via lookup in order to actually insert properly.
-func (h *Int8HashTable) Insert(e *entryInt8, v uint64, val int8, memoIdx int32) error {
- e.h = h.fixHash(v)
- e.payload.val = val
- e.payload.memoIdx = memoIdx
- h.size++
-
- if h.needUpsize() {
- h.upsize(h.cap * uint64(loadFactor) * 2)
- }
- return nil
-}
-
-// VisitEntries will call the passed in function on each *valid* entry in the hash table,
-// a valid entry being one which has had a value inserted into it.
-func (h *Int8HashTable) VisitEntries(visit func(*entryInt8)) {
- for _, e := range h.entries {
- if e.Valid() {
- visit(&e)
- }
- }
-}
-
-// Int8MemoTable is a wrapper over the appropriate hashtable to provide an interface
-// conforming to the MemoTable interface defined in the encoding package for general interactions
-// regarding dictionaries.
-type Int8MemoTable struct {
- tbl *Int8HashTable
- nullIdx int32
-}
-
-// NewInt8MemoTable returns a new memotable with num entries pre-allocated to reduce further
-// allocations when inserting.
-func NewInt8MemoTable(num int64) *Int8MemoTable {
- return &Int8MemoTable{tbl: NewInt8HashTable(uint64(num)), nullIdx: KeyNotFound}
-}
-
-func (Int8MemoTable) TypeTraits() TypeTraits {
- return arrow.Int8Traits
-}
-
-// Reset allows this table to be re-used by dumping all the data currently in the table.
-func (s *Int8MemoTable) Reset() {
- s.tbl.Reset(32)
- s.nullIdx = KeyNotFound
-}
-
-// Size returns the current number of inserted elements into the table including if a null
-// has been inserted.
-func (s *Int8MemoTable) Size() int {
- sz := int(s.tbl.size)
- if _, ok := s.GetNull(); ok {
- sz++
- }
- return sz
-}
-
-// GetNull returns the index of an inserted null or KeyNotFound along with a bool
-// that will be true if found and false if not.
-func (s *Int8MemoTable) GetNull() (int, bool) {
- return int(s.nullIdx), s.nullIdx != KeyNotFound
-}
-
-// GetOrInsertNull will return the index of the null entry or insert a null entry
-// if one currently doesn't exist. The found value will be true if there was already
-// a null in the table, and false if it inserted one.
-func (s *Int8MemoTable) GetOrInsertNull() (idx int, found bool) {
- idx, found = s.GetNull()
- if !found {
- idx = s.Size()
- s.nullIdx = int32(idx)
- }
- return
-}
-
-// CopyValues will copy the values from the memo table out into the passed in slice
-// which must be of the appropriate type.
-func (s *Int8MemoTable) CopyValues(out interface{}) {
- s.CopyValuesSubset(0, out)
-}
-
-// CopyValuesSubset is like CopyValues but only copies a subset of values starting
-// at the provided start index
-func (s *Int8MemoTable) CopyValuesSubset(start int, out interface{}) {
- s.tbl.CopyValuesSubset(start, out.([]int8))
-}
-
-func (s *Int8MemoTable) WriteOut(out []byte) {
- s.tbl.CopyValues(arrow.Int8Traits.CastFromBytes(out))
-}
-
-func (s *Int8MemoTable) WriteOutSubset(start int, out []byte) {
- s.tbl.CopyValuesSubset(start, arrow.Int8Traits.CastFromBytes(out))
-}
-
-func (s *Int8MemoTable) WriteOutLE(out []byte) {
- s.tbl.WriteOut(out)
-}
-
-func (s *Int8MemoTable) WriteOutSubsetLE(start int, out []byte) {
- s.tbl.WriteOutSubset(start, out)
-}
-
-// Get returns the index of the requested value in the hash table or KeyNotFound
-// along with a boolean indicating if it was found or not.
-func (s *Int8MemoTable) Get(val interface{}) (int, bool) {
-
- h := hashInt(uint64(val.(int8)), 0)
- if e, ok := s.tbl.Lookup(h, func(v int8) bool { return val.(int8) == v }); ok {
- return int(e.payload.memoIdx), ok
- }
- return KeyNotFound, false
-}
-
-// GetOrInsert will return the index of the specified value in the table, or insert the
-// value into the table and return the new index. found indicates whether or not it already
-// existed in the table (true) or was inserted by this call (false).
-func (s *Int8MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
-
- h := hashInt(uint64(val.(int8)), 0)
- e, ok := s.tbl.Lookup(h, func(v int8) bool {
- return val.(int8) == v
- })
-
- if ok {
- idx = int(e.payload.memoIdx)
- found = true
- } else {
- idx = s.Size()
- s.tbl.Insert(e, h, val.(int8), int32(idx))
- }
- return
-}
-
-// GetOrInsertBytes is unimplemented
-func (s *Int8MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
- panic("unimplemented")
-}
-
-type payloadUint8 struct {
- val uint8
- memoIdx int32
-}
-
-type entryUint8 struct {
- h uint64
- payload payloadUint8
-}
-
-func (e entryUint8) Valid() bool { return e.h != sentinel }
-
-// Uint8HashTable is a hashtable specifically for uint8 that
-// is utilized with the MemoTable to generalize interactions for easier
-// implementation of dictionaries without losing performance.
-type Uint8HashTable struct {
- cap uint64
- capMask uint64
- size uint64
-
- entries []entryUint8
-}
-
-// NewUint8HashTable returns a new hash table for uint8 values
-// initialized with the passed in capacity or 32 whichever is larger.
-func NewUint8HashTable(cap uint64) *Uint8HashTable {
- initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
- ret := &Uint8HashTable{cap: initCap, capMask: initCap - 1, size: 0}
- ret.entries = make([]entryUint8, initCap)
- return ret
-}
-
-// Reset drops all of the values in this hash table and re-initializes it
-// with the specified initial capacity as if by calling New, but without having
-// to reallocate the object.
-func (h *Uint8HashTable) Reset(cap uint64) {
- h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
- h.capMask = h.cap - 1
- h.size = 0
- h.entries = make([]entryUint8, h.cap)
-}
-
-// CopyValues is used for copying the values out of the hash table into the
-// passed in slice, in the order that they were first inserted
-func (h *Uint8HashTable) CopyValues(out []uint8) {
- h.CopyValuesSubset(0, out)
-}
-
-// CopyValuesSubset copies a subset of the values in the hashtable out, starting
-// with the value at start, in the order that they were inserted.
-func (h *Uint8HashTable) CopyValuesSubset(start int, out []uint8) {
- h.VisitEntries(func(e *entryUint8) {
- idx := e.payload.memoIdx - int32(start)
- if idx >= 0 {
- out[idx] = e.payload.val
- }
- })
-}
-
-func (h *Uint8HashTable) WriteOut(out []byte) {
- h.WriteOutSubset(0, out)
-}
-
-func (h *Uint8HashTable) WriteOutSubset(start int, out []byte) {
- data := arrow.Uint8Traits.CastFromBytes(out)
- h.VisitEntries(func(e *entryUint8) {
- idx := e.payload.memoIdx - int32(start)
- if idx >= 0 {
- data[idx] = e.payload.val
- }
- })
-}
-
-func (h *Uint8HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
-
-func (Uint8HashTable) fixHash(v uint64) uint64 {
- if v == sentinel {
- return 42
- }
- return v
-}
-
-// Lookup retrieves the entry for a given hash value assuming it's payload value returns
-// true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
-// and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
-func (h *Uint8HashTable) Lookup(v uint64, cmp func(uint8) bool) (*entryUint8, bool) {
- idx, ok := h.lookup(v, h.capMask, cmp)
- return &h.entries[idx], ok
-}
-
-func (h *Uint8HashTable) lookup(v uint64, szMask uint64, cmp func(uint8) bool) (uint64, bool) {
- const perturbShift uint8 = 5
-
- var (
- idx uint64
- perturb uint64
- e *entryUint8
- )
-
- v = h.fixHash(v)
- idx = v & szMask
- perturb = (v >> uint64(perturbShift)) + 1
-
- for {
- e = &h.entries[idx]
- if e.h == v && cmp(e.payload.val) {
- return idx, true
- }
-
- if e.h == sentinel {
- return idx, false
- }
-
- // perturbation logic inspired from CPython's set/dict object
- // the goal is that all 64 bits of unmasked hash value eventually
- // participate int he probing sequence, to minimize clustering
- idx = (idx + perturb) & szMask
- perturb = (perturb >> uint64(perturbShift)) + 1
- }
-}
-
-func (h *Uint8HashTable) upsize(newcap uint64) error {
- newMask := newcap - 1
-
- oldEntries := h.entries
- h.entries = make([]entryUint8, newcap)
- for _, e := range oldEntries {
- if e.Valid() {
- idx, _ := h.lookup(e.h, newMask, func(uint8) bool { return false })
- h.entries[idx] = e
- }
- }
- h.cap = newcap
- h.capMask = newMask
- return nil
-}
-
-// Insert updates the given entry with the provided hash value, payload value and memo index.
-// The entry pointer must have been retrieved via lookup in order to actually insert properly.
-func (h *Uint8HashTable) Insert(e *entryUint8, v uint64, val uint8, memoIdx int32) error {
- e.h = h.fixHash(v)
- e.payload.val = val
- e.payload.memoIdx = memoIdx
- h.size++
-
- if h.needUpsize() {
- h.upsize(h.cap * uint64(loadFactor) * 2)
- }
- return nil
-}
-
-// VisitEntries will call the passed in function on each *valid* entry in the hash table,
-// a valid entry being one which has had a value inserted into it.
-func (h *Uint8HashTable) VisitEntries(visit func(*entryUint8)) {
- for _, e := range h.entries {
- if e.Valid() {
- visit(&e)
- }
- }
-}
-
-// Uint8MemoTable is a wrapper over the appropriate hashtable to provide an interface
-// conforming to the MemoTable interface defined in the encoding package for general interactions
-// regarding dictionaries.
-type Uint8MemoTable struct {
- tbl *Uint8HashTable
- nullIdx int32
-}
-
-// NewUint8MemoTable returns a new memotable with num entries pre-allocated to reduce further
-// allocations when inserting.
-func NewUint8MemoTable(num int64) *Uint8MemoTable {
- return &Uint8MemoTable{tbl: NewUint8HashTable(uint64(num)), nullIdx: KeyNotFound}
-}
-
-func (Uint8MemoTable) TypeTraits() TypeTraits {
- return arrow.Uint8Traits
-}
-
-// Reset allows this table to be re-used by dumping all the data currently in the table.
-func (s *Uint8MemoTable) Reset() {
- s.tbl.Reset(32)
- s.nullIdx = KeyNotFound
-}
-
-// Size returns the current number of inserted elements into the table including if a null
-// has been inserted.
-func (s *Uint8MemoTable) Size() int {
- sz := int(s.tbl.size)
- if _, ok := s.GetNull(); ok {
- sz++
- }
- return sz
-}
-
-// GetNull returns the index of an inserted null or KeyNotFound along with a bool
-// that will be true if found and false if not.
-func (s *Uint8MemoTable) GetNull() (int, bool) {
- return int(s.nullIdx), s.nullIdx != KeyNotFound
-}
-
-// GetOrInsertNull will return the index of the null entry or insert a null entry
-// if one currently doesn't exist. The found value will be true if there was already
-// a null in the table, and false if it inserted one.
-func (s *Uint8MemoTable) GetOrInsertNull() (idx int, found bool) {
- idx, found = s.GetNull()
- if !found {
- idx = s.Size()
- s.nullIdx = int32(idx)
- }
- return
-}
-
-// CopyValues will copy the values from the memo table out into the passed in slice
-// which must be of the appropriate type.
-func (s *Uint8MemoTable) CopyValues(out interface{}) {
- s.CopyValuesSubset(0, out)
-}
-
-// CopyValuesSubset is like CopyValues but only copies a subset of values starting
-// at the provided start index
-func (s *Uint8MemoTable) CopyValuesSubset(start int, out interface{}) {
- s.tbl.CopyValuesSubset(start, out.([]uint8))
-}
-
-func (s *Uint8MemoTable) WriteOut(out []byte) {
- s.tbl.CopyValues(arrow.Uint8Traits.CastFromBytes(out))
-}
-
-func (s *Uint8MemoTable) WriteOutSubset(start int, out []byte) {
- s.tbl.CopyValuesSubset(start, arrow.Uint8Traits.CastFromBytes(out))
-}
-
-func (s *Uint8MemoTable) WriteOutLE(out []byte) {
- s.tbl.WriteOut(out)
-}
-
-func (s *Uint8MemoTable) WriteOutSubsetLE(start int, out []byte) {
- s.tbl.WriteOutSubset(start, out)
-}
-
-// Get returns the index of the requested value in the hash table or KeyNotFound
-// along with a boolean indicating if it was found or not.
-func (s *Uint8MemoTable) Get(val interface{}) (int, bool) {
-
- h := hashInt(uint64(val.(uint8)), 0)
- if e, ok := s.tbl.Lookup(h, func(v uint8) bool { return val.(uint8) == v }); ok {
- return int(e.payload.memoIdx), ok
- }
- return KeyNotFound, false
-}
-
-// GetOrInsert will return the index of the specified value in the table, or insert the
-// value into the table and return the new index. found indicates whether or not it already
-// existed in the table (true) or was inserted by this call (false).
-func (s *Uint8MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
-
- h := hashInt(uint64(val.(uint8)), 0)
- e, ok := s.tbl.Lookup(h, func(v uint8) bool {
- return val.(uint8) == v
- })
-
- if ok {
- idx = int(e.payload.memoIdx)
- found = true
- } else {
- idx = s.Size()
- s.tbl.Insert(e, h, val.(uint8), int32(idx))
- }
- return
-}
-
-// GetOrInsertBytes is unimplemented
-func (s *Uint8MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
- panic("unimplemented")
-}
-
-type payloadInt16 struct {
- val int16
- memoIdx int32
-}
-
-type entryInt16 struct {
- h uint64
- payload payloadInt16
-}
-
-func (e entryInt16) Valid() bool { return e.h != sentinel }
-
-// Int16HashTable is a hashtable specifically for int16 that
-// is utilized with the MemoTable to generalize interactions for easier
-// implementation of dictionaries without losing performance.
-type Int16HashTable struct {
- cap uint64
- capMask uint64
- size uint64
-
- entries []entryInt16
-}
-
-// NewInt16HashTable returns a new hash table for int16 values
-// initialized with the passed in capacity or 32 whichever is larger.
-func NewInt16HashTable(cap uint64) *Int16HashTable {
- initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
- ret := &Int16HashTable{cap: initCap, capMask: initCap - 1, size: 0}
- ret.entries = make([]entryInt16, initCap)
- return ret
-}
-
-// Reset drops all of the values in this hash table and re-initializes it
-// with the specified initial capacity as if by calling New, but without having
-// to reallocate the object.
-func (h *Int16HashTable) Reset(cap uint64) {
- h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
- h.capMask = h.cap - 1
- h.size = 0
- h.entries = make([]entryInt16, h.cap)
-}
-
-// CopyValues is used for copying the values out of the hash table into the
-// passed in slice, in the order that they were first inserted
-func (h *Int16HashTable) CopyValues(out []int16) {
- h.CopyValuesSubset(0, out)
-}
-
-// CopyValuesSubset copies a subset of the values in the hashtable out, starting
-// with the value at start, in the order that they were inserted.
-func (h *Int16HashTable) CopyValuesSubset(start int, out []int16) {
- h.VisitEntries(func(e *entryInt16) {
- idx := e.payload.memoIdx - int32(start)
- if idx >= 0 {
- out[idx] = e.payload.val
- }
- })
-}
-
-func (h *Int16HashTable) WriteOut(out []byte) {
- h.WriteOutSubset(0, out)
-}
-
-func (h *Int16HashTable) WriteOutSubset(start int, out []byte) {
- data := arrow.Int16Traits.CastFromBytes(out)
- h.VisitEntries(func(e *entryInt16) {
- idx := e.payload.memoIdx - int32(start)
- if idx >= 0 {
- data[idx] = utils.ToLEInt16(e.payload.val)
- }
- })
-}
-
-func (h *Int16HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
-
-func (Int16HashTable) fixHash(v uint64) uint64 {
- if v == sentinel {
- return 42
- }
- return v
-}
-
-// Lookup retrieves the entry for a given hash value assuming it's payload value returns
-// true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
-// and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
-func (h *Int16HashTable) Lookup(v uint64, cmp func(int16) bool) (*entryInt16, bool) {
- idx, ok := h.lookup(v, h.capMask, cmp)
- return &h.entries[idx], ok
-}
-
-func (h *Int16HashTable) lookup(v uint64, szMask uint64, cmp func(int16) bool) (uint64, bool) {
- const perturbShift uint8 = 5
-
- var (
- idx uint64
- perturb uint64
- e *entryInt16
- )
-
- v = h.fixHash(v)
- idx = v & szMask
- perturb = (v >> uint64(perturbShift)) + 1
-
- for {
- e = &h.entries[idx]
- if e.h == v && cmp(e.payload.val) {
- return idx, true
- }
-
- if e.h == sentinel {
- return idx, false
- }
-
- // perturbation logic inspired from CPython's set/dict object
- // the goal is that all 64 bits of unmasked hash value eventually
- // participate int he probing sequence, to minimize clustering
- idx = (idx + perturb) & szMask
- perturb = (perturb >> uint64(perturbShift)) + 1
- }
-}
-
-func (h *Int16HashTable) upsize(newcap uint64) error {
- newMask := newcap - 1
-
- oldEntries := h.entries
- h.entries = make([]entryInt16, newcap)
- for _, e := range oldEntries {
- if e.Valid() {
- idx, _ := h.lookup(e.h, newMask, func(int16) bool { return false })
- h.entries[idx] = e
- }
- }
- h.cap = newcap
- h.capMask = newMask
- return nil
-}
-
-// Insert updates the given entry with the provided hash value, payload value and memo index.
-// The entry pointer must have been retrieved via lookup in order to actually insert properly.
-func (h *Int16HashTable) Insert(e *entryInt16, v uint64, val int16, memoIdx int32) error {
- e.h = h.fixHash(v)
- e.payload.val = val
- e.payload.memoIdx = memoIdx
- h.size++
-
- if h.needUpsize() {
- h.upsize(h.cap * uint64(loadFactor) * 2)
- }
- return nil
-}
-
-// VisitEntries will call the passed in function on each *valid* entry in the hash table,
-// a valid entry being one which has had a value inserted into it.
-func (h *Int16HashTable) VisitEntries(visit func(*entryInt16)) {
- for _, e := range h.entries {
- if e.Valid() {
- visit(&e)
- }
- }
-}
-
-// Int16MemoTable is a wrapper over the appropriate hashtable to provide an interface
-// conforming to the MemoTable interface defined in the encoding package for general interactions
-// regarding dictionaries.
-type Int16MemoTable struct {
- tbl *Int16HashTable
- nullIdx int32
-}
-
-// NewInt16MemoTable returns a new memotable with num entries pre-allocated to reduce further
-// allocations when inserting.
-func NewInt16MemoTable(num int64) *Int16MemoTable {
- return &Int16MemoTable{tbl: NewInt16HashTable(uint64(num)), nullIdx: KeyNotFound}
-}
-
-func (Int16MemoTable) TypeTraits() TypeTraits {
- return arrow.Int16Traits
-}
-
-// Reset allows this table to be re-used by dumping all the data currently in the table.
-func (s *Int16MemoTable) Reset() {
- s.tbl.Reset(32)
- s.nullIdx = KeyNotFound
-}
-
-// Size returns the current number of inserted elements into the table including if a null
-// has been inserted.
-func (s *Int16MemoTable) Size() int {
- sz := int(s.tbl.size)
- if _, ok := s.GetNull(); ok {
- sz++
- }
- return sz
-}
-
-// GetNull returns the index of an inserted null or KeyNotFound along with a bool
-// that will be true if found and false if not.
-func (s *Int16MemoTable) GetNull() (int, bool) {
- return int(s.nullIdx), s.nullIdx != KeyNotFound
-}
-
-// GetOrInsertNull will return the index of the null entry or insert a null entry
-// if one currently doesn't exist. The found value will be true if there was already
-// a null in the table, and false if it inserted one.
-func (s *Int16MemoTable) GetOrInsertNull() (idx int, found bool) {
- idx, found = s.GetNull()
- if !found {
- idx = s.Size()
- s.nullIdx = int32(idx)
- }
- return
-}
-
-// CopyValues will copy the values from the memo table out into the passed in slice
-// which must be of the appropriate type.
-func (s *Int16MemoTable) CopyValues(out interface{}) {
- s.CopyValuesSubset(0, out)
-}
-
-// CopyValuesSubset is like CopyValues but only copies a subset of values starting
-// at the provided start index
-func (s *Int16MemoTable) CopyValuesSubset(start int, out interface{}) {
- s.tbl.CopyValuesSubset(start, out.([]int16))
-}
-
-func (s *Int16MemoTable) WriteOut(out []byte) {
- s.tbl.CopyValues(arrow.Int16Traits.CastFromBytes(out))
-}
-
-func (s *Int16MemoTable) WriteOutSubset(start int, out []byte) {
- s.tbl.CopyValuesSubset(start, arrow.Int16Traits.CastFromBytes(out))
-}
-
-func (s *Int16MemoTable) WriteOutLE(out []byte) {
- s.tbl.WriteOut(out)
-}
-
-func (s *Int16MemoTable) WriteOutSubsetLE(start int, out []byte) {
- s.tbl.WriteOutSubset(start, out)
-}
-
-// Get returns the index of the requested value in the hash table or KeyNotFound
-// along with a boolean indicating if it was found or not.
-func (s *Int16MemoTable) Get(val interface{}) (int, bool) {
-
- h := hashInt(uint64(val.(int16)), 0)
- if e, ok := s.tbl.Lookup(h, func(v int16) bool { return val.(int16) == v }); ok {
- return int(e.payload.memoIdx), ok
- }
- return KeyNotFound, false
-}
-
-// GetOrInsert will return the index of the specified value in the table, or insert the
-// value into the table and return the new index. found indicates whether or not it already
-// existed in the table (true) or was inserted by this call (false).
-func (s *Int16MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
-
- h := hashInt(uint64(val.(int16)), 0)
- e, ok := s.tbl.Lookup(h, func(v int16) bool {
- return val.(int16) == v
- })
-
- if ok {
- idx = int(e.payload.memoIdx)
- found = true
- } else {
- idx = s.Size()
- s.tbl.Insert(e, h, val.(int16), int32(idx))
- }
- return
-}
-
-// GetOrInsertBytes is unimplemented
-func (s *Int16MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
- panic("unimplemented")
-}
-
-type payloadUint16 struct {
- val uint16
- memoIdx int32
-}
-
-type entryUint16 struct {
- h uint64
- payload payloadUint16
-}
-
-func (e entryUint16) Valid() bool { return e.h != sentinel }
-
-// Uint16HashTable is a hashtable specifically for uint16 that
-// is utilized with the MemoTable to generalize interactions for easier
-// implementation of dictionaries without losing performance.
-type Uint16HashTable struct {
- cap uint64
- capMask uint64
- size uint64
-
- entries []entryUint16
-}
-
-// NewUint16HashTable returns a new hash table for uint16 values
-// initialized with the passed in capacity or 32 whichever is larger.
-func NewUint16HashTable(cap uint64) *Uint16HashTable {
- initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
- ret := &Uint16HashTable{cap: initCap, capMask: initCap - 1, size: 0}
- ret.entries = make([]entryUint16, initCap)
- return ret
-}
-
-// Reset drops all of the values in this hash table and re-initializes it
-// with the specified initial capacity as if by calling New, but without having
-// to reallocate the object.
-func (h *Uint16HashTable) Reset(cap uint64) {
- h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
- h.capMask = h.cap - 1
- h.size = 0
- h.entries = make([]entryUint16, h.cap)
-}
-
-// CopyValues is used for copying the values out of the hash table into the
-// passed in slice, in the order that they were first inserted
-func (h *Uint16HashTable) CopyValues(out []uint16) {
- h.CopyValuesSubset(0, out)
-}
-
-// CopyValuesSubset copies a subset of the values in the hashtable out, starting
-// with the value at start, in the order that they were inserted.
-func (h *Uint16HashTable) CopyValuesSubset(start int, out []uint16) {
- h.VisitEntries(func(e *entryUint16) {
- idx := e.payload.memoIdx - int32(start)
- if idx >= 0 {
- out[idx] = e.payload.val
- }
- })
-}
-
-func (h *Uint16HashTable) WriteOut(out []byte) {
- h.WriteOutSubset(0, out)
-}
-
-func (h *Uint16HashTable) WriteOutSubset(start int, out []byte) {
- data := arrow.Uint16Traits.CastFromBytes(out)
- h.VisitEntries(func(e *entryUint16) {
- idx := e.payload.memoIdx - int32(start)
- if idx >= 0 {
- data[idx] = utils.ToLEUint16(e.payload.val)
- }
- })
-}
-
-func (h *Uint16HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
-
-func (Uint16HashTable) fixHash(v uint64) uint64 {
- if v == sentinel {
- return 42
- }
- return v
-}
-
-// Lookup retrieves the entry for a given hash value assuming it's payload value returns
-// true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
-// and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
-func (h *Uint16HashTable) Lookup(v uint64, cmp func(uint16) bool) (*entryUint16, bool) {
- idx, ok := h.lookup(v, h.capMask, cmp)
- return &h.entries[idx], ok
-}
-
-func (h *Uint16HashTable) lookup(v uint64, szMask uint64, cmp func(uint16) bool) (uint64, bool) {
- const perturbShift uint8 = 5
-
- var (
- idx uint64
- perturb uint64
- e *entryUint16
- )
-
- v = h.fixHash(v)
- idx = v & szMask
- perturb = (v >> uint64(perturbShift)) + 1
-
- for {
- e = &h.entries[idx]
- if e.h == v && cmp(e.payload.val) {
- return idx, true
- }
-
- if e.h == sentinel {
- return idx, false
- }
-
- // perturbation logic inspired from CPython's set/dict object
- // the goal is that all 64 bits of unmasked hash value eventually
- // participate int he probing sequence, to minimize clustering
- idx = (idx + perturb) & szMask
- perturb = (perturb >> uint64(perturbShift)) + 1
- }
-}
-
-func (h *Uint16HashTable) upsize(newcap uint64) error {
- newMask := newcap - 1
-
- oldEntries := h.entries
- h.entries = make([]entryUint16, newcap)
- for _, e := range oldEntries {
- if e.Valid() {
- idx, _ := h.lookup(e.h, newMask, func(uint16) bool { return false })
- h.entries[idx] = e
- }
- }
- h.cap = newcap
- h.capMask = newMask
- return nil
-}
-
-// Insert updates the given entry with the provided hash value, payload value and memo index.
-// The entry pointer must have been retrieved via lookup in order to actually insert properly.
-func (h *Uint16HashTable) Insert(e *entryUint16, v uint64, val uint16, memoIdx int32) error {
- e.h = h.fixHash(v)
- e.payload.val = val
- e.payload.memoIdx = memoIdx
- h.size++
-
- if h.needUpsize() {
- h.upsize(h.cap * uint64(loadFactor) * 2)
- }
- return nil
-}
-
-// VisitEntries will call the passed in function on each *valid* entry in the hash table,
-// a valid entry being one which has had a value inserted into it.
-func (h *Uint16HashTable) VisitEntries(visit func(*entryUint16)) {
- for _, e := range h.entries {
- if e.Valid() {
- visit(&e)
- }
- }
-}
-
-// Uint16MemoTable is a wrapper over the appropriate hashtable to provide an interface
-// conforming to the MemoTable interface defined in the encoding package for general interactions
-// regarding dictionaries.
-type Uint16MemoTable struct {
- tbl *Uint16HashTable
- nullIdx int32
-}
-
-// NewUint16MemoTable returns a new memotable with num entries pre-allocated to reduce further
-// allocations when inserting.
-func NewUint16MemoTable(num int64) *Uint16MemoTable {
- return &Uint16MemoTable{tbl: NewUint16HashTable(uint64(num)), nullIdx: KeyNotFound}
-}
-
-func (Uint16MemoTable) TypeTraits() TypeTraits {
- return arrow.Uint16Traits
-}
-
-// Reset allows this table to be re-used by dumping all the data currently in the table.
-func (s *Uint16MemoTable) Reset() {
- s.tbl.Reset(32)
- s.nullIdx = KeyNotFound
-}
-
-// Size returns the current number of inserted elements into the table including if a null
-// has been inserted.
-func (s *Uint16MemoTable) Size() int {
- sz := int(s.tbl.size)
- if _, ok := s.GetNull(); ok {
- sz++
- }
- return sz
-}
-
-// GetNull returns the index of an inserted null or KeyNotFound along with a bool
-// that will be true if found and false if not.
-func (s *Uint16MemoTable) GetNull() (int, bool) {
- return int(s.nullIdx), s.nullIdx != KeyNotFound
-}
-
-// GetOrInsertNull will return the index of the null entry or insert a null entry
-// if one currently doesn't exist. The found value will be true if there was already
-// a null in the table, and false if it inserted one.
-func (s *Uint16MemoTable) GetOrInsertNull() (idx int, found bool) {
- idx, found = s.GetNull()
- if !found {
- idx = s.Size()
- s.nullIdx = int32(idx)
- }
- return
-}
-
-// CopyValues will copy the values from the memo table out into the passed in slice
-// which must be of the appropriate type.
-func (s *Uint16MemoTable) CopyValues(out interface{}) {
- s.CopyValuesSubset(0, out)
-}
-
-// CopyValuesSubset is like CopyValues but only copies a subset of values starting
-// at the provided start index
-func (s *Uint16MemoTable) CopyValuesSubset(start int, out interface{}) {
- s.tbl.CopyValuesSubset(start, out.([]uint16))
-}
-
-func (s *Uint16MemoTable) WriteOut(out []byte) {
- s.tbl.CopyValues(arrow.Uint16Traits.CastFromBytes(out))
-}
-
-func (s *Uint16MemoTable) WriteOutSubset(start int, out []byte) {
- s.tbl.CopyValuesSubset(start, arrow.Uint16Traits.CastFromBytes(out))
-}
-
-func (s *Uint16MemoTable) WriteOutLE(out []byte) {
- s.tbl.WriteOut(out)
-}
-
-func (s *Uint16MemoTable) WriteOutSubsetLE(start int, out []byte) {
- s.tbl.WriteOutSubset(start, out)
-}
-
-// Get returns the index of the requested value in the hash table or KeyNotFound
-// along with a boolean indicating if it was found or not.
-func (s *Uint16MemoTable) Get(val interface{}) (int, bool) {
-
- h := hashInt(uint64(val.(uint16)), 0)
- if e, ok := s.tbl.Lookup(h, func(v uint16) bool { return val.(uint16) == v }); ok {
- return int(e.payload.memoIdx), ok
- }
- return KeyNotFound, false
-}
-
-// GetOrInsert will return the index of the specified value in the table, or insert the
-// value into the table and return the new index. found indicates whether or not it already
-// existed in the table (true) or was inserted by this call (false).
-func (s *Uint16MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
-
- h := hashInt(uint64(val.(uint16)), 0)
- e, ok := s.tbl.Lookup(h, func(v uint16) bool {
- return val.(uint16) == v
- })
-
- if ok {
- idx = int(e.payload.memoIdx)
- found = true
- } else {
- idx = s.Size()
- s.tbl.Insert(e, h, val.(uint16), int32(idx))
- }
- return
-}
-
-// GetOrInsertBytes is unimplemented
-func (s *Uint16MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
- panic("unimplemented")
-}
-
-type payloadInt32 struct {
- val int32
- memoIdx int32
-}
-
-type entryInt32 struct {
- h uint64
- payload payloadInt32
-}
-
-func (e entryInt32) Valid() bool { return e.h != sentinel }
-
-// Int32HashTable is a hashtable specifically for int32 that
-// is utilized with the MemoTable to generalize interactions for easier
-// implementation of dictionaries without losing performance.
-type Int32HashTable struct {
- cap uint64
- capMask uint64
- size uint64
-
- entries []entryInt32
-}
-
-// NewInt32HashTable returns a new hash table for int32 values
-// initialized with the passed in capacity or 32 whichever is larger.
-func NewInt32HashTable(cap uint64) *Int32HashTable {
- initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
- ret := &Int32HashTable{cap: initCap, capMask: initCap - 1, size: 0}
- ret.entries = make([]entryInt32, initCap)
- return ret
-}
-
-// Reset drops all of the values in this hash table and re-initializes it
-// with the specified initial capacity as if by calling New, but without having
-// to reallocate the object.
-func (h *Int32HashTable) Reset(cap uint64) {
- h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
- h.capMask = h.cap - 1
- h.size = 0
- h.entries = make([]entryInt32, h.cap)
-}
-
-// CopyValues is used for copying the values out of the hash table into the
-// passed in slice, in the order that they were first inserted
-func (h *Int32HashTable) CopyValues(out []int32) {
- h.CopyValuesSubset(0, out)
-}
-
-// CopyValuesSubset copies a subset of the values in the hashtable out, starting
-// with the value at start, in the order that they were inserted.
-func (h *Int32HashTable) CopyValuesSubset(start int, out []int32) {
- h.VisitEntries(func(e *entryInt32) {
- idx := e.payload.memoIdx - int32(start)
- if idx >= 0 {
- out[idx] = e.payload.val
- }
- })
-}
-
-func (h *Int32HashTable) WriteOut(out []byte) {
- h.WriteOutSubset(0, out)
-}
-
-func (h *Int32HashTable) WriteOutSubset(start int, out []byte) {
- data := arrow.Int32Traits.CastFromBytes(out)
- h.VisitEntries(func(e *entryInt32) {
- idx := e.payload.memoIdx - int32(start)
- if idx >= 0 {
- data[idx] = utils.ToLEInt32(e.payload.val)
- }
- })
-}
-
-func (h *Int32HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
-
-func (Int32HashTable) fixHash(v uint64) uint64 {
- if v == sentinel {
- return 42
- }
- return v
-}
-
-// Lookup retrieves the entry for a given hash value assuming it's payload value returns
-// true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
-// and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
-func (h *Int32HashTable) Lookup(v uint64, cmp func(int32) bool) (*entryInt32, bool) {
- idx, ok := h.lookup(v, h.capMask, cmp)
- return &h.entries[idx], ok
-}
-
-func (h *Int32HashTable) lookup(v uint64, szMask uint64, cmp func(int32) bool) (uint64, bool) {
- const perturbShift uint8 = 5
-
- var (
- idx uint64
- perturb uint64
- e *entryInt32
- )
-
- v = h.fixHash(v)
- idx = v & szMask
- perturb = (v >> uint64(perturbShift)) + 1
-
- for {
- e = &h.entries[idx]
- if e.h == v && cmp(e.payload.val) {
- return idx, true
- }
-
- if e.h == sentinel {
- return idx, false
- }
-
- // perturbation logic inspired from CPython's set/dict object
- // the goal is that all 64 bits of unmasked hash value eventually
- // participate int he probing sequence, to minimize clustering
- idx = (idx + perturb) & szMask
- perturb = (perturb >> uint64(perturbShift)) + 1
- }
-}
-
-func (h *Int32HashTable) upsize(newcap uint64) error {
- newMask := newcap - 1
-
- oldEntries := h.entries
- h.entries = make([]entryInt32, newcap)
- for _, e := range oldEntries {
- if e.Valid() {
- idx, _ := h.lookup(e.h, newMask, func(int32) bool { return false })
- h.entries[idx] = e
- }
- }
- h.cap = newcap
- h.capMask = newMask
- return nil
-}
-
-// Insert updates the given entry with the provided hash value, payload value and memo index.
-// The entry pointer must have been retrieved via lookup in order to actually insert properly.
-func (h *Int32HashTable) Insert(e *entryInt32, v uint64, val int32, memoIdx int32) error {
- e.h = h.fixHash(v)
- e.payload.val = val
- e.payload.memoIdx = memoIdx
- h.size++
-
- if h.needUpsize() {
- h.upsize(h.cap * uint64(loadFactor) * 2)
- }
- return nil
-}
-
-// VisitEntries will call the passed in function on each *valid* entry in the hash table,
-// a valid entry being one which has had a value inserted into it.
-func (h *Int32HashTable) VisitEntries(visit func(*entryInt32)) {
- for _, e := range h.entries {
- if e.Valid() {
- visit(&e)
- }
- }
-}
-
-// Int32MemoTable is a wrapper over the appropriate hashtable to provide an interface
-// conforming to the MemoTable interface defined in the encoding package for general interactions
-// regarding dictionaries.
-type Int32MemoTable struct {
- tbl *Int32HashTable
- nullIdx int32
-}
-
-// NewInt32MemoTable returns a new memotable with num entries pre-allocated to reduce further
-// allocations when inserting.
-func NewInt32MemoTable(num int64) *Int32MemoTable {
- return &Int32MemoTable{tbl: NewInt32HashTable(uint64(num)), nullIdx: KeyNotFound}
-}
-
-func (Int32MemoTable) TypeTraits() TypeTraits {
- return arrow.Int32Traits
-}
-
-// Reset allows this table to be re-used by dumping all the data currently in the table.
-func (s *Int32MemoTable) Reset() {
- s.tbl.Reset(32)
- s.nullIdx = KeyNotFound
-}
-
-// Size returns the current number of inserted elements into the table including if a null
-// has been inserted.
-func (s *Int32MemoTable) Size() int {
- sz := int(s.tbl.size)
- if _, ok := s.GetNull(); ok {
- sz++
- }
- return sz
-}
-
-// GetNull returns the index of an inserted null or KeyNotFound along with a bool
-// that will be true if found and false if not.
-func (s *Int32MemoTable) GetNull() (int, bool) {
- return int(s.nullIdx), s.nullIdx != KeyNotFound
-}
-
-// GetOrInsertNull will return the index of the null entry or insert a null entry
-// if one currently doesn't exist. The found value will be true if there was already
-// a null in the table, and false if it inserted one.
-func (s *Int32MemoTable) GetOrInsertNull() (idx int, found bool) {
- idx, found = s.GetNull()
- if !found {
- idx = s.Size()
- s.nullIdx = int32(idx)
- }
- return
-}
-
-// CopyValues will copy the values from the memo table out into the passed in slice
-// which must be of the appropriate type.
-func (s *Int32MemoTable) CopyValues(out interface{}) {
- s.CopyValuesSubset(0, out)
-}
-
-// CopyValuesSubset is like CopyValues but only copies a subset of values starting
-// at the provided start index
-func (s *Int32MemoTable) CopyValuesSubset(start int, out interface{}) {
- s.tbl.CopyValuesSubset(start, out.([]int32))
-}
-
-func (s *Int32MemoTable) WriteOut(out []byte) {
- s.tbl.CopyValues(arrow.Int32Traits.CastFromBytes(out))
-}
-
-func (s *Int32MemoTable) WriteOutSubset(start int, out []byte) {
- s.tbl.CopyValuesSubset(start, arrow.Int32Traits.CastFromBytes(out))
-}
-
-func (s *Int32MemoTable) WriteOutLE(out []byte) {
- s.tbl.WriteOut(out)
-}
-
-func (s *Int32MemoTable) WriteOutSubsetLE(start int, out []byte) {
- s.tbl.WriteOutSubset(start, out)
-}
-
-// Get returns the index of the requested value in the hash table or KeyNotFound
-// along with a boolean indicating if it was found or not.
-func (s *Int32MemoTable) Get(val interface{}) (int, bool) {
-
- h := hashInt(uint64(val.(int32)), 0)
- if e, ok := s.tbl.Lookup(h, func(v int32) bool { return val.(int32) == v }); ok {
- return int(e.payload.memoIdx), ok
- }
- return KeyNotFound, false
-}
-
-// GetOrInsert will return the index of the specified value in the table, or insert the
-// value into the table and return the new index. found indicates whether or not it already
-// existed in the table (true) or was inserted by this call (false).
-func (s *Int32MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
-
- h := hashInt(uint64(val.(int32)), 0)
- e, ok := s.tbl.Lookup(h, func(v int32) bool {
- return val.(int32) == v
- })
-
- if ok {
- idx = int(e.payload.memoIdx)
- found = true
- } else {
- idx = s.Size()
- s.tbl.Insert(e, h, val.(int32), int32(idx))
- }
- return
-}
-
-// GetOrInsertBytes is unimplemented
-func (s *Int32MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
- panic("unimplemented")
-}
-
-type payloadInt64 struct {
- val int64
- memoIdx int32
-}
-
-type entryInt64 struct {
- h uint64
- payload payloadInt64
-}
-
-func (e entryInt64) Valid() bool { return e.h != sentinel }
-
-// Int64HashTable is a hashtable specifically for int64 that
-// is utilized with the MemoTable to generalize interactions for easier
-// implementation of dictionaries without losing performance.
-type Int64HashTable struct {
- cap uint64
- capMask uint64
- size uint64
-
- entries []entryInt64
-}
-
-// NewInt64HashTable returns a new hash table for int64 values
-// initialized with the passed in capacity or 32 whichever is larger.
-func NewInt64HashTable(cap uint64) *Int64HashTable {
- initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
- ret := &Int64HashTable{cap: initCap, capMask: initCap - 1, size: 0}
- ret.entries = make([]entryInt64, initCap)
- return ret
-}
-
-// Reset drops all of the values in this hash table and re-initializes it
-// with the specified initial capacity as if by calling New, but without having
-// to reallocate the object.
-func (h *Int64HashTable) Reset(cap uint64) {
- h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
- h.capMask = h.cap - 1
- h.size = 0
- h.entries = make([]entryInt64, h.cap)
-}
-
-// CopyValues is used for copying the values out of the hash table into the
-// passed in slice, in the order that they were first inserted
-func (h *Int64HashTable) CopyValues(out []int64) {
- h.CopyValuesSubset(0, out)
-}
-
-// CopyValuesSubset copies a subset of the values in the hashtable out, starting
-// with the value at start, in the order that they were inserted.
-func (h *Int64HashTable) CopyValuesSubset(start int, out []int64) {
- h.VisitEntries(func(e *entryInt64) {
- idx := e.payload.memoIdx - int32(start)
- if idx >= 0 {
- out[idx] = e.payload.val
- }
- })
-}
-
-func (h *Int64HashTable) WriteOut(out []byte) {
- h.WriteOutSubset(0, out)
-}
-
-func (h *Int64HashTable) WriteOutSubset(start int, out []byte) {
- data := arrow.Int64Traits.CastFromBytes(out)
- h.VisitEntries(func(e *entryInt64) {
- idx := e.payload.memoIdx - int32(start)
- if idx >= 0 {
- data[idx] = utils.ToLEInt64(e.payload.val)
- }
- })
-}
-
-func (h *Int64HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
-
-func (Int64HashTable) fixHash(v uint64) uint64 {
- if v == sentinel {
- return 42
- }
- return v
-}
-
-// Lookup retrieves the entry for a given hash value assuming it's payload value returns
-// true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
-// and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
-func (h *Int64HashTable) Lookup(v uint64, cmp func(int64) bool) (*entryInt64, bool) {
- idx, ok := h.lookup(v, h.capMask, cmp)
- return &h.entries[idx], ok
-}
-
-func (h *Int64HashTable) lookup(v uint64, szMask uint64, cmp func(int64) bool) (uint64, bool) {
- const perturbShift uint8 = 5
-
- var (
- idx uint64
- perturb uint64
- e *entryInt64
- )
-
- v = h.fixHash(v)
- idx = v & szMask
- perturb = (v >> uint64(perturbShift)) + 1
-
- for {
- e = &h.entries[idx]
- if e.h == v && cmp(e.payload.val) {
- return idx, true
- }
-
- if e.h == sentinel {
- return idx, false
- }
-
- // perturbation logic inspired from CPython's set/dict object
- // the goal is that all 64 bits of unmasked hash value eventually
- // participate int he probing sequence, to minimize clustering
- idx = (idx + perturb) & szMask
- perturb = (perturb >> uint64(perturbShift)) + 1
- }
-}
-
-func (h *Int64HashTable) upsize(newcap uint64) error {
- newMask := newcap - 1
-
- oldEntries := h.entries
- h.entries = make([]entryInt64, newcap)
- for _, e := range oldEntries {
- if e.Valid() {
- idx, _ := h.lookup(e.h, newMask, func(int64) bool { return false })
- h.entries[idx] = e
- }
- }
- h.cap = newcap
- h.capMask = newMask
- return nil
-}
-
-// Insert updates the given entry with the provided hash value, payload value and memo index.
-// The entry pointer must have been retrieved via lookup in order to actually insert properly.
-func (h *Int64HashTable) Insert(e *entryInt64, v uint64, val int64, memoIdx int32) error {
- e.h = h.fixHash(v)
- e.payload.val = val
- e.payload.memoIdx = memoIdx
- h.size++
-
- if h.needUpsize() {
- h.upsize(h.cap * uint64(loadFactor) * 2)
- }
- return nil
-}
-
-// VisitEntries will call the passed in function on each *valid* entry in the hash table,
-// a valid entry being one which has had a value inserted into it.
-func (h *Int64HashTable) VisitEntries(visit func(*entryInt64)) {
- for _, e := range h.entries {
- if e.Valid() {
- visit(&e)
- }
- }
-}
-
-// Int64MemoTable is a wrapper over the appropriate hashtable to provide an interface
-// conforming to the MemoTable interface defined in the encoding package for general interactions
-// regarding dictionaries.
-type Int64MemoTable struct {
- tbl *Int64HashTable
- nullIdx int32
-}
-
-// NewInt64MemoTable returns a new memotable with num entries pre-allocated to reduce further
-// allocations when inserting.
-func NewInt64MemoTable(num int64) *Int64MemoTable {
- return &Int64MemoTable{tbl: NewInt64HashTable(uint64(num)), nullIdx: KeyNotFound}
-}
-
-func (Int64MemoTable) TypeTraits() TypeTraits {
- return arrow.Int64Traits
-}
-
-// Reset allows this table to be re-used by dumping all the data currently in the table.
-func (s *Int64MemoTable) Reset() {
- s.tbl.Reset(32)
- s.nullIdx = KeyNotFound
-}
-
-// Size returns the current number of inserted elements into the table including if a null
-// has been inserted.
-func (s *Int64MemoTable) Size() int {
- sz := int(s.tbl.size)
- if _, ok := s.GetNull(); ok {
- sz++
- }
- return sz
-}
-
-// GetNull returns the index of an inserted null or KeyNotFound along with a bool
-// that will be true if found and false if not.
-func (s *Int64MemoTable) GetNull() (int, bool) {
- return int(s.nullIdx), s.nullIdx != KeyNotFound
-}
-
-// GetOrInsertNull will return the index of the null entry or insert a null entry
-// if one currently doesn't exist. The found value will be true if there was already
-// a null in the table, and false if it inserted one.
-func (s *Int64MemoTable) GetOrInsertNull() (idx int, found bool) {
- idx, found = s.GetNull()
- if !found {
- idx = s.Size()
- s.nullIdx = int32(idx)
- }
- return
-}
-
-// CopyValues will copy the values from the memo table out into the passed in slice
-// which must be of the appropriate type.
-func (s *Int64MemoTable) CopyValues(out interface{}) {
- s.CopyValuesSubset(0, out)
-}
-
-// CopyValuesSubset is like CopyValues but only copies a subset of values starting
-// at the provided start index
-func (s *Int64MemoTable) CopyValuesSubset(start int, out interface{}) {
- s.tbl.CopyValuesSubset(start, out.([]int64))
-}
-
-func (s *Int64MemoTable) WriteOut(out []byte) {
- s.tbl.CopyValues(arrow.Int64Traits.CastFromBytes(out))
-}
-
-func (s *Int64MemoTable) WriteOutSubset(start int, out []byte) {
- s.tbl.CopyValuesSubset(start, arrow.Int64Traits.CastFromBytes(out))
-}
-
-func (s *Int64MemoTable) WriteOutLE(out []byte) {
- s.tbl.WriteOut(out)
-}
-
-func (s *Int64MemoTable) WriteOutSubsetLE(start int, out []byte) {
- s.tbl.WriteOutSubset(start, out)
-}
-
-// Get returns the index of the requested value in the hash table or KeyNotFound
-// along with a boolean indicating if it was found or not.
-func (s *Int64MemoTable) Get(val interface{}) (int, bool) {
-
- h := hashInt(uint64(val.(int64)), 0)
- if e, ok := s.tbl.Lookup(h, func(v int64) bool { return val.(int64) == v }); ok {
- return int(e.payload.memoIdx), ok
- }
- return KeyNotFound, false
-}
-
-// GetOrInsert will return the index of the specified value in the table, or insert the
-// value into the table and return the new index. found indicates whether or not it already
-// existed in the table (true) or was inserted by this call (false).
-func (s *Int64MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
-
- h := hashInt(uint64(val.(int64)), 0)
- e, ok := s.tbl.Lookup(h, func(v int64) bool {
- return val.(int64) == v
- })
-
- if ok {
- idx = int(e.payload.memoIdx)
- found = true
- } else {
- idx = s.Size()
- s.tbl.Insert(e, h, val.(int64), int32(idx))
- }
- return
-}
-
-// GetOrInsertBytes is unimplemented
-func (s *Int64MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
- panic("unimplemented")
-}
-
-type payloadUint32 struct {
- val uint32
- memoIdx int32
-}
-
-type entryUint32 struct {
- h uint64
- payload payloadUint32
-}
-
-func (e entryUint32) Valid() bool { return e.h != sentinel }
-
-// Uint32HashTable is a hashtable specifically for uint32 that
-// is utilized with the MemoTable to generalize interactions for easier
-// implementation of dictionaries without losing performance.
-type Uint32HashTable struct {
- cap uint64
- capMask uint64
- size uint64
-
- entries []entryUint32
-}
-
-// NewUint32HashTable returns a new hash table for uint32 values
-// initialized with the passed in capacity or 32 whichever is larger.
-func NewUint32HashTable(cap uint64) *Uint32HashTable {
- initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
- ret := &Uint32HashTable{cap: initCap, capMask: initCap - 1, size: 0}
- ret.entries = make([]entryUint32, initCap)
- return ret
-}
-
-// Reset drops all of the values in this hash table and re-initializes it
-// with the specified initial capacity as if by calling New, but without having
-// to reallocate the object.
-func (h *Uint32HashTable) Reset(cap uint64) {
- h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
- h.capMask = h.cap - 1
- h.size = 0
- h.entries = make([]entryUint32, h.cap)
-}
-
-// CopyValues is used for copying the values out of the hash table into the
-// passed in slice, in the order that they were first inserted
-func (h *Uint32HashTable) CopyValues(out []uint32) {
- h.CopyValuesSubset(0, out)
-}
-
-// CopyValuesSubset copies a subset of the values in the hashtable out, starting
-// with the value at start, in the order that they were inserted.
-func (h *Uint32HashTable) CopyValuesSubset(start int, out []uint32) {
- h.VisitEntries(func(e *entryUint32) {
- idx := e.payload.memoIdx - int32(start)
- if idx >= 0 {
- out[idx] = e.payload.val
- }
- })
-}
-
-func (h *Uint32HashTable) WriteOut(out []byte) {
- h.WriteOutSubset(0, out)
-}
-
-func (h *Uint32HashTable) WriteOutSubset(start int, out []byte) {
- data := arrow.Uint32Traits.CastFromBytes(out)
- h.VisitEntries(func(e *entryUint32) {
- idx := e.payload.memoIdx - int32(start)
- if idx >= 0 {
- data[idx] = utils.ToLEUint32(e.payload.val)
- }
- })
-}
-
-func (h *Uint32HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
-
-func (Uint32HashTable) fixHash(v uint64) uint64 {
- if v == sentinel {
- return 42
- }
- return v
-}
-
-// Lookup retrieves the entry for a given hash value assuming it's payload value returns
-// true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
-// and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
-func (h *Uint32HashTable) Lookup(v uint64, cmp func(uint32) bool) (*entryUint32, bool) {
- idx, ok := h.lookup(v, h.capMask, cmp)
- return &h.entries[idx], ok
-}
-
-func (h *Uint32HashTable) lookup(v uint64, szMask uint64, cmp func(uint32) bool) (uint64, bool) {
- const perturbShift uint8 = 5
-
- var (
- idx uint64
- perturb uint64
- e *entryUint32
- )
-
- v = h.fixHash(v)
- idx = v & szMask
- perturb = (v >> uint64(perturbShift)) + 1
-
- for {
- e = &h.entries[idx]
- if e.h == v && cmp(e.payload.val) {
- return idx, true
- }
-
- if e.h == sentinel {
- return idx, false
- }
-
- // perturbation logic inspired from CPython's set/dict object
- // the goal is that all 64 bits of unmasked hash value eventually
- // participate int he probing sequence, to minimize clustering
- idx = (idx + perturb) & szMask
- perturb = (perturb >> uint64(perturbShift)) + 1
- }
-}
-
-func (h *Uint32HashTable) upsize(newcap uint64) error {
- newMask := newcap - 1
-
- oldEntries := h.entries
- h.entries = make([]entryUint32, newcap)
- for _, e := range oldEntries {
- if e.Valid() {
- idx, _ := h.lookup(e.h, newMask, func(uint32) bool { return false })
- h.entries[idx] = e
- }
- }
- h.cap = newcap
- h.capMask = newMask
- return nil
-}
-
-// Insert updates the given entry with the provided hash value, payload value and memo index.
-// The entry pointer must have been retrieved via lookup in order to actually insert properly.
-func (h *Uint32HashTable) Insert(e *entryUint32, v uint64, val uint32, memoIdx int32) error {
- e.h = h.fixHash(v)
- e.payload.val = val
- e.payload.memoIdx = memoIdx
- h.size++
-
- if h.needUpsize() {
- h.upsize(h.cap * uint64(loadFactor) * 2)
- }
- return nil
-}
-
-// VisitEntries will call the passed in function on each *valid* entry in the hash table,
-// a valid entry being one which has had a value inserted into it.
-func (h *Uint32HashTable) VisitEntries(visit func(*entryUint32)) {
- for _, e := range h.entries {
- if e.Valid() {
- visit(&e)
- }
- }
-}
-
-// Uint32MemoTable is a wrapper over the appropriate hashtable to provide an interface
-// conforming to the MemoTable interface defined in the encoding package for general interactions
-// regarding dictionaries.
-type Uint32MemoTable struct {
- tbl *Uint32HashTable
- nullIdx int32
-}
-
-// NewUint32MemoTable returns a new memotable with num entries pre-allocated to reduce further
-// allocations when inserting.
-func NewUint32MemoTable(num int64) *Uint32MemoTable {
- return &Uint32MemoTable{tbl: NewUint32HashTable(uint64(num)), nullIdx: KeyNotFound}
-}
-
-func (Uint32MemoTable) TypeTraits() TypeTraits {
- return arrow.Uint32Traits
-}
-
-// Reset allows this table to be re-used by dumping all the data currently in the table.
-func (s *Uint32MemoTable) Reset() {
- s.tbl.Reset(32)
- s.nullIdx = KeyNotFound
-}
-
-// Size returns the current number of inserted elements into the table including if a null
-// has been inserted.
-func (s *Uint32MemoTable) Size() int {
- sz := int(s.tbl.size)
- if _, ok := s.GetNull(); ok {
- sz++
- }
- return sz
-}
-
-// GetNull returns the index of an inserted null or KeyNotFound along with a bool
-// that will be true if found and false if not.
-func (s *Uint32MemoTable) GetNull() (int, bool) {
- return int(s.nullIdx), s.nullIdx != KeyNotFound
-}
-
-// GetOrInsertNull will return the index of the null entry or insert a null entry
-// if one currently doesn't exist. The found value will be true if there was already
-// a null in the table, and false if it inserted one.
-func (s *Uint32MemoTable) GetOrInsertNull() (idx int, found bool) {
- idx, found = s.GetNull()
- if !found {
- idx = s.Size()
- s.nullIdx = int32(idx)
- }
- return
-}
-
-// CopyValues will copy the values from the memo table out into the passed in slice
-// which must be of the appropriate type.
-func (s *Uint32MemoTable) CopyValues(out interface{}) {
- s.CopyValuesSubset(0, out)
-}
-
-// CopyValuesSubset is like CopyValues but only copies a subset of values starting
-// at the provided start index
-func (s *Uint32MemoTable) CopyValuesSubset(start int, out interface{}) {
- s.tbl.CopyValuesSubset(start, out.([]uint32))
-}
-
-func (s *Uint32MemoTable) WriteOut(out []byte) {
- s.tbl.CopyValues(arrow.Uint32Traits.CastFromBytes(out))
-}
-
-func (s *Uint32MemoTable) WriteOutSubset(start int, out []byte) {
- s.tbl.CopyValuesSubset(start, arrow.Uint32Traits.CastFromBytes(out))
-}
-
-func (s *Uint32MemoTable) WriteOutLE(out []byte) {
- s.tbl.WriteOut(out)
-}
-
-func (s *Uint32MemoTable) WriteOutSubsetLE(start int, out []byte) {
- s.tbl.WriteOutSubset(start, out)
-}
-
-// Get returns the index of the requested value in the hash table or KeyNotFound
-// along with a boolean indicating if it was found or not.
-func (s *Uint32MemoTable) Get(val interface{}) (int, bool) {
-
- h := hashInt(uint64(val.(uint32)), 0)
- if e, ok := s.tbl.Lookup(h, func(v uint32) bool { return val.(uint32) == v }); ok {
- return int(e.payload.memoIdx), ok
- }
- return KeyNotFound, false
-}
-
-// GetOrInsert will return the index of the specified value in the table, or insert the
-// value into the table and return the new index. found indicates whether or not it already
-// existed in the table (true) or was inserted by this call (false).
-func (s *Uint32MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
-
- h := hashInt(uint64(val.(uint32)), 0)
- e, ok := s.tbl.Lookup(h, func(v uint32) bool {
- return val.(uint32) == v
- })
-
- if ok {
- idx = int(e.payload.memoIdx)
- found = true
- } else {
- idx = s.Size()
- s.tbl.Insert(e, h, val.(uint32), int32(idx))
- }
- return
-}
-
-// GetOrInsertBytes is unimplemented
-func (s *Uint32MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
- panic("unimplemented")
-}
-
-type payloadUint64 struct {
- val uint64
- memoIdx int32
-}
-
-type entryUint64 struct {
- h uint64
- payload payloadUint64
-}
-
-func (e entryUint64) Valid() bool { return e.h != sentinel }
-
-// Uint64HashTable is a hashtable specifically for uint64 that
-// is utilized with the MemoTable to generalize interactions for easier
-// implementation of dictionaries without losing performance.
-type Uint64HashTable struct {
- cap uint64
- capMask uint64
- size uint64
-
- entries []entryUint64
-}
-
-// NewUint64HashTable returns a new hash table for uint64 values
-// initialized with the passed in capacity or 32 whichever is larger.
-func NewUint64HashTable(cap uint64) *Uint64HashTable {
- initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
- ret := &Uint64HashTable{cap: initCap, capMask: initCap - 1, size: 0}
- ret.entries = make([]entryUint64, initCap)
- return ret
-}
-
-// Reset drops all of the values in this hash table and re-initializes it
-// with the specified initial capacity as if by calling New, but without having
-// to reallocate the object.
-func (h *Uint64HashTable) Reset(cap uint64) {
- h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
- h.capMask = h.cap - 1
- h.size = 0
- h.entries = make([]entryUint64, h.cap)
-}
-
-// CopyValues is used for copying the values out of the hash table into the
-// passed in slice, in the order that they were first inserted
-func (h *Uint64HashTable) CopyValues(out []uint64) {
- h.CopyValuesSubset(0, out)
-}
-
-// CopyValuesSubset copies a subset of the values in the hashtable out, starting
-// with the value at start, in the order that they were inserted.
-func (h *Uint64HashTable) CopyValuesSubset(start int, out []uint64) {
- h.VisitEntries(func(e *entryUint64) {
- idx := e.payload.memoIdx - int32(start)
- if idx >= 0 {
- out[idx] = e.payload.val
- }
- })
-}
-
-func (h *Uint64HashTable) WriteOut(out []byte) {
- h.WriteOutSubset(0, out)
-}
-
-func (h *Uint64HashTable) WriteOutSubset(start int, out []byte) {
- data := arrow.Uint64Traits.CastFromBytes(out)
- h.VisitEntries(func(e *entryUint64) {
- idx := e.payload.memoIdx - int32(start)
- if idx >= 0 {
- data[idx] = utils.ToLEUint64(e.payload.val)
- }
- })
-}
-
-func (h *Uint64HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
-
-func (Uint64HashTable) fixHash(v uint64) uint64 {
- if v == sentinel {
- return 42
- }
- return v
-}
-
-// Lookup retrieves the entry for a given hash value assuming it's payload value returns
-// true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
-// and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
-func (h *Uint64HashTable) Lookup(v uint64, cmp func(uint64) bool) (*entryUint64, bool) {
- idx, ok := h.lookup(v, h.capMask, cmp)
- return &h.entries[idx], ok
-}
-
-func (h *Uint64HashTable) lookup(v uint64, szMask uint64, cmp func(uint64) bool) (uint64, bool) {
- const perturbShift uint8 = 5
-
- var (
- idx uint64
- perturb uint64
- e *entryUint64
- )
-
- v = h.fixHash(v)
- idx = v & szMask
- perturb = (v >> uint64(perturbShift)) + 1
-
- for {
- e = &h.entries[idx]
- if e.h == v && cmp(e.payload.val) {
- return idx, true
- }
-
- if e.h == sentinel {
- return idx, false
- }
-
- // perturbation logic inspired from CPython's set/dict object
- // the goal is that all 64 bits of unmasked hash value eventually
- // participate int he probing sequence, to minimize clustering
- idx = (idx + perturb) & szMask
- perturb = (perturb >> uint64(perturbShift)) + 1
- }
-}
-
-func (h *Uint64HashTable) upsize(newcap uint64) error {
- newMask := newcap - 1
-
- oldEntries := h.entries
- h.entries = make([]entryUint64, newcap)
- for _, e := range oldEntries {
- if e.Valid() {
- idx, _ := h.lookup(e.h, newMask, func(uint64) bool { return false })
- h.entries[idx] = e
- }
- }
- h.cap = newcap
- h.capMask = newMask
- return nil
-}
-
-// Insert updates the given entry with the provided hash value, payload value and memo index.
-// The entry pointer must have been retrieved via lookup in order to actually insert properly.
-func (h *Uint64HashTable) Insert(e *entryUint64, v uint64, val uint64, memoIdx int32) error {
- e.h = h.fixHash(v)
- e.payload.val = val
- e.payload.memoIdx = memoIdx
- h.size++
-
- if h.needUpsize() {
- h.upsize(h.cap * uint64(loadFactor) * 2)
- }
- return nil
-}
-
-// VisitEntries will call the passed in function on each *valid* entry in the hash table,
-// a valid entry being one which has had a value inserted into it.
-func (h *Uint64HashTable) VisitEntries(visit func(*entryUint64)) {
- for _, e := range h.entries {
- if e.Valid() {
- visit(&e)
- }
- }
-}
-
-// Uint64MemoTable is a wrapper over the appropriate hashtable to provide an interface
-// conforming to the MemoTable interface defined in the encoding package for general interactions
-// regarding dictionaries.
-type Uint64MemoTable struct {
- tbl *Uint64HashTable
- nullIdx int32
-}
-
-// NewUint64MemoTable returns a new memotable with num entries pre-allocated to reduce further
-// allocations when inserting.
-func NewUint64MemoTable(num int64) *Uint64MemoTable {
- return &Uint64MemoTable{tbl: NewUint64HashTable(uint64(num)), nullIdx: KeyNotFound}
-}
-
-func (Uint64MemoTable) TypeTraits() TypeTraits {
- return arrow.Uint64Traits
-}
-
-// Reset allows this table to be re-used by dumping all the data currently in the table.
-func (s *Uint64MemoTable) Reset() {
- s.tbl.Reset(32)
- s.nullIdx = KeyNotFound
-}
-
-// Size returns the current number of inserted elements into the table including if a null
-// has been inserted.
-func (s *Uint64MemoTable) Size() int {
- sz := int(s.tbl.size)
- if _, ok := s.GetNull(); ok {
- sz++
- }
- return sz
-}
-
-// GetNull returns the index of an inserted null or KeyNotFound along with a bool
-// that will be true if found and false if not.
-func (s *Uint64MemoTable) GetNull() (int, bool) {
- return int(s.nullIdx), s.nullIdx != KeyNotFound
-}
-
-// GetOrInsertNull will return the index of the null entry or insert a null entry
-// if one currently doesn't exist. The found value will be true if there was already
-// a null in the table, and false if it inserted one.
-func (s *Uint64MemoTable) GetOrInsertNull() (idx int, found bool) {
- idx, found = s.GetNull()
- if !found {
- idx = s.Size()
- s.nullIdx = int32(idx)
- }
- return
-}
-
-// CopyValues will copy the values from the memo table out into the passed in slice
-// which must be of the appropriate type.
-func (s *Uint64MemoTable) CopyValues(out interface{}) {
- s.CopyValuesSubset(0, out)
-}
-
-// CopyValuesSubset is like CopyValues but only copies a subset of values starting
-// at the provided start index
-func (s *Uint64MemoTable) CopyValuesSubset(start int, out interface{}) {
- s.tbl.CopyValuesSubset(start, out.([]uint64))
-}
-
-func (s *Uint64MemoTable) WriteOut(out []byte) {
- s.tbl.CopyValues(arrow.Uint64Traits.CastFromBytes(out))
-}
-
-func (s *Uint64MemoTable) WriteOutSubset(start int, out []byte) {
- s.tbl.CopyValuesSubset(start, arrow.Uint64Traits.CastFromBytes(out))
-}
-
-func (s *Uint64MemoTable) WriteOutLE(out []byte) {
- s.tbl.WriteOut(out)
-}
-
-func (s *Uint64MemoTable) WriteOutSubsetLE(start int, out []byte) {
- s.tbl.WriteOutSubset(start, out)
-}
-
-// Get returns the index of the requested value in the hash table or KeyNotFound
-// along with a boolean indicating if it was found or not.
-func (s *Uint64MemoTable) Get(val interface{}) (int, bool) {
-
- h := hashInt(uint64(val.(uint64)), 0)
- if e, ok := s.tbl.Lookup(h, func(v uint64) bool { return val.(uint64) == v }); ok {
- return int(e.payload.memoIdx), ok
- }
- return KeyNotFound, false
-}
-
-// GetOrInsert will return the index of the specified value in the table, or insert the
-// value into the table and return the new index. found indicates whether or not it already
-// existed in the table (true) or was inserted by this call (false).
-func (s *Uint64MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
-
- h := hashInt(uint64(val.(uint64)), 0)
- e, ok := s.tbl.Lookup(h, func(v uint64) bool {
- return val.(uint64) == v
- })
-
- if ok {
- idx = int(e.payload.memoIdx)
- found = true
- } else {
- idx = s.Size()
- s.tbl.Insert(e, h, val.(uint64), int32(idx))
- }
- return
-}
-
-// GetOrInsertBytes is unimplemented
-func (s *Uint64MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
- panic("unimplemented")
-}
-
-type payloadFloat32 struct {
- val float32
- memoIdx int32
-}
-
-type entryFloat32 struct {
- h uint64
- payload payloadFloat32
-}
-
-func (e entryFloat32) Valid() bool { return e.h != sentinel }
-
-// Float32HashTable is a hashtable specifically for float32 that
-// is utilized with the MemoTable to generalize interactions for easier
-// implementation of dictionaries without losing performance.
-type Float32HashTable struct {
- cap uint64
- capMask uint64
- size uint64
-
- entries []entryFloat32
-}
-
-// NewFloat32HashTable returns a new hash table for float32 values
-// initialized with the passed in capacity or 32 whichever is larger.
-func NewFloat32HashTable(cap uint64) *Float32HashTable {
- initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
- ret := &Float32HashTable{cap: initCap, capMask: initCap - 1, size: 0}
- ret.entries = make([]entryFloat32, initCap)
- return ret
-}
-
-// Reset drops all of the values in this hash table and re-initializes it
-// with the specified initial capacity as if by calling New, but without having
-// to reallocate the object.
-func (h *Float32HashTable) Reset(cap uint64) {
- h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
- h.capMask = h.cap - 1
- h.size = 0
- h.entries = make([]entryFloat32, h.cap)
-}
-
-// CopyValues is used for copying the values out of the hash table into the
-// passed in slice, in the order that they were first inserted
-func (h *Float32HashTable) CopyValues(out []float32) {
- h.CopyValuesSubset(0, out)
-}
-
-// CopyValuesSubset copies a subset of the values in the hashtable out, starting
-// with the value at start, in the order that they were inserted.
-func (h *Float32HashTable) CopyValuesSubset(start int, out []float32) {
- h.VisitEntries(func(e *entryFloat32) {
- idx := e.payload.memoIdx - int32(start)
- if idx >= 0 {
- out[idx] = e.payload.val
- }
- })
-}
-
-func (h *Float32HashTable) WriteOut(out []byte) {
- h.WriteOutSubset(0, out)
-}
-
-func (h *Float32HashTable) WriteOutSubset(start int, out []byte) {
- data := arrow.Float32Traits.CastFromBytes(out)
- h.VisitEntries(func(e *entryFloat32) {
- idx := e.payload.memoIdx - int32(start)
- if idx >= 0 {
- data[idx] = utils.ToLEFloat32(e.payload.val)
- }
- })
-}
-
-func (h *Float32HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
-
-func (Float32HashTable) fixHash(v uint64) uint64 {
- if v == sentinel {
- return 42
- }
- return v
-}
-
-// Lookup retrieves the entry for a given hash value assuming it's payload value returns
-// true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
-// and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false.
-func (h *Float32HashTable) Lookup(v uint64, cmp func(float32) bool) (*entryFloat32, bool) {
- idx, ok := h.lookup(v, h.capMask, cmp)
- return &h.entries[idx], ok
-}
-
-func (h *Float32HashTable) lookup(v uint64, szMask uint64, cmp func(float32) bool) (uint64, bool) {
- const perturbShift uint8 = 5
-
- var (
- idx uint64
- perturb uint64
- e *entryFloat32
- )
-
- v = h.fixHash(v)
- idx = v & szMask
- perturb = (v >> uint64(perturbShift)) + 1
-
- for {
- e = &h.entries[idx]
- if e.h == v && cmp(e.payload.val) {
- return idx, true
- }
-
- if e.h == sentinel {
- return idx, false
- }
-
- // perturbation logic inspired from CPython's set/dict object
- // the goal is that all 64 bits of unmasked hash value eventually
- // participate int he probing sequence, to minimize clustering
- idx = (idx + perturb) & szMask
- perturb = (perturb >> uint64(perturbShift)) + 1
- }
-}
-
-func (h *Float32HashTable) upsize(newcap uint64) error {
- newMask := newcap - 1
-
- oldEntries := h.entries
- h.entries = make([]entryFloat32, newcap)
- for _, e := range oldEntries {
- if e.Valid() {
- idx, _ := h.lookup(e.h, newMask, func(float32) bool { return false })
- h.entries[idx] = e
- }
- }
- h.cap = newcap
- h.capMask = newMask
- return nil
-}
-
-// Insert updates the given entry with the provided hash value, payload value and memo index.
-// The entry pointer must have been retrieved via lookup in order to actually insert properly.
-func (h *Float32HashTable) Insert(e *entryFloat32, v uint64, val float32, memoIdx int32) error {
- e.h = h.fixHash(v)
- e.payload.val = val
- e.payload.memoIdx = memoIdx
- h.size++
-
- if h.needUpsize() {
- h.upsize(h.cap * uint64(loadFactor) * 2)
- }
- return nil
-}
-
-// VisitEntries will call the passed in function on each *valid* entry in the hash table,
-// a valid entry being one which has had a value inserted into it.
-func (h *Float32HashTable) VisitEntries(visit func(*entryFloat32)) {
- for _, e := range h.entries {
- if e.Valid() {
- visit(&e)
- }
- }
-}
-
-// Float32MemoTable is a wrapper over the appropriate hashtable to provide an interface
-// conforming to the MemoTable interface defined in the encoding package for general interactions
-// regarding dictionaries.
-type Float32MemoTable struct {
- tbl *Float32HashTable
- nullIdx int32
-}
-
-// NewFloat32MemoTable returns a new memotable with num entries pre-allocated to reduce further
-// allocations when inserting.
-func NewFloat32MemoTable(num int64) *Float32MemoTable {
- return &Float32MemoTable{tbl: NewFloat32HashTable(uint64(num)), nullIdx: KeyNotFound}
-}
-
-func (Float32MemoTable) TypeTraits() TypeTraits {
- return arrow.Float32Traits
-}
-
-// Reset allows this table to be re-used by dumping all the data currently in the table.
-func (s *Float32MemoTable) Reset() {
- s.tbl.Reset(32)
- s.nullIdx = KeyNotFound
-}
-
-// Size returns the current number of inserted elements into the table including if a null
-// has been inserted.
-func (s *Float32MemoTable) Size() int {
- sz := int(s.tbl.size)
- if _, ok := s.GetNull(); ok {
- sz++
- }
- return sz
-}
-
-// GetNull returns the index of an inserted null or KeyNotFound along with a bool
-// that will be true if found and false if not.
-func (s *Float32MemoTable) GetNull() (int, bool) {
- return int(s.nullIdx), s.nullIdx != KeyNotFound
-}
-
-// GetOrInsertNull returns the memo index of the null entry, registering one first
-// if the table does not have it yet. found is true when the null already existed
-// and false when this call inserted it; a newly inserted null takes the next free
-// index, i.e. the table's current Size.
-func (s *Float32MemoTable) GetOrInsertNull() (idx int, found bool) {
-	if existing, ok := s.GetNull(); ok {
-		return existing, true
-	}
-
-	next := s.Size()
-	s.nullIdx = int32(next)
-	return next, false
-}
-
-// CopyValues will copy the values from the memo table out into the passed in slice
-// which must be of the appropriate type. It is CopyValuesSubset with a start of 0,
-// so it panics if out is not a []float32.
-func (s *Float32MemoTable) CopyValues(out interface{}) {
- s.CopyValuesSubset(0, out)
-}
-
-// CopyValuesSubset is like CopyValues but only copies a subset of values starting
-// at the provided start index. out is type-asserted to []float32, which panics
-// for any other dynamic type.
-func (s *Float32MemoTable) CopyValuesSubset(start int, out interface{}) {
- s.tbl.CopyValuesSubset(start, out.([]float32))
-}
-
-// WriteOut reinterprets out as a []float32 via arrow.Float32Traits.CastFromBytes
-// and copies the table's values into it using the hash table's CopyValues.
-func (s *Float32MemoTable) WriteOut(out []byte) {
- s.tbl.CopyValues(arrow.Float32Traits.CastFromBytes(out))
-}
-
-// WriteOutSubset is like WriteOut but passes start through to the hash table's
-// CopyValuesSubset, so only values from that memo index onward are written.
-func (s *Float32MemoTable) WriteOutSubset(start int, out []byte) {
- s.tbl.CopyValuesSubset(start, arrow.Float32Traits.CastFromBytes(out))
-}
-
-// WriteOutLE delegates to the hash table's WriteOut.
-// NOTE(review): judging by the LE suffix this is the little-endian variant of
-// WriteOut; the Float32 hash-table side is outside this view - confirm there.
-func (s *Float32MemoTable) WriteOutLE(out []byte) {
- s.tbl.WriteOut(out)
-}
-
-// WriteOutSubsetLE delegates to the hash table's WriteOutSubset, passing start
-// and out through unchanged.
-func (s *Float32MemoTable) WriteOutSubsetLE(start int, out []byte) {
- s.tbl.WriteOutSubset(start, out)
-}
-
-// Get looks val up without modifying the table. It returns the memo index of the
-// value and true when present, or KeyNotFound and false otherwise. val must hold
-// a float32; any other dynamic type makes the type assertion panic.
-func (s *Float32MemoTable) Get(val interface{}) (int, bool) {
-	key := val.(float32)
-
-	var matches func(float32) bool
-	if math.IsNaN(float64(key)) {
-		// Every NaN counts as the same key: hash one canonical NaN bit pattern
-		// and accept any stored NaN as a match.
-		key = float32(math.NaN())
-		matches = isNan32Cmp
-	} else {
-		matches = func(candidate float32) bool { return key == candidate }
-	}
-
-	entry, ok := s.tbl.Lookup(hashFloat32(key, 0), matches)
-	if !ok {
-		return KeyNotFound, false
-	}
-	return int(entry.payload.memoIdx), true
-}
-
-// GetOrInsert will return the index of the specified value in the table, or insert the
-// value into the table and return the new index. found indicates whether or not it already
-// existed in the table (true) or was inserted by this call (false).
-//
-// val must hold a float32; any other dynamic type makes the type assertion panic.
-// All NaN values are treated as one key. err carries whatever the hash table's
-// Insert reports instead of being dropped.
-func (s *Float32MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
-	// Assert once up front instead of on every comparison.
-	key := val.(float32)
-
-	var cmp func(float32) bool
-	if math.IsNaN(float64(key)) {
-		cmp = isNan32Cmp
-		// use consistent internal bit pattern for NaN regardless of the pattern
-		// that is passed to us. NaN is NaN is NaN
-		key = float32(math.NaN())
-	} else {
-		cmp = func(v float32) bool { return key == v }
-	}
-
-	h := hashFloat32(key, 0)
-	e, ok := s.tbl.Lookup(h, cmp)
-	if ok {
-		return int(e.payload.memoIdx), true, nil
-	}
-
-	// Not found: e points at the empty slot where probing stopped. The new value
-	// takes the next free memo index.
-	idx = s.Size()
-	err = s.tbl.Insert(e, h, key, int32(idx))
-	return idx, false, err
-}
-
-// GetOrInsertBytes is unimplemented for this table: it always panics with
-// "unimplemented".
-func (s *Float32MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
- panic("unimplemented")
-}
-
-// payloadFloat64 is the data stored in a hash table slot: the value itself and
-// the memo index it was inserted with.
-type payloadFloat64 struct {
- val float64
- memoIdx int32
-}
-
-// entryFloat64 is one slot of a Float64HashTable.
-type entryFloat64 struct {
- h uint64 // stored hash; equal to sentinel while the slot is empty
- payload payloadFloat64
-}
-
-func (e entryFloat64) Valid() bool { return e.h != sentinel } // true once the slot holds a hash other than the empty-slot sentinel
-
-// Float64HashTable is a hashtable specifically for float64 that
-// is utilized with the MemoTable to generalize interactions for easier
-// implementation of dictionaries without losing performance.
-type Float64HashTable struct {
- cap uint64 // number of slots in entries
- capMask uint64 // cap - 1, used to mask hashes into slot indexes
- size uint64 // number of values inserted so far
-
- entries []entryFloat64
-}
-
-// NewFloat64HashTable builds an empty hash table for float64 values. The slot
-// count is bitutil.NextPowerOf2 applied to the larger of cap and 32.
-func NewFloat64HashTable(cap uint64) *Float64HashTable {
-	slots := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
-	return &Float64HashTable{
-		cap:     slots,
-		capMask: slots - 1,
-		entries: make([]entryFloat64, slots),
-	}
-}
-
-// Reset discards every stored value and re-initializes the table in place, sized
-// the same way NewFloat64HashTable would size it for cap. A fresh entries slice
-// is allocated; only the table object itself is reused.
-func (h *Float64HashTable) Reset(cap uint64) {
-	slots := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
-	h.cap, h.capMask, h.size = slots, slots-1, 0
-	h.entries = make([]entryFloat64, slots)
-}
-
-// CopyValues is used for copying the values out of the hash table into the
-// passed in slice, in the order that they were first inserted. It is
-// CopyValuesSubset with a start of 0.
-func (h *Float64HashTable) CopyValues(out []float64) {
- h.CopyValuesSubset(0, out)
-}
-
-// CopyValuesSubset writes every stored value whose memo index is at least start
-// into out, at position memoIdx-start. Values with a smaller memo index are
-// skipped. out must be long enough for the largest resulting position; a shorter
-// slice makes the indexing panic.
-func (h *Float64HashTable) CopyValuesSubset(start int, out []float64) {
-	offset := int32(start)
-	h.VisitEntries(func(e *entryFloat64) {
-		pos := e.payload.memoIdx - offset
-		if pos < 0 {
-			return
-		}
-		out[pos] = e.payload.val
-	})
-}
-
-// WriteOut is WriteOutSubset with a start of 0: it writes every stored value
-// into out.
-func (h *Float64HashTable) WriteOut(out []byte) {
- h.WriteOutSubset(0, out)
-}
-
-// WriteOutSubset views out as a []float64 (arrow.Float64Traits.CastFromBytes) and
-// stores every value whose memo index is at least start at position memoIdx-start,
-// passing each value through utils.ToLEFloat64 first.
-func (h *Float64HashTable) WriteOutSubset(start int, out []byte) {
-	dst := arrow.Float64Traits.CastFromBytes(out)
-	offset := int32(start)
-	h.VisitEntries(func(e *entryFloat64) {
-		pos := e.payload.memoIdx - offset
-		if pos < 0 {
-			return
-		}
-		dst[pos] = utils.ToLEFloat64(e.payload.val)
-	})
-}
-
-func (h *Float64HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } // true once size*loadFactor reaches the slot count
-
-// fixHash maps a raw hash to the value actually stored in a slot. The sentinel
-// marks an empty slot, so a hash equal to it is replaced by 42; every other
-// hash is returned unchanged.
-func (Float64HashTable) fixHash(v uint64) uint64 {
-	if v != sentinel {
-		return v
-	}
-	return 42
-}
-
-// Lookup retrieves the entry for a given hash value assuming its payload value returns
-// true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
-// and a boolean as to whether it was found. When the bool is false the pointer refers to the
-// empty slot where probing stopped: it holds no value and is only meant to be handed to Insert.
-func (h *Float64HashTable) Lookup(v uint64, cmp func(float64) bool) (*entryFloat64, bool) {
- idx, ok := h.lookup(v, h.capMask, cmp)
- return &h.entries[idx], ok
-}
-
-// lookup probes h.entries for hash v using the mask szMask and returns the index of
-// the slot where probing stopped. The bool is true when that slot stores hash v and
-// cmp accepts its value; it is false when an empty (sentinel) slot was reached
-// first, in which case the returned index is that empty slot.
-//
-// The loop ends only on one of those two conditions, so it relies on the table
-// always containing at least one empty slot.
-func (h *Float64HashTable) lookup(v uint64, szMask uint64, cmp func(float64) bool) (uint64, bool) {
- const perturbShift uint8 = 5
-
- var (
- idx uint64
- perturb uint64
- e *entryFloat64
- )
-
- // never probe with the sentinel itself: it would match every empty slot
- v = h.fixHash(v)
- idx = v & szMask
- perturb = (v >> uint64(perturbShift)) + 1
-
- for {
- e = &h.entries[idx]
- if e.h == v && cmp(e.payload.val) {
- return idx, true
- }
-
- if e.h == sentinel {
- return idx, false
- }
-
- // perturbation logic inspired from CPython's set/dict object
- // the goal is that all 64 bits of unmasked hash value eventually
- // participate in the probing sequence, to minimize clustering
- idx = (idx + perturb) & szMask
- perturb = (perturb >> uint64(perturbShift)) + 1
- }
-}
-
-// upsize replaces the slot array with a fresh one of newcap slots and re-places
-// every valid entry into it, reusing each entry's stored hash. It always returns
-// nil.
-func (h *Float64HashTable) upsize(newcap uint64) error {
-	mask := newcap - 1
-	// The new table contains no duplicates, so the comparator never matches and
-	// lookup simply returns the first empty slot on the probe sequence.
-	never := func(float64) bool { return false }
-
-	previous := h.entries
-	h.entries = make([]entryFloat64, newcap)
-	for i := range previous {
-		if !previous[i].Valid() {
-			continue
-		}
-		slot, _ := h.lookup(previous[i].h, mask, never)
-		h.entries[slot] = previous[i]
-	}
-
-	h.cap, h.capMask = newcap, mask
-	return nil
-}
-
-// Insert updates the given entry with the provided hash value, payload value and memo index.
-// The entry pointer must have been retrieved via lookup in order to actually insert properly.
-//
-// After the write the table is grown when needUpsize reports true; the requested
-// capacity is cap*loadFactor*2. Growing reallocates the slot array, so e must not
-// be used after Insert returns. Any error reported by upsize is returned to the
-// caller instead of being discarded.
-func (h *Float64HashTable) Insert(e *entryFloat64, v uint64, val float64, memoIdx int32) error {
-	e.h = h.fixHash(v)
-	e.payload.val = val
-	e.payload.memoIdx = memoIdx
-	h.size++
-
-	if !h.needUpsize() {
-		return nil
-	}
-	return h.upsize(h.cap * uint64(loadFactor) * 2)
-}
-
-// VisitEntries will call the passed in function on each *valid* entry in the hash table,
-// a valid entry being one which has had a value inserted into it.
-//
-// Entries are visited in slot order. The pointer handed to visit refers to the slot
-// inside the table rather than to a per-iteration copy of it. Visitors should treat
-// the entry as read-only and must not keep the pointer once the table is modified.
-func (h *Float64HashTable) VisitEntries(visit func(*entryFloat64)) {
-	for i := range h.entries {
-		if e := &h.entries[i]; e.Valid() {
-			visit(e)
-		}
-	}
-}
-
-// Float64MemoTable is a wrapper over the appropriate hashtable to provide an interface
-// conforming to the MemoTable interface defined in the encoding package for general interactions
-// regarding dictionaries.
-type Float64MemoTable struct {
- tbl *Float64HashTable // hash table holding the distinct non-null float64 values
- nullIdx int32 // memo index given to null, or KeyNotFound while no null has been inserted
-}
-
-// NewFloat64MemoTable returns a new memotable with num entries pre-allocated to reduce further
-// allocations when inserting.
-//
-// The table starts without a null entry (nullIdx is KeyNotFound). num is converted to
-// uint64 without a sign check, so callers should pass a non-negative value.
-func NewFloat64MemoTable(num int64) *Float64MemoTable {
- return &Float64MemoTable{tbl: NewFloat64HashTable(uint64(num)), nullIdx: KeyNotFound}
-}
-
-// TypeTraits returns arrow.Float64Traits, the traits value for the float64
-// values stored in this table.
-func (Float64MemoTable) TypeTraits() TypeTraits {
- return arrow.Float64Traits
-}
-
-// Reset allows this table to be re-used by dumping all the data currently in the table.
-// The underlying hash table is reset with a requested capacity of 32 and any
-// previously inserted null is forgotten.
-func (s *Float64MemoTable) Reset() {
- s.tbl.Reset(32)
- s.nullIdx = KeyNotFound
-}
-
-// Size reports how many distinct entries the table holds: the number of values
-// in the hash table, plus one if a null has been inserted.
-func (s *Float64MemoTable) Size() int {
-	if s.nullIdx != KeyNotFound {
-		return int(s.tbl.size) + 1
-	}
-	return int(s.tbl.size)
-}
-
-// GetNull returns the index of an inserted null or KeyNotFound along with a bool
-// that will be true if found and false if not. It never modifies the table.
-func (s *Float64MemoTable) GetNull() (int, bool) {
- return int(s.nullIdx), s.nullIdx != KeyNotFound
-}
-
-// GetOrInsertNull returns the memo index of the null entry, registering one first
-// if the table does not have it yet. found is true when the null already existed
-// and false when this call inserted it; a newly inserted null takes the next free
-// index, i.e. the table's current Size.
-func (s *Float64MemoTable) GetOrInsertNull() (idx int, found bool) {
-	if existing, ok := s.GetNull(); ok {
-		return existing, true
-	}
-
-	next := s.Size()
-	s.nullIdx = int32(next)
-	return next, false
-}
-
-// CopyValues will copy the values from the memo table out into the passed in slice
-// which must be of the appropriate type. It is CopyValuesSubset with a start of 0,
-// so it panics if out is not a []float64.
-func (s *Float64MemoTable) CopyValues(out interface{}) {
- s.CopyValuesSubset(0, out)
-}
-
-// CopyValuesSubset is like CopyValues but only copies a subset of values starting
-// at the provided start index. out is type-asserted to []float64, which panics
-// for any other dynamic type.
-func (s *Float64MemoTable) CopyValuesSubset(start int, out interface{}) {
- s.tbl.CopyValuesSubset(start, out.([]float64))
-}
-
-// WriteOut reinterprets out as a []float64 via arrow.Float64Traits.CastFromBytes
-// and copies the table's values into it unchanged, each at its memo index.
-func (s *Float64MemoTable) WriteOut(out []byte) {
- s.tbl.CopyValues(arrow.Float64Traits.CastFromBytes(out))
-}
-
-// WriteOutSubset is like WriteOut but only writes values whose memo index is at
-// least start, each at position memoIdx-start.
-func (s *Float64MemoTable) WriteOutSubset(start int, out []byte) {
- s.tbl.CopyValuesSubset(start, arrow.Float64Traits.CastFromBytes(out))
-}
-
-// WriteOutLE writes all values into out through the hash table's WriteOut, which
-// passes each value through utils.ToLEFloat64 before storing it.
-func (s *Float64MemoTable) WriteOutLE(out []byte) {
- s.tbl.WriteOut(out)
-}
-
-// WriteOutSubsetLE is like WriteOutLE but only writes values whose memo index is
-// at least start, via the hash table's WriteOutSubset.
-func (s *Float64MemoTable) WriteOutSubsetLE(start int, out []byte) {
- s.tbl.WriteOutSubset(start, out)
-}
-
-// Get looks val up without modifying the table. It returns the memo index of the
-// value and true when present, or KeyNotFound and false otherwise. val must hold
-// a float64; any other dynamic type makes the type assertion panic.
-func (s *Float64MemoTable) Get(val interface{}) (int, bool) {
-	key := val.(float64)
-
-	var matches func(float64) bool
-	if math.IsNaN(key) {
-		// Every NaN counts as the same key: hash one canonical NaN bit pattern
-		// and accept any stored NaN as a match.
-		key = math.NaN()
-		matches = math.IsNaN
-	} else {
-		matches = func(candidate float64) bool { return key == candidate }
-	}
-
-	entry, ok := s.tbl.Lookup(hashFloat64(key, 0), matches)
-	if !ok {
-		return KeyNotFound, false
-	}
-	return int(entry.payload.memoIdx), true
-}
-
-// GetOrInsert will return the index of the specified value in the table, or insert the
-// value into the table and return the new index. found indicates whether or not it already
-// existed in the table (true) or was inserted by this call (false).
-//
-// val must hold a float64; any other dynamic type makes the type assertion panic.
-// All NaN values are treated as one key. err carries whatever the hash table's
-// Insert reports instead of being dropped.
-func (s *Float64MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
-	// Assert once up front instead of on every comparison.
-	key := val.(float64)
-
-	var cmp func(float64) bool
-	if math.IsNaN(key) {
-		cmp = math.IsNaN
-		// use consistent internal bit pattern for NaN regardless of the pattern
-		// that is passed to us. NaN is NaN is NaN
-		key = math.NaN()
-	} else {
-		cmp = func(v float64) bool { return key == v }
-	}
-
-	h := hashFloat64(key, 0)
-	e, ok := s.tbl.Lookup(h, cmp)
-	if ok {
-		return int(e.payload.memoIdx), true, nil
-	}
-
-	// Not found: e points at the empty slot where probing stopped. The new value
-	// takes the next free memo index.
-	idx = s.Size()
-	err = s.tbl.Insert(e, h, key, int32(idx))
-	return idx, false, err
-}
-
-// GetOrInsertBytes is unimplemented for this table: it always panics with
-// "unimplemented".
-func (s *Float64MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
- panic("unimplemented")
-}
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/hashing/xxh3_memo_table.gen.go.tmpl b/vendor/github.com/apache/arrow/go/v14/internal/hashing/xxh3_memo_table.gen.go.tmpl
deleted file mode 100644
index 25164341d..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/hashing/xxh3_memo_table.gen.go.tmpl
+++ /dev/null
@@ -1,349 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package hashing
-
-import (
- "github.com/apache/arrow/go/v14/arrow/bitutil"
- "github.com/apache/arrow/go/v14/internal/utils"
-)
-
-{{/*
-  Each element of .In supplies .Name, the exported fragment used in identifiers
-  (e.g. the Float64 in Float64HashTable), and .name, the Go element type
-  (e.g. float64). One hash table and one memo table are emitted per element.
-*/}}
-{{range .In}}
-// payload{{.Name}} is the data stored in a hash table slot: the value itself and
-// the memo index it was inserted with.
-type payload{{.Name}} struct {
- val {{.name}}
- memoIdx int32
-}
-
-// entry{{.Name}} is one slot of a {{.Name}}HashTable; h equals sentinel while the slot is empty.
-type entry{{.Name}} struct {
- h uint64
- payload payload{{.Name}}
-}
-
-func (e entry{{.Name}}) Valid() bool { return e.h != sentinel }
-
-// {{.Name}}HashTable is a hashtable specifically for {{.name}} that
-// is utilized with the MemoTable to generalize interactions for easier
-// implementation of dictionaries without losing performance.
-type {{.Name}}HashTable struct {
- cap uint64
- capMask uint64
- size uint64
-
- entries []entry{{.Name}}
-}
-
-// New{{.Name}}HashTable returns a new hash table for {{.name}} values
-// initialized with the passed in capacity or 32 whichever is larger.
-func New{{.Name}}HashTable(cap uint64) *{{.Name}}HashTable {
- initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
- ret := &{{.Name}}HashTable{cap: initCap, capMask: initCap - 1, size: 0}
- ret.entries = make([]entry{{.Name}}, initCap)
- return ret
-}
-
-// Reset drops all of the values in this hash table and re-initializes it
-// with the specified initial capacity as if by calling New, but without having
-// to reallocate the object.
-func (h *{{.Name}}HashTable) Reset(cap uint64) {
- h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32))))
- h.capMask = h.cap - 1
- h.size = 0
- h.entries = make([]entry{{.Name}}, h.cap)
-}
-
-// CopyValues is used for copying the values out of the hash table into the
-// passed in slice, in the order that they were first inserted
-func (h *{{.Name}}HashTable) CopyValues(out []{{.name}}) {
- h.CopyValuesSubset(0, out)
-}
-
-// CopyValuesSubset copies a subset of the values in the hashtable out, starting
-// with the value at start, in the order that they were inserted.
-func (h *{{.Name}}HashTable) CopyValuesSubset(start int, out []{{.name}}) {
- h.VisitEntries(func(e *entry{{.Name}}) {
- idx := e.payload.memoIdx - int32(start)
- if idx >= 0 {
- out[idx] = e.payload.val
- }
- })
-}
-
-// WriteOut is WriteOutSubset with a start of 0.
-func (h *{{.Name}}HashTable) WriteOut(out []byte) {
- h.WriteOutSubset(0, out)
-}
-
-// WriteOutSubset views out as a slice of {{.name}} and stores every value whose memo
-// index is at least start at position memoIdx-start. Every type except Int8 and
-// Uint8 is passed through the matching utils.ToLE helper first.
-func (h *{{.Name}}HashTable) WriteOutSubset(start int, out []byte) {
- data := arrow.{{.Name}}Traits.CastFromBytes(out)
- h.VisitEntries(func(e *entry{{.Name}}) {
- idx := e.payload.memoIdx - int32(start)
- if idx >= 0 {
-{{if and (ne .Name "Int8") (ne .Name "Uint8") -}}
- data[idx] = utils.ToLE{{.Name}}(e.payload.val)
-{{else -}}
- data[idx] = e.payload.val
-{{end -}}
- }
- })
-}
-
-func (h *{{.Name}}HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap }
-
-// fixHash replaces a hash equal to the empty-slot sentinel with 42 and returns
-// every other hash unchanged.
-func ({{.Name}}HashTable) fixHash(v uint64) uint64 {
- if v == sentinel {
- return 42
- }
- return v
-}
-
-// Lookup retrieves the entry for a given hash value assuming its payload value returns
-// true when passed to the cmp func. Returns a pointer to the entry for the given hash value,
-// and a boolean as to whether it was found. When the bool is false the pointer refers to the
-// empty slot where probing stopped: it holds no value and is only meant to be handed to Insert.
-func (h *{{.Name}}HashTable) Lookup(v uint64, cmp func({{.name}}) bool) (*entry{{.Name}}, bool) {
- idx, ok := h.lookup(v, h.capMask, cmp)
- return &h.entries[idx], ok
-}
-
-// lookup probes h.entries for hash v using the mask szMask and returns the index of the
-// slot where probing stopped, plus whether that slot matched (true) or was empty (false).
-func (h *{{.Name}}HashTable) lookup(v uint64, szMask uint64, cmp func({{.name}}) bool) (uint64, bool) {
- const perturbShift uint8 = 5
-
- var (
- idx uint64
- perturb uint64
- e *entry{{.Name}}
- )
-
- v = h.fixHash(v)
- idx = v & szMask
- perturb = (v >> uint64(perturbShift)) + 1
-
- for {
- e = &h.entries[idx]
- if e.h == v && cmp(e.payload.val) {
- return idx, true
- }
-
- if e.h == sentinel {
- return idx, false
- }
-
- // perturbation logic inspired from CPython's set/dict object
- // the goal is that all 64 bits of unmasked hash value eventually
- // participate in the probing sequence, to minimize clustering
- idx = (idx + perturb) & szMask
- perturb = (perturb >> uint64(perturbShift)) + 1
- }
-}
-
-// upsize replaces the slot array with a fresh one of newcap slots and re-places every
-// valid entry into it using its stored hash. It always returns nil.
-func (h *{{.Name}}HashTable) upsize(newcap uint64) error {
- newMask := newcap - 1
-
- oldEntries := h.entries
- h.entries = make([]entry{{.Name}}, newcap)
- for _, e := range oldEntries {
- if e.Valid() {
- idx, _ := h.lookup(e.h, newMask, func({{.name}}) bool { return false })
- h.entries[idx] = e
- }
- }
- h.cap = newcap
- h.capMask = newMask
- return nil
-}
-
-// Insert updates the given entry with the provided hash value, payload value and memo index.
-// The entry pointer must have been retrieved via lookup in order to actually insert properly.
-// The table is grown when needUpsize reports true, and any error from upsize is returned.
-func (h *{{.Name}}HashTable) Insert(e *entry{{.Name}}, v uint64, val {{.name}}, memoIdx int32) error {
- e.h = h.fixHash(v)
- e.payload.val = val
- e.payload.memoIdx = memoIdx
- h.size++
-
- if h.needUpsize() {
- return h.upsize(h.cap * uint64(loadFactor) * 2)
- }
- return nil
-}
-
-// VisitEntries will call the passed in function on each *valid* entry in the hash table,
-// a valid entry being one which has had a value inserted into it. The pointer handed to
-// visit refers to the slot inside the table rather than to a per-iteration copy of it.
-func (h *{{.Name}}HashTable) VisitEntries(visit func(*entry{{.Name}})) {
- for i := range h.entries {
- if e := &h.entries[i]; e.Valid() {
- visit(e)
- }
- }
-}
-
-// {{.Name}}MemoTable is a wrapper over the appropriate hashtable to provide an interface
-// conforming to the MemoTable interface defined in the encoding package for general interactions
-// regarding dictionaries.
-type {{.Name}}MemoTable struct {
- tbl *{{.Name}}HashTable
- nullIdx int32
-}
-
-// New{{.Name}}MemoTable returns a new memotable with num entries pre-allocated to reduce further
-// allocations when inserting.
-func New{{.Name}}MemoTable(num int64) *{{.Name}}MemoTable {
- return &{{.Name}}MemoTable{tbl: New{{.Name}}HashTable(uint64(num)), nullIdx: KeyNotFound}
-}
-
-// TypeTraits returns arrow.{{.Name}}Traits.
-func ({{.Name}}MemoTable) TypeTraits() TypeTraits {
- return arrow.{{.Name}}Traits
-}
-
-// Reset allows this table to be re-used by dumping all the data currently in the table.
-func (s *{{.Name}}MemoTable) Reset() {
- s.tbl.Reset(32)
- s.nullIdx = KeyNotFound
-}
-
-// Size returns the current number of inserted elements into the table including if a null
-// has been inserted.
-func (s *{{.Name}}MemoTable) Size() int {
- sz := int(s.tbl.size)
- if _, ok := s.GetNull(); ok {
- sz++
- }
- return sz
-}
-
-// GetNull returns the index of an inserted null or KeyNotFound along with a bool
-// that will be true if found and false if not.
-func (s *{{.Name}}MemoTable) GetNull() (int, bool) {
- return int(s.nullIdx), s.nullIdx != KeyNotFound
-}
-
-// GetOrInsertNull will return the index of the null entry or insert a null entry
-// if one currently doesn't exist. The found value will be true if there was already
-// a null in the table, and false if it inserted one.
-func (s *{{.Name}}MemoTable) GetOrInsertNull() (idx int, found bool) {
- idx, found = s.GetNull()
- if !found {
- idx = s.Size()
- s.nullIdx = int32(idx)
- }
- return
-}
-
-// CopyValues will copy the values from the memo table out into the passed in slice
-// which must be of the appropriate type.
-func (s *{{.Name}}MemoTable) CopyValues(out interface{}) {
- s.CopyValuesSubset(0, out)
-}
-
-// CopyValuesSubset is like CopyValues but only copies a subset of values starting
-// at the provided start index
-func (s *{{.Name}}MemoTable) CopyValuesSubset(start int, out interface{}) {
- s.tbl.CopyValuesSubset(start, out.([]{{.name}}))
-}
-
-// WriteOut reinterprets out via arrow.{{.Name}}Traits.CastFromBytes and copies the values into it unchanged.
-func (s *{{.Name}}MemoTable) WriteOut(out []byte) {
- s.tbl.CopyValues(arrow.{{.Name}}Traits.CastFromBytes(out))
-}
-
-// WriteOutSubset is like WriteOut but only writes values whose memo index is at least start.
-func (s *{{.Name}}MemoTable) WriteOutSubset(start int, out []byte) {
- s.tbl.CopyValuesSubset(start, arrow.{{.Name}}Traits.CastFromBytes(out))
-}
-
-// WriteOutLE writes all values into out through the hash table's WriteOut.
-func (s *{{.Name}}MemoTable) WriteOutLE(out []byte) {
- s.tbl.WriteOut(out)
-}
-
-// WriteOutSubsetLE is like WriteOutLE but goes through the hash table's WriteOutSubset.
-func (s *{{.Name}}MemoTable) WriteOutSubsetLE(start int, out []byte) {
- s.tbl.WriteOutSubset(start, out)
-}
-
-// Get returns the index of the requested value in the hash table or KeyNotFound
-// along with a boolean indicating if it was found or not.
-func (s *{{.Name}}MemoTable) Get(val interface{}) (int, bool) {
-{{if and (ne .Name "Float32") (ne .Name "Float64") }}
- h := hashInt(uint64(val.({{.name}})), 0)
- if e, ok := s.tbl.Lookup(h, func(v {{.name}}) bool { return val.({{.name}}) == v }); ok {
-{{ else -}}
- var cmp func({{.name}}) bool
- {{if eq .Name "Float32"}}
- if math.IsNaN(float64(val.(float32))) {
- cmp = isNan32Cmp
- // use consistent internal bit pattern for NaN regardless of the pattern
- // that is passed to us. NaN is NaN is NaN
- val = float32(math.NaN())
- {{ else -}}
- if math.IsNaN(val.(float64)) {
- cmp = math.IsNaN
- // use consistent internal bit pattern for NaN regardless of the pattern
- // that is passed to us. NaN is NaN is NaN
- val = math.NaN()
- {{end -}}
- } else {
- cmp = func(v {{.name}}) bool { return val.({{.name}}) == v }
- }
-
- h := hash{{.Name}}(val.({{.name}}), 0)
- if e, ok := s.tbl.Lookup(h, cmp); ok {
-{{ end -}}
- return int(e.payload.memoIdx), ok
- }
- return KeyNotFound, false
-}
-
-// GetOrInsert will return the index of the specified value in the table, or insert the
-// value into the table and return the new index. found indicates whether or not it already
-// existed in the table (true) or was inserted by this call (false). err carries whatever
-// the hash table's Insert reports.
-func (s *{{.Name}}MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
- {{if and (ne .Name "Float32") (ne .Name "Float64") }}
- h := hashInt(uint64(val.({{.name}})), 0)
- e, ok := s.tbl.Lookup(h, func(v {{.name}}) bool {
- return val.({{.name}}) == v
- })
-{{ else }}
- var cmp func({{.name}}) bool
- {{if eq .Name "Float32"}}
- if math.IsNaN(float64(val.(float32))) {
- cmp = isNan32Cmp
- // use consistent internal bit pattern for NaN regardless of the pattern
- // that is passed to us. NaN is NaN is NaN
- val = float32(math.NaN())
- {{ else -}}
- if math.IsNaN(val.(float64)) {
- cmp = math.IsNaN
- // use consistent internal bit pattern for NaN regardless of the pattern
- // that is passed to us. NaN is NaN is NaN
- val = math.NaN()
- {{end -}}
- } else {
- cmp = func(v {{.name}}) bool { return val.({{.name}}) == v }
- }
-
- h := hash{{.Name}}(val.({{.name}}), 0)
- e, ok := s.tbl.Lookup(h, cmp)
-{{ end }}
- if ok {
- idx = int(e.payload.memoIdx)
- found = true
- } else {
- idx = s.Size()
- err = s.tbl.Insert(e, h, val.({{.name}}), int32(idx))
- }
- return
-}
-
-
-// GetOrInsertBytes is unimplemented
-func (s *{{.Name}}MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
- panic("unimplemented")
-}
-{{end}}
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/hashing/xxh3_memo_table.go b/vendor/github.com/apache/arrow/go/v14/internal/hashing/xxh3_memo_table.go
deleted file mode 100644
index 81994f0a8..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/hashing/xxh3_memo_table.go
+++ /dev/null
@@ -1,443 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Package hashing provides utilities for and an implementation of a hash
-// table which is more performant than the default go map implementation
-// by leveraging xxh3 and some custom hash functions.
-package hashing
-
-import (
- "bytes"
- "math"
- "reflect"
- "unsafe"
-)
-
-//go:generate go run ../../arrow/_tools/tmpl/main.go -i -data=types.tmpldata xxh3_memo_table.gen.go.tmpl
-
-// TypeTraits is the traits interface a MemoTable reports for its value type.
-type TypeTraits interface {
- // BytesRequired takes an element count n and returns a byte count.
- // NOTE(review): presumably the bytes needed to hold n values - confirm against the arrow traits types.
- BytesRequired(n int) int
-}
-
-// ByteSlice is implemented by values that can expose their contents as a []byte.
-// BinaryMemoTable accepts such values in addition to []byte and string.
-type ByteSlice interface {
- Bytes() []byte
-}
-
-// MemoTable interface for hash tables and dictionary encoding.
-//
-// Values will remember the order they are inserted to generate a valid
-// dictionary.
-type MemoTable interface {
- // TypeTraits returns the traits value for the table's element type.
- TypeTraits() TypeTraits
- // Reset drops everything in the table allowing it to be reused
- Reset()
- // Size returns the current number of unique values stored in
- // the table, including whether or not a null value has been
- // inserted via GetOrInsertNull.
- Size() int
- // GetOrInsert returns the index of the table the specified value is,
- // and a boolean indicating whether or not the value was found in
- // the table (if false, the value was inserted). An error is returned
- // if val is not the appropriate type for the table.
- GetOrInsert(val interface{}) (idx int, existed bool, err error)
- // GetOrInsertBytes returns the index of the table the specified value is,
- // and a boolean indicating whether or not the value was found in
- // the table (if false, the value was inserted). An error is returned
- // if val is not the appropriate type for the table. This function is intended to be used by
- // the BinaryMemoTable to prevent unnecessary allocations of the data when converting from a []byte to interface{}.
- GetOrInsertBytes(val []byte) (idx int, existed bool, err error)
- // GetOrInsertNull returns the index of the null value in the table,
- // inserting one if it hasn't already been inserted. It returns a boolean
- // indicating if the null value already existed or not in the table.
- GetOrInsertNull() (idx int, existed bool)
- // GetNull returns the index of the null value in the table, but does not
- // insert one if it doesn't already exist. Will return -1 if it doesn't exist
- // indicated by a false value for the boolean.
- GetNull() (idx int, exists bool)
- // WriteOut copies the unique values of the memotable out to the byte slice
- // provided. Must have allocated enough bytes for all the values.
- WriteOut(out []byte)
- // WriteOutSubset is like WriteOut, but only writes a subset of values
- // starting with the index offset.
- WriteOutSubset(offset int, out []byte)
-}
-
-// NumericMemoTable is a MemoTable that additionally offers the WriteOutLE and
-// WriteOutSubsetLE variants of WriteOut and WriteOutSubset.
-// NOTE(review): presumably these always produce little-endian bytes - confirm
-// against the implementations.
-type NumericMemoTable interface {
- MemoTable
- WriteOutLE(out []byte)
- WriteOutSubsetLE(offset int, out []byte)
-}
-
-const (
- // sentinel is the stored hash value reserved for "this slot is empty".
- sentinel uint64 = 0
- // loadFactor is the multiplier applied to a table's size when it is compared
- // against capacity, and again when a new capacity is computed.
- loadFactor int64 = 2
-)
-
-// max returns the larger of a and b.
-func max(a, b uint64) uint64 {
-	if b >= a {
-		return b
-	}
-	return a
-}
-
-// isNan32Cmp reports whether a float32 is NaN; it is the float32 counterpart of
-// passing math.IsNaN as a comparison function.
-var isNan32Cmp = func(v float32) bool { return math.IsNaN(float64(v)) }
-
-// KeyNotFound is the constant returned by memo table functions when a key isn't found in the table.
-// It is also the nullIdx value of a memo table that holds no null.
-const KeyNotFound = -1
-
-// BinaryBuilderIFace is the set of builder operations BinaryMemoTable needs from
-// the builder that stores its values. The table reserves and resizes through it,
-// appends values and nulls to it, and reads values back by index.
-type BinaryBuilderIFace interface {
- Reserve(int)
- ReserveData(int)
- Retain()
- Resize(int)
- ResizeData(int)
- Release()
- DataLen() int
- Value(int) []byte
- Len() int
- AppendNull()
- AppendString(string)
- Append([]byte)
-}
-
-// BinaryMemoTable is our hashtable for binary data using the BinaryBuilder
-// to construct the actual data in an easy to pass around way with minimal copies
-// while using a hash table to keep track of the indexes into the dictionary that
-// is created as we go.
-type BinaryMemoTable struct {
- tbl *Int32HashTable // maps a value's hash to its index in builder (kept in payload.val)
- builder BinaryBuilderIFace // holds the values in insertion order
- nullIdx int // index given to null, or KeyNotFound while no null has been inserted
-}
-
-// NewBinaryMemoTable returns a hash table for binary data that stores its values
-// in the supplied builder bldr.
-//
-// initial is the number of entries to pre-allocate for, both in the builder and
-// in the hash table. valuesize is the number of data bytes to reserve in the
-// builder; when it is zero or negative, initial*4 bytes are reserved instead.
-func NewBinaryMemoTable(initial, valuesize int, bldr BinaryBuilderIFace) *BinaryMemoTable {
-	bldr.Reserve(initial)
-	if valuesize > 0 {
-		bldr.ReserveData(valuesize)
-	} else {
-		bldr.ReserveData(initial * 4)
-	}
-
-	return &BinaryMemoTable{
-		tbl:     NewInt32HashTable(uint64(initial)),
-		builder: bldr,
-		nullIdx: KeyNotFound,
-	}
-}
-
-// unimplementedtraits is the placeholder TypeTraits returned by BinaryMemoTable.
-type unimplementedtraits struct{}
-
-// BytesRequired always panics with "unimplemented".
-func (unimplementedtraits) BytesRequired(int) int { panic("unimplemented") }
-
-// TypeTraits returns a placeholder whose BytesRequired method panics; the binary
-// table provides no usable traits value.
-func (BinaryMemoTable) TypeTraits() TypeTraits {
- return unimplementedtraits{}
-}
-
-// Reset dumps all of the data in the table allowing it to be reutilized.
-func (s *BinaryMemoTable) Reset() {
- s.tbl.Reset(32)
- s.builder.Resize(0)
- s.builder.ResizeData(0)
- s.builder.Reserve(int(32))
- s.builder.ReserveData(int(32) * 4)
- s.nullIdx = KeyNotFound
-}
-
-// GetNull returns the index of a null that has been inserted into the table or
-// KeyNotFound. The bool returned will be true if there was a null inserted into
-// the table, and false otherwise. It never modifies the table.
-func (s *BinaryMemoTable) GetNull() (int, bool) {
- return int(s.nullIdx), s.nullIdx != KeyNotFound
-}
-
-// Size reports how many distinct entries the table holds: the number of values
-// in the hash table, plus one if a null has been inserted.
-func (s *BinaryMemoTable) Size() int {
-	if s.nullIdx != KeyNotFound {
-		return int(s.tbl.size) + 1
-	}
-	return int(s.tbl.size)
-}
-
-// helper function to easily return a byte slice for any given value
-// regardless of the type if it's a []byte, string, or fulfills the
-// ByteSlice interface. Any other type panics.
-//
-// For a string no bytes are copied: the returned slice points at the string's
-// own data, so it must only be read, never written to.
-func (BinaryMemoTable) valAsByteSlice(val interface{}) []byte {
- switch v := val.(type) {
- case []byte:
- return v
- case ByteSlice:
- return v.Bytes()
- case string:
- var out []byte
- // fill in out's slice header from v's string header: same data pointer,
- // with length and capacity both set to the string length
- h := (*reflect.StringHeader)(unsafe.Pointer(&v))
- s := (*reflect.SliceHeader)(unsafe.Pointer(&out))
- s.Data = h.Data
- s.Len = h.Len
- s.Cap = h.Len
- return out
- default:
- panic("invalid type for binarymemotable")
- }
-}
-
-// getHash computes the hash (seed 0) of a value of any supported binary type:
-// Hash for a []byte or the bytes of a ByteSlice, hashString for a string.
-// Any other type panics.
-func (BinaryMemoTable) getHash(val interface{}) uint64 {
-	switch data := val.(type) {
-	case []byte:
-		return Hash(data, 0)
-	case ByteSlice:
-		return Hash(data.Bytes(), 0)
-	case string:
-		return hashString(data, 0)
-	}
-	panic("invalid type for binarymemotable")
-}
-
-// helper function to append the given value to the builder regardless
-// of the underlying binary type.
-//
-// Supported types are string, []byte and ByteSlice. Any other type panics with
-// the same message as getHash and valAsByteSlice, instead of being silently
-// ignored and leaving the builder out of step with the hash table.
-func (b *BinaryMemoTable) appendVal(val interface{}) {
-	switch v := val.(type) {
-	case string:
-		b.builder.AppendString(v)
-	case []byte:
-		b.builder.Append(v)
-	case ByteSlice:
-		b.builder.Append(v.Bytes())
-	default:
-		panic("invalid type for binarymemotable")
-	}
-}
-
-// lookup searches the hash table for hash h. A stored entry matches when the
-// builder value at the entry's index is byte-for-byte equal to val.
-func (b *BinaryMemoTable) lookup(h uint64, val []byte) (*entryInt32, bool) {
- return b.tbl.Lookup(h, func(i int32) bool {
- return bytes.Equal(val, b.builder.Value(int(i)))
- })
-}
-
-// Get looks val up without modifying the table. It returns the index of the
-// value and true when present, or KeyNotFound and false otherwise. val must be
-// a string, []byte or ByteSlice; anything else panics.
-func (b *BinaryMemoTable) Get(val interface{}) (int, bool) {
-	h := b.getHash(val)
-	entry, ok := b.lookup(h, b.valAsByteSlice(val))
-	if !ok {
-		return KeyNotFound, false
-	}
-	return int(entry.payload.val), true
-}
-
-// GetOrInsertBytes returns the index of the given value in the table, if not found
-// it is inserted into the table. The return value 'found' indicates whether the value
-// was found in the table (true) or inserted (false) along with any possible error.
-//
-// A new value is appended to the builder and takes the next free index (the
-// table's current Size). err carries whatever the hash table's Insert reports
-// instead of being dropped.
-func (b *BinaryMemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) {
-	h := Hash(val, 0)
-	p, ok := b.lookup(h, val)
-	if ok {
-		return int(p.payload.val), true, nil
-	}
-
-	idx = b.Size()
-	b.builder.Append(val)
-	// The entry's value is the index into the builder; its memo index is unused (-1).
-	err = b.tbl.Insert(p, h, int32(idx), -1)
-	return idx, false, err
-}
-
-// GetOrInsert returns the index of the given value in the table, if not found
-// it is inserted into the table. The return value 'found' indicates whether the value
-// was found in the table (true) or inserted (false) along with any possible error.
-func (b *BinaryMemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) {
- h := b.getHash(val)
- p, found := b.lookup(h, b.valAsByteSlice(val))
- if found {
- idx = int(p.payload.val)
- } else {
- idx = b.Size()
- b.appendVal(val)
- b.tbl.Insert(p, h, int32(idx), -1)
- }
- return
-}
-
-// GetOrInsertNull retrieves the index of a null in the table or inserts
-// null into the table, returning the index and a boolean indicating if it was
-// found in the table (true) or was inserted (false).
-func (b *BinaryMemoTable) GetOrInsertNull() (idx int, found bool) {
- idx, found = b.GetNull()
- if !found {
- idx = b.Size()
- b.nullIdx = idx
- b.builder.AppendNull()
- }
- return
-}
-
-func (b *BinaryMemoTable) Value(i int) []byte {
- return b.builder.Value(i)
-}
-
-// helper function to get the offset into the builder data for a given
-// index value.
-func (b *BinaryMemoTable) findOffset(idx int) uintptr {
- if b.builder.DataLen() == 0 {
- // only empty strings, short circuit
- return 0
- }
-
- val := b.builder.Value(idx)
- for len(val) == 0 {
- idx++
- if idx >= b.builder.Len() {
- break
- }
- val = b.builder.Value(idx)
- }
- if len(val) != 0 {
- return uintptr(unsafe.Pointer(&val[0]))
- }
- return uintptr(b.builder.DataLen()) + b.findOffset(0)
-}
-
-// CopyOffsets copies the list of offsets into the passed in slice, the offsets
-// being the start and end values of the underlying allocated bytes in the builder
-// for the individual values of the table. out should be at least sized to Size()+1
-func (b *BinaryMemoTable) CopyOffsets(out []int32) {
- b.CopyOffsetsSubset(0, out)
-}
-
-// CopyOffsetsSubset is like CopyOffsets but instead of copying all of the offsets,
-// it gets a subset of the offsets in the table starting at the index provided by "start".
-func (b *BinaryMemoTable) CopyOffsetsSubset(start int, out []int32) {
- if b.builder.Len() <= start {
- return
- }
-
- first := b.findOffset(0)
- delta := b.findOffset(start)
- sz := b.Size()
- for i := start; i < sz; i++ {
- offset := int32(b.findOffset(i) - delta)
- out[i-start] = offset
- }
-
- out[sz-start] = int32(b.builder.DataLen() - (int(delta) - int(first)))
-}
-
-// CopyLargeOffsets copies the list of offsets into the passed in slice, the offsets
-// being the start and end values of the underlying allocated bytes in the builder
-// for the individual values of the table. out should be at least sized to Size()+1
-func (b *BinaryMemoTable) CopyLargeOffsets(out []int64) {
- b.CopyLargeOffsetsSubset(0, out)
-}
-
-// CopyLargeOffsetsSubset is like CopyOffsets but instead of copying all of the offsets,
-// it gets a subset of the offsets in the table starting at the index provided by "start".
-func (b *BinaryMemoTable) CopyLargeOffsetsSubset(start int, out []int64) {
- if b.builder.Len() <= start {
- return
- }
-
- first := b.findOffset(0)
- delta := b.findOffset(start)
- sz := b.Size()
- for i := start; i < sz; i++ {
- offset := int64(b.findOffset(i) - delta)
- out[i-start] = offset
- }
-
- out[sz-start] = int64(b.builder.DataLen() - (int(delta) - int(first)))
-}
-
-// CopyValues copies the raw binary data bytes out, out should be a []byte
-// with at least ValuesSize bytes allocated to copy into.
-func (b *BinaryMemoTable) CopyValues(out interface{}) {
- b.CopyValuesSubset(0, out)
-}
-
-// CopyValuesSubset copies the raw binary data bytes out starting with the value
-// at the index start, out should be a []byte with at least ValuesSize bytes allocated
-func (b *BinaryMemoTable) CopyValuesSubset(start int, out interface{}) {
- if b.builder.Len() <= start {
- return
- }
-
- var (
- first = b.findOffset(0)
- offset = b.findOffset(int(start))
- length = b.builder.DataLen() - int(offset-first)
- )
-
- outval := out.([]byte)
- copy(outval, b.builder.Value(start)[0:length])
-}
-
-func (b *BinaryMemoTable) WriteOut(out []byte) {
- b.CopyValues(out)
-}
-
-func (b *BinaryMemoTable) WriteOutSubset(start int, out []byte) {
- b.CopyValuesSubset(start, out)
-}
-
-// CopyFixedWidthValues exists to cope with the fact that the table doesn't keep
-// track of the fixed width when inserting the null value the databuffer holds a
-// zero length byte slice for the null value (if found)
-func (b *BinaryMemoTable) CopyFixedWidthValues(start, width int, out []byte) {
- if start >= b.Size() {
- return
- }
-
- null, exists := b.GetNull()
- if !exists || null < start {
- // nothing to skip, proceed as usual
- b.CopyValuesSubset(start, out)
- return
- }
-
- var (
- leftOffset = b.findOffset(start)
- nullOffset = b.findOffset(null)
- leftSize = nullOffset - leftOffset
- rightOffset = leftOffset + uintptr(b.ValuesSize())
- )
-
- if leftSize > 0 {
- copy(out, b.builder.Value(start)[0:leftSize])
- }
-
- rightSize := rightOffset - nullOffset
- if rightSize > 0 {
- // skip the null fixed size value
- copy(out[int(leftSize)+width:], b.builder.Value(null + 1)[0:rightSize])
- }
-}
-
-// VisitValues exists to run the visitFn on each value currently in the hash table.
-func (b *BinaryMemoTable) VisitValues(start int, visitFn func([]byte)) {
- for i := int(start); i < b.Size(); i++ {
- visitFn(b.builder.Value(i))
- }
-}
-
-// Release is used to tell the underlying builder that it can release the memory allocated
-// when the reference count reaches 0, this is safe to be called from multiple goroutines
-// simultaneously
-func (b *BinaryMemoTable) Release() { b.builder.Release() }
-
-// Retain increases the ref count, it is safe to call it from multiple goroutines
-// simultaneously.
-func (b *BinaryMemoTable) Retain() { b.builder.Retain() }
-
-// ValuesSize returns the current total size of all the raw bytes that have been inserted
-// into the memotable so far.
-func (b *BinaryMemoTable) ValuesSize() int { return b.builder.DataLen() }
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/json/json.go b/vendor/github.com/apache/arrow/go/v14/internal/json/json.go
deleted file mode 100644
index 319b12c55..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/json/json.go
+++ /dev/null
@@ -1,51 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//go:build !tinygo
-// +build !tinygo
-
-package json
-
-import (
- "io"
-
- "github.com/goccy/go-json"
-)
-
-type Decoder = json.Decoder
-type Encoder = json.Encoder
-type Marshaler = json.Marshaler
-type Delim = json.Delim
-type UnmarshalTypeError = json.UnmarshalTypeError
-type Number = json.Number
-type Unmarshaler = json.Unmarshaler
-type RawMessage = json.RawMessage
-
-func Marshal(v interface{}) ([]byte, error) {
- return json.Marshal(v)
-}
-
-func Unmarshal(data []byte, v interface{}) error {
- return json.Unmarshal(data, v)
-}
-
-func NewDecoder(r io.Reader) *Decoder {
- return json.NewDecoder(r)
-}
-
-func NewEncoder(w io.Writer) *Encoder {
- return json.NewEncoder(w)
-}
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/json/json_tinygo.go b/vendor/github.com/apache/arrow/go/v14/internal/json/json_tinygo.go
deleted file mode 100644
index 8e4f447b3..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/json/json_tinygo.go
+++ /dev/null
@@ -1,51 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//go:build tinygo
-// +build tinygo
-
-package json
-
-import (
- "io"
-
- "encoding/json"
-)
-
-type Decoder = json.Decoder
-type Encoder = json.Encoder
-type Marshaler = json.Marshaler
-type Delim = json.Delim
-type UnmarshalTypeError = json.UnmarshalTypeError
-type Number = json.Number
-type Unmarshaler = json.Unmarshaler
-type RawMessage = json.RawMessage
-
-func Marshal(v interface{}) ([]byte, error) {
- return json.Marshal(v)
-}
-
-func Unmarshal(data []byte, v interface{}) error {
- return json.Unmarshal(data, v)
-}
-
-func NewDecoder(r io.Reader) *Decoder {
- return json.NewDecoder(r)
-}
-
-func NewEncoder(w io.Writer) *Encoder {
- return json.NewEncoder(w)
-}
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/Makefile b/vendor/github.com/apache/arrow/go/v14/internal/utils/Makefile
deleted file mode 100644
index fded9d1d5..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/Makefile
+++ /dev/null
@@ -1,80 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# this converts rotate instructions from "ro[lr] <reg>" -> "ro[lr] <reg>, 1" for yasm compatibility
-PERL_FIXUP_ROTATE=perl -i -pe 's/(ro[rl]\s+\w{2,3})$$/\1, 1/'
-
-C2GOASM=c2goasm
-CC=clang-11
-C_FLAGS=-target x86_64-unknown-none -masm=intel -mno-red-zone -mstackrealign -mllvm -inline-threshold=1000 \
- -fno-asynchronous-unwind-tables -fno-exceptions -fno-rtti -O3 -fno-builtin -ffast-math -fno-jump-tables -I_lib
-ASM_FLAGS_AVX2=-mavx2 -mfma
-ASM_FLAGS_SSE4=-msse4
-ASM_FLAGS_BMI2=-mbmi2
-ASM_FLAGS_POPCNT=-mpopcnt
-
-C_FLAGS_NEON=-O3 -fvectorize -mllvm -force-vector-width=16 -fno-asynchronous-unwind-tables -mno-red-zone -mstackrealign -fno-exceptions \
- -fno-rtti -fno-builtin -ffast-math -fno-jump-tables -I_lib
-
-GO_SOURCES := $(shell find . -path ./_lib -prune -o -name '*.go' -not -name '*_test.go')
-ALL_SOURCES := $(shell find . -path ./_lib -prune -o -name '*.go' -name '*.s' -not -name '*_test.go')
-
-.PHONEY: assembly
-
-INTEL_SOURCES := \
- min_max_avx2_amd64.s min_max_sse4_amd64.s transpose_ints_avx2_amd64.s transpose_ints_sse4_amd64.s
-
-#
-# ARROW-15336: DO NOT add the assembly target for Arm64 (ARM_SOURCES) until c2goasm added the Arm64 support.
-# min_max_neon_arm64.s was generated by asm2plan9s.
-# And manually formatted it as the Arm64 Plan9.
-#
-
-assembly: $(INTEL_SOURCES)
-
-_lib/min_max_avx2_amd64.s: _lib/min_max.c
- $(CC) -S $(C_FLAGS) $(ASM_FLAGS_AVX2) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@
-
-_lib/min_max_sse4_amd64.s: _lib/min_max.c
- $(CC) -S $(C_FLAGS) $(ASM_FLAGS_SSE4) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@
-
-_lib/min_max_neon.s: _lib/min_max.c
- $(CC) -S $(C_FLAGS_NEON) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@
-
-_lib/transpose_ints_avx2_amd64.s: _lib/transpose_ints.c
- $(CC) -S $(C_FLAGS) $(ASM_FLAGS_AVX2) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@
-
-_lib/transpose_ints_sse4_amd64.s: _lib/transpose_ints.c
- $(CC) -S $(C_FLAGS) $(ASM_FLAGS_SSE4) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@
-
-_lib/transpose_ints_neon.s: _lib/transpose_ints.c
- $(CC) -S $(C_FLAGS_NEON) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@
-
-min_max_avx2_amd64.s: _lib/min_max_avx2_amd64.s
- $(C2GOASM) -a -f $^ $@
-
-min_max_sse4_amd64.s: _lib/min_max_sse4_amd64.s
- $(C2GOASM) -a -f $^ $@
-
-transpose_ints_avx2_amd64.s: _lib/transpose_ints_avx2_amd64.s
- $(C2GOASM) -a -f $^ $@
-
-transpose_ints_sse4_amd64.s: _lib/transpose_ints_sse4_amd64.s
- $(C2GOASM) -a -f $^ $@
-
-clean:
- rm -f $(INTEL_SOURCES)
- rm -f $(addprefix _lib/,$(INTEL_SOURCES))
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/buf_reader.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/buf_reader.go
deleted file mode 100644
index 0b2381da1..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/buf_reader.go
+++ /dev/null
@@ -1,212 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package utils
-
-import (
- "bufio"
- "errors"
- "fmt"
- "io"
-)
-
-// bufferedReader is similar to bufio.Reader except
-// it will expand the buffer if necessary when asked to Peek
-// more bytes than are in the buffer
-type bufferedReader struct {
- bufferSz int
- buf []byte
- r, w int
- rd io.Reader
- err error
-}
-
-// NewBufferedReader returns a buffered reader with similar semantics to bufio.Reader
-// except Peek will expand the internal buffer if needed rather than return
-// an error.
-func NewBufferedReader(rd io.Reader, sz int) *bufferedReader {
- // if rd is already a buffered reader whose buffer is >= the requested size
- // then just return it as is. no need to make a new object.
- b, ok := rd.(*bufferedReader)
- if ok && len(b.buf) >= sz {
- return b
- }
-
- r := &bufferedReader{
- rd: rd,
- }
- r.resizeBuffer(sz)
- return r
-}
-
-func (b *bufferedReader) resetBuffer() {
- if b.buf == nil {
- b.buf = make([]byte, b.bufferSz)
- } else if b.bufferSz > cap(b.buf) {
- buf := b.buf
- b.buf = make([]byte, b.bufferSz)
- copy(b.buf, buf)
- } else {
- b.buf = b.buf[:b.bufferSz]
- }
-}
-
-func (b *bufferedReader) resizeBuffer(newSize int) {
- b.bufferSz = newSize
- b.resetBuffer()
-}
-
-func (b *bufferedReader) fill() error {
- // slide existing data to the beginning
- if b.r > 0 {
- copy(b.buf, b.buf[b.r:b.w])
- b.w -= b.r
- b.r = 0
- }
-
- if b.w >= len(b.buf) {
- return fmt.Errorf("arrow/bufferedreader: %w", bufio.ErrBufferFull)
- }
-
- n, err := io.ReadAtLeast(b.rd, b.buf[b.w:], 1)
- if n < 0 {
- return fmt.Errorf("arrow/bufferedreader: filling buffer: %w", bufio.ErrNegativeCount)
- }
-
- b.w += n
- b.err = err
- return nil
-}
-
-func (b *bufferedReader) readErr() error {
- err := b.err
- b.err = nil
- return err
-}
-
-// Buffered returns the number of bytes currently buffered
-func (b *bufferedReader) Buffered() int { return b.w - b.r }
-
-// SetBufferSize resets the size of the internal buffer to the desired size.
-// Will return an error if newSize is <= 0 or if newSize is less than the size
-// of the buffered data.
-func (b *bufferedReader) SetBufferSize(newSize int) error {
- if newSize <= 0 {
- return errors.New("buffer size should be positive")
- }
-
- if b.w >= newSize {
- return errors.New("cannot shrink read buffer if buffered data remains")
- }
-
- b.resizeBuffer(newSize)
- return nil
-}
-
-// Peek will buffer and return n bytes from the underlying reader without advancing
-// the reader itself. If n is larger than the current buffer size, the buffer will
-// be expanded to accommodate the extra bytes rather than error.
-func (b *bufferedReader) Peek(n int) ([]byte, error) {
- if n < 0 {
- return nil, fmt.Errorf("arrow/bufferedreader: %w", bufio.ErrNegativeCount)
- }
-
- if n > len(b.buf) {
- if err := b.SetBufferSize(n); err != nil {
- return nil, err
- }
- }
-
- for b.w-b.r < n && b.w-b.r < len(b.buf) && b.err == nil {
- b.fill() // b.w-b.r < len(b.buf) => buffer is not full
- }
-
- return b.buf[b.r : b.r+n], b.readErr()
-}
-
-// Discard skips the next n bytes either by advancing the internal buffer
-// or by reading that many bytes in and throwing them away.
-func (b *bufferedReader) Discard(n int) (discarded int, err error) {
- if n < 0 {
- return 0, fmt.Errorf("arrow/bufferedreader: %w", bufio.ErrNegativeCount)
- }
-
- if n == 0 {
- return
- }
-
- remain := n
- for {
- skip := b.Buffered()
- if skip == 0 {
- b.fill()
- skip = b.Buffered()
- }
- if skip > remain {
- skip = remain
- }
- b.r += skip
- remain -= skip
- if remain == 0 {
- return n, nil
- }
- if b.err != nil {
- return n - remain, b.readErr()
- }
- }
-}
-
-func (b *bufferedReader) Read(p []byte) (n int, err error) {
- n = len(p)
- if n == 0 {
- if b.Buffered() > 0 {
- return 0, nil
- }
- return 0, b.readErr()
- }
-
- if b.r == b.w {
- if b.err != nil {
- return 0, b.readErr()
- }
- if len(p) >= len(b.buf) {
- // large read, empty buffer
- // read directly into p to avoid extra copy
- n, b.err = b.rd.Read(p)
- if n < 0 {
- return n, fmt.Errorf("arrow/bufferedreader: %w", bufio.ErrNegativeCount)
- }
- return n, b.readErr()
- }
-
- // one read
- // don't use b.fill
- b.r, b.w = 0, 0
- n, b.err = b.rd.Read(b.buf)
- if n < 0 {
- return n, fmt.Errorf("arrow/bufferedreader: %w", bufio.ErrNegativeCount)
- }
- if n == 0 {
- return 0, b.readErr()
- }
- b.w += n
- }
-
- // copy as much as we can
- n = copy(p, b.buf[b.r:b.w])
- b.r += n
- return n, nil
-}
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/endians_default.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/endians_default.go
deleted file mode 100644
index 5fd257f52..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/endians_default.go
+++ /dev/null
@@ -1,30 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//go:build !s390x
-
-package utils
-
-var (
- ToLEInt16 = func(x int16) int16 { return x }
- ToLEUint16 = func(x uint16) uint16 { return x }
- ToLEUint32 = func(x uint32) uint32 { return x }
- ToLEUint64 = func(x uint64) uint64 { return x }
- ToLEInt32 = func(x int32) int32 { return x }
- ToLEInt64 = func(x int64) int64 { return x }
- ToLEFloat32 = func(x float32) float32 { return x }
- ToLEFloat64 = func(x float64) float64 { return x }
-)
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/endians_s390x.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/endians_s390x.go
deleted file mode 100644
index 7bb27cd81..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/endians_s390x.go
+++ /dev/null
@@ -1,33 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package utils
-
-import (
- "math"
- "math/bits"
-)
-
-var (
- ToLEInt16 = func(x int16) int16 { return int16(bits.ReverseBytes16(uint16(x))) }
- ToLEUint16 = bits.ReverseBytes16
- ToLEUint32 = bits.ReverseBytes32
- ToLEUint64 = bits.ReverseBytes64
- ToLEInt32 = func(x int32) int32 { return int32(bits.ReverseBytes32(uint32(x))) }
- ToLEInt64 = func(x int64) int64 { return int64(bits.ReverseBytes64(uint64(x))) }
- ToLEFloat32 = func(x float32) float32 { return math.Float32frombits(bits.ReverseBytes32(math.Float32bits(x))) }
- ToLEFloat64 = func(x float64) float64 { return math.Float64frombits(bits.ReverseBytes64(math.Float64bits(x))) }
-)
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/math.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/math.go
deleted file mode 100644
index 62cf96ce4..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/math.go
+++ /dev/null
@@ -1,49 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package utils
-
-// Min is a convenience Min function for int64
-func Min(a, b int64) int64 {
- if a < b {
- return a
- }
- return b
-}
-
-// MinInt is a convenience Min function for int
-func MinInt(a, b int) int {
- if a < b {
- return a
- }
- return b
-}
-
-// Max is a convenience Max function for int64
-func Max(a, b int64) int64 {
- if a > b {
- return a
- }
- return b
-}
-
-// MaxInt is a convenience Max function for int
-func MaxInt(a, b int) int {
- if a > b {
- return a
- }
- return b
-}
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max.go
deleted file mode 100644
index 3d7b0024a..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max.go
+++ /dev/null
@@ -1,212 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package utils
-
-import (
- "math"
-)
-
-// this file contains pure go implementations of the min_max functions that are
-// SIMD accelerated so that we can fallback to these if the cpu doesn't support
-// AVX2 or SSE4 instructions.
-
-func int8MinMax(values []int8) (min, max int8) {
- min = math.MaxInt8
- max = math.MinInt8
-
- for _, v := range values {
- if min > v {
- min = v
- }
- if max < v {
- max = v
- }
- }
- return
-}
-
-func uint8MinMax(values []uint8) (min, max uint8) {
- min = math.MaxUint8
- max = 0
-
- for _, v := range values {
- if min > v {
- min = v
- }
- if max < v {
- max = v
- }
- }
- return
-}
-
-func int16MinMax(values []int16) (min, max int16) {
- min = math.MaxInt16
- max = math.MinInt16
-
- for _, v := range values {
- if min > v {
- min = v
- }
- if max < v {
- max = v
- }
- }
- return
-}
-
-func uint16MinMax(values []uint16) (min, max uint16) {
- min = math.MaxUint16
- max = 0
-
- for _, v := range values {
- if min > v {
- min = v
- }
- if max < v {
- max = v
- }
- }
- return
-}
-
-func int32MinMax(values []int32) (min, max int32) {
- min = math.MaxInt32
- max = math.MinInt32
-
- for _, v := range values {
- if min > v {
- min = v
- }
- if max < v {
- max = v
- }
- }
- return
-}
-
-func uint32MinMax(values []uint32) (min, max uint32) {
- min = math.MaxUint32
- max = 0
-
- for _, v := range values {
- if min > v {
- min = v
- }
- if max < v {
- max = v
- }
- }
- return
-}
-
-func int64MinMax(values []int64) (min, max int64) {
- min = math.MaxInt64
- max = math.MinInt64
-
- for _, v := range values {
- if min > v {
- min = v
- }
- if max < v {
- max = v
- }
- }
- return
-}
-
-func uint64MinMax(values []uint64) (min, max uint64) {
- min = math.MaxUint64
- max = 0
-
- for _, v := range values {
- if min > v {
- min = v
- }
- if max < v {
- max = v
- }
- }
- return
-}
-
-var minmaxFuncs = struct {
- i8 func([]int8) (int8, int8)
- ui8 func([]uint8) (uint8, uint8)
- i16 func([]int16) (int16, int16)
- ui16 func([]uint16) (uint16, uint16)
- i32 func([]int32) (int32, int32)
- ui32 func([]uint32) (uint32, uint32)
- i64 func([]int64) (int64, int64)
- ui64 func([]uint64) (uint64, uint64)
-}{}
-
-// GetMinMaxInt8 returns the min and max for a int8 slice, using AVX2 or
-// SSE4 cpu extensions if available, falling back to a pure go implementation
-// if they are unavailable or built with the noasm tag.
-func GetMinMaxInt8(v []int8) (min, max int8) {
- return minmaxFuncs.i8(v)
-}
-
-// GetMinMaxUint8 returns the min and max for a uint8 slice, using AVX2 or
-// SSE4 cpu extensions if available, falling back to a pure go implementation
-// if they are unavailable or built with the noasm tag.
-func GetMinMaxUint8(v []uint8) (min, max uint8) {
- return minmaxFuncs.ui8(v)
-}
-
-// GetMinMaxInt16 returns the min and max for a int16 slice, using AVX2 or
-// SSE4 cpu extensions if available, falling back to a pure go implementation
-// if they are unavailable or built with the noasm tag.
-func GetMinMaxInt16(v []int16) (min, max int16) {
- return minmaxFuncs.i16(v)
-}
-
-// GetMinMaxUint16 returns the min and max for a uint16 slice, using AVX2 or
-// SSE4 cpu extensions if available, falling back to a pure go implementation
-// if they are unavailable or built with the noasm tag.
-func GetMinMaxUint16(v []uint16) (min, max uint16) {
- return minmaxFuncs.ui16(v)
-}
-
-// GetMinMaxInt32 returns the min and max for a int32 slice, using AVX2 or
-// SSE4 cpu extensions if available, falling back to a pure go implementation
-// if they are unavailable or built with the noasm tag.
-func GetMinMaxInt32(v []int32) (min, max int32) {
- return minmaxFuncs.i32(v)
-}
-
-// GetMinMaxUint32 returns the min and max for a uint32 slice, using AVX2 or
-// SSE4 cpu extensions if available, falling back to a pure go implementation
-// if they are unavailable or built with the noasm tag.
-func GetMinMaxUint32(v []uint32) (min, max uint32) {
- return minmaxFuncs.ui32(v)
-}
-
-// GetMinMaxInt64 returns the min and max for a int64 slice, using AVX2 or
-// SSE4 cpu extensions if available, falling back to a pure go implementation
-// if they are unavailable or built with the noasm tag.
-func GetMinMaxInt64(v []int64) (min, max int64) {
- return minmaxFuncs.i64(v)
-}
-
-// GetMinMaxUint64 returns the min and max for a uint64 slice, using AVX2 or
-// SSE4 cpu extensions if available, falling back to a pure go implementation
-// if they are unavailable or built with the noasm tag.
-func GetMinMaxUint64(v []uint64) (min, max uint64) {
- return minmaxFuncs.ui64(v)
-}
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_amd64.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_amd64.go
deleted file mode 100644
index 5fccddbee..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_amd64.go
+++ /dev/null
@@ -1,55 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//go:build !noasm
-
-package utils
-
-import "golang.org/x/sys/cpu"
-
-func init() {
- // if the CPU supports AVX2 or SSE4 then let's use those to benefit from SIMD
- // to accelerate the performance for finding the min and max for an integral slice.
- // otherwise fallback to a pure go implementation if the cpu doesn't have these features.
- if cpu.X86.HasAVX2 {
- minmaxFuncs.i8 = int8MaxMinAVX2
- minmaxFuncs.ui8 = uint8MaxMinAVX2
- minmaxFuncs.i16 = int16MaxMinAVX2
- minmaxFuncs.ui16 = uint16MaxMinAVX2
- minmaxFuncs.i32 = int32MaxMinAVX2
- minmaxFuncs.ui32 = uint32MaxMinAVX2
- minmaxFuncs.i64 = int64MaxMinAVX2
- minmaxFuncs.ui64 = uint64MaxMinAVX2
- } else if cpu.X86.HasSSE42 {
- minmaxFuncs.i8 = int8MaxMinSSE4
- minmaxFuncs.ui8 = uint8MaxMinSSE4
- minmaxFuncs.i16 = int16MaxMinSSE4
- minmaxFuncs.ui16 = uint16MaxMinSSE4
- minmaxFuncs.i32 = int32MaxMinSSE4
- minmaxFuncs.ui32 = uint32MaxMinSSE4
- minmaxFuncs.i64 = int64MaxMinSSE4
- minmaxFuncs.ui64 = uint64MaxMinSSE4
- } else {
- minmaxFuncs.i8 = int8MinMax
- minmaxFuncs.ui8 = uint8MinMax
- minmaxFuncs.i16 = int16MinMax
- minmaxFuncs.ui16 = uint16MinMax
- minmaxFuncs.i32 = int32MinMax
- minmaxFuncs.ui32 = uint32MinMax
- minmaxFuncs.i64 = int64MinMax
- minmaxFuncs.ui64 = uint64MinMax
- }
-}
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_arm64.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_arm64.go
deleted file mode 100644
index 7404e95d9..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_arm64.go
+++ /dev/null
@@ -1,65 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//go:build !noasm
-
-package utils
-
-import (
- "os"
- "strings"
-)
-import "golang.org/x/sys/cpu"
-
-func init() {
- // Added ability to enable extension via environment:
- // ARM_ENABLE_EXT=NEON go test
- if ext, ok := os.LookupEnv("ARM_ENABLE_EXT"); ok {
- exts := strings.Split(ext, ",")
-
- for _, x := range exts {
- switch x {
- case "NEON":
- cpu.ARM64.HasASIMD = true
- case "AES":
- cpu.ARM64.HasAES = true
- case "PMULL":
- cpu.ARM64.HasPMULL = true
- default:
- cpu.ARM64.HasASIMD = false
- cpu.ARM64.HasAES = false
- cpu.ARM64.HasPMULL = false
- }
- }
- }
- if cpu.ARM64.HasASIMD {
- minmaxFuncs.i32 = int32MaxMinNEON
- minmaxFuncs.ui32 = uint32MaxMinNEON
- minmaxFuncs.i64 = int64MaxMinNEON
- minmaxFuncs.ui64 = uint64MaxMinNEON
- } else {
- minmaxFuncs.i32 = int32MinMax
- minmaxFuncs.ui32 = uint32MinMax
- minmaxFuncs.i64 = int64MinMax
- minmaxFuncs.ui64 = uint64MinMax
- }
-
- // haven't yet generated the NEON arm64 for these
- minmaxFuncs.i8 = int8MinMax
- minmaxFuncs.ui8 = uint8MinMax
- minmaxFuncs.i16 = int16MinMax
- minmaxFuncs.ui16 = uint16MinMax
-}
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_avx2_amd64.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_avx2_amd64.go
deleted file mode 100644
index af6726243..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_avx2_amd64.go
+++ /dev/null
@@ -1,90 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//go:build !noasm
-
-package utils
-
-import (
- "unsafe"
-)
-
-// This file contains convenience functions for utilizing AVX2 intrinsics to quickly
-// and efficiently get the min and max from an integral slice.
-
-//go:noescape
-func _int8_max_min_avx2(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
-
-func int8MaxMinAVX2(values []int8) (min, max int8) {
- _int8_max_min_avx2(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max))
- return
-}
-
-//go:noescape
-func _uint8_max_min_avx2(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
-
-func uint8MaxMinAVX2(values []uint8) (min, max uint8) {
- _uint8_max_min_avx2(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max))
- return
-}
-
-//go:noescape
-func _int16_max_min_avx2(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
-
-func int16MaxMinAVX2(values []int16) (min, max int16) {
- _int16_max_min_avx2(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max))
- return
-}
-
-//go:noescape
-func _uint16_max_min_avx2(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
-
-func uint16MaxMinAVX2(values []uint16) (min, max uint16) {
- _uint16_max_min_avx2(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max))
- return
-}
-
-//go:noescape
-func _int32_max_min_avx2(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
-
-func int32MaxMinAVX2(values []int32) (min, max int32) {
- _int32_max_min_avx2(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max))
- return
-}
-
-//go:noescape
-func _uint32_max_min_avx2(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
-
-func uint32MaxMinAVX2(values []uint32) (min, max uint32) {
- _uint32_max_min_avx2(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max))
- return
-}
-
-//go:noescape
-func _int64_max_min_avx2(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
-
-func int64MaxMinAVX2(values []int64) (min, max int64) {
- _int64_max_min_avx2(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max))
- return
-}
-
-//go:noescape
-func _uint64_max_min_avx2(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
-
-func uint64MaxMinAVX2(values []uint64) (min, max uint64) {
- _uint64_max_min_avx2(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max))
- return
-}
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_avx2_amd64.s b/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_avx2_amd64.s
deleted file mode 100644
index fe0c36e0e..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_avx2_amd64.s
+++ /dev/null
@@ -1,927 +0,0 @@
-//+build !noasm !appengine
-// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT
-
-DATA LCDATA1<>+0x000(SB)/8, $0x8080808080808080
-DATA LCDATA1<>+0x008(SB)/8, $0x8080808080808080
-DATA LCDATA1<>+0x010(SB)/8, $0x8080808080808080
-DATA LCDATA1<>+0x018(SB)/8, $0x8080808080808080
-DATA LCDATA1<>+0x020(SB)/8, $0x7f7f7f7f7f7f7f7f
-DATA LCDATA1<>+0x028(SB)/8, $0x7f7f7f7f7f7f7f7f
-DATA LCDATA1<>+0x030(SB)/8, $0x7f7f7f7f7f7f7f7f
-DATA LCDATA1<>+0x038(SB)/8, $0x7f7f7f7f7f7f7f7f
-DATA LCDATA1<>+0x040(SB)/8, $0x7f7f7f7f7f7f7f7f
-DATA LCDATA1<>+0x048(SB)/8, $0x7f7f7f7f7f7f7f7f
-DATA LCDATA1<>+0x050(SB)/8, $0x8080808080808080
-DATA LCDATA1<>+0x058(SB)/8, $0x8080808080808080
-GLOBL LCDATA1<>(SB), 8, $96
-
-TEXT ·_int8_max_min_avx2(SB), $0-32
-
- MOVQ values+0(FP), DI
- MOVQ length+8(FP), SI
- MOVQ minout+16(FP), DX
- MOVQ maxout+24(FP), CX
- LEAQ LCDATA1<>(SB), BP
-
- WORD $0xf685 // test esi, esi
- JLE LBB0_1
- WORD $0x8941; BYTE $0xf1 // mov r9d, esi
- WORD $0xfe83; BYTE $0x3f // cmp esi, 63
- JA LBB0_4
- WORD $0xb041; BYTE $0x80 // mov r8b, -128
- WORD $0xb640; BYTE $0x7f // mov sil, 127
- WORD $0x3145; BYTE $0xd2 // xor r10d, r10d
- JMP LBB0_11
-
-LBB0_1:
- WORD $0xb640; BYTE $0x7f // mov sil, 127
- WORD $0xb041; BYTE $0x80 // mov r8b, -128
- JMP LBB0_12
-
-LBB0_4:
- WORD $0x8945; BYTE $0xca // mov r10d, r9d
- LONG $0xc0e28341 // and r10d, -64
- LONG $0xc0428d49 // lea rax, [r10 - 64]
- WORD $0x8949; BYTE $0xc0 // mov r8, rax
- LONG $0x06e8c149 // shr r8, 6
- LONG $0x01c08349 // add r8, 1
- WORD $0x8548; BYTE $0xc0 // test rax, rax
- JE LBB0_5
- WORD $0x894c; BYTE $0xc6 // mov rsi, r8
- LONG $0xfee68348 // and rsi, -2
- WORD $0xf748; BYTE $0xde // neg rsi
- LONG $0x4d6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword 0[rbp] /* [rip + .LCPI0_0] */
- LONG $0x456ffdc5; BYTE $0x20 // vmovdqa ymm0, yword 32[rbp] /* [rip + .LCPI0_1] */
- WORD $0xc031 // xor eax, eax
- LONG $0xd06ffdc5 // vmovdqa ymm2, ymm0
- LONG $0xd96ffdc5 // vmovdqa ymm3, ymm1
-
-LBB0_7:
- LONG $0x246ffec5; BYTE $0x07 // vmovdqu ymm4, yword [rdi + rax]
- LONG $0x6c6ffec5; WORD $0x2007 // vmovdqu ymm5, yword [rdi + rax + 32]
- LONG $0x746ffec5; WORD $0x4007 // vmovdqu ymm6, yword [rdi + rax + 64]
- LONG $0x7c6ffec5; WORD $0x6007 // vmovdqu ymm7, yword [rdi + rax + 96]
- LONG $0x387de2c4; BYTE $0xc4 // vpminsb ymm0, ymm0, ymm4
- LONG $0x386de2c4; BYTE $0xd5 // vpminsb ymm2, ymm2, ymm5
- LONG $0x3c75e2c4; BYTE $0xcc // vpmaxsb ymm1, ymm1, ymm4
- LONG $0x3c65e2c4; BYTE $0xdd // vpmaxsb ymm3, ymm3, ymm5
- LONG $0x387de2c4; BYTE $0xc6 // vpminsb ymm0, ymm0, ymm6
- LONG $0x386de2c4; BYTE $0xd7 // vpminsb ymm2, ymm2, ymm7
- LONG $0x3c75e2c4; BYTE $0xce // vpmaxsb ymm1, ymm1, ymm6
- LONG $0x3c65e2c4; BYTE $0xdf // vpmaxsb ymm3, ymm3, ymm7
- LONG $0x80e88348 // sub rax, -128
- LONG $0x02c68348 // add rsi, 2
- JNE LBB0_7
- LONG $0x01c0f641 // test r8b, 1
- JE LBB0_10
-
-LBB0_9:
- LONG $0x246ffec5; BYTE $0x07 // vmovdqu ymm4, yword [rdi + rax]
- LONG $0x6c6ffec5; WORD $0x2007 // vmovdqu ymm5, yword [rdi + rax + 32]
- LONG $0x3c65e2c4; BYTE $0xdd // vpmaxsb ymm3, ymm3, ymm5
- LONG $0x3c75e2c4; BYTE $0xcc // vpmaxsb ymm1, ymm1, ymm4
- LONG $0x386de2c4; BYTE $0xd5 // vpminsb ymm2, ymm2, ymm5
- LONG $0x387de2c4; BYTE $0xc4 // vpminsb ymm0, ymm0, ymm4
-
-LBB0_10:
- LONG $0x3c75e2c4; BYTE $0xcb // vpmaxsb ymm1, ymm1, ymm3
- LONG $0x397de3c4; WORD $0x01cb // vextracti128 xmm3, ymm1, 1
- LONG $0x3c71e2c4; BYTE $0xcb // vpmaxsb xmm1, xmm1, xmm3
- LONG $0x4deff1c5; BYTE $0x40 // vpxor xmm1, xmm1, oword 64[rbp] /* [rip + .LCPI0_2] */
- LONG $0x387de2c4; BYTE $0xc2 // vpminsb ymm0, ymm0, ymm2
- LONG $0xd171e9c5; BYTE $0x08 // vpsrlw xmm2, xmm1, 8
- LONG $0xcadaf1c5 // vpminub xmm1, xmm1, xmm2
- LONG $0x4179e2c4; BYTE $0xc9 // vphminposuw xmm1, xmm1
- LONG $0x7e79c1c4; BYTE $0xc8 // vmovd r8d, xmm1
- LONG $0x7ff08041 // xor r8b, 127
- LONG $0x397de3c4; WORD $0x01c1 // vextracti128 xmm1, ymm0, 1
- LONG $0x3879e2c4; BYTE $0xc1 // vpminsb xmm0, xmm0, xmm1
- LONG $0x45eff9c5; BYTE $0x50 // vpxor xmm0, xmm0, oword 80[rbp] /* [rip + .LCPI0_3] */
- LONG $0xd071f1c5; BYTE $0x08 // vpsrlw xmm1, xmm0, 8
- LONG $0xc1daf9c5 // vpminub xmm0, xmm0, xmm1
- LONG $0x4179e2c4; BYTE $0xc0 // vphminposuw xmm0, xmm0
- LONG $0xc67ef9c5 // vmovd esi, xmm0
- LONG $0x80f68040 // xor sil, -128
- WORD $0x394d; BYTE $0xca // cmp r10, r9
- JE LBB0_12
-
-LBB0_11:
- LONG $0x04b60f42; BYTE $0x17 // movzx eax, byte [rdi + r10]
- WORD $0x3840; BYTE $0xc6 // cmp sil, al
- LONG $0xf6b60f40 // movzx esi, sil
- WORD $0x4f0f; BYTE $0xf0 // cmovg esi, eax
- WORD $0x3841; BYTE $0xc0 // cmp r8b, al
- LONG $0xc0b60f45 // movzx r8d, r8b
- LONG $0xc04c0f44 // cmovl r8d, eax
- LONG $0x01c28349 // add r10, 1
- WORD $0x394d; BYTE $0xd1 // cmp r9, r10
- JNE LBB0_11
-
-LBB0_12:
- WORD $0x8844; BYTE $0x01 // mov byte [rcx], r8b
- WORD $0x8840; BYTE $0x32 // mov byte [rdx], sil
- VZEROUPPER
- RET
-
-LBB0_5:
- LONG $0x4d6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword 0[rbp] /* [rip + .LCPI0_0] */
- LONG $0x456ffdc5; BYTE $0x20 // vmovdqa ymm0, yword 32[rbp] /* [rip + .LCPI0_1] */
- WORD $0xc031 // xor eax, eax
- LONG $0xd06ffdc5 // vmovdqa ymm2, ymm0
- LONG $0xd96ffdc5 // vmovdqa ymm3, ymm1
- LONG $0x01c0f641 // test r8b, 1
- JNE LBB0_9
- JMP LBB0_10
-
-TEXT ·_uint8_max_min_avx2(SB), $0-32
-
- MOVQ values+0(FP), DI
- MOVQ length+8(FP), SI
- MOVQ minout+16(FP), DX
- MOVQ maxout+24(FP), CX
-
- WORD $0xf685 // test esi, esi
- JLE LBB1_1
- WORD $0x8941; BYTE $0xf1 // mov r9d, esi
- WORD $0xfe83; BYTE $0x3f // cmp esi, 63
- JA LBB1_4
- WORD $0xb640; BYTE $0xff // mov sil, -1
- WORD $0x3145; BYTE $0xd2 // xor r10d, r10d
- WORD $0xc031 // xor eax, eax
- JMP LBB1_11
-
-LBB1_1:
- WORD $0xb640; BYTE $0xff // mov sil, -1
- WORD $0xc031 // xor eax, eax
- JMP LBB1_12
-
-LBB1_4:
- WORD $0x8945; BYTE $0xca // mov r10d, r9d
- LONG $0xc0e28341 // and r10d, -64
- LONG $0xc0428d49 // lea rax, [r10 - 64]
- WORD $0x8949; BYTE $0xc0 // mov r8, rax
- LONG $0x06e8c149 // shr r8, 6
- LONG $0x01c08349 // add r8, 1
- WORD $0x8548; BYTE $0xc0 // test rax, rax
- JE LBB1_5
- WORD $0x894c; BYTE $0xc6 // mov rsi, r8
- LONG $0xfee68348 // and rsi, -2
- WORD $0xf748; BYTE $0xde // neg rsi
- LONG $0xc0eff9c5 // vpxor xmm0, xmm0, xmm0
- LONG $0xc976f5c5 // vpcmpeqd ymm1, ymm1, ymm1
- WORD $0xc031 // xor eax, eax
- LONG $0xd276edc5 // vpcmpeqd ymm2, ymm2, ymm2
- LONG $0xdbefe1c5 // vpxor xmm3, xmm3, xmm3
-
-LBB1_7:
- LONG $0x246ffec5; BYTE $0x07 // vmovdqu ymm4, yword [rdi + rax]
- LONG $0x6c6ffec5; WORD $0x2007 // vmovdqu ymm5, yword [rdi + rax + 32]
- LONG $0x746ffec5; WORD $0x4007 // vmovdqu ymm6, yword [rdi + rax + 64]
- LONG $0x7c6ffec5; WORD $0x6007 // vmovdqu ymm7, yword [rdi + rax + 96]
- LONG $0xccdaf5c5 // vpminub ymm1, ymm1, ymm4
- LONG $0xd5daedc5 // vpminub ymm2, ymm2, ymm5
- LONG $0xc4defdc5 // vpmaxub ymm0, ymm0, ymm4
- LONG $0xdddee5c5 // vpmaxub ymm3, ymm3, ymm5
- LONG $0xcedaf5c5 // vpminub ymm1, ymm1, ymm6
- LONG $0xd7daedc5 // vpminub ymm2, ymm2, ymm7
- LONG $0xc6defdc5 // vpmaxub ymm0, ymm0, ymm6
- LONG $0xdfdee5c5 // vpmaxub ymm3, ymm3, ymm7
- LONG $0x80e88348 // sub rax, -128
- LONG $0x02c68348 // add rsi, 2
- JNE LBB1_7
- LONG $0x01c0f641 // test r8b, 1
- JE LBB1_10
-
-LBB1_9:
- LONG $0x246ffec5; BYTE $0x07 // vmovdqu ymm4, yword [rdi + rax]
- LONG $0x6c6ffec5; WORD $0x2007 // vmovdqu ymm5, yword [rdi + rax + 32]
- LONG $0xdddee5c5 // vpmaxub ymm3, ymm3, ymm5
- LONG $0xc4defdc5 // vpmaxub ymm0, ymm0, ymm4
- LONG $0xd5daedc5 // vpminub ymm2, ymm2, ymm5
- LONG $0xccdaf5c5 // vpminub ymm1, ymm1, ymm4
-
-LBB1_10:
- LONG $0xcadaf5c5 // vpminub ymm1, ymm1, ymm2
- LONG $0xc3defdc5 // vpmaxub ymm0, ymm0, ymm3
- LONG $0x397de3c4; WORD $0x01c2 // vextracti128 xmm2, ymm0, 1
- LONG $0xc2def9c5 // vpmaxub xmm0, xmm0, xmm2
- LONG $0xd276e9c5 // vpcmpeqd xmm2, xmm2, xmm2
- LONG $0xc2eff9c5 // vpxor xmm0, xmm0, xmm2
- LONG $0xd071e9c5; BYTE $0x08 // vpsrlw xmm2, xmm0, 8
- LONG $0xc2daf9c5 // vpminub xmm0, xmm0, xmm2
- LONG $0x4179e2c4; BYTE $0xc0 // vphminposuw xmm0, xmm0
- LONG $0xc07ef9c5 // vmovd eax, xmm0
- WORD $0xd0f6 // not al
- LONG $0x397de3c4; WORD $0x01c8 // vextracti128 xmm0, ymm1, 1
- LONG $0xc0daf1c5 // vpminub xmm0, xmm1, xmm0
- LONG $0xd071f1c5; BYTE $0x08 // vpsrlw xmm1, xmm0, 8
- LONG $0xc1daf9c5 // vpminub xmm0, xmm0, xmm1
- LONG $0x4179e2c4; BYTE $0xc0 // vphminposuw xmm0, xmm0
- LONG $0xc67ef9c5 // vmovd esi, xmm0
- WORD $0x394d; BYTE $0xca // cmp r10, r9
- JE LBB1_12
-
-LBB1_11:
- LONG $0x04b60f46; BYTE $0x17 // movzx r8d, byte [rdi + r10]
- WORD $0x3844; BYTE $0xc6 // cmp sil, r8b
- LONG $0xf6b60f40 // movzx esi, sil
- LONG $0xf0430f41 // cmovae esi, r8d
- WORD $0x3844; BYTE $0xc0 // cmp al, r8b
- WORD $0xb60f; BYTE $0xc0 // movzx eax, al
- LONG $0xc0460f41 // cmovbe eax, r8d
- LONG $0x01c28349 // add r10, 1
- WORD $0x394d; BYTE $0xd1 // cmp r9, r10
- JNE LBB1_11
-
-LBB1_12:
- WORD $0x0188 // mov byte [rcx], al
- WORD $0x8840; BYTE $0x32 // mov byte [rdx], sil
- VZEROUPPER
- RET
-
-LBB1_5:
- LONG $0xc0eff9c5 // vpxor xmm0, xmm0, xmm0
- LONG $0xc976f5c5 // vpcmpeqd ymm1, ymm1, ymm1
- WORD $0xc031 // xor eax, eax
- LONG $0xd276edc5 // vpcmpeqd ymm2, ymm2, ymm2
- LONG $0xdbefe1c5 // vpxor xmm3, xmm3, xmm3
- LONG $0x01c0f641 // test r8b, 1
- JNE LBB1_9
- JMP LBB1_10
-
-DATA LCDATA2<>+0x000(SB)/8, $0x8000800080008000
-DATA LCDATA2<>+0x008(SB)/8, $0x8000800080008000
-DATA LCDATA2<>+0x010(SB)/8, $0x8000800080008000
-DATA LCDATA2<>+0x018(SB)/8, $0x8000800080008000
-DATA LCDATA2<>+0x020(SB)/8, $0x7fff7fff7fff7fff
-DATA LCDATA2<>+0x028(SB)/8, $0x7fff7fff7fff7fff
-DATA LCDATA2<>+0x030(SB)/8, $0x7fff7fff7fff7fff
-DATA LCDATA2<>+0x038(SB)/8, $0x7fff7fff7fff7fff
-DATA LCDATA2<>+0x040(SB)/8, $0x7fff7fff7fff7fff
-DATA LCDATA2<>+0x048(SB)/8, $0x7fff7fff7fff7fff
-DATA LCDATA2<>+0x050(SB)/8, $0x8000800080008000
-DATA LCDATA2<>+0x058(SB)/8, $0x8000800080008000
-GLOBL LCDATA2<>(SB), 8, $96
-
-TEXT ·_int16_max_min_avx2(SB), $0-32
-
- MOVQ values+0(FP), DI
- MOVQ length+8(FP), SI
- MOVQ minout+16(FP), DX
- MOVQ maxout+24(FP), CX
- LEAQ LCDATA2<>(SB), BP
-
- WORD $0xf685 // test esi, esi
- JLE LBB2_1
- WORD $0x8941; BYTE $0xf1 // mov r9d, esi
- WORD $0xfe83; BYTE $0x1f // cmp esi, 31
- JA LBB2_4
- LONG $0x00b84166; BYTE $0x80 // mov r8w, -32768
- LONG $0x7fffbe66 // mov si, 32767
- WORD $0x3145; BYTE $0xd2 // xor r10d, r10d
- JMP LBB2_11
-
-LBB2_1:
- LONG $0x7fffbe66 // mov si, 32767
- LONG $0x00b84166; BYTE $0x80 // mov r8w, -32768
- JMP LBB2_12
-
-LBB2_4:
- WORD $0x8945; BYTE $0xca // mov r10d, r9d
- LONG $0xe0e28341 // and r10d, -32
- LONG $0xe0428d49 // lea rax, [r10 - 32]
- WORD $0x8949; BYTE $0xc0 // mov r8, rax
- LONG $0x05e8c149 // shr r8, 5
- LONG $0x01c08349 // add r8, 1
- WORD $0x8548; BYTE $0xc0 // test rax, rax
- JE LBB2_5
- WORD $0x894c; BYTE $0xc6 // mov rsi, r8
- LONG $0xfee68348 // and rsi, -2
- WORD $0xf748; BYTE $0xde // neg rsi
- LONG $0x4d6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword 0[rbp] /* [rip + .LCPI2_0] */
- LONG $0x456ffdc5; BYTE $0x20 // vmovdqa ymm0, yword 32[rbp] /* [rip + .LCPI2_1] */
- WORD $0xc031 // xor eax, eax
- LONG $0xd06ffdc5 // vmovdqa ymm2, ymm0
- LONG $0xd96ffdc5 // vmovdqa ymm3, ymm1
-
-LBB2_7:
- LONG $0x246ffec5; BYTE $0x47 // vmovdqu ymm4, yword [rdi + 2*rax]
- LONG $0x6c6ffec5; WORD $0x2047 // vmovdqu ymm5, yword [rdi + 2*rax + 32]
- LONG $0x746ffec5; WORD $0x4047 // vmovdqu ymm6, yword [rdi + 2*rax + 64]
- LONG $0x7c6ffec5; WORD $0x6047 // vmovdqu ymm7, yword [rdi + 2*rax + 96]
- LONG $0xc4eafdc5 // vpminsw ymm0, ymm0, ymm4
- LONG $0xd5eaedc5 // vpminsw ymm2, ymm2, ymm5
- LONG $0xcceef5c5 // vpmaxsw ymm1, ymm1, ymm4
- LONG $0xddeee5c5 // vpmaxsw ymm3, ymm3, ymm5
- LONG $0xc6eafdc5 // vpminsw ymm0, ymm0, ymm6
- LONG $0xd7eaedc5 // vpminsw ymm2, ymm2, ymm7
- LONG $0xceeef5c5 // vpmaxsw ymm1, ymm1, ymm6
- LONG $0xdfeee5c5 // vpmaxsw ymm3, ymm3, ymm7
- LONG $0x40c08348 // add rax, 64
- LONG $0x02c68348 // add rsi, 2
- JNE LBB2_7
- LONG $0x01c0f641 // test r8b, 1
- JE LBB2_10
-
-LBB2_9:
- LONG $0x246ffec5; BYTE $0x47 // vmovdqu ymm4, yword [rdi + 2*rax]
- LONG $0x6c6ffec5; WORD $0x2047 // vmovdqu ymm5, yword [rdi + 2*rax + 32]
- LONG $0xddeee5c5 // vpmaxsw ymm3, ymm3, ymm5
- LONG $0xcceef5c5 // vpmaxsw ymm1, ymm1, ymm4
- LONG $0xd5eaedc5 // vpminsw ymm2, ymm2, ymm5
- LONG $0xc4eafdc5 // vpminsw ymm0, ymm0, ymm4
-
-LBB2_10:
- LONG $0xcbeef5c5 // vpmaxsw ymm1, ymm1, ymm3
- LONG $0x397de3c4; WORD $0x01cb // vextracti128 xmm3, ymm1, 1
- LONG $0xcbeef1c5 // vpmaxsw xmm1, xmm1, xmm3
- LONG $0x4deff1c5; BYTE $0x40 // vpxor xmm1, xmm1, oword 64[rbp] /* [rip + .LCPI2_2] */
- LONG $0xc2eafdc5 // vpminsw ymm0, ymm0, ymm2
- LONG $0x4179e2c4; BYTE $0xc9 // vphminposuw xmm1, xmm1
- LONG $0x7e79c1c4; BYTE $0xc8 // vmovd r8d, xmm1
- LONG $0xfff08141; WORD $0x007f; BYTE $0x00 // xor r8d, 32767
- LONG $0x397de3c4; WORD $0x01c1 // vextracti128 xmm1, ymm0, 1
- LONG $0xc1eaf9c5 // vpminsw xmm0, xmm0, xmm1
- LONG $0x45eff9c5; BYTE $0x50 // vpxor xmm0, xmm0, oword 80[rbp] /* [rip + .LCPI2_3] */
- LONG $0x4179e2c4; BYTE $0xc0 // vphminposuw xmm0, xmm0
- LONG $0xc67ef9c5 // vmovd esi, xmm0
- LONG $0x8000f681; WORD $0x0000 // xor esi, 32768
- WORD $0x394d; BYTE $0xca // cmp r10, r9
- JE LBB2_12
-
-LBB2_11:
- LONG $0x04b70f42; BYTE $0x57 // movzx eax, word [rdi + 2*r10]
- WORD $0x3966; BYTE $0xc6 // cmp si, ax
- WORD $0x4f0f; BYTE $0xf0 // cmovg esi, eax
- LONG $0xc0394166 // cmp r8w, ax
- LONG $0xc04c0f44 // cmovl r8d, eax
- LONG $0x01c28349 // add r10, 1
- WORD $0x394d; BYTE $0xd1 // cmp r9, r10
- JNE LBB2_11
-
-LBB2_12:
- LONG $0x01894466 // mov word [rcx], r8w
- WORD $0x8966; BYTE $0x32 // mov word [rdx], si
- VZEROUPPER
- RET
-
-LBB2_5:
- LONG $0x4d6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword 0[rbp] /* [rip + .LCPI2_0] */
- LONG $0x456ffdc5; BYTE $0x20 // vmovdqa ymm0, yword 32[rbp] /* [rip + .LCPI2_1] */
- WORD $0xc031 // xor eax, eax
- LONG $0xd06ffdc5 // vmovdqa ymm2, ymm0
- LONG $0xd96ffdc5 // vmovdqa ymm3, ymm1
- LONG $0x01c0f641 // test r8b, 1
- JNE LBB2_9
- JMP LBB2_10
-
-TEXT ·_uint16_max_min_avx2(SB), $0-32
-
- MOVQ values+0(FP), DI
- MOVQ length+8(FP), SI
- MOVQ minout+16(FP), DX
- MOVQ maxout+24(FP), CX
-
- WORD $0xf685 // test esi, esi
- JLE LBB3_1
- WORD $0x8941; BYTE $0xf1 // mov r9d, esi
- WORD $0xfe83; BYTE $0x1f // cmp esi, 31
- JA LBB3_4
- LONG $0xffb84166; BYTE $0xff // mov r8w, -1
- WORD $0x3145; BYTE $0xd2 // xor r10d, r10d
- WORD $0xf631 // xor esi, esi
- JMP LBB3_11
-
-LBB3_1:
- LONG $0xffb84166; BYTE $0xff // mov r8w, -1
- WORD $0xf631 // xor esi, esi
- JMP LBB3_12
-
-LBB3_4:
- WORD $0x8945; BYTE $0xca // mov r10d, r9d
- LONG $0xe0e28341 // and r10d, -32
- LONG $0xe0428d49 // lea rax, [r10 - 32]
- WORD $0x8949; BYTE $0xc0 // mov r8, rax
- LONG $0x05e8c149 // shr r8, 5
- LONG $0x01c08349 // add r8, 1
- WORD $0x8548; BYTE $0xc0 // test rax, rax
- JE LBB3_5
- WORD $0x894c; BYTE $0xc6 // mov rsi, r8
- LONG $0xfee68348 // and rsi, -2
- WORD $0xf748; BYTE $0xde // neg rsi
- LONG $0xc0eff9c5 // vpxor xmm0, xmm0, xmm0
- LONG $0xc976f5c5 // vpcmpeqd ymm1, ymm1, ymm1
- WORD $0xc031 // xor eax, eax
- LONG $0xd276edc5 // vpcmpeqd ymm2, ymm2, ymm2
- LONG $0xdbefe1c5 // vpxor xmm3, xmm3, xmm3
-
-LBB3_7:
- LONG $0x246ffec5; BYTE $0x47 // vmovdqu ymm4, yword [rdi + 2*rax]
- LONG $0x6c6ffec5; WORD $0x2047 // vmovdqu ymm5, yword [rdi + 2*rax + 32]
- LONG $0x746ffec5; WORD $0x4047 // vmovdqu ymm6, yword [rdi + 2*rax + 64]
- LONG $0x7c6ffec5; WORD $0x6047 // vmovdqu ymm7, yword [rdi + 2*rax + 96]
- LONG $0x3a75e2c4; BYTE $0xcc // vpminuw ymm1, ymm1, ymm4
- LONG $0x3a6de2c4; BYTE $0xd5 // vpminuw ymm2, ymm2, ymm5
- LONG $0x3e7de2c4; BYTE $0xc4 // vpmaxuw ymm0, ymm0, ymm4
- LONG $0x3e65e2c4; BYTE $0xdd // vpmaxuw ymm3, ymm3, ymm5
- LONG $0x3a75e2c4; BYTE $0xce // vpminuw ymm1, ymm1, ymm6
- LONG $0x3a6de2c4; BYTE $0xd7 // vpminuw ymm2, ymm2, ymm7
- LONG $0x3e7de2c4; BYTE $0xc6 // vpmaxuw ymm0, ymm0, ymm6
- LONG $0x3e65e2c4; BYTE $0xdf // vpmaxuw ymm3, ymm3, ymm7
- LONG $0x40c08348 // add rax, 64
- LONG $0x02c68348 // add rsi, 2
- JNE LBB3_7
- LONG $0x01c0f641 // test r8b, 1
- JE LBB3_10
-
-LBB3_9:
- LONG $0x246ffec5; BYTE $0x47 // vmovdqu ymm4, yword [rdi + 2*rax]
- LONG $0x6c6ffec5; WORD $0x2047 // vmovdqu ymm5, yword [rdi + 2*rax + 32]
- LONG $0x3e65e2c4; BYTE $0xdd // vpmaxuw ymm3, ymm3, ymm5
- LONG $0x3e7de2c4; BYTE $0xc4 // vpmaxuw ymm0, ymm0, ymm4
- LONG $0x3a6de2c4; BYTE $0xd5 // vpminuw ymm2, ymm2, ymm5
- LONG $0x3a75e2c4; BYTE $0xcc // vpminuw ymm1, ymm1, ymm4
-
-LBB3_10:
- LONG $0x3a75e2c4; BYTE $0xca // vpminuw ymm1, ymm1, ymm2
- LONG $0x3e7de2c4; BYTE $0xc3 // vpmaxuw ymm0, ymm0, ymm3
- LONG $0x397de3c4; WORD $0x01c2 // vextracti128 xmm2, ymm0, 1
- LONG $0x3e79e2c4; BYTE $0xc2 // vpmaxuw xmm0, xmm0, xmm2
- LONG $0xd276e9c5 // vpcmpeqd xmm2, xmm2, xmm2
- LONG $0xc2eff9c5 // vpxor xmm0, xmm0, xmm2
- LONG $0x4179e2c4; BYTE $0xc0 // vphminposuw xmm0, xmm0
- LONG $0xc67ef9c5 // vmovd esi, xmm0
- WORD $0xd6f7 // not esi
- LONG $0x397de3c4; WORD $0x01c8 // vextracti128 xmm0, ymm1, 1
- LONG $0x3a71e2c4; BYTE $0xc0 // vpminuw xmm0, xmm1, xmm0
- LONG $0x4179e2c4; BYTE $0xc0 // vphminposuw xmm0, xmm0
- LONG $0x7e79c1c4; BYTE $0xc0 // vmovd r8d, xmm0
- WORD $0x394d; BYTE $0xca // cmp r10, r9
- JE LBB3_12
-
-LBB3_11:
- LONG $0x04b70f42; BYTE $0x57 // movzx eax, word [rdi + 2*r10]
- LONG $0xc0394166 // cmp r8w, ax
- LONG $0xc0430f44 // cmovae r8d, eax
- WORD $0x3966; BYTE $0xc6 // cmp si, ax
- WORD $0x460f; BYTE $0xf0 // cmovbe esi, eax
- LONG $0x01c28349 // add r10, 1
- WORD $0x394d; BYTE $0xd1 // cmp r9, r10
- JNE LBB3_11
-
-LBB3_12:
- WORD $0x8966; BYTE $0x31 // mov word [rcx], si
- LONG $0x02894466 // mov word [rdx], r8w
- VZEROUPPER
- RET
-
-LBB3_5:
- LONG $0xc0eff9c5 // vpxor xmm0, xmm0, xmm0
- LONG $0xc976f5c5 // vpcmpeqd ymm1, ymm1, ymm1
- WORD $0xc031 // xor eax, eax
- LONG $0xd276edc5 // vpcmpeqd ymm2, ymm2, ymm2
- LONG $0xdbefe1c5 // vpxor xmm3, xmm3, xmm3
- LONG $0x01c0f641 // test r8b, 1
- JNE LBB3_9
- JMP LBB3_10
-
-DATA LCDATA3<>+0x000(SB)/8, $0x7fffffff80000000
-GLOBL LCDATA3<>(SB), 8, $8
-
-TEXT ·_int32_max_min_avx2(SB), $0-32
-
- MOVQ values+0(FP), DI
- MOVQ length+8(FP), SI
- MOVQ minout+16(FP), DX
- MOVQ maxout+24(FP), CX
- LEAQ LCDATA3<>(SB), BP
-
- WORD $0xf685 // test esi, esi
- JLE LBB4_1
- WORD $0x8941; BYTE $0xf0 // mov r8d, esi
- WORD $0xfe83; BYTE $0x1f // cmp esi, 31
- JA LBB4_4
- LONG $0x0000ba41; WORD $0x8000 // mov r10d, -2147483648
- LONG $0xffffffb8; BYTE $0x7f // mov eax, 2147483647
- WORD $0x3145; BYTE $0xc9 // xor r9d, r9d
- JMP LBB4_7
-
-LBB4_1:
- LONG $0xffffffb8; BYTE $0x7f // mov eax, 2147483647
- LONG $0x000000be; BYTE $0x80 // mov esi, -2147483648
- JMP LBB4_8
-
-LBB4_4:
- WORD $0x8945; BYTE $0xc1 // mov r9d, r8d
- LONG $0x587de2c4; WORD $0x0065 // vpbroadcastd ymm4, dword 0[rbp] /* [rip + .LCPI4_0] */
- LONG $0xe0e18341 // and r9d, -32
- LONG $0x587de2c4; WORD $0x0445 // vpbroadcastd ymm0, dword 4[rbp] /* [rip + .LCPI4_1] */
- WORD $0xc031 // xor eax, eax
- LONG $0xc86ffdc5 // vmovdqa ymm1, ymm0
- LONG $0xd06ffdc5 // vmovdqa ymm2, ymm0
- LONG $0xd86ffdc5 // vmovdqa ymm3, ymm0
- LONG $0xec6ffdc5 // vmovdqa ymm5, ymm4
- LONG $0xf46ffdc5 // vmovdqa ymm6, ymm4
- LONG $0xfc6ffdc5 // vmovdqa ymm7, ymm4
-
-LBB4_5:
- LONG $0x046f7ec5; BYTE $0x87 // vmovdqu ymm8, yword [rdi + 4*rax]
- LONG $0x4c6f7ec5; WORD $0x2087 // vmovdqu ymm9, yword [rdi + 4*rax + 32]
- LONG $0x546f7ec5; WORD $0x4087 // vmovdqu ymm10, yword [rdi + 4*rax + 64]
- LONG $0x5c6f7ec5; WORD $0x6087 // vmovdqu ymm11, yword [rdi + 4*rax + 96]
- LONG $0x397dc2c4; BYTE $0xc0 // vpminsd ymm0, ymm0, ymm8
- LONG $0x3975c2c4; BYTE $0xc9 // vpminsd ymm1, ymm1, ymm9
- LONG $0x396dc2c4; BYTE $0xd2 // vpminsd ymm2, ymm2, ymm10
- LONG $0x3965c2c4; BYTE $0xdb // vpminsd ymm3, ymm3, ymm11
- LONG $0x3d5dc2c4; BYTE $0xe0 // vpmaxsd ymm4, ymm4, ymm8
- LONG $0x3d55c2c4; BYTE $0xe9 // vpmaxsd ymm5, ymm5, ymm9
- LONG $0x3d4dc2c4; BYTE $0xf2 // vpmaxsd ymm6, ymm6, ymm10
- LONG $0x3d45c2c4; BYTE $0xfb // vpmaxsd ymm7, ymm7, ymm11
- LONG $0x20c08348 // add rax, 32
- WORD $0x3949; BYTE $0xc1 // cmp r9, rax
- JNE LBB4_5
- LONG $0x3d5de2c4; BYTE $0xe5 // vpmaxsd ymm4, ymm4, ymm5
- LONG $0x3d5de2c4; BYTE $0xe6 // vpmaxsd ymm4, ymm4, ymm6
- LONG $0x3d5de2c4; BYTE $0xe7 // vpmaxsd ymm4, ymm4, ymm7
- LONG $0x397de3c4; WORD $0x01e5 // vextracti128 xmm5, ymm4, 1
- LONG $0x3d59e2c4; BYTE $0xe5 // vpmaxsd xmm4, xmm4, xmm5
- LONG $0xec70f9c5; BYTE $0x4e // vpshufd xmm5, xmm4, 78
- LONG $0x3d59e2c4; BYTE $0xe5 // vpmaxsd xmm4, xmm4, xmm5
- LONG $0xec70f9c5; BYTE $0xe5 // vpshufd xmm5, xmm4, 229
- LONG $0x3d59e2c4; BYTE $0xe5 // vpmaxsd xmm4, xmm4, xmm5
- LONG $0x7e79c1c4; BYTE $0xe2 // vmovd r10d, xmm4
- LONG $0x397de2c4; BYTE $0xc1 // vpminsd ymm0, ymm0, ymm1
- LONG $0x397de2c4; BYTE $0xc2 // vpminsd ymm0, ymm0, ymm2
- LONG $0x397de2c4; BYTE $0xc3 // vpminsd ymm0, ymm0, ymm3
- LONG $0x397de3c4; WORD $0x01c1 // vextracti128 xmm1, ymm0, 1
- LONG $0x3979e2c4; BYTE $0xc1 // vpminsd xmm0, xmm0, xmm1
- LONG $0xc870f9c5; BYTE $0x4e // vpshufd xmm1, xmm0, 78
- LONG $0x3979e2c4; BYTE $0xc1 // vpminsd xmm0, xmm0, xmm1
- LONG $0xc870f9c5; BYTE $0xe5 // vpshufd xmm1, xmm0, 229
- LONG $0x3979e2c4; BYTE $0xc1 // vpminsd xmm0, xmm0, xmm1
- LONG $0xc07ef9c5 // vmovd eax, xmm0
- WORD $0x8944; BYTE $0xd6 // mov esi, r10d
- WORD $0x394d; BYTE $0xc1 // cmp r9, r8
- JE LBB4_8
-
-LBB4_7:
- LONG $0x8f348b42 // mov esi, dword [rdi + 4*r9]
- WORD $0xf039 // cmp eax, esi
- WORD $0x4f0f; BYTE $0xc6 // cmovg eax, esi
- WORD $0x3941; BYTE $0xf2 // cmp r10d, esi
- LONG $0xf24d0f41 // cmovge esi, r10d
- LONG $0x01c18349 // add r9, 1
- WORD $0x8941; BYTE $0xf2 // mov r10d, esi
- WORD $0x394d; BYTE $0xc8 // cmp r8, r9
- JNE LBB4_7
-
-LBB4_8:
- WORD $0x3189 // mov dword [rcx], esi
- WORD $0x0289 // mov dword [rdx], eax
- VZEROUPPER
- RET
-
-TEXT ·_uint32_max_min_avx2(SB), $0-32
-
- MOVQ values+0(FP), DI
- MOVQ length+8(FP), SI
- MOVQ minout+16(FP), DX
- MOVQ maxout+24(FP), CX
-
- WORD $0xf685 // test esi, esi
- JLE LBB5_1
- WORD $0x8941; BYTE $0xf0 // mov r8d, esi
- WORD $0xfe83; BYTE $0x1f // cmp esi, 31
- JA LBB5_4
- WORD $0x3145; BYTE $0xc9 // xor r9d, r9d
- LONG $0xffffffb8; BYTE $0xff // mov eax, -1
- WORD $0x3145; BYTE $0xd2 // xor r10d, r10d
- JMP LBB5_7
-
-LBB5_1:
- LONG $0xffffffb8; BYTE $0xff // mov eax, -1
- WORD $0xf631 // xor esi, esi
- JMP LBB5_8
-
-LBB5_4:
- WORD $0x8945; BYTE $0xc1 // mov r9d, r8d
- LONG $0xe0e18341 // and r9d, -32
- LONG $0xe4efd9c5 // vpxor xmm4, xmm4, xmm4
- LONG $0xc076fdc5 // vpcmpeqd ymm0, ymm0, ymm0
- WORD $0xc031 // xor eax, eax
- LONG $0xc976f5c5 // vpcmpeqd ymm1, ymm1, ymm1
- LONG $0xd276edc5 // vpcmpeqd ymm2, ymm2, ymm2
- LONG $0xdb76e5c5 // vpcmpeqd ymm3, ymm3, ymm3
- LONG $0xedefd1c5 // vpxor xmm5, xmm5, xmm5
- LONG $0xf6efc9c5 // vpxor xmm6, xmm6, xmm6
- LONG $0xffefc1c5 // vpxor xmm7, xmm7, xmm7
-
-LBB5_5:
- LONG $0x046f7ec5; BYTE $0x87 // vmovdqu ymm8, yword [rdi + 4*rax]
- LONG $0x4c6f7ec5; WORD $0x2087 // vmovdqu ymm9, yword [rdi + 4*rax + 32]
- LONG $0x546f7ec5; WORD $0x4087 // vmovdqu ymm10, yword [rdi + 4*rax + 64]
- LONG $0x5c6f7ec5; WORD $0x6087 // vmovdqu ymm11, yword [rdi + 4*rax + 96]
- LONG $0x3b7dc2c4; BYTE $0xc0 // vpminud ymm0, ymm0, ymm8
- LONG $0x3b75c2c4; BYTE $0xc9 // vpminud ymm1, ymm1, ymm9
- LONG $0x3b6dc2c4; BYTE $0xd2 // vpminud ymm2, ymm2, ymm10
- LONG $0x3b65c2c4; BYTE $0xdb // vpminud ymm3, ymm3, ymm11
- LONG $0x3f5dc2c4; BYTE $0xe0 // vpmaxud ymm4, ymm4, ymm8
- LONG $0x3f55c2c4; BYTE $0xe9 // vpmaxud ymm5, ymm5, ymm9
- LONG $0x3f4dc2c4; BYTE $0xf2 // vpmaxud ymm6, ymm6, ymm10
- LONG $0x3f45c2c4; BYTE $0xfb // vpmaxud ymm7, ymm7, ymm11
- LONG $0x20c08348 // add rax, 32
- WORD $0x3949; BYTE $0xc1 // cmp r9, rax
- JNE LBB5_5
- LONG $0x3f5de2c4; BYTE $0xe5 // vpmaxud ymm4, ymm4, ymm5
- LONG $0x3f5de2c4; BYTE $0xe6 // vpmaxud ymm4, ymm4, ymm6
- LONG $0x3f5de2c4; BYTE $0xe7 // vpmaxud ymm4, ymm4, ymm7
- LONG $0x397de3c4; WORD $0x01e5 // vextracti128 xmm5, ymm4, 1
- LONG $0x3f59e2c4; BYTE $0xe5 // vpmaxud xmm4, xmm4, xmm5
- LONG $0xec70f9c5; BYTE $0x4e // vpshufd xmm5, xmm4, 78
- LONG $0x3f59e2c4; BYTE $0xe5 // vpmaxud xmm4, xmm4, xmm5
- LONG $0xec70f9c5; BYTE $0xe5 // vpshufd xmm5, xmm4, 229
- LONG $0x3f59e2c4; BYTE $0xe5 // vpmaxud xmm4, xmm4, xmm5
- LONG $0x7e79c1c4; BYTE $0xe2 // vmovd r10d, xmm4
- LONG $0x3b7de2c4; BYTE $0xc1 // vpminud ymm0, ymm0, ymm1
- LONG $0x3b7de2c4; BYTE $0xc2 // vpminud ymm0, ymm0, ymm2
- LONG $0x3b7de2c4; BYTE $0xc3 // vpminud ymm0, ymm0, ymm3
- LONG $0x397de3c4; WORD $0x01c1 // vextracti128 xmm1, ymm0, 1
- LONG $0x3b79e2c4; BYTE $0xc1 // vpminud xmm0, xmm0, xmm1
- LONG $0xc870f9c5; BYTE $0x4e // vpshufd xmm1, xmm0, 78
- LONG $0x3b79e2c4; BYTE $0xc1 // vpminud xmm0, xmm0, xmm1
- LONG $0xc870f9c5; BYTE $0xe5 // vpshufd xmm1, xmm0, 229
- LONG $0x3b79e2c4; BYTE $0xc1 // vpminud xmm0, xmm0, xmm1
- LONG $0xc07ef9c5 // vmovd eax, xmm0
- WORD $0x8944; BYTE $0xd6 // mov esi, r10d
- WORD $0x394d; BYTE $0xc1 // cmp r9, r8
- JE LBB5_8
-
-LBB5_7:
- LONG $0x8f348b42 // mov esi, dword [rdi + 4*r9]
- WORD $0xf039 // cmp eax, esi
- WORD $0x430f; BYTE $0xc6 // cmovae eax, esi
- WORD $0x3941; BYTE $0xf2 // cmp r10d, esi
- LONG $0xf2470f41 // cmova esi, r10d
- LONG $0x01c18349 // add r9, 1
- WORD $0x8941; BYTE $0xf2 // mov r10d, esi
- WORD $0x394d; BYTE $0xc8 // cmp r8, r9
- JNE LBB5_7
-
-LBB5_8:
- WORD $0x3189 // mov dword [rcx], esi
- WORD $0x0289 // mov dword [rdx], eax
- VZEROUPPER
- RET
-
-DATA LCDATA4<>+0x000(SB)/8, $0x8000000000000000
-DATA LCDATA4<>+0x008(SB)/8, $0x7fffffffffffffff
-GLOBL LCDATA4<>(SB), 8, $16
-
-TEXT ·_int64_max_min_avx2(SB), $0-32
-
- MOVQ values+0(FP), DI
- MOVQ length+8(FP), SI
- MOVQ minout+16(FP), DX
- MOVQ maxout+24(FP), CX
- LEAQ LCDATA4<>(SB), BP
-
- QUAD $0xffffffffffffb848; WORD $0x7fff // mov rax, 9223372036854775807
- WORD $0xf685 // test esi, esi
- JLE LBB6_1
- WORD $0x8941; BYTE $0xf0 // mov r8d, esi
- WORD $0xfe83; BYTE $0x0f // cmp esi, 15
- JA LBB6_4
- LONG $0x01508d4c // lea r10, [rax + 1]
- WORD $0x3145; BYTE $0xc9 // xor r9d, r9d
- JMP LBB6_7
-
-LBB6_1:
- LONG $0x01708d48 // lea rsi, [rax + 1]
- JMP LBB6_8
-
-LBB6_4:
- WORD $0x8945; BYTE $0xc1 // mov r9d, r8d
- LONG $0x597de2c4; WORD $0x0065 // vpbroadcastq ymm4, qword 0[rbp] /* [rip + .LCPI6_0] */
- LONG $0xf0e18341 // and r9d, -16
- LONG $0x597de2c4; WORD $0x0845 // vpbroadcastq ymm0, qword 8[rbp] /* [rip + .LCPI6_1] */
- WORD $0xc031 // xor eax, eax
- LONG $0xd86ffdc5 // vmovdqa ymm3, ymm0
- LONG $0xd06ffdc5 // vmovdqa ymm2, ymm0
- LONG $0xc86ffdc5 // vmovdqa ymm1, ymm0
- LONG $0xfc6ffdc5 // vmovdqa ymm7, ymm4
- LONG $0xf46ffdc5 // vmovdqa ymm6, ymm4
- LONG $0xec6ffdc5 // vmovdqa ymm5, ymm4
-
-LBB6_5:
- LONG $0x046f7ec5; BYTE $0xc7 // vmovdqu ymm8, yword [rdi + 8*rax]
- LONG $0x373d62c4; BYTE $0xc8 // vpcmpgtq ymm9, ymm8, ymm0
- LONG $0x4b3de3c4; WORD $0x90c0 // vblendvpd ymm0, ymm8, ymm0, ymm9
- LONG $0x4c6f7ec5; WORD $0x20c7 // vmovdqu ymm9, yword [rdi + 8*rax + 32]
- LONG $0x373562c4; BYTE $0xd3 // vpcmpgtq ymm10, ymm9, ymm3
- LONG $0x4b35e3c4; WORD $0xa0db // vblendvpd ymm3, ymm9, ymm3, ymm10
- LONG $0x546f7ec5; WORD $0x40c7 // vmovdqu ymm10, yword [rdi + 8*rax + 64]
- LONG $0x372d62c4; BYTE $0xda // vpcmpgtq ymm11, ymm10, ymm2
- LONG $0x4b2de3c4; WORD $0xb0d2 // vblendvpd ymm2, ymm10, ymm2, ymm11
- LONG $0x5c6f7ec5; WORD $0x60c7 // vmovdqu ymm11, yword [rdi + 8*rax + 96]
- LONG $0x372562c4; BYTE $0xe1 // vpcmpgtq ymm12, ymm11, ymm1
- LONG $0x4b25e3c4; WORD $0xc0c9 // vblendvpd ymm1, ymm11, ymm1, ymm12
- LONG $0x375d42c4; BYTE $0xe0 // vpcmpgtq ymm12, ymm4, ymm8
- LONG $0x4b3de3c4; WORD $0xc0e4 // vblendvpd ymm4, ymm8, ymm4, ymm12
- LONG $0x374542c4; BYTE $0xc1 // vpcmpgtq ymm8, ymm7, ymm9
- LONG $0x4b35e3c4; WORD $0x80ff // vblendvpd ymm7, ymm9, ymm7, ymm8
- LONG $0x374d42c4; BYTE $0xc2 // vpcmpgtq ymm8, ymm6, ymm10
- LONG $0x4b2de3c4; WORD $0x80f6 // vblendvpd ymm6, ymm10, ymm6, ymm8
- LONG $0x375542c4; BYTE $0xc3 // vpcmpgtq ymm8, ymm5, ymm11
- LONG $0x4b25e3c4; WORD $0x80ed // vblendvpd ymm5, ymm11, ymm5, ymm8
- LONG $0x10c08348 // add rax, 16
- WORD $0x3949; BYTE $0xc1 // cmp r9, rax
- JNE LBB6_5
- LONG $0x375d62c4; BYTE $0xc7 // vpcmpgtq ymm8, ymm4, ymm7
- LONG $0x4b45e3c4; WORD $0x80e4 // vblendvpd ymm4, ymm7, ymm4, ymm8
- LONG $0x375de2c4; BYTE $0xfe // vpcmpgtq ymm7, ymm4, ymm6
- LONG $0x4b4de3c4; WORD $0x70e4 // vblendvpd ymm4, ymm6, ymm4, ymm7
- LONG $0x375de2c4; BYTE $0xf5 // vpcmpgtq ymm6, ymm4, ymm5
- LONG $0x4b55e3c4; WORD $0x60e4 // vblendvpd ymm4, ymm5, ymm4, ymm6
- LONG $0x197de3c4; WORD $0x01e5 // vextractf128 xmm5, ymm4, 1
- LONG $0x3759e2c4; BYTE $0xf5 // vpcmpgtq xmm6, xmm4, xmm5
- LONG $0x4b51e3c4; WORD $0x60e4 // vblendvpd xmm4, xmm5, xmm4, xmm6
- LONG $0x0479e3c4; WORD $0x4eec // vpermilps xmm5, xmm4, 78
- LONG $0x3759e2c4; BYTE $0xf5 // vpcmpgtq xmm6, xmm4, xmm5
- LONG $0x4b51e3c4; WORD $0x60e4 // vblendvpd xmm4, xmm5, xmm4, xmm6
- LONG $0x7ef9c1c4; BYTE $0xe2 // vmovq r10, xmm4
- LONG $0x3765e2c4; BYTE $0xe0 // vpcmpgtq ymm4, ymm3, ymm0
- LONG $0x4b65e3c4; WORD $0x40c0 // vblendvpd ymm0, ymm3, ymm0, ymm4
- LONG $0x376de2c4; BYTE $0xd8 // vpcmpgtq ymm3, ymm2, ymm0
- LONG $0x4b6de3c4; WORD $0x30c0 // vblendvpd ymm0, ymm2, ymm0, ymm3
- LONG $0x3775e2c4; BYTE $0xd0 // vpcmpgtq ymm2, ymm1, ymm0
- LONG $0x4b75e3c4; WORD $0x20c0 // vblendvpd ymm0, ymm1, ymm0, ymm2
- LONG $0x197de3c4; WORD $0x01c1 // vextractf128 xmm1, ymm0, 1
- LONG $0x3771e2c4; BYTE $0xd0 // vpcmpgtq xmm2, xmm1, xmm0
- LONG $0x4b71e3c4; WORD $0x20c0 // vblendvpd xmm0, xmm1, xmm0, xmm2
- LONG $0x0479e3c4; WORD $0x4ec8 // vpermilps xmm1, xmm0, 78
- LONG $0x3771e2c4; BYTE $0xd0 // vpcmpgtq xmm2, xmm1, xmm0
- LONG $0x4b71e3c4; WORD $0x20c0 // vblendvpd xmm0, xmm1, xmm0, xmm2
- LONG $0x7ef9e1c4; BYTE $0xc0 // vmovq rax, xmm0
- WORD $0x894c; BYTE $0xd6 // mov rsi, r10
- WORD $0x394d; BYTE $0xc1 // cmp r9, r8
- JE LBB6_8
-
-LBB6_7:
- LONG $0xcf348b4a // mov rsi, qword [rdi + 8*r9]
- WORD $0x3948; BYTE $0xf0 // cmp rax, rsi
- LONG $0xc64f0f48 // cmovg rax, rsi
- WORD $0x3949; BYTE $0xf2 // cmp r10, rsi
- LONG $0xf24d0f49 // cmovge rsi, r10
- LONG $0x01c18349 // add r9, 1
- WORD $0x8949; BYTE $0xf2 // mov r10, rsi
- WORD $0x394d; BYTE $0xc8 // cmp r8, r9
- JNE LBB6_7
-
-LBB6_8:
- WORD $0x8948; BYTE $0x31 // mov qword [rcx], rsi
- WORD $0x8948; BYTE $0x02 // mov qword [rdx], rax
- VZEROUPPER
- RET
-
-DATA LCDATA5<>+0x000(SB)/8, $0x8000000000000000
-GLOBL LCDATA5<>(SB), 8, $8
-
-TEXT ·_uint64_max_min_avx2(SB), $0-32
-
- MOVQ values+0(FP), DI
- MOVQ length+8(FP), SI
- MOVQ minout+16(FP), DX
- MOVQ maxout+24(FP), CX
- LEAQ LCDATA5<>(SB), BP
-
- WORD $0xf685 // test esi, esi
- JLE LBB7_1
- WORD $0x8941; BYTE $0xf0 // mov r8d, esi
- WORD $0xfe83; BYTE $0x0f // cmp esi, 15
- JA LBB7_4
- LONG $0xffc0c748; WORD $0xffff; BYTE $0xff // mov rax, -1
- WORD $0x3145; BYTE $0xc9 // xor r9d, r9d
- WORD $0x3145; BYTE $0xd2 // xor r10d, r10d
- JMP LBB7_7
-
-LBB7_1:
- LONG $0xffc0c748; WORD $0xffff; BYTE $0xff // mov rax, -1
- WORD $0xf631 // xor esi, esi
- JMP LBB7_8
-
-LBB7_4:
- WORD $0x8945; BYTE $0xc1 // mov r9d, r8d
- LONG $0xf0e18341 // and r9d, -16
- LONG $0xedefd1c5 // vpxor xmm5, xmm5, xmm5
- LONG $0xc976f5c5 // vpcmpeqd ymm1, ymm1, ymm1
- WORD $0xc031 // xor eax, eax
- LONG $0x597de2c4; WORD $0x0045 // vpbroadcastq ymm0, qword 0[rbp] /* [rip + .LCPI7_0] */
- LONG $0xe476ddc5 // vpcmpeqd ymm4, ymm4, ymm4
- LONG $0xdb76e5c5 // vpcmpeqd ymm3, ymm3, ymm3
- LONG $0xd276edc5 // vpcmpeqd ymm2, ymm2, ymm2
- LONG $0xef3941c4; BYTE $0xc0 // vpxor xmm8, xmm8, xmm8
- LONG $0xffefc1c5 // vpxor xmm7, xmm7, xmm7
- LONG $0xf6efc9c5 // vpxor xmm6, xmm6, xmm6
-
-LBB7_5:
- LONG $0x0c6f7ec5; BYTE $0xc7 // vmovdqu ymm9, yword [rdi + 8*rax]
- LONG $0xd0ef75c5 // vpxor ymm10, ymm1, ymm0
- LONG $0xd8ef35c5 // vpxor ymm11, ymm9, ymm0
- LONG $0x372542c4; BYTE $0xd2 // vpcmpgtq ymm10, ymm11, ymm10
- LONG $0x4b35e3c4; WORD $0xa0c9 // vblendvpd ymm1, ymm9, ymm1, ymm10
- LONG $0xd0ef55c5 // vpxor ymm10, ymm5, ymm0
- LONG $0x372d42c4; BYTE $0xd3 // vpcmpgtq ymm10, ymm10, ymm11
- LONG $0x4b35e3c4; WORD $0xa0ed // vblendvpd ymm5, ymm9, ymm5, ymm10
- LONG $0x4c6f7ec5; WORD $0x20c7 // vmovdqu ymm9, yword [rdi + 8*rax + 32]
- LONG $0xd0ef5dc5 // vpxor ymm10, ymm4, ymm0
- LONG $0xd8ef35c5 // vpxor ymm11, ymm9, ymm0
- LONG $0x372542c4; BYTE $0xd2 // vpcmpgtq ymm10, ymm11, ymm10
- LONG $0x4b35e3c4; WORD $0xa0e4 // vblendvpd ymm4, ymm9, ymm4, ymm10
- LONG $0xd0ef3dc5 // vpxor ymm10, ymm8, ymm0
- LONG $0x372d42c4; BYTE $0xd3 // vpcmpgtq ymm10, ymm10, ymm11
- LONG $0x5c6f7ec5; WORD $0x40c7 // vmovdqu ymm11, yword [rdi + 8*rax + 64]
- LONG $0x4b3543c4; WORD $0xa0c0 // vblendvpd ymm8, ymm9, ymm8, ymm10
- LONG $0xc8ef65c5 // vpxor ymm9, ymm3, ymm0
- LONG $0xd0ef25c5 // vpxor ymm10, ymm11, ymm0
- LONG $0x372d42c4; BYTE $0xc9 // vpcmpgtq ymm9, ymm10, ymm9
- LONG $0x4b25e3c4; WORD $0x90db // vblendvpd ymm3, ymm11, ymm3, ymm9
- LONG $0xc8ef45c5 // vpxor ymm9, ymm7, ymm0
- LONG $0x373542c4; BYTE $0xca // vpcmpgtq ymm9, ymm9, ymm10
- LONG $0x4b25e3c4; WORD $0x90ff // vblendvpd ymm7, ymm11, ymm7, ymm9
- LONG $0x4c6f7ec5; WORD $0x60c7 // vmovdqu ymm9, yword [rdi + 8*rax + 96]
- LONG $0xd0ef6dc5 // vpxor ymm10, ymm2, ymm0
- LONG $0xd8ef35c5 // vpxor ymm11, ymm9, ymm0
- LONG $0x372542c4; BYTE $0xd2 // vpcmpgtq ymm10, ymm11, ymm10
- LONG $0x4b35e3c4; WORD $0xa0d2 // vblendvpd ymm2, ymm9, ymm2, ymm10
- LONG $0xd0ef4dc5 // vpxor ymm10, ymm6, ymm0
- LONG $0x372d42c4; BYTE $0xd3 // vpcmpgtq ymm10, ymm10, ymm11
- LONG $0x4b35e3c4; WORD $0xa0f6 // vblendvpd ymm6, ymm9, ymm6, ymm10
- LONG $0x10c08348 // add rax, 16
- WORD $0x3949; BYTE $0xc1 // cmp r9, rax
- JNE LBB7_5
- LONG $0xc8ef3dc5 // vpxor ymm9, ymm8, ymm0
- LONG $0xd0ef55c5 // vpxor ymm10, ymm5, ymm0
- LONG $0x372d42c4; BYTE $0xc9 // vpcmpgtq ymm9, ymm10, ymm9
- LONG $0x4b3de3c4; WORD $0x90ed // vblendvpd ymm5, ymm8, ymm5, ymm9
- LONG $0xc05755c5 // vxorpd ymm8, ymm5, ymm0
- LONG $0xc8ef45c5 // vpxor ymm9, ymm7, ymm0
- LONG $0x373d42c4; BYTE $0xc1 // vpcmpgtq ymm8, ymm8, ymm9
- LONG $0x4b45e3c4; WORD $0x80ed // vblendvpd ymm5, ymm7, ymm5, ymm8
- LONG $0xf857d5c5 // vxorpd ymm7, ymm5, ymm0
- LONG $0xc0ef4dc5 // vpxor ymm8, ymm6, ymm0
- LONG $0x3745c2c4; BYTE $0xf8 // vpcmpgtq ymm7, ymm7, ymm8
- LONG $0x4b4de3c4; WORD $0x70ed // vblendvpd ymm5, ymm6, ymm5, ymm7
- LONG $0x197de3c4; WORD $0x01ee // vextractf128 xmm6, ymm5, 1
- LONG $0xc05749c5 // vxorpd xmm8, xmm6, xmm0
- LONG $0xf857d1c5 // vxorpd xmm7, xmm5, xmm0
- LONG $0x3741c2c4; BYTE $0xf8 // vpcmpgtq xmm7, xmm7, xmm8
- LONG $0x4b49e3c4; WORD $0x70ed // vblendvpd xmm5, xmm6, xmm5, xmm7
- LONG $0x0479e3c4; WORD $0x4ef5 // vpermilps xmm6, xmm5, 78
- LONG $0xc05751c5 // vxorpd xmm8, xmm5, xmm0
- LONG $0xf857c9c5 // vxorpd xmm7, xmm6, xmm0
- LONG $0x3739e2c4; BYTE $0xff // vpcmpgtq xmm7, xmm8, xmm7
- LONG $0x4b49e3c4; WORD $0x70ed // vblendvpd xmm5, xmm6, xmm5, xmm7
- LONG $0xf0eff5c5 // vpxor ymm6, ymm1, ymm0
- LONG $0xf8efddc5 // vpxor ymm7, ymm4, ymm0
- LONG $0x3745e2c4; BYTE $0xf6 // vpcmpgtq ymm6, ymm7, ymm6
- LONG $0x4b5de3c4; WORD $0x60c9 // vblendvpd ymm1, ymm4, ymm1, ymm6
- LONG $0xe057f5c5 // vxorpd ymm4, ymm1, ymm0
- LONG $0xf0efe5c5 // vpxor ymm6, ymm3, ymm0
- LONG $0x374de2c4; BYTE $0xe4 // vpcmpgtq ymm4, ymm6, ymm4
- LONG $0x4b65e3c4; WORD $0x40c9 // vblendvpd ymm1, ymm3, ymm1, ymm4
- LONG $0x7ef9c1c4; BYTE $0xea // vmovq r10, xmm5
- LONG $0xd857f5c5 // vxorpd ymm3, ymm1, ymm0
- LONG $0xe0efedc5 // vpxor ymm4, ymm2, ymm0
- LONG $0x375de2c4; BYTE $0xdb // vpcmpgtq ymm3, ymm4, ymm3
- LONG $0x4b6de3c4; WORD $0x30c9 // vblendvpd ymm1, ymm2, ymm1, ymm3
- LONG $0x197de3c4; WORD $0x01ca // vextractf128 xmm2, ymm1, 1
- LONG $0xd857f1c5 // vxorpd xmm3, xmm1, xmm0
- LONG $0xe057e9c5 // vxorpd xmm4, xmm2, xmm0
- LONG $0x3759e2c4; BYTE $0xdb // vpcmpgtq xmm3, xmm4, xmm3
- LONG $0x4b69e3c4; WORD $0x30c9 // vblendvpd xmm1, xmm2, xmm1, xmm3
- LONG $0x0479e3c4; WORD $0x4ed1 // vpermilps xmm2, xmm1, 78
- LONG $0xd857f1c5 // vxorpd xmm3, xmm1, xmm0
- LONG $0xc057e9c5 // vxorpd xmm0, xmm2, xmm0
- LONG $0x3779e2c4; BYTE $0xc3 // vpcmpgtq xmm0, xmm0, xmm3
- LONG $0x4b69e3c4; WORD $0x00c1 // vblendvpd xmm0, xmm2, xmm1, xmm0
- LONG $0x7ef9e1c4; BYTE $0xc0 // vmovq rax, xmm0
- WORD $0x894c; BYTE $0xd6 // mov rsi, r10
- WORD $0x394d; BYTE $0xc1 // cmp r9, r8
- JE LBB7_8
-
-LBB7_7:
- LONG $0xcf348b4a // mov rsi, qword [rdi + 8*r9]
- WORD $0x3948; BYTE $0xf0 // cmp rax, rsi
- LONG $0xc6430f48 // cmovae rax, rsi
- WORD $0x3949; BYTE $0xf2 // cmp r10, rsi
- LONG $0xf2470f49 // cmova rsi, r10
- LONG $0x01c18349 // add r9, 1
- WORD $0x8949; BYTE $0xf2 // mov r10, rsi
- WORD $0x394d; BYTE $0xc8 // cmp r8, r9
- JNE LBB7_7
-
-LBB7_8:
- WORD $0x8948; BYTE $0x31 // mov qword [rcx], rsi
- WORD $0x8948; BYTE $0x02 // mov qword [rdx], rax
- VZEROUPPER
- RET
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_neon_arm64.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_neon_arm64.go
deleted file mode 100644
index f9d3c44e3..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_neon_arm64.go
+++ /dev/null
@@ -1,56 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//go:build !noasm
-
-package utils
-
-import "unsafe"
-
-// This file contains convenience functions for utilizing Arm64 Neon intrinsics to quickly
-// and efficiently get the min and max from an integral slice.
-
-//go:noescape
-func _int32_max_min_neon(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
-
-func int32MaxMinNEON(values []int32) (min, max int32) {
- _int32_max_min_neon(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max))
- return
-}
-
-//go:noescape
-func _uint32_max_min_neon(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
-
-func uint32MaxMinNEON(values []uint32) (min, max uint32) {
- _uint32_max_min_neon(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max))
- return
-}
-
-//go:noescape
-func _int64_max_min_neon(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
-
-func int64MaxMinNEON(values []int64) (min, max int64) {
- _int64_max_min_neon(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max))
- return
-}
-
-//go:noescape
-func _uint64_max_min_neon(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
-
-func uint64MaxMinNEON(values []uint64) (min, max uint64) {
- _uint64_max_min_neon(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max))
- return
-}
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_neon_arm64.s b/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_neon_arm64.s
deleted file mode 100644
index b679bb6e3..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_neon_arm64.s
+++ /dev/null
@@ -1,324 +0,0 @@
-//+build !noasm !appengine
-
-// ARROW-15336
-// (C2GOASM doesn't work correctly for Arm64)
-// Partly GENERATED BY asm2plan9s.
-
-
-// func _int32_max_min_neon(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
-TEXT ·_int32_max_min_neon(SB), $0-32
-
- MOVD values+0(FP), R0
- MOVD length+8(FP), R1
- MOVD minout+16(FP), R2
- MOVD maxout+24(FP), R3
-
- WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
- WORD $0x7100043f // cmp w1, #1
- WORD $0x910003fd // mov x29, sp
- BLT LBB0_3
-
- WORD $0x71000c3f // cmp w1, #3
- WORD $0x2a0103e8 // mov w8, w1
- BHI LBB0_4
-
- WORD $0xaa1f03e9 // mov x9, xzr
- WORD $0x52b0000b // mov w11, #-2147483648
- WORD $0x12b0000a // mov w10, #2147483647
- JMP LBB0_7
-LBB0_3:
- WORD $0x12b0000a // mov w10, #2147483647
- WORD $0x52b0000b // mov w11, #-2147483648
- WORD $0xb900006b // str w11, [x3]
- WORD $0xb900004a // str w10, [x2]
- WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
- RET
-LBB0_4:
- WORD $0x927e7509 // and x9, x8, #0xfffffffc
- WORD $0x9100200a // add x10, x0, #8
- WORD $0x0f046402 // movi v2.2s, #128, lsl #24
- WORD $0x2f046400 // mvni v0.2s, #128, lsl #24
- WORD $0x2f046401 // mvni v1.2s, #128, lsl #24
- WORD $0xaa0903eb // mov x11, x9
- WORD $0x0f046403 // movi v3.2s, #128, lsl #24
-LBB0_5:
- WORD $0x6d7f9544 // ldp d4, d5, [x10, #-8]
- WORD $0xf100116b // subs x11, x11, #4
- WORD $0x9100414a // add x10, x10, #16
- WORD $0x0ea46c00 // smin v0.2s, v0.2s, v4.2s
- WORD $0x0ea56c21 // smin v1.2s, v1.2s, v5.2s
- WORD $0x0ea46442 // smax v2.2s, v2.2s, v4.2s
- WORD $0x0ea56463 // smax v3.2s, v3.2s, v5.2s
- BNE LBB0_5
-
- WORD $0x0ea36442 // smax v2.2s, v2.2s, v3.2s
- WORD $0x0ea16c00 // smin v0.2s, v0.2s, v1.2s
- WORD $0x0e0c0441 // dup v1.2s, v2.s[1]
- WORD $0x0e0c0403 // dup v3.2s, v0.s[1]
- WORD $0x0ea16441 // smax v1.2s, v2.2s, v1.2s
- WORD $0x0ea36c00 // smin v0.2s, v0.2s, v3.2s
- WORD $0xeb08013f // cmp x9, x8
- WORD $0x1e26002b // fmov w11, s1
- WORD $0x1e26000a // fmov w10, s0
- BEQ LBB0_9
-LBB0_7:
- WORD $0x8b09080c // add x12, x0, x9, lsl #2
- WORD $0xcb090108 // sub x8, x8, x9
-LBB0_8:
- WORD $0xb8404589 // ldr w9, [x12], #4
- WORD $0x6b09015f // cmp w10, w9
- WORD $0x1a89b14a // csel w10, w10, w9, lt
- WORD $0x6b09017f // cmp w11, w9
- WORD $0x1a89c16b // csel w11, w11, w9, gt
- WORD $0xf1000508 // subs x8, x8, #1
- BNE LBB0_8
-LBB0_9:
- WORD $0xb900006b // str w11, [x3]
- WORD $0xb900004a // str w10, [x2]
- WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
- RET
-
-// func _uint32_max_min_neon(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
-TEXT ·_uint32_max_min_neon(SB), $0-32
-
- MOVD values+0(FP), R0
- MOVD length+8(FP), R1
- MOVD minout+16(FP), R2
- MOVD maxout+24(FP), R3
-
- WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
- WORD $0x7100043f // cmp w1, #1
- WORD $0x910003fd // mov x29, sp
- BLT LBB1_3
-
- WORD $0x71000c3f // cmp w1, #3
- WORD $0x2a0103e8 // mov w8, w1
- BHI LBB1_4
-
- WORD $0xaa1f03e9 // mov x9, xzr
- WORD $0x2a1f03ea // mov w10, wzr
- WORD $0x1280000b // mov w11, #-1
- JMP LBB1_7
-LBB1_3:
- WORD $0x2a1f03ea // mov w10, wzr
- WORD $0x1280000b // mov w11, #-1
- WORD $0xb900006a // str w10, [x3]
- WORD $0xb900004b // str w11, [x2]
- WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
- RET
-LBB1_4:
- WORD $0x927e7509 // and x9, x8, #0xfffffffc
- WORD $0x6f00e401 // movi v1.2d, #0000000000000000
- WORD $0x6f07e7e0 // movi v0.2d, #0xffffffffffffffff
- WORD $0x9100200a // add x10, x0, #8
- WORD $0x6f07e7e2 // movi v2.2d, #0xffffffffffffffff
- WORD $0xaa0903eb // mov x11, x9
- WORD $0x6f00e403 // movi v3.2d, #0000000000000000
-LBB1_5:
- WORD $0x6d7f9544 // ldp d4, d5, [x10, #-8]
- WORD $0xf100116b // subs x11, x11, #4
- WORD $0x9100414a // add x10, x10, #16
- WORD $0x2ea46c00 // umin v0.2s, v0.2s, v4.2s
- WORD $0x2ea56c42 // umin v2.2s, v2.2s, v5.2s
- WORD $0x2ea46421 // umax v1.2s, v1.2s, v4.2s
- WORD $0x2ea56463 // umax v3.2s, v3.2s, v5.2s
- BNE LBB1_5
-
- WORD $0x2ea36421 // umax v1.2s, v1.2s, v3.2s
- WORD $0x2ea26c00 // umin v0.2s, v0.2s, v2.2s
- WORD $0x0e0c0422 // dup v2.2s, v1.s[1]
- WORD $0x0e0c0403 // dup v3.2s, v0.s[1]
- WORD $0x2ea26421 // umax v1.2s, v1.2s, v2.2s
- WORD $0x2ea36c00 // umin v0.2s, v0.2s, v3.2s
- WORD $0xeb08013f // cmp x9, x8
- WORD $0x1e26002a // fmov w10, s1
- WORD $0x1e26000b // fmov w11, s0
- BEQ LBB1_9
-LBB1_7:
- WORD $0x8b09080c // add x12, x0, x9, lsl #2
- WORD $0xcb090108 // sub x8, x8, x9
-LBB1_8:
- WORD $0xb8404589 // ldr w9, [x12], #4
- WORD $0x6b09017f // cmp w11, w9
- WORD $0x1a89316b // csel w11, w11, w9, lo
- WORD $0x6b09015f // cmp w10, w9
- WORD $0x1a89814a // csel w10, w10, w9, hi
- WORD $0xf1000508 // subs x8, x8, #1
- BNE LBB1_8
-LBB1_9:
- WORD $0xb900006a // str w10, [x3]
- WORD $0xb900004b // str w11, [x2]
- WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
- RET
-
-// func _int64_max_min_neon(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
-TEXT ·_int64_max_min_neon(SB), $0-32
-
- MOVD values+0(FP), R0
- MOVD length+8(FP), R1
- MOVD minout+16(FP), R2
- MOVD maxout+24(FP), R3
-
- WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
- WORD $0x7100043f // cmp w1, #1
- WORD $0x910003fd // mov x29, sp
- BLT LBB2_3
-
- WORD $0x2a0103e8 // mov w8, w1
- WORD $0xd2f0000b // mov x11, #-9223372036854775808
- WORD $0x71000c3f // cmp w1, #3
- WORD $0x92f0000a // mov x10, #9223372036854775807
- BHI LBB2_4
-
- WORD $0xaa1f03e9 // mov x9, xzr
- JMP LBB2_7
-LBB2_3:
- WORD $0x92f0000a // mov x10, #9223372036854775807
- WORD $0xd2f0000b // mov x11, #-9223372036854775808
- WORD $0xf900006b // str x11, [x3]
- WORD $0xf900004a // str x10, [x2]
- WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
- RET
-LBB2_4:
- WORD $0x927e7509 // and x9, x8, #0xfffffffc
- WORD $0x4e080d61 // dup v1.2d, x11
- WORD $0x4e080d40 // dup v0.2d, x10
- WORD $0x9100400a // add x10, x0, #16
- WORD $0xaa0903eb // mov x11, x9
- WORD $0x4ea01c02 // mov v2.16b, v0.16b
- WORD $0x4ea11c23 // mov v3.16b, v1.16b
-LBB2_5:
- WORD $0xad7f9544 // ldp q4, q5, [x10, #-16]
- WORD $0x4ea31c66 // mov v6.16b, v3.16b
- WORD $0x4ea11c27 // mov v7.16b, v1.16b
- WORD $0x4ea21c43 // mov v3.16b, v2.16b
- WORD $0x4ea01c01 // mov v1.16b, v0.16b
- WORD $0x4ee03480 // cmgt v0.2d, v4.2d, v0.2d
- WORD $0x4ee234a2 // cmgt v2.2d, v5.2d, v2.2d
- WORD $0x6e641c20 // bsl v0.16b, v1.16b, v4.16b
- WORD $0x4ee434e1 // cmgt v1.2d, v7.2d, v4.2d
- WORD $0x6e651c62 // bsl v2.16b, v3.16b, v5.16b
- WORD $0x4ee534c3 // cmgt v3.2d, v6.2d, v5.2d
- WORD $0xf100116b // subs x11, x11, #4
- WORD $0x6e641ce1 // bsl v1.16b, v7.16b, v4.16b
- WORD $0x6e651cc3 // bsl v3.16b, v6.16b, v5.16b
- WORD $0x9100814a // add x10, x10, #32
- BNE LBB2_5
-
- WORD $0x4ee33424 // cmgt v4.2d, v1.2d, v3.2d
- WORD $0x4ee03445 // cmgt v5.2d, v2.2d, v0.2d
- WORD $0x6e631c24 // bsl v4.16b, v1.16b, v3.16b
- WORD $0x6e621c05 // bsl v5.16b, v0.16b, v2.16b
- WORD $0x4e180480 // dup v0.2d, v4.d[1]
- WORD $0x4e1804a1 // dup v1.2d, v5.d[1]
- WORD $0x4ee03482 // cmgt v2.2d, v4.2d, v0.2d
- WORD $0x4ee53423 // cmgt v3.2d, v1.2d, v5.2d
- WORD $0x6e601c82 // bsl v2.16b, v4.16b, v0.16b
- WORD $0x6e611ca3 // bsl v3.16b, v5.16b, v1.16b
- WORD $0xeb08013f // cmp x9, x8
- WORD $0x9e66004b // fmov x11, d2
- WORD $0x9e66006a // fmov x10, d3
- BEQ LBB2_9
-LBB2_7:
- WORD $0x8b090c0c // add x12, x0, x9, lsl #3
- WORD $0xcb090108 // sub x8, x8, x9
-LBB2_8:
- WORD $0xf8408589 // ldr x9, [x12], #8
- WORD $0xeb09015f // cmp x10, x9
- WORD $0x9a89b14a // csel x10, x10, x9, lt
- WORD $0xeb09017f // cmp x11, x9
- WORD $0x9a89c16b // csel x11, x11, x9, gt
- WORD $0xf1000508 // subs x8, x8, #1
- BNE LBB2_8
-LBB2_9:
- WORD $0xf900006b // str x11, [x3]
- WORD $0xf900004a // str x10, [x2]
- WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
- RET
-
-
-// func _uint64_max_min_neon(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
-TEXT ·_uint64_max_min_neon(SB), $0-32
-
- MOVD values+0(FP), R0
- MOVD length+8(FP), R1
- MOVD minout+16(FP), R2
- MOVD maxout+24(FP), R3
-
- WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
- WORD $0x7100043f // cmp w1, #1
- WORD $0x910003fd // mov x29, sp
- BLT LBB3_3
-
- WORD $0x71000c3f // cmp w1, #3
- WORD $0x2a0103e8 // mov w8, w1
- BHI LBB3_4
-
- WORD $0xaa1f03e9 // mov x9, xzr
- WORD $0xaa1f03ea // mov x10, xzr
- WORD $0x9280000b // mov x11, #-1
- JMP LBB3_7
-LBB3_3:
- WORD $0xaa1f03ea // mov x10, xzr
- WORD $0x9280000b // mov x11, #-1
- WORD $0xf900006a // str x10, [x3]
- WORD $0xf900004b // str x11, [x2]
- WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
- RET
-LBB3_4:
- WORD $0x927e7509 // and x9, x8, #0xfffffffc
- WORD $0x9100400a // add x10, x0, #16
- WORD $0x6f00e401 // movi v1.2d, #0000000000000000
- WORD $0x6f07e7e0 // movi v0.2d, #0xffffffffffffffff
- WORD $0x6f07e7e2 // movi v2.2d, #0xffffffffffffffff
- WORD $0xaa0903eb // mov x11, x9
- WORD $0x6f00e403 // movi v3.2d, #0000000000000000
-LBB3_5:
- WORD $0xad7f9544 // ldp q4, q5, [x10, #-16]
- WORD $0x4ea31c66 // mov v6.16b, v3.16b
- WORD $0x4ea11c27 // mov v7.16b, v1.16b
- WORD $0x4ea21c43 // mov v3.16b, v2.16b
- WORD $0x4ea01c01 // mov v1.16b, v0.16b
- WORD $0x6ee03480 // cmhi v0.2d, v4.2d, v0.2d
- WORD $0x6ee234a2 // cmhi v2.2d, v5.2d, v2.2d
- WORD $0x6e641c20 // bsl v0.16b, v1.16b, v4.16b
- WORD $0x6ee434e1 // cmhi v1.2d, v7.2d, v4.2d
- WORD $0x6e651c62 // bsl v2.16b, v3.16b, v5.16b
- WORD $0x6ee534c3 // cmhi v3.2d, v6.2d, v5.2d
- WORD $0xf100116b // subs x11, x11, #4
- WORD $0x6e641ce1 // bsl v1.16b, v7.16b, v4.16b
- WORD $0x6e651cc3 // bsl v3.16b, v6.16b, v5.16b
- WORD $0x9100814a // add x10, x10, #32
- BNE LBB3_5
-
- WORD $0x6ee33424 // cmhi v4.2d, v1.2d, v3.2d
- WORD $0x6ee03445 // cmhi v5.2d, v2.2d, v0.2d
- WORD $0x6e631c24 // bsl v4.16b, v1.16b, v3.16b
- WORD $0x6e621c05 // bsl v5.16b, v0.16b, v2.16b
- WORD $0x4e180480 // dup v0.2d, v4.d[1]
- WORD $0x4e1804a1 // dup v1.2d, v5.d[1]
- WORD $0x6ee03482 // cmhi v2.2d, v4.2d, v0.2d
- WORD $0x6ee53423 // cmhi v3.2d, v1.2d, v5.2d
- WORD $0x6e601c82 // bsl v2.16b, v4.16b, v0.16b
- WORD $0x6e611ca3 // bsl v3.16b, v5.16b, v1.16b
- WORD $0xeb08013f // cmp x9, x8
- WORD $0x9e66004a // fmov x10, d2
- WORD $0x9e66006b // fmov x11, d3
- BEQ LBB3_9
-LBB3_7:
- WORD $0x8b090c0c // add x12, x0, x9, lsl #3
- WORD $0xcb090108 // sub x8, x8, x9
-LBB3_8:
- WORD $0xf8408589 // ldr x9, [x12], #8
- WORD $0xeb09017f // cmp x11, x9
- WORD $0x9a89316b // csel x11, x11, x9, lo
- WORD $0xeb09015f // cmp x10, x9
- WORD $0x9a89814a // csel x10, x10, x9, hi
- WORD $0xf1000508 // subs x8, x8, #1
- BNE LBB3_8
-LBB3_9:
- WORD $0xf900006a // str x10, [x3]
- WORD $0xf900004b // str x11, [x2]
- WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
- RET
-
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_noasm.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_noasm.go
deleted file mode 100644
index 19c24b590..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_noasm.go
+++ /dev/null
@@ -1,31 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//go:build noasm
-
-package utils
-
-// if building with the 'noasm' tag, then point to the pure go implementations
-func init() {
- minmaxFuncs.i8 = int8MinMax
- minmaxFuncs.ui8 = uint8MinMax
- minmaxFuncs.i16 = int16MinMax
- minmaxFuncs.ui16 = uint16MinMax
- minmaxFuncs.i32 = int32MinMax
- minmaxFuncs.ui32 = uint32MinMax
- minmaxFuncs.i64 = int64MinMax
- minmaxFuncs.ui64 = uint64MinMax
-}
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_ppc64le.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_ppc64le.go
deleted file mode 100644
index ffd2db006..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_ppc64le.go
+++ /dev/null
@@ -1,30 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//go:build !noasm
-
-package utils
-
-func init() {
- minmaxFuncs.i8 = int8MinMax
- minmaxFuncs.ui8 = uint8MinMax
- minmaxFuncs.i16 = int16MinMax
- minmaxFuncs.ui16 = uint16MinMax
- minmaxFuncs.i32 = int32MinMax
- minmaxFuncs.ui32 = uint32MinMax
- minmaxFuncs.i64 = int64MinMax
- minmaxFuncs.ui64 = uint64MinMax
-}
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_s390x.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_s390x.go
deleted file mode 100644
index ffd2db006..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_s390x.go
+++ /dev/null
@@ -1,30 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//go:build !noasm
-
-package utils
-
-func init() {
- minmaxFuncs.i8 = int8MinMax
- minmaxFuncs.ui8 = uint8MinMax
- minmaxFuncs.i16 = int16MinMax
- minmaxFuncs.ui16 = uint16MinMax
- minmaxFuncs.i32 = int32MinMax
- minmaxFuncs.ui32 = uint32MinMax
- minmaxFuncs.i64 = int64MinMax
- minmaxFuncs.ui64 = uint64MinMax
-}
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_sse4_amd64.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_sse4_amd64.go
deleted file mode 100644
index 1e12a8d17..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_sse4_amd64.go
+++ /dev/null
@@ -1,88 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//go:build !noasm
-
-package utils
-
-import "unsafe"
-
-// This file contains convenience functions for utilizing SSE4 intrinsics to quickly
-// and efficiently get the min and max from an integral slice.
-
-//go:noescape
-func _int8_max_min_sse4(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
-
-func int8MaxMinSSE4(values []int8) (min, max int8) {
- _int8_max_min_sse4(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max))
- return
-}
-
-//go:noescape
-func _uint8_max_min_sse4(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
-
-func uint8MaxMinSSE4(values []uint8) (min, max uint8) {
- _uint8_max_min_sse4(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max))
- return
-}
-
-//go:noescape
-func _int16_max_min_sse4(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
-
-func int16MaxMinSSE4(values []int16) (min, max int16) {
- _int16_max_min_sse4(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max))
- return
-}
-
-//go:noescape
-func _uint16_max_min_sse4(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
-
-func uint16MaxMinSSE4(values []uint16) (min, max uint16) {
- _uint16_max_min_sse4(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max))
- return
-}
-
-//go:noescape
-func _int32_max_min_sse4(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
-
-func int32MaxMinSSE4(values []int32) (min, max int32) {
- _int32_max_min_sse4(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max))
- return
-}
-
-//go:noescape
-func _uint32_max_min_sse4(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
-
-func uint32MaxMinSSE4(values []uint32) (min, max uint32) {
- _uint32_max_min_sse4(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max))
- return
-}
-
-//go:noescape
-func _int64_max_min_sse4(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
-
-func int64MaxMinSSE4(values []int64) (min, max int64) {
- _int64_max_min_sse4(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max))
- return
-}
-
-//go:noescape
-func _uint64_max_min_sse4(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
-
-func uint64MaxMinSSE4(values []uint64) (min, max uint64) {
- _uint64_max_min_sse4(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max))
- return
-}
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_sse4_amd64.s b/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_sse4_amd64.s
deleted file mode 100644
index 8f1eccf60..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_sse4_amd64.s
+++ /dev/null
@@ -1,1044 +0,0 @@
-//+build !noasm !appengine
-// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT
-
-DATA LCDATA1<>+0x000(SB)/8, $0x8080808080808080
-DATA LCDATA1<>+0x008(SB)/8, $0x8080808080808080
-DATA LCDATA1<>+0x010(SB)/8, $0x7f7f7f7f7f7f7f7f
-DATA LCDATA1<>+0x018(SB)/8, $0x7f7f7f7f7f7f7f7f
-GLOBL LCDATA1<>(SB), 8, $32
-
-TEXT ·_int8_max_min_sse4(SB), $0-32
-
- MOVQ values+0(FP), DI
- MOVQ length+8(FP), SI
- MOVQ minout+16(FP), DX
- MOVQ maxout+24(FP), CX
- LEAQ LCDATA1<>(SB), BP
-
- WORD $0xf685 // test esi, esi
- JLE LBB0_1
- WORD $0x8941; BYTE $0xf1 // mov r9d, esi
- WORD $0xfe83; BYTE $0x1f // cmp esi, 31
- JA LBB0_4
- WORD $0xb041; BYTE $0x80 // mov r8b, -128
- WORD $0xb640; BYTE $0x7f // mov sil, 127
- WORD $0x3145; BYTE $0xdb // xor r11d, r11d
- JMP LBB0_11
-
-LBB0_1:
- WORD $0xb640; BYTE $0x7f // mov sil, 127
- WORD $0xb041; BYTE $0x80 // mov r8b, -128
- JMP LBB0_12
-
-LBB0_4:
- WORD $0x8945; BYTE $0xcb // mov r11d, r9d
- LONG $0xe0e38341 // and r11d, -32
- LONG $0xe0438d49 // lea rax, [r11 - 32]
- WORD $0x8949; BYTE $0xc0 // mov r8, rax
- LONG $0x05e8c149 // shr r8, 5
- LONG $0x01c08349 // add r8, 1
- WORD $0x8548; BYTE $0xc0 // test rax, rax
- JE LBB0_5
- WORD $0x894d; BYTE $0xc2 // mov r10, r8
- LONG $0xfee28349 // and r10, -2
- WORD $0xf749; BYTE $0xda // neg r10
- LONG $0x4d6f0f66; BYTE $0x00 // movdqa xmm1, oword 0[rbp] /* [rip + .LCPI0_0] */
- LONG $0x456f0f66; BYTE $0x10 // movdqa xmm0, oword 16[rbp] /* [rip + .LCPI0_1] */
- WORD $0xc031 // xor eax, eax
- LONG $0xd06f0f66 // movdqa xmm2, xmm0
- LONG $0xd96f0f66 // movdqa xmm3, xmm1
-
-LBB0_7:
- LONG $0x246f0ff3; BYTE $0x07 // movdqu xmm4, oword [rdi + rax]
- LONG $0x6c6f0ff3; WORD $0x1007 // movdqu xmm5, oword [rdi + rax + 16]
- LONG $0x746f0ff3; WORD $0x2007 // movdqu xmm6, oword [rdi + rax + 32]
- LONG $0x7c6f0ff3; WORD $0x3007 // movdqu xmm7, oword [rdi + rax + 48]
- LONG $0x38380f66; BYTE $0xc4 // pminsb xmm0, xmm4
- LONG $0x38380f66; BYTE $0xd5 // pminsb xmm2, xmm5
- LONG $0x3c380f66; BYTE $0xcc // pmaxsb xmm1, xmm4
- LONG $0x3c380f66; BYTE $0xdd // pmaxsb xmm3, xmm5
- LONG $0x38380f66; BYTE $0xc6 // pminsb xmm0, xmm6
- LONG $0x38380f66; BYTE $0xd7 // pminsb xmm2, xmm7
- LONG $0x3c380f66; BYTE $0xce // pmaxsb xmm1, xmm6
- LONG $0x3c380f66; BYTE $0xdf // pmaxsb xmm3, xmm7
- LONG $0x40c08348 // add rax, 64
- LONG $0x02c28349 // add r10, 2
- JNE LBB0_7
- LONG $0x01c0f641 // test r8b, 1
- JE LBB0_10
-
-LBB0_9:
- LONG $0x246f0ff3; BYTE $0x07 // movdqu xmm4, oword [rdi + rax]
- LONG $0x6c6f0ff3; WORD $0x1007 // movdqu xmm5, oword [rdi + rax + 16]
- LONG $0x3c380f66; BYTE $0xdd // pmaxsb xmm3, xmm5
- LONG $0x3c380f66; BYTE $0xcc // pmaxsb xmm1, xmm4
- LONG $0x38380f66; BYTE $0xd5 // pminsb xmm2, xmm5
- LONG $0x38380f66; BYTE $0xc4 // pminsb xmm0, xmm4
-
-LBB0_10:
- LONG $0x38380f66; BYTE $0xc2 // pminsb xmm0, xmm2
- LONG $0x3c380f66; BYTE $0xcb // pmaxsb xmm1, xmm3
- LONG $0x4def0f66; BYTE $0x10 // pxor xmm1, oword 16[rbp] /* [rip + .LCPI0_1] */
- LONG $0xd16f0f66 // movdqa xmm2, xmm1
- LONG $0xd2710f66; BYTE $0x08 // psrlw xmm2, 8
- LONG $0xd1da0f66 // pminub xmm2, xmm1
- LONG $0x41380f66; BYTE $0xca // phminposuw xmm1, xmm2
- LONG $0x7e0f4166; BYTE $0xc8 // movd r8d, xmm1
- LONG $0x7ff08041 // xor r8b, 127
- LONG $0x45ef0f66; BYTE $0x00 // pxor xmm0, oword 0[rbp] /* [rip + .LCPI0_0] */
- LONG $0xc86f0f66 // movdqa xmm1, xmm0
- LONG $0xd1710f66; BYTE $0x08 // psrlw xmm1, 8
- LONG $0xc8da0f66 // pminub xmm1, xmm0
- LONG $0x41380f66; BYTE $0xc1 // phminposuw xmm0, xmm1
- LONG $0xc67e0f66 // movd esi, xmm0
- LONG $0x80f68040 // xor sil, -128
- WORD $0x394d; BYTE $0xcb // cmp r11, r9
- JE LBB0_12
-
-LBB0_11:
- LONG $0x04b60f42; BYTE $0x1f // movzx eax, byte [rdi + r11]
- WORD $0x3840; BYTE $0xc6 // cmp sil, al
- LONG $0xf6b60f40 // movzx esi, sil
- WORD $0x4f0f; BYTE $0xf0 // cmovg esi, eax
- WORD $0x3841; BYTE $0xc0 // cmp r8b, al
- LONG $0xc0b60f45 // movzx r8d, r8b
- LONG $0xc04c0f44 // cmovl r8d, eax
- LONG $0x01c38349 // add r11, 1
- WORD $0x394d; BYTE $0xd9 // cmp r9, r11
- JNE LBB0_11
-
-LBB0_12:
- WORD $0x8844; BYTE $0x01 // mov byte [rcx], r8b
- WORD $0x8840; BYTE $0x32 // mov byte [rdx], sil
- RET
-
-LBB0_5:
- LONG $0x4d6f0f66; BYTE $0x00 // movdqa xmm1, oword 0[rbp] /* [rip + .LCPI0_0] */
- LONG $0x456f0f66; BYTE $0x10 // movdqa xmm0, oword 16[rbp] /* [rip + .LCPI0_1] */
- WORD $0xc031 // xor eax, eax
- LONG $0xd06f0f66 // movdqa xmm2, xmm0
- LONG $0xd96f0f66 // movdqa xmm3, xmm1
- LONG $0x01c0f641 // test r8b, 1
- JNE LBB0_9
- JMP LBB0_10
-
-TEXT ·_uint8_max_min_sse4(SB), $0-32
-
- MOVQ values+0(FP), DI
- MOVQ length+8(FP), SI
- MOVQ minout+16(FP), DX
- MOVQ maxout+24(FP), CX
-
- WORD $0xf685 // test esi, esi
- JLE LBB1_1
- WORD $0x8941; BYTE $0xf1 // mov r9d, esi
- WORD $0xfe83; BYTE $0x1f // cmp esi, 31
- JA LBB1_4
- WORD $0xb640; BYTE $0xff // mov sil, -1
- WORD $0x3145; BYTE $0xdb // xor r11d, r11d
- WORD $0xc031 // xor eax, eax
- JMP LBB1_11
-
-LBB1_1:
- WORD $0xb640; BYTE $0xff // mov sil, -1
- WORD $0xc031 // xor eax, eax
- JMP LBB1_12
-
-LBB1_4:
- WORD $0x8945; BYTE $0xcb // mov r11d, r9d
- LONG $0xe0e38341 // and r11d, -32
- LONG $0xe0438d49 // lea rax, [r11 - 32]
- WORD $0x8949; BYTE $0xc0 // mov r8, rax
- LONG $0x05e8c149 // shr r8, 5
- LONG $0x01c08349 // add r8, 1
- WORD $0x8548; BYTE $0xc0 // test rax, rax
- JE LBB1_5
- WORD $0x894d; BYTE $0xc2 // mov r10, r8
- LONG $0xfee28349 // and r10, -2
- WORD $0xf749; BYTE $0xda // neg r10
- LONG $0xc9ef0f66 // pxor xmm1, xmm1
- LONG $0xc0760f66 // pcmpeqd xmm0, xmm0
- WORD $0xc031 // xor eax, eax
- LONG $0xd2760f66 // pcmpeqd xmm2, xmm2
- LONG $0xdbef0f66 // pxor xmm3, xmm3
-
-LBB1_7:
- LONG $0x246f0ff3; BYTE $0x07 // movdqu xmm4, oword [rdi + rax]
- LONG $0x6c6f0ff3; WORD $0x1007 // movdqu xmm5, oword [rdi + rax + 16]
- LONG $0x746f0ff3; WORD $0x2007 // movdqu xmm6, oword [rdi + rax + 32]
- LONG $0x7c6f0ff3; WORD $0x3007 // movdqu xmm7, oword [rdi + rax + 48]
- LONG $0xc4da0f66 // pminub xmm0, xmm4
- LONG $0xd5da0f66 // pminub xmm2, xmm5
- LONG $0xccde0f66 // pmaxub xmm1, xmm4
- LONG $0xddde0f66 // pmaxub xmm3, xmm5
- LONG $0xc6da0f66 // pminub xmm0, xmm6
- LONG $0xd7da0f66 // pminub xmm2, xmm7
- LONG $0xcede0f66 // pmaxub xmm1, xmm6
- LONG $0xdfde0f66 // pmaxub xmm3, xmm7
- LONG $0x40c08348 // add rax, 64
- LONG $0x02c28349 // add r10, 2
- JNE LBB1_7
- LONG $0x01c0f641 // test r8b, 1
- JE LBB1_10
-
-LBB1_9:
- LONG $0x246f0ff3; BYTE $0x07 // movdqu xmm4, oword [rdi + rax]
- LONG $0x6c6f0ff3; WORD $0x1007 // movdqu xmm5, oword [rdi + rax + 16]
- LONG $0xddde0f66 // pmaxub xmm3, xmm5
- LONG $0xccde0f66 // pmaxub xmm1, xmm4
- LONG $0xd5da0f66 // pminub xmm2, xmm5
- LONG $0xc4da0f66 // pminub xmm0, xmm4
-
-LBB1_10:
- LONG $0xc2da0f66 // pminub xmm0, xmm2
- LONG $0xcbde0f66 // pmaxub xmm1, xmm3
- LONG $0xd2760f66 // pcmpeqd xmm2, xmm2
- LONG $0xd1ef0f66 // pxor xmm2, xmm1
- LONG $0xca6f0f66 // movdqa xmm1, xmm2
- LONG $0xd1710f66; BYTE $0x08 // psrlw xmm1, 8
- LONG $0xcada0f66 // pminub xmm1, xmm2
- LONG $0x41380f66; BYTE $0xc9 // phminposuw xmm1, xmm1
- LONG $0xc87e0f66 // movd eax, xmm1
- WORD $0xd0f6 // not al
- LONG $0xc86f0f66 // movdqa xmm1, xmm0
- LONG $0xd1710f66; BYTE $0x08 // psrlw xmm1, 8
- LONG $0xc8da0f66 // pminub xmm1, xmm0
- LONG $0x41380f66; BYTE $0xc1 // phminposuw xmm0, xmm1
- LONG $0xc67e0f66 // movd esi, xmm0
- WORD $0x394d; BYTE $0xcb // cmp r11, r9
- JE LBB1_12
-
-LBB1_11:
- LONG $0x04b60f46; BYTE $0x1f // movzx r8d, byte [rdi + r11]
- WORD $0x3844; BYTE $0xc6 // cmp sil, r8b
- LONG $0xf6b60f40 // movzx esi, sil
- LONG $0xf0430f41 // cmovae esi, r8d
- WORD $0x3844; BYTE $0xc0 // cmp al, r8b
- WORD $0xb60f; BYTE $0xc0 // movzx eax, al
- LONG $0xc0460f41 // cmovbe eax, r8d
- LONG $0x01c38349 // add r11, 1
- WORD $0x394d; BYTE $0xd9 // cmp r9, r11
- JNE LBB1_11
-
-LBB1_12:
- WORD $0x0188 // mov byte [rcx], al
- WORD $0x8840; BYTE $0x32 // mov byte [rdx], sil
- RET
-
-LBB1_5:
- LONG $0xc9ef0f66 // pxor xmm1, xmm1
- LONG $0xc0760f66 // pcmpeqd xmm0, xmm0
- WORD $0xc031 // xor eax, eax
- LONG $0xd2760f66 // pcmpeqd xmm2, xmm2
- LONG $0xdbef0f66 // pxor xmm3, xmm3
- LONG $0x01c0f641 // test r8b, 1
- JNE LBB1_9
- JMP LBB1_10
-
-DATA LCDATA2<>+0x000(SB)/8, $0x8000800080008000
-DATA LCDATA2<>+0x008(SB)/8, $0x8000800080008000
-DATA LCDATA2<>+0x010(SB)/8, $0x7fff7fff7fff7fff
-DATA LCDATA2<>+0x018(SB)/8, $0x7fff7fff7fff7fff
-GLOBL LCDATA2<>(SB), 8, $32
-
-TEXT ·_int16_max_min_sse4(SB), $0-32
-
- MOVQ values+0(FP), DI
- MOVQ length+8(FP), SI
- MOVQ minout+16(FP), DX
- MOVQ maxout+24(FP), CX
- LEAQ LCDATA2<>(SB), BP
-
- WORD $0xf685 // test esi, esi
- JLE LBB2_1
- WORD $0x8941; BYTE $0xf1 // mov r9d, esi
- WORD $0xfe83; BYTE $0x0f // cmp esi, 15
- JA LBB2_4
- LONG $0x00b84166; BYTE $0x80 // mov r8w, -32768
- LONG $0x7fffbe66 // mov si, 32767
- WORD $0x3145; BYTE $0xdb // xor r11d, r11d
- JMP LBB2_11
-
-LBB2_1:
- LONG $0x7fffbe66 // mov si, 32767
- LONG $0x00b84166; BYTE $0x80 // mov r8w, -32768
- JMP LBB2_12
-
-LBB2_4:
- WORD $0x8945; BYTE $0xcb // mov r11d, r9d
- LONG $0xf0e38341 // and r11d, -16
- LONG $0xf0438d49 // lea rax, [r11 - 16]
- WORD $0x8949; BYTE $0xc0 // mov r8, rax
- LONG $0x04e8c149 // shr r8, 4
- LONG $0x01c08349 // add r8, 1
- WORD $0x8548; BYTE $0xc0 // test rax, rax
- JE LBB2_5
- WORD $0x894d; BYTE $0xc2 // mov r10, r8
- LONG $0xfee28349 // and r10, -2
- WORD $0xf749; BYTE $0xda // neg r10
- LONG $0x4d6f0f66; BYTE $0x00 // movdqa xmm1, oword 0[rbp] /* [rip + .LCPI2_0] */
- LONG $0x456f0f66; BYTE $0x10 // movdqa xmm0, oword 16[rbp] /* [rip + .LCPI2_1] */
- WORD $0xc031 // xor eax, eax
- LONG $0xd06f0f66 // movdqa xmm2, xmm0
- LONG $0xd96f0f66 // movdqa xmm3, xmm1
-
-LBB2_7:
- LONG $0x246f0ff3; BYTE $0x47 // movdqu xmm4, oword [rdi + 2*rax]
- LONG $0x6c6f0ff3; WORD $0x1047 // movdqu xmm5, oword [rdi + 2*rax + 16]
- LONG $0x746f0ff3; WORD $0x2047 // movdqu xmm6, oword [rdi + 2*rax + 32]
- LONG $0x7c6f0ff3; WORD $0x3047 // movdqu xmm7, oword [rdi + 2*rax + 48]
- LONG $0xc4ea0f66 // pminsw xmm0, xmm4
- LONG $0xd5ea0f66 // pminsw xmm2, xmm5
- LONG $0xccee0f66 // pmaxsw xmm1, xmm4
- LONG $0xddee0f66 // pmaxsw xmm3, xmm5
- LONG $0xc6ea0f66 // pminsw xmm0, xmm6
- LONG $0xd7ea0f66 // pminsw xmm2, xmm7
- LONG $0xceee0f66 // pmaxsw xmm1, xmm6
- LONG $0xdfee0f66 // pmaxsw xmm3, xmm7
- LONG $0x20c08348 // add rax, 32
- LONG $0x02c28349 // add r10, 2
- JNE LBB2_7
- LONG $0x01c0f641 // test r8b, 1
- JE LBB2_10
-
-LBB2_9:
- LONG $0x246f0ff3; BYTE $0x47 // movdqu xmm4, oword [rdi + 2*rax]
- LONG $0x6c6f0ff3; WORD $0x1047 // movdqu xmm5, oword [rdi + 2*rax + 16]
- LONG $0xddee0f66 // pmaxsw xmm3, xmm5
- LONG $0xccee0f66 // pmaxsw xmm1, xmm4
- LONG $0xd5ea0f66 // pminsw xmm2, xmm5
- LONG $0xc4ea0f66 // pminsw xmm0, xmm4
-
-LBB2_10:
- LONG $0xc2ea0f66 // pminsw xmm0, xmm2
- LONG $0xcbee0f66 // pmaxsw xmm1, xmm3
- LONG $0x4def0f66; BYTE $0x10 // pxor xmm1, oword 16[rbp] /* [rip + .LCPI2_1] */
- LONG $0x41380f66; BYTE $0xc9 // phminposuw xmm1, xmm1
- LONG $0x7e0f4166; BYTE $0xc8 // movd r8d, xmm1
- LONG $0xfff08141; WORD $0x007f; BYTE $0x00 // xor r8d, 32767
- LONG $0x45ef0f66; BYTE $0x00 // pxor xmm0, oword 0[rbp] /* [rip + .LCPI2_0] */
- LONG $0x41380f66; BYTE $0xc0 // phminposuw xmm0, xmm0
- LONG $0xc67e0f66 // movd esi, xmm0
- LONG $0x8000f681; WORD $0x0000 // xor esi, 32768
- WORD $0x394d; BYTE $0xcb // cmp r11, r9
- JE LBB2_12
-
-LBB2_11:
- LONG $0x04b70f42; BYTE $0x5f // movzx eax, word [rdi + 2*r11]
- WORD $0x3966; BYTE $0xc6 // cmp si, ax
- WORD $0x4f0f; BYTE $0xf0 // cmovg esi, eax
- LONG $0xc0394166 // cmp r8w, ax
- LONG $0xc04c0f44 // cmovl r8d, eax
- LONG $0x01c38349 // add r11, 1
- WORD $0x394d; BYTE $0xd9 // cmp r9, r11
- JNE LBB2_11
-
-LBB2_12:
- LONG $0x01894466 // mov word [rcx], r8w
- WORD $0x8966; BYTE $0x32 // mov word [rdx], si
- RET
-
-LBB2_5:
- LONG $0x4d6f0f66; BYTE $0x00 // movdqa xmm1, oword 0[rbp] /* [rip + .LCPI2_0] */
- LONG $0x456f0f66; BYTE $0x10 // movdqa xmm0, oword 16[rbp] /* [rip + .LCPI2_1] */
- WORD $0xc031 // xor eax, eax
- LONG $0xd06f0f66 // movdqa xmm2, xmm0
- LONG $0xd96f0f66 // movdqa xmm3, xmm1
- LONG $0x01c0f641 // test r8b, 1
- JNE LBB2_9
- JMP LBB2_10
-
-TEXT ·_uint16_max_min_sse4(SB), $0-32
-
- MOVQ values+0(FP), DI
- MOVQ length+8(FP), SI
- MOVQ minout+16(FP), DX
- MOVQ maxout+24(FP), CX
-
- WORD $0xf685 // test esi, esi
- JLE LBB3_1
- WORD $0x8941; BYTE $0xf1 // mov r9d, esi
- WORD $0xfe83; BYTE $0x0f // cmp esi, 15
- JA LBB3_4
- LONG $0xffb84166; BYTE $0xff // mov r8w, -1
- WORD $0x3145; BYTE $0xdb // xor r11d, r11d
- WORD $0xf631 // xor esi, esi
- JMP LBB3_11
-
-LBB3_1:
- LONG $0xffb84166; BYTE $0xff // mov r8w, -1
- WORD $0xf631 // xor esi, esi
- JMP LBB3_12
-
-LBB3_4:
- WORD $0x8945; BYTE $0xcb // mov r11d, r9d
- LONG $0xf0e38341 // and r11d, -16
- LONG $0xf0438d49 // lea rax, [r11 - 16]
- WORD $0x8949; BYTE $0xc0 // mov r8, rax
- LONG $0x04e8c149 // shr r8, 4
- LONG $0x01c08349 // add r8, 1
- WORD $0x8548; BYTE $0xc0 // test rax, rax
- JE LBB3_5
- WORD $0x894d; BYTE $0xc2 // mov r10, r8
- LONG $0xfee28349 // and r10, -2
- WORD $0xf749; BYTE $0xda // neg r10
- LONG $0xc9ef0f66 // pxor xmm1, xmm1
- LONG $0xc0760f66 // pcmpeqd xmm0, xmm0
- WORD $0xc031 // xor eax, eax
- LONG $0xd2760f66 // pcmpeqd xmm2, xmm2
- LONG $0xdbef0f66 // pxor xmm3, xmm3
-
-LBB3_7:
- LONG $0x246f0ff3; BYTE $0x47 // movdqu xmm4, oword [rdi + 2*rax]
- LONG $0x6c6f0ff3; WORD $0x1047 // movdqu xmm5, oword [rdi + 2*rax + 16]
- LONG $0x746f0ff3; WORD $0x2047 // movdqu xmm6, oword [rdi + 2*rax + 32]
- LONG $0x7c6f0ff3; WORD $0x3047 // movdqu xmm7, oword [rdi + 2*rax + 48]
- LONG $0x3a380f66; BYTE $0xc4 // pminuw xmm0, xmm4
- LONG $0x3a380f66; BYTE $0xd5 // pminuw xmm2, xmm5
- LONG $0x3e380f66; BYTE $0xcc // pmaxuw xmm1, xmm4
- LONG $0x3e380f66; BYTE $0xdd // pmaxuw xmm3, xmm5
- LONG $0x3a380f66; BYTE $0xc6 // pminuw xmm0, xmm6
- LONG $0x3a380f66; BYTE $0xd7 // pminuw xmm2, xmm7
- LONG $0x3e380f66; BYTE $0xce // pmaxuw xmm1, xmm6
- LONG $0x3e380f66; BYTE $0xdf // pmaxuw xmm3, xmm7
- LONG $0x20c08348 // add rax, 32
- LONG $0x02c28349 // add r10, 2
- JNE LBB3_7
- LONG $0x01c0f641 // test r8b, 1
- JE LBB3_10
-
-LBB3_9:
- LONG $0x246f0ff3; BYTE $0x47 // movdqu xmm4, oword [rdi + 2*rax]
- LONG $0x6c6f0ff3; WORD $0x1047 // movdqu xmm5, oword [rdi + 2*rax + 16]
- LONG $0x3e380f66; BYTE $0xdd // pmaxuw xmm3, xmm5
- LONG $0x3e380f66; BYTE $0xcc // pmaxuw xmm1, xmm4
- LONG $0x3a380f66; BYTE $0xd5 // pminuw xmm2, xmm5
- LONG $0x3a380f66; BYTE $0xc4 // pminuw xmm0, xmm4
-
-LBB3_10:
- LONG $0x3a380f66; BYTE $0xc2 // pminuw xmm0, xmm2
- LONG $0x3e380f66; BYTE $0xcb // pmaxuw xmm1, xmm3
- LONG $0xd2760f66 // pcmpeqd xmm2, xmm2
- LONG $0xd1ef0f66 // pxor xmm2, xmm1
- LONG $0x41380f66; BYTE $0xca // phminposuw xmm1, xmm2
- LONG $0xce7e0f66 // movd esi, xmm1
- WORD $0xd6f7 // not esi
- LONG $0x41380f66; BYTE $0xc0 // phminposuw xmm0, xmm0
- LONG $0x7e0f4166; BYTE $0xc0 // movd r8d, xmm0
- WORD $0x394d; BYTE $0xcb // cmp r11, r9
- JE LBB3_12
-
-LBB3_11:
- LONG $0x04b70f42; BYTE $0x5f // movzx eax, word [rdi + 2*r11]
- LONG $0xc0394166 // cmp r8w, ax
- LONG $0xc0430f44 // cmovae r8d, eax
- WORD $0x3966; BYTE $0xc6 // cmp si, ax
- WORD $0x460f; BYTE $0xf0 // cmovbe esi, eax
- LONG $0x01c38349 // add r11, 1
- WORD $0x394d; BYTE $0xd9 // cmp r9, r11
- JNE LBB3_11
-
-LBB3_12:
- WORD $0x8966; BYTE $0x31 // mov word [rcx], si
- LONG $0x02894466 // mov word [rdx], r8w
- RET
-
-LBB3_5:
- LONG $0xc9ef0f66 // pxor xmm1, xmm1
- LONG $0xc0760f66 // pcmpeqd xmm0, xmm0
- WORD $0xc031 // xor eax, eax
- LONG $0xd2760f66 // pcmpeqd xmm2, xmm2
- LONG $0xdbef0f66 // pxor xmm3, xmm3
- LONG $0x01c0f641 // test r8b, 1
- JNE LBB3_9
- JMP LBB3_10
-
-DATA LCDATA3<>+0x000(SB)/8, $0x8000000080000000
-DATA LCDATA3<>+0x008(SB)/8, $0x8000000080000000
-DATA LCDATA3<>+0x010(SB)/8, $0x7fffffff7fffffff
-DATA LCDATA3<>+0x018(SB)/8, $0x7fffffff7fffffff
-GLOBL LCDATA3<>(SB), 8, $32
-
-TEXT ·_int32_max_min_sse4(SB), $0-32
-
- MOVQ values+0(FP), DI
- MOVQ length+8(FP), SI
- MOVQ minout+16(FP), DX
- MOVQ maxout+24(FP), CX
- LEAQ LCDATA3<>(SB), BP
-
- WORD $0xf685 // test esi, esi
- JLE LBB4_1
- WORD $0x8941; BYTE $0xf1 // mov r9d, esi
- WORD $0xfe83; BYTE $0x07 // cmp esi, 7
- JA LBB4_6
- LONG $0x000000b8; BYTE $0x80 // mov eax, -2147483648
- LONG $0xffffb841; WORD $0x7fff // mov r8d, 2147483647
- WORD $0x3145; BYTE $0xdb // xor r11d, r11d
- JMP LBB4_4
-
-LBB4_1:
- LONG $0xffffb841; WORD $0x7fff // mov r8d, 2147483647
- LONG $0x000000b8; BYTE $0x80 // mov eax, -2147483648
- JMP LBB4_13
-
-LBB4_6:
- WORD $0x8945; BYTE $0xcb // mov r11d, r9d
- LONG $0xf8e38341 // and r11d, -8
- LONG $0xf8438d49 // lea rax, [r11 - 8]
- WORD $0x8949; BYTE $0xc0 // mov r8, rax
- LONG $0x03e8c149 // shr r8, 3
- LONG $0x01c08349 // add r8, 1
- WORD $0x8548; BYTE $0xc0 // test rax, rax
- JE LBB4_7
- WORD $0x894d; BYTE $0xc2 // mov r10, r8
- LONG $0xfee28349 // and r10, -2
- WORD $0xf749; BYTE $0xda // neg r10
- LONG $0x4d6f0f66; BYTE $0x00 // movdqa xmm1, oword 0[rbp] /* [rip + .LCPI4_0] */
- LONG $0x456f0f66; BYTE $0x10 // movdqa xmm0, oword 16[rbp] /* [rip + .LCPI4_1] */
- WORD $0xc031 // xor eax, eax
- LONG $0xd06f0f66 // movdqa xmm2, xmm0
- LONG $0xd96f0f66 // movdqa xmm3, xmm1
-
-LBB4_9:
- LONG $0x246f0ff3; BYTE $0x87 // movdqu xmm4, oword [rdi + 4*rax]
- LONG $0x6c6f0ff3; WORD $0x1087 // movdqu xmm5, oword [rdi + 4*rax + 16]
- LONG $0x746f0ff3; WORD $0x2087 // movdqu xmm6, oword [rdi + 4*rax + 32]
- LONG $0x7c6f0ff3; WORD $0x3087 // movdqu xmm7, oword [rdi + 4*rax + 48]
- LONG $0x39380f66; BYTE $0xc4 // pminsd xmm0, xmm4
- LONG $0x39380f66; BYTE $0xd5 // pminsd xmm2, xmm5
- LONG $0x3d380f66; BYTE $0xcc // pmaxsd xmm1, xmm4
- LONG $0x3d380f66; BYTE $0xdd // pmaxsd xmm3, xmm5
- LONG $0x39380f66; BYTE $0xc6 // pminsd xmm0, xmm6
- LONG $0x39380f66; BYTE $0xd7 // pminsd xmm2, xmm7
- LONG $0x3d380f66; BYTE $0xce // pmaxsd xmm1, xmm6
- LONG $0x3d380f66; BYTE $0xdf // pmaxsd xmm3, xmm7
- LONG $0x10c08348 // add rax, 16
- LONG $0x02c28349 // add r10, 2
- JNE LBB4_9
- LONG $0x01c0f641 // test r8b, 1
- JE LBB4_12
-
-LBB4_11:
- LONG $0x246f0ff3; BYTE $0x87 // movdqu xmm4, oword [rdi + 4*rax]
- LONG $0x6c6f0ff3; WORD $0x1087 // movdqu xmm5, oword [rdi + 4*rax + 16]
- LONG $0x3d380f66; BYTE $0xdd // pmaxsd xmm3, xmm5
- LONG $0x3d380f66; BYTE $0xcc // pmaxsd xmm1, xmm4
- LONG $0x39380f66; BYTE $0xd5 // pminsd xmm2, xmm5
- LONG $0x39380f66; BYTE $0xc4 // pminsd xmm0, xmm4
-
-LBB4_12:
- LONG $0x39380f66; BYTE $0xc2 // pminsd xmm0, xmm2
- LONG $0x3d380f66; BYTE $0xcb // pmaxsd xmm1, xmm3
- LONG $0xd1700f66; BYTE $0x4e // pshufd xmm2, xmm1, 78
- LONG $0x3d380f66; BYTE $0xd1 // pmaxsd xmm2, xmm1
- LONG $0xca700f66; BYTE $0xe5 // pshufd xmm1, xmm2, 229
- LONG $0x3d380f66; BYTE $0xca // pmaxsd xmm1, xmm2
- LONG $0xc87e0f66 // movd eax, xmm1
- LONG $0xc8700f66; BYTE $0x4e // pshufd xmm1, xmm0, 78
- LONG $0x39380f66; BYTE $0xc8 // pminsd xmm1, xmm0
- LONG $0xc1700f66; BYTE $0xe5 // pshufd xmm0, xmm1, 229
- LONG $0x39380f66; BYTE $0xc1 // pminsd xmm0, xmm1
- LONG $0x7e0f4166; BYTE $0xc0 // movd r8d, xmm0
- WORD $0x394d; BYTE $0xcb // cmp r11, r9
- JE LBB4_13
-
-LBB4_4:
- WORD $0xc689 // mov esi, eax
-
-LBB4_5:
- LONG $0x9f048b42 // mov eax, dword [rdi + 4*r11]
- WORD $0x3941; BYTE $0xc0 // cmp r8d, eax
- LONG $0xc04f0f44 // cmovg r8d, eax
- WORD $0xc639 // cmp esi, eax
- WORD $0x4d0f; BYTE $0xc6 // cmovge eax, esi
- LONG $0x01c38349 // add r11, 1
- WORD $0xc689 // mov esi, eax
- WORD $0x394d; BYTE $0xd9 // cmp r9, r11
- JNE LBB4_5
-
-LBB4_13:
- WORD $0x0189 // mov dword [rcx], eax
- WORD $0x8944; BYTE $0x02 // mov dword [rdx], r8d
- RET
-
-LBB4_7:
- LONG $0x4d6f0f66; BYTE $0x00 // movdqa xmm1, oword 0[rbp] /* [rip + .LCPI4_0] */
- LONG $0x456f0f66; BYTE $0x10 // movdqa xmm0, oword 16[rbp] /* [rip + .LCPI4_1] */
- WORD $0xc031 // xor eax, eax
- LONG $0xd06f0f66 // movdqa xmm2, xmm0
- LONG $0xd96f0f66 // movdqa xmm3, xmm1
- LONG $0x01c0f641 // test r8b, 1
- JNE LBB4_11
- JMP LBB4_12
-
-TEXT ·_uint32_max_min_sse4(SB), $0-32
-
- MOVQ values+0(FP), DI
- MOVQ length+8(FP), SI
- MOVQ minout+16(FP), DX
- MOVQ maxout+24(FP), CX
-
- WORD $0xf685 // test esi, esi
- JLE LBB5_1
- WORD $0x8941; BYTE $0xf1 // mov r9d, esi
- WORD $0xfe83; BYTE $0x07 // cmp esi, 7
- JA LBB5_6
- WORD $0x3145; BYTE $0xdb // xor r11d, r11d
- LONG $0xffffb841; WORD $0xffff // mov r8d, -1
- WORD $0xf631 // xor esi, esi
- JMP LBB5_4
-
-LBB5_1:
- LONG $0xffffb841; WORD $0xffff // mov r8d, -1
- WORD $0xf631 // xor esi, esi
- JMP LBB5_13
-
-LBB5_6:
- WORD $0x8945; BYTE $0xcb // mov r11d, r9d
- LONG $0xf8e38341 // and r11d, -8
- LONG $0xf8438d49 // lea rax, [r11 - 8]
- WORD $0x8949; BYTE $0xc0 // mov r8, rax
- LONG $0x03e8c149 // shr r8, 3
- LONG $0x01c08349 // add r8, 1
- WORD $0x8548; BYTE $0xc0 // test rax, rax
- JE LBB5_7
- WORD $0x894d; BYTE $0xc2 // mov r10, r8
- LONG $0xfee28349 // and r10, -2
- WORD $0xf749; BYTE $0xda // neg r10
- LONG $0xc9ef0f66 // pxor xmm1, xmm1
- LONG $0xc0760f66 // pcmpeqd xmm0, xmm0
- WORD $0xc031 // xor eax, eax
- LONG $0xd2760f66 // pcmpeqd xmm2, xmm2
- LONG $0xdbef0f66 // pxor xmm3, xmm3
-
-LBB5_9:
- LONG $0x246f0ff3; BYTE $0x87 // movdqu xmm4, oword [rdi + 4*rax]
- LONG $0x6c6f0ff3; WORD $0x1087 // movdqu xmm5, oword [rdi + 4*rax + 16]
- LONG $0x746f0ff3; WORD $0x2087 // movdqu xmm6, oword [rdi + 4*rax + 32]
- LONG $0x7c6f0ff3; WORD $0x3087 // movdqu xmm7, oword [rdi + 4*rax + 48]
- LONG $0x3b380f66; BYTE $0xc4 // pminud xmm0, xmm4
- LONG $0x3b380f66; BYTE $0xd5 // pminud xmm2, xmm5
- LONG $0x3f380f66; BYTE $0xcc // pmaxud xmm1, xmm4
- LONG $0x3f380f66; BYTE $0xdd // pmaxud xmm3, xmm5
- LONG $0x3b380f66; BYTE $0xc6 // pminud xmm0, xmm6
- LONG $0x3b380f66; BYTE $0xd7 // pminud xmm2, xmm7
- LONG $0x3f380f66; BYTE $0xce // pmaxud xmm1, xmm6
- LONG $0x3f380f66; BYTE $0xdf // pmaxud xmm3, xmm7
- LONG $0x10c08348 // add rax, 16
- LONG $0x02c28349 // add r10, 2
- JNE LBB5_9
- LONG $0x01c0f641 // test r8b, 1
- JE LBB5_12
-
-LBB5_11:
- LONG $0x246f0ff3; BYTE $0x87 // movdqu xmm4, oword [rdi + 4*rax]
- LONG $0x6c6f0ff3; WORD $0x1087 // movdqu xmm5, oword [rdi + 4*rax + 16]
- LONG $0x3f380f66; BYTE $0xdd // pmaxud xmm3, xmm5
- LONG $0x3f380f66; BYTE $0xcc // pmaxud xmm1, xmm4
- LONG $0x3b380f66; BYTE $0xd5 // pminud xmm2, xmm5
- LONG $0x3b380f66; BYTE $0xc4 // pminud xmm0, xmm4
-
-LBB5_12:
- LONG $0x3b380f66; BYTE $0xc2 // pminud xmm0, xmm2
- LONG $0x3f380f66; BYTE $0xcb // pmaxud xmm1, xmm3
- LONG $0xd1700f66; BYTE $0x4e // pshufd xmm2, xmm1, 78
- LONG $0x3f380f66; BYTE $0xd1 // pmaxud xmm2, xmm1
- LONG $0xca700f66; BYTE $0xe5 // pshufd xmm1, xmm2, 229
- LONG $0x3f380f66; BYTE $0xca // pmaxud xmm1, xmm2
- LONG $0xce7e0f66 // movd esi, xmm1
- LONG $0xc8700f66; BYTE $0x4e // pshufd xmm1, xmm0, 78
- LONG $0x3b380f66; BYTE $0xc8 // pminud xmm1, xmm0
- LONG $0xc1700f66; BYTE $0xe5 // pshufd xmm0, xmm1, 229
- LONG $0x3b380f66; BYTE $0xc1 // pminud xmm0, xmm1
- LONG $0x7e0f4166; BYTE $0xc0 // movd r8d, xmm0
- WORD $0x394d; BYTE $0xcb // cmp r11, r9
- JE LBB5_13
-
-LBB5_4:
- WORD $0xf089 // mov eax, esi
-
-LBB5_5:
- LONG $0x9f348b42 // mov esi, dword [rdi + 4*r11]
- WORD $0x3941; BYTE $0xf0 // cmp r8d, esi
- LONG $0xc6430f44 // cmovae r8d, esi
- WORD $0xf039 // cmp eax, esi
- WORD $0x470f; BYTE $0xf0 // cmova esi, eax
- LONG $0x01c38349 // add r11, 1
- WORD $0xf089 // mov eax, esi
- WORD $0x394d; BYTE $0xd9 // cmp r9, r11
- JNE LBB5_5
-
-LBB5_13:
- WORD $0x3189 // mov dword [rcx], esi
- WORD $0x8944; BYTE $0x02 // mov dword [rdx], r8d
- RET
-
-LBB5_7:
- LONG $0xc9ef0f66 // pxor xmm1, xmm1
- LONG $0xc0760f66 // pcmpeqd xmm0, xmm0
- WORD $0xc031 // xor eax, eax
- LONG $0xd2760f66 // pcmpeqd xmm2, xmm2
- LONG $0xdbef0f66 // pxor xmm3, xmm3
- LONG $0x01c0f641 // test r8b, 1
- JNE LBB5_11
- JMP LBB5_12
-
-DATA LCDATA4<>+0x000(SB)/8, $0x8000000000000000
-DATA LCDATA4<>+0x008(SB)/8, $0x8000000000000000
-DATA LCDATA4<>+0x010(SB)/8, $0x7fffffffffffffff
-DATA LCDATA4<>+0x018(SB)/8, $0x7fffffffffffffff
-GLOBL LCDATA4<>(SB), 8, $32
-
-TEXT ·_int64_max_min_sse4(SB), $0-32
-
- MOVQ values+0(FP), DI
- MOVQ length+8(FP), SI
- MOVQ minout+16(FP), DX
- MOVQ maxout+24(FP), CX
- LEAQ LCDATA4<>(SB), BP
-
- QUAD $0xffffffffffffb849; WORD $0x7fff // mov r8, 9223372036854775807
- WORD $0xf685 // test esi, esi
- JLE LBB6_1
- WORD $0x8941; BYTE $0xf1 // mov r9d, esi
- WORD $0xfe83; BYTE $0x03 // cmp esi, 3
- JA LBB6_6
- LONG $0x01708d49 // lea rsi, [r8 + 1]
- WORD $0x3145; BYTE $0xdb // xor r11d, r11d
- JMP LBB6_4
-
-LBB6_1:
- LONG $0x01708d49 // lea rsi, [r8 + 1]
- JMP LBB6_13
-
-LBB6_6:
- WORD $0x8945; BYTE $0xcb // mov r11d, r9d
- LONG $0xfce38341 // and r11d, -4
- LONG $0xfc438d49 // lea rax, [r11 - 4]
- WORD $0x8949; BYTE $0xc0 // mov r8, rax
- LONG $0x02e8c149 // shr r8, 2
- LONG $0x01c08349 // add r8, 1
- WORD $0x8548; BYTE $0xc0 // test rax, rax
- JE LBB6_7
- WORD $0x894d; BYTE $0xc2 // mov r10, r8
- LONG $0xfee28349 // and r10, -2
- WORD $0xf749; BYTE $0xda // neg r10
- LONG $0x6f0f4466; WORD $0x004d // movdqa xmm9, oword 0[rbp] /* [rip + .LCPI6_0] */
- LONG $0x6f0f4466; WORD $0x1045 // movdqa xmm8, oword 16[rbp] /* [rip + .LCPI6_1] */
- WORD $0xc031 // xor eax, eax
- LONG $0x6f0f4166; BYTE $0xd0 // movdqa xmm2, xmm8
- LONG $0x6f0f4166; BYTE $0xf1 // movdqa xmm6, xmm9
-
-LBB6_9:
- LONG $0x3c6f0ff3; BYTE $0xc7 // movdqu xmm7, oword [rdi + 8*rax]
- LONG $0xc76f0f66 // movdqa xmm0, xmm7
- LONG $0x380f4166; WORD $0xc037 // pcmpgtq xmm0, xmm8
- LONG $0xe76f0f66 // movdqa xmm4, xmm7
- LONG $0x380f4166; WORD $0xe015 // blendvpd xmm4, xmm8, xmm0
- LONG $0x4c6f0ff3; WORD $0x10c7 // movdqu xmm1, oword [rdi + 8*rax + 16]
- LONG $0xc16f0f66 // movdqa xmm0, xmm1
- LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2
- LONG $0xe96f0f66 // movdqa xmm5, xmm1
- LONG $0x15380f66; BYTE $0xea // blendvpd xmm5, xmm2, xmm0
- LONG $0x6f0f4166; BYTE $0xc1 // movdqa xmm0, xmm9
- LONG $0x37380f66; BYTE $0xc7 // pcmpgtq xmm0, xmm7
- LONG $0x380f4166; WORD $0xf915 // blendvpd xmm7, xmm9, xmm0
- LONG $0xc66f0f66 // movdqa xmm0, xmm6
- LONG $0x37380f66; BYTE $0xc1 // pcmpgtq xmm0, xmm1
- LONG $0x15380f66; BYTE $0xce // blendvpd xmm1, xmm6, xmm0
- LONG $0x5c6f0ff3; WORD $0x20c7 // movdqu xmm3, oword [rdi + 8*rax + 32]
- LONG $0xc36f0f66 // movdqa xmm0, xmm3
- LONG $0x37380f66; BYTE $0xc4 // pcmpgtq xmm0, xmm4
- LONG $0x6f0f4466; BYTE $0xc3 // movdqa xmm8, xmm3
- LONG $0x380f4466; WORD $0xc415 // blendvpd xmm8, xmm4, xmm0
- LONG $0x646f0ff3; WORD $0x30c7 // movdqu xmm4, oword [rdi + 8*rax + 48]
- LONG $0xc46f0f66 // movdqa xmm0, xmm4
- LONG $0x37380f66; BYTE $0xc5 // pcmpgtq xmm0, xmm5
- LONG $0xd46f0f66 // movdqa xmm2, xmm4
- LONG $0x15380f66; BYTE $0xd5 // blendvpd xmm2, xmm5, xmm0
- LONG $0xc7280f66 // movapd xmm0, xmm7
- LONG $0x37380f66; BYTE $0xc3 // pcmpgtq xmm0, xmm3
- LONG $0x15380f66; BYTE $0xdf // blendvpd xmm3, xmm7, xmm0
- LONG $0xc1280f66 // movapd xmm0, xmm1
- LONG $0x37380f66; BYTE $0xc4 // pcmpgtq xmm0, xmm4
- LONG $0x15380f66; BYTE $0xe1 // blendvpd xmm4, xmm1, xmm0
- LONG $0x08c08348 // add rax, 8
- LONG $0x280f4466; BYTE $0xcb // movapd xmm9, xmm3
- LONG $0xf4280f66 // movapd xmm6, xmm4
- LONG $0x02c28349 // add r10, 2
- JNE LBB6_9
- LONG $0x01c0f641 // test r8b, 1
- JE LBB6_12
-
-LBB6_11:
- LONG $0x4c6f0ff3; WORD $0x10c7 // movdqu xmm1, oword [rdi + 8*rax + 16]
- LONG $0xc4280f66 // movapd xmm0, xmm4
- LONG $0x37380f66; BYTE $0xc1 // pcmpgtq xmm0, xmm1
- LONG $0xe96f0f66 // movdqa xmm5, xmm1
- LONG $0x15380f66; BYTE $0xec // blendvpd xmm5, xmm4, xmm0
- LONG $0x246f0ff3; BYTE $0xc7 // movdqu xmm4, oword [rdi + 8*rax]
- LONG $0xc3280f66 // movapd xmm0, xmm3
- LONG $0x37380f66; BYTE $0xc4 // pcmpgtq xmm0, xmm4
- LONG $0xf46f0f66 // movdqa xmm6, xmm4
- LONG $0x15380f66; BYTE $0xf3 // blendvpd xmm6, xmm3, xmm0
- LONG $0xc16f0f66 // movdqa xmm0, xmm1
- LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2
- LONG $0x15380f66; BYTE $0xca // blendvpd xmm1, xmm2, xmm0
- LONG $0xc46f0f66 // movdqa xmm0, xmm4
- LONG $0x380f4166; WORD $0xc037 // pcmpgtq xmm0, xmm8
- LONG $0x380f4166; WORD $0xe015 // blendvpd xmm4, xmm8, xmm0
- LONG $0x280f4466; BYTE $0xc4 // movapd xmm8, xmm4
- LONG $0xd1280f66 // movapd xmm2, xmm1
- LONG $0xde280f66 // movapd xmm3, xmm6
- LONG $0xe5280f66 // movapd xmm4, xmm5
-
-LBB6_12:
- LONG $0xc3280f66 // movapd xmm0, xmm3
- LONG $0x37380f66; BYTE $0xc4 // pcmpgtq xmm0, xmm4
- LONG $0x15380f66; BYTE $0xe3 // blendvpd xmm4, xmm3, xmm0
- LONG $0xcc700f66; BYTE $0x4e // pshufd xmm1, xmm4, 78
- LONG $0xc46f0f66 // movdqa xmm0, xmm4
- LONG $0x37380f66; BYTE $0xc1 // pcmpgtq xmm0, xmm1
- LONG $0x15380f66; BYTE $0xcc // blendvpd xmm1, xmm4, xmm0
- LONG $0x7e0f4866; BYTE $0xce // movq rsi, xmm1
- LONG $0xc26f0f66 // movdqa xmm0, xmm2
- LONG $0x380f4166; WORD $0xc037 // pcmpgtq xmm0, xmm8
- LONG $0x380f4166; WORD $0xd015 // blendvpd xmm2, xmm8, xmm0
- LONG $0xca700f66; BYTE $0x4e // pshufd xmm1, xmm2, 78
- LONG $0xc16f0f66 // movdqa xmm0, xmm1
- LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2
- LONG $0x15380f66; BYTE $0xca // blendvpd xmm1, xmm2, xmm0
- LONG $0x7e0f4966; BYTE $0xc8 // movq r8, xmm1
- WORD $0x394d; BYTE $0xcb // cmp r11, r9
- JE LBB6_13
-
-LBB6_4:
- WORD $0x8948; BYTE $0xf0 // mov rax, rsi
-
-LBB6_5:
- LONG $0xdf348b4a // mov rsi, qword [rdi + 8*r11]
- WORD $0x3949; BYTE $0xf0 // cmp r8, rsi
- LONG $0xc64f0f4c // cmovg r8, rsi
- WORD $0x3948; BYTE $0xf0 // cmp rax, rsi
- LONG $0xf04d0f48 // cmovge rsi, rax
- LONG $0x01c38349 // add r11, 1
- WORD $0x8948; BYTE $0xf0 // mov rax, rsi
- WORD $0x394d; BYTE $0xd9 // cmp r9, r11
- JNE LBB6_5
-
-LBB6_13:
- WORD $0x8948; BYTE $0x31 // mov qword [rcx], rsi
- WORD $0x894c; BYTE $0x02 // mov qword [rdx], r8
- RET
-
-LBB6_7:
- LONG $0x5d280f66; BYTE $0x00 // movapd xmm3, oword 0[rbp] /* [rip + .LCPI6_0] */
- LONG $0x6f0f4466; WORD $0x1045 // movdqa xmm8, oword 16[rbp] /* [rip + .LCPI6_1] */
- WORD $0xc031 // xor eax, eax
- LONG $0x6f0f4166; BYTE $0xd0 // movdqa xmm2, xmm8
- LONG $0xe3280f66 // movapd xmm4, xmm3
- LONG $0x01c0f641 // test r8b, 1
- JNE LBB6_11
- JMP LBB6_12
-
-DATA LCDATA5<>+0x000(SB)/8, $0x8000000000000000
-DATA LCDATA5<>+0x008(SB)/8, $0x8000000000000000
-GLOBL LCDATA5<>(SB), 8, $16
-
-TEXT ·_uint64_max_min_sse4(SB), $0-32
-
- MOVQ values+0(FP), DI
- MOVQ length+8(FP), SI
- MOVQ minout+16(FP), DX
- MOVQ maxout+24(FP), CX
- LEAQ LCDATA5<>(SB), BP
-
- WORD $0xf685 // test esi, esi
- JLE LBB7_1
- WORD $0x8941; BYTE $0xf1 // mov r9d, esi
- WORD $0xfe83; BYTE $0x03 // cmp esi, 3
- JA LBB7_6
- LONG $0xffc0c749; WORD $0xffff; BYTE $0xff // mov r8, -1
- WORD $0x3145; BYTE $0xdb // xor r11d, r11d
- WORD $0xc031 // xor eax, eax
- JMP LBB7_4
-
-LBB7_1:
- LONG $0xffc0c749; WORD $0xffff; BYTE $0xff // mov r8, -1
- WORD $0xc031 // xor eax, eax
- JMP LBB7_13
-
-LBB7_6:
- WORD $0x8945; BYTE $0xcb // mov r11d, r9d
- LONG $0xfce38341 // and r11d, -4
- LONG $0xfc438d49 // lea rax, [r11 - 4]
- WORD $0x8949; BYTE $0xc0 // mov r8, rax
- LONG $0x02e8c149 // shr r8, 2
- LONG $0x01c08349 // add r8, 1
- WORD $0x8548; BYTE $0xc0 // test rax, rax
- JE LBB7_7
- WORD $0x894d; BYTE $0xc2 // mov r10, r8
- LONG $0xfee28349 // and r10, -2
- WORD $0xf749; BYTE $0xda // neg r10
- LONG $0xef0f4566; BYTE $0xc9 // pxor xmm9, xmm9
- LONG $0x760f4566; BYTE $0xd2 // pcmpeqd xmm10, xmm10
- WORD $0xc031 // xor eax, eax
- LONG $0x6f0f4466; WORD $0x0045 // movdqa xmm8, oword 0[rbp] /* [rip + .LCPI7_0] */
- LONG $0x760f4566; BYTE $0xdb // pcmpeqd xmm11, xmm11
- LONG $0xef0f4566; BYTE $0xe4 // pxor xmm12, xmm12
-
-LBB7_9:
- LONG $0x6f0f4166; BYTE $0xd2 // movdqa xmm2, xmm10
- LONG $0xef0f4166; BYTE $0xd0 // pxor xmm2, xmm8
- LONG $0x246f0ff3; BYTE $0xc7 // movdqu xmm4, oword [rdi + 8*rax]
- LONG $0x6c6f0ff3; WORD $0x10c7 // movdqu xmm5, oword [rdi + 8*rax + 16]
- LONG $0x6f0f44f3; WORD $0xc76c; BYTE $0x20 // movdqu xmm13, oword [rdi + 8*rax + 32]
- LONG $0xc46f0f66 // movdqa xmm0, xmm4
- LONG $0xef0f4166; BYTE $0xc0 // pxor xmm0, xmm8
- LONG $0x6f0f4166; BYTE $0xc9 // movdqa xmm1, xmm9
- LONG $0xef0f4166; BYTE $0xc8 // pxor xmm1, xmm8
- LONG $0x37380f66; BYTE $0xc8 // pcmpgtq xmm1, xmm0
- LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2
- LONG $0xdc6f0f66 // movdqa xmm3, xmm4
- LONG $0x380f4166; WORD $0xda15 // blendvpd xmm3, xmm10, xmm0
- LONG $0x746f0ff3; WORD $0x30c7 // movdqu xmm6, oword [rdi + 8*rax + 48]
- LONG $0x6f0f4166; BYTE $0xfb // movdqa xmm7, xmm11
- LONG $0xef0f4166; BYTE $0xf8 // pxor xmm7, xmm8
- LONG $0xc56f0f66 // movdqa xmm0, xmm5
- LONG $0xef0f4166; BYTE $0xc0 // pxor xmm0, xmm8
- LONG $0x6f0f4166; BYTE $0xd4 // movdqa xmm2, xmm12
- LONG $0xef0f4166; BYTE $0xd0 // pxor xmm2, xmm8
- LONG $0x37380f66; BYTE $0xd0 // pcmpgtq xmm2, xmm0
- LONG $0x37380f66; BYTE $0xc7 // pcmpgtq xmm0, xmm7
- LONG $0xfd6f0f66 // movdqa xmm7, xmm5
- LONG $0x380f4166; WORD $0xfb15 // blendvpd xmm7, xmm11, xmm0
- LONG $0xc16f0f66 // movdqa xmm0, xmm1
- LONG $0x380f4166; WORD $0xe115 // blendvpd xmm4, xmm9, xmm0
- LONG $0xc26f0f66 // movdqa xmm0, xmm2
- LONG $0x380f4166; WORD $0xec15 // blendvpd xmm5, xmm12, xmm0
- LONG $0xd3280f66 // movapd xmm2, xmm3
- LONG $0x570f4166; BYTE $0xd0 // xorpd xmm2, xmm8
- LONG $0x6f0f4166; BYTE $0xc5 // movdqa xmm0, xmm13
- LONG $0xef0f4166; BYTE $0xc0 // pxor xmm0, xmm8
- LONG $0xcc280f66 // movapd xmm1, xmm4
- LONG $0x570f4166; BYTE $0xc8 // xorpd xmm1, xmm8
- LONG $0x37380f66; BYTE $0xc8 // pcmpgtq xmm1, xmm0
- LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2
- LONG $0x6f0f4566; BYTE $0xd5 // movdqa xmm10, xmm13
- LONG $0x380f4466; WORD $0xd315 // blendvpd xmm10, xmm3, xmm0
- LONG $0xdf280f66 // movapd xmm3, xmm7
- LONG $0x570f4166; BYTE $0xd8 // xorpd xmm3, xmm8
- LONG $0xc66f0f66 // movdqa xmm0, xmm6
- LONG $0xef0f4166; BYTE $0xc0 // pxor xmm0, xmm8
- LONG $0xd5280f66 // movapd xmm2, xmm5
- LONG $0x570f4166; BYTE $0xd0 // xorpd xmm2, xmm8
- LONG $0x37380f66; BYTE $0xd0 // pcmpgtq xmm2, xmm0
- LONG $0x37380f66; BYTE $0xc3 // pcmpgtq xmm0, xmm3
- LONG $0x6f0f4466; BYTE $0xde // movdqa xmm11, xmm6
- LONG $0x380f4466; WORD $0xdf15 // blendvpd xmm11, xmm7, xmm0
- LONG $0xc16f0f66 // movdqa xmm0, xmm1
- LONG $0x380f4466; WORD $0xec15 // blendvpd xmm13, xmm4, xmm0
- LONG $0xc26f0f66 // movdqa xmm0, xmm2
- LONG $0x15380f66; BYTE $0xf5 // blendvpd xmm6, xmm5, xmm0
- LONG $0x08c08348 // add rax, 8
- LONG $0x280f4566; BYTE $0xcd // movapd xmm9, xmm13
- LONG $0x280f4466; BYTE $0xe6 // movapd xmm12, xmm6
- LONG $0x02c28349 // add r10, 2
- JNE LBB7_9
- LONG $0x01c0f641 // test r8b, 1
- JE LBB7_12
-
-LBB7_11:
- LONG $0x24100f66; BYTE $0xc7 // movupd xmm4, oword [rdi + 8*rax]
- LONG $0x5c100f66; WORD $0x10c7 // movupd xmm3, oword [rdi + 8*rax + 16]
- LONG $0x6d280f66; BYTE $0x00 // movapd xmm5, oword 0[rbp] /* [rip + .LCPI7_0] */
- LONG $0xc6280f66 // movapd xmm0, xmm6
- LONG $0xc5570f66 // xorpd xmm0, xmm5
- LONG $0xcb280f66 // movapd xmm1, xmm3
- LONG $0xcd570f66 // xorpd xmm1, xmm5
- LONG $0x37380f66; BYTE $0xc1 // pcmpgtq xmm0, xmm1
- LONG $0xfb280f66 // movapd xmm7, xmm3
- LONG $0x15380f66; BYTE $0xfe // blendvpd xmm7, xmm6, xmm0
- LONG $0x280f4166; BYTE $0xc5 // movapd xmm0, xmm13
- LONG $0xc5570f66 // xorpd xmm0, xmm5
- LONG $0xd4280f66 // movapd xmm2, xmm4
- LONG $0xd5570f66 // xorpd xmm2, xmm5
- LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2
- LONG $0xf4280f66 // movapd xmm6, xmm4
- LONG $0x380f4166; WORD $0xf515 // blendvpd xmm6, xmm13, xmm0
- LONG $0x280f4166; BYTE $0xc3 // movapd xmm0, xmm11
- LONG $0xc5570f66 // xorpd xmm0, xmm5
- LONG $0x37380f66; BYTE $0xc8 // pcmpgtq xmm1, xmm0
- LONG $0xc16f0f66 // movdqa xmm0, xmm1
- LONG $0x380f4166; WORD $0xdb15 // blendvpd xmm3, xmm11, xmm0
- LONG $0x570f4166; BYTE $0xea // xorpd xmm5, xmm10
- LONG $0x37380f66; BYTE $0xd5 // pcmpgtq xmm2, xmm5
- LONG $0xc26f0f66 // movdqa xmm0, xmm2
- LONG $0x380f4166; WORD $0xe215 // blendvpd xmm4, xmm10, xmm0
- LONG $0x280f4466; BYTE $0xd4 // movapd xmm10, xmm4
- LONG $0x280f4466; BYTE $0xdb // movapd xmm11, xmm3
- LONG $0x280f4466; BYTE $0xee // movapd xmm13, xmm6
- LONG $0xf7280f66 // movapd xmm6, xmm7
-
-LBB7_12:
- LONG $0x4d280f66; BYTE $0x00 // movapd xmm1, oword 0[rbp] /* [rip + .LCPI7_0] */
- LONG $0xd6280f66 // movapd xmm2, xmm6
- LONG $0xd1570f66 // xorpd xmm2, xmm1
- LONG $0x280f4166; BYTE $0xc5 // movapd xmm0, xmm13
- LONG $0xc1570f66 // xorpd xmm0, xmm1
- LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2
- LONG $0x380f4166; WORD $0xf515 // blendvpd xmm6, xmm13, xmm0
- LONG $0xd6700f66; BYTE $0x4e // pshufd xmm2, xmm6, 78
- LONG $0xc6280f66 // movapd xmm0, xmm6
- LONG $0xc1570f66 // xorpd xmm0, xmm1
- LONG $0xda6f0f66 // movdqa xmm3, xmm2
- LONG $0xd9ef0f66 // pxor xmm3, xmm1
- LONG $0x37380f66; BYTE $0xc3 // pcmpgtq xmm0, xmm3
- LONG $0x15380f66; BYTE $0xd6 // blendvpd xmm2, xmm6, xmm0
- LONG $0x7e0f4866; BYTE $0xd0 // movq rax, xmm2
- LONG $0x6f0f4166; BYTE $0xd2 // movdqa xmm2, xmm10
- LONG $0xd1ef0f66 // pxor xmm2, xmm1
- LONG $0x6f0f4166; BYTE $0xc3 // movdqa xmm0, xmm11
- LONG $0xc1ef0f66 // pxor xmm0, xmm1
- LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2
- LONG $0x380f4566; WORD $0xda15 // blendvpd xmm11, xmm10, xmm0
- LONG $0x700f4166; WORD $0x4ed3 // pshufd xmm2, xmm11, 78
- LONG $0x6f0f4166; BYTE $0xc3 // movdqa xmm0, xmm11
- LONG $0xc1ef0f66 // pxor xmm0, xmm1
- LONG $0xcaef0f66 // pxor xmm1, xmm2
- LONG $0x37380f66; BYTE $0xc8 // pcmpgtq xmm1, xmm0
- LONG $0xc16f0f66 // movdqa xmm0, xmm1
- LONG $0x380f4166; WORD $0xd315 // blendvpd xmm2, xmm11, xmm0
- LONG $0x7e0f4966; BYTE $0xd0 // movq r8, xmm2
- WORD $0x394d; BYTE $0xcb // cmp r11, r9
- JE LBB7_13
-
-LBB7_4:
- WORD $0x8948; BYTE $0xc6 // mov rsi, rax
-
-LBB7_5:
- LONG $0xdf048b4a // mov rax, qword [rdi + 8*r11]
- WORD $0x3949; BYTE $0xc0 // cmp r8, rax
- LONG $0xc0430f4c // cmovae r8, rax
- WORD $0x3948; BYTE $0xc6 // cmp rsi, rax
- LONG $0xc6470f48 // cmova rax, rsi
- LONG $0x01c38349 // add r11, 1
- WORD $0x8948; BYTE $0xc6 // mov rsi, rax
- WORD $0x394d; BYTE $0xd9 // cmp r9, r11
- JNE LBB7_5
-
-LBB7_13:
- WORD $0x8948; BYTE $0x01 // mov qword [rcx], rax
- WORD $0x894c; BYTE $0x02 // mov qword [rdx], r8
- RET
-
-LBB7_7:
- LONG $0x570f4566; BYTE $0xed // xorpd xmm13, xmm13
- LONG $0x760f4566; BYTE $0xd2 // pcmpeqd xmm10, xmm10
- WORD $0xc031 // xor eax, eax
- LONG $0x760f4566; BYTE $0xdb // pcmpeqd xmm11, xmm11
- LONG $0xf6570f66 // xorpd xmm6, xmm6
- LONG $0x01c0f641 // test r8b, 1
- JNE LBB7_11
- JMP LBB7_12
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints.go
deleted file mode 100644
index 1666df129..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints.go
+++ /dev/null
@@ -1,407 +0,0 @@
-// Code generated by transpose_ints.go.tmpl. DO NOT EDIT.
-
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package utils
-
-// when we upgrade to support go1.18, this can be massively simplified by using
-// Go Generics, but since we aren't supporting go1.18 yet, I didn't want to use
-// them here so we can maintain the backwards compatibility.
-
-func transposeInt8Int8(src []int8, dest []int8, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int8(transposeMap[s])
- }
-}
-
-func transposeInt8Uint8(src []int8, dest []uint8, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint8(transposeMap[s])
- }
-}
-
-func transposeInt8Int16(src []int8, dest []int16, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int16(transposeMap[s])
- }
-}
-
-func transposeInt8Uint16(src []int8, dest []uint16, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint16(transposeMap[s])
- }
-}
-
-func transposeInt8Int32(src []int8, dest []int32, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int32(transposeMap[s])
- }
-}
-
-func transposeInt8Uint32(src []int8, dest []uint32, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint32(transposeMap[s])
- }
-}
-
-func transposeInt8Int64(src []int8, dest []int64, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int64(transposeMap[s])
- }
-}
-
-func transposeInt8Uint64(src []int8, dest []uint64, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint64(transposeMap[s])
- }
-}
-
-func transposeUint8Int8(src []uint8, dest []int8, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int8(transposeMap[s])
- }
-}
-
-func transposeUint8Uint8(src []uint8, dest []uint8, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint8(transposeMap[s])
- }
-}
-
-func transposeUint8Int16(src []uint8, dest []int16, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int16(transposeMap[s])
- }
-}
-
-func transposeUint8Uint16(src []uint8, dest []uint16, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint16(transposeMap[s])
- }
-}
-
-func transposeUint8Int32(src []uint8, dest []int32, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int32(transposeMap[s])
- }
-}
-
-func transposeUint8Uint32(src []uint8, dest []uint32, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint32(transposeMap[s])
- }
-}
-
-func transposeUint8Int64(src []uint8, dest []int64, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int64(transposeMap[s])
- }
-}
-
-func transposeUint8Uint64(src []uint8, dest []uint64, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint64(transposeMap[s])
- }
-}
-
-func transposeInt16Int8(src []int16, dest []int8, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int8(transposeMap[s])
- }
-}
-
-func transposeInt16Uint8(src []int16, dest []uint8, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint8(transposeMap[s])
- }
-}
-
-func transposeInt16Int16(src []int16, dest []int16, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int16(transposeMap[s])
- }
-}
-
-func transposeInt16Uint16(src []int16, dest []uint16, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint16(transposeMap[s])
- }
-}
-
-func transposeInt16Int32(src []int16, dest []int32, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int32(transposeMap[s])
- }
-}
-
-func transposeInt16Uint32(src []int16, dest []uint32, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint32(transposeMap[s])
- }
-}
-
-func transposeInt16Int64(src []int16, dest []int64, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int64(transposeMap[s])
- }
-}
-
-func transposeInt16Uint64(src []int16, dest []uint64, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint64(transposeMap[s])
- }
-}
-
-func transposeUint16Int8(src []uint16, dest []int8, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int8(transposeMap[s])
- }
-}
-
-func transposeUint16Uint8(src []uint16, dest []uint8, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint8(transposeMap[s])
- }
-}
-
-func transposeUint16Int16(src []uint16, dest []int16, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int16(transposeMap[s])
- }
-}
-
-func transposeUint16Uint16(src []uint16, dest []uint16, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint16(transposeMap[s])
- }
-}
-
-func transposeUint16Int32(src []uint16, dest []int32, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int32(transposeMap[s])
- }
-}
-
-func transposeUint16Uint32(src []uint16, dest []uint32, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint32(transposeMap[s])
- }
-}
-
-func transposeUint16Int64(src []uint16, dest []int64, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int64(transposeMap[s])
- }
-}
-
-func transposeUint16Uint64(src []uint16, dest []uint64, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint64(transposeMap[s])
- }
-}
-
-func transposeInt32Int8(src []int32, dest []int8, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int8(transposeMap[s])
- }
-}
-
-func transposeInt32Uint8(src []int32, dest []uint8, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint8(transposeMap[s])
- }
-}
-
-func transposeInt32Int16(src []int32, dest []int16, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int16(transposeMap[s])
- }
-}
-
-func transposeInt32Uint16(src []int32, dest []uint16, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint16(transposeMap[s])
- }
-}
-
-func transposeInt32Int32(src []int32, dest []int32, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int32(transposeMap[s])
- }
-}
-
-func transposeInt32Uint32(src []int32, dest []uint32, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint32(transposeMap[s])
- }
-}
-
-func transposeInt32Int64(src []int32, dest []int64, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int64(transposeMap[s])
- }
-}
-
-func transposeInt32Uint64(src []int32, dest []uint64, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint64(transposeMap[s])
- }
-}
-
-func transposeUint32Int8(src []uint32, dest []int8, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int8(transposeMap[s])
- }
-}
-
-func transposeUint32Uint8(src []uint32, dest []uint8, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint8(transposeMap[s])
- }
-}
-
-func transposeUint32Int16(src []uint32, dest []int16, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int16(transposeMap[s])
- }
-}
-
-func transposeUint32Uint16(src []uint32, dest []uint16, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint16(transposeMap[s])
- }
-}
-
-func transposeUint32Int32(src []uint32, dest []int32, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int32(transposeMap[s])
- }
-}
-
-func transposeUint32Uint32(src []uint32, dest []uint32, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint32(transposeMap[s])
- }
-}
-
-func transposeUint32Int64(src []uint32, dest []int64, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int64(transposeMap[s])
- }
-}
-
-func transposeUint32Uint64(src []uint32, dest []uint64, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint64(transposeMap[s])
- }
-}
-
-func transposeInt64Int8(src []int64, dest []int8, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int8(transposeMap[s])
- }
-}
-
-func transposeInt64Uint8(src []int64, dest []uint8, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint8(transposeMap[s])
- }
-}
-
-func transposeInt64Int16(src []int64, dest []int16, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int16(transposeMap[s])
- }
-}
-
-func transposeInt64Uint16(src []int64, dest []uint16, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint16(transposeMap[s])
- }
-}
-
-func transposeInt64Int32(src []int64, dest []int32, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int32(transposeMap[s])
- }
-}
-
-func transposeInt64Uint32(src []int64, dest []uint32, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint32(transposeMap[s])
- }
-}
-
-func transposeInt64Int64(src []int64, dest []int64, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int64(transposeMap[s])
- }
-}
-
-func transposeInt64Uint64(src []int64, dest []uint64, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint64(transposeMap[s])
- }
-}
-
-func transposeUint64Int8(src []uint64, dest []int8, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int8(transposeMap[s])
- }
-}
-
-func transposeUint64Uint8(src []uint64, dest []uint8, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint8(transposeMap[s])
- }
-}
-
-func transposeUint64Int16(src []uint64, dest []int16, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int16(transposeMap[s])
- }
-}
-
-func transposeUint64Uint16(src []uint64, dest []uint16, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint16(transposeMap[s])
- }
-}
-
-func transposeUint64Int32(src []uint64, dest []int32, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int32(transposeMap[s])
- }
-}
-
-func transposeUint64Uint32(src []uint64, dest []uint32, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint32(transposeMap[s])
- }
-}
-
-func transposeUint64Int64(src []uint64, dest []int64, transposeMap []int32) {
- for i, s := range src {
- dest[i] = int64(transposeMap[s])
- }
-}
-
-func transposeUint64Uint64(src []uint64, dest []uint64, transposeMap []int32) {
- for i, s := range src {
- dest[i] = uint64(transposeMap[s])
- }
-}
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints.go.tmpl b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints.go.tmpl
deleted file mode 100644
index 680ae1ee7..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints.go.tmpl
+++ /dev/null
@@ -1,34 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package utils
-
-{{ $typelist := .In }}
-{{range .In}}
-{{ $src := .Type }}
-{{ $srcName := .Name }}
-{{ range $typelist }}
-{{ $dest := .Type }}
-{{ $destName := .Name }}
-
-func transpose{{ $srcName }}{{ $destName }}(src []{{$src}}, dest []{{$dest}}, transposeMap []int32) {
- for i, s := range src {
- dest[i] = {{ $dest }}(transposeMap[s])
- }
-}
-
-{{ end }}
-{{ end }}
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints.tmpldata b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints.tmpldata
deleted file mode 100644
index 72eaf300c..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints.tmpldata
+++ /dev/null
@@ -1,34 +0,0 @@
-[
- {
- "Name": "Int8",
- "Type": "int8"
- },
- {
- "Name": "Uint8",
- "Type": "uint8"
- },
- {
- "Name": "Int16",
- "Type": "int16"
- },
- {
- "Name": "Uint16",
- "Type": "uint16"
- },
- {
- "Name": "Int32",
- "Type": "int32"
- },
- {
- "Name": "Uint32",
- "Type": "uint32"
- },
- {
- "Name": "Int64",
- "Type": "int64"
- },
- {
- "Name": "Uint64",
- "Type": "uint64"
- }
-]
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_amd64.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_amd64.go
deleted file mode 100644
index d4433d368..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_amd64.go
+++ /dev/null
@@ -1,325 +0,0 @@
-// Code generated by transpose_ints_amd64.go.tmpl. DO NOT EDIT.
-
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//go:build !noasm
-
-package utils
-
-import (
- "golang.org/x/sys/cpu"
-)
-
-var (
- TransposeInt8Int8 func([]int8, []int8, []int32)
- TransposeInt8Uint8 func([]int8, []uint8, []int32)
- TransposeInt8Int16 func([]int8, []int16, []int32)
- TransposeInt8Uint16 func([]int8, []uint16, []int32)
- TransposeInt8Int32 func([]int8, []int32, []int32)
- TransposeInt8Uint32 func([]int8, []uint32, []int32)
- TransposeInt8Int64 func([]int8, []int64, []int32)
- TransposeInt8Uint64 func([]int8, []uint64, []int32)
-
- TransposeUint8Int8 func([]uint8, []int8, []int32)
- TransposeUint8Uint8 func([]uint8, []uint8, []int32)
- TransposeUint8Int16 func([]uint8, []int16, []int32)
- TransposeUint8Uint16 func([]uint8, []uint16, []int32)
- TransposeUint8Int32 func([]uint8, []int32, []int32)
- TransposeUint8Uint32 func([]uint8, []uint32, []int32)
- TransposeUint8Int64 func([]uint8, []int64, []int32)
- TransposeUint8Uint64 func([]uint8, []uint64, []int32)
-
- TransposeInt16Int8 func([]int16, []int8, []int32)
- TransposeInt16Uint8 func([]int16, []uint8, []int32)
- TransposeInt16Int16 func([]int16, []int16, []int32)
- TransposeInt16Uint16 func([]int16, []uint16, []int32)
- TransposeInt16Int32 func([]int16, []int32, []int32)
- TransposeInt16Uint32 func([]int16, []uint32, []int32)
- TransposeInt16Int64 func([]int16, []int64, []int32)
- TransposeInt16Uint64 func([]int16, []uint64, []int32)
-
- TransposeUint16Int8 func([]uint16, []int8, []int32)
- TransposeUint16Uint8 func([]uint16, []uint8, []int32)
- TransposeUint16Int16 func([]uint16, []int16, []int32)
- TransposeUint16Uint16 func([]uint16, []uint16, []int32)
- TransposeUint16Int32 func([]uint16, []int32, []int32)
- TransposeUint16Uint32 func([]uint16, []uint32, []int32)
- TransposeUint16Int64 func([]uint16, []int64, []int32)
- TransposeUint16Uint64 func([]uint16, []uint64, []int32)
-
- TransposeInt32Int8 func([]int32, []int8, []int32)
- TransposeInt32Uint8 func([]int32, []uint8, []int32)
- TransposeInt32Int16 func([]int32, []int16, []int32)
- TransposeInt32Uint16 func([]int32, []uint16, []int32)
- TransposeInt32Int32 func([]int32, []int32, []int32)
- TransposeInt32Uint32 func([]int32, []uint32, []int32)
- TransposeInt32Int64 func([]int32, []int64, []int32)
- TransposeInt32Uint64 func([]int32, []uint64, []int32)
-
- TransposeUint32Int8 func([]uint32, []int8, []int32)
- TransposeUint32Uint8 func([]uint32, []uint8, []int32)
- TransposeUint32Int16 func([]uint32, []int16, []int32)
- TransposeUint32Uint16 func([]uint32, []uint16, []int32)
- TransposeUint32Int32 func([]uint32, []int32, []int32)
- TransposeUint32Uint32 func([]uint32, []uint32, []int32)
- TransposeUint32Int64 func([]uint32, []int64, []int32)
- TransposeUint32Uint64 func([]uint32, []uint64, []int32)
-
- TransposeInt64Int8 func([]int64, []int8, []int32)
- TransposeInt64Uint8 func([]int64, []uint8, []int32)
- TransposeInt64Int16 func([]int64, []int16, []int32)
- TransposeInt64Uint16 func([]int64, []uint16, []int32)
- TransposeInt64Int32 func([]int64, []int32, []int32)
- TransposeInt64Uint32 func([]int64, []uint32, []int32)
- TransposeInt64Int64 func([]int64, []int64, []int32)
- TransposeInt64Uint64 func([]int64, []uint64, []int32)
-
- TransposeUint64Int8 func([]uint64, []int8, []int32)
- TransposeUint64Uint8 func([]uint64, []uint8, []int32)
- TransposeUint64Int16 func([]uint64, []int16, []int32)
- TransposeUint64Uint16 func([]uint64, []uint16, []int32)
- TransposeUint64Int32 func([]uint64, []int32, []int32)
- TransposeUint64Uint32 func([]uint64, []uint32, []int32)
- TransposeUint64Int64 func([]uint64, []int64, []int32)
- TransposeUint64Uint64 func([]uint64, []uint64, []int32)
-)
-
-func init() {
- if cpu.X86.HasAVX2 {
-
- TransposeInt8Int8 = transposeInt8Int8avx2
- TransposeInt8Uint8 = transposeInt8Uint8avx2
- TransposeInt8Int16 = transposeInt8Int16avx2
- TransposeInt8Uint16 = transposeInt8Uint16avx2
- TransposeInt8Int32 = transposeInt8Int32avx2
- TransposeInt8Uint32 = transposeInt8Uint32avx2
- TransposeInt8Int64 = transposeInt8Int64avx2
- TransposeInt8Uint64 = transposeInt8Uint64avx2
-
- TransposeUint8Int8 = transposeUint8Int8avx2
- TransposeUint8Uint8 = transposeUint8Uint8avx2
- TransposeUint8Int16 = transposeUint8Int16avx2
- TransposeUint8Uint16 = transposeUint8Uint16avx2
- TransposeUint8Int32 = transposeUint8Int32avx2
- TransposeUint8Uint32 = transposeUint8Uint32avx2
- TransposeUint8Int64 = transposeUint8Int64avx2
- TransposeUint8Uint64 = transposeUint8Uint64avx2
-
- TransposeInt16Int8 = transposeInt16Int8avx2
- TransposeInt16Uint8 = transposeInt16Uint8avx2
- TransposeInt16Int16 = transposeInt16Int16avx2
- TransposeInt16Uint16 = transposeInt16Uint16avx2
- TransposeInt16Int32 = transposeInt16Int32avx2
- TransposeInt16Uint32 = transposeInt16Uint32avx2
- TransposeInt16Int64 = transposeInt16Int64avx2
- TransposeInt16Uint64 = transposeInt16Uint64avx2
-
- TransposeUint16Int8 = transposeUint16Int8avx2
- TransposeUint16Uint8 = transposeUint16Uint8avx2
- TransposeUint16Int16 = transposeUint16Int16avx2
- TransposeUint16Uint16 = transposeUint16Uint16avx2
- TransposeUint16Int32 = transposeUint16Int32avx2
- TransposeUint16Uint32 = transposeUint16Uint32avx2
- TransposeUint16Int64 = transposeUint16Int64avx2
- TransposeUint16Uint64 = transposeUint16Uint64avx2
-
- TransposeInt32Int8 = transposeInt32Int8avx2
- TransposeInt32Uint8 = transposeInt32Uint8avx2
- TransposeInt32Int16 = transposeInt32Int16avx2
- TransposeInt32Uint16 = transposeInt32Uint16avx2
- TransposeInt32Int32 = transposeInt32Int32avx2
- TransposeInt32Uint32 = transposeInt32Uint32avx2
- TransposeInt32Int64 = transposeInt32Int64avx2
- TransposeInt32Uint64 = transposeInt32Uint64avx2
-
- TransposeUint32Int8 = transposeUint32Int8avx2
- TransposeUint32Uint8 = transposeUint32Uint8avx2
- TransposeUint32Int16 = transposeUint32Int16avx2
- TransposeUint32Uint16 = transposeUint32Uint16avx2
- TransposeUint32Int32 = transposeUint32Int32avx2
- TransposeUint32Uint32 = transposeUint32Uint32avx2
- TransposeUint32Int64 = transposeUint32Int64avx2
- TransposeUint32Uint64 = transposeUint32Uint64avx2
-
- TransposeInt64Int8 = transposeInt64Int8avx2
- TransposeInt64Uint8 = transposeInt64Uint8avx2
- TransposeInt64Int16 = transposeInt64Int16avx2
- TransposeInt64Uint16 = transposeInt64Uint16avx2
- TransposeInt64Int32 = transposeInt64Int32avx2
- TransposeInt64Uint32 = transposeInt64Uint32avx2
- TransposeInt64Int64 = transposeInt64Int64avx2
- TransposeInt64Uint64 = transposeInt64Uint64avx2
-
- TransposeUint64Int8 = transposeUint64Int8avx2
- TransposeUint64Uint8 = transposeUint64Uint8avx2
- TransposeUint64Int16 = transposeUint64Int16avx2
- TransposeUint64Uint16 = transposeUint64Uint16avx2
- TransposeUint64Int32 = transposeUint64Int32avx2
- TransposeUint64Uint32 = transposeUint64Uint32avx2
- TransposeUint64Int64 = transposeUint64Int64avx2
- TransposeUint64Uint64 = transposeUint64Uint64avx2
-
- } else if cpu.X86.HasSSE42 {
-
- TransposeInt8Int8 = transposeInt8Int8sse4
- TransposeInt8Uint8 = transposeInt8Uint8sse4
- TransposeInt8Int16 = transposeInt8Int16sse4
- TransposeInt8Uint16 = transposeInt8Uint16sse4
- TransposeInt8Int32 = transposeInt8Int32sse4
- TransposeInt8Uint32 = transposeInt8Uint32sse4
- TransposeInt8Int64 = transposeInt8Int64sse4
- TransposeInt8Uint64 = transposeInt8Uint64sse4
-
- TransposeUint8Int8 = transposeUint8Int8sse4
- TransposeUint8Uint8 = transposeUint8Uint8sse4
- TransposeUint8Int16 = transposeUint8Int16sse4
- TransposeUint8Uint16 = transposeUint8Uint16sse4
- TransposeUint8Int32 = transposeUint8Int32sse4
- TransposeUint8Uint32 = transposeUint8Uint32sse4
- TransposeUint8Int64 = transposeUint8Int64sse4
- TransposeUint8Uint64 = transposeUint8Uint64sse4
-
- TransposeInt16Int8 = transposeInt16Int8sse4
- TransposeInt16Uint8 = transposeInt16Uint8sse4
- TransposeInt16Int16 = transposeInt16Int16sse4
- TransposeInt16Uint16 = transposeInt16Uint16sse4
- TransposeInt16Int32 = transposeInt16Int32sse4
- TransposeInt16Uint32 = transposeInt16Uint32sse4
- TransposeInt16Int64 = transposeInt16Int64sse4
- TransposeInt16Uint64 = transposeInt16Uint64sse4
-
- TransposeUint16Int8 = transposeUint16Int8sse4
- TransposeUint16Uint8 = transposeUint16Uint8sse4
- TransposeUint16Int16 = transposeUint16Int16sse4
- TransposeUint16Uint16 = transposeUint16Uint16sse4
- TransposeUint16Int32 = transposeUint16Int32sse4
- TransposeUint16Uint32 = transposeUint16Uint32sse4
- TransposeUint16Int64 = transposeUint16Int64sse4
- TransposeUint16Uint64 = transposeUint16Uint64sse4
-
- TransposeInt32Int8 = transposeInt32Int8sse4
- TransposeInt32Uint8 = transposeInt32Uint8sse4
- TransposeInt32Int16 = transposeInt32Int16sse4
- TransposeInt32Uint16 = transposeInt32Uint16sse4
- TransposeInt32Int32 = transposeInt32Int32sse4
- TransposeInt32Uint32 = transposeInt32Uint32sse4
- TransposeInt32Int64 = transposeInt32Int64sse4
- TransposeInt32Uint64 = transposeInt32Uint64sse4
-
- TransposeUint32Int8 = transposeUint32Int8sse4
- TransposeUint32Uint8 = transposeUint32Uint8sse4
- TransposeUint32Int16 = transposeUint32Int16sse4
- TransposeUint32Uint16 = transposeUint32Uint16sse4
- TransposeUint32Int32 = transposeUint32Int32sse4
- TransposeUint32Uint32 = transposeUint32Uint32sse4
- TransposeUint32Int64 = transposeUint32Int64sse4
- TransposeUint32Uint64 = transposeUint32Uint64sse4
-
- TransposeInt64Int8 = transposeInt64Int8sse4
- TransposeInt64Uint8 = transposeInt64Uint8sse4
- TransposeInt64Int16 = transposeInt64Int16sse4
- TransposeInt64Uint16 = transposeInt64Uint16sse4
- TransposeInt64Int32 = transposeInt64Int32sse4
- TransposeInt64Uint32 = transposeInt64Uint32sse4
- TransposeInt64Int64 = transposeInt64Int64sse4
- TransposeInt64Uint64 = transposeInt64Uint64sse4
-
- TransposeUint64Int8 = transposeUint64Int8sse4
- TransposeUint64Uint8 = transposeUint64Uint8sse4
- TransposeUint64Int16 = transposeUint64Int16sse4
- TransposeUint64Uint16 = transposeUint64Uint16sse4
- TransposeUint64Int32 = transposeUint64Int32sse4
- TransposeUint64Uint32 = transposeUint64Uint32sse4
- TransposeUint64Int64 = transposeUint64Int64sse4
- TransposeUint64Uint64 = transposeUint64Uint64sse4
-
- } else {
-
- TransposeInt8Int8 = transposeInt8Int8
- TransposeInt8Uint8 = transposeInt8Uint8
- TransposeInt8Int16 = transposeInt8Int16
- TransposeInt8Uint16 = transposeInt8Uint16
- TransposeInt8Int32 = transposeInt8Int32
- TransposeInt8Uint32 = transposeInt8Uint32
- TransposeInt8Int64 = transposeInt8Int64
- TransposeInt8Uint64 = transposeInt8Uint64
-
- TransposeUint8Int8 = transposeUint8Int8
- TransposeUint8Uint8 = transposeUint8Uint8
- TransposeUint8Int16 = transposeUint8Int16
- TransposeUint8Uint16 = transposeUint8Uint16
- TransposeUint8Int32 = transposeUint8Int32
- TransposeUint8Uint32 = transposeUint8Uint32
- TransposeUint8Int64 = transposeUint8Int64
- TransposeUint8Uint64 = transposeUint8Uint64
-
- TransposeInt16Int8 = transposeInt16Int8
- TransposeInt16Uint8 = transposeInt16Uint8
- TransposeInt16Int16 = transposeInt16Int16
- TransposeInt16Uint16 = transposeInt16Uint16
- TransposeInt16Int32 = transposeInt16Int32
- TransposeInt16Uint32 = transposeInt16Uint32
- TransposeInt16Int64 = transposeInt16Int64
- TransposeInt16Uint64 = transposeInt16Uint64
-
- TransposeUint16Int8 = transposeUint16Int8
- TransposeUint16Uint8 = transposeUint16Uint8
- TransposeUint16Int16 = transposeUint16Int16
- TransposeUint16Uint16 = transposeUint16Uint16
- TransposeUint16Int32 = transposeUint16Int32
- TransposeUint16Uint32 = transposeUint16Uint32
- TransposeUint16Int64 = transposeUint16Int64
- TransposeUint16Uint64 = transposeUint16Uint64
-
- TransposeInt32Int8 = transposeInt32Int8
- TransposeInt32Uint8 = transposeInt32Uint8
- TransposeInt32Int16 = transposeInt32Int16
- TransposeInt32Uint16 = transposeInt32Uint16
- TransposeInt32Int32 = transposeInt32Int32
- TransposeInt32Uint32 = transposeInt32Uint32
- TransposeInt32Int64 = transposeInt32Int64
- TransposeInt32Uint64 = transposeInt32Uint64
-
- TransposeUint32Int8 = transposeUint32Int8
- TransposeUint32Uint8 = transposeUint32Uint8
- TransposeUint32Int16 = transposeUint32Int16
- TransposeUint32Uint16 = transposeUint32Uint16
- TransposeUint32Int32 = transposeUint32Int32
- TransposeUint32Uint32 = transposeUint32Uint32
- TransposeUint32Int64 = transposeUint32Int64
- TransposeUint32Uint64 = transposeUint32Uint64
-
- TransposeInt64Int8 = transposeInt64Int8
- TransposeInt64Uint8 = transposeInt64Uint8
- TransposeInt64Int16 = transposeInt64Int16
- TransposeInt64Uint16 = transposeInt64Uint16
- TransposeInt64Int32 = transposeInt64Int32
- TransposeInt64Uint32 = transposeInt64Uint32
- TransposeInt64Int64 = transposeInt64Int64
- TransposeInt64Uint64 = transposeInt64Uint64
-
- TransposeUint64Int8 = transposeUint64Int8
- TransposeUint64Uint8 = transposeUint64Uint8
- TransposeUint64Int16 = transposeUint64Int16
- TransposeUint64Uint16 = transposeUint64Uint16
- TransposeUint64Int32 = transposeUint64Int32
- TransposeUint64Uint32 = transposeUint64Uint32
- TransposeUint64Int64 = transposeUint64Int64
- TransposeUint64Uint64 = transposeUint64Uint64
-
- }
-}
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_amd64.go.tmpl b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_amd64.go.tmpl
deleted file mode 100644
index eac0208e5..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_amd64.go.tmpl
+++ /dev/null
@@ -1,75 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//go:build !noasm
-// +build !noasm
-
-package utils
-
-import (
- "golang.org/x/sys/cpu"
-)
-
-var (
-{{ $typelist := .In }}
-{{range .In}}
-{{ $src := .Type -}}
-{{ $srcName := .Name -}}
-{{ range $typelist -}}
-{{ $dest := .Type -}}
-{{ $destName := .Name -}}
- Transpose{{$srcName}}{{$destName}} func([]{{$src}}, []{{$dest}}, []int32)
-{{end}}
-{{end}}
-)
-
-
-func init() {
- if cpu.X86.HasAVX2 {
-{{ $typelist := .In }}
-{{range .In}}
-{{ $src := .Type -}}
-{{ $srcName := .Name -}}
-{{ range $typelist -}}
-{{ $dest := .Type -}}
-{{ $destName := .Name -}}
- Transpose{{$srcName}}{{$destName}} = transpose{{ $srcName }}{{ $destName }}avx2
-{{end}}
-{{end}}
- } else if cpu.X86.HasSSE42 {
-{{ $typelist := .In }}
-{{range .In}}
-{{ $src := .Type -}}
-{{ $srcName := .Name -}}
-{{ range $typelist -}}
-{{ $dest := .Type -}}
-{{ $destName := .Name -}}
- Transpose{{$srcName}}{{$destName}} = transpose{{ $srcName }}{{ $destName }}sse4
-{{end}}
-{{end}}
- } else {
-{{ $typelist := .In }}
-{{range .In}}
-{{ $src := .Type -}}
-{{ $srcName := .Name -}}
-{{ range $typelist -}}
-{{ $dest := .Type -}}
-{{ $destName := .Name -}}
- Transpose{{$srcName}}{{$destName}} = transpose{{ $srcName }}{{ $destName }}
-{{end}}
-{{end}}
- }
-}
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_arm64.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_arm64.go
deleted file mode 100644
index cc957cdaa..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_arm64.go
+++ /dev/null
@@ -1,96 +0,0 @@
-// Code generated by transpose_ints_s390x.go.tmpl. DO NOT EDIT.
-
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//go:build !noasm
-
-package utils
-
-// if building with the 'noasm' tag, then point to the pure go implementations
-var (
- TransposeInt8Int8 = transposeInt8Int8
- TransposeInt8Uint8 = transposeInt8Uint8
- TransposeInt8Int16 = transposeInt8Int16
- TransposeInt8Uint16 = transposeInt8Uint16
- TransposeInt8Int32 = transposeInt8Int32
- TransposeInt8Uint32 = transposeInt8Uint32
- TransposeInt8Int64 = transposeInt8Int64
- TransposeInt8Uint64 = transposeInt8Uint64
-
- TransposeUint8Int8 = transposeUint8Int8
- TransposeUint8Uint8 = transposeUint8Uint8
- TransposeUint8Int16 = transposeUint8Int16
- TransposeUint8Uint16 = transposeUint8Uint16
- TransposeUint8Int32 = transposeUint8Int32
- TransposeUint8Uint32 = transposeUint8Uint32
- TransposeUint8Int64 = transposeUint8Int64
- TransposeUint8Uint64 = transposeUint8Uint64
-
- TransposeInt16Int8 = transposeInt16Int8
- TransposeInt16Uint8 = transposeInt16Uint8
- TransposeInt16Int16 = transposeInt16Int16
- TransposeInt16Uint16 = transposeInt16Uint16
- TransposeInt16Int32 = transposeInt16Int32
- TransposeInt16Uint32 = transposeInt16Uint32
- TransposeInt16Int64 = transposeInt16Int64
- TransposeInt16Uint64 = transposeInt16Uint64
-
- TransposeUint16Int8 = transposeUint16Int8
- TransposeUint16Uint8 = transposeUint16Uint8
- TransposeUint16Int16 = transposeUint16Int16
- TransposeUint16Uint16 = transposeUint16Uint16
- TransposeUint16Int32 = transposeUint16Int32
- TransposeUint16Uint32 = transposeUint16Uint32
- TransposeUint16Int64 = transposeUint16Int64
- TransposeUint16Uint64 = transposeUint16Uint64
-
- TransposeInt32Int8 = transposeInt32Int8
- TransposeInt32Uint8 = transposeInt32Uint8
- TransposeInt32Int16 = transposeInt32Int16
- TransposeInt32Uint16 = transposeInt32Uint16
- TransposeInt32Int32 = transposeInt32Int32
- TransposeInt32Uint32 = transposeInt32Uint32
- TransposeInt32Int64 = transposeInt32Int64
- TransposeInt32Uint64 = transposeInt32Uint64
-
- TransposeUint32Int8 = transposeUint32Int8
- TransposeUint32Uint8 = transposeUint32Uint8
- TransposeUint32Int16 = transposeUint32Int16
- TransposeUint32Uint16 = transposeUint32Uint16
- TransposeUint32Int32 = transposeUint32Int32
- TransposeUint32Uint32 = transposeUint32Uint32
- TransposeUint32Int64 = transposeUint32Int64
- TransposeUint32Uint64 = transposeUint32Uint64
-
- TransposeInt64Int8 = transposeInt64Int8
- TransposeInt64Uint8 = transposeInt64Uint8
- TransposeInt64Int16 = transposeInt64Int16
- TransposeInt64Uint16 = transposeInt64Uint16
- TransposeInt64Int32 = transposeInt64Int32
- TransposeInt64Uint32 = transposeInt64Uint32
- TransposeInt64Int64 = transposeInt64Int64
- TransposeInt64Uint64 = transposeInt64Uint64
-
- TransposeUint64Int8 = transposeUint64Int8
- TransposeUint64Uint8 = transposeUint64Uint8
- TransposeUint64Int16 = transposeUint64Int16
- TransposeUint64Uint16 = transposeUint64Uint16
- TransposeUint64Int32 = transposeUint64Int32
- TransposeUint64Uint32 = transposeUint64Uint32
- TransposeUint64Int64 = transposeUint64Int64
- TransposeUint64Uint64 = transposeUint64Uint64
-)
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_avx2_amd64.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_avx2_amd64.go
deleted file mode 100644
index f1421ddf5..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_avx2_amd64.go
+++ /dev/null
@@ -1,473 +0,0 @@
-// Code generated by transpose_ints_simd.go.tmpl. DO NOT EDIT.
-
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//go:build !noasm
-
-package utils
-
-import (
- "unsafe"
-)
-
-//go:noescape
-func _transpose_int8_int8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt8Int8avx2(src []int8, dest []int8, transposeMap []int32) {
- _transpose_int8_int8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int8_uint8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt8Uint8avx2(src []int8, dest []uint8, transposeMap []int32) {
- _transpose_int8_uint8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int8_int16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt8Int16avx2(src []int8, dest []int16, transposeMap []int32) {
- _transpose_int8_int16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int8_uint16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt8Uint16avx2(src []int8, dest []uint16, transposeMap []int32) {
- _transpose_int8_uint16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int8_int32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt8Int32avx2(src []int8, dest []int32, transposeMap []int32) {
- _transpose_int8_int32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int8_uint32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt8Uint32avx2(src []int8, dest []uint32, transposeMap []int32) {
- _transpose_int8_uint32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int8_int64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt8Int64avx2(src []int8, dest []int64, transposeMap []int32) {
- _transpose_int8_int64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int8_uint64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt8Uint64avx2(src []int8, dest []uint64, transposeMap []int32) {
- _transpose_int8_uint64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint8_int8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint8Int8avx2(src []uint8, dest []int8, transposeMap []int32) {
- _transpose_uint8_int8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint8_uint8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint8Uint8avx2(src []uint8, dest []uint8, transposeMap []int32) {
- _transpose_uint8_uint8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint8_int16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint8Int16avx2(src []uint8, dest []int16, transposeMap []int32) {
- _transpose_uint8_int16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint8_uint16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint8Uint16avx2(src []uint8, dest []uint16, transposeMap []int32) {
- _transpose_uint8_uint16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint8_int32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint8Int32avx2(src []uint8, dest []int32, transposeMap []int32) {
- _transpose_uint8_int32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint8_uint32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint8Uint32avx2(src []uint8, dest []uint32, transposeMap []int32) {
- _transpose_uint8_uint32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint8_int64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint8Int64avx2(src []uint8, dest []int64, transposeMap []int32) {
- _transpose_uint8_int64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint8_uint64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint8Uint64avx2(src []uint8, dest []uint64, transposeMap []int32) {
- _transpose_uint8_uint64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int16_int8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt16Int8avx2(src []int16, dest []int8, transposeMap []int32) {
- _transpose_int16_int8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int16_uint8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt16Uint8avx2(src []int16, dest []uint8, transposeMap []int32) {
- _transpose_int16_uint8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int16_int16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt16Int16avx2(src []int16, dest []int16, transposeMap []int32) {
- _transpose_int16_int16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int16_uint16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt16Uint16avx2(src []int16, dest []uint16, transposeMap []int32) {
- _transpose_int16_uint16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int16_int32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt16Int32avx2(src []int16, dest []int32, transposeMap []int32) {
- _transpose_int16_int32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int16_uint32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt16Uint32avx2(src []int16, dest []uint32, transposeMap []int32) {
- _transpose_int16_uint32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int16_int64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt16Int64avx2(src []int16, dest []int64, transposeMap []int32) {
- _transpose_int16_int64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int16_uint64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt16Uint64avx2(src []int16, dest []uint64, transposeMap []int32) {
- _transpose_int16_uint64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint16_int8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint16Int8avx2(src []uint16, dest []int8, transposeMap []int32) {
- _transpose_uint16_int8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint16_uint8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint16Uint8avx2(src []uint16, dest []uint8, transposeMap []int32) {
- _transpose_uint16_uint8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint16_int16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint16Int16avx2(src []uint16, dest []int16, transposeMap []int32) {
- _transpose_uint16_int16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint16_uint16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint16Uint16avx2(src []uint16, dest []uint16, transposeMap []int32) {
- _transpose_uint16_uint16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint16_int32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint16Int32avx2(src []uint16, dest []int32, transposeMap []int32) {
- _transpose_uint16_int32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint16_uint32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint16Uint32avx2(src []uint16, dest []uint32, transposeMap []int32) {
- _transpose_uint16_uint32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint16_int64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint16Int64avx2(src []uint16, dest []int64, transposeMap []int32) {
- _transpose_uint16_int64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint16_uint64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint16Uint64avx2(src []uint16, dest []uint64, transposeMap []int32) {
- _transpose_uint16_uint64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int32_int8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt32Int8avx2(src []int32, dest []int8, transposeMap []int32) {
- _transpose_int32_int8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int32_uint8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt32Uint8avx2(src []int32, dest []uint8, transposeMap []int32) {
- _transpose_int32_uint8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int32_int16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt32Int16avx2(src []int32, dest []int16, transposeMap []int32) {
- _transpose_int32_int16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int32_uint16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt32Uint16avx2(src []int32, dest []uint16, transposeMap []int32) {
- _transpose_int32_uint16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int32_int32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt32Int32avx2(src []int32, dest []int32, transposeMap []int32) {
- _transpose_int32_int32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int32_uint32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt32Uint32avx2(src []int32, dest []uint32, transposeMap []int32) {
- _transpose_int32_uint32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int32_int64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt32Int64avx2(src []int32, dest []int64, transposeMap []int32) {
- _transpose_int32_int64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int32_uint64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt32Uint64avx2(src []int32, dest []uint64, transposeMap []int32) {
- _transpose_int32_uint64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint32_int8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint32Int8avx2(src []uint32, dest []int8, transposeMap []int32) {
- _transpose_uint32_int8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint32_uint8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint32Uint8avx2(src []uint32, dest []uint8, transposeMap []int32) {
- _transpose_uint32_uint8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint32_int16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint32Int16avx2(src []uint32, dest []int16, transposeMap []int32) {
- _transpose_uint32_int16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint32_uint16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint32Uint16avx2(src []uint32, dest []uint16, transposeMap []int32) {
- _transpose_uint32_uint16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint32_int32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint32Int32avx2(src []uint32, dest []int32, transposeMap []int32) {
- _transpose_uint32_int32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint32_uint32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint32Uint32avx2(src []uint32, dest []uint32, transposeMap []int32) {
- _transpose_uint32_uint32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint32_int64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint32Int64avx2(src []uint32, dest []int64, transposeMap []int32) {
- _transpose_uint32_int64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint32_uint64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint32Uint64avx2(src []uint32, dest []uint64, transposeMap []int32) {
- _transpose_uint32_uint64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int64_int8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt64Int8avx2(src []int64, dest []int8, transposeMap []int32) {
- _transpose_int64_int8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int64_uint8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt64Uint8avx2(src []int64, dest []uint8, transposeMap []int32) {
- _transpose_int64_uint8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int64_int16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt64Int16avx2(src []int64, dest []int16, transposeMap []int32) {
- _transpose_int64_int16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int64_uint16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt64Uint16avx2(src []int64, dest []uint16, transposeMap []int32) {
- _transpose_int64_uint16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int64_int32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt64Int32avx2(src []int64, dest []int32, transposeMap []int32) {
- _transpose_int64_int32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int64_uint32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt64Uint32avx2(src []int64, dest []uint32, transposeMap []int32) {
- _transpose_int64_uint32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int64_int64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt64Int64avx2(src []int64, dest []int64, transposeMap []int32) {
- _transpose_int64_int64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int64_uint64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt64Uint64avx2(src []int64, dest []uint64, transposeMap []int32) {
- _transpose_int64_uint64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint64_int8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint64Int8avx2(src []uint64, dest []int8, transposeMap []int32) {
- _transpose_uint64_int8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint64_uint8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint64Uint8avx2(src []uint64, dest []uint8, transposeMap []int32) {
- _transpose_uint64_uint8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint64_int16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint64Int16avx2(src []uint64, dest []int16, transposeMap []int32) {
- _transpose_uint64_int16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint64_uint16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint64Uint16avx2(src []uint64, dest []uint16, transposeMap []int32) {
- _transpose_uint64_uint16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint64_int32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint64Int32avx2(src []uint64, dest []int32, transposeMap []int32) {
- _transpose_uint64_int32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint64_uint32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint64Uint32avx2(src []uint64, dest []uint32, transposeMap []int32) {
- _transpose_uint64_uint32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint64_int64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint64Int64avx2(src []uint64, dest []int64, transposeMap []int32) {
- _transpose_uint64_int64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint64_uint64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint64Uint64avx2(src []uint64, dest []uint64, transposeMap []int32) {
- _transpose_uint64_uint64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_avx2_amd64.s b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_avx2_amd64.s
deleted file mode 100644
index fbcc101eb..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_avx2_amd64.s
+++ /dev/null
@@ -1,3074 +0,0 @@
-//+build !noasm !appengine
-// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT
-
-TEXT ·_transpose_uint8_uint8_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB0_1
-
-LBB0_5:
- WORD $0xd089 // mov eax, edx
- WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- LONG $0x0157b60f // movzx edx, byte [rdi + 1]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- LONG $0x0257b60f // movzx edx, byte [rdi + 2]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- LONG $0x0357b60f // movzx edx, byte [rdi + 3]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB0_5
-
-LBB0_1:
- WORD $0xd285 // test edx, edx
- JLE LBB0_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB0_3:
- LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB0_3
-
-LBB0_4:
- RET
-
-TEXT ·_transpose_int8_uint8_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB1_1
-
-LBB1_5:
- WORD $0xd089 // mov eax, edx
- LONG $0x17be0f48 // movsx rdx, byte [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB1_5
-
-LBB1_1:
- WORD $0xd285 // test edx, edx
- JLE LBB1_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB1_3:
- LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB1_3
-
-LBB1_4:
- RET
-
-TEXT ·_transpose_uint16_uint8_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB2_1
-
-LBB2_5:
- WORD $0xd089 // mov eax, edx
- WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- LONG $0x0257b70f // movzx edx, word [rdi + 2]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- LONG $0x0457b70f // movzx edx, word [rdi + 4]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- LONG $0x0657b70f // movzx edx, word [rdi + 6]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB2_5
-
-LBB2_1:
- WORD $0xd285 // test edx, edx
- JLE LBB2_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB2_3:
- LONG $0x04b70f42; BYTE $0x47 // movzx eax, word [rdi + 2*r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB2_3
-
-LBB2_4:
- RET
-
-TEXT ·_transpose_int16_uint8_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB3_1
-
-LBB3_5:
- WORD $0xd089 // mov eax, edx
- LONG $0x17bf0f48 // movsx rdx, word [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB3_5
-
-LBB3_1:
- WORD $0xd285 // test edx, edx
- JLE LBB3_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB3_3:
- LONG $0x04bf0f4a; BYTE $0x47 // movsx rax, word [rdi + 2*r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB3_3
-
-LBB3_4:
- RET
-
-TEXT ·_transpose_uint32_uint8_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB4_1
-
-LBB4_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x178b // mov edx, dword [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB4_5
-
-LBB4_1:
- WORD $0xd285 // test edx, edx
- JLE LBB4_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB4_3:
- LONG $0x87048b42 // mov eax, dword [rdi + 4*r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB4_3
-
-LBB4_4:
- RET
-
-TEXT ·_transpose_int32_uint8_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB5_1
-
-LBB5_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- LONG $0x04576348 // movsxd rdx, dword [rdi + 4]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- LONG $0x08576348 // movsxd rdx, dword [rdi + 8]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- LONG $0x0c576348 // movsxd rdx, dword [rdi + 12]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB5_5
-
-LBB5_1:
- WORD $0xd285 // test edx, edx
- JLE LBB5_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB5_3:
- LONG $0x8704634a // movsxd rax, dword [rdi + 4*r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB5_3
-
-LBB5_4:
- RET
-
-TEXT ·_transpose_uint64_uint8_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB6_1
-
-LBB6_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB6_5
-
-LBB6_1:
- WORD $0xd285 // test edx, edx
- JLE LBB6_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB6_3:
- LONG $0xc7048b4a // mov rax, qword [rdi + 8*r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB6_3
-
-LBB6_4:
- RET
-
-TEXT ·_transpose_int64_uint8_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB7_1
-
-LBB7_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB7_5
-
-LBB7_1:
- WORD $0xd285 // test edx, edx
- JLE LBB7_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB7_3:
- LONG $0xc7048b4a // mov rax, qword [rdi + 8*r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB7_3
-
-LBB7_4:
- RET
-
-TEXT ·_transpose_uint8_int8_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB8_1
-
-LBB8_5:
- WORD $0xd089 // mov eax, edx
- WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- LONG $0x0157b60f // movzx edx, byte [rdi + 1]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- LONG $0x0257b60f // movzx edx, byte [rdi + 2]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- LONG $0x0357b60f // movzx edx, byte [rdi + 3]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB8_5
-
-LBB8_1:
- WORD $0xd285 // test edx, edx
- JLE LBB8_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB8_3:
- LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB8_3
-
-LBB8_4:
- RET
-
-TEXT ·_transpose_int8_int8_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB9_1
-
-LBB9_5:
- WORD $0xd089 // mov eax, edx
- LONG $0x17be0f48 // movsx rdx, byte [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB9_5
-
-LBB9_1:
- WORD $0xd285 // test edx, edx
- JLE LBB9_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB9_3:
- LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB9_3
-
-LBB9_4:
- RET
-
-TEXT ·_transpose_uint16_int8_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB10_1
-
-LBB10_5:
- WORD $0xd089 // mov eax, edx
- WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- LONG $0x0257b70f // movzx edx, word [rdi + 2]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- LONG $0x0457b70f // movzx edx, word [rdi + 4]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- LONG $0x0657b70f // movzx edx, word [rdi + 6]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB10_5
-
-LBB10_1:
- WORD $0xd285 // test edx, edx
- JLE LBB10_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB10_3:
- LONG $0x04b70f42; BYTE $0x47 // movzx eax, word [rdi + 2*r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB10_3
-
-LBB10_4:
- RET
-
-TEXT ·_transpose_int16_int8_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB11_1
-
-LBB11_5:
- WORD $0xd089 // mov eax, edx
- LONG $0x17bf0f48 // movsx rdx, word [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB11_5
-
-LBB11_1:
- WORD $0xd285 // test edx, edx
- JLE LBB11_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB11_3:
- LONG $0x04bf0f4a; BYTE $0x47 // movsx rax, word [rdi + 2*r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB11_3
-
-LBB11_4:
- RET
-
-TEXT ·_transpose_uint32_int8_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB12_1
-
-LBB12_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x178b // mov edx, dword [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB12_5
-
-LBB12_1:
- WORD $0xd285 // test edx, edx
- JLE LBB12_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB12_3:
- LONG $0x87048b42 // mov eax, dword [rdi + 4*r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB12_3
-
-LBB12_4:
- RET
-
-TEXT ·_transpose_int32_int8_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB13_1
-
-LBB13_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- LONG $0x04576348 // movsxd rdx, dword [rdi + 4]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- LONG $0x08576348 // movsxd rdx, dword [rdi + 8]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- LONG $0x0c576348 // movsxd rdx, dword [rdi + 12]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB13_5
-
-LBB13_1:
- WORD $0xd285 // test edx, edx
- JLE LBB13_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB13_3:
- LONG $0x8704634a // movsxd rax, dword [rdi + 4*r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB13_3
-
-LBB13_4:
- RET
-
-TEXT ·_transpose_uint64_int8_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB14_1
-
-LBB14_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB14_5
-
-LBB14_1:
- WORD $0xd285 // test edx, edx
- JLE LBB14_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB14_3:
- LONG $0xc7048b4a // mov rax, qword [rdi + 8*r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB14_3
-
-LBB14_4:
- RET
-
-TEXT ·_transpose_int64_int8_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB15_1
-
-LBB15_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB15_5
-
-LBB15_1:
- WORD $0xd285 // test edx, edx
- JLE LBB15_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB15_3:
- LONG $0xc7048b4a // mov rax, qword [rdi + 8*r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB15_3
-
-LBB15_4:
- RET
-
-TEXT ·_transpose_uint8_uint16_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB16_1
-
-LBB16_5:
- WORD $0xd089 // mov eax, edx
- WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- LONG $0x0157b60f // movzx edx, byte [rdi + 1]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- LONG $0x0257b60f // movzx edx, byte [rdi + 2]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- LONG $0x0357b60f // movzx edx, byte [rdi + 3]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB16_5
-
-LBB16_1:
- WORD $0xd285 // test edx, edx
- JLE LBB16_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB16_3:
- LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x46 // mov word [rsi + 2*r8], ax
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB16_3
-
-LBB16_4:
- RET
-
-TEXT ·_transpose_int8_uint16_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB17_1
-
-LBB17_5:
- WORD $0xd089 // mov eax, edx
- LONG $0x17be0f48 // movsx rdx, byte [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB17_5
-
-LBB17_1:
- WORD $0xd285 // test edx, edx
- JLE LBB17_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB17_3:
- LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x46 // mov word [rsi + 2*r8], ax
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB17_3
-
-LBB17_4:
- RET
-
-TEXT ·_transpose_uint16_uint16_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB18_1
-
-LBB18_5:
- WORD $0xd089 // mov eax, edx
- WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- LONG $0x0257b70f // movzx edx, word [rdi + 2]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- LONG $0x0457b70f // movzx edx, word [rdi + 4]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- LONG $0x0657b70f // movzx edx, word [rdi + 6]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB18_5
-
-LBB18_1:
- WORD $0xd285 // test edx, edx
- JLE LBB18_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB18_3:
- LONG $0x04b70f42; BYTE $0x07 // movzx eax, word [rdi + r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB18_3
-
-LBB18_4:
- RET
-
-TEXT ·_transpose_int16_uint16_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB19_1
-
-LBB19_5:
- WORD $0xd089 // mov eax, edx
- LONG $0x17bf0f48 // movsx rdx, word [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB19_5
-
-LBB19_1:
- WORD $0xd285 // test edx, edx
- JLE LBB19_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB19_3:
- LONG $0x04bf0f4a; BYTE $0x07 // movsx rax, word [rdi + r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB19_3
-
-LBB19_4:
- RET
-
-TEXT ·_transpose_uint32_uint16_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB20_1
-
-LBB20_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x178b // mov edx, dword [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB20_5
-
-LBB20_1:
- WORD $0xd285 // test edx, edx
- JLE LBB20_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB20_3:
- LONG $0x47048b42 // mov eax, dword [rdi + 2*r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB20_3
-
-LBB20_4:
- RET
-
-TEXT ·_transpose_int32_uint16_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB21_1
-
-LBB21_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- LONG $0x04576348 // movsxd rdx, dword [rdi + 4]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- LONG $0x08576348 // movsxd rdx, dword [rdi + 8]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- LONG $0x0c576348 // movsxd rdx, dword [rdi + 12]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB21_5
-
-LBB21_1:
- WORD $0xd285 // test edx, edx
- JLE LBB21_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB21_3:
- LONG $0x4704634a // movsxd rax, dword [rdi + 2*r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB21_3
-
-LBB21_4:
- RET
-
-TEXT ·_transpose_uint64_uint16_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB22_1
-
-LBB22_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB22_5
-
-LBB22_1:
- WORD $0xd285 // test edx, edx
- JLE LBB22_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB22_3:
- LONG $0x87048b4a // mov rax, qword [rdi + 4*r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB22_3
-
-LBB22_4:
- RET
-
-TEXT ·_transpose_int64_uint16_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB23_1
-
-LBB23_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB23_5
-
-LBB23_1:
- WORD $0xd285 // test edx, edx
- JLE LBB23_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB23_3:
- LONG $0x87048b4a // mov rax, qword [rdi + 4*r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB23_3
-
-LBB23_4:
- RET
-
-TEXT ·_transpose_uint8_int16_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB24_1
-
-LBB24_5:
- WORD $0xd089 // mov eax, edx
- WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- LONG $0x0157b60f // movzx edx, byte [rdi + 1]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- LONG $0x0257b60f // movzx edx, byte [rdi + 2]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- LONG $0x0357b60f // movzx edx, byte [rdi + 3]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB24_5
-
-LBB24_1:
- WORD $0xd285 // test edx, edx
- JLE LBB24_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB24_3:
- LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x46 // mov word [rsi + 2*r8], ax
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB24_3
-
-LBB24_4:
- RET
-
-TEXT ·_transpose_int8_int16_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB25_1
-
-LBB25_5:
- WORD $0xd089 // mov eax, edx
- LONG $0x17be0f48 // movsx rdx, byte [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB25_5
-
-LBB25_1:
- WORD $0xd285 // test edx, edx
- JLE LBB25_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB25_3:
- LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x46 // mov word [rsi + 2*r8], ax
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB25_3
-
-LBB25_4:
- RET
-
-TEXT ·_transpose_uint16_int16_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB26_1
-
-LBB26_5:
- WORD $0xd089 // mov eax, edx
- WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- LONG $0x0257b70f // movzx edx, word [rdi + 2]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- LONG $0x0457b70f // movzx edx, word [rdi + 4]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- LONG $0x0657b70f // movzx edx, word [rdi + 6]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB26_5
-
-LBB26_1:
- WORD $0xd285 // test edx, edx
- JLE LBB26_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB26_3:
- LONG $0x04b70f42; BYTE $0x07 // movzx eax, word [rdi + r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB26_3
-
-LBB26_4:
- RET
-
-TEXT ·_transpose_int16_int16_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB27_1
-
-LBB27_5:
- WORD $0xd089 // mov eax, edx
- LONG $0x17bf0f48 // movsx rdx, word [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB27_5
-
-LBB27_1:
- WORD $0xd285 // test edx, edx
- JLE LBB27_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB27_3:
- LONG $0x04bf0f4a; BYTE $0x07 // movsx rax, word [rdi + r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB27_3
-
-LBB27_4:
- RET
-
-TEXT ·_transpose_uint32_int16_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB28_1
-
-LBB28_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x178b // mov edx, dword [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB28_5
-
-LBB28_1:
- WORD $0xd285 // test edx, edx
- JLE LBB28_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB28_3:
- LONG $0x47048b42 // mov eax, dword [rdi + 2*r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB28_3
-
-LBB28_4:
- RET
-
-TEXT ·_transpose_int32_int16_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB29_1
-
-LBB29_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- LONG $0x04576348 // movsxd rdx, dword [rdi + 4]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- LONG $0x08576348 // movsxd rdx, dword [rdi + 8]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- LONG $0x0c576348 // movsxd rdx, dword [rdi + 12]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB29_5
-
-LBB29_1:
- WORD $0xd285 // test edx, edx
- JLE LBB29_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB29_3:
- LONG $0x4704634a // movsxd rax, dword [rdi + 2*r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB29_3
-
-LBB29_4:
- RET
-
-TEXT ·_transpose_uint64_int16_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB30_1
-
-LBB30_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB30_5
-
-LBB30_1:
- WORD $0xd285 // test edx, edx
- JLE LBB30_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB30_3:
- LONG $0x87048b4a // mov rax, qword [rdi + 4*r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB30_3
-
-LBB30_4:
- RET
-
-TEXT ·_transpose_int64_int16_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB31_1
-
-LBB31_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB31_5
-
-LBB31_1:
- WORD $0xd285 // test edx, edx
- JLE LBB31_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB31_3:
- LONG $0x87048b4a // mov rax, qword [rdi + 4*r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB31_3
-
-LBB31_4:
- RET
-
-TEXT ·_transpose_uint8_uint32_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB32_1
-
-LBB32_5:
- WORD $0xd089 // mov eax, edx
- WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- LONG $0x0157b60f // movzx edx, byte [rdi + 1]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- LONG $0x0257b60f // movzx edx, byte [rdi + 2]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- LONG $0x0357b60f // movzx edx, byte [rdi + 3]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB32_5
-
-LBB32_1:
- WORD $0xd285 // test edx, edx
- JLE LBB32_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB32_3:
- LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x86048942 // mov dword [rsi + 4*r8], eax
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB32_3
-
-LBB32_4:
- RET
-
-TEXT ·_transpose_int8_uint32_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB33_1
-
-LBB33_5:
- WORD $0xd089 // mov eax, edx
- LONG $0x17be0f48 // movsx rdx, byte [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB33_5
-
-LBB33_1:
- WORD $0xd285 // test edx, edx
- JLE LBB33_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB33_3:
- LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x86048942 // mov dword [rsi + 4*r8], eax
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB33_3
-
-LBB33_4:
- RET
-
-TEXT ·_transpose_uint16_uint32_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB34_1
-
-LBB34_5:
- WORD $0xd089 // mov eax, edx
- WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- LONG $0x0257b70f // movzx edx, word [rdi + 2]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- LONG $0x0457b70f // movzx edx, word [rdi + 4]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- LONG $0x0657b70f // movzx edx, word [rdi + 6]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB34_5
-
-LBB34_1:
- WORD $0xd285 // test edx, edx
- JLE LBB34_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB34_3:
- LONG $0x04b70f42; BYTE $0x07 // movzx eax, word [rdi + r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x46048942 // mov dword [rsi + 2*r8], eax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB34_3
-
-LBB34_4:
- RET
-
-TEXT ·_transpose_int16_uint32_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB35_1
-
-LBB35_5:
- WORD $0xd089 // mov eax, edx
- LONG $0x17bf0f48 // movsx rdx, word [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB35_5
-
-LBB35_1:
- WORD $0xd285 // test edx, edx
- JLE LBB35_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB35_3:
- LONG $0x04bf0f4a; BYTE $0x07 // movsx rax, word [rdi + r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x46048942 // mov dword [rsi + 2*r8], eax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB35_3
-
-LBB35_4:
- RET
-
-TEXT ·_transpose_uint32_uint32_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB36_1
-
-LBB36_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x178b // mov edx, dword [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB36_5
-
-LBB36_1:
- WORD $0xd285 // test edx, edx
- JLE LBB36_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB36_3:
- LONG $0x07048b42 // mov eax, dword [rdi + r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x06048942 // mov dword [rsi + r8], eax
- LONG $0x04c08349 // add r8, 4
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB36_3
-
-LBB36_4:
- RET
-
-TEXT ·_transpose_int32_uint32_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB37_1
-
-LBB37_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- LONG $0x04576348 // movsxd rdx, dword [rdi + 4]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- LONG $0x08576348 // movsxd rdx, dword [rdi + 8]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- LONG $0x0c576348 // movsxd rdx, dword [rdi + 12]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB37_5
-
-LBB37_1:
- WORD $0xd285 // test edx, edx
- JLE LBB37_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB37_3:
- LONG $0x0704634a // movsxd rax, dword [rdi + r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x06048942 // mov dword [rsi + r8], eax
- LONG $0x04c08349 // add r8, 4
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB37_3
-
-LBB37_4:
- RET
-
-TEXT ·_transpose_uint64_uint32_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB38_1
-
-LBB38_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB38_5
-
-LBB38_1:
- WORD $0xd285 // test edx, edx
- JLE LBB38_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB38_3:
- LONG $0x47048b4a // mov rax, qword [rdi + 2*r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x06048942 // mov dword [rsi + r8], eax
- LONG $0x04c08349 // add r8, 4
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB38_3
-
-LBB38_4:
- RET
-
-TEXT ·_transpose_int64_uint32_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB39_1
-
-LBB39_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB39_5
-
-LBB39_1:
- WORD $0xd285 // test edx, edx
- JLE LBB39_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB39_3:
- LONG $0x47048b4a // mov rax, qword [rdi + 2*r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x06048942 // mov dword [rsi + r8], eax
- LONG $0x04c08349 // add r8, 4
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB39_3
-
-LBB39_4:
- RET
-
-TEXT ·_transpose_uint8_int32_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB40_1
-
-LBB40_5:
- WORD $0xd089 // mov eax, edx
- WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- LONG $0x0157b60f // movzx edx, byte [rdi + 1]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- LONG $0x0257b60f // movzx edx, byte [rdi + 2]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- LONG $0x0357b60f // movzx edx, byte [rdi + 3]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB40_5
-
-LBB40_1:
- WORD $0xd285 // test edx, edx
- JLE LBB40_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB40_3:
- LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x86048942 // mov dword [rsi + 4*r8], eax
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB40_3
-
-LBB40_4:
- RET
-
-TEXT ·_transpose_int8_int32_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB41_1
-
-LBB41_5:
- WORD $0xd089 // mov eax, edx
- LONG $0x17be0f48 // movsx rdx, byte [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB41_5
-
-LBB41_1:
- WORD $0xd285 // test edx, edx
- JLE LBB41_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB41_3:
- LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x86048942 // mov dword [rsi + 4*r8], eax
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB41_3
-
-LBB41_4:
- RET
-
-TEXT ·_transpose_uint16_int32_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB42_1
-
-LBB42_5:
- WORD $0xd089 // mov eax, edx
- WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- LONG $0x0257b70f // movzx edx, word [rdi + 2]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- LONG $0x0457b70f // movzx edx, word [rdi + 4]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- LONG $0x0657b70f // movzx edx, word [rdi + 6]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB42_5
-
-LBB42_1:
- WORD $0xd285 // test edx, edx
- JLE LBB42_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB42_3:
- LONG $0x04b70f42; BYTE $0x07 // movzx eax, word [rdi + r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x46048942 // mov dword [rsi + 2*r8], eax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB42_3
-
-LBB42_4:
- RET
-
-TEXT ·_transpose_int16_int32_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB43_1
-
-LBB43_5:
- WORD $0xd089 // mov eax, edx
- LONG $0x17bf0f48 // movsx rdx, word [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB43_5
-
-LBB43_1:
- WORD $0xd285 // test edx, edx
- JLE LBB43_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB43_3:
- LONG $0x04bf0f4a; BYTE $0x07 // movsx rax, word [rdi + r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x46048942 // mov dword [rsi + 2*r8], eax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB43_3
-
-LBB43_4:
- RET
-
-TEXT ·_transpose_uint32_int32_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB44_1
-
-LBB44_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x178b // mov edx, dword [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB44_5
-
-LBB44_1:
- WORD $0xd285 // test edx, edx
- JLE LBB44_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB44_3:
- LONG $0x07048b42 // mov eax, dword [rdi + r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x06048942 // mov dword [rsi + r8], eax
- LONG $0x04c08349 // add r8, 4
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB44_3
-
-LBB44_4:
- RET
-
-TEXT ·_transpose_int32_int32_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB45_1
-
-LBB45_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- LONG $0x04576348 // movsxd rdx, dword [rdi + 4]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- LONG $0x08576348 // movsxd rdx, dword [rdi + 8]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- LONG $0x0c576348 // movsxd rdx, dword [rdi + 12]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB45_5
-
-LBB45_1:
- WORD $0xd285 // test edx, edx
- JLE LBB45_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB45_3:
- LONG $0x0704634a // movsxd rax, dword [rdi + r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x06048942 // mov dword [rsi + r8], eax
- LONG $0x04c08349 // add r8, 4
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB45_3
-
-LBB45_4:
- RET
-
-TEXT ·_transpose_uint64_int32_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB46_1
-
-LBB46_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB46_5
-
-LBB46_1:
- WORD $0xd285 // test edx, edx
- JLE LBB46_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB46_3:
- LONG $0x47048b4a // mov rax, qword [rdi + 2*r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x06048942 // mov dword [rsi + r8], eax
- LONG $0x04c08349 // add r8, 4
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB46_3
-
-LBB46_4:
- RET
-
-TEXT ·_transpose_int64_int32_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB47_1
-
-LBB47_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB47_5
-
-LBB47_1:
- WORD $0xd285 // test edx, edx
- JLE LBB47_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB47_3:
- LONG $0x47048b4a // mov rax, qword [rdi + 2*r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x06048942 // mov dword [rsi + r8], eax
- LONG $0x04c08349 // add r8, 4
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB47_3
-
-LBB47_4:
- RET
-
-TEXT ·_transpose_uint8_uint64_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB48_1
-
-LBB48_5:
- WORD $0xd089 // mov eax, edx
- WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- LONG $0x0157b60f // movzx edx, byte [rdi + 1]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- LONG $0x0257b60f // movzx edx, byte [rdi + 2]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- LONG $0x0357b60f // movzx edx, byte [rdi + 3]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB48_5
-
-LBB48_1:
- WORD $0xd285 // test edx, edx
- JLE LBB48_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB48_3:
- LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0xc604894a // mov qword [rsi + 8*r8], rax
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB48_3
-
-LBB48_4:
- RET
-
-TEXT ·_transpose_int8_uint64_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB49_1
-
-LBB49_5:
- WORD $0xd089 // mov eax, edx
- LONG $0x17be0f48 // movsx rdx, byte [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB49_5
-
-LBB49_1:
- WORD $0xd285 // test edx, edx
- JLE LBB49_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB49_3:
- LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0xc604894a // mov qword [rsi + 8*r8], rax
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB49_3
-
-LBB49_4:
- RET
-
-TEXT ·_transpose_uint16_uint64_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB50_1
-
-LBB50_5:
- WORD $0xd089 // mov eax, edx
- WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- LONG $0x0257b70f // movzx edx, word [rdi + 2]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- LONG $0x0457b70f // movzx edx, word [rdi + 4]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- LONG $0x0657b70f // movzx edx, word [rdi + 6]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB50_5
-
-LBB50_1:
- WORD $0xd285 // test edx, edx
- JLE LBB50_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB50_3:
- LONG $0x04b70f42; BYTE $0x07 // movzx eax, word [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0x8604894a // mov qword [rsi + 4*r8], rax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB50_3
-
-LBB50_4:
- RET
-
-TEXT ·_transpose_int16_uint64_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB51_1
-
-LBB51_5:
- WORD $0xd089 // mov eax, edx
- LONG $0x17bf0f48 // movsx rdx, word [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB51_5
-
-LBB51_1:
- WORD $0xd285 // test edx, edx
- JLE LBB51_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB51_3:
- LONG $0x04bf0f4a; BYTE $0x07 // movsx rax, word [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0x8604894a // mov qword [rsi + 4*r8], rax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB51_3
-
-LBB51_4:
- RET
-
-TEXT ·_transpose_uint32_uint64_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB52_1
-
-LBB52_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x178b // mov edx, dword [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB52_5
-
-LBB52_1:
- WORD $0xd285 // test edx, edx
- JLE LBB52_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB52_3:
- LONG $0x07048b42 // mov eax, dword [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0x4604894a // mov qword [rsi + 2*r8], rax
- LONG $0x04c08349 // add r8, 4
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB52_3
-
-LBB52_4:
- RET
-
-TEXT ·_transpose_int32_uint64_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB53_1
-
-LBB53_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- LONG $0x04576348 // movsxd rdx, dword [rdi + 4]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- LONG $0x08576348 // movsxd rdx, dword [rdi + 8]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- LONG $0x0c576348 // movsxd rdx, dword [rdi + 12]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB53_5
-
-LBB53_1:
- WORD $0xd285 // test edx, edx
- JLE LBB53_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB53_3:
- LONG $0x0704634a // movsxd rax, dword [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0x4604894a // mov qword [rsi + 2*r8], rax
- LONG $0x04c08349 // add r8, 4
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB53_3
-
-LBB53_4:
- RET
-
-TEXT ·_transpose_uint64_uint64_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB54_1
-
-LBB54_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB54_5
-
-LBB54_1:
- WORD $0xd285 // test edx, edx
- JLE LBB54_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB54_3:
- LONG $0x07048b4a // mov rax, qword [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0x0604894a // mov qword [rsi + r8], rax
- LONG $0x08c08349 // add r8, 8
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB54_3
-
-LBB54_4:
- RET
-
-TEXT ·_transpose_int64_uint64_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB55_1
-
-LBB55_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB55_5
-
-LBB55_1:
- WORD $0xd285 // test edx, edx
- JLE LBB55_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB55_3:
- LONG $0x07048b4a // mov rax, qword [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0x0604894a // mov qword [rsi + r8], rax
- LONG $0x08c08349 // add r8, 8
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB55_3
-
-LBB55_4:
- RET
-
-TEXT ·_transpose_uint8_int64_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB56_1
-
-LBB56_5:
- WORD $0xd089 // mov eax, edx
- WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- LONG $0x0157b60f // movzx edx, byte [rdi + 1]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- LONG $0x0257b60f // movzx edx, byte [rdi + 2]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- LONG $0x0357b60f // movzx edx, byte [rdi + 3]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB56_5
-
-LBB56_1:
- WORD $0xd285 // test edx, edx
- JLE LBB56_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB56_3:
- LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0xc604894a // mov qword [rsi + 8*r8], rax
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB56_3
-
-LBB56_4:
- RET
-
-TEXT ·_transpose_int8_int64_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB57_1
-
-LBB57_5:
- WORD $0xd089 // mov eax, edx
- LONG $0x17be0f48 // movsx rdx, byte [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB57_5
-
-LBB57_1:
- WORD $0xd285 // test edx, edx
- JLE LBB57_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB57_3:
- LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0xc604894a // mov qword [rsi + 8*r8], rax
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB57_3
-
-LBB57_4:
- RET
-
-TEXT ·_transpose_uint16_int64_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB58_1
-
-LBB58_5:
- WORD $0xd089 // mov eax, edx
- WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- LONG $0x0257b70f // movzx edx, word [rdi + 2]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- LONG $0x0457b70f // movzx edx, word [rdi + 4]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- LONG $0x0657b70f // movzx edx, word [rdi + 6]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB58_5
-
-LBB58_1:
- WORD $0xd285 // test edx, edx
- JLE LBB58_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB58_3:
- LONG $0x04b70f42; BYTE $0x07 // movzx eax, word [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0x8604894a // mov qword [rsi + 4*r8], rax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB58_3
-
-LBB58_4:
- RET
-
-TEXT ·_transpose_int16_int64_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB59_1
-
-LBB59_5:
- WORD $0xd089 // mov eax, edx
- LONG $0x17bf0f48 // movsx rdx, word [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB59_5
-
-LBB59_1:
- WORD $0xd285 // test edx, edx
- JLE LBB59_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB59_3:
- LONG $0x04bf0f4a; BYTE $0x07 // movsx rax, word [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0x8604894a // mov qword [rsi + 4*r8], rax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB59_3
-
-LBB59_4:
- RET
-
-TEXT ·_transpose_uint32_int64_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB60_1
-
-LBB60_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x178b // mov edx, dword [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB60_5
-
-LBB60_1:
- WORD $0xd285 // test edx, edx
- JLE LBB60_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB60_3:
- LONG $0x07048b42 // mov eax, dword [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0x4604894a // mov qword [rsi + 2*r8], rax
- LONG $0x04c08349 // add r8, 4
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB60_3
-
-LBB60_4:
- RET
-
-TEXT ·_transpose_int32_int64_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB61_1
-
-LBB61_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- LONG $0x04576348 // movsxd rdx, dword [rdi + 4]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- LONG $0x08576348 // movsxd rdx, dword [rdi + 8]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- LONG $0x0c576348 // movsxd rdx, dword [rdi + 12]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB61_5
-
-LBB61_1:
- WORD $0xd285 // test edx, edx
- JLE LBB61_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB61_3:
- LONG $0x0704634a // movsxd rax, dword [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0x4604894a // mov qword [rsi + 2*r8], rax
- LONG $0x04c08349 // add r8, 4
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB61_3
-
-LBB61_4:
- RET
-
-TEXT ·_transpose_uint64_int64_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB62_1
-
-LBB62_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB62_5
-
-LBB62_1:
- WORD $0xd285 // test edx, edx
- JLE LBB62_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB62_3:
- LONG $0x07048b4a // mov rax, qword [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0x0604894a // mov qword [rsi + r8], rax
- LONG $0x08c08349 // add r8, 8
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB62_3
-
-LBB62_4:
- RET
-
-TEXT ·_transpose_int64_int64_avx2(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB63_1
-
-LBB63_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB63_5
-
-LBB63_1:
- WORD $0xd285 // test edx, edx
- JLE LBB63_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB63_3:
- LONG $0x07048b4a // mov rax, qword [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0x0604894a // mov qword [rsi + r8], rax
- LONG $0x08c08349 // add r8, 8
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB63_3
-
-LBB63_4:
- RET
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_def.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_def.go
deleted file mode 100644
index cc3b0abb5..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_def.go
+++ /dev/null
@@ -1,227 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package utils
-
-import (
- "errors"
-
- "github.com/apache/arrow/go/v14/arrow"
-)
-
-//go:generate go run ../../arrow/_tools/tmpl -i -data=transpose_ints.tmpldata -d arch=avx2 transpose_ints_simd.go.tmpl=transpose_ints_avx2_amd64.go
-//go:generate go run ../../arrow/_tools/tmpl -i -data=transpose_ints.tmpldata -d arch=sse4 transpose_ints_simd.go.tmpl=transpose_ints_sse4_amd64.go
-//go:generate go run ../../arrow/_tools/tmpl -i -data=transpose_ints.tmpldata transpose_ints_s390x.go.tmpl=transpose_ints_s390x.go
-//go:generate go run ../../arrow/_tools/tmpl -i -data=transpose_ints.tmpldata transpose_ints_s390x.go.tmpl=transpose_ints_arm64.go
-//go:generate go run ../../arrow/_tools/tmpl -i -data=transpose_ints.tmpldata transpose_ints_noasm.go.tmpl=transpose_ints_noasm.go
-//go:generate go run ../../arrow/_tools/tmpl -i -data=transpose_ints.tmpldata transpose_ints.go.tmpl=transpose_ints.go
-
-func bufToTyped(typ arrow.DataType, buf []byte, offset, length int) (interface{}, error) {
- switch typ.ID() {
- case arrow.INT8:
- return arrow.Int8Traits.CastFromBytes(buf)[offset : offset+length], nil
- case arrow.INT16:
- return arrow.Int16Traits.CastFromBytes(buf)[offset : offset+length], nil
- case arrow.INT32:
- return arrow.Int32Traits.CastFromBytes(buf)[offset : offset+length], nil
- case arrow.INT64:
- return arrow.Int64Traits.CastFromBytes(buf)[offset : offset+length], nil
- case arrow.UINT8:
- return arrow.Uint8Traits.CastFromBytes(buf)[offset : offset+length], nil
- case arrow.UINT16:
- return arrow.Uint16Traits.CastFromBytes(buf)[offset : offset+length], nil
- case arrow.UINT32:
- return arrow.Uint32Traits.CastFromBytes(buf)[offset : offset+length], nil
- case arrow.UINT64:
- return arrow.Uint64Traits.CastFromBytes(buf)[offset : offset+length], nil
- }
- return nil, errors.New("only accepts integral types")
-}
-
-// TransposeIntsBuffers takes the data-types, byte buffers, and offsets of a source and destination
-// buffer to perform TransposeInts on with the provided mapping data.
-func TransposeIntsBuffers(inType, outType arrow.DataType, indata, outdata []byte, inOffset, outOffset int, length int, transposeMap []int32) error {
- src, err := bufToTyped(inType, indata, inOffset, length)
- if err != nil {
- return err
- }
- dest, err := bufToTyped(outType, outdata, outOffset, length)
- if err != nil {
- return err
- }
-
- return TransposeInts(src, dest, transposeMap)
-}
-
-// TransposeInts expects two integral slices and the values they map to. Returning
-// an error if either src or dest are not an integral type.
-func TransposeInts(src, dest interface{}, mapping []int32) error {
- switch s := src.(type) {
- case []int8:
- switch d := dest.(type) {
- case []int8:
- TransposeInt8Int8(s, d, mapping)
- case []int16:
- TransposeInt8Int16(s, d, mapping)
- case []int32:
- TransposeInt8Int32(s, d, mapping)
- case []int64:
- TransposeInt8Int64(s, d, mapping)
- case []uint8:
- TransposeInt8Uint8(s, d, mapping)
- case []uint16:
- TransposeInt8Uint16(s, d, mapping)
- case []uint32:
- TransposeInt8Uint32(s, d, mapping)
- case []uint64:
- TransposeInt8Uint64(s, d, mapping)
- }
- case []int16:
- switch d := dest.(type) {
- case []int8:
- TransposeInt16Int8(s, d, mapping)
- case []int16:
- TransposeInt16Int16(s, d, mapping)
- case []int32:
- TransposeInt16Int32(s, d, mapping)
- case []int64:
- TransposeInt16Int64(s, d, mapping)
- case []uint8:
- TransposeInt16Uint8(s, d, mapping)
- case []uint16:
- TransposeInt16Uint16(s, d, mapping)
- case []uint32:
- TransposeInt16Uint32(s, d, mapping)
- case []uint64:
- TransposeInt16Uint64(s, d, mapping)
- }
- case []int32:
- switch d := dest.(type) {
- case []int8:
- TransposeInt32Int8(s, d, mapping)
- case []int16:
- TransposeInt32Int16(s, d, mapping)
- case []int32:
- TransposeInt32Int32(s, d, mapping)
- case []int64:
- TransposeInt32Int64(s, d, mapping)
- case []uint8:
- TransposeInt32Uint8(s, d, mapping)
- case []uint16:
- TransposeInt32Uint16(s, d, mapping)
- case []uint32:
- TransposeInt32Uint32(s, d, mapping)
- case []uint64:
- TransposeInt32Uint64(s, d, mapping)
- }
- case []int64:
- switch d := dest.(type) {
- case []int8:
- TransposeInt64Int8(s, d, mapping)
- case []int16:
- TransposeInt64Int16(s, d, mapping)
- case []int32:
- TransposeInt64Int32(s, d, mapping)
- case []int64:
- TransposeInt64Int64(s, d, mapping)
- case []uint8:
- TransposeInt64Uint8(s, d, mapping)
- case []uint16:
- TransposeInt64Uint16(s, d, mapping)
- case []uint32:
- TransposeInt64Uint32(s, d, mapping)
- case []uint64:
- TransposeInt64Uint64(s, d, mapping)
- }
- case []uint8:
- switch d := dest.(type) {
- case []int8:
- TransposeUint8Int8(s, d, mapping)
- case []int16:
- TransposeUint8Int16(s, d, mapping)
- case []int32:
- TransposeUint8Int32(s, d, mapping)
- case []int64:
- TransposeUint8Int64(s, d, mapping)
- case []uint8:
- TransposeUint8Uint8(s, d, mapping)
- case []uint16:
- TransposeUint8Uint16(s, d, mapping)
- case []uint32:
- TransposeUint8Uint32(s, d, mapping)
- case []uint64:
- TransposeUint8Uint64(s, d, mapping)
- }
- case []uint16:
- switch d := dest.(type) {
- case []int8:
- TransposeUint16Int8(s, d, mapping)
- case []int16:
- TransposeUint16Int16(s, d, mapping)
- case []int32:
- TransposeUint16Int32(s, d, mapping)
- case []int64:
- TransposeUint16Int64(s, d, mapping)
- case []uint8:
- TransposeUint16Uint8(s, d, mapping)
- case []uint16:
- TransposeUint16Uint16(s, d, mapping)
- case []uint32:
- TransposeUint16Uint32(s, d, mapping)
- case []uint64:
- TransposeUint16Uint64(s, d, mapping)
- }
- case []uint32:
- switch d := dest.(type) {
- case []int8:
- TransposeUint32Int8(s, d, mapping)
- case []int16:
- TransposeUint32Int16(s, d, mapping)
- case []int32:
- TransposeUint32Int32(s, d, mapping)
- case []int64:
- TransposeUint32Int64(s, d, mapping)
- case []uint8:
- TransposeUint32Uint8(s, d, mapping)
- case []uint16:
- TransposeUint32Uint16(s, d, mapping)
- case []uint32:
- TransposeUint32Uint32(s, d, mapping)
- case []uint64:
- TransposeUint32Uint64(s, d, mapping)
- }
- case []uint64:
- switch d := dest.(type) {
- case []int8:
- TransposeUint64Int8(s, d, mapping)
- case []int16:
- TransposeUint64Int16(s, d, mapping)
- case []int32:
- TransposeUint64Int32(s, d, mapping)
- case []int64:
- TransposeUint64Int64(s, d, mapping)
- case []uint8:
- TransposeUint64Uint8(s, d, mapping)
- case []uint16:
- TransposeUint64Uint16(s, d, mapping)
- case []uint32:
- TransposeUint64Uint32(s, d, mapping)
- case []uint64:
- TransposeUint64Uint64(s, d, mapping)
- }
- }
- return nil
-}
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_noasm.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_noasm.go
deleted file mode 100644
index 461aaf31f..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_noasm.go
+++ /dev/null
@@ -1,96 +0,0 @@
-// Code generated by transpose_ints_noasm.go.tmpl. DO NOT EDIT.
-
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//go:build noasm || (!amd64 && !arm64 && !s390x && !ppc64le)
-
-package utils
-
-// if building with the 'noasm' tag, then point to the pure go implementations
-var (
- TransposeInt8Int8 = transposeInt8Int8
- TransposeInt8Uint8 = transposeInt8Uint8
- TransposeInt8Int16 = transposeInt8Int16
- TransposeInt8Uint16 = transposeInt8Uint16
- TransposeInt8Int32 = transposeInt8Int32
- TransposeInt8Uint32 = transposeInt8Uint32
- TransposeInt8Int64 = transposeInt8Int64
- TransposeInt8Uint64 = transposeInt8Uint64
-
- TransposeUint8Int8 = transposeUint8Int8
- TransposeUint8Uint8 = transposeUint8Uint8
- TransposeUint8Int16 = transposeUint8Int16
- TransposeUint8Uint16 = transposeUint8Uint16
- TransposeUint8Int32 = transposeUint8Int32
- TransposeUint8Uint32 = transposeUint8Uint32
- TransposeUint8Int64 = transposeUint8Int64
- TransposeUint8Uint64 = transposeUint8Uint64
-
- TransposeInt16Int8 = transposeInt16Int8
- TransposeInt16Uint8 = transposeInt16Uint8
- TransposeInt16Int16 = transposeInt16Int16
- TransposeInt16Uint16 = transposeInt16Uint16
- TransposeInt16Int32 = transposeInt16Int32
- TransposeInt16Uint32 = transposeInt16Uint32
- TransposeInt16Int64 = transposeInt16Int64
- TransposeInt16Uint64 = transposeInt16Uint64
-
- TransposeUint16Int8 = transposeUint16Int8
- TransposeUint16Uint8 = transposeUint16Uint8
- TransposeUint16Int16 = transposeUint16Int16
- TransposeUint16Uint16 = transposeUint16Uint16
- TransposeUint16Int32 = transposeUint16Int32
- TransposeUint16Uint32 = transposeUint16Uint32
- TransposeUint16Int64 = transposeUint16Int64
- TransposeUint16Uint64 = transposeUint16Uint64
-
- TransposeInt32Int8 = transposeInt32Int8
- TransposeInt32Uint8 = transposeInt32Uint8
- TransposeInt32Int16 = transposeInt32Int16
- TransposeInt32Uint16 = transposeInt32Uint16
- TransposeInt32Int32 = transposeInt32Int32
- TransposeInt32Uint32 = transposeInt32Uint32
- TransposeInt32Int64 = transposeInt32Int64
- TransposeInt32Uint64 = transposeInt32Uint64
-
- TransposeUint32Int8 = transposeUint32Int8
- TransposeUint32Uint8 = transposeUint32Uint8
- TransposeUint32Int16 = transposeUint32Int16
- TransposeUint32Uint16 = transposeUint32Uint16
- TransposeUint32Int32 = transposeUint32Int32
- TransposeUint32Uint32 = transposeUint32Uint32
- TransposeUint32Int64 = transposeUint32Int64
- TransposeUint32Uint64 = transposeUint32Uint64
-
- TransposeInt64Int8 = transposeInt64Int8
- TransposeInt64Uint8 = transposeInt64Uint8
- TransposeInt64Int16 = transposeInt64Int16
- TransposeInt64Uint16 = transposeInt64Uint16
- TransposeInt64Int32 = transposeInt64Int32
- TransposeInt64Uint32 = transposeInt64Uint32
- TransposeInt64Int64 = transposeInt64Int64
- TransposeInt64Uint64 = transposeInt64Uint64
-
- TransposeUint64Int8 = transposeUint64Int8
- TransposeUint64Uint8 = transposeUint64Uint8
- TransposeUint64Int16 = transposeUint64Int16
- TransposeUint64Uint16 = transposeUint64Uint16
- TransposeUint64Int32 = transposeUint64Int32
- TransposeUint64Uint32 = transposeUint64Uint32
- TransposeUint64Int64 = transposeUint64Int64
- TransposeUint64Uint64 = transposeUint64Uint64
-)
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_noasm.go.tmpl b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_noasm.go.tmpl
deleted file mode 100644
index faffdce35..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_noasm.go.tmpl
+++ /dev/null
@@ -1,34 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//go:build noasm
-// +build noasm
-
-package utils
-
-// if building with the 'noasm' tag, then point to the pure go implementations
-var (
-{{ $typelist := .In }}
-{{range .In}}
-{{ $src := .Type -}}
-{{ $srcName := .Name -}}
-{{ range $typelist -}}
-{{ $dest := .Type -}}
-{{ $destName := .Name -}}
- Transpose{{$srcName}}{{$destName}} = transpose{{$srcName}}{{$destName}}
-{{end}}
-{{end}}
-)
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_ppc64le.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_ppc64le.go
deleted file mode 100644
index cc957cdaa..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_ppc64le.go
+++ /dev/null
@@ -1,96 +0,0 @@
-// Code generated by transpose_ints_s390x.go.tmpl. DO NOT EDIT.
-
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//go:build !noasm
-
-package utils
-
-// if building with the 'noasm' tag, then point to the pure go implementations
-var (
- TransposeInt8Int8 = transposeInt8Int8
- TransposeInt8Uint8 = transposeInt8Uint8
- TransposeInt8Int16 = transposeInt8Int16
- TransposeInt8Uint16 = transposeInt8Uint16
- TransposeInt8Int32 = transposeInt8Int32
- TransposeInt8Uint32 = transposeInt8Uint32
- TransposeInt8Int64 = transposeInt8Int64
- TransposeInt8Uint64 = transposeInt8Uint64
-
- TransposeUint8Int8 = transposeUint8Int8
- TransposeUint8Uint8 = transposeUint8Uint8
- TransposeUint8Int16 = transposeUint8Int16
- TransposeUint8Uint16 = transposeUint8Uint16
- TransposeUint8Int32 = transposeUint8Int32
- TransposeUint8Uint32 = transposeUint8Uint32
- TransposeUint8Int64 = transposeUint8Int64
- TransposeUint8Uint64 = transposeUint8Uint64
-
- TransposeInt16Int8 = transposeInt16Int8
- TransposeInt16Uint8 = transposeInt16Uint8
- TransposeInt16Int16 = transposeInt16Int16
- TransposeInt16Uint16 = transposeInt16Uint16
- TransposeInt16Int32 = transposeInt16Int32
- TransposeInt16Uint32 = transposeInt16Uint32
- TransposeInt16Int64 = transposeInt16Int64
- TransposeInt16Uint64 = transposeInt16Uint64
-
- TransposeUint16Int8 = transposeUint16Int8
- TransposeUint16Uint8 = transposeUint16Uint8
- TransposeUint16Int16 = transposeUint16Int16
- TransposeUint16Uint16 = transposeUint16Uint16
- TransposeUint16Int32 = transposeUint16Int32
- TransposeUint16Uint32 = transposeUint16Uint32
- TransposeUint16Int64 = transposeUint16Int64
- TransposeUint16Uint64 = transposeUint16Uint64
-
- TransposeInt32Int8 = transposeInt32Int8
- TransposeInt32Uint8 = transposeInt32Uint8
- TransposeInt32Int16 = transposeInt32Int16
- TransposeInt32Uint16 = transposeInt32Uint16
- TransposeInt32Int32 = transposeInt32Int32
- TransposeInt32Uint32 = transposeInt32Uint32
- TransposeInt32Int64 = transposeInt32Int64
- TransposeInt32Uint64 = transposeInt32Uint64
-
- TransposeUint32Int8 = transposeUint32Int8
- TransposeUint32Uint8 = transposeUint32Uint8
- TransposeUint32Int16 = transposeUint32Int16
- TransposeUint32Uint16 = transposeUint32Uint16
- TransposeUint32Int32 = transposeUint32Int32
- TransposeUint32Uint32 = transposeUint32Uint32
- TransposeUint32Int64 = transposeUint32Int64
- TransposeUint32Uint64 = transposeUint32Uint64
-
- TransposeInt64Int8 = transposeInt64Int8
- TransposeInt64Uint8 = transposeInt64Uint8
- TransposeInt64Int16 = transposeInt64Int16
- TransposeInt64Uint16 = transposeInt64Uint16
- TransposeInt64Int32 = transposeInt64Int32
- TransposeInt64Uint32 = transposeInt64Uint32
- TransposeInt64Int64 = transposeInt64Int64
- TransposeInt64Uint64 = transposeInt64Uint64
-
- TransposeUint64Int8 = transposeUint64Int8
- TransposeUint64Uint8 = transposeUint64Uint8
- TransposeUint64Int16 = transposeUint64Int16
- TransposeUint64Uint16 = transposeUint64Uint16
- TransposeUint64Int32 = transposeUint64Int32
- TransposeUint64Uint32 = transposeUint64Uint32
- TransposeUint64Int64 = transposeUint64Int64
- TransposeUint64Uint64 = transposeUint64Uint64
-)
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_s390x.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_s390x.go
deleted file mode 100644
index cc957cdaa..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_s390x.go
+++ /dev/null
@@ -1,96 +0,0 @@
-// Code generated by transpose_ints_s390x.go.tmpl. DO NOT EDIT.
-
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//go:build !noasm
-
-package utils
-
-// if building with the 'noasm' tag, then point to the pure go implementations
-var (
- TransposeInt8Int8 = transposeInt8Int8
- TransposeInt8Uint8 = transposeInt8Uint8
- TransposeInt8Int16 = transposeInt8Int16
- TransposeInt8Uint16 = transposeInt8Uint16
- TransposeInt8Int32 = transposeInt8Int32
- TransposeInt8Uint32 = transposeInt8Uint32
- TransposeInt8Int64 = transposeInt8Int64
- TransposeInt8Uint64 = transposeInt8Uint64
-
- TransposeUint8Int8 = transposeUint8Int8
- TransposeUint8Uint8 = transposeUint8Uint8
- TransposeUint8Int16 = transposeUint8Int16
- TransposeUint8Uint16 = transposeUint8Uint16
- TransposeUint8Int32 = transposeUint8Int32
- TransposeUint8Uint32 = transposeUint8Uint32
- TransposeUint8Int64 = transposeUint8Int64
- TransposeUint8Uint64 = transposeUint8Uint64
-
- TransposeInt16Int8 = transposeInt16Int8
- TransposeInt16Uint8 = transposeInt16Uint8
- TransposeInt16Int16 = transposeInt16Int16
- TransposeInt16Uint16 = transposeInt16Uint16
- TransposeInt16Int32 = transposeInt16Int32
- TransposeInt16Uint32 = transposeInt16Uint32
- TransposeInt16Int64 = transposeInt16Int64
- TransposeInt16Uint64 = transposeInt16Uint64
-
- TransposeUint16Int8 = transposeUint16Int8
- TransposeUint16Uint8 = transposeUint16Uint8
- TransposeUint16Int16 = transposeUint16Int16
- TransposeUint16Uint16 = transposeUint16Uint16
- TransposeUint16Int32 = transposeUint16Int32
- TransposeUint16Uint32 = transposeUint16Uint32
- TransposeUint16Int64 = transposeUint16Int64
- TransposeUint16Uint64 = transposeUint16Uint64
-
- TransposeInt32Int8 = transposeInt32Int8
- TransposeInt32Uint8 = transposeInt32Uint8
- TransposeInt32Int16 = transposeInt32Int16
- TransposeInt32Uint16 = transposeInt32Uint16
- TransposeInt32Int32 = transposeInt32Int32
- TransposeInt32Uint32 = transposeInt32Uint32
- TransposeInt32Int64 = transposeInt32Int64
- TransposeInt32Uint64 = transposeInt32Uint64
-
- TransposeUint32Int8 = transposeUint32Int8
- TransposeUint32Uint8 = transposeUint32Uint8
- TransposeUint32Int16 = transposeUint32Int16
- TransposeUint32Uint16 = transposeUint32Uint16
- TransposeUint32Int32 = transposeUint32Int32
- TransposeUint32Uint32 = transposeUint32Uint32
- TransposeUint32Int64 = transposeUint32Int64
- TransposeUint32Uint64 = transposeUint32Uint64
-
- TransposeInt64Int8 = transposeInt64Int8
- TransposeInt64Uint8 = transposeInt64Uint8
- TransposeInt64Int16 = transposeInt64Int16
- TransposeInt64Uint16 = transposeInt64Uint16
- TransposeInt64Int32 = transposeInt64Int32
- TransposeInt64Uint32 = transposeInt64Uint32
- TransposeInt64Int64 = transposeInt64Int64
- TransposeInt64Uint64 = transposeInt64Uint64
-
- TransposeUint64Int8 = transposeUint64Int8
- TransposeUint64Uint8 = transposeUint64Uint8
- TransposeUint64Int16 = transposeUint64Int16
- TransposeUint64Uint16 = transposeUint64Uint16
- TransposeUint64Int32 = transposeUint64Int32
- TransposeUint64Uint32 = transposeUint64Uint32
- TransposeUint64Int64 = transposeUint64Int64
- TransposeUint64Uint64 = transposeUint64Uint64
-)
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_s390x.go.tmpl b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_s390x.go.tmpl
deleted file mode 100644
index d93c8779c..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_s390x.go.tmpl
+++ /dev/null
@@ -1,34 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//go:build !noasm
-// +build !noasm
-
-package utils
-
-// if building with the 'noasm' tag, then point to the pure go implementations
-var (
-{{ $typelist := .In }}
-{{range .In}}
-{{ $src := .Type -}}
-{{ $srcName := .Name -}}
-{{ range $typelist -}}
-{{ $dest := .Type -}}
-{{ $destName := .Name -}}
- Transpose{{$srcName}}{{$destName}} = transpose{{$srcName}}{{$destName}}
-{{end}}
-{{end}}
-)
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_simd.go.tmpl b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_simd.go.tmpl
deleted file mode 100644
index 034d0e9d2..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_simd.go.tmpl
+++ /dev/null
@@ -1,42 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//go:build !noasm
-// +build !noasm
-
-package utils
-
-import (
- "unsafe"
-)
-
-{{ $arch := .D.arch}}
-{{ $typelist := .In}}
-{{range .In}}
-{{ $src := .Type }}
-{{ $srcName := .Name }}
-{{ range $typelist}}
-{{ $dest := .Type }}
-{{ $destName := .Name }}
-
-//go:noescape
-func _transpose_{{printf "%s_%s_%s" $src $dest $arch}}(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transpose{{ $srcName }}{{ $destName }}{{ $arch }}(src []{{$src}}, dest []{{$dest}}, transposeMap []int32) {
- _transpose_{{printf "%s_%s_%s" $src $dest $arch}}(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-{{ end }}
-{{ end }}
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_sse4_amd64.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_sse4_amd64.go
deleted file mode 100644
index 241ca74a7..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_sse4_amd64.go
+++ /dev/null
@@ -1,473 +0,0 @@
-// Code generated by transpose_ints_simd.go.tmpl. DO NOT EDIT.
-
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//go:build !noasm
-
-package utils
-
-import (
- "unsafe"
-)
-
-//go:noescape
-func _transpose_int8_int8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt8Int8sse4(src []int8, dest []int8, transposeMap []int32) {
- _transpose_int8_int8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int8_uint8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt8Uint8sse4(src []int8, dest []uint8, transposeMap []int32) {
- _transpose_int8_uint8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int8_int16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt8Int16sse4(src []int8, dest []int16, transposeMap []int32) {
- _transpose_int8_int16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int8_uint16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt8Uint16sse4(src []int8, dest []uint16, transposeMap []int32) {
- _transpose_int8_uint16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int8_int32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt8Int32sse4(src []int8, dest []int32, transposeMap []int32) {
- _transpose_int8_int32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int8_uint32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt8Uint32sse4(src []int8, dest []uint32, transposeMap []int32) {
- _transpose_int8_uint32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int8_int64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt8Int64sse4(src []int8, dest []int64, transposeMap []int32) {
- _transpose_int8_int64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int8_uint64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt8Uint64sse4(src []int8, dest []uint64, transposeMap []int32) {
- _transpose_int8_uint64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint8_int8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint8Int8sse4(src []uint8, dest []int8, transposeMap []int32) {
- _transpose_uint8_int8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint8_uint8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint8Uint8sse4(src []uint8, dest []uint8, transposeMap []int32) {
- _transpose_uint8_uint8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint8_int16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint8Int16sse4(src []uint8, dest []int16, transposeMap []int32) {
- _transpose_uint8_int16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint8_uint16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint8Uint16sse4(src []uint8, dest []uint16, transposeMap []int32) {
- _transpose_uint8_uint16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint8_int32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint8Int32sse4(src []uint8, dest []int32, transposeMap []int32) {
- _transpose_uint8_int32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint8_uint32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint8Uint32sse4(src []uint8, dest []uint32, transposeMap []int32) {
- _transpose_uint8_uint32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint8_int64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint8Int64sse4(src []uint8, dest []int64, transposeMap []int32) {
- _transpose_uint8_int64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint8_uint64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint8Uint64sse4(src []uint8, dest []uint64, transposeMap []int32) {
- _transpose_uint8_uint64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int16_int8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt16Int8sse4(src []int16, dest []int8, transposeMap []int32) {
- _transpose_int16_int8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int16_uint8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt16Uint8sse4(src []int16, dest []uint8, transposeMap []int32) {
- _transpose_int16_uint8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int16_int16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt16Int16sse4(src []int16, dest []int16, transposeMap []int32) {
- _transpose_int16_int16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int16_uint16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt16Uint16sse4(src []int16, dest []uint16, transposeMap []int32) {
- _transpose_int16_uint16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int16_int32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt16Int32sse4(src []int16, dest []int32, transposeMap []int32) {
- _transpose_int16_int32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int16_uint32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt16Uint32sse4(src []int16, dest []uint32, transposeMap []int32) {
- _transpose_int16_uint32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int16_int64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt16Int64sse4(src []int16, dest []int64, transposeMap []int32) {
- _transpose_int16_int64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int16_uint64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt16Uint64sse4(src []int16, dest []uint64, transposeMap []int32) {
- _transpose_int16_uint64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint16_int8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint16Int8sse4(src []uint16, dest []int8, transposeMap []int32) {
- _transpose_uint16_int8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint16_uint8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint16Uint8sse4(src []uint16, dest []uint8, transposeMap []int32) {
- _transpose_uint16_uint8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint16_int16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint16Int16sse4(src []uint16, dest []int16, transposeMap []int32) {
- _transpose_uint16_int16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint16_uint16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint16Uint16sse4(src []uint16, dest []uint16, transposeMap []int32) {
- _transpose_uint16_uint16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint16_int32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint16Int32sse4(src []uint16, dest []int32, transposeMap []int32) {
- _transpose_uint16_int32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint16_uint32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint16Uint32sse4(src []uint16, dest []uint32, transposeMap []int32) {
- _transpose_uint16_uint32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint16_int64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint16Int64sse4(src []uint16, dest []int64, transposeMap []int32) {
- _transpose_uint16_int64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint16_uint64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint16Uint64sse4(src []uint16, dest []uint64, transposeMap []int32) {
- _transpose_uint16_uint64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int32_int8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt32Int8sse4(src []int32, dest []int8, transposeMap []int32) {
- _transpose_int32_int8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int32_uint8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt32Uint8sse4(src []int32, dest []uint8, transposeMap []int32) {
- _transpose_int32_uint8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int32_int16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt32Int16sse4(src []int32, dest []int16, transposeMap []int32) {
- _transpose_int32_int16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int32_uint16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt32Uint16sse4(src []int32, dest []uint16, transposeMap []int32) {
- _transpose_int32_uint16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int32_int32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt32Int32sse4(src []int32, dest []int32, transposeMap []int32) {
- _transpose_int32_int32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int32_uint32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt32Uint32sse4(src []int32, dest []uint32, transposeMap []int32) {
- _transpose_int32_uint32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int32_int64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt32Int64sse4(src []int32, dest []int64, transposeMap []int32) {
- _transpose_int32_int64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int32_uint64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt32Uint64sse4(src []int32, dest []uint64, transposeMap []int32) {
- _transpose_int32_uint64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint32_int8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint32Int8sse4(src []uint32, dest []int8, transposeMap []int32) {
- _transpose_uint32_int8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint32_uint8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint32Uint8sse4(src []uint32, dest []uint8, transposeMap []int32) {
- _transpose_uint32_uint8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint32_int16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint32Int16sse4(src []uint32, dest []int16, transposeMap []int32) {
- _transpose_uint32_int16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint32_uint16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint32Uint16sse4(src []uint32, dest []uint16, transposeMap []int32) {
- _transpose_uint32_uint16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint32_int32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint32Int32sse4(src []uint32, dest []int32, transposeMap []int32) {
- _transpose_uint32_int32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint32_uint32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint32Uint32sse4(src []uint32, dest []uint32, transposeMap []int32) {
- _transpose_uint32_uint32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint32_int64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint32Int64sse4(src []uint32, dest []int64, transposeMap []int32) {
- _transpose_uint32_int64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint32_uint64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint32Uint64sse4(src []uint32, dest []uint64, transposeMap []int32) {
- _transpose_uint32_uint64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int64_int8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt64Int8sse4(src []int64, dest []int8, transposeMap []int32) {
- _transpose_int64_int8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int64_uint8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt64Uint8sse4(src []int64, dest []uint8, transposeMap []int32) {
- _transpose_int64_uint8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int64_int16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt64Int16sse4(src []int64, dest []int16, transposeMap []int32) {
- _transpose_int64_int16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int64_uint16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt64Uint16sse4(src []int64, dest []uint16, transposeMap []int32) {
- _transpose_int64_uint16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int64_int32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt64Int32sse4(src []int64, dest []int32, transposeMap []int32) {
- _transpose_int64_int32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int64_uint32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt64Uint32sse4(src []int64, dest []uint32, transposeMap []int32) {
- _transpose_int64_uint32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int64_int64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt64Int64sse4(src []int64, dest []int64, transposeMap []int32) {
- _transpose_int64_int64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_int64_uint64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeInt64Uint64sse4(src []int64, dest []uint64, transposeMap []int32) {
- _transpose_int64_uint64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint64_int8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint64Int8sse4(src []uint64, dest []int8, transposeMap []int32) {
- _transpose_uint64_int8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint64_uint8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint64Uint8sse4(src []uint64, dest []uint8, transposeMap []int32) {
- _transpose_uint64_uint8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint64_int16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint64Int16sse4(src []uint64, dest []int16, transposeMap []int32) {
- _transpose_uint64_int16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint64_uint16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint64Uint16sse4(src []uint64, dest []uint16, transposeMap []int32) {
- _transpose_uint64_uint16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint64_int32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint64Int32sse4(src []uint64, dest []int32, transposeMap []int32) {
- _transpose_uint64_int32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint64_uint32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint64Uint32sse4(src []uint64, dest []uint32, transposeMap []int32) {
- _transpose_uint64_uint32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint64_int64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint64Int64sse4(src []uint64, dest []int64, transposeMap []int32) {
- _transpose_uint64_int64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
-
-//go:noescape
-func _transpose_uint64_uint64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer)
-
-func transposeUint64Uint64sse4(src []uint64, dest []uint64, transposeMap []int32) {
- _transpose_uint64_uint64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0]))
-}
diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_sse4_amd64.s b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_sse4_amd64.s
deleted file mode 100644
index ee5199a5a..000000000
--- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_sse4_amd64.s
+++ /dev/null
@@ -1,3074 +0,0 @@
-//+build !noasm !appengine
-// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT
-
-TEXT ·_transpose_uint8_uint8_sse4(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB0_1
-
-LBB0_5:
- WORD $0xd089 // mov eax, edx
- WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- LONG $0x0157b60f // movzx edx, byte [rdi + 1]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- LONG $0x0257b60f // movzx edx, byte [rdi + 2]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- LONG $0x0357b60f // movzx edx, byte [rdi + 3]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB0_5
-
-LBB0_1:
- WORD $0xd285 // test edx, edx
- JLE LBB0_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB0_3:
- LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB0_3
-
-LBB0_4:
- RET
-
-TEXT ·_transpose_int8_uint8_sse4(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB1_1
-
-LBB1_5:
- WORD $0xd089 // mov eax, edx
- LONG $0x17be0f48 // movsx rdx, byte [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB1_5
-
-LBB1_1:
- WORD $0xd285 // test edx, edx
- JLE LBB1_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB1_3:
- LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB1_3
-
-LBB1_4:
- RET
-
-TEXT ·_transpose_uint16_uint8_sse4(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB2_1
-
-LBB2_5:
- WORD $0xd089 // mov eax, edx
- WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- LONG $0x0257b70f // movzx edx, word [rdi + 2]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- LONG $0x0457b70f // movzx edx, word [rdi + 4]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- LONG $0x0657b70f // movzx edx, word [rdi + 6]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB2_5
-
-LBB2_1:
- WORD $0xd285 // test edx, edx
- JLE LBB2_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB2_3:
- LONG $0x04b70f42; BYTE $0x47 // movzx eax, word [rdi + 2*r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB2_3
-
-LBB2_4:
- RET
-
-TEXT ·_transpose_int16_uint8_sse4(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB3_1
-
-LBB3_5:
- WORD $0xd089 // mov eax, edx
- LONG $0x17bf0f48 // movsx rdx, word [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB3_5
-
-LBB3_1:
- WORD $0xd285 // test edx, edx
- JLE LBB3_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB3_3:
- LONG $0x04bf0f4a; BYTE $0x47 // movsx rax, word [rdi + 2*r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB3_3
-
-LBB3_4:
- RET
-
-TEXT ·_transpose_uint32_uint8_sse4(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB4_1
-
-LBB4_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x178b // mov edx, dword [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB4_5
-
-LBB4_1:
- WORD $0xd285 // test edx, edx
- JLE LBB4_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB4_3:
- LONG $0x87048b42 // mov eax, dword [rdi + 4*r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB4_3
-
-LBB4_4:
- RET
-
-TEXT ·_transpose_int32_uint8_sse4(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB5_1
-
-LBB5_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- LONG $0x04576348 // movsxd rdx, dword [rdi + 4]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- LONG $0x08576348 // movsxd rdx, dword [rdi + 8]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- LONG $0x0c576348 // movsxd rdx, dword [rdi + 12]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB5_5
-
-LBB5_1:
- WORD $0xd285 // test edx, edx
- JLE LBB5_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB5_3:
- LONG $0x8704634a // movsxd rax, dword [rdi + 4*r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB5_3
-
-LBB5_4:
- RET
-
-TEXT ·_transpose_uint64_uint8_sse4(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB6_1
-
-LBB6_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB6_5
-
-LBB6_1:
- WORD $0xd285 // test edx, edx
- JLE LBB6_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB6_3:
- LONG $0xc7048b4a // mov rax, qword [rdi + 8*r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB6_3
-
-LBB6_4:
- RET
-
-TEXT ·_transpose_int64_uint8_sse4(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB7_1
-
-LBB7_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB7_5
-
-LBB7_1:
- WORD $0xd285 // test edx, edx
- JLE LBB7_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB7_3:
- LONG $0xc7048b4a // mov rax, qword [rdi + 8*r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB7_3
-
-LBB7_4:
- RET
-
-TEXT ·_transpose_uint8_int8_sse4(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB8_1
-
-LBB8_5:
- WORD $0xd089 // mov eax, edx
- WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- LONG $0x0157b60f // movzx edx, byte [rdi + 1]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- LONG $0x0257b60f // movzx edx, byte [rdi + 2]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- LONG $0x0357b60f // movzx edx, byte [rdi + 3]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB8_5
-
-LBB8_1:
- WORD $0xd285 // test edx, edx
- JLE LBB8_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB8_3:
- LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB8_3
-
-LBB8_4:
- RET
-
-TEXT ·_transpose_int8_int8_sse4(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB9_1
-
-LBB9_5:
- WORD $0xd089 // mov eax, edx
- LONG $0x17be0f48 // movsx rdx, byte [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB9_5
-
-LBB9_1:
- WORD $0xd285 // test edx, edx
- JLE LBB9_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB9_3:
- LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB9_3
-
-LBB9_4:
- RET
-
-TEXT ·_transpose_uint16_int8_sse4(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB10_1
-
-LBB10_5:
- WORD $0xd089 // mov eax, edx
- WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- LONG $0x0257b70f // movzx edx, word [rdi + 2]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- LONG $0x0457b70f // movzx edx, word [rdi + 4]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- LONG $0x0657b70f // movzx edx, word [rdi + 6]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB10_5
-
-LBB10_1:
- WORD $0xd285 // test edx, edx
- JLE LBB10_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB10_3:
- LONG $0x04b70f42; BYTE $0x47 // movzx eax, word [rdi + 2*r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB10_3
-
-LBB10_4:
- RET
-
-TEXT ·_transpose_int16_int8_sse4(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB11_1
-
-LBB11_5:
- WORD $0xd089 // mov eax, edx
- LONG $0x17bf0f48 // movsx rdx, word [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB11_5
-
-LBB11_1:
- WORD $0xd285 // test edx, edx
- JLE LBB11_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB11_3:
- LONG $0x04bf0f4a; BYTE $0x47 // movsx rax, word [rdi + 2*r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB11_3
-
-LBB11_4:
- RET
-
-TEXT ·_transpose_uint32_int8_sse4(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB12_1
-
-LBB12_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x178b // mov edx, dword [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB12_5
-
-LBB12_1:
- WORD $0xd285 // test edx, edx
- JLE LBB12_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB12_3:
- LONG $0x87048b42 // mov eax, dword [rdi + 4*r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB12_3
-
-LBB12_4:
- RET
-
-TEXT ·_transpose_int32_int8_sse4(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB13_1
-
-LBB13_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- LONG $0x04576348 // movsxd rdx, dword [rdi + 4]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- LONG $0x08576348 // movsxd rdx, dword [rdi + 8]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- LONG $0x0c576348 // movsxd rdx, dword [rdi + 12]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB13_5
-
-LBB13_1:
- WORD $0xd285 // test edx, edx
- JLE LBB13_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB13_3:
- LONG $0x8704634a // movsxd rax, dword [rdi + 4*r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB13_3
-
-LBB13_4:
- RET
-
-TEXT ·_transpose_uint64_int8_sse4(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB14_1
-
-LBB14_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB14_5
-
-LBB14_1:
- WORD $0xd285 // test edx, edx
- JLE LBB14_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB14_3:
- LONG $0xc7048b4a // mov rax, qword [rdi + 8*r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB14_3
-
-LBB14_4:
- RET
-
-TEXT ·_transpose_int64_int8_sse4(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB15_1
-
-LBB15_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x1688 // mov byte [rsi], dl
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx]
- WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x04c68348 // add rsi, 4
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB15_5
-
-LBB15_1:
- WORD $0xd285 // test edx, edx
- JLE LBB15_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB15_3:
- LONG $0xc7048b4a // mov rax, qword [rdi + 8*r8]
- LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax]
- LONG $0x06048842 // mov byte [rsi + r8], al
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB15_3
-
-LBB15_4:
- RET
-
-TEXT ·_transpose_uint8_uint16_sse4(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB16_1
-
-LBB16_5:
- WORD $0xd089 // mov eax, edx
- WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- LONG $0x0157b60f // movzx edx, byte [rdi + 1]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- LONG $0x0257b60f // movzx edx, byte [rdi + 2]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- LONG $0x0357b60f // movzx edx, byte [rdi + 3]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB16_5
-
-LBB16_1:
- WORD $0xd285 // test edx, edx
- JLE LBB16_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB16_3:
- LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x46 // mov word [rsi + 2*r8], ax
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB16_3
-
-LBB16_4:
- RET
-
-TEXT ·_transpose_int8_uint16_sse4(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB17_1
-
-LBB17_5:
- WORD $0xd089 // mov eax, edx
- LONG $0x17be0f48 // movsx rdx, byte [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB17_5
-
-LBB17_1:
- WORD $0xd285 // test edx, edx
- JLE LBB17_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB17_3:
- LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x46 // mov word [rsi + 2*r8], ax
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB17_3
-
-LBB17_4:
- RET
-
-TEXT ·_transpose_uint16_uint16_sse4(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB18_1
-
-LBB18_5:
- WORD $0xd089 // mov eax, edx
- WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- LONG $0x0257b70f // movzx edx, word [rdi + 2]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- LONG $0x0457b70f // movzx edx, word [rdi + 4]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- LONG $0x0657b70f // movzx edx, word [rdi + 6]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB18_5
-
-LBB18_1:
- WORD $0xd285 // test edx, edx
- JLE LBB18_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB18_3:
- LONG $0x04b70f42; BYTE $0x07 // movzx eax, word [rdi + r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB18_3
-
-LBB18_4:
- RET
-
-TEXT ·_transpose_int16_uint16_sse4(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB19_1
-
-LBB19_5:
- WORD $0xd089 // mov eax, edx
- LONG $0x17bf0f48 // movsx rdx, word [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB19_5
-
-LBB19_1:
- WORD $0xd285 // test edx, edx
- JLE LBB19_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB19_3:
- LONG $0x04bf0f4a; BYTE $0x07 // movsx rax, word [rdi + r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB19_3
-
-LBB19_4:
- RET
-
-TEXT ·_transpose_uint32_uint16_sse4(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB20_1
-
-LBB20_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x178b // mov edx, dword [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB20_5
-
-LBB20_1:
- WORD $0xd285 // test edx, edx
- JLE LBB20_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB20_3:
- LONG $0x47048b42 // mov eax, dword [rdi + 2*r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB20_3
-
-LBB20_4:
- RET
-
-TEXT ·_transpose_int32_uint16_sse4(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB21_1
-
-LBB21_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- LONG $0x04576348 // movsxd rdx, dword [rdi + 4]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- LONG $0x08576348 // movsxd rdx, dword [rdi + 8]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- LONG $0x0c576348 // movsxd rdx, dword [rdi + 12]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB21_5
-
-LBB21_1:
- WORD $0xd285 // test edx, edx
- JLE LBB21_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB21_3:
- LONG $0x4704634a // movsxd rax, dword [rdi + 2*r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB21_3
-
-LBB21_4:
- RET
-
-TEXT ·_transpose_uint64_uint16_sse4(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB22_1
-
-LBB22_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB22_5
-
-LBB22_1:
- WORD $0xd285 // test edx, edx
- JLE LBB22_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB22_3:
- LONG $0x87048b4a // mov rax, qword [rdi + 4*r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB22_3
-
-LBB22_4:
- RET
-
-TEXT ·_transpose_int64_uint16_sse4(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB23_1
-
-LBB23_5:
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB23_5
-
-LBB23_1:
- WORD $0xd285 // test edx, edx
- JLE LBB23_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB23_3:
- LONG $0x87048b4a // mov rax, qword [rdi + 4*r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB23_3
-
-LBB23_4:
- RET
-
-TEXT ·_transpose_uint8_int16_sse4(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB24_1
-
-LBB24_5:
- WORD $0xd089 // mov eax, edx
- WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- LONG $0x0157b60f // movzx edx, byte [rdi + 1]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- LONG $0x0257b60f // movzx edx, byte [rdi + 2]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- LONG $0x0357b60f // movzx edx, byte [rdi + 3]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB24_5
-
-LBB24_1:
- WORD $0xd285 // test edx, edx
- JLE LBB24_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB24_3:
- LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x46 // mov word [rsi + 2*r8], ax
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB24_3
-
-LBB24_4:
- RET
-
-TEXT ·_transpose_int8_int16_sse4(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB25_1
-
-LBB25_5:
- WORD $0xd089 // mov eax, edx
- LONG $0x17be0f48 // movsx rdx, byte [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB25_5
-
-LBB25_1:
- WORD $0xd285 // test edx, edx
- JLE LBB25_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB25_3:
- LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x46 // mov word [rsi + 2*r8], ax
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB25_3
-
-LBB25_4:
- RET
-
-TEXT ·_transpose_uint16_int16_sse4(SB), $0-32
-
- MOVQ src+0(FP), DI
- MOVQ dest+8(FP), SI
- MOVQ length+16(FP), DX
- MOVQ transposeMap+24(FP), CX
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB26_1
-
-LBB26_5:
- WORD $0xd089 // mov eax, edx
- WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- LONG $0x0257b70f // movzx edx, word [rdi + 2]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- LONG $0x0457b70f // movzx edx, word [rdi + 4]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- LONG $0x0657b70f // movzx edx, word [rdi + 6]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB26_5
-
-LBB26_1:
- WORD $0xd285 // test edx, edx
- JLE LBB26_4
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB26_3:
- LONG $0x04b70f42; BYTE $0x07 // movzx eax, word [rdi + r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB26_3
-
-LBB26_4:
- RET
-
-TEXT ·_transpose_int16_int16_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] for each of length elements; src is read as sign-extended 16-bit values, map entries are 4 bytes wide and their low 16 bits are stored. Frame size 0, 32 bytes of arguments; scalar instructions only.
-
- MOVQ src+0(FP), DI // DI = src
- MOVQ dest+8(FP), SI // SI = dest
- MOVQ length+16(FP), DX // DX = length; only the low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX = transposeMap base
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB27_1 // fewer than 4 elements: go straight to the tail loop
-
-LBB27_5: // main loop, unrolled 4x
- WORD $0xd089 // mov eax, edx
- LONG $0x17bf0f48 // movsx rdx, word [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB27_5 // eax holds the count from before this pass; > 7 means 4 or more remain
-
-LBB27_1: // tail: handle the remaining elements one at a time
- WORD $0xd285 // test edx, edx
- JLE LBB27_4 // remaining count <= 0: done
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB27_3: // r8 = byte offset shared by src and dest (2 bytes per element)
- LONG $0x04bf0f4a; BYTE $0x07 // movsx rax, word [rdi + r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB27_3 // edx was incremented before the loop, so this iterates once per remaining element
-
-LBB27_4:
- RET
-
-TEXT ·_transpose_uint32_int16_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] for each of length elements; src is read as zero-extended 32-bit values, map entries are 4 bytes wide and their low 16 bits are stored. Frame size 0, 32 bytes of arguments; scalar instructions only.
-
- MOVQ src+0(FP), DI // DI = src
- MOVQ dest+8(FP), SI // SI = dest
- MOVQ length+16(FP), DX // DX = length; only the low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX = transposeMap base
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB28_1 // fewer than 4 elements: go straight to the tail loop
-
-LBB28_5: // main loop, unrolled 4x
- WORD $0xd089 // mov eax, edx
- WORD $0x178b // mov edx, dword [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB28_5 // eax holds the count from before this pass; > 7 means 4 or more remain
-
-LBB28_1: // tail: handle the remaining elements one at a time
- WORD $0xd285 // test edx, edx
- JLE LBB28_4 // remaining count <= 0: done
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB28_3: // r8 = byte offset into dest; src is addressed at 2*r8
- LONG $0x47048b42 // mov eax, dword [rdi + 2*r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB28_3 // edx was incremented before the loop, so this iterates once per remaining element
-
-LBB28_4:
- RET
-
-TEXT ·_transpose_int32_int16_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] for each of length elements; src is read as sign-extended 32-bit values, map entries are 4 bytes wide and their low 16 bits are stored. Frame size 0, 32 bytes of arguments; scalar instructions only.
-
- MOVQ src+0(FP), DI // DI = src
- MOVQ dest+8(FP), SI // SI = dest
- MOVQ length+16(FP), DX // DX = length; only the low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX = transposeMap base
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB29_1 // fewer than 4 elements: go straight to the tail loop
-
-LBB29_5: // main loop, unrolled 4x
- WORD $0xd089 // mov eax, edx
- WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- LONG $0x04576348 // movsxd rdx, dword [rdi + 4]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- LONG $0x08576348 // movsxd rdx, dword [rdi + 8]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- LONG $0x0c576348 // movsxd rdx, dword [rdi + 12]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB29_5 // eax holds the count from before this pass; > 7 means 4 or more remain
-
-LBB29_1: // tail: handle the remaining elements one at a time
- WORD $0xd285 // test edx, edx
- JLE LBB29_4 // remaining count <= 0: done
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB29_3: // r8 = byte offset into dest; src is addressed at 2*r8
- LONG $0x4704634a // movsxd rax, dword [rdi + 2*r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB29_3 // edx was incremented before the loop, so this iterates once per remaining element
-
-LBB29_4:
- RET
-
-TEXT ·_transpose_uint64_int16_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] for each of length elements; src is read as full 64-bit values, map entries are 4 bytes wide and their low 16 bits are stored. Frame size 0, 32 bytes of arguments; scalar instructions only.
-
- MOVQ src+0(FP), DI // DI = src
- MOVQ dest+8(FP), SI // SI = dest
- MOVQ length+16(FP), DX // DX = length; only the low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX = transposeMap base
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB30_1 // fewer than 4 elements: go straight to the tail loop
-
-LBB30_5: // main loop, unrolled 4x
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB30_5 // eax holds the count from before this pass; > 7 means 4 or more remain
-
-LBB30_1: // tail: handle the remaining elements one at a time
- WORD $0xd285 // test edx, edx
- JLE LBB30_4 // remaining count <= 0: done
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB30_3: // r8 = byte offset into dest; src is addressed at 4*r8
- LONG $0x87048b4a // mov rax, qword [rdi + 4*r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB30_3 // edx was incremented before the loop, so this iterates once per remaining element
-
-LBB30_4:
- RET
-
-TEXT ·_transpose_int64_int16_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] for each of length elements; src is read as full 64-bit values, map entries are 4 bytes wide and their low 16 bits are stored. Frame size 0, 32 bytes of arguments; scalar instructions only.
-
- MOVQ src+0(FP), DI // DI = src
- MOVQ dest+8(FP), SI // SI = dest
- MOVQ length+16(FP), DX // DX = length; only the low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX = transposeMap base
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB31_1 // fewer than 4 elements: go straight to the tail loop
-
-LBB31_5: // main loop, unrolled 4x
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- WORD $0x8966; BYTE $0x16 // mov word [rsi], dx
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x02568966 // mov word [rsi + 2], dx
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x04568966 // mov word [rsi + 4], dx
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx]
- LONG $0x06568966 // mov word [rsi + 6], dx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x08c68348 // add rsi, 8
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB31_5 // eax holds the count from before this pass; > 7 means 4 or more remain
-
-LBB31_1: // tail: handle the remaining elements one at a time
- WORD $0xd285 // test edx, edx
- JLE LBB31_4 // remaining count <= 0: done
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB31_3: // r8 = byte offset into dest; src is addressed at 4*r8
- LONG $0x87048b4a // mov rax, qword [rdi + 4*r8]
- LONG $0x8104b70f // movzx eax, word [rcx + 4*rax]
- LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB31_3 // edx was incremented before the loop, so this iterates once per remaining element
-
-LBB31_4:
- RET
-
-TEXT ·_transpose_uint8_uint32_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] for each of length elements; src is read as zero-extended bytes, map entries are 4 bytes wide and are stored unchanged as 4-byte values. Frame size 0, 32 bytes of arguments; scalar instructions only.
-
- MOVQ src+0(FP), DI // DI = src
- MOVQ dest+8(FP), SI // SI = dest
- MOVQ length+16(FP), DX // DX = length; only the low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX = transposeMap base
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB32_1 // fewer than 4 elements: go straight to the tail loop
-
-LBB32_5: // main loop, unrolled 4x
- WORD $0xd089 // mov eax, edx
- WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- LONG $0x0157b60f // movzx edx, byte [rdi + 1]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- LONG $0x0257b60f // movzx edx, byte [rdi + 2]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- LONG $0x0357b60f // movzx edx, byte [rdi + 3]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB32_5 // eax holds the count from before this pass; > 7 means 4 or more remain
-
-LBB32_1: // tail: handle the remaining elements one at a time
- WORD $0xd285 // test edx, edx
- JLE LBB32_4 // remaining count <= 0: done
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB32_3: // r8 = element index; dest is addressed at 4*r8
- LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x86048942 // mov dword [rsi + 4*r8], eax
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB32_3 // edx was incremented before the loop, so this iterates once per remaining element
-
-LBB32_4:
- RET
-
-TEXT ·_transpose_int8_uint32_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] for each of length elements; src is read as sign-extended bytes, map entries are 4 bytes wide and are stored unchanged as 4-byte values. Frame size 0, 32 bytes of arguments; scalar instructions only.
-
- MOVQ src+0(FP), DI // DI = src
- MOVQ dest+8(FP), SI // SI = dest
- MOVQ length+16(FP), DX // DX = length; only the low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX = transposeMap base
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB33_1 // fewer than 4 elements: go straight to the tail loop
-
-LBB33_5: // main loop, unrolled 4x
- WORD $0xd089 // mov eax, edx
- LONG $0x17be0f48 // movsx rdx, byte [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB33_5 // eax holds the count from before this pass; > 7 means 4 or more remain
-
-LBB33_1: // tail: handle the remaining elements one at a time
- WORD $0xd285 // test edx, edx
- JLE LBB33_4 // remaining count <= 0: done
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB33_3: // r8 = element index; dest is addressed at 4*r8
- LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x86048942 // mov dword [rsi + 4*r8], eax
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB33_3 // edx was incremented before the loop, so this iterates once per remaining element
-
-LBB33_4:
- RET
-
-TEXT ·_transpose_uint16_uint32_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] for each of length elements; src is read as zero-extended 16-bit values, map entries are 4 bytes wide and are stored unchanged as 4-byte values. Frame size 0, 32 bytes of arguments; scalar instructions only.
-
- MOVQ src+0(FP), DI // DI = src
- MOVQ dest+8(FP), SI // SI = dest
- MOVQ length+16(FP), DX // DX = length; only the low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX = transposeMap base
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB34_1 // fewer than 4 elements: go straight to the tail loop
-
-LBB34_5: // main loop, unrolled 4x
- WORD $0xd089 // mov eax, edx
- WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- LONG $0x0257b70f // movzx edx, word [rdi + 2]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- LONG $0x0457b70f // movzx edx, word [rdi + 4]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- LONG $0x0657b70f // movzx edx, word [rdi + 6]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB34_5 // eax holds the count from before this pass; > 7 means 4 or more remain
-
-LBB34_1: // tail: handle the remaining elements one at a time
- WORD $0xd285 // test edx, edx
- JLE LBB34_4 // remaining count <= 0: done
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB34_3: // r8 = byte offset into src; dest is addressed at 2*r8
- LONG $0x04b70f42; BYTE $0x07 // movzx eax, word [rdi + r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x46048942 // mov dword [rsi + 2*r8], eax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB34_3 // edx was incremented before the loop, so this iterates once per remaining element
-
-LBB34_4:
- RET
-
-TEXT ·_transpose_int16_uint32_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] for each of length elements; src is read as sign-extended 16-bit values, map entries are 4 bytes wide and are stored unchanged as 4-byte values. Frame size 0, 32 bytes of arguments; scalar instructions only.
-
- MOVQ src+0(FP), DI // DI = src
- MOVQ dest+8(FP), SI // SI = dest
- MOVQ length+16(FP), DX // DX = length; only the low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX = transposeMap base
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB35_1 // fewer than 4 elements: go straight to the tail loop
-
-LBB35_5: // main loop, unrolled 4x
- WORD $0xd089 // mov eax, edx
- LONG $0x17bf0f48 // movsx rdx, word [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB35_5 // eax holds the count from before this pass; > 7 means 4 or more remain
-
-LBB35_1: // tail: handle the remaining elements one at a time
- WORD $0xd285 // test edx, edx
- JLE LBB35_4 // remaining count <= 0: done
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB35_3: // r8 = byte offset into src; dest is addressed at 2*r8
- LONG $0x04bf0f4a; BYTE $0x07 // movsx rax, word [rdi + r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x46048942 // mov dword [rsi + 2*r8], eax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB35_3 // edx was incremented before the loop, so this iterates once per remaining element
-
-LBB35_4:
- RET
-
-TEXT ·_transpose_uint32_uint32_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] for each of length elements; src is read as zero-extended 32-bit values, map entries are 4 bytes wide and are stored unchanged as 4-byte values. Frame size 0, 32 bytes of arguments; scalar instructions only.
-
- MOVQ src+0(FP), DI // DI = src
- MOVQ dest+8(FP), SI // SI = dest
- MOVQ length+16(FP), DX // DX = length; only the low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX = transposeMap base
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB36_1 // fewer than 4 elements: go straight to the tail loop
-
-LBB36_5: // main loop, unrolled 4x
- WORD $0xd089 // mov eax, edx
- WORD $0x178b // mov edx, dword [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB36_5 // eax holds the count from before this pass; > 7 means 4 or more remain
-
-LBB36_1: // tail: handle the remaining elements one at a time
- WORD $0xd285 // test edx, edx
- JLE LBB36_4 // remaining count <= 0: done
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB36_3: // r8 = byte offset shared by src and dest (4 bytes per element)
- LONG $0x07048b42 // mov eax, dword [rdi + r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x06048942 // mov dword [rsi + r8], eax
- LONG $0x04c08349 // add r8, 4
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB36_3 // edx was incremented before the loop, so this iterates once per remaining element
-
-LBB36_4:
- RET
-
-TEXT ·_transpose_int32_uint32_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] for each of length elements; src is read as sign-extended 32-bit values, map entries are 4 bytes wide and are stored unchanged as 4-byte values. Frame size 0, 32 bytes of arguments; scalar instructions only.
-
- MOVQ src+0(FP), DI // DI = src
- MOVQ dest+8(FP), SI // SI = dest
- MOVQ length+16(FP), DX // DX = length; only the low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX = transposeMap base
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB37_1 // fewer than 4 elements: go straight to the tail loop
-
-LBB37_5: // main loop, unrolled 4x
- WORD $0xd089 // mov eax, edx
- WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- LONG $0x04576348 // movsxd rdx, dword [rdi + 4]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- LONG $0x08576348 // movsxd rdx, dword [rdi + 8]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- LONG $0x0c576348 // movsxd rdx, dword [rdi + 12]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB37_5 // eax holds the count from before this pass; > 7 means 4 or more remain
-
-LBB37_1: // tail: handle the remaining elements one at a time
- WORD $0xd285 // test edx, edx
- JLE LBB37_4 // remaining count <= 0: done
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB37_3: // r8 = byte offset shared by src and dest (4 bytes per element)
- LONG $0x0704634a // movsxd rax, dword [rdi + r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x06048942 // mov dword [rsi + r8], eax
- LONG $0x04c08349 // add r8, 4
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB37_3 // edx was incremented before the loop, so this iterates once per remaining element
-
-LBB37_4:
- RET
-
-TEXT ·_transpose_uint64_uint32_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] for each of length elements; src is read as full 64-bit values, map entries are 4 bytes wide and are stored unchanged as 4-byte values. Frame size 0, 32 bytes of arguments; scalar instructions only.
-
- MOVQ src+0(FP), DI // DI = src
- MOVQ dest+8(FP), SI // SI = dest
- MOVQ length+16(FP), DX // DX = length; only the low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX = transposeMap base
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB38_1 // fewer than 4 elements: go straight to the tail loop
-
-LBB38_5: // main loop, unrolled 4x
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB38_5 // eax holds the count from before this pass; > 7 means 4 or more remain
-
-LBB38_1: // tail: handle the remaining elements one at a time
- WORD $0xd285 // test edx, edx
- JLE LBB38_4 // remaining count <= 0: done
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB38_3: // r8 = byte offset into dest; src is addressed at 2*r8
- LONG $0x47048b4a // mov rax, qword [rdi + 2*r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x06048942 // mov dword [rsi + r8], eax
- LONG $0x04c08349 // add r8, 4
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB38_3 // edx was incremented before the loop, so this iterates once per remaining element
-
-LBB38_4:
- RET
-
-TEXT ·_transpose_int64_uint32_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] for each of length elements; src is read as full 64-bit values, map entries are 4 bytes wide and are stored unchanged as 4-byte values. Frame size 0, 32 bytes of arguments; scalar instructions only.
-
- MOVQ src+0(FP), DI // DI = src
- MOVQ dest+8(FP), SI // SI = dest
- MOVQ length+16(FP), DX // DX = length; only the low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX = transposeMap base
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB39_1 // fewer than 4 elements: go straight to the tail loop
-
-LBB39_5: // main loop, unrolled 4x
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB39_5 // eax holds the count from before this pass; > 7 means 4 or more remain
-
-LBB39_1: // tail: handle the remaining elements one at a time
- WORD $0xd285 // test edx, edx
- JLE LBB39_4 // remaining count <= 0: done
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB39_3: // r8 = byte offset into dest; src is addressed at 2*r8
- LONG $0x47048b4a // mov rax, qword [rdi + 2*r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x06048942 // mov dword [rsi + r8], eax
- LONG $0x04c08349 // add r8, 4
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB39_3 // edx was incremented before the loop, so this iterates once per remaining element
-
-LBB39_4:
- RET
-
-TEXT ·_transpose_uint8_int32_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] for each of length elements; src is read as zero-extended bytes, map entries are 4 bytes wide and are stored unchanged as 4-byte values. Frame size 0, 32 bytes of arguments; scalar instructions only.
-
- MOVQ src+0(FP), DI // DI = src
- MOVQ dest+8(FP), SI // SI = dest
- MOVQ length+16(FP), DX // DX = length; only the low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX = transposeMap base
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB40_1 // fewer than 4 elements: go straight to the tail loop
-
-LBB40_5: // main loop, unrolled 4x
- WORD $0xd089 // mov eax, edx
- WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- LONG $0x0157b60f // movzx edx, byte [rdi + 1]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- LONG $0x0257b60f // movzx edx, byte [rdi + 2]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- LONG $0x0357b60f // movzx edx, byte [rdi + 3]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB40_5 // eax holds the count from before this pass; > 7 means 4 or more remain
-
-LBB40_1: // tail: handle the remaining elements one at a time
- WORD $0xd285 // test edx, edx
- JLE LBB40_4 // remaining count <= 0: done
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB40_3: // r8 = element index; dest is addressed at 4*r8
- LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x86048942 // mov dword [rsi + 4*r8], eax
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB40_3 // edx was incremented before the loop, so this iterates once per remaining element
-
-LBB40_4:
- RET
-
-TEXT ·_transpose_int8_int32_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] for each of length elements; src is read as sign-extended bytes, map entries are 4 bytes wide and are stored unchanged as 4-byte values. Frame size 0, 32 bytes of arguments; scalar instructions only.
-
- MOVQ src+0(FP), DI // DI = src
- MOVQ dest+8(FP), SI // SI = dest
- MOVQ length+16(FP), DX // DX = length; only the low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX = transposeMap base
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB41_1 // fewer than 4 elements: go straight to the tail loop
-
-LBB41_5: // main loop, unrolled 4x
- WORD $0xd089 // mov eax, edx
- LONG $0x17be0f48 // movsx rdx, byte [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB41_5 // eax holds the count from before this pass; > 7 means 4 or more remain
-
-LBB41_1: // tail: handle the remaining elements one at a time
- WORD $0xd285 // test edx, edx
- JLE LBB41_4 // remaining count <= 0: done
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB41_3: // r8 = element index; dest is addressed at 4*r8
- LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x86048942 // mov dword [rsi + 4*r8], eax
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB41_3 // edx was incremented before the loop, so this iterates once per remaining element
-
-LBB41_4:
- RET
-
-TEXT ·_transpose_uint16_int32_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] for each of length elements; src is read as zero-extended 16-bit values, map entries are 4 bytes wide and are stored unchanged as 4-byte values. Frame size 0, 32 bytes of arguments; scalar instructions only.
-
- MOVQ src+0(FP), DI // DI = src
- MOVQ dest+8(FP), SI // SI = dest
- MOVQ length+16(FP), DX // DX = length; only the low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX = transposeMap base
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB42_1 // fewer than 4 elements: go straight to the tail loop
-
-LBB42_5: // main loop, unrolled 4x
- WORD $0xd089 // mov eax, edx
- WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- LONG $0x0257b70f // movzx edx, word [rdi + 2]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- LONG $0x0457b70f // movzx edx, word [rdi + 4]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- LONG $0x0657b70f // movzx edx, word [rdi + 6]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB42_5 // eax holds the count from before this pass; > 7 means 4 or more remain
-
-LBB42_1: // tail: handle the remaining elements one at a time
- WORD $0xd285 // test edx, edx
- JLE LBB42_4 // remaining count <= 0: done
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB42_3: // r8 = byte offset into src; dest is addressed at 2*r8
- LONG $0x04b70f42; BYTE $0x07 // movzx eax, word [rdi + r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x46048942 // mov dword [rsi + 2*r8], eax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB42_3 // edx was incremented before the loop, so this iterates once per remaining element
-
-LBB42_4:
- RET
-
-TEXT ·_transpose_int16_int32_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] for each of length elements; src is read as sign-extended 16-bit values, map entries are 4 bytes wide and are stored unchanged as 4-byte values. Frame size 0, 32 bytes of arguments; scalar instructions only.
-
- MOVQ src+0(FP), DI // DI = src
- MOVQ dest+8(FP), SI // SI = dest
- MOVQ length+16(FP), DX // DX = length; only the low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX = transposeMap base
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB43_1 // fewer than 4 elements: go straight to the tail loop
-
-LBB43_5: // main loop, unrolled 4x
- WORD $0xd089 // mov eax, edx
- LONG $0x17bf0f48 // movsx rdx, word [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB43_5 // eax holds the count from before this pass; > 7 means 4 or more remain
-
-LBB43_1: // tail: handle the remaining elements one at a time
- WORD $0xd285 // test edx, edx
- JLE LBB43_4 // remaining count <= 0: done
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB43_3: // r8 = byte offset into src; dest is addressed at 2*r8
- LONG $0x04bf0f4a; BYTE $0x07 // movsx rax, word [rdi + r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x46048942 // mov dword [rsi + 2*r8], eax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB43_3 // edx was incremented before the loop, so this iterates once per remaining element
-
-LBB43_4:
- RET
-
-TEXT ·_transpose_uint32_int32_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] for each of length elements; src is read as zero-extended 32-bit values, map entries are 4 bytes wide and are stored unchanged as 4-byte values. Frame size 0, 32 bytes of arguments; scalar instructions only.
-
- MOVQ src+0(FP), DI // DI = src
- MOVQ dest+8(FP), SI // SI = dest
- MOVQ length+16(FP), DX // DX = length; only the low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX = transposeMap base
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB44_1 // fewer than 4 elements: go straight to the tail loop
-
-LBB44_5: // main loop, unrolled 4x
- WORD $0xd089 // mov eax, edx
- WORD $0x178b // mov edx, dword [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB44_5 // eax holds the count from before this pass; > 7 means 4 or more remain
-
-LBB44_1: // tail: handle the remaining elements one at a time
- WORD $0xd285 // test edx, edx
- JLE LBB44_4 // remaining count <= 0: done
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB44_3: // r8 = byte offset shared by src and dest (4 bytes per element)
- LONG $0x07048b42 // mov eax, dword [rdi + r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x06048942 // mov dword [rsi + r8], eax
- LONG $0x04c08349 // add r8, 4
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB44_3 // edx was incremented before the loop, so this iterates once per remaining element
-
-LBB44_4:
- RET
-
-TEXT ·_transpose_int32_int32_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] for each of length elements; src is read as sign-extended 32-bit values, map entries are 4 bytes wide and are stored unchanged as 4-byte values. Frame size 0, 32 bytes of arguments; scalar instructions only.
-
- MOVQ src+0(FP), DI // DI = src
- MOVQ dest+8(FP), SI // SI = dest
- MOVQ length+16(FP), DX // DX = length; only the low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX = transposeMap base
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB45_1 // fewer than 4 elements: go straight to the tail loop
-
-LBB45_5: // main loop, unrolled 4x
- WORD $0xd089 // mov eax, edx
- WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- LONG $0x04576348 // movsxd rdx, dword [rdi + 4]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- LONG $0x08576348 // movsxd rdx, dword [rdi + 8]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- LONG $0x0c576348 // movsxd rdx, dword [rdi + 12]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB45_5 // eax holds the count from before this pass; > 7 means 4 or more remain
-
-LBB45_1: // tail: handle the remaining elements one at a time
- WORD $0xd285 // test edx, edx
- JLE LBB45_4 // remaining count <= 0: done
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB45_3: // r8 = byte offset shared by src and dest (4 bytes per element)
- LONG $0x0704634a // movsxd rax, dword [rdi + r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x06048942 // mov dword [rsi + r8], eax
- LONG $0x04c08349 // add r8, 4
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB45_3 // edx was incremented before the loop, so this iterates once per remaining element
-
-LBB45_4:
- RET
-
-TEXT ·_transpose_uint64_int32_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] for each of length elements; src is read as full 64-bit values, map entries are 4 bytes wide and are stored unchanged as 4-byte values. Frame size 0, 32 bytes of arguments; scalar instructions only.
-
- MOVQ src+0(FP), DI // DI = src
- MOVQ dest+8(FP), SI // SI = dest
- MOVQ length+16(FP), DX // DX = length; only the low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX = transposeMap base
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB46_1 // fewer than 4 elements: go straight to the tail loop
-
-LBB46_5: // main loop, unrolled 4x
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB46_5 // eax holds the count from before this pass; > 7 means 4 or more remain
-
-LBB46_1: // tail: handle the remaining elements one at a time
- WORD $0xd285 // test edx, edx
- JLE LBB46_4 // remaining count <= 0: done
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB46_3: // r8 = byte offset into dest; src is addressed at 2*r8
- LONG $0x47048b4a // mov rax, qword [rdi + 2*r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x06048942 // mov dword [rsi + r8], eax
- LONG $0x04c08349 // add r8, 4
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB46_3 // edx was incremented before the loop, so this iterates once per remaining element
-
-LBB46_4:
- RET
-
-TEXT ·_transpose_int64_int32_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] for each of length elements; src is read as full 64-bit values, map entries are 4 bytes wide and are stored unchanged as 4-byte values. Frame size 0, 32 bytes of arguments; scalar instructions only.
-
- MOVQ src+0(FP), DI // DI = src
- MOVQ dest+8(FP), SI // SI = dest
- MOVQ length+16(FP), DX // DX = length; only the low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX = transposeMap base
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB47_1 // fewer than 4 elements: go straight to the tail loop
-
-LBB47_5: // main loop, unrolled 4x
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x1689 // mov dword [rsi], edx
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx]
- WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x10c68348 // add rsi, 16
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB47_5 // eax holds the count from before this pass; > 7 means 4 or more remain
-
-LBB47_1: // tail: handle the remaining elements one at a time
- WORD $0xd285 // test edx, edx
- JLE LBB47_4 // remaining count <= 0: done
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB47_3: // r8 = byte offset into dest; src is addressed at 2*r8
- LONG $0x47048b4a // mov rax, qword [rdi + 2*r8]
- WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax]
- LONG $0x06048942 // mov dword [rsi + r8], eax
- LONG $0x04c08349 // add r8, 4
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB47_3 // edx was incremented before the loop, so this iterates once per remaining element
-
-LBB47_4:
- RET
-
-TEXT ·_transpose_uint8_uint64_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] for each of length elements; src is read as zero-extended bytes, map entries are 4 bytes wide and are sign-extended to 8 bytes before the store. Frame size 0, 32 bytes of arguments; scalar instructions only.
-
- MOVQ src+0(FP), DI // DI = src
- MOVQ dest+8(FP), SI // SI = dest
- MOVQ length+16(FP), DX // DX = length; only the low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX = transposeMap base
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB48_1 // fewer than 4 elements: go straight to the tail loop
-
-LBB48_5: // main loop, unrolled 4x
- WORD $0xd089 // mov eax, edx
- WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- LONG $0x0157b60f // movzx edx, byte [rdi + 1]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- LONG $0x0257b60f // movzx edx, byte [rdi + 2]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- LONG $0x0357b60f // movzx edx, byte [rdi + 3]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB48_5 // eax holds the count from before this pass; > 7 means 4 or more remain
-
-LBB48_1: // tail: handle the remaining elements one at a time
- WORD $0xd285 // test edx, edx
- JLE LBB48_4 // remaining count <= 0: done
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB48_3: // r8 = element index; dest is addressed at 8*r8
- LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0xc604894a // mov qword [rsi + 8*r8], rax
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB48_3 // edx was incremented before the loop, so this iterates once per remaining element
-
-LBB48_4:
- RET
-
-TEXT ·_transpose_int8_uint64_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] for each of length elements; src is read as sign-extended bytes, map entries are 4 bytes wide and are sign-extended to 8 bytes before the store. Frame size 0, 32 bytes of arguments; scalar instructions only.
-
- MOVQ src+0(FP), DI // DI = src
- MOVQ dest+8(FP), SI // SI = dest
- MOVQ length+16(FP), DX // DX = length; only the low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX = transposeMap base
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB49_1 // fewer than 4 elements: go straight to the tail loop
-
-LBB49_5: // main loop, unrolled 4x
- WORD $0xd089 // mov eax, edx
- LONG $0x17be0f48 // movsx rdx, byte [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB49_5 // eax holds the count from before this pass; > 7 means 4 or more remain
-
-LBB49_1: // tail: handle the remaining elements one at a time
- WORD $0xd285 // test edx, edx
- JLE LBB49_4 // remaining count <= 0: done
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB49_3: // r8 = element index; dest is addressed at 8*r8
- LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0xc604894a // mov qword [rsi + 8*r8], rax
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB49_3 // edx was incremented before the loop, so this iterates once per remaining element
-
-LBB49_4:
- RET
-
-TEXT ·_transpose_uint16_uint64_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] per element: src holds 2-byte indices (zero-extended), map entries are 32-bit and sign-extended into 8-byte dest slots. $0-32: no local frame, 32 bytes of arguments.
-
- MOVQ src+0(FP), DI // DI (rdi) = src argument
- MOVQ dest+8(FP), SI // SI (rsi) = dest argument
- MOVQ length+16(FP), DX // DX (rdx) = length argument; only its low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX (rcx) = transposeMap argument
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB50_1 // fewer than 4 elements: skip the unrolled loop
-
-LBB50_5: // unrolled loop: four elements per iteration, eax keeps the count on entry, edx becomes count-4
- WORD $0xd089 // mov eax, edx
- WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- LONG $0x0257b70f // movzx edx, word [rdi + 2]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- LONG $0x0457b70f // movzx edx, word [rdi + 4]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- LONG $0x0657b70f // movzx edx, word [rdi + 6]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB50_5 // repeat while the count on entry to this iteration exceeded 7, i.e. at least 4 elements remain
-
-LBB50_1: // remainder: edx holds the number of elements still to process
- WORD $0xd285 // test edx, edx
- JLE LBB50_4 // nothing left (count <= 0): return
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB50_3: // one element per iteration; r8 is the byte offset into src (2 per element), dest is addressed at 4*r8
- LONG $0x04b70f42; BYTE $0x07 // movzx eax, word [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0x8604894a // mov qword [rsi + 4*r8], rax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB50_3 // loop while the biased counter is still above 1
-
-LBB50_4: // common exit
- RET
-
-TEXT ·_transpose_int16_uint64_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] per element: src holds 2-byte indices (sign-extended), map entries are 32-bit and sign-extended into 8-byte dest slots. $0-32: no local frame, 32 bytes of arguments.
-
- MOVQ src+0(FP), DI // DI (rdi) = src argument
- MOVQ dest+8(FP), SI // SI (rsi) = dest argument
- MOVQ length+16(FP), DX // DX (rdx) = length argument; only its low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX (rcx) = transposeMap argument
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB51_1 // fewer than 4 elements: skip the unrolled loop
-
-LBB51_5: // unrolled loop: four elements per iteration, eax keeps the count on entry, edx becomes count-4
- WORD $0xd089 // mov eax, edx
- LONG $0x17bf0f48 // movsx rdx, word [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB51_5 // repeat while the count on entry to this iteration exceeded 7, i.e. at least 4 elements remain
-
-LBB51_1: // remainder: edx holds the number of elements still to process
- WORD $0xd285 // test edx, edx
- JLE LBB51_4 // nothing left (count <= 0): return
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB51_3: // one element per iteration; r8 is the byte offset into src (2 per element), dest is addressed at 4*r8
- LONG $0x04bf0f4a; BYTE $0x07 // movsx rax, word [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0x8604894a // mov qword [rsi + 4*r8], rax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB51_3 // loop while the biased counter is still above 1
-
-LBB51_4: // common exit
- RET
-
-TEXT ·_transpose_uint32_uint64_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] per element: src holds 4-byte indices (plain 32-bit loads), map entries are 32-bit and sign-extended into 8-byte dest slots. $0-32: no local frame, 32 bytes of arguments.
-
- MOVQ src+0(FP), DI // DI (rdi) = src argument
- MOVQ dest+8(FP), SI // SI (rsi) = dest argument
- MOVQ length+16(FP), DX // DX (rdx) = length argument; only its low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX (rcx) = transposeMap argument
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB52_1 // fewer than 4 elements: skip the unrolled loop
-
-LBB52_5: // unrolled loop: four elements per iteration, eax keeps the count on entry, edx becomes count-4
- WORD $0xd089 // mov eax, edx
- WORD $0x178b // mov edx, dword [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB52_5 // repeat while the count on entry to this iteration exceeded 7, i.e. at least 4 elements remain
-
-LBB52_1: // remainder: edx holds the number of elements still to process
- WORD $0xd285 // test edx, edx
- JLE LBB52_4 // nothing left (count <= 0): return
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB52_3: // one element per iteration; r8 is the byte offset into src (4 per element), dest is addressed at 2*r8
- LONG $0x07048b42 // mov eax, dword [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0x4604894a // mov qword [rsi + 2*r8], rax
- LONG $0x04c08349 // add r8, 4
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB52_3 // loop while the biased counter is still above 1
-
-LBB52_4: // common exit
- RET
-
-TEXT ·_transpose_int32_uint64_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] per element: src holds 4-byte indices (sign-extended), map entries are 32-bit and sign-extended into 8-byte dest slots. $0-32: no local frame, 32 bytes of arguments.
-
- MOVQ src+0(FP), DI // DI (rdi) = src argument
- MOVQ dest+8(FP), SI // SI (rsi) = dest argument
- MOVQ length+16(FP), DX // DX (rdx) = length argument; only its low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX (rcx) = transposeMap argument
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB53_1 // fewer than 4 elements: skip the unrolled loop
-
-LBB53_5: // unrolled loop: four elements per iteration, eax keeps the count on entry, edx becomes count-4
- WORD $0xd089 // mov eax, edx
- WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- LONG $0x04576348 // movsxd rdx, dword [rdi + 4]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- LONG $0x08576348 // movsxd rdx, dword [rdi + 8]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- LONG $0x0c576348 // movsxd rdx, dword [rdi + 12]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB53_5 // repeat while the count on entry to this iteration exceeded 7, i.e. at least 4 elements remain
-
-LBB53_1: // remainder: edx holds the number of elements still to process
- WORD $0xd285 // test edx, edx
- JLE LBB53_4 // nothing left (count <= 0): return
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB53_3: // one element per iteration; r8 is the byte offset into src (4 per element), dest is addressed at 2*r8
- LONG $0x0704634a // movsxd rax, dword [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0x4604894a // mov qword [rsi + 2*r8], rax
- LONG $0x04c08349 // add r8, 4
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB53_3 // loop while the biased counter is still above 1
-
-LBB53_4: // common exit
- RET
-
-TEXT ·_transpose_uint64_uint64_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] per element: src holds 8-byte indices used as loaded, map entries are 32-bit and sign-extended into 8-byte dest slots. $0-32: no local frame, 32 bytes of arguments.
-
- MOVQ src+0(FP), DI // DI (rdi) = src argument
- MOVQ dest+8(FP), SI // SI (rsi) = dest argument
- MOVQ length+16(FP), DX // DX (rdx) = length argument; only its low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX (rcx) = transposeMap argument
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB54_1 // fewer than 4 elements: skip the unrolled loop
-
-LBB54_5: // unrolled loop: four elements per iteration, eax keeps the count on entry, edx becomes count-4
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB54_5 // repeat while the count on entry to this iteration exceeded 7, i.e. at least 4 elements remain
-
-LBB54_1: // remainder: edx holds the number of elements still to process
- WORD $0xd285 // test edx, edx
- JLE LBB54_4 // nothing left (count <= 0): return
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB54_3: // one element per iteration; r8 is the byte offset shared by src and dest (8 per element)
- LONG $0x07048b4a // mov rax, qword [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0x0604894a // mov qword [rsi + r8], rax
- LONG $0x08c08349 // add r8, 8
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB54_3 // loop while the biased counter is still above 1
-
-LBB54_4: // common exit
- RET
-
-TEXT ·_transpose_int64_uint64_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] per element: src holds 8-byte indices used as loaded, map entries are 32-bit and sign-extended into 8-byte dest slots. $0-32: no local frame, 32 bytes of arguments.
-
- MOVQ src+0(FP), DI // DI (rdi) = src argument
- MOVQ dest+8(FP), SI // SI (rsi) = dest argument
- MOVQ length+16(FP), DX // DX (rdx) = length argument; only its low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX (rcx) = transposeMap argument
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB55_1 // fewer than 4 elements: skip the unrolled loop
-
-LBB55_5: // unrolled loop: four elements per iteration, eax keeps the count on entry, edx becomes count-4
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB55_5 // repeat while the count on entry to this iteration exceeded 7, i.e. at least 4 elements remain
-
-LBB55_1: // remainder: edx holds the number of elements still to process
- WORD $0xd285 // test edx, edx
- JLE LBB55_4 // nothing left (count <= 0): return
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB55_3: // one element per iteration; r8 is the byte offset shared by src and dest (8 per element)
- LONG $0x07048b4a // mov rax, qword [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0x0604894a // mov qword [rsi + r8], rax
- LONG $0x08c08349 // add r8, 8
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB55_3 // loop while the biased counter is still above 1
-
-LBB55_4: // common exit
- RET
-
-TEXT ·_transpose_uint8_int64_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] per element: src holds 1-byte indices (zero-extended), map entries are 32-bit and sign-extended into 8-byte dest slots. $0-32: no local frame, 32 bytes of arguments.
-
- MOVQ src+0(FP), DI // DI (rdi) = src argument
- MOVQ dest+8(FP), SI // SI (rsi) = dest argument
- MOVQ length+16(FP), DX // DX (rdx) = length argument; only its low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX (rcx) = transposeMap argument
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB56_1 // fewer than 4 elements: skip the unrolled loop
-
-LBB56_5: // unrolled loop: four elements per iteration, eax keeps the count on entry, edx becomes count-4
- WORD $0xd089 // mov eax, edx
- WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- LONG $0x0157b60f // movzx edx, byte [rdi + 1]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- LONG $0x0257b60f // movzx edx, byte [rdi + 2]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- LONG $0x0357b60f // movzx edx, byte [rdi + 3]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB56_5 // repeat while the count on entry to this iteration exceeded 7, i.e. at least 4 elements remain
-
-LBB56_1: // remainder: edx holds the number of elements still to process
- WORD $0xd285 // test edx, edx
- JLE LBB56_4 // nothing left (count <= 0): return
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB56_3: // one element per iteration; r8 is the byte offset into src, edx is the remaining count plus one
- LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0xc604894a // mov qword [rsi + 8*r8], rax
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB56_3 // loop while the biased counter is still above 1
-
-LBB56_4: // common exit
- RET
-
-TEXT ·_transpose_int8_int64_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] per element: src holds 1-byte indices (sign-extended), map entries are 32-bit and sign-extended into 8-byte dest slots. $0-32: no local frame, 32 bytes of arguments.
-
- MOVQ src+0(FP), DI // DI (rdi) = src argument
- MOVQ dest+8(FP), SI // SI (rsi) = dest argument
- MOVQ length+16(FP), DX // DX (rdx) = length argument; only its low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX (rcx) = transposeMap argument
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB57_1 // fewer than 4 elements: skip the unrolled loop
-
-LBB57_5: // unrolled loop: four elements per iteration, eax keeps the count on entry, edx becomes count-4
- WORD $0xd089 // mov eax, edx
- LONG $0x17be0f48 // movsx rdx, byte [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x04c78348 // add rdi, 4
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB57_5 // repeat while the count on entry to this iteration exceeded 7, i.e. at least 4 elements remain
-
-LBB57_1: // remainder: edx holds the number of elements still to process
- WORD $0xd285 // test edx, edx
- JLE LBB57_4 // nothing left (count <= 0): return
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB57_3: // one element per iteration; r8 is the byte offset into src, edx is the remaining count plus one
- LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0xc604894a // mov qword [rsi + 8*r8], rax
- LONG $0x01c08349 // add r8, 1
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB57_3 // loop while the biased counter is still above 1
-
-LBB57_4: // common exit
- RET
-
-TEXT ·_transpose_uint16_int64_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] per element: src holds 2-byte indices (zero-extended), map entries are 32-bit and sign-extended into 8-byte dest slots. $0-32: no local frame, 32 bytes of arguments.
-
- MOVQ src+0(FP), DI // DI (rdi) = src argument
- MOVQ dest+8(FP), SI // SI (rsi) = dest argument
- MOVQ length+16(FP), DX // DX (rdx) = length argument; only its low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX (rcx) = transposeMap argument
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB58_1 // fewer than 4 elements: skip the unrolled loop
-
-LBB58_5: // unrolled loop: four elements per iteration, eax keeps the count on entry, edx becomes count-4
- WORD $0xd089 // mov eax, edx
- WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- LONG $0x0257b70f // movzx edx, word [rdi + 2]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- LONG $0x0457b70f // movzx edx, word [rdi + 4]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- LONG $0x0657b70f // movzx edx, word [rdi + 6]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB58_5 // repeat while the count on entry to this iteration exceeded 7, i.e. at least 4 elements remain
-
-LBB58_1: // remainder: edx holds the number of elements still to process
- WORD $0xd285 // test edx, edx
- JLE LBB58_4 // nothing left (count <= 0): return
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB58_3: // one element per iteration; r8 is the byte offset into src (2 per element), dest is addressed at 4*r8
- LONG $0x04b70f42; BYTE $0x07 // movzx eax, word [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0x8604894a // mov qword [rsi + 4*r8], rax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB58_3 // loop while the biased counter is still above 1
-
-LBB58_4: // common exit
- RET
-
-TEXT ·_transpose_int16_int64_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] per element: src holds 2-byte indices (sign-extended), map entries are 32-bit and sign-extended into 8-byte dest slots. $0-32: no local frame, 32 bytes of arguments.
-
- MOVQ src+0(FP), DI // DI (rdi) = src argument
- MOVQ dest+8(FP), SI // SI (rsi) = dest argument
- MOVQ length+16(FP), DX // DX (rdx) = length argument; only its low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX (rcx) = transposeMap argument
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB59_1 // fewer than 4 elements: skip the unrolled loop
-
-LBB59_5: // unrolled loop: four elements per iteration, eax keeps the count on entry, edx becomes count-4
- WORD $0xd089 // mov eax, edx
- LONG $0x17bf0f48 // movsx rdx, word [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x08c78348 // add rdi, 8
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB59_5 // repeat while the count on entry to this iteration exceeded 7, i.e. at least 4 elements remain
-
-LBB59_1: // remainder: edx holds the number of elements still to process
- WORD $0xd285 // test edx, edx
- JLE LBB59_4 // nothing left (count <= 0): return
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB59_3: // one element per iteration; r8 is the byte offset into src (2 per element), dest is addressed at 4*r8
- LONG $0x04bf0f4a; BYTE $0x07 // movsx rax, word [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0x8604894a // mov qword [rsi + 4*r8], rax
- LONG $0x02c08349 // add r8, 2
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB59_3 // loop while the biased counter is still above 1
-
-LBB59_4: // common exit
- RET
-
-TEXT ·_transpose_uint32_int64_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] per element: src holds 4-byte indices (plain 32-bit loads), map entries are 32-bit and sign-extended into 8-byte dest slots. $0-32: no local frame, 32 bytes of arguments.
-
- MOVQ src+0(FP), DI // DI (rdi) = src argument
- MOVQ dest+8(FP), SI // SI (rsi) = dest argument
- MOVQ length+16(FP), DX // DX (rdx) = length argument; only its low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX (rcx) = transposeMap argument
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB60_1 // fewer than 4 elements: skip the unrolled loop
-
-LBB60_5: // unrolled loop: four elements per iteration, eax keeps the count on entry, edx becomes count-4
- WORD $0xd089 // mov eax, edx
- WORD $0x178b // mov edx, dword [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB60_5 // repeat while the count on entry to this iteration exceeded 7, i.e. at least 4 elements remain
-
-LBB60_1: // remainder: edx holds the number of elements still to process
- WORD $0xd285 // test edx, edx
- JLE LBB60_4 // nothing left (count <= 0): return
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB60_3: // one element per iteration; r8 is the byte offset into src (4 per element), dest is addressed at 2*r8
- LONG $0x07048b42 // mov eax, dword [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0x4604894a // mov qword [rsi + 2*r8], rax
- LONG $0x04c08349 // add r8, 4
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB60_3 // loop while the biased counter is still above 1
-
-LBB60_4: // common exit
- RET
-
-TEXT ·_transpose_int32_int64_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] per element: src holds 4-byte indices (sign-extended), map entries are 32-bit and sign-extended into 8-byte dest slots. $0-32: no local frame, 32 bytes of arguments.
-
- MOVQ src+0(FP), DI // DI (rdi) = src argument
- MOVQ dest+8(FP), SI // SI (rsi) = dest argument
- MOVQ length+16(FP), DX // DX (rdx) = length argument; only its low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX (rcx) = transposeMap argument
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB61_1 // fewer than 4 elements: skip the unrolled loop
-
-LBB61_5: // unrolled loop: four elements per iteration, eax keeps the count on entry, edx becomes count-4
- WORD $0xd089 // mov eax, edx
- WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- LONG $0x04576348 // movsxd rdx, dword [rdi + 4]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- LONG $0x08576348 // movsxd rdx, dword [rdi + 8]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- LONG $0x0c576348 // movsxd rdx, dword [rdi + 12]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x10c78348 // add rdi, 16
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB61_5 // repeat while the count on entry to this iteration exceeded 7, i.e. at least 4 elements remain
-
-LBB61_1: // remainder: edx holds the number of elements still to process
- WORD $0xd285 // test edx, edx
- JLE LBB61_4 // nothing left (count <= 0): return
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB61_3: // one element per iteration; r8 is the byte offset into src (4 per element), dest is addressed at 2*r8
- LONG $0x0704634a // movsxd rax, dword [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0x4604894a // mov qword [rsi + 2*r8], rax
- LONG $0x04c08349 // add r8, 4
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB61_3 // loop while the biased counter is still above 1
-
-LBB61_4: // common exit
- RET
-
-TEXT ·_transpose_uint64_int64_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] per element: src holds 8-byte indices used as loaded, map entries are 32-bit and sign-extended into 8-byte dest slots. $0-32: no local frame, 32 bytes of arguments.
-
- MOVQ src+0(FP), DI // DI (rdi) = src argument
- MOVQ dest+8(FP), SI // SI (rsi) = dest argument
- MOVQ length+16(FP), DX // DX (rdx) = length argument; only its low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX (rcx) = transposeMap argument
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB62_1 // fewer than 4 elements: skip the unrolled loop
-
-LBB62_5: // unrolled loop: four elements per iteration, eax keeps the count on entry, edx becomes count-4
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB62_5 // repeat while the count on entry to this iteration exceeded 7, i.e. at least 4 elements remain
-
-LBB62_1: // remainder: edx holds the number of elements still to process
- WORD $0xd285 // test edx, edx
- JLE LBB62_4 // nothing left (count <= 0): return
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB62_3: // one element per iteration; r8 is the byte offset shared by src and dest (8 per element)
- LONG $0x07048b4a // mov rax, qword [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0x0604894a // mov qword [rsi + r8], rax
- LONG $0x08c08349 // add r8, 8
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB62_3 // loop while the biased counter is still above 1
-
-LBB62_4: // common exit
- RET
-
-TEXT ·_transpose_int64_int64_sse4(SB), $0-32 // dest[i] = transposeMap[src[i]] per element: src holds 8-byte indices used as loaded, map entries are 32-bit and sign-extended into 8-byte dest slots. $0-32: no local frame, 32 bytes of arguments.
-
- MOVQ src+0(FP), DI // DI (rdi) = src argument
- MOVQ dest+8(FP), SI // SI (rsi) = dest argument
- MOVQ length+16(FP), DX // DX (rdx) = length argument; only its low 32 bits (edx) are examined below
- MOVQ transposeMap+24(FP), CX // CX (rcx) = transposeMap argument
-
- WORD $0xfa83; BYTE $0x04 // cmp edx, 4
- JL LBB63_1 // fewer than 4 elements: skip the unrolled loop
-
-LBB63_5: // unrolled loop: four elements per iteration, eax keeps the count on entry, edx becomes count-4
- WORD $0xd089 // mov eax, edx
- WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx
- LONG $0x08578b48 // mov rdx, qword [rdi + 8]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x08568948 // mov qword [rsi + 8], rdx
- LONG $0x10578b48 // mov rdx, qword [rdi + 16]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x10568948 // mov qword [rsi + 16], rdx
- LONG $0x18578b48 // mov rdx, qword [rdi + 24]
- LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx]
- LONG $0x18568948 // mov qword [rsi + 24], rdx
- WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4]
- LONG $0x20c78348 // add rdi, 32
- LONG $0x20c68348 // add rsi, 32
- WORD $0xf883; BYTE $0x07 // cmp eax, 7
- JG LBB63_5 // repeat while the count on entry to this iteration exceeded 7, i.e. at least 4 elements remain
-
-LBB63_1: // remainder: edx holds the number of elements still to process
- WORD $0xd285 // test edx, edx
- JLE LBB63_4 // nothing left (count <= 0): return
- WORD $0xc283; BYTE $0x01 // add edx, 1
- WORD $0x3145; BYTE $0xc0 // xor r8d, r8d
-
-LBB63_3: // one element per iteration; r8 is the byte offset shared by src and dest (8 per element)
- LONG $0x07048b4a // mov rax, qword [rdi + r8]
- LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax]
- LONG $0x0604894a // mov qword [rsi + r8], rax
- LONG $0x08c08349 // add r8, 8
- WORD $0xc283; BYTE $0xff // add edx, -1
- WORD $0xfa83; BYTE $0x01 // cmp edx, 1
- JG LBB63_3 // loop while the biased counter is still above 1
-
-LBB63_4: // common exit
- RET