From c97c816133b42257d0bcf1ee4bd178bb2a7a2b9e Mon Sep 17 00:00:00 2001 From: Taras Madan Date: Tue, 10 Sep 2024 12:16:33 +0200 Subject: vendor: update --- .../go/v14/internal/bitutils/bit_block_counter.go | 452 --- .../go/v14/internal/bitutils/bit_run_reader.go | 151 - .../go/v14/internal/bitutils/bit_set_run_reader.go | 361 --- .../go/v14/internal/bitutils/bitmap_generate.go | 109 - .../arrow/go/v14/internal/hashing/hash_funcs.go | 90 - .../arrow/go/v14/internal/hashing/hash_string.go | 26 - .../go/v14/internal/hashing/hash_string_go1.19.go | 37 - .../arrow/go/v14/internal/hashing/types.tmpldata | 42 - .../go/v14/internal/hashing/xxh3_memo_table.gen.go | 2833 ------------------ .../internal/hashing/xxh3_memo_table.gen.go.tmpl | 349 --- .../go/v14/internal/hashing/xxh3_memo_table.go | 443 --- .../apache/arrow/go/v14/internal/json/json.go | 51 - .../arrow/go/v14/internal/json/json_tinygo.go | 51 - .../apache/arrow/go/v14/internal/utils/Makefile | 80 - .../arrow/go/v14/internal/utils/buf_reader.go | 212 -- .../arrow/go/v14/internal/utils/endians_default.go | 30 - .../arrow/go/v14/internal/utils/endians_s390x.go | 33 - .../apache/arrow/go/v14/internal/utils/math.go | 49 - .../apache/arrow/go/v14/internal/utils/min_max.go | 212 -- .../arrow/go/v14/internal/utils/min_max_amd64.go | 55 - .../arrow/go/v14/internal/utils/min_max_arm64.go | 65 - .../go/v14/internal/utils/min_max_avx2_amd64.go | 90 - .../go/v14/internal/utils/min_max_avx2_amd64.s | 927 ------ .../go/v14/internal/utils/min_max_neon_arm64.go | 56 - .../go/v14/internal/utils/min_max_neon_arm64.s | 324 --- .../arrow/go/v14/internal/utils/min_max_noasm.go | 31 - .../arrow/go/v14/internal/utils/min_max_ppc64le.go | 30 - .../arrow/go/v14/internal/utils/min_max_s390x.go | 30 - .../go/v14/internal/utils/min_max_sse4_amd64.go | 88 - .../go/v14/internal/utils/min_max_sse4_amd64.s | 1044 ------- .../arrow/go/v14/internal/utils/transpose_ints.go | 407 --- .../go/v14/internal/utils/transpose_ints.go.tmpl | 34 - .../go/v14/internal/utils/transpose_ints.tmpldata | 34 - .../go/v14/internal/utils/transpose_ints_amd64.go | 325 --- .../internal/utils/transpose_ints_amd64.go.tmpl | 75 - .../go/v14/internal/utils/transpose_ints_arm64.go | 96 - .../internal/utils/transpose_ints_avx2_amd64.go | 473 --- .../v14/internal/utils/transpose_ints_avx2_amd64.s | 3074 -------------------- .../go/v14/internal/utils/transpose_ints_def.go | 227 -- .../go/v14/internal/utils/transpose_ints_noasm.go | 96 - .../internal/utils/transpose_ints_noasm.go.tmpl | 34 - .../v14/internal/utils/transpose_ints_ppc64le.go | 96 - .../go/v14/internal/utils/transpose_ints_s390x.go | 96 - .../internal/utils/transpose_ints_s390x.go.tmpl | 34 - .../v14/internal/utils/transpose_ints_simd.go.tmpl | 42 - .../internal/utils/transpose_ints_sse4_amd64.go | 473 --- .../v14/internal/utils/transpose_ints_sse4_amd64.s | 3074 -------------------- 47 files changed, 17041 deletions(-) delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/bitutils/bit_block_counter.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/bitutils/bit_run_reader.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/bitutils/bit_set_run_reader.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/bitutils/bitmap_generate.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/hashing/hash_funcs.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/hashing/hash_string.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/hashing/hash_string_go1.19.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/hashing/types.tmpldata delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/hashing/xxh3_memo_table.gen.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/hashing/xxh3_memo_table.gen.go.tmpl delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/hashing/xxh3_memo_table.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/json/json.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/json/json_tinygo.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/Makefile delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/buf_reader.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/endians_default.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/endians_s390x.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/math.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/min_max.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_amd64.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_arm64.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_avx2_amd64.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_avx2_amd64.s delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_neon_arm64.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_neon_arm64.s delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_noasm.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_ppc64le.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_s390x.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_sse4_amd64.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_sse4_amd64.s delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints.go.tmpl delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints.tmpldata delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_amd64.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_amd64.go.tmpl delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_arm64.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_avx2_amd64.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_avx2_amd64.s delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_def.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_noasm.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_noasm.go.tmpl delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_ppc64le.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_s390x.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_s390x.go.tmpl delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_simd.go.tmpl delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_sse4_amd64.go delete mode 100644 vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_sse4_amd64.s (limited to 'vendor/github.com/apache/arrow/go/v14/internal') diff --git a/vendor/github.com/apache/arrow/go/v14/internal/bitutils/bit_block_counter.go b/vendor/github.com/apache/arrow/go/v14/internal/bitutils/bit_block_counter.go deleted file mode 100644 index 86818bfd4..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/bitutils/bit_block_counter.go +++ /dev/null @@ -1,452 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package bitutils - -import ( - "math" - "math/bits" - "unsafe" - - "github.com/apache/arrow/go/v14/arrow/bitutil" - "github.com/apache/arrow/go/v14/internal/utils" -) - -func loadWord(byt []byte) uint64 { - return utils.ToLEUint64(*(*uint64)(unsafe.Pointer(&byt[0]))) -} - -func shiftWord(current, next uint64, shift int64) uint64 { - if shift == 0 { - return current - } - return (current >> shift) | (next << (64 - shift)) -} - -// BitBlockCount is returned by the various bit block counter utilities -// in order to return a length of bits and the population count of that -// slice of bits. -type BitBlockCount struct { - Len int16 - Popcnt int16 -} - -// NoneSet returns true if ALL the bits were 0 in this set, ie: Popcnt == 0 -func (b BitBlockCount) NoneSet() bool { - return b.Popcnt == 0 -} - -// AllSet returns true if ALL the bits were 1 in this set, ie: Popcnt == Len -func (b BitBlockCount) AllSet() bool { - return b.Len == b.Popcnt -} - -// BitBlockCounter is a utility for grabbing chunks of a bitmap at a time and efficiently -// counting the number of bits which are 1. -type BitBlockCounter struct { - bitmap []byte - bitsRemaining int64 - bitOffset int8 -} - -const ( - wordBits int64 = 64 - fourWordsBits int64 = wordBits * 4 -) - -// NewBitBlockCounter returns a BitBlockCounter for the passed bitmap starting at startOffset -// of length nbits. -func NewBitBlockCounter(bitmap []byte, startOffset, nbits int64) *BitBlockCounter { - return &BitBlockCounter{ - bitmap: bitmap[startOffset/8:], - bitsRemaining: nbits, - bitOffset: int8(startOffset % 8), - } -} - -// getBlockSlow is for returning a block of the requested size when there aren't -// enough bits remaining to do a full word computation. -func (b *BitBlockCounter) getBlockSlow(blockSize int64) BitBlockCount { - runlen := int16(utils.Min(b.bitsRemaining, blockSize)) - popcnt := int16(bitutil.CountSetBits(b.bitmap, int(b.bitOffset), int(runlen))) - b.bitsRemaining -= int64(runlen) - b.bitmap = b.bitmap[runlen/8:] - return BitBlockCount{runlen, popcnt} -} - -// NextFourWords returns the next run of available bits, usually 256. The -// returned pair contains the size of run and the number of true values. -// The last block will have a length less than 256 if the bitmap length -// is not a multiple of 256, and will return 0-length blocks in subsequent -// invocations. -func (b *BitBlockCounter) NextFourWords() BitBlockCount { - if b.bitsRemaining == 0 { - return BitBlockCount{0, 0} - } - - totalPopcnt := 0 - if b.bitOffset == 0 { - // if we're aligned at 0 bitoffset, then we can easily just jump from - // word to word nice and easy. - if b.bitsRemaining < fourWordsBits { - return b.getBlockSlow(fourWordsBits) - } - totalPopcnt += bits.OnesCount64(loadWord(b.bitmap)) - totalPopcnt += bits.OnesCount64(loadWord(b.bitmap[8:])) - totalPopcnt += bits.OnesCount64(loadWord(b.bitmap[16:])) - totalPopcnt += bits.OnesCount64(loadWord(b.bitmap[24:])) - } else { - // When the offset is > 0, we need there to be a word beyond the last - // aligned word in the bitmap for the bit shifting logic. - if b.bitsRemaining < 5*fourWordsBits-int64(b.bitOffset) { - return b.getBlockSlow(fourWordsBits) - } - - current := loadWord(b.bitmap) - next := loadWord(b.bitmap[8:]) - totalPopcnt += bits.OnesCount64(shiftWord(current, next, int64(b.bitOffset))) - - current = next - next = loadWord(b.bitmap[16:]) - totalPopcnt += bits.OnesCount64(shiftWord(current, next, int64(b.bitOffset))) - - current = next - next = loadWord(b.bitmap[24:]) - totalPopcnt += bits.OnesCount64(shiftWord(current, next, int64(b.bitOffset))) - - current = next - next = loadWord(b.bitmap[32:]) - totalPopcnt += bits.OnesCount64(shiftWord(current, next, int64(b.bitOffset))) - } - b.bitmap = b.bitmap[bitutil.BytesForBits(fourWordsBits):] - b.bitsRemaining -= fourWordsBits - return BitBlockCount{256, int16(totalPopcnt)} -} - -// NextWord returns the next run of available bits, usually 64. The returned -// pair contains the size of run and the number of true values. The last -// block will have a length less than 64 if the bitmap length is not a -// multiple of 64, and will return 0-length blocks in subsequent -// invocations. -func (b *BitBlockCounter) NextWord() BitBlockCount { - if b.bitsRemaining == 0 { - return BitBlockCount{0, 0} - } - popcnt := 0 - if b.bitOffset == 0 { - if b.bitsRemaining < wordBits { - return b.getBlockSlow(wordBits) - } - popcnt = bits.OnesCount64(loadWord(b.bitmap)) - } else { - // When the offset is > 0, we need there to be a word beyond the last - // aligned word in the bitmap for the bit shifting logic. - if b.bitsRemaining < (2*wordBits - int64(b.bitOffset)) { - return b.getBlockSlow(wordBits) - } - popcnt = bits.OnesCount64(shiftWord(loadWord(b.bitmap), loadWord(b.bitmap[8:]), int64(b.bitOffset))) - } - b.bitmap = b.bitmap[wordBits/8:] - b.bitsRemaining -= wordBits - return BitBlockCount{64, int16(popcnt)} -} - -// OptionalBitBlockCounter is a useful counter to iterate through a possibly -// non-existent validity bitmap to allow us to write one code path for both -// the with-nulls and no-nulls cases without giving up a lot of performance. -type OptionalBitBlockCounter struct { - hasBitmap bool - pos int64 - len int64 - counter *BitBlockCounter -} - -// NewOptionalBitBlockCounter constructs and returns a new bit block counter that -// can properly handle the case when a bitmap is null, if it is guaranteed that the -// the bitmap is not nil, then prefer NewBitBlockCounter here. -func NewOptionalBitBlockCounter(bitmap []byte, offset, length int64) *OptionalBitBlockCounter { - var counter *BitBlockCounter - if bitmap != nil { - counter = NewBitBlockCounter(bitmap, offset, length) - } - return &OptionalBitBlockCounter{ - hasBitmap: bitmap != nil, - pos: 0, - len: length, - counter: counter, - } -} - -// NextBlock returns block count for next word when the bitmap is available otherwise -// return a block with length up to INT16_MAX when there is no validity -// bitmap (so all the referenced values are not null). -func (obc *OptionalBitBlockCounter) NextBlock() BitBlockCount { - const maxBlockSize = math.MaxInt16 - if obc.hasBitmap { - block := obc.counter.NextWord() - obc.pos += int64(block.Len) - return block - } - - blockSize := int16(utils.Min(maxBlockSize, obc.len-obc.pos)) - obc.pos += int64(blockSize) - // all values are non-null - return BitBlockCount{blockSize, blockSize} -} - -// NextWord is like NextBlock, but returns a word-sized block even when there is no -// validity bitmap -func (obc *OptionalBitBlockCounter) NextWord() BitBlockCount { - const wordsize = 64 - if obc.hasBitmap { - block := obc.counter.NextWord() - obc.pos += int64(block.Len) - return block - } - blockSize := int16(utils.Min(wordsize, obc.len-obc.pos)) - obc.pos += int64(blockSize) - // all values are non-null - return BitBlockCount{blockSize, blockSize} -} - -// VisitBitBlocks is a utility for easily iterating through the blocks of bits in a bitmap, -// calling the appropriate visitValid/visitInvalid function as we iterate through the bits. -// visitValid is called with the bitoffset of the valid bit. Don't use this inside a tight -// loop when performance is needed and instead prefer manually constructing these loops -// in that scenario. -func VisitBitBlocks(bitmap []byte, offset, length int64, visitValid func(pos int64), visitInvalid func()) { - counter := NewOptionalBitBlockCounter(bitmap, offset, length) - pos := int64(0) - for pos < length { - block := counter.NextBlock() - if block.AllSet() { - for i := 0; i < int(block.Len); i, pos = i+1, pos+1 { - visitValid(pos) - } - } else if block.NoneSet() { - for i := 0; i < int(block.Len); i, pos = i+1, pos+1 { - visitInvalid() - } - } else { - for i := 0; i < int(block.Len); i, pos = i+1, pos+1 { - if bitutil.BitIsSet(bitmap, int(offset+pos)) { - visitValid(pos) - } else { - visitInvalid() - } - } - } - } -} - -// VisitBitBlocks is a utility for easily iterating through the blocks of bits in a bitmap, -// calling the appropriate visitValid/visitInvalid function as we iterate through the bits. -// visitValid is called with the bitoffset of the valid bit. Don't use this inside a tight -// loop when performance is needed and instead prefer manually constructing these loops -// in that scenario. -func VisitBitBlocksShort(bitmap []byte, offset, length int64, visitValid func(pos int64) error, visitInvalid func() error) error { - counter := NewOptionalBitBlockCounter(bitmap, offset, length) - pos := int64(0) - for pos < length { - block := counter.NextBlock() - if block.AllSet() { - for i := 0; i < int(block.Len); i, pos = i+1, pos+1 { - if err := visitValid(pos); err != nil { - return err - } - } - } else if block.NoneSet() { - for i := 0; i < int(block.Len); i, pos = i+1, pos+1 { - if err := visitInvalid(); err != nil { - return err - } - } - } else { - for i := 0; i < int(block.Len); i, pos = i+1, pos+1 { - if bitutil.BitIsSet(bitmap, int(offset+pos)) { - if err := visitValid(pos); err != nil { - return err - } - } else { - if err := visitInvalid(); err != nil { - return err - } - } - } - } - } - return nil -} - -func VisitTwoBitBlocks(leftBitmap, rightBitmap []byte, leftOffset, rightOffset int64, len int64, visitValid func(pos int64), visitNull func()) { - if leftBitmap == nil || rightBitmap == nil { - // at most one is present - if leftBitmap == nil { - VisitBitBlocks(rightBitmap, rightOffset, len, visitValid, visitNull) - } else { - VisitBitBlocks(leftBitmap, leftOffset, len, visitValid, visitNull) - } - return - } - - bitCounter := NewBinaryBitBlockCounter(leftBitmap, rightBitmap, leftOffset, rightOffset, len) - var pos int64 - for pos < len { - block := bitCounter.NextAndWord() - if block.AllSet() { - for i := 0; i < int(block.Len); i, pos = i+1, pos+1 { - visitValid(pos) - } - } else if block.NoneSet() { - for i := 0; i < int(block.Len); i, pos = i+1, pos+1 { - visitNull() - } - } else { - for i := 0; i < int(block.Len); i, pos = i+1, pos+1 { - if bitutil.BitIsSet(leftBitmap, int(leftOffset+pos)) && bitutil.BitIsSet(rightBitmap, int(rightOffset+pos)) { - visitValid(pos) - } else { - visitNull() - } - } - } - } -} - -type bitOp struct { - bit func(bool, bool) bool - word func(uint64, uint64) uint64 -} - -var ( - bitBlockAnd = bitOp{ - bit: func(a, b bool) bool { return a && b }, - word: func(a, b uint64) uint64 { return a & b }, - } - bitBlockAndNot = bitOp{ - bit: func(a, b bool) bool { return a && !b }, - word: func(a, b uint64) uint64 { return a &^ b }, - } - bitBlockOr = bitOp{ - bit: func(a, b bool) bool { return a || b }, - word: func(a, b uint64) uint64 { return a | b }, - } - bitBlockOrNot = bitOp{ - bit: func(a, b bool) bool { return a || !b }, - word: func(a, b uint64) uint64 { return a | ^b }, - } -) - -// BinaryBitBlockCounter computes popcounts on the result of bitwise -// operations between two bitmaps, 64 bits at a time. A 64-bit word -// is loaded from each bitmap, then the popcount is computed on -// e.g. the bitwise-and of the two words -type BinaryBitBlockCounter struct { - left []byte - right []byte - bitsRemaining int64 - leftOffset, rightOffset int64 - - bitsRequiredForWords int64 -} - -// NewBinaryBitBlockCounter constructs a binary bit block counter for -// computing the popcounts on the results of operations between -// the passed in bitmaps, with their respective offsets. -func NewBinaryBitBlockCounter(left, right []byte, leftOffset, rightOffset int64, length int64) *BinaryBitBlockCounter { - ret := &BinaryBitBlockCounter{ - left: left[leftOffset/8:], - right: right[rightOffset/8:], - leftOffset: leftOffset % 8, - rightOffset: rightOffset % 8, - bitsRemaining: length, - } - - leftBitsReq := int64(64) - if ret.leftOffset != 0 { - leftBitsReq = 64 + (64 - ret.leftOffset) - } - rightBitsReq := int64(64) - if ret.rightOffset != 0 { - rightBitsReq = 64 + (64 - ret.rightOffset) - } - - if leftBitsReq > rightBitsReq { - ret.bitsRequiredForWords = leftBitsReq - } else { - ret.bitsRequiredForWords = rightBitsReq - } - - return ret -} - -// NextAndWord returns the popcount of the bitwise-and of the next run -// of available bits, up to 64. The returned pair contains the size of -// the run and the number of true values. the last block will have a -// length less than 64 if the bitmap length is not a multiple of 64, -// and will return 0-length blocks in subsequent invocations -func (b *BinaryBitBlockCounter) NextAndWord() BitBlockCount { return b.nextWord(bitBlockAnd) } - -// NextAndNotWord is like NextAndWord but performs x &^ y on each run -func (b *BinaryBitBlockCounter) NextAndNotWord() BitBlockCount { return b.nextWord(bitBlockAndNot) } - -// NextOrWord is like NextAndWord but performs x | y on each run -func (b *BinaryBitBlockCounter) NextOrWord() BitBlockCount { return b.nextWord(bitBlockOr) } - -// NextOrWord is like NextAndWord but performs x | ^y on each run -func (b *BinaryBitBlockCounter) NextOrNotWord() BitBlockCount { return b.nextWord(bitBlockOrNot) } - -func (b *BinaryBitBlockCounter) nextWord(op bitOp) BitBlockCount { - if b.bitsRemaining == 0 { - return BitBlockCount{} - } - - // when offset is >0, we need there to be a word beyond the last - // aligned word in the bitmap for the bit shifting logic - if b.bitsRemaining < b.bitsRequiredForWords { - runLength := int16(b.bitsRemaining) - if runLength > int16(wordBits) { - runLength = int16(wordBits) - } - - var popcount int16 - for i := int16(0); i < runLength; i++ { - if op.bit(bitutil.BitIsSet(b.left, int(b.leftOffset)+int(i)), - bitutil.BitIsSet(b.right, int(b.rightOffset)+int(i))) { - popcount++ - } - } - // this code path should trigger _at most_ 2 times. in the "two times" - // case, the first time the run length will be a multiple of 8. - b.left = b.left[runLength/8:] - b.right = b.right[runLength/8:] - b.bitsRemaining -= int64(runLength) - return BitBlockCount{Len: runLength, Popcnt: popcount} - } - - var popcount int - if b.leftOffset == 0 && b.rightOffset == 0 { - popcount = bits.OnesCount64(op.word(loadWord(b.left), loadWord(b.right))) - } else { - leftWord := shiftWord(loadWord(b.left), loadWord(b.left[8:]), b.leftOffset) - rightWord := shiftWord(loadWord(b.right), loadWord(b.right[8:]), b.rightOffset) - popcount = bits.OnesCount64(op.word(leftWord, rightWord)) - } - b.left = b.left[wordBits/8:] - b.right = b.right[wordBits/8:] - b.bitsRemaining -= wordBits - return BitBlockCount{Len: int16(wordBits), Popcnt: int16(popcount)} -} diff --git a/vendor/github.com/apache/arrow/go/v14/internal/bitutils/bit_run_reader.go b/vendor/github.com/apache/arrow/go/v14/internal/bitutils/bit_run_reader.go deleted file mode 100644 index a1686a490..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/bitutils/bit_run_reader.go +++ /dev/null @@ -1,151 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package bitutils - -import ( - "encoding/binary" - "fmt" - "math/bits" - "unsafe" - - "github.com/apache/arrow/go/v14/arrow" - "github.com/apache/arrow/go/v14/arrow/bitutil" - "github.com/apache/arrow/go/v14/internal/utils" -) - -// BitRun represents a run of bits with the same value of length Len -// with Set representing if the group of bits were 1 or 0. -type BitRun struct { - Len int64 - Set bool -} - -// BitRunReader is an interface that is usable by multiple callers to provide -// multiple types of bit run readers such as a reverse reader and so on. -// -// It's a convenience interface for counting contiguous set/unset bits in a bitmap. -// In places where BitBlockCounter can be used, then it would be preferred to use that -// as it would be faster than using BitRunReader. -type BitRunReader interface { - NextRun() BitRun -} - -func (b BitRun) String() string { - return fmt.Sprintf("{Length: %d, set=%t}", b.Len, b.Set) -} - -type bitRunReader struct { - bitmap []byte - pos int64 - length int64 - word uint64 - curRunBitSet bool -} - -// NewBitRunReader returns a reader for the given bitmap, offset and length that -// grabs runs of the same value bit at a time for easy iteration. -func NewBitRunReader(bitmap []byte, offset int64, length int64) BitRunReader { - ret := &bitRunReader{ - bitmap: bitmap[offset/8:], - pos: offset % 8, - length: (offset % 8) + length, - } - - if length == 0 { - return ret - } - - ret.curRunBitSet = bitutil.BitIsNotSet(bitmap, int(offset)) - bitsRemaining := length + ret.pos - ret.loadWord(bitsRemaining) - ret.word = ret.word &^ LeastSignificantBitMask(ret.pos) - return ret -} - -// NextRun returns a new BitRun containing the number of contiguous bits with the -// same value. Len == 0 indicates the end of the bitmap. -func (b *bitRunReader) NextRun() BitRun { - if b.pos >= b.length { - return BitRun{0, false} - } - - // This implementation relies on a efficient implementations of - // CountTrailingZeros and assumes that runs are more often then - // not. The logic is to incrementally find the next bit change - // from the current position. This is done by zeroing all - // bits in word_ up to position_ and using the TrailingZeroCount - // to find the index of the next set bit. - - // The runs alternate on each call, so flip the bit. - b.curRunBitSet = !b.curRunBitSet - - start := b.pos - startOffset := start & 63 - - // Invert the word for proper use of CountTrailingZeros and - // clear bits so CountTrailingZeros can do it magic. - b.word = ^b.word &^ LeastSignificantBitMask(startOffset) - - // Go forward until the next change from unset to set. - newbits := int64(bits.TrailingZeros64(b.word)) - startOffset - b.pos += newbits - - if IsMultipleOf64(b.pos) && b.pos < b.length { - b.advanceUntilChange() - } - return BitRun{b.pos - start, b.curRunBitSet} -} - -func (b *bitRunReader) advanceUntilChange() { - newbits := int64(0) - for { - b.bitmap = b.bitmap[arrow.Uint64SizeBytes:] - b.loadNextWord() - newbits = int64(bits.TrailingZeros64(b.word)) - b.pos += newbits - if !IsMultipleOf64(b.pos) || b.pos >= b.length || newbits <= 0 { - break - } - } -} - -func (b *bitRunReader) loadNextWord() { - b.loadWord(b.length - b.pos) -} - -func (b *bitRunReader) loadWord(bitsRemaining int64) { - b.word = 0 - if bitsRemaining >= 64 { - b.word = binary.LittleEndian.Uint64(b.bitmap) - } else { - nbytes := bitutil.BytesForBits(bitsRemaining) - wordptr := (*(*[8]byte)(unsafe.Pointer(&b.word)))[:] - copy(wordptr, b.bitmap[:nbytes]) - - bitutil.SetBitTo(wordptr, int(bitsRemaining), bitutil.BitIsNotSet(wordptr, int(bitsRemaining-1))) - // reset the value to little endian for big endian architectures - b.word = utils.ToLEUint64(b.word) - } - - // Two cases: - // 1. For unset, CountTrailingZeros works naturally so we don't - // invert the word. - // 2. Otherwise invert so we can use CountTrailingZeros. - if b.curRunBitSet { - b.word = ^b.word - } -} diff --git a/vendor/github.com/apache/arrow/go/v14/internal/bitutils/bit_set_run_reader.go b/vendor/github.com/apache/arrow/go/v14/internal/bitutils/bit_set_run_reader.go deleted file mode 100644 index a2269ffec..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/bitutils/bit_set_run_reader.go +++ /dev/null @@ -1,361 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package bitutils - -import ( - "encoding/binary" - "math/bits" - - "github.com/apache/arrow/go/v14/arrow/bitutil" - "github.com/apache/arrow/go/v14/internal/utils" -) - -// IsMultipleOf64 returns whether v is a multiple of 64. -func IsMultipleOf64(v int64) bool { return v&63 == 0 } - -// LeastSignificantBitMask returns a bit mask to return the least significant -// bits for a value starting from the bit index passed in. ie: if you want a -// mask for the 4 least significant bits, you call LeastSignificantBitMask(4) -func LeastSignificantBitMask(index int64) uint64 { - return (uint64(1) << index) - 1 -} - -// SetBitRun describes a run of contiguous set bits in a bitmap with Pos being -// the starting position of the run and Length being the number of bits. -type SetBitRun struct { - Pos int64 - Length int64 -} - -// AtEnd returns true if this bit run is the end of the set by checking -// that the length is 0. -func (s SetBitRun) AtEnd() bool { - return s.Length == 0 -} - -// Equal returns whether rhs is the same run as s -func (s SetBitRun) Equal(rhs SetBitRun) bool { - return s.Pos == rhs.Pos && s.Length == rhs.Length -} - -// SetBitRunReader is an interface for reading groups of contiguous set bits -// from a bitmap. The interface allows us to create different reader implementations -// that share the same interface easily such as a reverse set reader. -type SetBitRunReader interface { - // NextRun will return the next run of contiguous set bits in the bitmap - NextRun() SetBitRun - // Reset allows re-using the reader by providing a new bitmap, offset and length. The arguments - // match the New function for the reader being used. - Reset([]byte, int64, int64) - // VisitSetBitRuns calls visitFn for each set in a loop starting from the current position - // it's roughly equivalent to simply looping, calling NextRun and calling visitFn on the run - // for each run. - VisitSetBitRuns(visitFn VisitFn) error -} - -type baseSetBitRunReader struct { - bitmap []byte - pos int64 - length int64 - remaining int64 - curWord uint64 - curNumBits int32 - reversed bool - - firstBit uint64 -} - -// NewSetBitRunReader returns a SetBitRunReader for the bitmap starting at startOffset which will read -// numvalues bits. -func NewSetBitRunReader(validBits []byte, startOffset, numValues int64) SetBitRunReader { - return newBaseSetBitRunReader(validBits, startOffset, numValues, false) -} - -// NewReverseSetBitRunReader returns a SetBitRunReader like NewSetBitRunReader, except it will -// return runs starting from the end of the bitmap until it reaches startOffset rather than starting -// at startOffset and reading from there. The SetBitRuns will still operate the same, so Pos -// will still be the position of the "left-most" bit of the run or the "start" of the run. It -// just returns runs starting from the end instead of starting from the beginning. -func NewReverseSetBitRunReader(validBits []byte, startOffset, numValues int64) SetBitRunReader { - return newBaseSetBitRunReader(validBits, startOffset, numValues, true) -} - -func newBaseSetBitRunReader(bitmap []byte, startOffset, length int64, reverse bool) *baseSetBitRunReader { - ret := &baseSetBitRunReader{reversed: reverse} - ret.Reset(bitmap, startOffset, length) - return ret -} - -func (br *baseSetBitRunReader) Reset(bitmap []byte, startOffset, length int64) { - br.bitmap = bitmap - br.length = length - br.remaining = length - br.curNumBits = 0 - br.curWord = 0 - - if !br.reversed { - br.pos = startOffset / 8 - br.firstBit = 1 - - bitOffset := int8(startOffset % 8) - if length > 0 && bitOffset != 0 { - br.curNumBits = int32(utils.MinInt(int(length), int(8-bitOffset))) - br.curWord = br.loadPartial(bitOffset, int64(br.curNumBits)) - } - return - } - - br.pos = (startOffset + length) / 8 - br.firstBit = uint64(0x8000000000000000) - endBitOffset := int8((startOffset + length) % 8) - if length > 0 && endBitOffset != 0 { - br.pos++ - br.curNumBits = int32(utils.MinInt(int(length), int(endBitOffset))) - br.curWord = br.loadPartial(8-endBitOffset, int64(br.curNumBits)) - } -} - -func (br *baseSetBitRunReader) consumeBits(word uint64, nbits int32) uint64 { - if br.reversed { - return word << nbits - } - return word >> nbits -} - -func (br *baseSetBitRunReader) countFirstZeros(word uint64) int32 { - if br.reversed { - return int32(bits.LeadingZeros64(word)) - } - return int32(bits.TrailingZeros64(word)) -} - -func (br *baseSetBitRunReader) loadPartial(bitOffset int8, numBits int64) uint64 { - var word [8]byte - nbytes := bitutil.BytesForBits(numBits) - if br.reversed { - br.pos -= nbytes - copy(word[8-nbytes:], br.bitmap[br.pos:br.pos+nbytes]) - return (binary.LittleEndian.Uint64(word[:]) << bitOffset) &^ LeastSignificantBitMask(64-numBits) - } - - copy(word[:], br.bitmap[br.pos:br.pos+nbytes]) - br.pos += nbytes - return (binary.LittleEndian.Uint64(word[:]) >> bitOffset) & LeastSignificantBitMask(numBits) -} - -func (br *baseSetBitRunReader) findCurrentRun() SetBitRun { - nzeros := br.countFirstZeros(br.curWord) - if nzeros >= br.curNumBits { - br.remaining -= int64(br.curNumBits) - br.curWord = 0 - br.curNumBits = 0 - return SetBitRun{0, 0} - } - - br.curWord = br.consumeBits(br.curWord, nzeros) - br.curNumBits -= nzeros - br.remaining -= int64(nzeros) - pos := br.position() - - numOnes := br.countFirstZeros(^br.curWord) - br.curWord = br.consumeBits(br.curWord, numOnes) - br.curNumBits -= numOnes - br.remaining -= int64(numOnes) - return SetBitRun{pos, int64(numOnes)} -} - -func (br *baseSetBitRunReader) position() int64 { - if br.reversed { - return br.remaining - } - return br.length - br.remaining -} - -func (br *baseSetBitRunReader) adjustRun(run SetBitRun) SetBitRun { - if br.reversed { - run.Pos -= run.Length - } - return run -} - -func (br *baseSetBitRunReader) loadFull() (ret uint64) { - if br.reversed { - br.pos -= 8 - } - ret = binary.LittleEndian.Uint64(br.bitmap[br.pos : br.pos+8]) - if !br.reversed { - br.pos += 8 - } - return -} - -func (br *baseSetBitRunReader) skipNextZeros() { - for br.remaining >= 64 { - br.curWord = br.loadFull() - nzeros := br.countFirstZeros(br.curWord) - if nzeros < 64 { - br.curWord = br.consumeBits(br.curWord, nzeros) - br.curNumBits = 64 - nzeros - br.remaining -= int64(nzeros) - return - } - br.remaining -= 64 - } - // run of zeros continues in last bitmap word - if br.remaining > 0 { - br.curWord = br.loadPartial(0, br.remaining) - br.curNumBits = int32(br.remaining) - nzeros := int32(utils.MinInt(int(br.curNumBits), int(br.countFirstZeros(br.curWord)))) - br.curWord = br.consumeBits(br.curWord, nzeros) - br.curNumBits -= nzeros - br.remaining -= int64(nzeros) - } -} - -func (br *baseSetBitRunReader) countNextOnes() int64 { - var length int64 - if ^br.curWord != 0 { - numOnes := br.countFirstZeros(^br.curWord) - br.remaining -= int64(numOnes) - br.curWord = br.consumeBits(br.curWord, numOnes) - br.curNumBits -= numOnes - if br.curNumBits != 0 { - return int64(numOnes) - } - length = int64(numOnes) - } else { - br.remaining -= 64 - br.curNumBits = 0 - length = 64 - } - - for br.remaining >= 64 { - br.curWord = br.loadFull() - numOnes := br.countFirstZeros(^br.curWord) - length += int64(numOnes) - br.remaining -= int64(numOnes) - if numOnes < 64 { - br.curWord = br.consumeBits(br.curWord, numOnes) - br.curNumBits = 64 - numOnes - return length - } - } - - if br.remaining > 0 { - br.curWord = br.loadPartial(0, br.remaining) - br.curNumBits = int32(br.remaining) - numOnes := br.countFirstZeros(^br.curWord) - br.curWord = br.consumeBits(br.curWord, numOnes) - br.curNumBits -= numOnes - br.remaining -= int64(numOnes) - length += int64(numOnes) - } - return length -} - -func (br *baseSetBitRunReader) NextRun() SetBitRun { - var ( - pos int64 = 0 - length int64 = 0 - ) - - if br.curNumBits != 0 { - run := br.findCurrentRun() - if run.Length != 0 && br.curNumBits != 0 { - return br.adjustRun(run) - } - pos = run.Pos - length = run.Length - } - - if length == 0 { - // we didn't get any ones in curWord, so we can skip any zeros - // in the following words - br.skipNextZeros() - if br.remaining == 0 { - return SetBitRun{0, 0} - } - pos = br.position() - } else if br.curNumBits == 0 { - if br.remaining >= 64 { - br.curWord = br.loadFull() - br.curNumBits = 64 - } else if br.remaining > 0 { - br.curWord = br.loadPartial(0, br.remaining) - br.curNumBits = int32(br.remaining) - } else { - return br.adjustRun(SetBitRun{pos, length}) - } - if (br.curWord & br.firstBit) == 0 { - return br.adjustRun(SetBitRun{pos, length}) - } - } - - length += br.countNextOnes() - return br.adjustRun(SetBitRun{pos, length}) -} - -// VisitFn is a callback function for visiting runs of contiguous bits -type VisitFn func(pos int64, length int64) error - -func (br *baseSetBitRunReader) VisitSetBitRuns(visitFn VisitFn) error { - for { - run := br.NextRun() - if run.Length == 0 { - break - } - - if err := visitFn(run.Pos, run.Length); err != nil { - return err - } - } - return nil -} - -// VisitSetBitRuns is just a convenience function for calling NewSetBitRunReader and then VisitSetBitRuns -func VisitSetBitRuns(bitmap []byte, bitmapOffset int64, length int64, visitFn VisitFn) error { - if bitmap == nil { - return visitFn(0, length) - } - rdr := NewSetBitRunReader(bitmap, bitmapOffset, length) - for { - run := rdr.NextRun() - if run.Length == 0 { - break - } - - if err := visitFn(run.Pos, run.Length); err != nil { - return err - } - } - return nil -} - -func VisitSetBitRunsNoErr(bitmap []byte, bitmapOffset int64, length int64, visitFn func(pos, length int64)) { - if bitmap == nil { - visitFn(0, length) - return - } - rdr := NewSetBitRunReader(bitmap, bitmapOffset, length) - for { - run := rdr.NextRun() - if run.Length == 0 { - break - } - visitFn(run.Pos, run.Length) - } -} diff --git a/vendor/github.com/apache/arrow/go/v14/internal/bitutils/bitmap_generate.go b/vendor/github.com/apache/arrow/go/v14/internal/bitutils/bitmap_generate.go deleted file mode 100644 index 78219d812..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/bitutils/bitmap_generate.go +++ /dev/null @@ -1,109 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package bitutils - -import "github.com/apache/arrow/go/v14/arrow/bitutil" - -// GenerateBits writes sequential bits to a bitmap. Bits preceding the -// initial start offset are preserved, bits following the bitmap may -// get clobbered. -func GenerateBits(bitmap []byte, start, length int64, g func() bool) { - if length == 0 { - return - } - - cur := bitmap[start/8:] - mask := bitutil.BitMask[start%8] - curbyte := cur[0] & bitutil.PrecedingBitmask[start%8] - - for i := int64(0); i < length; i++ { - bit := g() - if bit { - curbyte = curbyte | mask - } - mask <<= 1 - if mask == 0 { - mask = 1 - cur[0] = curbyte - cur = cur[1:] - curbyte = 0 - } - } - - if mask != 1 { - cur[0] = curbyte - } -} - -// GenerateBitsUnrolled is like GenerateBits but unrolls its main loop for -// higher performance. -// -// See the benchmarks for evidence. -func GenerateBitsUnrolled(bitmap []byte, start, length int64, g func() bool) { - if length == 0 { - return - } - - var ( - curbyte byte - cur = bitmap[start/8:] - startBitOffset uint64 = uint64(start % 8) - mask = bitutil.BitMask[startBitOffset] - remaining = length - ) - - if mask != 0x01 { - curbyte = cur[0] & bitutil.PrecedingBitmask[startBitOffset] - for mask != 0 && remaining > 0 { - if g() { - curbyte |= mask - } - mask <<= 1 - remaining-- - } - cur[0] = curbyte - cur = cur[1:] - } - - var outResults [8]byte - for remainingBytes := remaining / 8; remainingBytes > 0; remainingBytes-- { - for i := 0; i < 8; i++ { - if g() { - outResults[i] = 1 - } else { - outResults[i] = 0 - } - } - cur[0] = (outResults[0] | outResults[1]<<1 | outResults[2]<<2 | - outResults[3]<<3 | outResults[4]<<4 | outResults[5]<<5 | - outResults[6]<<6 | outResults[7]<<7) - cur = cur[1:] - } - - remainingBits := remaining % 8 - if remainingBits > 0 { - curbyte = 0 - mask = 0x01 - for ; remainingBits > 0; remainingBits-- { - if g() { - curbyte |= mask - } - mask <<= 1 - } - cur[0] = curbyte - } -} diff --git a/vendor/github.com/apache/arrow/go/v14/internal/hashing/hash_funcs.go b/vendor/github.com/apache/arrow/go/v14/internal/hashing/hash_funcs.go deleted file mode 100644 index c1bdfeb6d..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/hashing/hash_funcs.go +++ /dev/null @@ -1,90 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package hashing - -import ( - "math/bits" - "unsafe" - - "github.com/zeebo/xxh3" -) - -func hashInt(val uint64, alg uint64) uint64 { - // Two of xxhash's prime multipliers (which are chosen for their - // bit dispersion properties) - var multipliers = [2]uint64{11400714785074694791, 14029467366897019727} - // Multiplying by the prime number mixes the low bits into the high bits, - // then byte-swapping (which is a single CPU instruction) allows the - // combined high and low bits to participate in the initial hash table index. - return bits.ReverseBytes64(multipliers[alg] * val) -} - -func hashFloat32(val float32, alg uint64) uint64 { - // grab the raw byte pattern of the - bt := *(*[4]byte)(unsafe.Pointer(&val)) - x := uint64(*(*uint32)(unsafe.Pointer(&bt[0]))) - hx := hashInt(x, alg) - hy := hashInt(x, alg^1) - return 4 ^ hx ^ hy -} - -func hashFloat64(val float64, alg uint64) uint64 { - bt := *(*[8]byte)(unsafe.Pointer(&val)) - hx := hashInt(uint64(*(*uint32)(unsafe.Pointer(&bt[4]))), alg) - hy := hashInt(uint64(*(*uint32)(unsafe.Pointer(&bt[0]))), alg^1) - return 8 ^ hx ^ hy -} - -// prime constants used for slightly increasing the hash quality further -var exprimes = [2]uint64{1609587929392839161, 9650029242287828579} - -// for smaller amounts of bytes this is faster than even calling into -// xxh3 to do the Hash, so we specialize in order to get the benefits -// of that performance. -func Hash(b []byte, alg uint64) uint64 { - n := uint32(len(b)) - if n <= 16 { - switch { - case n > 8: - // 8 < length <= 16 - // apply same principle as above, but as two 64-bit ints - x := *(*uint64)(unsafe.Pointer(&b[n-8])) - y := *(*uint64)(unsafe.Pointer(&b[0])) - hx := hashInt(x, alg) - hy := hashInt(y, alg^1) - return uint64(n) ^ hx ^ hy - case n >= 4: - // 4 < length <= 8 - // we can read the bytes as two overlapping 32-bit ints, apply different - // hash functions to each in parallel - // then xor the results - x := *(*uint32)(unsafe.Pointer(&b[n-4])) - y := *(*uint32)(unsafe.Pointer(&b[0])) - hx := hashInt(uint64(x), alg) - hy := hashInt(uint64(y), alg^1) - return uint64(n) ^ hx ^ hy - case n > 0: - x := uint32((n << 24) ^ (uint32(b[0]) << 16) ^ (uint32(b[n/2]) << 8) ^ uint32(b[n-1])) - return hashInt(uint64(x), alg) - case n == 0: - return 1 - } - } - - // increase differentiation enough to improve hash quality - return xxh3.Hash(b) + exprimes[alg] -} diff --git a/vendor/github.com/apache/arrow/go/v14/internal/hashing/hash_string.go b/vendor/github.com/apache/arrow/go/v14/internal/hashing/hash_string.go deleted file mode 100644 index b772c7d7f..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/hashing/hash_string.go +++ /dev/null @@ -1,26 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build go1.20 || tinygo - -package hashing - -import "unsafe" - -func hashString(val string, alg uint64) uint64 { - buf := unsafe.Slice(unsafe.StringData(val), len(val)) - return Hash(buf, alg) -} diff --git a/vendor/github.com/apache/arrow/go/v14/internal/hashing/hash_string_go1.19.go b/vendor/github.com/apache/arrow/go/v14/internal/hashing/hash_string_go1.19.go deleted file mode 100644 index f38eb5c52..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/hashing/hash_string_go1.19.go +++ /dev/null @@ -1,37 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build !go1.20 && !tinygo - -package hashing - -import ( - "reflect" - "unsafe" -) - -func hashString(val string, alg uint64) uint64 { - if val == "" { - return Hash([]byte{}, alg) - } - // highly efficient way to get byte slice without copy before - // the introduction of unsafe.StringData in go1.20 - // (https://stackoverflow.com/questions/59209493/how-to-use-unsafe-get-a-byte-slice-from-a-string-without-memory-copy) - const MaxInt32 = 1<<31 - 1 - buf := (*[MaxInt32]byte)(unsafe.Pointer((*reflect.StringHeader)( - unsafe.Pointer(&val)).Data))[: len(val)&MaxInt32 : len(val)&MaxInt32] - return Hash(buf, alg) -} diff --git a/vendor/github.com/apache/arrow/go/v14/internal/hashing/types.tmpldata b/vendor/github.com/apache/arrow/go/v14/internal/hashing/types.tmpldata deleted file mode 100644 index 0ba6f765d..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/hashing/types.tmpldata +++ /dev/null @@ -1,42 +0,0 @@ -[ - { - "Name": "Int8", - "name": "int8" - }, - { - "Name": "Uint8", - "name": "uint8" - }, - { - "Name": "Int16", - "name": "int16" - }, - { - "Name": "Uint16", - "name": "uint16" - }, - { - "Name": "Int32", - "name": "int32" - }, - { - "Name": "Int64", - "name": "int64" - }, - { - "Name": "Uint32", - "name": "uint32" - }, - { - "Name": "Uint64", - "name": "uint64" - }, - { - "Name": "Float32", - "name": "float32" - }, - { - "Name": "Float64", - "name": "float64" - } -] diff --git a/vendor/github.com/apache/arrow/go/v14/internal/hashing/xxh3_memo_table.gen.go b/vendor/github.com/apache/arrow/go/v14/internal/hashing/xxh3_memo_table.gen.go deleted file mode 100644 index cc996552b..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/hashing/xxh3_memo_table.gen.go +++ /dev/null @@ -1,2833 +0,0 @@ -// Code generated by xxh3_memo_table.gen.go.tmpl. DO NOT EDIT. - -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package hashing - -import ( - "math" - - "github.com/apache/arrow/go/v14/arrow" - "github.com/apache/arrow/go/v14/arrow/bitutil" - "github.com/apache/arrow/go/v14/internal/utils" -) - -type payloadInt8 struct { - val int8 - memoIdx int32 -} - -type entryInt8 struct { - h uint64 - payload payloadInt8 -} - -func (e entryInt8) Valid() bool { return e.h != sentinel } - -// Int8HashTable is a hashtable specifically for int8 that -// is utilized with the MemoTable to generalize interactions for easier -// implementation of dictionaries without losing performance. -type Int8HashTable struct { - cap uint64 - capMask uint64 - size uint64 - - entries []entryInt8 -} - -// NewInt8HashTable returns a new hash table for int8 values -// initialized with the passed in capacity or 32 whichever is larger. -func NewInt8HashTable(cap uint64) *Int8HashTable { - initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) - ret := &Int8HashTable{cap: initCap, capMask: initCap - 1, size: 0} - ret.entries = make([]entryInt8, initCap) - return ret -} - -// Reset drops all of the values in this hash table and re-initializes it -// with the specified initial capacity as if by calling New, but without having -// to reallocate the object. -func (h *Int8HashTable) Reset(cap uint64) { - h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) - h.capMask = h.cap - 1 - h.size = 0 - h.entries = make([]entryInt8, h.cap) -} - -// CopyValues is used for copying the values out of the hash table into the -// passed in slice, in the order that they were first inserted -func (h *Int8HashTable) CopyValues(out []int8) { - h.CopyValuesSubset(0, out) -} - -// CopyValuesSubset copies a subset of the values in the hashtable out, starting -// with the value at start, in the order that they were inserted. -func (h *Int8HashTable) CopyValuesSubset(start int, out []int8) { - h.VisitEntries(func(e *entryInt8) { - idx := e.payload.memoIdx - int32(start) - if idx >= 0 { - out[idx] = e.payload.val - } - }) -} - -func (h *Int8HashTable) WriteOut(out []byte) { - h.WriteOutSubset(0, out) -} - -func (h *Int8HashTable) WriteOutSubset(start int, out []byte) { - data := arrow.Int8Traits.CastFromBytes(out) - h.VisitEntries(func(e *entryInt8) { - idx := e.payload.memoIdx - int32(start) - if idx >= 0 { - data[idx] = e.payload.val - } - }) -} - -func (h *Int8HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } - -func (Int8HashTable) fixHash(v uint64) uint64 { - if v == sentinel { - return 42 - } - return v -} - -// Lookup retrieves the entry for a given hash value assuming it's payload value returns -// true when passed to the cmp func. Returns a pointer to the entry for the given hash value, -// and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. -func (h *Int8HashTable) Lookup(v uint64, cmp func(int8) bool) (*entryInt8, bool) { - idx, ok := h.lookup(v, h.capMask, cmp) - return &h.entries[idx], ok -} - -func (h *Int8HashTable) lookup(v uint64, szMask uint64, cmp func(int8) bool) (uint64, bool) { - const perturbShift uint8 = 5 - - var ( - idx uint64 - perturb uint64 - e *entryInt8 - ) - - v = h.fixHash(v) - idx = v & szMask - perturb = (v >> uint64(perturbShift)) + 1 - - for { - e = &h.entries[idx] - if e.h == v && cmp(e.payload.val) { - return idx, true - } - - if e.h == sentinel { - return idx, false - } - - // perturbation logic inspired from CPython's set/dict object - // the goal is that all 64 bits of unmasked hash value eventually - // participate int he probing sequence, to minimize clustering - idx = (idx + perturb) & szMask - perturb = (perturb >> uint64(perturbShift)) + 1 - } -} - -func (h *Int8HashTable) upsize(newcap uint64) error { - newMask := newcap - 1 - - oldEntries := h.entries - h.entries = make([]entryInt8, newcap) - for _, e := range oldEntries { - if e.Valid() { - idx, _ := h.lookup(e.h, newMask, func(int8) bool { return false }) - h.entries[idx] = e - } - } - h.cap = newcap - h.capMask = newMask - return nil -} - -// Insert updates the given entry with the provided hash value, payload value and memo index. -// The entry pointer must have been retrieved via lookup in order to actually insert properly. -func (h *Int8HashTable) Insert(e *entryInt8, v uint64, val int8, memoIdx int32) error { - e.h = h.fixHash(v) - e.payload.val = val - e.payload.memoIdx = memoIdx - h.size++ - - if h.needUpsize() { - h.upsize(h.cap * uint64(loadFactor) * 2) - } - return nil -} - -// VisitEntries will call the passed in function on each *valid* entry in the hash table, -// a valid entry being one which has had a value inserted into it. -func (h *Int8HashTable) VisitEntries(visit func(*entryInt8)) { - for _, e := range h.entries { - if e.Valid() { - visit(&e) - } - } -} - -// Int8MemoTable is a wrapper over the appropriate hashtable to provide an interface -// conforming to the MemoTable interface defined in the encoding package for general interactions -// regarding dictionaries. -type Int8MemoTable struct { - tbl *Int8HashTable - nullIdx int32 -} - -// NewInt8MemoTable returns a new memotable with num entries pre-allocated to reduce further -// allocations when inserting. -func NewInt8MemoTable(num int64) *Int8MemoTable { - return &Int8MemoTable{tbl: NewInt8HashTable(uint64(num)), nullIdx: KeyNotFound} -} - -func (Int8MemoTable) TypeTraits() TypeTraits { - return arrow.Int8Traits -} - -// Reset allows this table to be re-used by dumping all the data currently in the table. -func (s *Int8MemoTable) Reset() { - s.tbl.Reset(32) - s.nullIdx = KeyNotFound -} - -// Size returns the current number of inserted elements into the table including if a null -// has been inserted. -func (s *Int8MemoTable) Size() int { - sz := int(s.tbl.size) - if _, ok := s.GetNull(); ok { - sz++ - } - return sz -} - -// GetNull returns the index of an inserted null or KeyNotFound along with a bool -// that will be true if found and false if not. -func (s *Int8MemoTable) GetNull() (int, bool) { - return int(s.nullIdx), s.nullIdx != KeyNotFound -} - -// GetOrInsertNull will return the index of the null entry or insert a null entry -// if one currently doesn't exist. The found value will be true if there was already -// a null in the table, and false if it inserted one. -func (s *Int8MemoTable) GetOrInsertNull() (idx int, found bool) { - idx, found = s.GetNull() - if !found { - idx = s.Size() - s.nullIdx = int32(idx) - } - return -} - -// CopyValues will copy the values from the memo table out into the passed in slice -// which must be of the appropriate type. -func (s *Int8MemoTable) CopyValues(out interface{}) { - s.CopyValuesSubset(0, out) -} - -// CopyValuesSubset is like CopyValues but only copies a subset of values starting -// at the provided start index -func (s *Int8MemoTable) CopyValuesSubset(start int, out interface{}) { - s.tbl.CopyValuesSubset(start, out.([]int8)) -} - -func (s *Int8MemoTable) WriteOut(out []byte) { - s.tbl.CopyValues(arrow.Int8Traits.CastFromBytes(out)) -} - -func (s *Int8MemoTable) WriteOutSubset(start int, out []byte) { - s.tbl.CopyValuesSubset(start, arrow.Int8Traits.CastFromBytes(out)) -} - -func (s *Int8MemoTable) WriteOutLE(out []byte) { - s.tbl.WriteOut(out) -} - -func (s *Int8MemoTable) WriteOutSubsetLE(start int, out []byte) { - s.tbl.WriteOutSubset(start, out) -} - -// Get returns the index of the requested value in the hash table or KeyNotFound -// along with a boolean indicating if it was found or not. -func (s *Int8MemoTable) Get(val interface{}) (int, bool) { - - h := hashInt(uint64(val.(int8)), 0) - if e, ok := s.tbl.Lookup(h, func(v int8) bool { return val.(int8) == v }); ok { - return int(e.payload.memoIdx), ok - } - return KeyNotFound, false -} - -// GetOrInsert will return the index of the specified value in the table, or insert the -// value into the table and return the new index. found indicates whether or not it already -// existed in the table (true) or was inserted by this call (false). -func (s *Int8MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { - - h := hashInt(uint64(val.(int8)), 0) - e, ok := s.tbl.Lookup(h, func(v int8) bool { - return val.(int8) == v - }) - - if ok { - idx = int(e.payload.memoIdx) - found = true - } else { - idx = s.Size() - s.tbl.Insert(e, h, val.(int8), int32(idx)) - } - return -} - -// GetOrInsertBytes is unimplemented -func (s *Int8MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) { - panic("unimplemented") -} - -type payloadUint8 struct { - val uint8 - memoIdx int32 -} - -type entryUint8 struct { - h uint64 - payload payloadUint8 -} - -func (e entryUint8) Valid() bool { return e.h != sentinel } - -// Uint8HashTable is a hashtable specifically for uint8 that -// is utilized with the MemoTable to generalize interactions for easier -// implementation of dictionaries without losing performance. -type Uint8HashTable struct { - cap uint64 - capMask uint64 - size uint64 - - entries []entryUint8 -} - -// NewUint8HashTable returns a new hash table for uint8 values -// initialized with the passed in capacity or 32 whichever is larger. -func NewUint8HashTable(cap uint64) *Uint8HashTable { - initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) - ret := &Uint8HashTable{cap: initCap, capMask: initCap - 1, size: 0} - ret.entries = make([]entryUint8, initCap) - return ret -} - -// Reset drops all of the values in this hash table and re-initializes it -// with the specified initial capacity as if by calling New, but without having -// to reallocate the object. -func (h *Uint8HashTable) Reset(cap uint64) { - h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) - h.capMask = h.cap - 1 - h.size = 0 - h.entries = make([]entryUint8, h.cap) -} - -// CopyValues is used for copying the values out of the hash table into the -// passed in slice, in the order that they were first inserted -func (h *Uint8HashTable) CopyValues(out []uint8) { - h.CopyValuesSubset(0, out) -} - -// CopyValuesSubset copies a subset of the values in the hashtable out, starting -// with the value at start, in the order that they were inserted. -func (h *Uint8HashTable) CopyValuesSubset(start int, out []uint8) { - h.VisitEntries(func(e *entryUint8) { - idx := e.payload.memoIdx - int32(start) - if idx >= 0 { - out[idx] = e.payload.val - } - }) -} - -func (h *Uint8HashTable) WriteOut(out []byte) { - h.WriteOutSubset(0, out) -} - -func (h *Uint8HashTable) WriteOutSubset(start int, out []byte) { - data := arrow.Uint8Traits.CastFromBytes(out) - h.VisitEntries(func(e *entryUint8) { - idx := e.payload.memoIdx - int32(start) - if idx >= 0 { - data[idx] = e.payload.val - } - }) -} - -func (h *Uint8HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } - -func (Uint8HashTable) fixHash(v uint64) uint64 { - if v == sentinel { - return 42 - } - return v -} - -// Lookup retrieves the entry for a given hash value assuming it's payload value returns -// true when passed to the cmp func. Returns a pointer to the entry for the given hash value, -// and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. -func (h *Uint8HashTable) Lookup(v uint64, cmp func(uint8) bool) (*entryUint8, bool) { - idx, ok := h.lookup(v, h.capMask, cmp) - return &h.entries[idx], ok -} - -func (h *Uint8HashTable) lookup(v uint64, szMask uint64, cmp func(uint8) bool) (uint64, bool) { - const perturbShift uint8 = 5 - - var ( - idx uint64 - perturb uint64 - e *entryUint8 - ) - - v = h.fixHash(v) - idx = v & szMask - perturb = (v >> uint64(perturbShift)) + 1 - - for { - e = &h.entries[idx] - if e.h == v && cmp(e.payload.val) { - return idx, true - } - - if e.h == sentinel { - return idx, false - } - - // perturbation logic inspired from CPython's set/dict object - // the goal is that all 64 bits of unmasked hash value eventually - // participate int he probing sequence, to minimize clustering - idx = (idx + perturb) & szMask - perturb = (perturb >> uint64(perturbShift)) + 1 - } -} - -func (h *Uint8HashTable) upsize(newcap uint64) error { - newMask := newcap - 1 - - oldEntries := h.entries - h.entries = make([]entryUint8, newcap) - for _, e := range oldEntries { - if e.Valid() { - idx, _ := h.lookup(e.h, newMask, func(uint8) bool { return false }) - h.entries[idx] = e - } - } - h.cap = newcap - h.capMask = newMask - return nil -} - -// Insert updates the given entry with the provided hash value, payload value and memo index. -// The entry pointer must have been retrieved via lookup in order to actually insert properly. -func (h *Uint8HashTable) Insert(e *entryUint8, v uint64, val uint8, memoIdx int32) error { - e.h = h.fixHash(v) - e.payload.val = val - e.payload.memoIdx = memoIdx - h.size++ - - if h.needUpsize() { - h.upsize(h.cap * uint64(loadFactor) * 2) - } - return nil -} - -// VisitEntries will call the passed in function on each *valid* entry in the hash table, -// a valid entry being one which has had a value inserted into it. -func (h *Uint8HashTable) VisitEntries(visit func(*entryUint8)) { - for _, e := range h.entries { - if e.Valid() { - visit(&e) - } - } -} - -// Uint8MemoTable is a wrapper over the appropriate hashtable to provide an interface -// conforming to the MemoTable interface defined in the encoding package for general interactions -// regarding dictionaries. -type Uint8MemoTable struct { - tbl *Uint8HashTable - nullIdx int32 -} - -// NewUint8MemoTable returns a new memotable with num entries pre-allocated to reduce further -// allocations when inserting. -func NewUint8MemoTable(num int64) *Uint8MemoTable { - return &Uint8MemoTable{tbl: NewUint8HashTable(uint64(num)), nullIdx: KeyNotFound} -} - -func (Uint8MemoTable) TypeTraits() TypeTraits { - return arrow.Uint8Traits -} - -// Reset allows this table to be re-used by dumping all the data currently in the table. -func (s *Uint8MemoTable) Reset() { - s.tbl.Reset(32) - s.nullIdx = KeyNotFound -} - -// Size returns the current number of inserted elements into the table including if a null -// has been inserted. -func (s *Uint8MemoTable) Size() int { - sz := int(s.tbl.size) - if _, ok := s.GetNull(); ok { - sz++ - } - return sz -} - -// GetNull returns the index of an inserted null or KeyNotFound along with a bool -// that will be true if found and false if not. -func (s *Uint8MemoTable) GetNull() (int, bool) { - return int(s.nullIdx), s.nullIdx != KeyNotFound -} - -// GetOrInsertNull will return the index of the null entry or insert a null entry -// if one currently doesn't exist. The found value will be true if there was already -// a null in the table, and false if it inserted one. -func (s *Uint8MemoTable) GetOrInsertNull() (idx int, found bool) { - idx, found = s.GetNull() - if !found { - idx = s.Size() - s.nullIdx = int32(idx) - } - return -} - -// CopyValues will copy the values from the memo table out into the passed in slice -// which must be of the appropriate type. -func (s *Uint8MemoTable) CopyValues(out interface{}) { - s.CopyValuesSubset(0, out) -} - -// CopyValuesSubset is like CopyValues but only copies a subset of values starting -// at the provided start index -func (s *Uint8MemoTable) CopyValuesSubset(start int, out interface{}) { - s.tbl.CopyValuesSubset(start, out.([]uint8)) -} - -func (s *Uint8MemoTable) WriteOut(out []byte) { - s.tbl.CopyValues(arrow.Uint8Traits.CastFromBytes(out)) -} - -func (s *Uint8MemoTable) WriteOutSubset(start int, out []byte) { - s.tbl.CopyValuesSubset(start, arrow.Uint8Traits.CastFromBytes(out)) -} - -func (s *Uint8MemoTable) WriteOutLE(out []byte) { - s.tbl.WriteOut(out) -} - -func (s *Uint8MemoTable) WriteOutSubsetLE(start int, out []byte) { - s.tbl.WriteOutSubset(start, out) -} - -// Get returns the index of the requested value in the hash table or KeyNotFound -// along with a boolean indicating if it was found or not. -func (s *Uint8MemoTable) Get(val interface{}) (int, bool) { - - h := hashInt(uint64(val.(uint8)), 0) - if e, ok := s.tbl.Lookup(h, func(v uint8) bool { return val.(uint8) == v }); ok { - return int(e.payload.memoIdx), ok - } - return KeyNotFound, false -} - -// GetOrInsert will return the index of the specified value in the table, or insert the -// value into the table and return the new index. found indicates whether or not it already -// existed in the table (true) or was inserted by this call (false). -func (s *Uint8MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { - - h := hashInt(uint64(val.(uint8)), 0) - e, ok := s.tbl.Lookup(h, func(v uint8) bool { - return val.(uint8) == v - }) - - if ok { - idx = int(e.payload.memoIdx) - found = true - } else { - idx = s.Size() - s.tbl.Insert(e, h, val.(uint8), int32(idx)) - } - return -} - -// GetOrInsertBytes is unimplemented -func (s *Uint8MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) { - panic("unimplemented") -} - -type payloadInt16 struct { - val int16 - memoIdx int32 -} - -type entryInt16 struct { - h uint64 - payload payloadInt16 -} - -func (e entryInt16) Valid() bool { return e.h != sentinel } - -// Int16HashTable is a hashtable specifically for int16 that -// is utilized with the MemoTable to generalize interactions for easier -// implementation of dictionaries without losing performance. -type Int16HashTable struct { - cap uint64 - capMask uint64 - size uint64 - - entries []entryInt16 -} - -// NewInt16HashTable returns a new hash table for int16 values -// initialized with the passed in capacity or 32 whichever is larger. -func NewInt16HashTable(cap uint64) *Int16HashTable { - initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) - ret := &Int16HashTable{cap: initCap, capMask: initCap - 1, size: 0} - ret.entries = make([]entryInt16, initCap) - return ret -} - -// Reset drops all of the values in this hash table and re-initializes it -// with the specified initial capacity as if by calling New, but without having -// to reallocate the object. -func (h *Int16HashTable) Reset(cap uint64) { - h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) - h.capMask = h.cap - 1 - h.size = 0 - h.entries = make([]entryInt16, h.cap) -} - -// CopyValues is used for copying the values out of the hash table into the -// passed in slice, in the order that they were first inserted -func (h *Int16HashTable) CopyValues(out []int16) { - h.CopyValuesSubset(0, out) -} - -// CopyValuesSubset copies a subset of the values in the hashtable out, starting -// with the value at start, in the order that they were inserted. -func (h *Int16HashTable) CopyValuesSubset(start int, out []int16) { - h.VisitEntries(func(e *entryInt16) { - idx := e.payload.memoIdx - int32(start) - if idx >= 0 { - out[idx] = e.payload.val - } - }) -} - -func (h *Int16HashTable) WriteOut(out []byte) { - h.WriteOutSubset(0, out) -} - -func (h *Int16HashTable) WriteOutSubset(start int, out []byte) { - data := arrow.Int16Traits.CastFromBytes(out) - h.VisitEntries(func(e *entryInt16) { - idx := e.payload.memoIdx - int32(start) - if idx >= 0 { - data[idx] = utils.ToLEInt16(e.payload.val) - } - }) -} - -func (h *Int16HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } - -func (Int16HashTable) fixHash(v uint64) uint64 { - if v == sentinel { - return 42 - } - return v -} - -// Lookup retrieves the entry for a given hash value assuming it's payload value returns -// true when passed to the cmp func. Returns a pointer to the entry for the given hash value, -// and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. -func (h *Int16HashTable) Lookup(v uint64, cmp func(int16) bool) (*entryInt16, bool) { - idx, ok := h.lookup(v, h.capMask, cmp) - return &h.entries[idx], ok -} - -func (h *Int16HashTable) lookup(v uint64, szMask uint64, cmp func(int16) bool) (uint64, bool) { - const perturbShift uint8 = 5 - - var ( - idx uint64 - perturb uint64 - e *entryInt16 - ) - - v = h.fixHash(v) - idx = v & szMask - perturb = (v >> uint64(perturbShift)) + 1 - - for { - e = &h.entries[idx] - if e.h == v && cmp(e.payload.val) { - return idx, true - } - - if e.h == sentinel { - return idx, false - } - - // perturbation logic inspired from CPython's set/dict object - // the goal is that all 64 bits of unmasked hash value eventually - // participate int he probing sequence, to minimize clustering - idx = (idx + perturb) & szMask - perturb = (perturb >> uint64(perturbShift)) + 1 - } -} - -func (h *Int16HashTable) upsize(newcap uint64) error { - newMask := newcap - 1 - - oldEntries := h.entries - h.entries = make([]entryInt16, newcap) - for _, e := range oldEntries { - if e.Valid() { - idx, _ := h.lookup(e.h, newMask, func(int16) bool { return false }) - h.entries[idx] = e - } - } - h.cap = newcap - h.capMask = newMask - return nil -} - -// Insert updates the given entry with the provided hash value, payload value and memo index. -// The entry pointer must have been retrieved via lookup in order to actually insert properly. -func (h *Int16HashTable) Insert(e *entryInt16, v uint64, val int16, memoIdx int32) error { - e.h = h.fixHash(v) - e.payload.val = val - e.payload.memoIdx = memoIdx - h.size++ - - if h.needUpsize() { - h.upsize(h.cap * uint64(loadFactor) * 2) - } - return nil -} - -// VisitEntries will call the passed in function on each *valid* entry in the hash table, -// a valid entry being one which has had a value inserted into it. -func (h *Int16HashTable) VisitEntries(visit func(*entryInt16)) { - for _, e := range h.entries { - if e.Valid() { - visit(&e) - } - } -} - -// Int16MemoTable is a wrapper over the appropriate hashtable to provide an interface -// conforming to the MemoTable interface defined in the encoding package for general interactions -// regarding dictionaries. -type Int16MemoTable struct { - tbl *Int16HashTable - nullIdx int32 -} - -// NewInt16MemoTable returns a new memotable with num entries pre-allocated to reduce further -// allocations when inserting. -func NewInt16MemoTable(num int64) *Int16MemoTable { - return &Int16MemoTable{tbl: NewInt16HashTable(uint64(num)), nullIdx: KeyNotFound} -} - -func (Int16MemoTable) TypeTraits() TypeTraits { - return arrow.Int16Traits -} - -// Reset allows this table to be re-used by dumping all the data currently in the table. -func (s *Int16MemoTable) Reset() { - s.tbl.Reset(32) - s.nullIdx = KeyNotFound -} - -// Size returns the current number of inserted elements into the table including if a null -// has been inserted. -func (s *Int16MemoTable) Size() int { - sz := int(s.tbl.size) - if _, ok := s.GetNull(); ok { - sz++ - } - return sz -} - -// GetNull returns the index of an inserted null or KeyNotFound along with a bool -// that will be true if found and false if not. -func (s *Int16MemoTable) GetNull() (int, bool) { - return int(s.nullIdx), s.nullIdx != KeyNotFound -} - -// GetOrInsertNull will return the index of the null entry or insert a null entry -// if one currently doesn't exist. The found value will be true if there was already -// a null in the table, and false if it inserted one. -func (s *Int16MemoTable) GetOrInsertNull() (idx int, found bool) { - idx, found = s.GetNull() - if !found { - idx = s.Size() - s.nullIdx = int32(idx) - } - return -} - -// CopyValues will copy the values from the memo table out into the passed in slice -// which must be of the appropriate type. -func (s *Int16MemoTable) CopyValues(out interface{}) { - s.CopyValuesSubset(0, out) -} - -// CopyValuesSubset is like CopyValues but only copies a subset of values starting -// at the provided start index -func (s *Int16MemoTable) CopyValuesSubset(start int, out interface{}) { - s.tbl.CopyValuesSubset(start, out.([]int16)) -} - -func (s *Int16MemoTable) WriteOut(out []byte) { - s.tbl.CopyValues(arrow.Int16Traits.CastFromBytes(out)) -} - -func (s *Int16MemoTable) WriteOutSubset(start int, out []byte) { - s.tbl.CopyValuesSubset(start, arrow.Int16Traits.CastFromBytes(out)) -} - -func (s *Int16MemoTable) WriteOutLE(out []byte) { - s.tbl.WriteOut(out) -} - -func (s *Int16MemoTable) WriteOutSubsetLE(start int, out []byte) { - s.tbl.WriteOutSubset(start, out) -} - -// Get returns the index of the requested value in the hash table or KeyNotFound -// along with a boolean indicating if it was found or not. -func (s *Int16MemoTable) Get(val interface{}) (int, bool) { - - h := hashInt(uint64(val.(int16)), 0) - if e, ok := s.tbl.Lookup(h, func(v int16) bool { return val.(int16) == v }); ok { - return int(e.payload.memoIdx), ok - } - return KeyNotFound, false -} - -// GetOrInsert will return the index of the specified value in the table, or insert the -// value into the table and return the new index. found indicates whether or not it already -// existed in the table (true) or was inserted by this call (false). -func (s *Int16MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { - - h := hashInt(uint64(val.(int16)), 0) - e, ok := s.tbl.Lookup(h, func(v int16) bool { - return val.(int16) == v - }) - - if ok { - idx = int(e.payload.memoIdx) - found = true - } else { - idx = s.Size() - s.tbl.Insert(e, h, val.(int16), int32(idx)) - } - return -} - -// GetOrInsertBytes is unimplemented -func (s *Int16MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) { - panic("unimplemented") -} - -type payloadUint16 struct { - val uint16 - memoIdx int32 -} - -type entryUint16 struct { - h uint64 - payload payloadUint16 -} - -func (e entryUint16) Valid() bool { return e.h != sentinel } - -// Uint16HashTable is a hashtable specifically for uint16 that -// is utilized with the MemoTable to generalize interactions for easier -// implementation of dictionaries without losing performance. -type Uint16HashTable struct { - cap uint64 - capMask uint64 - size uint64 - - entries []entryUint16 -} - -// NewUint16HashTable returns a new hash table for uint16 values -// initialized with the passed in capacity or 32 whichever is larger. -func NewUint16HashTable(cap uint64) *Uint16HashTable { - initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) - ret := &Uint16HashTable{cap: initCap, capMask: initCap - 1, size: 0} - ret.entries = make([]entryUint16, initCap) - return ret -} - -// Reset drops all of the values in this hash table and re-initializes it -// with the specified initial capacity as if by calling New, but without having -// to reallocate the object. -func (h *Uint16HashTable) Reset(cap uint64) { - h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) - h.capMask = h.cap - 1 - h.size = 0 - h.entries = make([]entryUint16, h.cap) -} - -// CopyValues is used for copying the values out of the hash table into the -// passed in slice, in the order that they were first inserted -func (h *Uint16HashTable) CopyValues(out []uint16) { - h.CopyValuesSubset(0, out) -} - -// CopyValuesSubset copies a subset of the values in the hashtable out, starting -// with the value at start, in the order that they were inserted. -func (h *Uint16HashTable) CopyValuesSubset(start int, out []uint16) { - h.VisitEntries(func(e *entryUint16) { - idx := e.payload.memoIdx - int32(start) - if idx >= 0 { - out[idx] = e.payload.val - } - }) -} - -func (h *Uint16HashTable) WriteOut(out []byte) { - h.WriteOutSubset(0, out) -} - -func (h *Uint16HashTable) WriteOutSubset(start int, out []byte) { - data := arrow.Uint16Traits.CastFromBytes(out) - h.VisitEntries(func(e *entryUint16) { - idx := e.payload.memoIdx - int32(start) - if idx >= 0 { - data[idx] = utils.ToLEUint16(e.payload.val) - } - }) -} - -func (h *Uint16HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } - -func (Uint16HashTable) fixHash(v uint64) uint64 { - if v == sentinel { - return 42 - } - return v -} - -// Lookup retrieves the entry for a given hash value assuming it's payload value returns -// true when passed to the cmp func. Returns a pointer to the entry for the given hash value, -// and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. -func (h *Uint16HashTable) Lookup(v uint64, cmp func(uint16) bool) (*entryUint16, bool) { - idx, ok := h.lookup(v, h.capMask, cmp) - return &h.entries[idx], ok -} - -func (h *Uint16HashTable) lookup(v uint64, szMask uint64, cmp func(uint16) bool) (uint64, bool) { - const perturbShift uint8 = 5 - - var ( - idx uint64 - perturb uint64 - e *entryUint16 - ) - - v = h.fixHash(v) - idx = v & szMask - perturb = (v >> uint64(perturbShift)) + 1 - - for { - e = &h.entries[idx] - if e.h == v && cmp(e.payload.val) { - return idx, true - } - - if e.h == sentinel { - return idx, false - } - - // perturbation logic inspired from CPython's set/dict object - // the goal is that all 64 bits of unmasked hash value eventually - // participate int he probing sequence, to minimize clustering - idx = (idx + perturb) & szMask - perturb = (perturb >> uint64(perturbShift)) + 1 - } -} - -func (h *Uint16HashTable) upsize(newcap uint64) error { - newMask := newcap - 1 - - oldEntries := h.entries - h.entries = make([]entryUint16, newcap) - for _, e := range oldEntries { - if e.Valid() { - idx, _ := h.lookup(e.h, newMask, func(uint16) bool { return false }) - h.entries[idx] = e - } - } - h.cap = newcap - h.capMask = newMask - return nil -} - -// Insert updates the given entry with the provided hash value, payload value and memo index. -// The entry pointer must have been retrieved via lookup in order to actually insert properly. -func (h *Uint16HashTable) Insert(e *entryUint16, v uint64, val uint16, memoIdx int32) error { - e.h = h.fixHash(v) - e.payload.val = val - e.payload.memoIdx = memoIdx - h.size++ - - if h.needUpsize() { - h.upsize(h.cap * uint64(loadFactor) * 2) - } - return nil -} - -// VisitEntries will call the passed in function on each *valid* entry in the hash table, -// a valid entry being one which has had a value inserted into it. -func (h *Uint16HashTable) VisitEntries(visit func(*entryUint16)) { - for _, e := range h.entries { - if e.Valid() { - visit(&e) - } - } -} - -// Uint16MemoTable is a wrapper over the appropriate hashtable to provide an interface -// conforming to the MemoTable interface defined in the encoding package for general interactions -// regarding dictionaries. -type Uint16MemoTable struct { - tbl *Uint16HashTable - nullIdx int32 -} - -// NewUint16MemoTable returns a new memotable with num entries pre-allocated to reduce further -// allocations when inserting. -func NewUint16MemoTable(num int64) *Uint16MemoTable { - return &Uint16MemoTable{tbl: NewUint16HashTable(uint64(num)), nullIdx: KeyNotFound} -} - -func (Uint16MemoTable) TypeTraits() TypeTraits { - return arrow.Uint16Traits -} - -// Reset allows this table to be re-used by dumping all the data currently in the table. -func (s *Uint16MemoTable) Reset() { - s.tbl.Reset(32) - s.nullIdx = KeyNotFound -} - -// Size returns the current number of inserted elements into the table including if a null -// has been inserted. -func (s *Uint16MemoTable) Size() int { - sz := int(s.tbl.size) - if _, ok := s.GetNull(); ok { - sz++ - } - return sz -} - -// GetNull returns the index of an inserted null or KeyNotFound along with a bool -// that will be true if found and false if not. -func (s *Uint16MemoTable) GetNull() (int, bool) { - return int(s.nullIdx), s.nullIdx != KeyNotFound -} - -// GetOrInsertNull will return the index of the null entry or insert a null entry -// if one currently doesn't exist. The found value will be true if there was already -// a null in the table, and false if it inserted one. -func (s *Uint16MemoTable) GetOrInsertNull() (idx int, found bool) { - idx, found = s.GetNull() - if !found { - idx = s.Size() - s.nullIdx = int32(idx) - } - return -} - -// CopyValues will copy the values from the memo table out into the passed in slice -// which must be of the appropriate type. -func (s *Uint16MemoTable) CopyValues(out interface{}) { - s.CopyValuesSubset(0, out) -} - -// CopyValuesSubset is like CopyValues but only copies a subset of values starting -// at the provided start index -func (s *Uint16MemoTable) CopyValuesSubset(start int, out interface{}) { - s.tbl.CopyValuesSubset(start, out.([]uint16)) -} - -func (s *Uint16MemoTable) WriteOut(out []byte) { - s.tbl.CopyValues(arrow.Uint16Traits.CastFromBytes(out)) -} - -func (s *Uint16MemoTable) WriteOutSubset(start int, out []byte) { - s.tbl.CopyValuesSubset(start, arrow.Uint16Traits.CastFromBytes(out)) -} - -func (s *Uint16MemoTable) WriteOutLE(out []byte) { - s.tbl.WriteOut(out) -} - -func (s *Uint16MemoTable) WriteOutSubsetLE(start int, out []byte) { - s.tbl.WriteOutSubset(start, out) -} - -// Get returns the index of the requested value in the hash table or KeyNotFound -// along with a boolean indicating if it was found or not. -func (s *Uint16MemoTable) Get(val interface{}) (int, bool) { - - h := hashInt(uint64(val.(uint16)), 0) - if e, ok := s.tbl.Lookup(h, func(v uint16) bool { return val.(uint16) == v }); ok { - return int(e.payload.memoIdx), ok - } - return KeyNotFound, false -} - -// GetOrInsert will return the index of the specified value in the table, or insert the -// value into the table and return the new index. found indicates whether or not it already -// existed in the table (true) or was inserted by this call (false). -func (s *Uint16MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { - - h := hashInt(uint64(val.(uint16)), 0) - e, ok := s.tbl.Lookup(h, func(v uint16) bool { - return val.(uint16) == v - }) - - if ok { - idx = int(e.payload.memoIdx) - found = true - } else { - idx = s.Size() - s.tbl.Insert(e, h, val.(uint16), int32(idx)) - } - return -} - -// GetOrInsertBytes is unimplemented -func (s *Uint16MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) { - panic("unimplemented") -} - -type payloadInt32 struct { - val int32 - memoIdx int32 -} - -type entryInt32 struct { - h uint64 - payload payloadInt32 -} - -func (e entryInt32) Valid() bool { return e.h != sentinel } - -// Int32HashTable is a hashtable specifically for int32 that -// is utilized with the MemoTable to generalize interactions for easier -// implementation of dictionaries without losing performance. -type Int32HashTable struct { - cap uint64 - capMask uint64 - size uint64 - - entries []entryInt32 -} - -// NewInt32HashTable returns a new hash table for int32 values -// initialized with the passed in capacity or 32 whichever is larger. -func NewInt32HashTable(cap uint64) *Int32HashTable { - initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) - ret := &Int32HashTable{cap: initCap, capMask: initCap - 1, size: 0} - ret.entries = make([]entryInt32, initCap) - return ret -} - -// Reset drops all of the values in this hash table and re-initializes it -// with the specified initial capacity as if by calling New, but without having -// to reallocate the object. -func (h *Int32HashTable) Reset(cap uint64) { - h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) - h.capMask = h.cap - 1 - h.size = 0 - h.entries = make([]entryInt32, h.cap) -} - -// CopyValues is used for copying the values out of the hash table into the -// passed in slice, in the order that they were first inserted -func (h *Int32HashTable) CopyValues(out []int32) { - h.CopyValuesSubset(0, out) -} - -// CopyValuesSubset copies a subset of the values in the hashtable out, starting -// with the value at start, in the order that they were inserted. -func (h *Int32HashTable) CopyValuesSubset(start int, out []int32) { - h.VisitEntries(func(e *entryInt32) { - idx := e.payload.memoIdx - int32(start) - if idx >= 0 { - out[idx] = e.payload.val - } - }) -} - -func (h *Int32HashTable) WriteOut(out []byte) { - h.WriteOutSubset(0, out) -} - -func (h *Int32HashTable) WriteOutSubset(start int, out []byte) { - data := arrow.Int32Traits.CastFromBytes(out) - h.VisitEntries(func(e *entryInt32) { - idx := e.payload.memoIdx - int32(start) - if idx >= 0 { - data[idx] = utils.ToLEInt32(e.payload.val) - } - }) -} - -func (h *Int32HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } - -func (Int32HashTable) fixHash(v uint64) uint64 { - if v == sentinel { - return 42 - } - return v -} - -// Lookup retrieves the entry for a given hash value assuming it's payload value returns -// true when passed to the cmp func. Returns a pointer to the entry for the given hash value, -// and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. -func (h *Int32HashTable) Lookup(v uint64, cmp func(int32) bool) (*entryInt32, bool) { - idx, ok := h.lookup(v, h.capMask, cmp) - return &h.entries[idx], ok -} - -func (h *Int32HashTable) lookup(v uint64, szMask uint64, cmp func(int32) bool) (uint64, bool) { - const perturbShift uint8 = 5 - - var ( - idx uint64 - perturb uint64 - e *entryInt32 - ) - - v = h.fixHash(v) - idx = v & szMask - perturb = (v >> uint64(perturbShift)) + 1 - - for { - e = &h.entries[idx] - if e.h == v && cmp(e.payload.val) { - return idx, true - } - - if e.h == sentinel { - return idx, false - } - - // perturbation logic inspired from CPython's set/dict object - // the goal is that all 64 bits of unmasked hash value eventually - // participate int he probing sequence, to minimize clustering - idx = (idx + perturb) & szMask - perturb = (perturb >> uint64(perturbShift)) + 1 - } -} - -func (h *Int32HashTable) upsize(newcap uint64) error { - newMask := newcap - 1 - - oldEntries := h.entries - h.entries = make([]entryInt32, newcap) - for _, e := range oldEntries { - if e.Valid() { - idx, _ := h.lookup(e.h, newMask, func(int32) bool { return false }) - h.entries[idx] = e - } - } - h.cap = newcap - h.capMask = newMask - return nil -} - -// Insert updates the given entry with the provided hash value, payload value and memo index. -// The entry pointer must have been retrieved via lookup in order to actually insert properly. -func (h *Int32HashTable) Insert(e *entryInt32, v uint64, val int32, memoIdx int32) error { - e.h = h.fixHash(v) - e.payload.val = val - e.payload.memoIdx = memoIdx - h.size++ - - if h.needUpsize() { - h.upsize(h.cap * uint64(loadFactor) * 2) - } - return nil -} - -// VisitEntries will call the passed in function on each *valid* entry in the hash table, -// a valid entry being one which has had a value inserted into it. -func (h *Int32HashTable) VisitEntries(visit func(*entryInt32)) { - for _, e := range h.entries { - if e.Valid() { - visit(&e) - } - } -} - -// Int32MemoTable is a wrapper over the appropriate hashtable to provide an interface -// conforming to the MemoTable interface defined in the encoding package for general interactions -// regarding dictionaries. -type Int32MemoTable struct { - tbl *Int32HashTable - nullIdx int32 -} - -// NewInt32MemoTable returns a new memotable with num entries pre-allocated to reduce further -// allocations when inserting. -func NewInt32MemoTable(num int64) *Int32MemoTable { - return &Int32MemoTable{tbl: NewInt32HashTable(uint64(num)), nullIdx: KeyNotFound} -} - -func (Int32MemoTable) TypeTraits() TypeTraits { - return arrow.Int32Traits -} - -// Reset allows this table to be re-used by dumping all the data currently in the table. -func (s *Int32MemoTable) Reset() { - s.tbl.Reset(32) - s.nullIdx = KeyNotFound -} - -// Size returns the current number of inserted elements into the table including if a null -// has been inserted. -func (s *Int32MemoTable) Size() int { - sz := int(s.tbl.size) - if _, ok := s.GetNull(); ok { - sz++ - } - return sz -} - -// GetNull returns the index of an inserted null or KeyNotFound along with a bool -// that will be true if found and false if not. -func (s *Int32MemoTable) GetNull() (int, bool) { - return int(s.nullIdx), s.nullIdx != KeyNotFound -} - -// GetOrInsertNull will return the index of the null entry or insert a null entry -// if one currently doesn't exist. The found value will be true if there was already -// a null in the table, and false if it inserted one. -func (s *Int32MemoTable) GetOrInsertNull() (idx int, found bool) { - idx, found = s.GetNull() - if !found { - idx = s.Size() - s.nullIdx = int32(idx) - } - return -} - -// CopyValues will copy the values from the memo table out into the passed in slice -// which must be of the appropriate type. -func (s *Int32MemoTable) CopyValues(out interface{}) { - s.CopyValuesSubset(0, out) -} - -// CopyValuesSubset is like CopyValues but only copies a subset of values starting -// at the provided start index -func (s *Int32MemoTable) CopyValuesSubset(start int, out interface{}) { - s.tbl.CopyValuesSubset(start, out.([]int32)) -} - -func (s *Int32MemoTable) WriteOut(out []byte) { - s.tbl.CopyValues(arrow.Int32Traits.CastFromBytes(out)) -} - -func (s *Int32MemoTable) WriteOutSubset(start int, out []byte) { - s.tbl.CopyValuesSubset(start, arrow.Int32Traits.CastFromBytes(out)) -} - -func (s *Int32MemoTable) WriteOutLE(out []byte) { - s.tbl.WriteOut(out) -} - -func (s *Int32MemoTable) WriteOutSubsetLE(start int, out []byte) { - s.tbl.WriteOutSubset(start, out) -} - -// Get returns the index of the requested value in the hash table or KeyNotFound -// along with a boolean indicating if it was found or not. -func (s *Int32MemoTable) Get(val interface{}) (int, bool) { - - h := hashInt(uint64(val.(int32)), 0) - if e, ok := s.tbl.Lookup(h, func(v int32) bool { return val.(int32) == v }); ok { - return int(e.payload.memoIdx), ok - } - return KeyNotFound, false -} - -// GetOrInsert will return the index of the specified value in the table, or insert the -// value into the table and return the new index. found indicates whether or not it already -// existed in the table (true) or was inserted by this call (false). -func (s *Int32MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { - - h := hashInt(uint64(val.(int32)), 0) - e, ok := s.tbl.Lookup(h, func(v int32) bool { - return val.(int32) == v - }) - - if ok { - idx = int(e.payload.memoIdx) - found = true - } else { - idx = s.Size() - s.tbl.Insert(e, h, val.(int32), int32(idx)) - } - return -} - -// GetOrInsertBytes is unimplemented -func (s *Int32MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) { - panic("unimplemented") -} - -type payloadInt64 struct { - val int64 - memoIdx int32 -} - -type entryInt64 struct { - h uint64 - payload payloadInt64 -} - -func (e entryInt64) Valid() bool { return e.h != sentinel } - -// Int64HashTable is a hashtable specifically for int64 that -// is utilized with the MemoTable to generalize interactions for easier -// implementation of dictionaries without losing performance. -type Int64HashTable struct { - cap uint64 - capMask uint64 - size uint64 - - entries []entryInt64 -} - -// NewInt64HashTable returns a new hash table for int64 values -// initialized with the passed in capacity or 32 whichever is larger. -func NewInt64HashTable(cap uint64) *Int64HashTable { - initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) - ret := &Int64HashTable{cap: initCap, capMask: initCap - 1, size: 0} - ret.entries = make([]entryInt64, initCap) - return ret -} - -// Reset drops all of the values in this hash table and re-initializes it -// with the specified initial capacity as if by calling New, but without having -// to reallocate the object. -func (h *Int64HashTable) Reset(cap uint64) { - h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) - h.capMask = h.cap - 1 - h.size = 0 - h.entries = make([]entryInt64, h.cap) -} - -// CopyValues is used for copying the values out of the hash table into the -// passed in slice, in the order that they were first inserted -func (h *Int64HashTable) CopyValues(out []int64) { - h.CopyValuesSubset(0, out) -} - -// CopyValuesSubset copies a subset of the values in the hashtable out, starting -// with the value at start, in the order that they were inserted. -func (h *Int64HashTable) CopyValuesSubset(start int, out []int64) { - h.VisitEntries(func(e *entryInt64) { - idx := e.payload.memoIdx - int32(start) - if idx >= 0 { - out[idx] = e.payload.val - } - }) -} - -func (h *Int64HashTable) WriteOut(out []byte) { - h.WriteOutSubset(0, out) -} - -func (h *Int64HashTable) WriteOutSubset(start int, out []byte) { - data := arrow.Int64Traits.CastFromBytes(out) - h.VisitEntries(func(e *entryInt64) { - idx := e.payload.memoIdx - int32(start) - if idx >= 0 { - data[idx] = utils.ToLEInt64(e.payload.val) - } - }) -} - -func (h *Int64HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } - -func (Int64HashTable) fixHash(v uint64) uint64 { - if v == sentinel { - return 42 - } - return v -} - -// Lookup retrieves the entry for a given hash value assuming it's payload value returns -// true when passed to the cmp func. Returns a pointer to the entry for the given hash value, -// and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. -func (h *Int64HashTable) Lookup(v uint64, cmp func(int64) bool) (*entryInt64, bool) { - idx, ok := h.lookup(v, h.capMask, cmp) - return &h.entries[idx], ok -} - -func (h *Int64HashTable) lookup(v uint64, szMask uint64, cmp func(int64) bool) (uint64, bool) { - const perturbShift uint8 = 5 - - var ( - idx uint64 - perturb uint64 - e *entryInt64 - ) - - v = h.fixHash(v) - idx = v & szMask - perturb = (v >> uint64(perturbShift)) + 1 - - for { - e = &h.entries[idx] - if e.h == v && cmp(e.payload.val) { - return idx, true - } - - if e.h == sentinel { - return idx, false - } - - // perturbation logic inspired from CPython's set/dict object - // the goal is that all 64 bits of unmasked hash value eventually - // participate int he probing sequence, to minimize clustering - idx = (idx + perturb) & szMask - perturb = (perturb >> uint64(perturbShift)) + 1 - } -} - -func (h *Int64HashTable) upsize(newcap uint64) error { - newMask := newcap - 1 - - oldEntries := h.entries - h.entries = make([]entryInt64, newcap) - for _, e := range oldEntries { - if e.Valid() { - idx, _ := h.lookup(e.h, newMask, func(int64) bool { return false }) - h.entries[idx] = e - } - } - h.cap = newcap - h.capMask = newMask - return nil -} - -// Insert updates the given entry with the provided hash value, payload value and memo index. -// The entry pointer must have been retrieved via lookup in order to actually insert properly. -func (h *Int64HashTable) Insert(e *entryInt64, v uint64, val int64, memoIdx int32) error { - e.h = h.fixHash(v) - e.payload.val = val - e.payload.memoIdx = memoIdx - h.size++ - - if h.needUpsize() { - h.upsize(h.cap * uint64(loadFactor) * 2) - } - return nil -} - -// VisitEntries will call the passed in function on each *valid* entry in the hash table, -// a valid entry being one which has had a value inserted into it. -func (h *Int64HashTable) VisitEntries(visit func(*entryInt64)) { - for _, e := range h.entries { - if e.Valid() { - visit(&e) - } - } -} - -// Int64MemoTable is a wrapper over the appropriate hashtable to provide an interface -// conforming to the MemoTable interface defined in the encoding package for general interactions -// regarding dictionaries. -type Int64MemoTable struct { - tbl *Int64HashTable - nullIdx int32 -} - -// NewInt64MemoTable returns a new memotable with num entries pre-allocated to reduce further -// allocations when inserting. -func NewInt64MemoTable(num int64) *Int64MemoTable { - return &Int64MemoTable{tbl: NewInt64HashTable(uint64(num)), nullIdx: KeyNotFound} -} - -func (Int64MemoTable) TypeTraits() TypeTraits { - return arrow.Int64Traits -} - -// Reset allows this table to be re-used by dumping all the data currently in the table. -func (s *Int64MemoTable) Reset() { - s.tbl.Reset(32) - s.nullIdx = KeyNotFound -} - -// Size returns the current number of inserted elements into the table including if a null -// has been inserted. -func (s *Int64MemoTable) Size() int { - sz := int(s.tbl.size) - if _, ok := s.GetNull(); ok { - sz++ - } - return sz -} - -// GetNull returns the index of an inserted null or KeyNotFound along with a bool -// that will be true if found and false if not. -func (s *Int64MemoTable) GetNull() (int, bool) { - return int(s.nullIdx), s.nullIdx != KeyNotFound -} - -// GetOrInsertNull will return the index of the null entry or insert a null entry -// if one currently doesn't exist. The found value will be true if there was already -// a null in the table, and false if it inserted one. -func (s *Int64MemoTable) GetOrInsertNull() (idx int, found bool) { - idx, found = s.GetNull() - if !found { - idx = s.Size() - s.nullIdx = int32(idx) - } - return -} - -// CopyValues will copy the values from the memo table out into the passed in slice -// which must be of the appropriate type. -func (s *Int64MemoTable) CopyValues(out interface{}) { - s.CopyValuesSubset(0, out) -} - -// CopyValuesSubset is like CopyValues but only copies a subset of values starting -// at the provided start index -func (s *Int64MemoTable) CopyValuesSubset(start int, out interface{}) { - s.tbl.CopyValuesSubset(start, out.([]int64)) -} - -func (s *Int64MemoTable) WriteOut(out []byte) { - s.tbl.CopyValues(arrow.Int64Traits.CastFromBytes(out)) -} - -func (s *Int64MemoTable) WriteOutSubset(start int, out []byte) { - s.tbl.CopyValuesSubset(start, arrow.Int64Traits.CastFromBytes(out)) -} - -func (s *Int64MemoTable) WriteOutLE(out []byte) { - s.tbl.WriteOut(out) -} - -func (s *Int64MemoTable) WriteOutSubsetLE(start int, out []byte) { - s.tbl.WriteOutSubset(start, out) -} - -// Get returns the index of the requested value in the hash table or KeyNotFound -// along with a boolean indicating if it was found or not. -func (s *Int64MemoTable) Get(val interface{}) (int, bool) { - - h := hashInt(uint64(val.(int64)), 0) - if e, ok := s.tbl.Lookup(h, func(v int64) bool { return val.(int64) == v }); ok { - return int(e.payload.memoIdx), ok - } - return KeyNotFound, false -} - -// GetOrInsert will return the index of the specified value in the table, or insert the -// value into the table and return the new index. found indicates whether or not it already -// existed in the table (true) or was inserted by this call (false). -func (s *Int64MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { - - h := hashInt(uint64(val.(int64)), 0) - e, ok := s.tbl.Lookup(h, func(v int64) bool { - return val.(int64) == v - }) - - if ok { - idx = int(e.payload.memoIdx) - found = true - } else { - idx = s.Size() - s.tbl.Insert(e, h, val.(int64), int32(idx)) - } - return -} - -// GetOrInsertBytes is unimplemented -func (s *Int64MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) { - panic("unimplemented") -} - -type payloadUint32 struct { - val uint32 - memoIdx int32 -} - -type entryUint32 struct { - h uint64 - payload payloadUint32 -} - -func (e entryUint32) Valid() bool { return e.h != sentinel } - -// Uint32HashTable is a hashtable specifically for uint32 that -// is utilized with the MemoTable to generalize interactions for easier -// implementation of dictionaries without losing performance. -type Uint32HashTable struct { - cap uint64 - capMask uint64 - size uint64 - - entries []entryUint32 -} - -// NewUint32HashTable returns a new hash table for uint32 values -// initialized with the passed in capacity or 32 whichever is larger. -func NewUint32HashTable(cap uint64) *Uint32HashTable { - initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) - ret := &Uint32HashTable{cap: initCap, capMask: initCap - 1, size: 0} - ret.entries = make([]entryUint32, initCap) - return ret -} - -// Reset drops all of the values in this hash table and re-initializes it -// with the specified initial capacity as if by calling New, but without having -// to reallocate the object. -func (h *Uint32HashTable) Reset(cap uint64) { - h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) - h.capMask = h.cap - 1 - h.size = 0 - h.entries = make([]entryUint32, h.cap) -} - -// CopyValues is used for copying the values out of the hash table into the -// passed in slice, in the order that they were first inserted -func (h *Uint32HashTable) CopyValues(out []uint32) { - h.CopyValuesSubset(0, out) -} - -// CopyValuesSubset copies a subset of the values in the hashtable out, starting -// with the value at start, in the order that they were inserted. -func (h *Uint32HashTable) CopyValuesSubset(start int, out []uint32) { - h.VisitEntries(func(e *entryUint32) { - idx := e.payload.memoIdx - int32(start) - if idx >= 0 { - out[idx] = e.payload.val - } - }) -} - -func (h *Uint32HashTable) WriteOut(out []byte) { - h.WriteOutSubset(0, out) -} - -func (h *Uint32HashTable) WriteOutSubset(start int, out []byte) { - data := arrow.Uint32Traits.CastFromBytes(out) - h.VisitEntries(func(e *entryUint32) { - idx := e.payload.memoIdx - int32(start) - if idx >= 0 { - data[idx] = utils.ToLEUint32(e.payload.val) - } - }) -} - -func (h *Uint32HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } - -func (Uint32HashTable) fixHash(v uint64) uint64 { - if v == sentinel { - return 42 - } - return v -} - -// Lookup retrieves the entry for a given hash value assuming it's payload value returns -// true when passed to the cmp func. Returns a pointer to the entry for the given hash value, -// and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. -func (h *Uint32HashTable) Lookup(v uint64, cmp func(uint32) bool) (*entryUint32, bool) { - idx, ok := h.lookup(v, h.capMask, cmp) - return &h.entries[idx], ok -} - -func (h *Uint32HashTable) lookup(v uint64, szMask uint64, cmp func(uint32) bool) (uint64, bool) { - const perturbShift uint8 = 5 - - var ( - idx uint64 - perturb uint64 - e *entryUint32 - ) - - v = h.fixHash(v) - idx = v & szMask - perturb = (v >> uint64(perturbShift)) + 1 - - for { - e = &h.entries[idx] - if e.h == v && cmp(e.payload.val) { - return idx, true - } - - if e.h == sentinel { - return idx, false - } - - // perturbation logic inspired from CPython's set/dict object - // the goal is that all 64 bits of unmasked hash value eventually - // participate int he probing sequence, to minimize clustering - idx = (idx + perturb) & szMask - perturb = (perturb >> uint64(perturbShift)) + 1 - } -} - -func (h *Uint32HashTable) upsize(newcap uint64) error { - newMask := newcap - 1 - - oldEntries := h.entries - h.entries = make([]entryUint32, newcap) - for _, e := range oldEntries { - if e.Valid() { - idx, _ := h.lookup(e.h, newMask, func(uint32) bool { return false }) - h.entries[idx] = e - } - } - h.cap = newcap - h.capMask = newMask - return nil -} - -// Insert updates the given entry with the provided hash value, payload value and memo index. -// The entry pointer must have been retrieved via lookup in order to actually insert properly. -func (h *Uint32HashTable) Insert(e *entryUint32, v uint64, val uint32, memoIdx int32) error { - e.h = h.fixHash(v) - e.payload.val = val - e.payload.memoIdx = memoIdx - h.size++ - - if h.needUpsize() { - h.upsize(h.cap * uint64(loadFactor) * 2) - } - return nil -} - -// VisitEntries will call the passed in function on each *valid* entry in the hash table, -// a valid entry being one which has had a value inserted into it. -func (h *Uint32HashTable) VisitEntries(visit func(*entryUint32)) { - for _, e := range h.entries { - if e.Valid() { - visit(&e) - } - } -} - -// Uint32MemoTable is a wrapper over the appropriate hashtable to provide an interface -// conforming to the MemoTable interface defined in the encoding package for general interactions -// regarding dictionaries. -type Uint32MemoTable struct { - tbl *Uint32HashTable - nullIdx int32 -} - -// NewUint32MemoTable returns a new memotable with num entries pre-allocated to reduce further -// allocations when inserting. -func NewUint32MemoTable(num int64) *Uint32MemoTable { - return &Uint32MemoTable{tbl: NewUint32HashTable(uint64(num)), nullIdx: KeyNotFound} -} - -func (Uint32MemoTable) TypeTraits() TypeTraits { - return arrow.Uint32Traits -} - -// Reset allows this table to be re-used by dumping all the data currently in the table. -func (s *Uint32MemoTable) Reset() { - s.tbl.Reset(32) - s.nullIdx = KeyNotFound -} - -// Size returns the current number of inserted elements into the table including if a null -// has been inserted. -func (s *Uint32MemoTable) Size() int { - sz := int(s.tbl.size) - if _, ok := s.GetNull(); ok { - sz++ - } - return sz -} - -// GetNull returns the index of an inserted null or KeyNotFound along with a bool -// that will be true if found and false if not. -func (s *Uint32MemoTable) GetNull() (int, bool) { - return int(s.nullIdx), s.nullIdx != KeyNotFound -} - -// GetOrInsertNull will return the index of the null entry or insert a null entry -// if one currently doesn't exist. The found value will be true if there was already -// a null in the table, and false if it inserted one. -func (s *Uint32MemoTable) GetOrInsertNull() (idx int, found bool) { - idx, found = s.GetNull() - if !found { - idx = s.Size() - s.nullIdx = int32(idx) - } - return -} - -// CopyValues will copy the values from the memo table out into the passed in slice -// which must be of the appropriate type. -func (s *Uint32MemoTable) CopyValues(out interface{}) { - s.CopyValuesSubset(0, out) -} - -// CopyValuesSubset is like CopyValues but only copies a subset of values starting -// at the provided start index -func (s *Uint32MemoTable) CopyValuesSubset(start int, out interface{}) { - s.tbl.CopyValuesSubset(start, out.([]uint32)) -} - -func (s *Uint32MemoTable) WriteOut(out []byte) { - s.tbl.CopyValues(arrow.Uint32Traits.CastFromBytes(out)) -} - -func (s *Uint32MemoTable) WriteOutSubset(start int, out []byte) { - s.tbl.CopyValuesSubset(start, arrow.Uint32Traits.CastFromBytes(out)) -} - -func (s *Uint32MemoTable) WriteOutLE(out []byte) { - s.tbl.WriteOut(out) -} - -func (s *Uint32MemoTable) WriteOutSubsetLE(start int, out []byte) { - s.tbl.WriteOutSubset(start, out) -} - -// Get returns the index of the requested value in the hash table or KeyNotFound -// along with a boolean indicating if it was found or not. -func (s *Uint32MemoTable) Get(val interface{}) (int, bool) { - - h := hashInt(uint64(val.(uint32)), 0) - if e, ok := s.tbl.Lookup(h, func(v uint32) bool { return val.(uint32) == v }); ok { - return int(e.payload.memoIdx), ok - } - return KeyNotFound, false -} - -// GetOrInsert will return the index of the specified value in the table, or insert the -// value into the table and return the new index. found indicates whether or not it already -// existed in the table (true) or was inserted by this call (false). -func (s *Uint32MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { - - h := hashInt(uint64(val.(uint32)), 0) - e, ok := s.tbl.Lookup(h, func(v uint32) bool { - return val.(uint32) == v - }) - - if ok { - idx = int(e.payload.memoIdx) - found = true - } else { - idx = s.Size() - s.tbl.Insert(e, h, val.(uint32), int32(idx)) - } - return -} - -// GetOrInsertBytes is unimplemented -func (s *Uint32MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) { - panic("unimplemented") -} - -type payloadUint64 struct { - val uint64 - memoIdx int32 -} - -type entryUint64 struct { - h uint64 - payload payloadUint64 -} - -func (e entryUint64) Valid() bool { return e.h != sentinel } - -// Uint64HashTable is a hashtable specifically for uint64 that -// is utilized with the MemoTable to generalize interactions for easier -// implementation of dictionaries without losing performance. -type Uint64HashTable struct { - cap uint64 - capMask uint64 - size uint64 - - entries []entryUint64 -} - -// NewUint64HashTable returns a new hash table for uint64 values -// initialized with the passed in capacity or 32 whichever is larger. -func NewUint64HashTable(cap uint64) *Uint64HashTable { - initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) - ret := &Uint64HashTable{cap: initCap, capMask: initCap - 1, size: 0} - ret.entries = make([]entryUint64, initCap) - return ret -} - -// Reset drops all of the values in this hash table and re-initializes it -// with the specified initial capacity as if by calling New, but without having -// to reallocate the object. -func (h *Uint64HashTable) Reset(cap uint64) { - h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) - h.capMask = h.cap - 1 - h.size = 0 - h.entries = make([]entryUint64, h.cap) -} - -// CopyValues is used for copying the values out of the hash table into the -// passed in slice, in the order that they were first inserted -func (h *Uint64HashTable) CopyValues(out []uint64) { - h.CopyValuesSubset(0, out) -} - -// CopyValuesSubset copies a subset of the values in the hashtable out, starting -// with the value at start, in the order that they were inserted. -func (h *Uint64HashTable) CopyValuesSubset(start int, out []uint64) { - h.VisitEntries(func(e *entryUint64) { - idx := e.payload.memoIdx - int32(start) - if idx >= 0 { - out[idx] = e.payload.val - } - }) -} - -func (h *Uint64HashTable) WriteOut(out []byte) { - h.WriteOutSubset(0, out) -} - -func (h *Uint64HashTable) WriteOutSubset(start int, out []byte) { - data := arrow.Uint64Traits.CastFromBytes(out) - h.VisitEntries(func(e *entryUint64) { - idx := e.payload.memoIdx - int32(start) - if idx >= 0 { - data[idx] = utils.ToLEUint64(e.payload.val) - } - }) -} - -func (h *Uint64HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } - -func (Uint64HashTable) fixHash(v uint64) uint64 { - if v == sentinel { - return 42 - } - return v -} - -// Lookup retrieves the entry for a given hash value assuming it's payload value returns -// true when passed to the cmp func. Returns a pointer to the entry for the given hash value, -// and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. -func (h *Uint64HashTable) Lookup(v uint64, cmp func(uint64) bool) (*entryUint64, bool) { - idx, ok := h.lookup(v, h.capMask, cmp) - return &h.entries[idx], ok -} - -func (h *Uint64HashTable) lookup(v uint64, szMask uint64, cmp func(uint64) bool) (uint64, bool) { - const perturbShift uint8 = 5 - - var ( - idx uint64 - perturb uint64 - e *entryUint64 - ) - - v = h.fixHash(v) - idx = v & szMask - perturb = (v >> uint64(perturbShift)) + 1 - - for { - e = &h.entries[idx] - if e.h == v && cmp(e.payload.val) { - return idx, true - } - - if e.h == sentinel { - return idx, false - } - - // perturbation logic inspired from CPython's set/dict object - // the goal is that all 64 bits of unmasked hash value eventually - // participate int he probing sequence, to minimize clustering - idx = (idx + perturb) & szMask - perturb = (perturb >> uint64(perturbShift)) + 1 - } -} - -func (h *Uint64HashTable) upsize(newcap uint64) error { - newMask := newcap - 1 - - oldEntries := h.entries - h.entries = make([]entryUint64, newcap) - for _, e := range oldEntries { - if e.Valid() { - idx, _ := h.lookup(e.h, newMask, func(uint64) bool { return false }) - h.entries[idx] = e - } - } - h.cap = newcap - h.capMask = newMask - return nil -} - -// Insert updates the given entry with the provided hash value, payload value and memo index. -// The entry pointer must have been retrieved via lookup in order to actually insert properly. -func (h *Uint64HashTable) Insert(e *entryUint64, v uint64, val uint64, memoIdx int32) error { - e.h = h.fixHash(v) - e.payload.val = val - e.payload.memoIdx = memoIdx - h.size++ - - if h.needUpsize() { - h.upsize(h.cap * uint64(loadFactor) * 2) - } - return nil -} - -// VisitEntries will call the passed in function on each *valid* entry in the hash table, -// a valid entry being one which has had a value inserted into it. -func (h *Uint64HashTable) VisitEntries(visit func(*entryUint64)) { - for _, e := range h.entries { - if e.Valid() { - visit(&e) - } - } -} - -// Uint64MemoTable is a wrapper over the appropriate hashtable to provide an interface -// conforming to the MemoTable interface defined in the encoding package for general interactions -// regarding dictionaries. -type Uint64MemoTable struct { - tbl *Uint64HashTable - nullIdx int32 -} - -// NewUint64MemoTable returns a new memotable with num entries pre-allocated to reduce further -// allocations when inserting. -func NewUint64MemoTable(num int64) *Uint64MemoTable { - return &Uint64MemoTable{tbl: NewUint64HashTable(uint64(num)), nullIdx: KeyNotFound} -} - -func (Uint64MemoTable) TypeTraits() TypeTraits { - return arrow.Uint64Traits -} - -// Reset allows this table to be re-used by dumping all the data currently in the table. -func (s *Uint64MemoTable) Reset() { - s.tbl.Reset(32) - s.nullIdx = KeyNotFound -} - -// Size returns the current number of inserted elements into the table including if a null -// has been inserted. -func (s *Uint64MemoTable) Size() int { - sz := int(s.tbl.size) - if _, ok := s.GetNull(); ok { - sz++ - } - return sz -} - -// GetNull returns the index of an inserted null or KeyNotFound along with a bool -// that will be true if found and false if not. -func (s *Uint64MemoTable) GetNull() (int, bool) { - return int(s.nullIdx), s.nullIdx != KeyNotFound -} - -// GetOrInsertNull will return the index of the null entry or insert a null entry -// if one currently doesn't exist. The found value will be true if there was already -// a null in the table, and false if it inserted one. -func (s *Uint64MemoTable) GetOrInsertNull() (idx int, found bool) { - idx, found = s.GetNull() - if !found { - idx = s.Size() - s.nullIdx = int32(idx) - } - return -} - -// CopyValues will copy the values from the memo table out into the passed in slice -// which must be of the appropriate type. -func (s *Uint64MemoTable) CopyValues(out interface{}) { - s.CopyValuesSubset(0, out) -} - -// CopyValuesSubset is like CopyValues but only copies a subset of values starting -// at the provided start index -func (s *Uint64MemoTable) CopyValuesSubset(start int, out interface{}) { - s.tbl.CopyValuesSubset(start, out.([]uint64)) -} - -func (s *Uint64MemoTable) WriteOut(out []byte) { - s.tbl.CopyValues(arrow.Uint64Traits.CastFromBytes(out)) -} - -func (s *Uint64MemoTable) WriteOutSubset(start int, out []byte) { - s.tbl.CopyValuesSubset(start, arrow.Uint64Traits.CastFromBytes(out)) -} - -func (s *Uint64MemoTable) WriteOutLE(out []byte) { - s.tbl.WriteOut(out) -} - -func (s *Uint64MemoTable) WriteOutSubsetLE(start int, out []byte) { - s.tbl.WriteOutSubset(start, out) -} - -// Get returns the index of the requested value in the hash table or KeyNotFound -// along with a boolean indicating if it was found or not. -func (s *Uint64MemoTable) Get(val interface{}) (int, bool) { - - h := hashInt(uint64(val.(uint64)), 0) - if e, ok := s.tbl.Lookup(h, func(v uint64) bool { return val.(uint64) == v }); ok { - return int(e.payload.memoIdx), ok - } - return KeyNotFound, false -} - -// GetOrInsert will return the index of the specified value in the table, or insert the -// value into the table and return the new index. found indicates whether or not it already -// existed in the table (true) or was inserted by this call (false). -func (s *Uint64MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { - - h := hashInt(uint64(val.(uint64)), 0) - e, ok := s.tbl.Lookup(h, func(v uint64) bool { - return val.(uint64) == v - }) - - if ok { - idx = int(e.payload.memoIdx) - found = true - } else { - idx = s.Size() - s.tbl.Insert(e, h, val.(uint64), int32(idx)) - } - return -} - -// GetOrInsertBytes is unimplemented -func (s *Uint64MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) { - panic("unimplemented") -} - -type payloadFloat32 struct { - val float32 - memoIdx int32 -} - -type entryFloat32 struct { - h uint64 - payload payloadFloat32 -} - -func (e entryFloat32) Valid() bool { return e.h != sentinel } - -// Float32HashTable is a hashtable specifically for float32 that -// is utilized with the MemoTable to generalize interactions for easier -// implementation of dictionaries without losing performance. -type Float32HashTable struct { - cap uint64 - capMask uint64 - size uint64 - - entries []entryFloat32 -} - -// NewFloat32HashTable returns a new hash table for float32 values -// initialized with the passed in capacity or 32 whichever is larger. -func NewFloat32HashTable(cap uint64) *Float32HashTable { - initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) - ret := &Float32HashTable{cap: initCap, capMask: initCap - 1, size: 0} - ret.entries = make([]entryFloat32, initCap) - return ret -} - -// Reset drops all of the values in this hash table and re-initializes it -// with the specified initial capacity as if by calling New, but without having -// to reallocate the object. -func (h *Float32HashTable) Reset(cap uint64) { - h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) - h.capMask = h.cap - 1 - h.size = 0 - h.entries = make([]entryFloat32, h.cap) -} - -// CopyValues is used for copying the values out of the hash table into the -// passed in slice, in the order that they were first inserted -func (h *Float32HashTable) CopyValues(out []float32) { - h.CopyValuesSubset(0, out) -} - -// CopyValuesSubset copies a subset of the values in the hashtable out, starting -// with the value at start, in the order that they were inserted. -func (h *Float32HashTable) CopyValuesSubset(start int, out []float32) { - h.VisitEntries(func(e *entryFloat32) { - idx := e.payload.memoIdx - int32(start) - if idx >= 0 { - out[idx] = e.payload.val - } - }) -} - -func (h *Float32HashTable) WriteOut(out []byte) { - h.WriteOutSubset(0, out) -} - -func (h *Float32HashTable) WriteOutSubset(start int, out []byte) { - data := arrow.Float32Traits.CastFromBytes(out) - h.VisitEntries(func(e *entryFloat32) { - idx := e.payload.memoIdx - int32(start) - if idx >= 0 { - data[idx] = utils.ToLEFloat32(e.payload.val) - } - }) -} - -func (h *Float32HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } - -func (Float32HashTable) fixHash(v uint64) uint64 { - if v == sentinel { - return 42 - } - return v -} - -// Lookup retrieves the entry for a given hash value assuming it's payload value returns -// true when passed to the cmp func. Returns a pointer to the entry for the given hash value, -// and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. -func (h *Float32HashTable) Lookup(v uint64, cmp func(float32) bool) (*entryFloat32, bool) { - idx, ok := h.lookup(v, h.capMask, cmp) - return &h.entries[idx], ok -} - -func (h *Float32HashTable) lookup(v uint64, szMask uint64, cmp func(float32) bool) (uint64, bool) { - const perturbShift uint8 = 5 - - var ( - idx uint64 - perturb uint64 - e *entryFloat32 - ) - - v = h.fixHash(v) - idx = v & szMask - perturb = (v >> uint64(perturbShift)) + 1 - - for { - e = &h.entries[idx] - if e.h == v && cmp(e.payload.val) { - return idx, true - } - - if e.h == sentinel { - return idx, false - } - - // perturbation logic inspired from CPython's set/dict object - // the goal is that all 64 bits of unmasked hash value eventually - // participate int he probing sequence, to minimize clustering - idx = (idx + perturb) & szMask - perturb = (perturb >> uint64(perturbShift)) + 1 - } -} - -func (h *Float32HashTable) upsize(newcap uint64) error { - newMask := newcap - 1 - - oldEntries := h.entries - h.entries = make([]entryFloat32, newcap) - for _, e := range oldEntries { - if e.Valid() { - idx, _ := h.lookup(e.h, newMask, func(float32) bool { return false }) - h.entries[idx] = e - } - } - h.cap = newcap - h.capMask = newMask - return nil -} - -// Insert updates the given entry with the provided hash value, payload value and memo index. -// The entry pointer must have been retrieved via lookup in order to actually insert properly. -func (h *Float32HashTable) Insert(e *entryFloat32, v uint64, val float32, memoIdx int32) error { - e.h = h.fixHash(v) - e.payload.val = val - e.payload.memoIdx = memoIdx - h.size++ - - if h.needUpsize() { - h.upsize(h.cap * uint64(loadFactor) * 2) - } - return nil -} - -// VisitEntries will call the passed in function on each *valid* entry in the hash table, -// a valid entry being one which has had a value inserted into it. -func (h *Float32HashTable) VisitEntries(visit func(*entryFloat32)) { - for _, e := range h.entries { - if e.Valid() { - visit(&e) - } - } -} - -// Float32MemoTable is a wrapper over the appropriate hashtable to provide an interface -// conforming to the MemoTable interface defined in the encoding package for general interactions -// regarding dictionaries. -type Float32MemoTable struct { - tbl *Float32HashTable - nullIdx int32 -} - -// NewFloat32MemoTable returns a new memotable with num entries pre-allocated to reduce further -// allocations when inserting. -func NewFloat32MemoTable(num int64) *Float32MemoTable { - return &Float32MemoTable{tbl: NewFloat32HashTable(uint64(num)), nullIdx: KeyNotFound} -} - -func (Float32MemoTable) TypeTraits() TypeTraits { - return arrow.Float32Traits -} - -// Reset allows this table to be re-used by dumping all the data currently in the table. -func (s *Float32MemoTable) Reset() { - s.tbl.Reset(32) - s.nullIdx = KeyNotFound -} - -// Size returns the current number of inserted elements into the table including if a null -// has been inserted. -func (s *Float32MemoTable) Size() int { - sz := int(s.tbl.size) - if _, ok := s.GetNull(); ok { - sz++ - } - return sz -} - -// GetNull returns the index of an inserted null or KeyNotFound along with a bool -// that will be true if found and false if not. -func (s *Float32MemoTable) GetNull() (int, bool) { - return int(s.nullIdx), s.nullIdx != KeyNotFound -} - -// GetOrInsertNull will return the index of the null entry or insert a null entry -// if one currently doesn't exist. The found value will be true if there was already -// a null in the table, and false if it inserted one. -func (s *Float32MemoTable) GetOrInsertNull() (idx int, found bool) { - idx, found = s.GetNull() - if !found { - idx = s.Size() - s.nullIdx = int32(idx) - } - return -} - -// CopyValues will copy the values from the memo table out into the passed in slice -// which must be of the appropriate type. -func (s *Float32MemoTable) CopyValues(out interface{}) { - s.CopyValuesSubset(0, out) -} - -// CopyValuesSubset is like CopyValues but only copies a subset of values starting -// at the provided start index -func (s *Float32MemoTable) CopyValuesSubset(start int, out interface{}) { - s.tbl.CopyValuesSubset(start, out.([]float32)) -} - -func (s *Float32MemoTable) WriteOut(out []byte) { - s.tbl.CopyValues(arrow.Float32Traits.CastFromBytes(out)) -} - -func (s *Float32MemoTable) WriteOutSubset(start int, out []byte) { - s.tbl.CopyValuesSubset(start, arrow.Float32Traits.CastFromBytes(out)) -} - -func (s *Float32MemoTable) WriteOutLE(out []byte) { - s.tbl.WriteOut(out) -} - -func (s *Float32MemoTable) WriteOutSubsetLE(start int, out []byte) { - s.tbl.WriteOutSubset(start, out) -} - -// Get returns the index of the requested value in the hash table or KeyNotFound -// along with a boolean indicating if it was found or not. -func (s *Float32MemoTable) Get(val interface{}) (int, bool) { - var cmp func(float32) bool - - if math.IsNaN(float64(val.(float32))) { - cmp = isNan32Cmp - // use consistent internal bit pattern for NaN regardless of the pattern - // that is passed to us. NaN is NaN is NaN - val = float32(math.NaN()) - } else { - cmp = func(v float32) bool { return val.(float32) == v } - } - - h := hashFloat32(val.(float32), 0) - if e, ok := s.tbl.Lookup(h, cmp); ok { - return int(e.payload.memoIdx), ok - } - return KeyNotFound, false -} - -// GetOrInsert will return the index of the specified value in the table, or insert the -// value into the table and return the new index. found indicates whether or not it already -// existed in the table (true) or was inserted by this call (false). -func (s *Float32MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { - - var cmp func(float32) bool - - if math.IsNaN(float64(val.(float32))) { - cmp = isNan32Cmp - // use consistent internal bit pattern for NaN regardless of the pattern - // that is passed to us. NaN is NaN is NaN - val = float32(math.NaN()) - } else { - cmp = func(v float32) bool { return val.(float32) == v } - } - - h := hashFloat32(val.(float32), 0) - e, ok := s.tbl.Lookup(h, cmp) - - if ok { - idx = int(e.payload.memoIdx) - found = true - } else { - idx = s.Size() - s.tbl.Insert(e, h, val.(float32), int32(idx)) - } - return -} - -// GetOrInsertBytes is unimplemented -func (s *Float32MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) { - panic("unimplemented") -} - -type payloadFloat64 struct { - val float64 - memoIdx int32 -} - -type entryFloat64 struct { - h uint64 - payload payloadFloat64 -} - -func (e entryFloat64) Valid() bool { return e.h != sentinel } - -// Float64HashTable is a hashtable specifically for float64 that -// is utilized with the MemoTable to generalize interactions for easier -// implementation of dictionaries without losing performance. -type Float64HashTable struct { - cap uint64 - capMask uint64 - size uint64 - - entries []entryFloat64 -} - -// NewFloat64HashTable returns a new hash table for float64 values -// initialized with the passed in capacity or 32 whichever is larger. -func NewFloat64HashTable(cap uint64) *Float64HashTable { - initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) - ret := &Float64HashTable{cap: initCap, capMask: initCap - 1, size: 0} - ret.entries = make([]entryFloat64, initCap) - return ret -} - -// Reset drops all of the values in this hash table and re-initializes it -// with the specified initial capacity as if by calling New, but without having -// to reallocate the object. -func (h *Float64HashTable) Reset(cap uint64) { - h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) - h.capMask = h.cap - 1 - h.size = 0 - h.entries = make([]entryFloat64, h.cap) -} - -// CopyValues is used for copying the values out of the hash table into the -// passed in slice, in the order that they were first inserted -func (h *Float64HashTable) CopyValues(out []float64) { - h.CopyValuesSubset(0, out) -} - -// CopyValuesSubset copies a subset of the values in the hashtable out, starting -// with the value at start, in the order that they were inserted. -func (h *Float64HashTable) CopyValuesSubset(start int, out []float64) { - h.VisitEntries(func(e *entryFloat64) { - idx := e.payload.memoIdx - int32(start) - if idx >= 0 { - out[idx] = e.payload.val - } - }) -} - -func (h *Float64HashTable) WriteOut(out []byte) { - h.WriteOutSubset(0, out) -} - -func (h *Float64HashTable) WriteOutSubset(start int, out []byte) { - data := arrow.Float64Traits.CastFromBytes(out) - h.VisitEntries(func(e *entryFloat64) { - idx := e.payload.memoIdx - int32(start) - if idx >= 0 { - data[idx] = utils.ToLEFloat64(e.payload.val) - } - }) -} - -func (h *Float64HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } - -func (Float64HashTable) fixHash(v uint64) uint64 { - if v == sentinel { - return 42 - } - return v -} - -// Lookup retrieves the entry for a given hash value assuming it's payload value returns -// true when passed to the cmp func. Returns a pointer to the entry for the given hash value, -// and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. -func (h *Float64HashTable) Lookup(v uint64, cmp func(float64) bool) (*entryFloat64, bool) { - idx, ok := h.lookup(v, h.capMask, cmp) - return &h.entries[idx], ok -} - -func (h *Float64HashTable) lookup(v uint64, szMask uint64, cmp func(float64) bool) (uint64, bool) { - const perturbShift uint8 = 5 - - var ( - idx uint64 - perturb uint64 - e *entryFloat64 - ) - - v = h.fixHash(v) - idx = v & szMask - perturb = (v >> uint64(perturbShift)) + 1 - - for { - e = &h.entries[idx] - if e.h == v && cmp(e.payload.val) { - return idx, true - } - - if e.h == sentinel { - return idx, false - } - - // perturbation logic inspired from CPython's set/dict object - // the goal is that all 64 bits of unmasked hash value eventually - // participate int he probing sequence, to minimize clustering - idx = (idx + perturb) & szMask - perturb = (perturb >> uint64(perturbShift)) + 1 - } -} - -func (h *Float64HashTable) upsize(newcap uint64) error { - newMask := newcap - 1 - - oldEntries := h.entries - h.entries = make([]entryFloat64, newcap) - for _, e := range oldEntries { - if e.Valid() { - idx, _ := h.lookup(e.h, newMask, func(float64) bool { return false }) - h.entries[idx] = e - } - } - h.cap = newcap - h.capMask = newMask - return nil -} - -// Insert updates the given entry with the provided hash value, payload value and memo index. -// The entry pointer must have been retrieved via lookup in order to actually insert properly. -func (h *Float64HashTable) Insert(e *entryFloat64, v uint64, val float64, memoIdx int32) error { - e.h = h.fixHash(v) - e.payload.val = val - e.payload.memoIdx = memoIdx - h.size++ - - if h.needUpsize() { - h.upsize(h.cap * uint64(loadFactor) * 2) - } - return nil -} - -// VisitEntries will call the passed in function on each *valid* entry in the hash table, -// a valid entry being one which has had a value inserted into it. -func (h *Float64HashTable) VisitEntries(visit func(*entryFloat64)) { - for _, e := range h.entries { - if e.Valid() { - visit(&e) - } - } -} - -// Float64MemoTable is a wrapper over the appropriate hashtable to provide an interface -// conforming to the MemoTable interface defined in the encoding package for general interactions -// regarding dictionaries. -type Float64MemoTable struct { - tbl *Float64HashTable - nullIdx int32 -} - -// NewFloat64MemoTable returns a new memotable with num entries pre-allocated to reduce further -// allocations when inserting. -func NewFloat64MemoTable(num int64) *Float64MemoTable { - return &Float64MemoTable{tbl: NewFloat64HashTable(uint64(num)), nullIdx: KeyNotFound} -} - -func (Float64MemoTable) TypeTraits() TypeTraits { - return arrow.Float64Traits -} - -// Reset allows this table to be re-used by dumping all the data currently in the table. -func (s *Float64MemoTable) Reset() { - s.tbl.Reset(32) - s.nullIdx = KeyNotFound -} - -// Size returns the current number of inserted elements into the table including if a null -// has been inserted. -func (s *Float64MemoTable) Size() int { - sz := int(s.tbl.size) - if _, ok := s.GetNull(); ok { - sz++ - } - return sz -} - -// GetNull returns the index of an inserted null or KeyNotFound along with a bool -// that will be true if found and false if not. -func (s *Float64MemoTable) GetNull() (int, bool) { - return int(s.nullIdx), s.nullIdx != KeyNotFound -} - -// GetOrInsertNull will return the index of the null entry or insert a null entry -// if one currently doesn't exist. The found value will be true if there was already -// a null in the table, and false if it inserted one. -func (s *Float64MemoTable) GetOrInsertNull() (idx int, found bool) { - idx, found = s.GetNull() - if !found { - idx = s.Size() - s.nullIdx = int32(idx) - } - return -} - -// CopyValues will copy the values from the memo table out into the passed in slice -// which must be of the appropriate type. -func (s *Float64MemoTable) CopyValues(out interface{}) { - s.CopyValuesSubset(0, out) -} - -// CopyValuesSubset is like CopyValues but only copies a subset of values starting -// at the provided start index -func (s *Float64MemoTable) CopyValuesSubset(start int, out interface{}) { - s.tbl.CopyValuesSubset(start, out.([]float64)) -} - -func (s *Float64MemoTable) WriteOut(out []byte) { - s.tbl.CopyValues(arrow.Float64Traits.CastFromBytes(out)) -} - -func (s *Float64MemoTable) WriteOutSubset(start int, out []byte) { - s.tbl.CopyValuesSubset(start, arrow.Float64Traits.CastFromBytes(out)) -} - -func (s *Float64MemoTable) WriteOutLE(out []byte) { - s.tbl.WriteOut(out) -} - -func (s *Float64MemoTable) WriteOutSubsetLE(start int, out []byte) { - s.tbl.WriteOutSubset(start, out) -} - -// Get returns the index of the requested value in the hash table or KeyNotFound -// along with a boolean indicating if it was found or not. -func (s *Float64MemoTable) Get(val interface{}) (int, bool) { - var cmp func(float64) bool - if math.IsNaN(val.(float64)) { - cmp = math.IsNaN - // use consistent internal bit pattern for NaN regardless of the pattern - // that is passed to us. NaN is NaN is NaN - val = math.NaN() - } else { - cmp = func(v float64) bool { return val.(float64) == v } - } - - h := hashFloat64(val.(float64), 0) - if e, ok := s.tbl.Lookup(h, cmp); ok { - return int(e.payload.memoIdx), ok - } - return KeyNotFound, false -} - -// GetOrInsert will return the index of the specified value in the table, or insert the -// value into the table and return the new index. found indicates whether or not it already -// existed in the table (true) or was inserted by this call (false). -func (s *Float64MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { - - var cmp func(float64) bool - if math.IsNaN(val.(float64)) { - cmp = math.IsNaN - // use consistent internal bit pattern for NaN regardless of the pattern - // that is passed to us. NaN is NaN is NaN - val = math.NaN() - } else { - cmp = func(v float64) bool { return val.(float64) == v } - } - - h := hashFloat64(val.(float64), 0) - e, ok := s.tbl.Lookup(h, cmp) - - if ok { - idx = int(e.payload.memoIdx) - found = true - } else { - idx = s.Size() - s.tbl.Insert(e, h, val.(float64), int32(idx)) - } - return -} - -// GetOrInsertBytes is unimplemented -func (s *Float64MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) { - panic("unimplemented") -} diff --git a/vendor/github.com/apache/arrow/go/v14/internal/hashing/xxh3_memo_table.gen.go.tmpl b/vendor/github.com/apache/arrow/go/v14/internal/hashing/xxh3_memo_table.gen.go.tmpl deleted file mode 100644 index 25164341d..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/hashing/xxh3_memo_table.gen.go.tmpl +++ /dev/null @@ -1,349 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package hashing - -import ( - "github.com/apache/arrow/go/v14/arrow/bitutil" - "github.com/apache/arrow/go/v14/internal/utils" -) - -{{range .In}} -type payload{{.Name}} struct { - val {{.name}} - memoIdx int32 -} - -type entry{{.Name}} struct { - h uint64 - payload payload{{.Name}} -} - -func (e entry{{.Name}}) Valid() bool { return e.h != sentinel } - -// {{.Name}}HashTable is a hashtable specifically for {{.name}} that -// is utilized with the MemoTable to generalize interactions for easier -// implementation of dictionaries without losing performance. -type {{.Name}}HashTable struct { - cap uint64 - capMask uint64 - size uint64 - - entries []entry{{.Name}} -} - -// New{{.Name}}HashTable returns a new hash table for {{.name}} values -// initialized with the passed in capacity or 32 whichever is larger. -func New{{.Name}}HashTable(cap uint64) *{{.Name}}HashTable { - initCap := uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) - ret := &{{.Name}}HashTable{cap: initCap, capMask: initCap - 1, size: 0} - ret.entries = make([]entry{{.Name}}, initCap) - return ret -} - -// Reset drops all of the values in this hash table and re-initializes it -// with the specified initial capacity as if by calling New, but without having -// to reallocate the object. -func (h *{{.Name}}HashTable) Reset(cap uint64) { - h.cap = uint64(bitutil.NextPowerOf2(int(max(cap, 32)))) - h.capMask = h.cap - 1 - h.size = 0 - h.entries = make([]entry{{.Name}}, h.cap) -} - -// CopyValues is used for copying the values out of the hash table into the -// passed in slice, in the order that they were first inserted -func (h *{{.Name}}HashTable) CopyValues(out []{{.name}}) { - h.CopyValuesSubset(0, out) -} - -// CopyValuesSubset copies a subset of the values in the hashtable out, starting -// with the value at start, in the order that they were inserted. -func (h *{{.Name}}HashTable) CopyValuesSubset(start int, out []{{.name}}) { - h.VisitEntries(func(e *entry{{.Name}}) { - idx := e.payload.memoIdx - int32(start) - if idx >= 0 { - out[idx] = e.payload.val - } - }) -} - -func (h *{{.Name}}HashTable) WriteOut(out []byte) { - h.WriteOutSubset(0, out) -} - -func (h *{{.Name}}HashTable) WriteOutSubset(start int, out []byte) { - data := arrow.{{.Name}}Traits.CastFromBytes(out) - h.VisitEntries(func(e *entry{{.Name}}) { - idx := e.payload.memoIdx - int32(start) - if idx >= 0 { -{{if and (ne .Name "Int8") (ne .Name "Uint8") -}} - data[idx] = utils.ToLE{{.Name}}(e.payload.val) -{{else -}} - data[idx] = e.payload.val -{{end -}} - } - }) -} - -func (h *{{.Name}}HashTable) needUpsize() bool { return h.size*uint64(loadFactor) >= h.cap } - -func ({{.Name}}HashTable) fixHash(v uint64) uint64 { - if v == sentinel { - return 42 - } - return v -} - -// Lookup retrieves the entry for a given hash value assuming it's payload value returns -// true when passed to the cmp func. Returns a pointer to the entry for the given hash value, -// and a boolean as to whether it was found. It is not safe to use the pointer if the bool is false. -func (h *{{.Name}}HashTable) Lookup(v uint64, cmp func({{.name}}) bool) (*entry{{.Name}}, bool) { - idx, ok := h.lookup(v, h.capMask, cmp) - return &h.entries[idx], ok -} - -func (h *{{.Name}}HashTable) lookup(v uint64, szMask uint64, cmp func({{.name}}) bool) (uint64, bool) { - const perturbShift uint8 = 5 - - var ( - idx uint64 - perturb uint64 - e *entry{{.Name}} - ) - - v = h.fixHash(v) - idx = v & szMask - perturb = (v >> uint64(perturbShift)) + 1 - - for { - e = &h.entries[idx] - if e.h == v && cmp(e.payload.val) { - return idx, true - } - - if e.h == sentinel { - return idx, false - } - - // perturbation logic inspired from CPython's set/dict object - // the goal is that all 64 bits of unmasked hash value eventually - // participate int he probing sequence, to minimize clustering - idx = (idx + perturb) & szMask - perturb = (perturb >> uint64(perturbShift)) + 1 - } -} - -func (h *{{.Name}}HashTable) upsize(newcap uint64) error { - newMask := newcap - 1 - - oldEntries := h.entries - h.entries = make([]entry{{.Name}}, newcap) - for _, e := range oldEntries { - if e.Valid() { - idx, _ := h.lookup(e.h, newMask, func({{.name}}) bool { return false }) - h.entries[idx] = e - } - } - h.cap = newcap - h.capMask = newMask - return nil -} - -// Insert updates the given entry with the provided hash value, payload value and memo index. -// The entry pointer must have been retrieved via lookup in order to actually insert properly. -func (h *{{.Name}}HashTable) Insert(e *entry{{.Name}}, v uint64, val {{.name}}, memoIdx int32) error { - e.h = h.fixHash(v) - e.payload.val = val - e.payload.memoIdx = memoIdx - h.size++ - - if h.needUpsize() { - h.upsize(h.cap * uint64(loadFactor) * 2) - } - return nil -} - -// VisitEntries will call the passed in function on each *valid* entry in the hash table, -// a valid entry being one which has had a value inserted into it. -func (h *{{.Name}}HashTable) VisitEntries(visit func(*entry{{.Name}})) { - for _, e := range h.entries { - if e.Valid() { - visit(&e) - } - } -} - -// {{.Name}}MemoTable is a wrapper over the appropriate hashtable to provide an interface -// conforming to the MemoTable interface defined in the encoding package for general interactions -// regarding dictionaries. -type {{.Name}}MemoTable struct { - tbl *{{.Name}}HashTable - nullIdx int32 -} - -// New{{.Name}}MemoTable returns a new memotable with num entries pre-allocated to reduce further -// allocations when inserting. -func New{{.Name}}MemoTable(num int64) *{{.Name}}MemoTable { - return &{{.Name}}MemoTable{tbl: New{{.Name}}HashTable(uint64(num)), nullIdx: KeyNotFound} -} - -func ({{.Name}}MemoTable) TypeTraits() TypeTraits { - return arrow.{{.Name}}Traits -} - -// Reset allows this table to be re-used by dumping all the data currently in the table. -func (s *{{.Name}}MemoTable) Reset() { - s.tbl.Reset(32) - s.nullIdx = KeyNotFound -} - -// Size returns the current number of inserted elements into the table including if a null -// has been inserted. -func (s *{{.Name}}MemoTable) Size() int { - sz := int(s.tbl.size) - if _, ok := s.GetNull(); ok { - sz++ - } - return sz -} - -// GetNull returns the index of an inserted null or KeyNotFound along with a bool -// that will be true if found and false if not. -func (s *{{.Name}}MemoTable) GetNull() (int, bool) { - return int(s.nullIdx), s.nullIdx != KeyNotFound -} - -// GetOrInsertNull will return the index of the null entry or insert a null entry -// if one currently doesn't exist. The found value will be true if there was already -// a null in the table, and false if it inserted one. -func (s *{{.Name}}MemoTable) GetOrInsertNull() (idx int, found bool) { - idx, found = s.GetNull() - if !found { - idx = s.Size() - s.nullIdx = int32(idx) - } - return -} - -// CopyValues will copy the values from the memo table out into the passed in slice -// which must be of the appropriate type. -func (s *{{.Name}}MemoTable) CopyValues(out interface{}) { - s.CopyValuesSubset(0, out) -} - -// CopyValuesSubset is like CopyValues but only copies a subset of values starting -// at the provided start index -func (s *{{.Name}}MemoTable) CopyValuesSubset(start int, out interface{}) { - s.tbl.CopyValuesSubset(start, out.([]{{.name}})) -} - -func (s *{{.Name}}MemoTable) WriteOut(out []byte) { - s.tbl.CopyValues(arrow.{{.Name}}Traits.CastFromBytes(out)) -} - -func (s *{{.Name}}MemoTable) WriteOutSubset(start int, out []byte) { - s.tbl.CopyValuesSubset(start, arrow.{{.Name}}Traits.CastFromBytes(out)) -} - -func (s *{{.Name}}MemoTable) WriteOutLE(out []byte) { - s.tbl.WriteOut(out) -} - -func (s *{{.Name}}MemoTable) WriteOutSubsetLE(start int, out []byte) { - s.tbl.WriteOutSubset(start, out) -} - -// Get returns the index of the requested value in the hash table or KeyNotFound -// along with a boolean indicating if it was found or not. -func (s *{{.Name}}MemoTable) Get(val interface{}) (int, bool) { -{{if and (ne .Name "Float32") (ne .Name "Float64") }} - h := hashInt(uint64(val.({{.name}})), 0) - if e, ok := s.tbl.Lookup(h, func(v {{.name}}) bool { return val.({{.name}}) == v }); ok { -{{ else -}} - var cmp func({{.name}}) bool - {{if eq .Name "Float32"}} - if math.IsNaN(float64(val.(float32))) { - cmp = isNan32Cmp - // use consistent internal bit pattern for NaN regardless of the pattern - // that is passed to us. NaN is NaN is NaN - val = float32(math.NaN()) - {{ else -}} - if math.IsNaN(val.(float64)) { - cmp = math.IsNaN - // use consistent internal bit pattern for NaN regardless of the pattern - // that is passed to us. NaN is NaN is NaN - val = math.NaN() - {{end -}} - } else { - cmp = func(v {{.name}}) bool { return val.({{.name}}) == v } - } - - h := hash{{.Name}}(val.({{.name}}), 0) - if e, ok := s.tbl.Lookup(h, cmp); ok { -{{ end -}} - return int(e.payload.memoIdx), ok - } - return KeyNotFound, false -} - -// GetOrInsert will return the index of the specified value in the table, or insert the -// value into the table and return the new index. found indicates whether or not it already -// existed in the table (true) or was inserted by this call (false). -func (s *{{.Name}}MemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { - {{if and (ne .Name "Float32") (ne .Name "Float64") }} - h := hashInt(uint64(val.({{.name}})), 0) - e, ok := s.tbl.Lookup(h, func(v {{.name}}) bool { - return val.({{.name}}) == v - }) -{{ else }} - var cmp func({{.name}}) bool - {{if eq .Name "Float32"}} - if math.IsNaN(float64(val.(float32))) { - cmp = isNan32Cmp - // use consistent internal bit pattern for NaN regardless of the pattern - // that is passed to us. NaN is NaN is NaN - val = float32(math.NaN()) - {{ else -}} - if math.IsNaN(val.(float64)) { - cmp = math.IsNaN - // use consistent internal bit pattern for NaN regardless of the pattern - // that is passed to us. NaN is NaN is NaN - val = math.NaN() - {{end -}} - } else { - cmp = func(v {{.name}}) bool { return val.({{.name}}) == v } - } - - h := hash{{.Name}}(val.({{.name}}), 0) - e, ok := s.tbl.Lookup(h, cmp) -{{ end }} - if ok { - idx = int(e.payload.memoIdx) - found = true - } else { - idx = s.Size() - s.tbl.Insert(e, h, val.({{.name}}), int32(idx)) - } - return -} - - -// GetOrInsertBytes is unimplemented -func (s *{{.Name}}MemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) { - panic("unimplemented") -} -{{end}} diff --git a/vendor/github.com/apache/arrow/go/v14/internal/hashing/xxh3_memo_table.go b/vendor/github.com/apache/arrow/go/v14/internal/hashing/xxh3_memo_table.go deleted file mode 100644 index 81994f0a8..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/hashing/xxh3_memo_table.go +++ /dev/null @@ -1,443 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Package hashing provides utilities for and an implementation of a hash -// table which is more performant than the default go map implementation -// by leveraging xxh3 and some custom hash functions. -package hashing - -import ( - "bytes" - "math" - "reflect" - "unsafe" -) - -//go:generate go run ../../arrow/_tools/tmpl/main.go -i -data=types.tmpldata xxh3_memo_table.gen.go.tmpl - -type TypeTraits interface { - BytesRequired(n int) int -} - -type ByteSlice interface { - Bytes() []byte -} - -// MemoTable interface for hash tables and dictionary encoding. -// -// Values will remember the order they are inserted to generate a valid -// dictionary. -type MemoTable interface { - TypeTraits() TypeTraits - // Reset drops everything in the table allowing it to be reused - Reset() - // Size returns the current number of unique values stored in - // the table, including whether or not a null value has been - // inserted via GetOrInsertNull. - Size() int - // GetOrInsert returns the index of the table the specified value is, - // and a boolean indicating whether or not the value was found in - // the table (if false, the value was inserted). An error is returned - // if val is not the appropriate type for the table. - GetOrInsert(val interface{}) (idx int, existed bool, err error) - // GetOrInsertBytes returns the index of the table the specified value is, - // and a boolean indicating whether or not the value was found in - // the table (if false, the value was inserted). An error is returned - // if val is not the appropriate type for the table. This function is intended to be used by - // the BinaryMemoTable to prevent uncessary allocations of the data when converting from a []byte to interface{}. - GetOrInsertBytes(val []byte) (idx int, existed bool, err error) - // GetOrInsertNull returns the index of the null value in the table, - // inserting one if it hasn't already been inserted. It returns a boolean - // indicating if the null value already existed or not in the table. - GetOrInsertNull() (idx int, existed bool) - // GetNull returns the index of the null value in the table, but does not - // insert one if it doesn't already exist. Will return -1 if it doesn't exist - // indicated by a false value for the boolean. - GetNull() (idx int, exists bool) - // WriteOut copys the unique values of the memotable out to the byte slice - // provided. Must have allocated enough bytes for all the values. - WriteOut(out []byte) - // WriteOutSubset is like WriteOut, but only writes a subset of values - // starting with the index offset. - WriteOutSubset(offset int, out []byte) -} - -type NumericMemoTable interface { - MemoTable - WriteOutLE(out []byte) - WriteOutSubsetLE(offset int, out []byte) -} - -const ( - sentinel uint64 = 0 - loadFactor int64 = 2 -) - -func max(a, b uint64) uint64 { - if a > b { - return a - } - return b -} - -var isNan32Cmp = func(v float32) bool { return math.IsNaN(float64(v)) } - -// KeyNotFound is the constant returned by memo table functions when a key isn't found in the table -const KeyNotFound = -1 - -type BinaryBuilderIFace interface { - Reserve(int) - ReserveData(int) - Retain() - Resize(int) - ResizeData(int) - Release() - DataLen() int - Value(int) []byte - Len() int - AppendNull() - AppendString(string) - Append([]byte) -} - -// BinaryMemoTable is our hashtable for binary data using the BinaryBuilder -// to construct the actual data in an easy to pass around way with minimal copies -// while using a hash table to keep track of the indexes into the dictionary that -// is created as we go. -type BinaryMemoTable struct { - tbl *Int32HashTable - builder BinaryBuilderIFace - nullIdx int -} - -// NewBinaryMemoTable returns a hash table for Binary data, the passed in allocator will -// be utilized for the BinaryBuilder, if nil then memory.DefaultAllocator will be used. -// initial and valuesize can be used to pre-allocate the table to reduce allocations. With -// initial being the initial number of entries to allocate for and valuesize being the starting -// amount of space allocated for writing the actual binary data. -func NewBinaryMemoTable(initial, valuesize int, bldr BinaryBuilderIFace) *BinaryMemoTable { - bldr.Reserve(int(initial)) - datasize := valuesize - if datasize <= 0 { - datasize = initial * 4 - } - bldr.ReserveData(datasize) - return &BinaryMemoTable{tbl: NewInt32HashTable(uint64(initial)), builder: bldr, nullIdx: KeyNotFound} -} - -type unimplementedtraits struct{} - -func (unimplementedtraits) BytesRequired(int) int { panic("unimplemented") } - -func (BinaryMemoTable) TypeTraits() TypeTraits { - return unimplementedtraits{} -} - -// Reset dumps all of the data in the table allowing it to be reutilized. -func (s *BinaryMemoTable) Reset() { - s.tbl.Reset(32) - s.builder.Resize(0) - s.builder.ResizeData(0) - s.builder.Reserve(int(32)) - s.builder.ReserveData(int(32) * 4) - s.nullIdx = KeyNotFound -} - -// GetNull returns the index of a null that has been inserted into the table or -// KeyNotFound. The bool returned will be true if there was a null inserted into -// the table, and false otherwise. -func (s *BinaryMemoTable) GetNull() (int, bool) { - return int(s.nullIdx), s.nullIdx != KeyNotFound -} - -// Size returns the current size of the memo table including the null value -// if one has been inserted. -func (s *BinaryMemoTable) Size() int { - sz := int(s.tbl.size) - if _, ok := s.GetNull(); ok { - sz++ - } - return sz -} - -// helper function to easily return a byte slice for any given value -// regardless of the type if it's a []byte, string, or fulfills the -// ByteSlice interface. -func (BinaryMemoTable) valAsByteSlice(val interface{}) []byte { - switch v := val.(type) { - case []byte: - return v - case ByteSlice: - return v.Bytes() - case string: - var out []byte - h := (*reflect.StringHeader)(unsafe.Pointer(&v)) - s := (*reflect.SliceHeader)(unsafe.Pointer(&out)) - s.Data = h.Data - s.Len = h.Len - s.Cap = h.Len - return out - default: - panic("invalid type for binarymemotable") - } -} - -// helper function to get the hash value regardless of the underlying binary type -func (BinaryMemoTable) getHash(val interface{}) uint64 { - switch v := val.(type) { - case string: - return hashString(v, 0) - case []byte: - return Hash(v, 0) - case ByteSlice: - return Hash(v.Bytes(), 0) - default: - panic("invalid type for binarymemotable") - } -} - -// helper function to append the given value to the builder regardless -// of the underlying binary type. -func (b *BinaryMemoTable) appendVal(val interface{}) { - switch v := val.(type) { - case string: - b.builder.AppendString(v) - case []byte: - b.builder.Append(v) - case ByteSlice: - b.builder.Append(v.Bytes()) - } -} - -func (b *BinaryMemoTable) lookup(h uint64, val []byte) (*entryInt32, bool) { - return b.tbl.Lookup(h, func(i int32) bool { - return bytes.Equal(val, b.builder.Value(int(i))) - }) -} - -// Get returns the index of the specified value in the table or KeyNotFound, -// and a boolean indicating whether it was found in the table. -func (b *BinaryMemoTable) Get(val interface{}) (int, bool) { - if p, ok := b.lookup(b.getHash(val), b.valAsByteSlice(val)); ok { - return int(p.payload.val), ok - } - return KeyNotFound, false -} - -// GetOrInsertBytes returns the index of the given value in the table, if not found -// it is inserted into the table. The return value 'found' indicates whether the value -// was found in the table (true) or inserted (false) along with any possible error. -func (b *BinaryMemoTable) GetOrInsertBytes(val []byte) (idx int, found bool, err error) { - h := Hash(val, 0) - p, found := b.lookup(h, val) - if found { - idx = int(p.payload.val) - } else { - idx = b.Size() - b.builder.Append(val) - b.tbl.Insert(p, h, int32(idx), -1) - } - return -} - -// GetOrInsert returns the index of the given value in the table, if not found -// it is inserted into the table. The return value 'found' indicates whether the value -// was found in the table (true) or inserted (false) along with any possible error. -func (b *BinaryMemoTable) GetOrInsert(val interface{}) (idx int, found bool, err error) { - h := b.getHash(val) - p, found := b.lookup(h, b.valAsByteSlice(val)) - if found { - idx = int(p.payload.val) - } else { - idx = b.Size() - b.appendVal(val) - b.tbl.Insert(p, h, int32(idx), -1) - } - return -} - -// GetOrInsertNull retrieves the index of a null in the table or inserts -// null into the table, returning the index and a boolean indicating if it was -// found in the table (true) or was inserted (false). -func (b *BinaryMemoTable) GetOrInsertNull() (idx int, found bool) { - idx, found = b.GetNull() - if !found { - idx = b.Size() - b.nullIdx = idx - b.builder.AppendNull() - } - return -} - -func (b *BinaryMemoTable) Value(i int) []byte { - return b.builder.Value(i) -} - -// helper function to get the offset into the builder data for a given -// index value. -func (b *BinaryMemoTable) findOffset(idx int) uintptr { - if b.builder.DataLen() == 0 { - // only empty strings, short circuit - return 0 - } - - val := b.builder.Value(idx) - for len(val) == 0 { - idx++ - if idx >= b.builder.Len() { - break - } - val = b.builder.Value(idx) - } - if len(val) != 0 { - return uintptr(unsafe.Pointer(&val[0])) - } - return uintptr(b.builder.DataLen()) + b.findOffset(0) -} - -// CopyOffsets copies the list of offsets into the passed in slice, the offsets -// being the start and end values of the underlying allocated bytes in the builder -// for the individual values of the table. out should be at least sized to Size()+1 -func (b *BinaryMemoTable) CopyOffsets(out []int32) { - b.CopyOffsetsSubset(0, out) -} - -// CopyOffsetsSubset is like CopyOffsets but instead of copying all of the offsets, -// it gets a subset of the offsets in the table starting at the index provided by "start". -func (b *BinaryMemoTable) CopyOffsetsSubset(start int, out []int32) { - if b.builder.Len() <= start { - return - } - - first := b.findOffset(0) - delta := b.findOffset(start) - sz := b.Size() - for i := start; i < sz; i++ { - offset := int32(b.findOffset(i) - delta) - out[i-start] = offset - } - - out[sz-start] = int32(b.builder.DataLen() - (int(delta) - int(first))) -} - -// CopyLargeOffsets copies the list of offsets into the passed in slice, the offsets -// being the start and end values of the underlying allocated bytes in the builder -// for the individual values of the table. out should be at least sized to Size()+1 -func (b *BinaryMemoTable) CopyLargeOffsets(out []int64) { - b.CopyLargeOffsetsSubset(0, out) -} - -// CopyLargeOffsetsSubset is like CopyOffsets but instead of copying all of the offsets, -// it gets a subset of the offsets in the table starting at the index provided by "start". -func (b *BinaryMemoTable) CopyLargeOffsetsSubset(start int, out []int64) { - if b.builder.Len() <= start { - return - } - - first := b.findOffset(0) - delta := b.findOffset(start) - sz := b.Size() - for i := start; i < sz; i++ { - offset := int64(b.findOffset(i) - delta) - out[i-start] = offset - } - - out[sz-start] = int64(b.builder.DataLen() - (int(delta) - int(first))) -} - -// CopyValues copies the raw binary data bytes out, out should be a []byte -// with at least ValuesSize bytes allocated to copy into. -func (b *BinaryMemoTable) CopyValues(out interface{}) { - b.CopyValuesSubset(0, out) -} - -// CopyValuesSubset copies the raw binary data bytes out starting with the value -// at the index start, out should be a []byte with at least ValuesSize bytes allocated -func (b *BinaryMemoTable) CopyValuesSubset(start int, out interface{}) { - if b.builder.Len() <= start { - return - } - - var ( - first = b.findOffset(0) - offset = b.findOffset(int(start)) - length = b.builder.DataLen() - int(offset-first) - ) - - outval := out.([]byte) - copy(outval, b.builder.Value(start)[0:length]) -} - -func (b *BinaryMemoTable) WriteOut(out []byte) { - b.CopyValues(out) -} - -func (b *BinaryMemoTable) WriteOutSubset(start int, out []byte) { - b.CopyValuesSubset(start, out) -} - -// CopyFixedWidthValues exists to cope with the fact that the table doesn't keep -// track of the fixed width when inserting the null value the databuffer holds a -// zero length byte slice for the null value (if found) -func (b *BinaryMemoTable) CopyFixedWidthValues(start, width int, out []byte) { - if start >= b.Size() { - return - } - - null, exists := b.GetNull() - if !exists || null < start { - // nothing to skip, proceed as usual - b.CopyValuesSubset(start, out) - return - } - - var ( - leftOffset = b.findOffset(start) - nullOffset = b.findOffset(null) - leftSize = nullOffset - leftOffset - rightOffset = leftOffset + uintptr(b.ValuesSize()) - ) - - if leftSize > 0 { - copy(out, b.builder.Value(start)[0:leftSize]) - } - - rightSize := rightOffset - nullOffset - if rightSize > 0 { - // skip the null fixed size value - copy(out[int(leftSize)+width:], b.builder.Value(null + 1)[0:rightSize]) - } -} - -// VisitValues exists to run the visitFn on each value currently in the hash table. -func (b *BinaryMemoTable) VisitValues(start int, visitFn func([]byte)) { - for i := int(start); i < b.Size(); i++ { - visitFn(b.builder.Value(i)) - } -} - -// Release is used to tell the underlying builder that it can release the memory allocated -// when the reference count reaches 0, this is safe to be called from multiple goroutines -// simultaneously -func (b *BinaryMemoTable) Release() { b.builder.Release() } - -// Retain increases the ref count, it is safe to call it from multiple goroutines -// simultaneously. -func (b *BinaryMemoTable) Retain() { b.builder.Retain() } - -// ValuesSize returns the current total size of all the raw bytes that have been inserted -// into the memotable so far. -func (b *BinaryMemoTable) ValuesSize() int { return b.builder.DataLen() } diff --git a/vendor/github.com/apache/arrow/go/v14/internal/json/json.go b/vendor/github.com/apache/arrow/go/v14/internal/json/json.go deleted file mode 100644 index 319b12c55..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/json/json.go +++ /dev/null @@ -1,51 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build !tinygo -// +build !tinygo - -package json - -import ( - "io" - - "github.com/goccy/go-json" -) - -type Decoder = json.Decoder -type Encoder = json.Encoder -type Marshaler = json.Marshaler -type Delim = json.Delim -type UnmarshalTypeError = json.UnmarshalTypeError -type Number = json.Number -type Unmarshaler = json.Unmarshaler -type RawMessage = json.RawMessage - -func Marshal(v interface{}) ([]byte, error) { - return json.Marshal(v) -} - -func Unmarshal(data []byte, v interface{}) error { - return json.Unmarshal(data, v) -} - -func NewDecoder(r io.Reader) *Decoder { - return json.NewDecoder(r) -} - -func NewEncoder(w io.Writer) *Encoder { - return json.NewEncoder(w) -} diff --git a/vendor/github.com/apache/arrow/go/v14/internal/json/json_tinygo.go b/vendor/github.com/apache/arrow/go/v14/internal/json/json_tinygo.go deleted file mode 100644 index 8e4f447b3..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/json/json_tinygo.go +++ /dev/null @@ -1,51 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build tinygo -// +build tinygo - -package json - -import ( - "io" - - "encoding/json" -) - -type Decoder = json.Decoder -type Encoder = json.Encoder -type Marshaler = json.Marshaler -type Delim = json.Delim -type UnmarshalTypeError = json.UnmarshalTypeError -type Number = json.Number -type Unmarshaler = json.Unmarshaler -type RawMessage = json.RawMessage - -func Marshal(v interface{}) ([]byte, error) { - return json.Marshal(v) -} - -func Unmarshal(data []byte, v interface{}) error { - return json.Unmarshal(data, v) -} - -func NewDecoder(r io.Reader) *Decoder { - return json.NewDecoder(r) -} - -func NewEncoder(w io.Writer) *Encoder { - return json.NewEncoder(w) -} diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/Makefile b/vendor/github.com/apache/arrow/go/v14/internal/utils/Makefile deleted file mode 100644 index fded9d1d5..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/Makefile +++ /dev/null @@ -1,80 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# this converts rotate instructions from "ro[lr] " -> "ro[lr] , 1" for yasm compatibility -PERL_FIXUP_ROTATE=perl -i -pe 's/(ro[rl]\s+\w{2,3})$$/\1, 1/' - -C2GOASM=c2goasm -CC=clang-11 -C_FLAGS=-target x86_64-unknown-none -masm=intel -mno-red-zone -mstackrealign -mllvm -inline-threshold=1000 \ - -fno-asynchronous-unwind-tables -fno-exceptions -fno-rtti -O3 -fno-builtin -ffast-math -fno-jump-tables -I_lib -ASM_FLAGS_AVX2=-mavx2 -mfma -ASM_FLAGS_SSE4=-msse4 -ASM_FLAGS_BMI2=-mbmi2 -ASM_FLAGS_POPCNT=-mpopcnt - -C_FLAGS_NEON=-O3 -fvectorize -mllvm -force-vector-width=16 -fno-asynchronous-unwind-tables -mno-red-zone -mstackrealign -fno-exceptions \ - -fno-rtti -fno-builtin -ffast-math -fno-jump-tables -I_lib - -GO_SOURCES := $(shell find . -path ./_lib -prune -o -name '*.go' -not -name '*_test.go') -ALL_SOURCES := $(shell find . -path ./_lib -prune -o -name '*.go' -name '*.s' -not -name '*_test.go') - -.PHONEY: assembly - -INTEL_SOURCES := \ - min_max_avx2_amd64.s min_max_sse4_amd64.s transpose_ints_avx2_amd64.s transpose_ints_sse4_amd64.s - -# -# ARROW-15336: DO NOT add the assembly target for Arm64 (ARM_SOURCES) until c2goasm added the Arm64 support. -# min_max_neon_arm64.s was generated by asm2plan9s. -# And manually formatted it as the Arm64 Plan9. -# - -assembly: $(INTEL_SOURCES) - -_lib/min_max_avx2_amd64.s: _lib/min_max.c - $(CC) -S $(C_FLAGS) $(ASM_FLAGS_AVX2) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ - -_lib/min_max_sse4_amd64.s: _lib/min_max.c - $(CC) -S $(C_FLAGS) $(ASM_FLAGS_SSE4) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ - -_lib/min_max_neon.s: _lib/min_max.c - $(CC) -S $(C_FLAGS_NEON) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ - -_lib/transpose_ints_avx2_amd64.s: _lib/transpose_ints.c - $(CC) -S $(C_FLAGS) $(ASM_FLAGS_AVX2) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ - -_lib/transpose_ints_sse4_amd64.s: _lib/transpose_ints.c - $(CC) -S $(C_FLAGS) $(ASM_FLAGS_SSE4) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ - -_lib/transpose_ints_neon.s: _lib/transpose_ints.c - $(CC) -S $(C_FLAGS_NEON) $^ -o $@ ; $(PERL_FIXUP_ROTATE) $@ - -min_max_avx2_amd64.s: _lib/min_max_avx2_amd64.s - $(C2GOASM) -a -f $^ $@ - -min_max_sse4_amd64.s: _lib/min_max_sse4_amd64.s - $(C2GOASM) -a -f $^ $@ - -transpose_ints_avx2_amd64.s: _lib/transpose_ints_avx2_amd64.s - $(C2GOASM) -a -f $^ $@ - -transpose_ints_sse4_amd64.s: _lib/transpose_ints_sse4_amd64.s - $(C2GOASM) -a -f $^ $@ - -clean: - rm -f $(INTEL_SOURCES) - rm -f $(addprefix _lib/,$(INTEL_SOURCES)) diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/buf_reader.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/buf_reader.go deleted file mode 100644 index 0b2381da1..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/buf_reader.go +++ /dev/null @@ -1,212 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package utils - -import ( - "bufio" - "errors" - "fmt" - "io" -) - -// bufferedReader is similar to bufio.Reader except -// it will expand the buffer if necessary when asked to Peek -// more bytes than are in the buffer -type bufferedReader struct { - bufferSz int - buf []byte - r, w int - rd io.Reader - err error -} - -// NewBufferedReader returns a buffered reader with similar semantics to bufio.Reader -// except Peek will expand the internal buffer if needed rather than return -// an error. -func NewBufferedReader(rd io.Reader, sz int) *bufferedReader { - // if rd is already a buffered reader whose buffer is >= the requested size - // then just return it as is. no need to make a new object. - b, ok := rd.(*bufferedReader) - if ok && len(b.buf) >= sz { - return b - } - - r := &bufferedReader{ - rd: rd, - } - r.resizeBuffer(sz) - return r -} - -func (b *bufferedReader) resetBuffer() { - if b.buf == nil { - b.buf = make([]byte, b.bufferSz) - } else if b.bufferSz > cap(b.buf) { - buf := b.buf - b.buf = make([]byte, b.bufferSz) - copy(b.buf, buf) - } else { - b.buf = b.buf[:b.bufferSz] - } -} - -func (b *bufferedReader) resizeBuffer(newSize int) { - b.bufferSz = newSize - b.resetBuffer() -} - -func (b *bufferedReader) fill() error { - // slide existing data to the beginning - if b.r > 0 { - copy(b.buf, b.buf[b.r:b.w]) - b.w -= b.r - b.r = 0 - } - - if b.w >= len(b.buf) { - return fmt.Errorf("arrow/bufferedreader: %w", bufio.ErrBufferFull) - } - - n, err := io.ReadAtLeast(b.rd, b.buf[b.w:], 1) - if n < 0 { - return fmt.Errorf("arrow/bufferedreader: filling buffer: %w", bufio.ErrNegativeCount) - } - - b.w += n - b.err = err - return nil -} - -func (b *bufferedReader) readErr() error { - err := b.err - b.err = nil - return err -} - -// Buffered returns the number of bytes currently buffered -func (b *bufferedReader) Buffered() int { return b.w - b.r } - -// SetBufferSize resets the size of the internal buffer to the desired size. -// Will return an error if newSize is <= 0 or if newSize is less than the size -// of the buffered data. -func (b *bufferedReader) SetBufferSize(newSize int) error { - if newSize <= 0 { - return errors.New("buffer size should be positive") - } - - if b.w >= newSize { - return errors.New("cannot shrink read buffer if buffered data remains") - } - - b.resizeBuffer(newSize) - return nil -} - -// Peek will buffer and return n bytes from the underlying reader without advancing -// the reader itself. If n is larger than the current buffer size, the buffer will -// be expanded to accommodate the extra bytes rather than error. -func (b *bufferedReader) Peek(n int) ([]byte, error) { - if n < 0 { - return nil, fmt.Errorf("arrow/bufferedreader: %w", bufio.ErrNegativeCount) - } - - if n > len(b.buf) { - if err := b.SetBufferSize(n); err != nil { - return nil, err - } - } - - for b.w-b.r < n && b.w-b.r < len(b.buf) && b.err == nil { - b.fill() // b.w-b.r < len(b.buf) => buffer is not full - } - - return b.buf[b.r : b.r+n], b.readErr() -} - -// Discard skips the next n bytes either by advancing the internal buffer -// or by reading that many bytes in and throwing them away. -func (b *bufferedReader) Discard(n int) (discarded int, err error) { - if n < 0 { - return 0, fmt.Errorf("arrow/bufferedreader: %w", bufio.ErrNegativeCount) - } - - if n == 0 { - return - } - - remain := n - for { - skip := b.Buffered() - if skip == 0 { - b.fill() - skip = b.Buffered() - } - if skip > remain { - skip = remain - } - b.r += skip - remain -= skip - if remain == 0 { - return n, nil - } - if b.err != nil { - return n - remain, b.readErr() - } - } -} - -func (b *bufferedReader) Read(p []byte) (n int, err error) { - n = len(p) - if n == 0 { - if b.Buffered() > 0 { - return 0, nil - } - return 0, b.readErr() - } - - if b.r == b.w { - if b.err != nil { - return 0, b.readErr() - } - if len(p) >= len(b.buf) { - // large read, empty buffer - // read directly into p to avoid extra copy - n, b.err = b.rd.Read(p) - if n < 0 { - return n, fmt.Errorf("arrow/bufferedreader: %w", bufio.ErrNegativeCount) - } - return n, b.readErr() - } - - // one read - // don't use b.fill - b.r, b.w = 0, 0 - n, b.err = b.rd.Read(b.buf) - if n < 0 { - return n, fmt.Errorf("arrow/bufferedreader: %w", bufio.ErrNegativeCount) - } - if n == 0 { - return 0, b.readErr() - } - b.w += n - } - - // copy as much as we can - n = copy(p, b.buf[b.r:b.w]) - b.r += n - return n, nil -} diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/endians_default.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/endians_default.go deleted file mode 100644 index 5fd257f52..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/endians_default.go +++ /dev/null @@ -1,30 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build !s390x - -package utils - -var ( - ToLEInt16 = func(x int16) int16 { return x } - ToLEUint16 = func(x uint16) uint16 { return x } - ToLEUint32 = func(x uint32) uint32 { return x } - ToLEUint64 = func(x uint64) uint64 { return x } - ToLEInt32 = func(x int32) int32 { return x } - ToLEInt64 = func(x int64) int64 { return x } - ToLEFloat32 = func(x float32) float32 { return x } - ToLEFloat64 = func(x float64) float64 { return x } -) diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/endians_s390x.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/endians_s390x.go deleted file mode 100644 index 7bb27cd81..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/endians_s390x.go +++ /dev/null @@ -1,33 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package utils - -import ( - "math" - "math/bits" -) - -var ( - ToLEInt16 = func(x int16) int16 { return int16(bits.ReverseBytes16(uint16(x))) } - ToLEUint16 = bits.ReverseBytes16 - ToLEUint32 = bits.ReverseBytes32 - ToLEUint64 = bits.ReverseBytes64 - ToLEInt32 = func(x int32) int32 { return int32(bits.ReverseBytes32(uint32(x))) } - ToLEInt64 = func(x int64) int64 { return int64(bits.ReverseBytes64(uint64(x))) } - ToLEFloat32 = func(x float32) float32 { return math.Float32frombits(bits.ReverseBytes32(math.Float32bits(x))) } - ToLEFloat64 = func(x float64) float64 { return math.Float64frombits(bits.ReverseBytes64(math.Float64bits(x))) } -) diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/math.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/math.go deleted file mode 100644 index 62cf96ce4..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/math.go +++ /dev/null @@ -1,49 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package utils - -// Min is a convenience Min function for int64 -func Min(a, b int64) int64 { - if a < b { - return a - } - return b -} - -// MinInt is a convenience Min function for int -func MinInt(a, b int) int { - if a < b { - return a - } - return b -} - -// Max is a convenience Max function for int64 -func Max(a, b int64) int64 { - if a > b { - return a - } - return b -} - -// MaxInt is a convenience Max function for int -func MaxInt(a, b int) int { - if a > b { - return a - } - return b -} diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max.go deleted file mode 100644 index 3d7b0024a..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max.go +++ /dev/null @@ -1,212 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package utils - -import ( - "math" -) - -// this file contains pure go implementations of the min_max functions that are -// SIMD accelerated so that we can fallback to these if the cpu doesn't support -// AVX2 or SSE4 instructions. - -func int8MinMax(values []int8) (min, max int8) { - min = math.MaxInt8 - max = math.MinInt8 - - for _, v := range values { - if min > v { - min = v - } - if max < v { - max = v - } - } - return -} - -func uint8MinMax(values []uint8) (min, max uint8) { - min = math.MaxUint8 - max = 0 - - for _, v := range values { - if min > v { - min = v - } - if max < v { - max = v - } - } - return -} - -func int16MinMax(values []int16) (min, max int16) { - min = math.MaxInt16 - max = math.MinInt16 - - for _, v := range values { - if min > v { - min = v - } - if max < v { - max = v - } - } - return -} - -func uint16MinMax(values []uint16) (min, max uint16) { - min = math.MaxUint16 - max = 0 - - for _, v := range values { - if min > v { - min = v - } - if max < v { - max = v - } - } - return -} - -func int32MinMax(values []int32) (min, max int32) { - min = math.MaxInt32 - max = math.MinInt32 - - for _, v := range values { - if min > v { - min = v - } - if max < v { - max = v - } - } - return -} - -func uint32MinMax(values []uint32) (min, max uint32) { - min = math.MaxUint32 - max = 0 - - for _, v := range values { - if min > v { - min = v - } - if max < v { - max = v - } - } - return -} - -func int64MinMax(values []int64) (min, max int64) { - min = math.MaxInt64 - max = math.MinInt64 - - for _, v := range values { - if min > v { - min = v - } - if max < v { - max = v - } - } - return -} - -func uint64MinMax(values []uint64) (min, max uint64) { - min = math.MaxUint64 - max = 0 - - for _, v := range values { - if min > v { - min = v - } - if max < v { - max = v - } - } - return -} - -var minmaxFuncs = struct { - i8 func([]int8) (int8, int8) - ui8 func([]uint8) (uint8, uint8) - i16 func([]int16) (int16, int16) - ui16 func([]uint16) (uint16, uint16) - i32 func([]int32) (int32, int32) - ui32 func([]uint32) (uint32, uint32) - i64 func([]int64) (int64, int64) - ui64 func([]uint64) (uint64, uint64) -}{} - -// GetMinMaxInt8 returns the min and max for a int8 slice, using AVX2 or -// SSE4 cpu extensions if available, falling back to a pure go implementation -// if they are unavailable or built with the noasm tag. -func GetMinMaxInt8(v []int8) (min, max int8) { - return minmaxFuncs.i8(v) -} - -// GetMinMaxUint8 returns the min and max for a uint8 slice, using AVX2 or -// SSE4 cpu extensions if available, falling back to a pure go implementation -// if they are unavailable or built with the noasm tag. -func GetMinMaxUint8(v []uint8) (min, max uint8) { - return minmaxFuncs.ui8(v) -} - -// GetMinMaxInt16 returns the min and max for a int16 slice, using AVX2 or -// SSE4 cpu extensions if available, falling back to a pure go implementation -// if they are unavailable or built with the noasm tag. -func GetMinMaxInt16(v []int16) (min, max int16) { - return minmaxFuncs.i16(v) -} - -// GetMinMaxUint16 returns the min and max for a uint16 slice, using AVX2 or -// SSE4 cpu extensions if available, falling back to a pure go implementation -// if they are unavailable or built with the noasm tag. -func GetMinMaxUint16(v []uint16) (min, max uint16) { - return minmaxFuncs.ui16(v) -} - -// GetMinMaxInt32 returns the min and max for a int32 slice, using AVX2 or -// SSE4 cpu extensions if available, falling back to a pure go implementation -// if they are unavailable or built with the noasm tag. -func GetMinMaxInt32(v []int32) (min, max int32) { - return minmaxFuncs.i32(v) -} - -// GetMinMaxUint32 returns the min and max for a uint32 slice, using AVX2 or -// SSE4 cpu extensions if available, falling back to a pure go implementation -// if they are unavailable or built with the noasm tag. -func GetMinMaxUint32(v []uint32) (min, max uint32) { - return minmaxFuncs.ui32(v) -} - -// GetMinMaxInt64 returns the min and max for a int64 slice, using AVX2 or -// SSE4 cpu extensions if available, falling back to a pure go implementation -// if they are unavailable or built with the noasm tag. -func GetMinMaxInt64(v []int64) (min, max int64) { - return minmaxFuncs.i64(v) -} - -// GetMinMaxUint64 returns the min and max for a uint64 slice, using AVX2 or -// SSE4 cpu extensions if available, falling back to a pure go implementation -// if they are unavailable or built with the noasm tag. -func GetMinMaxUint64(v []uint64) (min, max uint64) { - return minmaxFuncs.ui64(v) -} diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_amd64.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_amd64.go deleted file mode 100644 index 5fccddbee..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_amd64.go +++ /dev/null @@ -1,55 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build !noasm - -package utils - -import "golang.org/x/sys/cpu" - -func init() { - // if the CPU supports AVX2 or SSE4 then let's use those to benefit from SIMD - // to accelerate the performance for finding the min and max for an integral slice. - // otherwise fallback to a pure go implementation if the cpu doesn't have these features. - if cpu.X86.HasAVX2 { - minmaxFuncs.i8 = int8MaxMinAVX2 - minmaxFuncs.ui8 = uint8MaxMinAVX2 - minmaxFuncs.i16 = int16MaxMinAVX2 - minmaxFuncs.ui16 = uint16MaxMinAVX2 - minmaxFuncs.i32 = int32MaxMinAVX2 - minmaxFuncs.ui32 = uint32MaxMinAVX2 - minmaxFuncs.i64 = int64MaxMinAVX2 - minmaxFuncs.ui64 = uint64MaxMinAVX2 - } else if cpu.X86.HasSSE42 { - minmaxFuncs.i8 = int8MaxMinSSE4 - minmaxFuncs.ui8 = uint8MaxMinSSE4 - minmaxFuncs.i16 = int16MaxMinSSE4 - minmaxFuncs.ui16 = uint16MaxMinSSE4 - minmaxFuncs.i32 = int32MaxMinSSE4 - minmaxFuncs.ui32 = uint32MaxMinSSE4 - minmaxFuncs.i64 = int64MaxMinSSE4 - minmaxFuncs.ui64 = uint64MaxMinSSE4 - } else { - minmaxFuncs.i8 = int8MinMax - minmaxFuncs.ui8 = uint8MinMax - minmaxFuncs.i16 = int16MinMax - minmaxFuncs.ui16 = uint16MinMax - minmaxFuncs.i32 = int32MinMax - minmaxFuncs.ui32 = uint32MinMax - minmaxFuncs.i64 = int64MinMax - minmaxFuncs.ui64 = uint64MinMax - } -} diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_arm64.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_arm64.go deleted file mode 100644 index 7404e95d9..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_arm64.go +++ /dev/null @@ -1,65 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build !noasm - -package utils - -import ( - "os" - "strings" -) -import "golang.org/x/sys/cpu" - -func init() { - // Added ability to enable extension via environment: - // ARM_ENABLE_EXT=NEON go test - if ext, ok := os.LookupEnv("ARM_ENABLE_EXT"); ok { - exts := strings.Split(ext, ",") - - for _, x := range exts { - switch x { - case "NEON": - cpu.ARM64.HasASIMD = true - case "AES": - cpu.ARM64.HasAES = true - case "PMULL": - cpu.ARM64.HasPMULL = true - default: - cpu.ARM64.HasASIMD = false - cpu.ARM64.HasAES = false - cpu.ARM64.HasPMULL = false - } - } - } - if cpu.ARM64.HasASIMD { - minmaxFuncs.i32 = int32MaxMinNEON - minmaxFuncs.ui32 = uint32MaxMinNEON - minmaxFuncs.i64 = int64MaxMinNEON - minmaxFuncs.ui64 = uint64MaxMinNEON - } else { - minmaxFuncs.i32 = int32MinMax - minmaxFuncs.ui32 = uint32MinMax - minmaxFuncs.i64 = int64MinMax - minmaxFuncs.ui64 = uint64MinMax - } - - // haven't yet generated the NEON arm64 for these - minmaxFuncs.i8 = int8MinMax - minmaxFuncs.ui8 = uint8MinMax - minmaxFuncs.i16 = int16MinMax - minmaxFuncs.ui16 = uint16MinMax -} diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_avx2_amd64.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_avx2_amd64.go deleted file mode 100644 index af6726243..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_avx2_amd64.go +++ /dev/null @@ -1,90 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build !noasm - -package utils - -import ( - "unsafe" -) - -// This file contains convenience functions for utilizing AVX2 intrinsics to quickly -// and efficiently get the min and max from an integral slice. - -//go:noescape -func _int8_max_min_avx2(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer) - -func int8MaxMinAVX2(values []int8) (min, max int8) { - _int8_max_min_avx2(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max)) - return -} - -//go:noescape -func _uint8_max_min_avx2(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer) - -func uint8MaxMinAVX2(values []uint8) (min, max uint8) { - _uint8_max_min_avx2(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max)) - return -} - -//go:noescape -func _int16_max_min_avx2(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer) - -func int16MaxMinAVX2(values []int16) (min, max int16) { - _int16_max_min_avx2(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max)) - return -} - -//go:noescape -func _uint16_max_min_avx2(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer) - -func uint16MaxMinAVX2(values []uint16) (min, max uint16) { - _uint16_max_min_avx2(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max)) - return -} - -//go:noescape -func _int32_max_min_avx2(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer) - -func int32MaxMinAVX2(values []int32) (min, max int32) { - _int32_max_min_avx2(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max)) - return -} - -//go:noescape -func _uint32_max_min_avx2(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer) - -func uint32MaxMinAVX2(values []uint32) (min, max uint32) { - _uint32_max_min_avx2(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max)) - return -} - -//go:noescape -func _int64_max_min_avx2(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer) - -func int64MaxMinAVX2(values []int64) (min, max int64) { - _int64_max_min_avx2(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max)) - return -} - -//go:noescape -func _uint64_max_min_avx2(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer) - -func uint64MaxMinAVX2(values []uint64) (min, max uint64) { - _uint64_max_min_avx2(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max)) - return -} diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_avx2_amd64.s b/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_avx2_amd64.s deleted file mode 100644 index fe0c36e0e..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_avx2_amd64.s +++ /dev/null @@ -1,927 +0,0 @@ -//+build !noasm !appengine -// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT - -DATA LCDATA1<>+0x000(SB)/8, $0x8080808080808080 -DATA LCDATA1<>+0x008(SB)/8, $0x8080808080808080 -DATA LCDATA1<>+0x010(SB)/8, $0x8080808080808080 -DATA LCDATA1<>+0x018(SB)/8, $0x8080808080808080 -DATA LCDATA1<>+0x020(SB)/8, $0x7f7f7f7f7f7f7f7f -DATA LCDATA1<>+0x028(SB)/8, $0x7f7f7f7f7f7f7f7f -DATA LCDATA1<>+0x030(SB)/8, $0x7f7f7f7f7f7f7f7f -DATA LCDATA1<>+0x038(SB)/8, $0x7f7f7f7f7f7f7f7f -DATA LCDATA1<>+0x040(SB)/8, $0x7f7f7f7f7f7f7f7f -DATA LCDATA1<>+0x048(SB)/8, $0x7f7f7f7f7f7f7f7f -DATA LCDATA1<>+0x050(SB)/8, $0x8080808080808080 -DATA LCDATA1<>+0x058(SB)/8, $0x8080808080808080 -GLOBL LCDATA1<>(SB), 8, $96 - -TEXT ·_int8_max_min_avx2(SB), $0-32 - - MOVQ values+0(FP), DI - MOVQ length+8(FP), SI - MOVQ minout+16(FP), DX - MOVQ maxout+24(FP), CX - LEAQ LCDATA1<>(SB), BP - - WORD $0xf685 // test esi, esi - JLE LBB0_1 - WORD $0x8941; BYTE $0xf1 // mov r9d, esi - WORD $0xfe83; BYTE $0x3f // cmp esi, 63 - JA LBB0_4 - WORD $0xb041; BYTE $0x80 // mov r8b, -128 - WORD $0xb640; BYTE $0x7f // mov sil, 127 - WORD $0x3145; BYTE $0xd2 // xor r10d, r10d - JMP LBB0_11 - -LBB0_1: - WORD $0xb640; BYTE $0x7f // mov sil, 127 - WORD $0xb041; BYTE $0x80 // mov r8b, -128 - JMP LBB0_12 - -LBB0_4: - WORD $0x8945; BYTE $0xca // mov r10d, r9d - LONG $0xc0e28341 // and r10d, -64 - LONG $0xc0428d49 // lea rax, [r10 - 64] - WORD $0x8949; BYTE $0xc0 // mov r8, rax - LONG $0x06e8c149 // shr r8, 6 - LONG $0x01c08349 // add r8, 1 - WORD $0x8548; BYTE $0xc0 // test rax, rax - JE LBB0_5 - WORD $0x894c; BYTE $0xc6 // mov rsi, r8 - LONG $0xfee68348 // and rsi, -2 - WORD $0xf748; BYTE $0xde // neg rsi - LONG $0x4d6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword 0[rbp] /* [rip + .LCPI0_0] */ - LONG $0x456ffdc5; BYTE $0x20 // vmovdqa ymm0, yword 32[rbp] /* [rip + .LCPI0_1] */ - WORD $0xc031 // xor eax, eax - LONG $0xd06ffdc5 // vmovdqa ymm2, ymm0 - LONG $0xd96ffdc5 // vmovdqa ymm3, ymm1 - -LBB0_7: - LONG $0x246ffec5; BYTE $0x07 // vmovdqu ymm4, yword [rdi + rax] - LONG $0x6c6ffec5; WORD $0x2007 // vmovdqu ymm5, yword [rdi + rax + 32] - LONG $0x746ffec5; WORD $0x4007 // vmovdqu ymm6, yword [rdi + rax + 64] - LONG $0x7c6ffec5; WORD $0x6007 // vmovdqu ymm7, yword [rdi + rax + 96] - LONG $0x387de2c4; BYTE $0xc4 // vpminsb ymm0, ymm0, ymm4 - LONG $0x386de2c4; BYTE $0xd5 // vpminsb ymm2, ymm2, ymm5 - LONG $0x3c75e2c4; BYTE $0xcc // vpmaxsb ymm1, ymm1, ymm4 - LONG $0x3c65e2c4; BYTE $0xdd // vpmaxsb ymm3, ymm3, ymm5 - LONG $0x387de2c4; BYTE $0xc6 // vpminsb ymm0, ymm0, ymm6 - LONG $0x386de2c4; BYTE $0xd7 // vpminsb ymm2, ymm2, ymm7 - LONG $0x3c75e2c4; BYTE $0xce // vpmaxsb ymm1, ymm1, ymm6 - LONG $0x3c65e2c4; BYTE $0xdf // vpmaxsb ymm3, ymm3, ymm7 - LONG $0x80e88348 // sub rax, -128 - LONG $0x02c68348 // add rsi, 2 - JNE LBB0_7 - LONG $0x01c0f641 // test r8b, 1 - JE LBB0_10 - -LBB0_9: - LONG $0x246ffec5; BYTE $0x07 // vmovdqu ymm4, yword [rdi + rax] - LONG $0x6c6ffec5; WORD $0x2007 // vmovdqu ymm5, yword [rdi + rax + 32] - LONG $0x3c65e2c4; BYTE $0xdd // vpmaxsb ymm3, ymm3, ymm5 - LONG $0x3c75e2c4; BYTE $0xcc // vpmaxsb ymm1, ymm1, ymm4 - LONG $0x386de2c4; BYTE $0xd5 // vpminsb ymm2, ymm2, ymm5 - LONG $0x387de2c4; BYTE $0xc4 // vpminsb ymm0, ymm0, ymm4 - -LBB0_10: - LONG $0x3c75e2c4; BYTE $0xcb // vpmaxsb ymm1, ymm1, ymm3 - LONG $0x397de3c4; WORD $0x01cb // vextracti128 xmm3, ymm1, 1 - LONG $0x3c71e2c4; BYTE $0xcb // vpmaxsb xmm1, xmm1, xmm3 - LONG $0x4deff1c5; BYTE $0x40 // vpxor xmm1, xmm1, oword 64[rbp] /* [rip + .LCPI0_2] */ - LONG $0x387de2c4; BYTE $0xc2 // vpminsb ymm0, ymm0, ymm2 - LONG $0xd171e9c5; BYTE $0x08 // vpsrlw xmm2, xmm1, 8 - LONG $0xcadaf1c5 // vpminub xmm1, xmm1, xmm2 - LONG $0x4179e2c4; BYTE $0xc9 // vphminposuw xmm1, xmm1 - LONG $0x7e79c1c4; BYTE $0xc8 // vmovd r8d, xmm1 - LONG $0x7ff08041 // xor r8b, 127 - LONG $0x397de3c4; WORD $0x01c1 // vextracti128 xmm1, ymm0, 1 - LONG $0x3879e2c4; BYTE $0xc1 // vpminsb xmm0, xmm0, xmm1 - LONG $0x45eff9c5; BYTE $0x50 // vpxor xmm0, xmm0, oword 80[rbp] /* [rip + .LCPI0_3] */ - LONG $0xd071f1c5; BYTE $0x08 // vpsrlw xmm1, xmm0, 8 - LONG $0xc1daf9c5 // vpminub xmm0, xmm0, xmm1 - LONG $0x4179e2c4; BYTE $0xc0 // vphminposuw xmm0, xmm0 - LONG $0xc67ef9c5 // vmovd esi, xmm0 - LONG $0x80f68040 // xor sil, -128 - WORD $0x394d; BYTE $0xca // cmp r10, r9 - JE LBB0_12 - -LBB0_11: - LONG $0x04b60f42; BYTE $0x17 // movzx eax, byte [rdi + r10] - WORD $0x3840; BYTE $0xc6 // cmp sil, al - LONG $0xf6b60f40 // movzx esi, sil - WORD $0x4f0f; BYTE $0xf0 // cmovg esi, eax - WORD $0x3841; BYTE $0xc0 // cmp r8b, al - LONG $0xc0b60f45 // movzx r8d, r8b - LONG $0xc04c0f44 // cmovl r8d, eax - LONG $0x01c28349 // add r10, 1 - WORD $0x394d; BYTE $0xd1 // cmp r9, r10 - JNE LBB0_11 - -LBB0_12: - WORD $0x8844; BYTE $0x01 // mov byte [rcx], r8b - WORD $0x8840; BYTE $0x32 // mov byte [rdx], sil - VZEROUPPER - RET - -LBB0_5: - LONG $0x4d6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword 0[rbp] /* [rip + .LCPI0_0] */ - LONG $0x456ffdc5; BYTE $0x20 // vmovdqa ymm0, yword 32[rbp] /* [rip + .LCPI0_1] */ - WORD $0xc031 // xor eax, eax - LONG $0xd06ffdc5 // vmovdqa ymm2, ymm0 - LONG $0xd96ffdc5 // vmovdqa ymm3, ymm1 - LONG $0x01c0f641 // test r8b, 1 - JNE LBB0_9 - JMP LBB0_10 - -TEXT ·_uint8_max_min_avx2(SB), $0-32 - - MOVQ values+0(FP), DI - MOVQ length+8(FP), SI - MOVQ minout+16(FP), DX - MOVQ maxout+24(FP), CX - - WORD $0xf685 // test esi, esi - JLE LBB1_1 - WORD $0x8941; BYTE $0xf1 // mov r9d, esi - WORD $0xfe83; BYTE $0x3f // cmp esi, 63 - JA LBB1_4 - WORD $0xb640; BYTE $0xff // mov sil, -1 - WORD $0x3145; BYTE $0xd2 // xor r10d, r10d - WORD $0xc031 // xor eax, eax - JMP LBB1_11 - -LBB1_1: - WORD $0xb640; BYTE $0xff // mov sil, -1 - WORD $0xc031 // xor eax, eax - JMP LBB1_12 - -LBB1_4: - WORD $0x8945; BYTE $0xca // mov r10d, r9d - LONG $0xc0e28341 // and r10d, -64 - LONG $0xc0428d49 // lea rax, [r10 - 64] - WORD $0x8949; BYTE $0xc0 // mov r8, rax - LONG $0x06e8c149 // shr r8, 6 - LONG $0x01c08349 // add r8, 1 - WORD $0x8548; BYTE $0xc0 // test rax, rax - JE LBB1_5 - WORD $0x894c; BYTE $0xc6 // mov rsi, r8 - LONG $0xfee68348 // and rsi, -2 - WORD $0xf748; BYTE $0xde // neg rsi - LONG $0xc0eff9c5 // vpxor xmm0, xmm0, xmm0 - LONG $0xc976f5c5 // vpcmpeqd ymm1, ymm1, ymm1 - WORD $0xc031 // xor eax, eax - LONG $0xd276edc5 // vpcmpeqd ymm2, ymm2, ymm2 - LONG $0xdbefe1c5 // vpxor xmm3, xmm3, xmm3 - -LBB1_7: - LONG $0x246ffec5; BYTE $0x07 // vmovdqu ymm4, yword [rdi + rax] - LONG $0x6c6ffec5; WORD $0x2007 // vmovdqu ymm5, yword [rdi + rax + 32] - LONG $0x746ffec5; WORD $0x4007 // vmovdqu ymm6, yword [rdi + rax + 64] - LONG $0x7c6ffec5; WORD $0x6007 // vmovdqu ymm7, yword [rdi + rax + 96] - LONG $0xccdaf5c5 // vpminub ymm1, ymm1, ymm4 - LONG $0xd5daedc5 // vpminub ymm2, ymm2, ymm5 - LONG $0xc4defdc5 // vpmaxub ymm0, ymm0, ymm4 - LONG $0xdddee5c5 // vpmaxub ymm3, ymm3, ymm5 - LONG $0xcedaf5c5 // vpminub ymm1, ymm1, ymm6 - LONG $0xd7daedc5 // vpminub ymm2, ymm2, ymm7 - LONG $0xc6defdc5 // vpmaxub ymm0, ymm0, ymm6 - LONG $0xdfdee5c5 // vpmaxub ymm3, ymm3, ymm7 - LONG $0x80e88348 // sub rax, -128 - LONG $0x02c68348 // add rsi, 2 - JNE LBB1_7 - LONG $0x01c0f641 // test r8b, 1 - JE LBB1_10 - -LBB1_9: - LONG $0x246ffec5; BYTE $0x07 // vmovdqu ymm4, yword [rdi + rax] - LONG $0x6c6ffec5; WORD $0x2007 // vmovdqu ymm5, yword [rdi + rax + 32] - LONG $0xdddee5c5 // vpmaxub ymm3, ymm3, ymm5 - LONG $0xc4defdc5 // vpmaxub ymm0, ymm0, ymm4 - LONG $0xd5daedc5 // vpminub ymm2, ymm2, ymm5 - LONG $0xccdaf5c5 // vpminub ymm1, ymm1, ymm4 - -LBB1_10: - LONG $0xcadaf5c5 // vpminub ymm1, ymm1, ymm2 - LONG $0xc3defdc5 // vpmaxub ymm0, ymm0, ymm3 - LONG $0x397de3c4; WORD $0x01c2 // vextracti128 xmm2, ymm0, 1 - LONG $0xc2def9c5 // vpmaxub xmm0, xmm0, xmm2 - LONG $0xd276e9c5 // vpcmpeqd xmm2, xmm2, xmm2 - LONG $0xc2eff9c5 // vpxor xmm0, xmm0, xmm2 - LONG $0xd071e9c5; BYTE $0x08 // vpsrlw xmm2, xmm0, 8 - LONG $0xc2daf9c5 // vpminub xmm0, xmm0, xmm2 - LONG $0x4179e2c4; BYTE $0xc0 // vphminposuw xmm0, xmm0 - LONG $0xc07ef9c5 // vmovd eax, xmm0 - WORD $0xd0f6 // not al - LONG $0x397de3c4; WORD $0x01c8 // vextracti128 xmm0, ymm1, 1 - LONG $0xc0daf1c5 // vpminub xmm0, xmm1, xmm0 - LONG $0xd071f1c5; BYTE $0x08 // vpsrlw xmm1, xmm0, 8 - LONG $0xc1daf9c5 // vpminub xmm0, xmm0, xmm1 - LONG $0x4179e2c4; BYTE $0xc0 // vphminposuw xmm0, xmm0 - LONG $0xc67ef9c5 // vmovd esi, xmm0 - WORD $0x394d; BYTE $0xca // cmp r10, r9 - JE LBB1_12 - -LBB1_11: - LONG $0x04b60f46; BYTE $0x17 // movzx r8d, byte [rdi + r10] - WORD $0x3844; BYTE $0xc6 // cmp sil, r8b - LONG $0xf6b60f40 // movzx esi, sil - LONG $0xf0430f41 // cmovae esi, r8d - WORD $0x3844; BYTE $0xc0 // cmp al, r8b - WORD $0xb60f; BYTE $0xc0 // movzx eax, al - LONG $0xc0460f41 // cmovbe eax, r8d - LONG $0x01c28349 // add r10, 1 - WORD $0x394d; BYTE $0xd1 // cmp r9, r10 - JNE LBB1_11 - -LBB1_12: - WORD $0x0188 // mov byte [rcx], al - WORD $0x8840; BYTE $0x32 // mov byte [rdx], sil - VZEROUPPER - RET - -LBB1_5: - LONG $0xc0eff9c5 // vpxor xmm0, xmm0, xmm0 - LONG $0xc976f5c5 // vpcmpeqd ymm1, ymm1, ymm1 - WORD $0xc031 // xor eax, eax - LONG $0xd276edc5 // vpcmpeqd ymm2, ymm2, ymm2 - LONG $0xdbefe1c5 // vpxor xmm3, xmm3, xmm3 - LONG $0x01c0f641 // test r8b, 1 - JNE LBB1_9 - JMP LBB1_10 - -DATA LCDATA2<>+0x000(SB)/8, $0x8000800080008000 -DATA LCDATA2<>+0x008(SB)/8, $0x8000800080008000 -DATA LCDATA2<>+0x010(SB)/8, $0x8000800080008000 -DATA LCDATA2<>+0x018(SB)/8, $0x8000800080008000 -DATA LCDATA2<>+0x020(SB)/8, $0x7fff7fff7fff7fff -DATA LCDATA2<>+0x028(SB)/8, $0x7fff7fff7fff7fff -DATA LCDATA2<>+0x030(SB)/8, $0x7fff7fff7fff7fff -DATA LCDATA2<>+0x038(SB)/8, $0x7fff7fff7fff7fff -DATA LCDATA2<>+0x040(SB)/8, $0x7fff7fff7fff7fff -DATA LCDATA2<>+0x048(SB)/8, $0x7fff7fff7fff7fff -DATA LCDATA2<>+0x050(SB)/8, $0x8000800080008000 -DATA LCDATA2<>+0x058(SB)/8, $0x8000800080008000 -GLOBL LCDATA2<>(SB), 8, $96 - -TEXT ·_int16_max_min_avx2(SB), $0-32 - - MOVQ values+0(FP), DI - MOVQ length+8(FP), SI - MOVQ minout+16(FP), DX - MOVQ maxout+24(FP), CX - LEAQ LCDATA2<>(SB), BP - - WORD $0xf685 // test esi, esi - JLE LBB2_1 - WORD $0x8941; BYTE $0xf1 // mov r9d, esi - WORD $0xfe83; BYTE $0x1f // cmp esi, 31 - JA LBB2_4 - LONG $0x00b84166; BYTE $0x80 // mov r8w, -32768 - LONG $0x7fffbe66 // mov si, 32767 - WORD $0x3145; BYTE $0xd2 // xor r10d, r10d - JMP LBB2_11 - -LBB2_1: - LONG $0x7fffbe66 // mov si, 32767 - LONG $0x00b84166; BYTE $0x80 // mov r8w, -32768 - JMP LBB2_12 - -LBB2_4: - WORD $0x8945; BYTE $0xca // mov r10d, r9d - LONG $0xe0e28341 // and r10d, -32 - LONG $0xe0428d49 // lea rax, [r10 - 32] - WORD $0x8949; BYTE $0xc0 // mov r8, rax - LONG $0x05e8c149 // shr r8, 5 - LONG $0x01c08349 // add r8, 1 - WORD $0x8548; BYTE $0xc0 // test rax, rax - JE LBB2_5 - WORD $0x894c; BYTE $0xc6 // mov rsi, r8 - LONG $0xfee68348 // and rsi, -2 - WORD $0xf748; BYTE $0xde // neg rsi - LONG $0x4d6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword 0[rbp] /* [rip + .LCPI2_0] */ - LONG $0x456ffdc5; BYTE $0x20 // vmovdqa ymm0, yword 32[rbp] /* [rip + .LCPI2_1] */ - WORD $0xc031 // xor eax, eax - LONG $0xd06ffdc5 // vmovdqa ymm2, ymm0 - LONG $0xd96ffdc5 // vmovdqa ymm3, ymm1 - -LBB2_7: - LONG $0x246ffec5; BYTE $0x47 // vmovdqu ymm4, yword [rdi + 2*rax] - LONG $0x6c6ffec5; WORD $0x2047 // vmovdqu ymm5, yword [rdi + 2*rax + 32] - LONG $0x746ffec5; WORD $0x4047 // vmovdqu ymm6, yword [rdi + 2*rax + 64] - LONG $0x7c6ffec5; WORD $0x6047 // vmovdqu ymm7, yword [rdi + 2*rax + 96] - LONG $0xc4eafdc5 // vpminsw ymm0, ymm0, ymm4 - LONG $0xd5eaedc5 // vpminsw ymm2, ymm2, ymm5 - LONG $0xcceef5c5 // vpmaxsw ymm1, ymm1, ymm4 - LONG $0xddeee5c5 // vpmaxsw ymm3, ymm3, ymm5 - LONG $0xc6eafdc5 // vpminsw ymm0, ymm0, ymm6 - LONG $0xd7eaedc5 // vpminsw ymm2, ymm2, ymm7 - LONG $0xceeef5c5 // vpmaxsw ymm1, ymm1, ymm6 - LONG $0xdfeee5c5 // vpmaxsw ymm3, ymm3, ymm7 - LONG $0x40c08348 // add rax, 64 - LONG $0x02c68348 // add rsi, 2 - JNE LBB2_7 - LONG $0x01c0f641 // test r8b, 1 - JE LBB2_10 - -LBB2_9: - LONG $0x246ffec5; BYTE $0x47 // vmovdqu ymm4, yword [rdi + 2*rax] - LONG $0x6c6ffec5; WORD $0x2047 // vmovdqu ymm5, yword [rdi + 2*rax + 32] - LONG $0xddeee5c5 // vpmaxsw ymm3, ymm3, ymm5 - LONG $0xcceef5c5 // vpmaxsw ymm1, ymm1, ymm4 - LONG $0xd5eaedc5 // vpminsw ymm2, ymm2, ymm5 - LONG $0xc4eafdc5 // vpminsw ymm0, ymm0, ymm4 - -LBB2_10: - LONG $0xcbeef5c5 // vpmaxsw ymm1, ymm1, ymm3 - LONG $0x397de3c4; WORD $0x01cb // vextracti128 xmm3, ymm1, 1 - LONG $0xcbeef1c5 // vpmaxsw xmm1, xmm1, xmm3 - LONG $0x4deff1c5; BYTE $0x40 // vpxor xmm1, xmm1, oword 64[rbp] /* [rip + .LCPI2_2] */ - LONG $0xc2eafdc5 // vpminsw ymm0, ymm0, ymm2 - LONG $0x4179e2c4; BYTE $0xc9 // vphminposuw xmm1, xmm1 - LONG $0x7e79c1c4; BYTE $0xc8 // vmovd r8d, xmm1 - LONG $0xfff08141; WORD $0x007f; BYTE $0x00 // xor r8d, 32767 - LONG $0x397de3c4; WORD $0x01c1 // vextracti128 xmm1, ymm0, 1 - LONG $0xc1eaf9c5 // vpminsw xmm0, xmm0, xmm1 - LONG $0x45eff9c5; BYTE $0x50 // vpxor xmm0, xmm0, oword 80[rbp] /* [rip + .LCPI2_3] */ - LONG $0x4179e2c4; BYTE $0xc0 // vphminposuw xmm0, xmm0 - LONG $0xc67ef9c5 // vmovd esi, xmm0 - LONG $0x8000f681; WORD $0x0000 // xor esi, 32768 - WORD $0x394d; BYTE $0xca // cmp r10, r9 - JE LBB2_12 - -LBB2_11: - LONG $0x04b70f42; BYTE $0x57 // movzx eax, word [rdi + 2*r10] - WORD $0x3966; BYTE $0xc6 // cmp si, ax - WORD $0x4f0f; BYTE $0xf0 // cmovg esi, eax - LONG $0xc0394166 // cmp r8w, ax - LONG $0xc04c0f44 // cmovl r8d, eax - LONG $0x01c28349 // add r10, 1 - WORD $0x394d; BYTE $0xd1 // cmp r9, r10 - JNE LBB2_11 - -LBB2_12: - LONG $0x01894466 // mov word [rcx], r8w - WORD $0x8966; BYTE $0x32 // mov word [rdx], si - VZEROUPPER - RET - -LBB2_5: - LONG $0x4d6ffdc5; BYTE $0x00 // vmovdqa ymm1, yword 0[rbp] /* [rip + .LCPI2_0] */ - LONG $0x456ffdc5; BYTE $0x20 // vmovdqa ymm0, yword 32[rbp] /* [rip + .LCPI2_1] */ - WORD $0xc031 // xor eax, eax - LONG $0xd06ffdc5 // vmovdqa ymm2, ymm0 - LONG $0xd96ffdc5 // vmovdqa ymm3, ymm1 - LONG $0x01c0f641 // test r8b, 1 - JNE LBB2_9 - JMP LBB2_10 - -TEXT ·_uint16_max_min_avx2(SB), $0-32 - - MOVQ values+0(FP), DI - MOVQ length+8(FP), SI - MOVQ minout+16(FP), DX - MOVQ maxout+24(FP), CX - - WORD $0xf685 // test esi, esi - JLE LBB3_1 - WORD $0x8941; BYTE $0xf1 // mov r9d, esi - WORD $0xfe83; BYTE $0x1f // cmp esi, 31 - JA LBB3_4 - LONG $0xffb84166; BYTE $0xff // mov r8w, -1 - WORD $0x3145; BYTE $0xd2 // xor r10d, r10d - WORD $0xf631 // xor esi, esi - JMP LBB3_11 - -LBB3_1: - LONG $0xffb84166; BYTE $0xff // mov r8w, -1 - WORD $0xf631 // xor esi, esi - JMP LBB3_12 - -LBB3_4: - WORD $0x8945; BYTE $0xca // mov r10d, r9d - LONG $0xe0e28341 // and r10d, -32 - LONG $0xe0428d49 // lea rax, [r10 - 32] - WORD $0x8949; BYTE $0xc0 // mov r8, rax - LONG $0x05e8c149 // shr r8, 5 - LONG $0x01c08349 // add r8, 1 - WORD $0x8548; BYTE $0xc0 // test rax, rax - JE LBB3_5 - WORD $0x894c; BYTE $0xc6 // mov rsi, r8 - LONG $0xfee68348 // and rsi, -2 - WORD $0xf748; BYTE $0xde // neg rsi - LONG $0xc0eff9c5 // vpxor xmm0, xmm0, xmm0 - LONG $0xc976f5c5 // vpcmpeqd ymm1, ymm1, ymm1 - WORD $0xc031 // xor eax, eax - LONG $0xd276edc5 // vpcmpeqd ymm2, ymm2, ymm2 - LONG $0xdbefe1c5 // vpxor xmm3, xmm3, xmm3 - -LBB3_7: - LONG $0x246ffec5; BYTE $0x47 // vmovdqu ymm4, yword [rdi + 2*rax] - LONG $0x6c6ffec5; WORD $0x2047 // vmovdqu ymm5, yword [rdi + 2*rax + 32] - LONG $0x746ffec5; WORD $0x4047 // vmovdqu ymm6, yword [rdi + 2*rax + 64] - LONG $0x7c6ffec5; WORD $0x6047 // vmovdqu ymm7, yword [rdi + 2*rax + 96] - LONG $0x3a75e2c4; BYTE $0xcc // vpminuw ymm1, ymm1, ymm4 - LONG $0x3a6de2c4; BYTE $0xd5 // vpminuw ymm2, ymm2, ymm5 - LONG $0x3e7de2c4; BYTE $0xc4 // vpmaxuw ymm0, ymm0, ymm4 - LONG $0x3e65e2c4; BYTE $0xdd // vpmaxuw ymm3, ymm3, ymm5 - LONG $0x3a75e2c4; BYTE $0xce // vpminuw ymm1, ymm1, ymm6 - LONG $0x3a6de2c4; BYTE $0xd7 // vpminuw ymm2, ymm2, ymm7 - LONG $0x3e7de2c4; BYTE $0xc6 // vpmaxuw ymm0, ymm0, ymm6 - LONG $0x3e65e2c4; BYTE $0xdf // vpmaxuw ymm3, ymm3, ymm7 - LONG $0x40c08348 // add rax, 64 - LONG $0x02c68348 // add rsi, 2 - JNE LBB3_7 - LONG $0x01c0f641 // test r8b, 1 - JE LBB3_10 - -LBB3_9: - LONG $0x246ffec5; BYTE $0x47 // vmovdqu ymm4, yword [rdi + 2*rax] - LONG $0x6c6ffec5; WORD $0x2047 // vmovdqu ymm5, yword [rdi + 2*rax + 32] - LONG $0x3e65e2c4; BYTE $0xdd // vpmaxuw ymm3, ymm3, ymm5 - LONG $0x3e7de2c4; BYTE $0xc4 // vpmaxuw ymm0, ymm0, ymm4 - LONG $0x3a6de2c4; BYTE $0xd5 // vpminuw ymm2, ymm2, ymm5 - LONG $0x3a75e2c4; BYTE $0xcc // vpminuw ymm1, ymm1, ymm4 - -LBB3_10: - LONG $0x3a75e2c4; BYTE $0xca // vpminuw ymm1, ymm1, ymm2 - LONG $0x3e7de2c4; BYTE $0xc3 // vpmaxuw ymm0, ymm0, ymm3 - LONG $0x397de3c4; WORD $0x01c2 // vextracti128 xmm2, ymm0, 1 - LONG $0x3e79e2c4; BYTE $0xc2 // vpmaxuw xmm0, xmm0, xmm2 - LONG $0xd276e9c5 // vpcmpeqd xmm2, xmm2, xmm2 - LONG $0xc2eff9c5 // vpxor xmm0, xmm0, xmm2 - LONG $0x4179e2c4; BYTE $0xc0 // vphminposuw xmm0, xmm0 - LONG $0xc67ef9c5 // vmovd esi, xmm0 - WORD $0xd6f7 // not esi - LONG $0x397de3c4; WORD $0x01c8 // vextracti128 xmm0, ymm1, 1 - LONG $0x3a71e2c4; BYTE $0xc0 // vpminuw xmm0, xmm1, xmm0 - LONG $0x4179e2c4; BYTE $0xc0 // vphminposuw xmm0, xmm0 - LONG $0x7e79c1c4; BYTE $0xc0 // vmovd r8d, xmm0 - WORD $0x394d; BYTE $0xca // cmp r10, r9 - JE LBB3_12 - -LBB3_11: - LONG $0x04b70f42; BYTE $0x57 // movzx eax, word [rdi + 2*r10] - LONG $0xc0394166 // cmp r8w, ax - LONG $0xc0430f44 // cmovae r8d, eax - WORD $0x3966; BYTE $0xc6 // cmp si, ax - WORD $0x460f; BYTE $0xf0 // cmovbe esi, eax - LONG $0x01c28349 // add r10, 1 - WORD $0x394d; BYTE $0xd1 // cmp r9, r10 - JNE LBB3_11 - -LBB3_12: - WORD $0x8966; BYTE $0x31 // mov word [rcx], si - LONG $0x02894466 // mov word [rdx], r8w - VZEROUPPER - RET - -LBB3_5: - LONG $0xc0eff9c5 // vpxor xmm0, xmm0, xmm0 - LONG $0xc976f5c5 // vpcmpeqd ymm1, ymm1, ymm1 - WORD $0xc031 // xor eax, eax - LONG $0xd276edc5 // vpcmpeqd ymm2, ymm2, ymm2 - LONG $0xdbefe1c5 // vpxor xmm3, xmm3, xmm3 - LONG $0x01c0f641 // test r8b, 1 - JNE LBB3_9 - JMP LBB3_10 - -DATA LCDATA3<>+0x000(SB)/8, $0x7fffffff80000000 -GLOBL LCDATA3<>(SB), 8, $8 - -TEXT ·_int32_max_min_avx2(SB), $0-32 - - MOVQ values+0(FP), DI - MOVQ length+8(FP), SI - MOVQ minout+16(FP), DX - MOVQ maxout+24(FP), CX - LEAQ LCDATA3<>(SB), BP - - WORD $0xf685 // test esi, esi - JLE LBB4_1 - WORD $0x8941; BYTE $0xf0 // mov r8d, esi - WORD $0xfe83; BYTE $0x1f // cmp esi, 31 - JA LBB4_4 - LONG $0x0000ba41; WORD $0x8000 // mov r10d, -2147483648 - LONG $0xffffffb8; BYTE $0x7f // mov eax, 2147483647 - WORD $0x3145; BYTE $0xc9 // xor r9d, r9d - JMP LBB4_7 - -LBB4_1: - LONG $0xffffffb8; BYTE $0x7f // mov eax, 2147483647 - LONG $0x000000be; BYTE $0x80 // mov esi, -2147483648 - JMP LBB4_8 - -LBB4_4: - WORD $0x8945; BYTE $0xc1 // mov r9d, r8d - LONG $0x587de2c4; WORD $0x0065 // vpbroadcastd ymm4, dword 0[rbp] /* [rip + .LCPI4_0] */ - LONG $0xe0e18341 // and r9d, -32 - LONG $0x587de2c4; WORD $0x0445 // vpbroadcastd ymm0, dword 4[rbp] /* [rip + .LCPI4_1] */ - WORD $0xc031 // xor eax, eax - LONG $0xc86ffdc5 // vmovdqa ymm1, ymm0 - LONG $0xd06ffdc5 // vmovdqa ymm2, ymm0 - LONG $0xd86ffdc5 // vmovdqa ymm3, ymm0 - LONG $0xec6ffdc5 // vmovdqa ymm5, ymm4 - LONG $0xf46ffdc5 // vmovdqa ymm6, ymm4 - LONG $0xfc6ffdc5 // vmovdqa ymm7, ymm4 - -LBB4_5: - LONG $0x046f7ec5; BYTE $0x87 // vmovdqu ymm8, yword [rdi + 4*rax] - LONG $0x4c6f7ec5; WORD $0x2087 // vmovdqu ymm9, yword [rdi + 4*rax + 32] - LONG $0x546f7ec5; WORD $0x4087 // vmovdqu ymm10, yword [rdi + 4*rax + 64] - LONG $0x5c6f7ec5; WORD $0x6087 // vmovdqu ymm11, yword [rdi + 4*rax + 96] - LONG $0x397dc2c4; BYTE $0xc0 // vpminsd ymm0, ymm0, ymm8 - LONG $0x3975c2c4; BYTE $0xc9 // vpminsd ymm1, ymm1, ymm9 - LONG $0x396dc2c4; BYTE $0xd2 // vpminsd ymm2, ymm2, ymm10 - LONG $0x3965c2c4; BYTE $0xdb // vpminsd ymm3, ymm3, ymm11 - LONG $0x3d5dc2c4; BYTE $0xe0 // vpmaxsd ymm4, ymm4, ymm8 - LONG $0x3d55c2c4; BYTE $0xe9 // vpmaxsd ymm5, ymm5, ymm9 - LONG $0x3d4dc2c4; BYTE $0xf2 // vpmaxsd ymm6, ymm6, ymm10 - LONG $0x3d45c2c4; BYTE $0xfb // vpmaxsd ymm7, ymm7, ymm11 - LONG $0x20c08348 // add rax, 32 - WORD $0x3949; BYTE $0xc1 // cmp r9, rax - JNE LBB4_5 - LONG $0x3d5de2c4; BYTE $0xe5 // vpmaxsd ymm4, ymm4, ymm5 - LONG $0x3d5de2c4; BYTE $0xe6 // vpmaxsd ymm4, ymm4, ymm6 - LONG $0x3d5de2c4; BYTE $0xe7 // vpmaxsd ymm4, ymm4, ymm7 - LONG $0x397de3c4; WORD $0x01e5 // vextracti128 xmm5, ymm4, 1 - LONG $0x3d59e2c4; BYTE $0xe5 // vpmaxsd xmm4, xmm4, xmm5 - LONG $0xec70f9c5; BYTE $0x4e // vpshufd xmm5, xmm4, 78 - LONG $0x3d59e2c4; BYTE $0xe5 // vpmaxsd xmm4, xmm4, xmm5 - LONG $0xec70f9c5; BYTE $0xe5 // vpshufd xmm5, xmm4, 229 - LONG $0x3d59e2c4; BYTE $0xe5 // vpmaxsd xmm4, xmm4, xmm5 - LONG $0x7e79c1c4; BYTE $0xe2 // vmovd r10d, xmm4 - LONG $0x397de2c4; BYTE $0xc1 // vpminsd ymm0, ymm0, ymm1 - LONG $0x397de2c4; BYTE $0xc2 // vpminsd ymm0, ymm0, ymm2 - LONG $0x397de2c4; BYTE $0xc3 // vpminsd ymm0, ymm0, ymm3 - LONG $0x397de3c4; WORD $0x01c1 // vextracti128 xmm1, ymm0, 1 - LONG $0x3979e2c4; BYTE $0xc1 // vpminsd xmm0, xmm0, xmm1 - LONG $0xc870f9c5; BYTE $0x4e // vpshufd xmm1, xmm0, 78 - LONG $0x3979e2c4; BYTE $0xc1 // vpminsd xmm0, xmm0, xmm1 - LONG $0xc870f9c5; BYTE $0xe5 // vpshufd xmm1, xmm0, 229 - LONG $0x3979e2c4; BYTE $0xc1 // vpminsd xmm0, xmm0, xmm1 - LONG $0xc07ef9c5 // vmovd eax, xmm0 - WORD $0x8944; BYTE $0xd6 // mov esi, r10d - WORD $0x394d; BYTE $0xc1 // cmp r9, r8 - JE LBB4_8 - -LBB4_7: - LONG $0x8f348b42 // mov esi, dword [rdi + 4*r9] - WORD $0xf039 // cmp eax, esi - WORD $0x4f0f; BYTE $0xc6 // cmovg eax, esi - WORD $0x3941; BYTE $0xf2 // cmp r10d, esi - LONG $0xf24d0f41 // cmovge esi, r10d - LONG $0x01c18349 // add r9, 1 - WORD $0x8941; BYTE $0xf2 // mov r10d, esi - WORD $0x394d; BYTE $0xc8 // cmp r8, r9 - JNE LBB4_7 - -LBB4_8: - WORD $0x3189 // mov dword [rcx], esi - WORD $0x0289 // mov dword [rdx], eax - VZEROUPPER - RET - -TEXT ·_uint32_max_min_avx2(SB), $0-32 - - MOVQ values+0(FP), DI - MOVQ length+8(FP), SI - MOVQ minout+16(FP), DX - MOVQ maxout+24(FP), CX - - WORD $0xf685 // test esi, esi - JLE LBB5_1 - WORD $0x8941; BYTE $0xf0 // mov r8d, esi - WORD $0xfe83; BYTE $0x1f // cmp esi, 31 - JA LBB5_4 - WORD $0x3145; BYTE $0xc9 // xor r9d, r9d - LONG $0xffffffb8; BYTE $0xff // mov eax, -1 - WORD $0x3145; BYTE $0xd2 // xor r10d, r10d - JMP LBB5_7 - -LBB5_1: - LONG $0xffffffb8; BYTE $0xff // mov eax, -1 - WORD $0xf631 // xor esi, esi - JMP LBB5_8 - -LBB5_4: - WORD $0x8945; BYTE $0xc1 // mov r9d, r8d - LONG $0xe0e18341 // and r9d, -32 - LONG $0xe4efd9c5 // vpxor xmm4, xmm4, xmm4 - LONG $0xc076fdc5 // vpcmpeqd ymm0, ymm0, ymm0 - WORD $0xc031 // xor eax, eax - LONG $0xc976f5c5 // vpcmpeqd ymm1, ymm1, ymm1 - LONG $0xd276edc5 // vpcmpeqd ymm2, ymm2, ymm2 - LONG $0xdb76e5c5 // vpcmpeqd ymm3, ymm3, ymm3 - LONG $0xedefd1c5 // vpxor xmm5, xmm5, xmm5 - LONG $0xf6efc9c5 // vpxor xmm6, xmm6, xmm6 - LONG $0xffefc1c5 // vpxor xmm7, xmm7, xmm7 - -LBB5_5: - LONG $0x046f7ec5; BYTE $0x87 // vmovdqu ymm8, yword [rdi + 4*rax] - LONG $0x4c6f7ec5; WORD $0x2087 // vmovdqu ymm9, yword [rdi + 4*rax + 32] - LONG $0x546f7ec5; WORD $0x4087 // vmovdqu ymm10, yword [rdi + 4*rax + 64] - LONG $0x5c6f7ec5; WORD $0x6087 // vmovdqu ymm11, yword [rdi + 4*rax + 96] - LONG $0x3b7dc2c4; BYTE $0xc0 // vpminud ymm0, ymm0, ymm8 - LONG $0x3b75c2c4; BYTE $0xc9 // vpminud ymm1, ymm1, ymm9 - LONG $0x3b6dc2c4; BYTE $0xd2 // vpminud ymm2, ymm2, ymm10 - LONG $0x3b65c2c4; BYTE $0xdb // vpminud ymm3, ymm3, ymm11 - LONG $0x3f5dc2c4; BYTE $0xe0 // vpmaxud ymm4, ymm4, ymm8 - LONG $0x3f55c2c4; BYTE $0xe9 // vpmaxud ymm5, ymm5, ymm9 - LONG $0x3f4dc2c4; BYTE $0xf2 // vpmaxud ymm6, ymm6, ymm10 - LONG $0x3f45c2c4; BYTE $0xfb // vpmaxud ymm7, ymm7, ymm11 - LONG $0x20c08348 // add rax, 32 - WORD $0x3949; BYTE $0xc1 // cmp r9, rax - JNE LBB5_5 - LONG $0x3f5de2c4; BYTE $0xe5 // vpmaxud ymm4, ymm4, ymm5 - LONG $0x3f5de2c4; BYTE $0xe6 // vpmaxud ymm4, ymm4, ymm6 - LONG $0x3f5de2c4; BYTE $0xe7 // vpmaxud ymm4, ymm4, ymm7 - LONG $0x397de3c4; WORD $0x01e5 // vextracti128 xmm5, ymm4, 1 - LONG $0x3f59e2c4; BYTE $0xe5 // vpmaxud xmm4, xmm4, xmm5 - LONG $0xec70f9c5; BYTE $0x4e // vpshufd xmm5, xmm4, 78 - LONG $0x3f59e2c4; BYTE $0xe5 // vpmaxud xmm4, xmm4, xmm5 - LONG $0xec70f9c5; BYTE $0xe5 // vpshufd xmm5, xmm4, 229 - LONG $0x3f59e2c4; BYTE $0xe5 // vpmaxud xmm4, xmm4, xmm5 - LONG $0x7e79c1c4; BYTE $0xe2 // vmovd r10d, xmm4 - LONG $0x3b7de2c4; BYTE $0xc1 // vpminud ymm0, ymm0, ymm1 - LONG $0x3b7de2c4; BYTE $0xc2 // vpminud ymm0, ymm0, ymm2 - LONG $0x3b7de2c4; BYTE $0xc3 // vpminud ymm0, ymm0, ymm3 - LONG $0x397de3c4; WORD $0x01c1 // vextracti128 xmm1, ymm0, 1 - LONG $0x3b79e2c4; BYTE $0xc1 // vpminud xmm0, xmm0, xmm1 - LONG $0xc870f9c5; BYTE $0x4e // vpshufd xmm1, xmm0, 78 - LONG $0x3b79e2c4; BYTE $0xc1 // vpminud xmm0, xmm0, xmm1 - LONG $0xc870f9c5; BYTE $0xe5 // vpshufd xmm1, xmm0, 229 - LONG $0x3b79e2c4; BYTE $0xc1 // vpminud xmm0, xmm0, xmm1 - LONG $0xc07ef9c5 // vmovd eax, xmm0 - WORD $0x8944; BYTE $0xd6 // mov esi, r10d - WORD $0x394d; BYTE $0xc1 // cmp r9, r8 - JE LBB5_8 - -LBB5_7: - LONG $0x8f348b42 // mov esi, dword [rdi + 4*r9] - WORD $0xf039 // cmp eax, esi - WORD $0x430f; BYTE $0xc6 // cmovae eax, esi - WORD $0x3941; BYTE $0xf2 // cmp r10d, esi - LONG $0xf2470f41 // cmova esi, r10d - LONG $0x01c18349 // add r9, 1 - WORD $0x8941; BYTE $0xf2 // mov r10d, esi - WORD $0x394d; BYTE $0xc8 // cmp r8, r9 - JNE LBB5_7 - -LBB5_8: - WORD $0x3189 // mov dword [rcx], esi - WORD $0x0289 // mov dword [rdx], eax - VZEROUPPER - RET - -DATA LCDATA4<>+0x000(SB)/8, $0x8000000000000000 -DATA LCDATA4<>+0x008(SB)/8, $0x7fffffffffffffff -GLOBL LCDATA4<>(SB), 8, $16 - -TEXT ·_int64_max_min_avx2(SB), $0-32 - - MOVQ values+0(FP), DI - MOVQ length+8(FP), SI - MOVQ minout+16(FP), DX - MOVQ maxout+24(FP), CX - LEAQ LCDATA4<>(SB), BP - - QUAD $0xffffffffffffb848; WORD $0x7fff // mov rax, 9223372036854775807 - WORD $0xf685 // test esi, esi - JLE LBB6_1 - WORD $0x8941; BYTE $0xf0 // mov r8d, esi - WORD $0xfe83; BYTE $0x0f // cmp esi, 15 - JA LBB6_4 - LONG $0x01508d4c // lea r10, [rax + 1] - WORD $0x3145; BYTE $0xc9 // xor r9d, r9d - JMP LBB6_7 - -LBB6_1: - LONG $0x01708d48 // lea rsi, [rax + 1] - JMP LBB6_8 - -LBB6_4: - WORD $0x8945; BYTE $0xc1 // mov r9d, r8d - LONG $0x597de2c4; WORD $0x0065 // vpbroadcastq ymm4, qword 0[rbp] /* [rip + .LCPI6_0] */ - LONG $0xf0e18341 // and r9d, -16 - LONG $0x597de2c4; WORD $0x0845 // vpbroadcastq ymm0, qword 8[rbp] /* [rip + .LCPI6_1] */ - WORD $0xc031 // xor eax, eax - LONG $0xd86ffdc5 // vmovdqa ymm3, ymm0 - LONG $0xd06ffdc5 // vmovdqa ymm2, ymm0 - LONG $0xc86ffdc5 // vmovdqa ymm1, ymm0 - LONG $0xfc6ffdc5 // vmovdqa ymm7, ymm4 - LONG $0xf46ffdc5 // vmovdqa ymm6, ymm4 - LONG $0xec6ffdc5 // vmovdqa ymm5, ymm4 - -LBB6_5: - LONG $0x046f7ec5; BYTE $0xc7 // vmovdqu ymm8, yword [rdi + 8*rax] - LONG $0x373d62c4; BYTE $0xc8 // vpcmpgtq ymm9, ymm8, ymm0 - LONG $0x4b3de3c4; WORD $0x90c0 // vblendvpd ymm0, ymm8, ymm0, ymm9 - LONG $0x4c6f7ec5; WORD $0x20c7 // vmovdqu ymm9, yword [rdi + 8*rax + 32] - LONG $0x373562c4; BYTE $0xd3 // vpcmpgtq ymm10, ymm9, ymm3 - LONG $0x4b35e3c4; WORD $0xa0db // vblendvpd ymm3, ymm9, ymm3, ymm10 - LONG $0x546f7ec5; WORD $0x40c7 // vmovdqu ymm10, yword [rdi + 8*rax + 64] - LONG $0x372d62c4; BYTE $0xda // vpcmpgtq ymm11, ymm10, ymm2 - LONG $0x4b2de3c4; WORD $0xb0d2 // vblendvpd ymm2, ymm10, ymm2, ymm11 - LONG $0x5c6f7ec5; WORD $0x60c7 // vmovdqu ymm11, yword [rdi + 8*rax + 96] - LONG $0x372562c4; BYTE $0xe1 // vpcmpgtq ymm12, ymm11, ymm1 - LONG $0x4b25e3c4; WORD $0xc0c9 // vblendvpd ymm1, ymm11, ymm1, ymm12 - LONG $0x375d42c4; BYTE $0xe0 // vpcmpgtq ymm12, ymm4, ymm8 - LONG $0x4b3de3c4; WORD $0xc0e4 // vblendvpd ymm4, ymm8, ymm4, ymm12 - LONG $0x374542c4; BYTE $0xc1 // vpcmpgtq ymm8, ymm7, ymm9 - LONG $0x4b35e3c4; WORD $0x80ff // vblendvpd ymm7, ymm9, ymm7, ymm8 - LONG $0x374d42c4; BYTE $0xc2 // vpcmpgtq ymm8, ymm6, ymm10 - LONG $0x4b2de3c4; WORD $0x80f6 // vblendvpd ymm6, ymm10, ymm6, ymm8 - LONG $0x375542c4; BYTE $0xc3 // vpcmpgtq ymm8, ymm5, ymm11 - LONG $0x4b25e3c4; WORD $0x80ed // vblendvpd ymm5, ymm11, ymm5, ymm8 - LONG $0x10c08348 // add rax, 16 - WORD $0x3949; BYTE $0xc1 // cmp r9, rax - JNE LBB6_5 - LONG $0x375d62c4; BYTE $0xc7 // vpcmpgtq ymm8, ymm4, ymm7 - LONG $0x4b45e3c4; WORD $0x80e4 // vblendvpd ymm4, ymm7, ymm4, ymm8 - LONG $0x375de2c4; BYTE $0xfe // vpcmpgtq ymm7, ymm4, ymm6 - LONG $0x4b4de3c4; WORD $0x70e4 // vblendvpd ymm4, ymm6, ymm4, ymm7 - LONG $0x375de2c4; BYTE $0xf5 // vpcmpgtq ymm6, ymm4, ymm5 - LONG $0x4b55e3c4; WORD $0x60e4 // vblendvpd ymm4, ymm5, ymm4, ymm6 - LONG $0x197de3c4; WORD $0x01e5 // vextractf128 xmm5, ymm4, 1 - LONG $0x3759e2c4; BYTE $0xf5 // vpcmpgtq xmm6, xmm4, xmm5 - LONG $0x4b51e3c4; WORD $0x60e4 // vblendvpd xmm4, xmm5, xmm4, xmm6 - LONG $0x0479e3c4; WORD $0x4eec // vpermilps xmm5, xmm4, 78 - LONG $0x3759e2c4; BYTE $0xf5 // vpcmpgtq xmm6, xmm4, xmm5 - LONG $0x4b51e3c4; WORD $0x60e4 // vblendvpd xmm4, xmm5, xmm4, xmm6 - LONG $0x7ef9c1c4; BYTE $0xe2 // vmovq r10, xmm4 - LONG $0x3765e2c4; BYTE $0xe0 // vpcmpgtq ymm4, ymm3, ymm0 - LONG $0x4b65e3c4; WORD $0x40c0 // vblendvpd ymm0, ymm3, ymm0, ymm4 - LONG $0x376de2c4; BYTE $0xd8 // vpcmpgtq ymm3, ymm2, ymm0 - LONG $0x4b6de3c4; WORD $0x30c0 // vblendvpd ymm0, ymm2, ymm0, ymm3 - LONG $0x3775e2c4; BYTE $0xd0 // vpcmpgtq ymm2, ymm1, ymm0 - LONG $0x4b75e3c4; WORD $0x20c0 // vblendvpd ymm0, ymm1, ymm0, ymm2 - LONG $0x197de3c4; WORD $0x01c1 // vextractf128 xmm1, ymm0, 1 - LONG $0x3771e2c4; BYTE $0xd0 // vpcmpgtq xmm2, xmm1, xmm0 - LONG $0x4b71e3c4; WORD $0x20c0 // vblendvpd xmm0, xmm1, xmm0, xmm2 - LONG $0x0479e3c4; WORD $0x4ec8 // vpermilps xmm1, xmm0, 78 - LONG $0x3771e2c4; BYTE $0xd0 // vpcmpgtq xmm2, xmm1, xmm0 - LONG $0x4b71e3c4; WORD $0x20c0 // vblendvpd xmm0, xmm1, xmm0, xmm2 - LONG $0x7ef9e1c4; BYTE $0xc0 // vmovq rax, xmm0 - WORD $0x894c; BYTE $0xd6 // mov rsi, r10 - WORD $0x394d; BYTE $0xc1 // cmp r9, r8 - JE LBB6_8 - -LBB6_7: - LONG $0xcf348b4a // mov rsi, qword [rdi + 8*r9] - WORD $0x3948; BYTE $0xf0 // cmp rax, rsi - LONG $0xc64f0f48 // cmovg rax, rsi - WORD $0x3949; BYTE $0xf2 // cmp r10, rsi - LONG $0xf24d0f49 // cmovge rsi, r10 - LONG $0x01c18349 // add r9, 1 - WORD $0x8949; BYTE $0xf2 // mov r10, rsi - WORD $0x394d; BYTE $0xc8 // cmp r8, r9 - JNE LBB6_7 - -LBB6_8: - WORD $0x8948; BYTE $0x31 // mov qword [rcx], rsi - WORD $0x8948; BYTE $0x02 // mov qword [rdx], rax - VZEROUPPER - RET - -DATA LCDATA5<>+0x000(SB)/8, $0x8000000000000000 -GLOBL LCDATA5<>(SB), 8, $8 - -TEXT ·_uint64_max_min_avx2(SB), $0-32 - - MOVQ values+0(FP), DI - MOVQ length+8(FP), SI - MOVQ minout+16(FP), DX - MOVQ maxout+24(FP), CX - LEAQ LCDATA5<>(SB), BP - - WORD $0xf685 // test esi, esi - JLE LBB7_1 - WORD $0x8941; BYTE $0xf0 // mov r8d, esi - WORD $0xfe83; BYTE $0x0f // cmp esi, 15 - JA LBB7_4 - LONG $0xffc0c748; WORD $0xffff; BYTE $0xff // mov rax, -1 - WORD $0x3145; BYTE $0xc9 // xor r9d, r9d - WORD $0x3145; BYTE $0xd2 // xor r10d, r10d - JMP LBB7_7 - -LBB7_1: - LONG $0xffc0c748; WORD $0xffff; BYTE $0xff // mov rax, -1 - WORD $0xf631 // xor esi, esi - JMP LBB7_8 - -LBB7_4: - WORD $0x8945; BYTE $0xc1 // mov r9d, r8d - LONG $0xf0e18341 // and r9d, -16 - LONG $0xedefd1c5 // vpxor xmm5, xmm5, xmm5 - LONG $0xc976f5c5 // vpcmpeqd ymm1, ymm1, ymm1 - WORD $0xc031 // xor eax, eax - LONG $0x597de2c4; WORD $0x0045 // vpbroadcastq ymm0, qword 0[rbp] /* [rip + .LCPI7_0] */ - LONG $0xe476ddc5 // vpcmpeqd ymm4, ymm4, ymm4 - LONG $0xdb76e5c5 // vpcmpeqd ymm3, ymm3, ymm3 - LONG $0xd276edc5 // vpcmpeqd ymm2, ymm2, ymm2 - LONG $0xef3941c4; BYTE $0xc0 // vpxor xmm8, xmm8, xmm8 - LONG $0xffefc1c5 // vpxor xmm7, xmm7, xmm7 - LONG $0xf6efc9c5 // vpxor xmm6, xmm6, xmm6 - -LBB7_5: - LONG $0x0c6f7ec5; BYTE $0xc7 // vmovdqu ymm9, yword [rdi + 8*rax] - LONG $0xd0ef75c5 // vpxor ymm10, ymm1, ymm0 - LONG $0xd8ef35c5 // vpxor ymm11, ymm9, ymm0 - LONG $0x372542c4; BYTE $0xd2 // vpcmpgtq ymm10, ymm11, ymm10 - LONG $0x4b35e3c4; WORD $0xa0c9 // vblendvpd ymm1, ymm9, ymm1, ymm10 - LONG $0xd0ef55c5 // vpxor ymm10, ymm5, ymm0 - LONG $0x372d42c4; BYTE $0xd3 // vpcmpgtq ymm10, ymm10, ymm11 - LONG $0x4b35e3c4; WORD $0xa0ed // vblendvpd ymm5, ymm9, ymm5, ymm10 - LONG $0x4c6f7ec5; WORD $0x20c7 // vmovdqu ymm9, yword [rdi + 8*rax + 32] - LONG $0xd0ef5dc5 // vpxor ymm10, ymm4, ymm0 - LONG $0xd8ef35c5 // vpxor ymm11, ymm9, ymm0 - LONG $0x372542c4; BYTE $0xd2 // vpcmpgtq ymm10, ymm11, ymm10 - LONG $0x4b35e3c4; WORD $0xa0e4 // vblendvpd ymm4, ymm9, ymm4, ymm10 - LONG $0xd0ef3dc5 // vpxor ymm10, ymm8, ymm0 - LONG $0x372d42c4; BYTE $0xd3 // vpcmpgtq ymm10, ymm10, ymm11 - LONG $0x5c6f7ec5; WORD $0x40c7 // vmovdqu ymm11, yword [rdi + 8*rax + 64] - LONG $0x4b3543c4; WORD $0xa0c0 // vblendvpd ymm8, ymm9, ymm8, ymm10 - LONG $0xc8ef65c5 // vpxor ymm9, ymm3, ymm0 - LONG $0xd0ef25c5 // vpxor ymm10, ymm11, ymm0 - LONG $0x372d42c4; BYTE $0xc9 // vpcmpgtq ymm9, ymm10, ymm9 - LONG $0x4b25e3c4; WORD $0x90db // vblendvpd ymm3, ymm11, ymm3, ymm9 - LONG $0xc8ef45c5 // vpxor ymm9, ymm7, ymm0 - LONG $0x373542c4; BYTE $0xca // vpcmpgtq ymm9, ymm9, ymm10 - LONG $0x4b25e3c4; WORD $0x90ff // vblendvpd ymm7, ymm11, ymm7, ymm9 - LONG $0x4c6f7ec5; WORD $0x60c7 // vmovdqu ymm9, yword [rdi + 8*rax + 96] - LONG $0xd0ef6dc5 // vpxor ymm10, ymm2, ymm0 - LONG $0xd8ef35c5 // vpxor ymm11, ymm9, ymm0 - LONG $0x372542c4; BYTE $0xd2 // vpcmpgtq ymm10, ymm11, ymm10 - LONG $0x4b35e3c4; WORD $0xa0d2 // vblendvpd ymm2, ymm9, ymm2, ymm10 - LONG $0xd0ef4dc5 // vpxor ymm10, ymm6, ymm0 - LONG $0x372d42c4; BYTE $0xd3 // vpcmpgtq ymm10, ymm10, ymm11 - LONG $0x4b35e3c4; WORD $0xa0f6 // vblendvpd ymm6, ymm9, ymm6, ymm10 - LONG $0x10c08348 // add rax, 16 - WORD $0x3949; BYTE $0xc1 // cmp r9, rax - JNE LBB7_5 - LONG $0xc8ef3dc5 // vpxor ymm9, ymm8, ymm0 - LONG $0xd0ef55c5 // vpxor ymm10, ymm5, ymm0 - LONG $0x372d42c4; BYTE $0xc9 // vpcmpgtq ymm9, ymm10, ymm9 - LONG $0x4b3de3c4; WORD $0x90ed // vblendvpd ymm5, ymm8, ymm5, ymm9 - LONG $0xc05755c5 // vxorpd ymm8, ymm5, ymm0 - LONG $0xc8ef45c5 // vpxor ymm9, ymm7, ymm0 - LONG $0x373d42c4; BYTE $0xc1 // vpcmpgtq ymm8, ymm8, ymm9 - LONG $0x4b45e3c4; WORD $0x80ed // vblendvpd ymm5, ymm7, ymm5, ymm8 - LONG $0xf857d5c5 // vxorpd ymm7, ymm5, ymm0 - LONG $0xc0ef4dc5 // vpxor ymm8, ymm6, ymm0 - LONG $0x3745c2c4; BYTE $0xf8 // vpcmpgtq ymm7, ymm7, ymm8 - LONG $0x4b4de3c4; WORD $0x70ed // vblendvpd ymm5, ymm6, ymm5, ymm7 - LONG $0x197de3c4; WORD $0x01ee // vextractf128 xmm6, ymm5, 1 - LONG $0xc05749c5 // vxorpd xmm8, xmm6, xmm0 - LONG $0xf857d1c5 // vxorpd xmm7, xmm5, xmm0 - LONG $0x3741c2c4; BYTE $0xf8 // vpcmpgtq xmm7, xmm7, xmm8 - LONG $0x4b49e3c4; WORD $0x70ed // vblendvpd xmm5, xmm6, xmm5, xmm7 - LONG $0x0479e3c4; WORD $0x4ef5 // vpermilps xmm6, xmm5, 78 - LONG $0xc05751c5 // vxorpd xmm8, xmm5, xmm0 - LONG $0xf857c9c5 // vxorpd xmm7, xmm6, xmm0 - LONG $0x3739e2c4; BYTE $0xff // vpcmpgtq xmm7, xmm8, xmm7 - LONG $0x4b49e3c4; WORD $0x70ed // vblendvpd xmm5, xmm6, xmm5, xmm7 - LONG $0xf0eff5c5 // vpxor ymm6, ymm1, ymm0 - LONG $0xf8efddc5 // vpxor ymm7, ymm4, ymm0 - LONG $0x3745e2c4; BYTE $0xf6 // vpcmpgtq ymm6, ymm7, ymm6 - LONG $0x4b5de3c4; WORD $0x60c9 // vblendvpd ymm1, ymm4, ymm1, ymm6 - LONG $0xe057f5c5 // vxorpd ymm4, ymm1, ymm0 - LONG $0xf0efe5c5 // vpxor ymm6, ymm3, ymm0 - LONG $0x374de2c4; BYTE $0xe4 // vpcmpgtq ymm4, ymm6, ymm4 - LONG $0x4b65e3c4; WORD $0x40c9 // vblendvpd ymm1, ymm3, ymm1, ymm4 - LONG $0x7ef9c1c4; BYTE $0xea // vmovq r10, xmm5 - LONG $0xd857f5c5 // vxorpd ymm3, ymm1, ymm0 - LONG $0xe0efedc5 // vpxor ymm4, ymm2, ymm0 - LONG $0x375de2c4; BYTE $0xdb // vpcmpgtq ymm3, ymm4, ymm3 - LONG $0x4b6de3c4; WORD $0x30c9 // vblendvpd ymm1, ymm2, ymm1, ymm3 - LONG $0x197de3c4; WORD $0x01ca // vextractf128 xmm2, ymm1, 1 - LONG $0xd857f1c5 // vxorpd xmm3, xmm1, xmm0 - LONG $0xe057e9c5 // vxorpd xmm4, xmm2, xmm0 - LONG $0x3759e2c4; BYTE $0xdb // vpcmpgtq xmm3, xmm4, xmm3 - LONG $0x4b69e3c4; WORD $0x30c9 // vblendvpd xmm1, xmm2, xmm1, xmm3 - LONG $0x0479e3c4; WORD $0x4ed1 // vpermilps xmm2, xmm1, 78 - LONG $0xd857f1c5 // vxorpd xmm3, xmm1, xmm0 - LONG $0xc057e9c5 // vxorpd xmm0, xmm2, xmm0 - LONG $0x3779e2c4; BYTE $0xc3 // vpcmpgtq xmm0, xmm0, xmm3 - LONG $0x4b69e3c4; WORD $0x00c1 // vblendvpd xmm0, xmm2, xmm1, xmm0 - LONG $0x7ef9e1c4; BYTE $0xc0 // vmovq rax, xmm0 - WORD $0x894c; BYTE $0xd6 // mov rsi, r10 - WORD $0x394d; BYTE $0xc1 // cmp r9, r8 - JE LBB7_8 - -LBB7_7: - LONG $0xcf348b4a // mov rsi, qword [rdi + 8*r9] - WORD $0x3948; BYTE $0xf0 // cmp rax, rsi - LONG $0xc6430f48 // cmovae rax, rsi - WORD $0x3949; BYTE $0xf2 // cmp r10, rsi - LONG $0xf2470f49 // cmova rsi, r10 - LONG $0x01c18349 // add r9, 1 - WORD $0x8949; BYTE $0xf2 // mov r10, rsi - WORD $0x394d; BYTE $0xc8 // cmp r8, r9 - JNE LBB7_7 - -LBB7_8: - WORD $0x8948; BYTE $0x31 // mov qword [rcx], rsi - WORD $0x8948; BYTE $0x02 // mov qword [rdx], rax - VZEROUPPER - RET diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_neon_arm64.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_neon_arm64.go deleted file mode 100644 index f9d3c44e3..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_neon_arm64.go +++ /dev/null @@ -1,56 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build !noasm - -package utils - -import "unsafe" - -// This file contains convenience functions for utilizing Arm64 Neon intrinsics to quickly -// and efficiently get the min and max from an integral slice. - -//go:noescape -func _int32_max_min_neon(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer) - -func int32MaxMinNEON(values []int32) (min, max int32) { - _int32_max_min_neon(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max)) - return -} - -//go:noescape -func _uint32_max_min_neon(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer) - -func uint32MaxMinNEON(values []uint32) (min, max uint32) { - _uint32_max_min_neon(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max)) - return -} - -//go:noescape -func _int64_max_min_neon(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer) - -func int64MaxMinNEON(values []int64) (min, max int64) { - _int64_max_min_neon(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max)) - return -} - -//go:noescape -func _uint64_max_min_neon(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer) - -func uint64MaxMinNEON(values []uint64) (min, max uint64) { - _uint64_max_min_neon(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max)) - return -} diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_neon_arm64.s b/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_neon_arm64.s deleted file mode 100644 index b679bb6e3..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_neon_arm64.s +++ /dev/null @@ -1,324 +0,0 @@ -//+build !noasm !appengine - -// ARROW-15336 -// (C2GOASM doesn't work correctly for Arm64) -// Partly GENERATED BY asm2plan9s. - - -// func _int32_max_min_neon(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer) -TEXT ·_int32_max_min_neon(SB), $0-32 - - MOVD values+0(FP), R0 - MOVD length+8(FP), R1 - MOVD minout+16(FP), R2 - MOVD maxout+24(FP), R3 - - WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]! - WORD $0x7100043f // cmp w1, #1 - WORD $0x910003fd // mov x29, sp - BLT LBB0_3 - - WORD $0x71000c3f // cmp w1, #3 - WORD $0x2a0103e8 // mov w8, w1 - BHI LBB0_4 - - WORD $0xaa1f03e9 // mov x9, xzr - WORD $0x52b0000b // mov w11, #-2147483648 - WORD $0x12b0000a // mov w10, #2147483647 - JMP LBB0_7 -LBB0_3: - WORD $0x12b0000a // mov w10, #2147483647 - WORD $0x52b0000b // mov w11, #-2147483648 - WORD $0xb900006b // str w11, [x3] - WORD $0xb900004a // str w10, [x2] - WORD $0xa8c17bfd // ldp x29, x30, [sp], #16 - RET -LBB0_4: - WORD $0x927e7509 // and x9, x8, #0xfffffffc - WORD $0x9100200a // add x10, x0, #8 - WORD $0x0f046402 // movi v2.2s, #128, lsl #24 - WORD $0x2f046400 // mvni v0.2s, #128, lsl #24 - WORD $0x2f046401 // mvni v1.2s, #128, lsl #24 - WORD $0xaa0903eb // mov x11, x9 - WORD $0x0f046403 // movi v3.2s, #128, lsl #24 -LBB0_5: - WORD $0x6d7f9544 // ldp d4, d5, [x10, #-8] - WORD $0xf100116b // subs x11, x11, #4 - WORD $0x9100414a // add x10, x10, #16 - WORD $0x0ea46c00 // smin v0.2s, v0.2s, v4.2s - WORD $0x0ea56c21 // smin v1.2s, v1.2s, v5.2s - WORD $0x0ea46442 // smax v2.2s, v2.2s, v4.2s - WORD $0x0ea56463 // smax v3.2s, v3.2s, v5.2s - BNE LBB0_5 - - WORD $0x0ea36442 // smax v2.2s, v2.2s, v3.2s - WORD $0x0ea16c00 // smin v0.2s, v0.2s, v1.2s - WORD $0x0e0c0441 // dup v1.2s, v2.s[1] - WORD $0x0e0c0403 // dup v3.2s, v0.s[1] - WORD $0x0ea16441 // smax v1.2s, v2.2s, v1.2s - WORD $0x0ea36c00 // smin v0.2s, v0.2s, v3.2s - WORD $0xeb08013f // cmp x9, x8 - WORD $0x1e26002b // fmov w11, s1 - WORD $0x1e26000a // fmov w10, s0 - BEQ LBB0_9 -LBB0_7: - WORD $0x8b09080c // add x12, x0, x9, lsl #2 - WORD $0xcb090108 // sub x8, x8, x9 -LBB0_8: - WORD $0xb8404589 // ldr w9, [x12], #4 - WORD $0x6b09015f // cmp w10, w9 - WORD $0x1a89b14a // csel w10, w10, w9, lt - WORD $0x6b09017f // cmp w11, w9 - WORD $0x1a89c16b // csel w11, w11, w9, gt - WORD $0xf1000508 // subs x8, x8, #1 - BNE LBB0_8 -LBB0_9: - WORD $0xb900006b // str w11, [x3] - WORD $0xb900004a // str w10, [x2] - WORD $0xa8c17bfd // ldp x29, x30, [sp], #16 - RET - -// func _uint32_max_min_neon(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer) -TEXT ·_uint32_max_min_neon(SB), $0-32 - - MOVD values+0(FP), R0 - MOVD length+8(FP), R1 - MOVD minout+16(FP), R2 - MOVD maxout+24(FP), R3 - - WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]! - WORD $0x7100043f // cmp w1, #1 - WORD $0x910003fd // mov x29, sp - BLT LBB1_3 - - WORD $0x71000c3f // cmp w1, #3 - WORD $0x2a0103e8 // mov w8, w1 - BHI LBB1_4 - - WORD $0xaa1f03e9 // mov x9, xzr - WORD $0x2a1f03ea // mov w10, wzr - WORD $0x1280000b // mov w11, #-1 - JMP LBB1_7 -LBB1_3: - WORD $0x2a1f03ea // mov w10, wzr - WORD $0x1280000b // mov w11, #-1 - WORD $0xb900006a // str w10, [x3] - WORD $0xb900004b // str w11, [x2] - WORD $0xa8c17bfd // ldp x29, x30, [sp], #16 - RET -LBB1_4: - WORD $0x927e7509 // and x9, x8, #0xfffffffc - WORD $0x6f00e401 // movi v1.2d, #0000000000000000 - WORD $0x6f07e7e0 // movi v0.2d, #0xffffffffffffffff - WORD $0x9100200a // add x10, x0, #8 - WORD $0x6f07e7e2 // movi v2.2d, #0xffffffffffffffff - WORD $0xaa0903eb // mov x11, x9 - WORD $0x6f00e403 // movi v3.2d, #0000000000000000 -LBB1_5: - WORD $0x6d7f9544 // ldp d4, d5, [x10, #-8] - WORD $0xf100116b // subs x11, x11, #4 - WORD $0x9100414a // add x10, x10, #16 - WORD $0x2ea46c00 // umin v0.2s, v0.2s, v4.2s - WORD $0x2ea56c42 // umin v2.2s, v2.2s, v5.2s - WORD $0x2ea46421 // umax v1.2s, v1.2s, v4.2s - WORD $0x2ea56463 // umax v3.2s, v3.2s, v5.2s - BNE LBB1_5 - - WORD $0x2ea36421 // umax v1.2s, v1.2s, v3.2s - WORD $0x2ea26c00 // umin v0.2s, v0.2s, v2.2s - WORD $0x0e0c0422 // dup v2.2s, v1.s[1] - WORD $0x0e0c0403 // dup v3.2s, v0.s[1] - WORD $0x2ea26421 // umax v1.2s, v1.2s, v2.2s - WORD $0x2ea36c00 // umin v0.2s, v0.2s, v3.2s - WORD $0xeb08013f // cmp x9, x8 - WORD $0x1e26002a // fmov w10, s1 - WORD $0x1e26000b // fmov w11, s0 - BEQ LBB1_9 -LBB1_7: - WORD $0x8b09080c // add x12, x0, x9, lsl #2 - WORD $0xcb090108 // sub x8, x8, x9 -LBB1_8: - WORD $0xb8404589 // ldr w9, [x12], #4 - WORD $0x6b09017f // cmp w11, w9 - WORD $0x1a89316b // csel w11, w11, w9, lo - WORD $0x6b09015f // cmp w10, w9 - WORD $0x1a89814a // csel w10, w10, w9, hi - WORD $0xf1000508 // subs x8, x8, #1 - BNE LBB1_8 -LBB1_9: - WORD $0xb900006a // str w10, [x3] - WORD $0xb900004b // str w11, [x2] - WORD $0xa8c17bfd // ldp x29, x30, [sp], #16 - RET - -// func _int64_max_min_neon(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer) -TEXT ·_int64_max_min_neon(SB), $0-32 - - MOVD values+0(FP), R0 - MOVD length+8(FP), R1 - MOVD minout+16(FP), R2 - MOVD maxout+24(FP), R3 - - WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]! - WORD $0x7100043f // cmp w1, #1 - WORD $0x910003fd // mov x29, sp - BLT LBB2_3 - - WORD $0x2a0103e8 // mov w8, w1 - WORD $0xd2f0000b // mov x11, #-9223372036854775808 - WORD $0x71000c3f // cmp w1, #3 - WORD $0x92f0000a // mov x10, #9223372036854775807 - BHI LBB2_4 - - WORD $0xaa1f03e9 // mov x9, xzr - JMP LBB2_7 -LBB2_3: - WORD $0x92f0000a // mov x10, #9223372036854775807 - WORD $0xd2f0000b // mov x11, #-9223372036854775808 - WORD $0xf900006b // str x11, [x3] - WORD $0xf900004a // str x10, [x2] - WORD $0xa8c17bfd // ldp x29, x30, [sp], #16 - RET -LBB2_4: - WORD $0x927e7509 // and x9, x8, #0xfffffffc - WORD $0x4e080d61 // dup v1.2d, x11 - WORD $0x4e080d40 // dup v0.2d, x10 - WORD $0x9100400a // add x10, x0, #16 - WORD $0xaa0903eb // mov x11, x9 - WORD $0x4ea01c02 // mov v2.16b, v0.16b - WORD $0x4ea11c23 // mov v3.16b, v1.16b -LBB2_5: - WORD $0xad7f9544 // ldp q4, q5, [x10, #-16] - WORD $0x4ea31c66 // mov v6.16b, v3.16b - WORD $0x4ea11c27 // mov v7.16b, v1.16b - WORD $0x4ea21c43 // mov v3.16b, v2.16b - WORD $0x4ea01c01 // mov v1.16b, v0.16b - WORD $0x4ee03480 // cmgt v0.2d, v4.2d, v0.2d - WORD $0x4ee234a2 // cmgt v2.2d, v5.2d, v2.2d - WORD $0x6e641c20 // bsl v0.16b, v1.16b, v4.16b - WORD $0x4ee434e1 // cmgt v1.2d, v7.2d, v4.2d - WORD $0x6e651c62 // bsl v2.16b, v3.16b, v5.16b - WORD $0x4ee534c3 // cmgt v3.2d, v6.2d, v5.2d - WORD $0xf100116b // subs x11, x11, #4 - WORD $0x6e641ce1 // bsl v1.16b, v7.16b, v4.16b - WORD $0x6e651cc3 // bsl v3.16b, v6.16b, v5.16b - WORD $0x9100814a // add x10, x10, #32 - BNE LBB2_5 - - WORD $0x4ee33424 // cmgt v4.2d, v1.2d, v3.2d - WORD $0x4ee03445 // cmgt v5.2d, v2.2d, v0.2d - WORD $0x6e631c24 // bsl v4.16b, v1.16b, v3.16b - WORD $0x6e621c05 // bsl v5.16b, v0.16b, v2.16b - WORD $0x4e180480 // dup v0.2d, v4.d[1] - WORD $0x4e1804a1 // dup v1.2d, v5.d[1] - WORD $0x4ee03482 // cmgt v2.2d, v4.2d, v0.2d - WORD $0x4ee53423 // cmgt v3.2d, v1.2d, v5.2d - WORD $0x6e601c82 // bsl v2.16b, v4.16b, v0.16b - WORD $0x6e611ca3 // bsl v3.16b, v5.16b, v1.16b - WORD $0xeb08013f // cmp x9, x8 - WORD $0x9e66004b // fmov x11, d2 - WORD $0x9e66006a // fmov x10, d3 - BEQ LBB2_9 -LBB2_7: - WORD $0x8b090c0c // add x12, x0, x9, lsl #3 - WORD $0xcb090108 // sub x8, x8, x9 -LBB2_8: - WORD $0xf8408589 // ldr x9, [x12], #8 - WORD $0xeb09015f // cmp x10, x9 - WORD $0x9a89b14a // csel x10, x10, x9, lt - WORD $0xeb09017f // cmp x11, x9 - WORD $0x9a89c16b // csel x11, x11, x9, gt - WORD $0xf1000508 // subs x8, x8, #1 - BNE LBB2_8 -LBB2_9: - WORD $0xf900006b // str x11, [x3] - WORD $0xf900004a // str x10, [x2] - WORD $0xa8c17bfd // ldp x29, x30, [sp], #16 - RET - - -// func _uint64_max_min_neon(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer) -TEXT ·_uint64_max_min_neon(SB), $0-32 - - MOVD values+0(FP), R0 - MOVD length+8(FP), R1 - MOVD minout+16(FP), R2 - MOVD maxout+24(FP), R3 - - WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]! - WORD $0x7100043f // cmp w1, #1 - WORD $0x910003fd // mov x29, sp - BLT LBB3_3 - - WORD $0x71000c3f // cmp w1, #3 - WORD $0x2a0103e8 // mov w8, w1 - BHI LBB3_4 - - WORD $0xaa1f03e9 // mov x9, xzr - WORD $0xaa1f03ea // mov x10, xzr - WORD $0x9280000b // mov x11, #-1 - JMP LBB3_7 -LBB3_3: - WORD $0xaa1f03ea // mov x10, xzr - WORD $0x9280000b // mov x11, #-1 - WORD $0xf900006a // str x10, [x3] - WORD $0xf900004b // str x11, [x2] - WORD $0xa8c17bfd // ldp x29, x30, [sp], #16 - RET -LBB3_4: - WORD $0x927e7509 // and x9, x8, #0xfffffffc - WORD $0x9100400a // add x10, x0, #16 - WORD $0x6f00e401 // movi v1.2d, #0000000000000000 - WORD $0x6f07e7e0 // movi v0.2d, #0xffffffffffffffff - WORD $0x6f07e7e2 // movi v2.2d, #0xffffffffffffffff - WORD $0xaa0903eb // mov x11, x9 - WORD $0x6f00e403 // movi v3.2d, #0000000000000000 -LBB3_5: - WORD $0xad7f9544 // ldp q4, q5, [x10, #-16] - WORD $0x4ea31c66 // mov v6.16b, v3.16b - WORD $0x4ea11c27 // mov v7.16b, v1.16b - WORD $0x4ea21c43 // mov v3.16b, v2.16b - WORD $0x4ea01c01 // mov v1.16b, v0.16b - WORD $0x6ee03480 // cmhi v0.2d, v4.2d, v0.2d - WORD $0x6ee234a2 // cmhi v2.2d, v5.2d, v2.2d - WORD $0x6e641c20 // bsl v0.16b, v1.16b, v4.16b - WORD $0x6ee434e1 // cmhi v1.2d, v7.2d, v4.2d - WORD $0x6e651c62 // bsl v2.16b, v3.16b, v5.16b - WORD $0x6ee534c3 // cmhi v3.2d, v6.2d, v5.2d - WORD $0xf100116b // subs x11, x11, #4 - WORD $0x6e641ce1 // bsl v1.16b, v7.16b, v4.16b - WORD $0x6e651cc3 // bsl v3.16b, v6.16b, v5.16b - WORD $0x9100814a // add x10, x10, #32 - BNE LBB3_5 - - WORD $0x6ee33424 // cmhi v4.2d, v1.2d, v3.2d - WORD $0x6ee03445 // cmhi v5.2d, v2.2d, v0.2d - WORD $0x6e631c24 // bsl v4.16b, v1.16b, v3.16b - WORD $0x6e621c05 // bsl v5.16b, v0.16b, v2.16b - WORD $0x4e180480 // dup v0.2d, v4.d[1] - WORD $0x4e1804a1 // dup v1.2d, v5.d[1] - WORD $0x6ee03482 // cmhi v2.2d, v4.2d, v0.2d - WORD $0x6ee53423 // cmhi v3.2d, v1.2d, v5.2d - WORD $0x6e601c82 // bsl v2.16b, v4.16b, v0.16b - WORD $0x6e611ca3 // bsl v3.16b, v5.16b, v1.16b - WORD $0xeb08013f // cmp x9, x8 - WORD $0x9e66004a // fmov x10, d2 - WORD $0x9e66006b // fmov x11, d3 - BEQ LBB3_9 -LBB3_7: - WORD $0x8b090c0c // add x12, x0, x9, lsl #3 - WORD $0xcb090108 // sub x8, x8, x9 -LBB3_8: - WORD $0xf8408589 // ldr x9, [x12], #8 - WORD $0xeb09017f // cmp x11, x9 - WORD $0x9a89316b // csel x11, x11, x9, lo - WORD $0xeb09015f // cmp x10, x9 - WORD $0x9a89814a // csel x10, x10, x9, hi - WORD $0xf1000508 // subs x8, x8, #1 - BNE LBB3_8 -LBB3_9: - WORD $0xf900006a // str x10, [x3] - WORD $0xf900004b // str x11, [x2] - WORD $0xa8c17bfd // ldp x29, x30, [sp], #16 - RET - diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_noasm.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_noasm.go deleted file mode 100644 index 19c24b590..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_noasm.go +++ /dev/null @@ -1,31 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build noasm - -package utils - -// if building with the 'noasm' tag, then point to the pure go implementations -func init() { - minmaxFuncs.i8 = int8MinMax - minmaxFuncs.ui8 = uint8MinMax - minmaxFuncs.i16 = int16MinMax - minmaxFuncs.ui16 = uint16MinMax - minmaxFuncs.i32 = int32MinMax - minmaxFuncs.ui32 = uint32MinMax - minmaxFuncs.i64 = int64MinMax - minmaxFuncs.ui64 = uint64MinMax -} diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_ppc64le.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_ppc64le.go deleted file mode 100644 index ffd2db006..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_ppc64le.go +++ /dev/null @@ -1,30 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build !noasm - -package utils - -func init() { - minmaxFuncs.i8 = int8MinMax - minmaxFuncs.ui8 = uint8MinMax - minmaxFuncs.i16 = int16MinMax - minmaxFuncs.ui16 = uint16MinMax - minmaxFuncs.i32 = int32MinMax - minmaxFuncs.ui32 = uint32MinMax - minmaxFuncs.i64 = int64MinMax - minmaxFuncs.ui64 = uint64MinMax -} diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_s390x.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_s390x.go deleted file mode 100644 index ffd2db006..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_s390x.go +++ /dev/null @@ -1,30 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build !noasm - -package utils - -func init() { - minmaxFuncs.i8 = int8MinMax - minmaxFuncs.ui8 = uint8MinMax - minmaxFuncs.i16 = int16MinMax - minmaxFuncs.ui16 = uint16MinMax - minmaxFuncs.i32 = int32MinMax - minmaxFuncs.ui32 = uint32MinMax - minmaxFuncs.i64 = int64MinMax - minmaxFuncs.ui64 = uint64MinMax -} diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_sse4_amd64.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_sse4_amd64.go deleted file mode 100644 index 1e12a8d17..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_sse4_amd64.go +++ /dev/null @@ -1,88 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build !noasm - -package utils - -import "unsafe" - -// This file contains convenience functions for utilizing SSE4 intrinsics to quickly -// and efficiently get the min and max from an integral slice. - -//go:noescape -func _int8_max_min_sse4(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer) - -func int8MaxMinSSE4(values []int8) (min, max int8) { - _int8_max_min_sse4(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max)) - return -} - -//go:noescape -func _uint8_max_min_sse4(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer) - -func uint8MaxMinSSE4(values []uint8) (min, max uint8) { - _uint8_max_min_sse4(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max)) - return -} - -//go:noescape -func _int16_max_min_sse4(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer) - -func int16MaxMinSSE4(values []int16) (min, max int16) { - _int16_max_min_sse4(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max)) - return -} - -//go:noescape -func _uint16_max_min_sse4(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer) - -func uint16MaxMinSSE4(values []uint16) (min, max uint16) { - _uint16_max_min_sse4(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max)) - return -} - -//go:noescape -func _int32_max_min_sse4(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer) - -func int32MaxMinSSE4(values []int32) (min, max int32) { - _int32_max_min_sse4(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max)) - return -} - -//go:noescape -func _uint32_max_min_sse4(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer) - -func uint32MaxMinSSE4(values []uint32) (min, max uint32) { - _uint32_max_min_sse4(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max)) - return -} - -//go:noescape -func _int64_max_min_sse4(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer) - -func int64MaxMinSSE4(values []int64) (min, max int64) { - _int64_max_min_sse4(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max)) - return -} - -//go:noescape -func _uint64_max_min_sse4(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer) - -func uint64MaxMinSSE4(values []uint64) (min, max uint64) { - _uint64_max_min_sse4(unsafe.Pointer(&values[0]), len(values), unsafe.Pointer(&min), unsafe.Pointer(&max)) - return -} diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_sse4_amd64.s b/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_sse4_amd64.s deleted file mode 100644 index 8f1eccf60..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/min_max_sse4_amd64.s +++ /dev/null @@ -1,1044 +0,0 @@ -//+build !noasm !appengine -// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT - -DATA LCDATA1<>+0x000(SB)/8, $0x8080808080808080 -DATA LCDATA1<>+0x008(SB)/8, $0x8080808080808080 -DATA LCDATA1<>+0x010(SB)/8, $0x7f7f7f7f7f7f7f7f -DATA LCDATA1<>+0x018(SB)/8, $0x7f7f7f7f7f7f7f7f -GLOBL LCDATA1<>(SB), 8, $32 - -TEXT ·_int8_max_min_sse4(SB), $0-32 - - MOVQ values+0(FP), DI - MOVQ length+8(FP), SI - MOVQ minout+16(FP), DX - MOVQ maxout+24(FP), CX - LEAQ LCDATA1<>(SB), BP - - WORD $0xf685 // test esi, esi - JLE LBB0_1 - WORD $0x8941; BYTE $0xf1 // mov r9d, esi - WORD $0xfe83; BYTE $0x1f // cmp esi, 31 - JA LBB0_4 - WORD $0xb041; BYTE $0x80 // mov r8b, -128 - WORD $0xb640; BYTE $0x7f // mov sil, 127 - WORD $0x3145; BYTE $0xdb // xor r11d, r11d - JMP LBB0_11 - -LBB0_1: - WORD $0xb640; BYTE $0x7f // mov sil, 127 - WORD $0xb041; BYTE $0x80 // mov r8b, -128 - JMP LBB0_12 - -LBB0_4: - WORD $0x8945; BYTE $0xcb // mov r11d, r9d - LONG $0xe0e38341 // and r11d, -32 - LONG $0xe0438d49 // lea rax, [r11 - 32] - WORD $0x8949; BYTE $0xc0 // mov r8, rax - LONG $0x05e8c149 // shr r8, 5 - LONG $0x01c08349 // add r8, 1 - WORD $0x8548; BYTE $0xc0 // test rax, rax - JE LBB0_5 - WORD $0x894d; BYTE $0xc2 // mov r10, r8 - LONG $0xfee28349 // and r10, -2 - WORD $0xf749; BYTE $0xda // neg r10 - LONG $0x4d6f0f66; BYTE $0x00 // movdqa xmm1, oword 0[rbp] /* [rip + .LCPI0_0] */ - LONG $0x456f0f66; BYTE $0x10 // movdqa xmm0, oword 16[rbp] /* [rip + .LCPI0_1] */ - WORD $0xc031 // xor eax, eax - LONG $0xd06f0f66 // movdqa xmm2, xmm0 - LONG $0xd96f0f66 // movdqa xmm3, xmm1 - -LBB0_7: - LONG $0x246f0ff3; BYTE $0x07 // movdqu xmm4, oword [rdi + rax] - LONG $0x6c6f0ff3; WORD $0x1007 // movdqu xmm5, oword [rdi + rax + 16] - LONG $0x746f0ff3; WORD $0x2007 // movdqu xmm6, oword [rdi + rax + 32] - LONG $0x7c6f0ff3; WORD $0x3007 // movdqu xmm7, oword [rdi + rax + 48] - LONG $0x38380f66; BYTE $0xc4 // pminsb xmm0, xmm4 - LONG $0x38380f66; BYTE $0xd5 // pminsb xmm2, xmm5 - LONG $0x3c380f66; BYTE $0xcc // pmaxsb xmm1, xmm4 - LONG $0x3c380f66; BYTE $0xdd // pmaxsb xmm3, xmm5 - LONG $0x38380f66; BYTE $0xc6 // pminsb xmm0, xmm6 - LONG $0x38380f66; BYTE $0xd7 // pminsb xmm2, xmm7 - LONG $0x3c380f66; BYTE $0xce // pmaxsb xmm1, xmm6 - LONG $0x3c380f66; BYTE $0xdf // pmaxsb xmm3, xmm7 - LONG $0x40c08348 // add rax, 64 - LONG $0x02c28349 // add r10, 2 - JNE LBB0_7 - LONG $0x01c0f641 // test r8b, 1 - JE LBB0_10 - -LBB0_9: - LONG $0x246f0ff3; BYTE $0x07 // movdqu xmm4, oword [rdi + rax] - LONG $0x6c6f0ff3; WORD $0x1007 // movdqu xmm5, oword [rdi + rax + 16] - LONG $0x3c380f66; BYTE $0xdd // pmaxsb xmm3, xmm5 - LONG $0x3c380f66; BYTE $0xcc // pmaxsb xmm1, xmm4 - LONG $0x38380f66; BYTE $0xd5 // pminsb xmm2, xmm5 - LONG $0x38380f66; BYTE $0xc4 // pminsb xmm0, xmm4 - -LBB0_10: - LONG $0x38380f66; BYTE $0xc2 // pminsb xmm0, xmm2 - LONG $0x3c380f66; BYTE $0xcb // pmaxsb xmm1, xmm3 - LONG $0x4def0f66; BYTE $0x10 // pxor xmm1, oword 16[rbp] /* [rip + .LCPI0_1] */ - LONG $0xd16f0f66 // movdqa xmm2, xmm1 - LONG $0xd2710f66; BYTE $0x08 // psrlw xmm2, 8 - LONG $0xd1da0f66 // pminub xmm2, xmm1 - LONG $0x41380f66; BYTE $0xca // phminposuw xmm1, xmm2 - LONG $0x7e0f4166; BYTE $0xc8 // movd r8d, xmm1 - LONG $0x7ff08041 // xor r8b, 127 - LONG $0x45ef0f66; BYTE $0x00 // pxor xmm0, oword 0[rbp] /* [rip + .LCPI0_0] */ - LONG $0xc86f0f66 // movdqa xmm1, xmm0 - LONG $0xd1710f66; BYTE $0x08 // psrlw xmm1, 8 - LONG $0xc8da0f66 // pminub xmm1, xmm0 - LONG $0x41380f66; BYTE $0xc1 // phminposuw xmm0, xmm1 - LONG $0xc67e0f66 // movd esi, xmm0 - LONG $0x80f68040 // xor sil, -128 - WORD $0x394d; BYTE $0xcb // cmp r11, r9 - JE LBB0_12 - -LBB0_11: - LONG $0x04b60f42; BYTE $0x1f // movzx eax, byte [rdi + r11] - WORD $0x3840; BYTE $0xc6 // cmp sil, al - LONG $0xf6b60f40 // movzx esi, sil - WORD $0x4f0f; BYTE $0xf0 // cmovg esi, eax - WORD $0x3841; BYTE $0xc0 // cmp r8b, al - LONG $0xc0b60f45 // movzx r8d, r8b - LONG $0xc04c0f44 // cmovl r8d, eax - LONG $0x01c38349 // add r11, 1 - WORD $0x394d; BYTE $0xd9 // cmp r9, r11 - JNE LBB0_11 - -LBB0_12: - WORD $0x8844; BYTE $0x01 // mov byte [rcx], r8b - WORD $0x8840; BYTE $0x32 // mov byte [rdx], sil - RET - -LBB0_5: - LONG $0x4d6f0f66; BYTE $0x00 // movdqa xmm1, oword 0[rbp] /* [rip + .LCPI0_0] */ - LONG $0x456f0f66; BYTE $0x10 // movdqa xmm0, oword 16[rbp] /* [rip + .LCPI0_1] */ - WORD $0xc031 // xor eax, eax - LONG $0xd06f0f66 // movdqa xmm2, xmm0 - LONG $0xd96f0f66 // movdqa xmm3, xmm1 - LONG $0x01c0f641 // test r8b, 1 - JNE LBB0_9 - JMP LBB0_10 - -TEXT ·_uint8_max_min_sse4(SB), $0-32 - - MOVQ values+0(FP), DI - MOVQ length+8(FP), SI - MOVQ minout+16(FP), DX - MOVQ maxout+24(FP), CX - - WORD $0xf685 // test esi, esi - JLE LBB1_1 - WORD $0x8941; BYTE $0xf1 // mov r9d, esi - WORD $0xfe83; BYTE $0x1f // cmp esi, 31 - JA LBB1_4 - WORD $0xb640; BYTE $0xff // mov sil, -1 - WORD $0x3145; BYTE $0xdb // xor r11d, r11d - WORD $0xc031 // xor eax, eax - JMP LBB1_11 - -LBB1_1: - WORD $0xb640; BYTE $0xff // mov sil, -1 - WORD $0xc031 // xor eax, eax - JMP LBB1_12 - -LBB1_4: - WORD $0x8945; BYTE $0xcb // mov r11d, r9d - LONG $0xe0e38341 // and r11d, -32 - LONG $0xe0438d49 // lea rax, [r11 - 32] - WORD $0x8949; BYTE $0xc0 // mov r8, rax - LONG $0x05e8c149 // shr r8, 5 - LONG $0x01c08349 // add r8, 1 - WORD $0x8548; BYTE $0xc0 // test rax, rax - JE LBB1_5 - WORD $0x894d; BYTE $0xc2 // mov r10, r8 - LONG $0xfee28349 // and r10, -2 - WORD $0xf749; BYTE $0xda // neg r10 - LONG $0xc9ef0f66 // pxor xmm1, xmm1 - LONG $0xc0760f66 // pcmpeqd xmm0, xmm0 - WORD $0xc031 // xor eax, eax - LONG $0xd2760f66 // pcmpeqd xmm2, xmm2 - LONG $0xdbef0f66 // pxor xmm3, xmm3 - -LBB1_7: - LONG $0x246f0ff3; BYTE $0x07 // movdqu xmm4, oword [rdi + rax] - LONG $0x6c6f0ff3; WORD $0x1007 // movdqu xmm5, oword [rdi + rax + 16] - LONG $0x746f0ff3; WORD $0x2007 // movdqu xmm6, oword [rdi + rax + 32] - LONG $0x7c6f0ff3; WORD $0x3007 // movdqu xmm7, oword [rdi + rax + 48] - LONG $0xc4da0f66 // pminub xmm0, xmm4 - LONG $0xd5da0f66 // pminub xmm2, xmm5 - LONG $0xccde0f66 // pmaxub xmm1, xmm4 - LONG $0xddde0f66 // pmaxub xmm3, xmm5 - LONG $0xc6da0f66 // pminub xmm0, xmm6 - LONG $0xd7da0f66 // pminub xmm2, xmm7 - LONG $0xcede0f66 // pmaxub xmm1, xmm6 - LONG $0xdfde0f66 // pmaxub xmm3, xmm7 - LONG $0x40c08348 // add rax, 64 - LONG $0x02c28349 // add r10, 2 - JNE LBB1_7 - LONG $0x01c0f641 // test r8b, 1 - JE LBB1_10 - -LBB1_9: - LONG $0x246f0ff3; BYTE $0x07 // movdqu xmm4, oword [rdi + rax] - LONG $0x6c6f0ff3; WORD $0x1007 // movdqu xmm5, oword [rdi + rax + 16] - LONG $0xddde0f66 // pmaxub xmm3, xmm5 - LONG $0xccde0f66 // pmaxub xmm1, xmm4 - LONG $0xd5da0f66 // pminub xmm2, xmm5 - LONG $0xc4da0f66 // pminub xmm0, xmm4 - -LBB1_10: - LONG $0xc2da0f66 // pminub xmm0, xmm2 - LONG $0xcbde0f66 // pmaxub xmm1, xmm3 - LONG $0xd2760f66 // pcmpeqd xmm2, xmm2 - LONG $0xd1ef0f66 // pxor xmm2, xmm1 - LONG $0xca6f0f66 // movdqa xmm1, xmm2 - LONG $0xd1710f66; BYTE $0x08 // psrlw xmm1, 8 - LONG $0xcada0f66 // pminub xmm1, xmm2 - LONG $0x41380f66; BYTE $0xc9 // phminposuw xmm1, xmm1 - LONG $0xc87e0f66 // movd eax, xmm1 - WORD $0xd0f6 // not al - LONG $0xc86f0f66 // movdqa xmm1, xmm0 - LONG $0xd1710f66; BYTE $0x08 // psrlw xmm1, 8 - LONG $0xc8da0f66 // pminub xmm1, xmm0 - LONG $0x41380f66; BYTE $0xc1 // phminposuw xmm0, xmm1 - LONG $0xc67e0f66 // movd esi, xmm0 - WORD $0x394d; BYTE $0xcb // cmp r11, r9 - JE LBB1_12 - -LBB1_11: - LONG $0x04b60f46; BYTE $0x1f // movzx r8d, byte [rdi + r11] - WORD $0x3844; BYTE $0xc6 // cmp sil, r8b - LONG $0xf6b60f40 // movzx esi, sil - LONG $0xf0430f41 // cmovae esi, r8d - WORD $0x3844; BYTE $0xc0 // cmp al, r8b - WORD $0xb60f; BYTE $0xc0 // movzx eax, al - LONG $0xc0460f41 // cmovbe eax, r8d - LONG $0x01c38349 // add r11, 1 - WORD $0x394d; BYTE $0xd9 // cmp r9, r11 - JNE LBB1_11 - -LBB1_12: - WORD $0x0188 // mov byte [rcx], al - WORD $0x8840; BYTE $0x32 // mov byte [rdx], sil - RET - -LBB1_5: - LONG $0xc9ef0f66 // pxor xmm1, xmm1 - LONG $0xc0760f66 // pcmpeqd xmm0, xmm0 - WORD $0xc031 // xor eax, eax - LONG $0xd2760f66 // pcmpeqd xmm2, xmm2 - LONG $0xdbef0f66 // pxor xmm3, xmm3 - LONG $0x01c0f641 // test r8b, 1 - JNE LBB1_9 - JMP LBB1_10 - -DATA LCDATA2<>+0x000(SB)/8, $0x8000800080008000 -DATA LCDATA2<>+0x008(SB)/8, $0x8000800080008000 -DATA LCDATA2<>+0x010(SB)/8, $0x7fff7fff7fff7fff -DATA LCDATA2<>+0x018(SB)/8, $0x7fff7fff7fff7fff -GLOBL LCDATA2<>(SB), 8, $32 - -TEXT ·_int16_max_min_sse4(SB), $0-32 - - MOVQ values+0(FP), DI - MOVQ length+8(FP), SI - MOVQ minout+16(FP), DX - MOVQ maxout+24(FP), CX - LEAQ LCDATA2<>(SB), BP - - WORD $0xf685 // test esi, esi - JLE LBB2_1 - WORD $0x8941; BYTE $0xf1 // mov r9d, esi - WORD $0xfe83; BYTE $0x0f // cmp esi, 15 - JA LBB2_4 - LONG $0x00b84166; BYTE $0x80 // mov r8w, -32768 - LONG $0x7fffbe66 // mov si, 32767 - WORD $0x3145; BYTE $0xdb // xor r11d, r11d - JMP LBB2_11 - -LBB2_1: - LONG $0x7fffbe66 // mov si, 32767 - LONG $0x00b84166; BYTE $0x80 // mov r8w, -32768 - JMP LBB2_12 - -LBB2_4: - WORD $0x8945; BYTE $0xcb // mov r11d, r9d - LONG $0xf0e38341 // and r11d, -16 - LONG $0xf0438d49 // lea rax, [r11 - 16] - WORD $0x8949; BYTE $0xc0 // mov r8, rax - LONG $0x04e8c149 // shr r8, 4 - LONG $0x01c08349 // add r8, 1 - WORD $0x8548; BYTE $0xc0 // test rax, rax - JE LBB2_5 - WORD $0x894d; BYTE $0xc2 // mov r10, r8 - LONG $0xfee28349 // and r10, -2 - WORD $0xf749; BYTE $0xda // neg r10 - LONG $0x4d6f0f66; BYTE $0x00 // movdqa xmm1, oword 0[rbp] /* [rip + .LCPI2_0] */ - LONG $0x456f0f66; BYTE $0x10 // movdqa xmm0, oword 16[rbp] /* [rip + .LCPI2_1] */ - WORD $0xc031 // xor eax, eax - LONG $0xd06f0f66 // movdqa xmm2, xmm0 - LONG $0xd96f0f66 // movdqa xmm3, xmm1 - -LBB2_7: - LONG $0x246f0ff3; BYTE $0x47 // movdqu xmm4, oword [rdi + 2*rax] - LONG $0x6c6f0ff3; WORD $0x1047 // movdqu xmm5, oword [rdi + 2*rax + 16] - LONG $0x746f0ff3; WORD $0x2047 // movdqu xmm6, oword [rdi + 2*rax + 32] - LONG $0x7c6f0ff3; WORD $0x3047 // movdqu xmm7, oword [rdi + 2*rax + 48] - LONG $0xc4ea0f66 // pminsw xmm0, xmm4 - LONG $0xd5ea0f66 // pminsw xmm2, xmm5 - LONG $0xccee0f66 // pmaxsw xmm1, xmm4 - LONG $0xddee0f66 // pmaxsw xmm3, xmm5 - LONG $0xc6ea0f66 // pminsw xmm0, xmm6 - LONG $0xd7ea0f66 // pminsw xmm2, xmm7 - LONG $0xceee0f66 // pmaxsw xmm1, xmm6 - LONG $0xdfee0f66 // pmaxsw xmm3, xmm7 - LONG $0x20c08348 // add rax, 32 - LONG $0x02c28349 // add r10, 2 - JNE LBB2_7 - LONG $0x01c0f641 // test r8b, 1 - JE LBB2_10 - -LBB2_9: - LONG $0x246f0ff3; BYTE $0x47 // movdqu xmm4, oword [rdi + 2*rax] - LONG $0x6c6f0ff3; WORD $0x1047 // movdqu xmm5, oword [rdi + 2*rax + 16] - LONG $0xddee0f66 // pmaxsw xmm3, xmm5 - LONG $0xccee0f66 // pmaxsw xmm1, xmm4 - LONG $0xd5ea0f66 // pminsw xmm2, xmm5 - LONG $0xc4ea0f66 // pminsw xmm0, xmm4 - -LBB2_10: - LONG $0xc2ea0f66 // pminsw xmm0, xmm2 - LONG $0xcbee0f66 // pmaxsw xmm1, xmm3 - LONG $0x4def0f66; BYTE $0x10 // pxor xmm1, oword 16[rbp] /* [rip + .LCPI2_1] */ - LONG $0x41380f66; BYTE $0xc9 // phminposuw xmm1, xmm1 - LONG $0x7e0f4166; BYTE $0xc8 // movd r8d, xmm1 - LONG $0xfff08141; WORD $0x007f; BYTE $0x00 // xor r8d, 32767 - LONG $0x45ef0f66; BYTE $0x00 // pxor xmm0, oword 0[rbp] /* [rip + .LCPI2_0] */ - LONG $0x41380f66; BYTE $0xc0 // phminposuw xmm0, xmm0 - LONG $0xc67e0f66 // movd esi, xmm0 - LONG $0x8000f681; WORD $0x0000 // xor esi, 32768 - WORD $0x394d; BYTE $0xcb // cmp r11, r9 - JE LBB2_12 - -LBB2_11: - LONG $0x04b70f42; BYTE $0x5f // movzx eax, word [rdi + 2*r11] - WORD $0x3966; BYTE $0xc6 // cmp si, ax - WORD $0x4f0f; BYTE $0xf0 // cmovg esi, eax - LONG $0xc0394166 // cmp r8w, ax - LONG $0xc04c0f44 // cmovl r8d, eax - LONG $0x01c38349 // add r11, 1 - WORD $0x394d; BYTE $0xd9 // cmp r9, r11 - JNE LBB2_11 - -LBB2_12: - LONG $0x01894466 // mov word [rcx], r8w - WORD $0x8966; BYTE $0x32 // mov word [rdx], si - RET - -LBB2_5: - LONG $0x4d6f0f66; BYTE $0x00 // movdqa xmm1, oword 0[rbp] /* [rip + .LCPI2_0] */ - LONG $0x456f0f66; BYTE $0x10 // movdqa xmm0, oword 16[rbp] /* [rip + .LCPI2_1] */ - WORD $0xc031 // xor eax, eax - LONG $0xd06f0f66 // movdqa xmm2, xmm0 - LONG $0xd96f0f66 // movdqa xmm3, xmm1 - LONG $0x01c0f641 // test r8b, 1 - JNE LBB2_9 - JMP LBB2_10 - -TEXT ·_uint16_max_min_sse4(SB), $0-32 - - MOVQ values+0(FP), DI - MOVQ length+8(FP), SI - MOVQ minout+16(FP), DX - MOVQ maxout+24(FP), CX - - WORD $0xf685 // test esi, esi - JLE LBB3_1 - WORD $0x8941; BYTE $0xf1 // mov r9d, esi - WORD $0xfe83; BYTE $0x0f // cmp esi, 15 - JA LBB3_4 - LONG $0xffb84166; BYTE $0xff // mov r8w, -1 - WORD $0x3145; BYTE $0xdb // xor r11d, r11d - WORD $0xf631 // xor esi, esi - JMP LBB3_11 - -LBB3_1: - LONG $0xffb84166; BYTE $0xff // mov r8w, -1 - WORD $0xf631 // xor esi, esi - JMP LBB3_12 - -LBB3_4: - WORD $0x8945; BYTE $0xcb // mov r11d, r9d - LONG $0xf0e38341 // and r11d, -16 - LONG $0xf0438d49 // lea rax, [r11 - 16] - WORD $0x8949; BYTE $0xc0 // mov r8, rax - LONG $0x04e8c149 // shr r8, 4 - LONG $0x01c08349 // add r8, 1 - WORD $0x8548; BYTE $0xc0 // test rax, rax - JE LBB3_5 - WORD $0x894d; BYTE $0xc2 // mov r10, r8 - LONG $0xfee28349 // and r10, -2 - WORD $0xf749; BYTE $0xda // neg r10 - LONG $0xc9ef0f66 // pxor xmm1, xmm1 - LONG $0xc0760f66 // pcmpeqd xmm0, xmm0 - WORD $0xc031 // xor eax, eax - LONG $0xd2760f66 // pcmpeqd xmm2, xmm2 - LONG $0xdbef0f66 // pxor xmm3, xmm3 - -LBB3_7: - LONG $0x246f0ff3; BYTE $0x47 // movdqu xmm4, oword [rdi + 2*rax] - LONG $0x6c6f0ff3; WORD $0x1047 // movdqu xmm5, oword [rdi + 2*rax + 16] - LONG $0x746f0ff3; WORD $0x2047 // movdqu xmm6, oword [rdi + 2*rax + 32] - LONG $0x7c6f0ff3; WORD $0x3047 // movdqu xmm7, oword [rdi + 2*rax + 48] - LONG $0x3a380f66; BYTE $0xc4 // pminuw xmm0, xmm4 - LONG $0x3a380f66; BYTE $0xd5 // pminuw xmm2, xmm5 - LONG $0x3e380f66; BYTE $0xcc // pmaxuw xmm1, xmm4 - LONG $0x3e380f66; BYTE $0xdd // pmaxuw xmm3, xmm5 - LONG $0x3a380f66; BYTE $0xc6 // pminuw xmm0, xmm6 - LONG $0x3a380f66; BYTE $0xd7 // pminuw xmm2, xmm7 - LONG $0x3e380f66; BYTE $0xce // pmaxuw xmm1, xmm6 - LONG $0x3e380f66; BYTE $0xdf // pmaxuw xmm3, xmm7 - LONG $0x20c08348 // add rax, 32 - LONG $0x02c28349 // add r10, 2 - JNE LBB3_7 - LONG $0x01c0f641 // test r8b, 1 - JE LBB3_10 - -LBB3_9: - LONG $0x246f0ff3; BYTE $0x47 // movdqu xmm4, oword [rdi + 2*rax] - LONG $0x6c6f0ff3; WORD $0x1047 // movdqu xmm5, oword [rdi + 2*rax + 16] - LONG $0x3e380f66; BYTE $0xdd // pmaxuw xmm3, xmm5 - LONG $0x3e380f66; BYTE $0xcc // pmaxuw xmm1, xmm4 - LONG $0x3a380f66; BYTE $0xd5 // pminuw xmm2, xmm5 - LONG $0x3a380f66; BYTE $0xc4 // pminuw xmm0, xmm4 - -LBB3_10: - LONG $0x3a380f66; BYTE $0xc2 // pminuw xmm0, xmm2 - LONG $0x3e380f66; BYTE $0xcb // pmaxuw xmm1, xmm3 - LONG $0xd2760f66 // pcmpeqd xmm2, xmm2 - LONG $0xd1ef0f66 // pxor xmm2, xmm1 - LONG $0x41380f66; BYTE $0xca // phminposuw xmm1, xmm2 - LONG $0xce7e0f66 // movd esi, xmm1 - WORD $0xd6f7 // not esi - LONG $0x41380f66; BYTE $0xc0 // phminposuw xmm0, xmm0 - LONG $0x7e0f4166; BYTE $0xc0 // movd r8d, xmm0 - WORD $0x394d; BYTE $0xcb // cmp r11, r9 - JE LBB3_12 - -LBB3_11: - LONG $0x04b70f42; BYTE $0x5f // movzx eax, word [rdi + 2*r11] - LONG $0xc0394166 // cmp r8w, ax - LONG $0xc0430f44 // cmovae r8d, eax - WORD $0x3966; BYTE $0xc6 // cmp si, ax - WORD $0x460f; BYTE $0xf0 // cmovbe esi, eax - LONG $0x01c38349 // add r11, 1 - WORD $0x394d; BYTE $0xd9 // cmp r9, r11 - JNE LBB3_11 - -LBB3_12: - WORD $0x8966; BYTE $0x31 // mov word [rcx], si - LONG $0x02894466 // mov word [rdx], r8w - RET - -LBB3_5: - LONG $0xc9ef0f66 // pxor xmm1, xmm1 - LONG $0xc0760f66 // pcmpeqd xmm0, xmm0 - WORD $0xc031 // xor eax, eax - LONG $0xd2760f66 // pcmpeqd xmm2, xmm2 - LONG $0xdbef0f66 // pxor xmm3, xmm3 - LONG $0x01c0f641 // test r8b, 1 - JNE LBB3_9 - JMP LBB3_10 - -DATA LCDATA3<>+0x000(SB)/8, $0x8000000080000000 -DATA LCDATA3<>+0x008(SB)/8, $0x8000000080000000 -DATA LCDATA3<>+0x010(SB)/8, $0x7fffffff7fffffff -DATA LCDATA3<>+0x018(SB)/8, $0x7fffffff7fffffff -GLOBL LCDATA3<>(SB), 8, $32 - -TEXT ·_int32_max_min_sse4(SB), $0-32 - - MOVQ values+0(FP), DI - MOVQ length+8(FP), SI - MOVQ minout+16(FP), DX - MOVQ maxout+24(FP), CX - LEAQ LCDATA3<>(SB), BP - - WORD $0xf685 // test esi, esi - JLE LBB4_1 - WORD $0x8941; BYTE $0xf1 // mov r9d, esi - WORD $0xfe83; BYTE $0x07 // cmp esi, 7 - JA LBB4_6 - LONG $0x000000b8; BYTE $0x80 // mov eax, -2147483648 - LONG $0xffffb841; WORD $0x7fff // mov r8d, 2147483647 - WORD $0x3145; BYTE $0xdb // xor r11d, r11d - JMP LBB4_4 - -LBB4_1: - LONG $0xffffb841; WORD $0x7fff // mov r8d, 2147483647 - LONG $0x000000b8; BYTE $0x80 // mov eax, -2147483648 - JMP LBB4_13 - -LBB4_6: - WORD $0x8945; BYTE $0xcb // mov r11d, r9d - LONG $0xf8e38341 // and r11d, -8 - LONG $0xf8438d49 // lea rax, [r11 - 8] - WORD $0x8949; BYTE $0xc0 // mov r8, rax - LONG $0x03e8c149 // shr r8, 3 - LONG $0x01c08349 // add r8, 1 - WORD $0x8548; BYTE $0xc0 // test rax, rax - JE LBB4_7 - WORD $0x894d; BYTE $0xc2 // mov r10, r8 - LONG $0xfee28349 // and r10, -2 - WORD $0xf749; BYTE $0xda // neg r10 - LONG $0x4d6f0f66; BYTE $0x00 // movdqa xmm1, oword 0[rbp] /* [rip + .LCPI4_0] */ - LONG $0x456f0f66; BYTE $0x10 // movdqa xmm0, oword 16[rbp] /* [rip + .LCPI4_1] */ - WORD $0xc031 // xor eax, eax - LONG $0xd06f0f66 // movdqa xmm2, xmm0 - LONG $0xd96f0f66 // movdqa xmm3, xmm1 - -LBB4_9: - LONG $0x246f0ff3; BYTE $0x87 // movdqu xmm4, oword [rdi + 4*rax] - LONG $0x6c6f0ff3; WORD $0x1087 // movdqu xmm5, oword [rdi + 4*rax + 16] - LONG $0x746f0ff3; WORD $0x2087 // movdqu xmm6, oword [rdi + 4*rax + 32] - LONG $0x7c6f0ff3; WORD $0x3087 // movdqu xmm7, oword [rdi + 4*rax + 48] - LONG $0x39380f66; BYTE $0xc4 // pminsd xmm0, xmm4 - LONG $0x39380f66; BYTE $0xd5 // pminsd xmm2, xmm5 - LONG $0x3d380f66; BYTE $0xcc // pmaxsd xmm1, xmm4 - LONG $0x3d380f66; BYTE $0xdd // pmaxsd xmm3, xmm5 - LONG $0x39380f66; BYTE $0xc6 // pminsd xmm0, xmm6 - LONG $0x39380f66; BYTE $0xd7 // pminsd xmm2, xmm7 - LONG $0x3d380f66; BYTE $0xce // pmaxsd xmm1, xmm6 - LONG $0x3d380f66; BYTE $0xdf // pmaxsd xmm3, xmm7 - LONG $0x10c08348 // add rax, 16 - LONG $0x02c28349 // add r10, 2 - JNE LBB4_9 - LONG $0x01c0f641 // test r8b, 1 - JE LBB4_12 - -LBB4_11: - LONG $0x246f0ff3; BYTE $0x87 // movdqu xmm4, oword [rdi + 4*rax] - LONG $0x6c6f0ff3; WORD $0x1087 // movdqu xmm5, oword [rdi + 4*rax + 16] - LONG $0x3d380f66; BYTE $0xdd // pmaxsd xmm3, xmm5 - LONG $0x3d380f66; BYTE $0xcc // pmaxsd xmm1, xmm4 - LONG $0x39380f66; BYTE $0xd5 // pminsd xmm2, xmm5 - LONG $0x39380f66; BYTE $0xc4 // pminsd xmm0, xmm4 - -LBB4_12: - LONG $0x39380f66; BYTE $0xc2 // pminsd xmm0, xmm2 - LONG $0x3d380f66; BYTE $0xcb // pmaxsd xmm1, xmm3 - LONG $0xd1700f66; BYTE $0x4e // pshufd xmm2, xmm1, 78 - LONG $0x3d380f66; BYTE $0xd1 // pmaxsd xmm2, xmm1 - LONG $0xca700f66; BYTE $0xe5 // pshufd xmm1, xmm2, 229 - LONG $0x3d380f66; BYTE $0xca // pmaxsd xmm1, xmm2 - LONG $0xc87e0f66 // movd eax, xmm1 - LONG $0xc8700f66; BYTE $0x4e // pshufd xmm1, xmm0, 78 - LONG $0x39380f66; BYTE $0xc8 // pminsd xmm1, xmm0 - LONG $0xc1700f66; BYTE $0xe5 // pshufd xmm0, xmm1, 229 - LONG $0x39380f66; BYTE $0xc1 // pminsd xmm0, xmm1 - LONG $0x7e0f4166; BYTE $0xc0 // movd r8d, xmm0 - WORD $0x394d; BYTE $0xcb // cmp r11, r9 - JE LBB4_13 - -LBB4_4: - WORD $0xc689 // mov esi, eax - -LBB4_5: - LONG $0x9f048b42 // mov eax, dword [rdi + 4*r11] - WORD $0x3941; BYTE $0xc0 // cmp r8d, eax - LONG $0xc04f0f44 // cmovg r8d, eax - WORD $0xc639 // cmp esi, eax - WORD $0x4d0f; BYTE $0xc6 // cmovge eax, esi - LONG $0x01c38349 // add r11, 1 - WORD $0xc689 // mov esi, eax - WORD $0x394d; BYTE $0xd9 // cmp r9, r11 - JNE LBB4_5 - -LBB4_13: - WORD $0x0189 // mov dword [rcx], eax - WORD $0x8944; BYTE $0x02 // mov dword [rdx], r8d - RET - -LBB4_7: - LONG $0x4d6f0f66; BYTE $0x00 // movdqa xmm1, oword 0[rbp] /* [rip + .LCPI4_0] */ - LONG $0x456f0f66; BYTE $0x10 // movdqa xmm0, oword 16[rbp] /* [rip + .LCPI4_1] */ - WORD $0xc031 // xor eax, eax - LONG $0xd06f0f66 // movdqa xmm2, xmm0 - LONG $0xd96f0f66 // movdqa xmm3, xmm1 - LONG $0x01c0f641 // test r8b, 1 - JNE LBB4_11 - JMP LBB4_12 - -TEXT ·_uint32_max_min_sse4(SB), $0-32 - - MOVQ values+0(FP), DI - MOVQ length+8(FP), SI - MOVQ minout+16(FP), DX - MOVQ maxout+24(FP), CX - - WORD $0xf685 // test esi, esi - JLE LBB5_1 - WORD $0x8941; BYTE $0xf1 // mov r9d, esi - WORD $0xfe83; BYTE $0x07 // cmp esi, 7 - JA LBB5_6 - WORD $0x3145; BYTE $0xdb // xor r11d, r11d - LONG $0xffffb841; WORD $0xffff // mov r8d, -1 - WORD $0xf631 // xor esi, esi - JMP LBB5_4 - -LBB5_1: - LONG $0xffffb841; WORD $0xffff // mov r8d, -1 - WORD $0xf631 // xor esi, esi - JMP LBB5_13 - -LBB5_6: - WORD $0x8945; BYTE $0xcb // mov r11d, r9d - LONG $0xf8e38341 // and r11d, -8 - LONG $0xf8438d49 // lea rax, [r11 - 8] - WORD $0x8949; BYTE $0xc0 // mov r8, rax - LONG $0x03e8c149 // shr r8, 3 - LONG $0x01c08349 // add r8, 1 - WORD $0x8548; BYTE $0xc0 // test rax, rax - JE LBB5_7 - WORD $0x894d; BYTE $0xc2 // mov r10, r8 - LONG $0xfee28349 // and r10, -2 - WORD $0xf749; BYTE $0xda // neg r10 - LONG $0xc9ef0f66 // pxor xmm1, xmm1 - LONG $0xc0760f66 // pcmpeqd xmm0, xmm0 - WORD $0xc031 // xor eax, eax - LONG $0xd2760f66 // pcmpeqd xmm2, xmm2 - LONG $0xdbef0f66 // pxor xmm3, xmm3 - -LBB5_9: - LONG $0x246f0ff3; BYTE $0x87 // movdqu xmm4, oword [rdi + 4*rax] - LONG $0x6c6f0ff3; WORD $0x1087 // movdqu xmm5, oword [rdi + 4*rax + 16] - LONG $0x746f0ff3; WORD $0x2087 // movdqu xmm6, oword [rdi + 4*rax + 32] - LONG $0x7c6f0ff3; WORD $0x3087 // movdqu xmm7, oword [rdi + 4*rax + 48] - LONG $0x3b380f66; BYTE $0xc4 // pminud xmm0, xmm4 - LONG $0x3b380f66; BYTE $0xd5 // pminud xmm2, xmm5 - LONG $0x3f380f66; BYTE $0xcc // pmaxud xmm1, xmm4 - LONG $0x3f380f66; BYTE $0xdd // pmaxud xmm3, xmm5 - LONG $0x3b380f66; BYTE $0xc6 // pminud xmm0, xmm6 - LONG $0x3b380f66; BYTE $0xd7 // pminud xmm2, xmm7 - LONG $0x3f380f66; BYTE $0xce // pmaxud xmm1, xmm6 - LONG $0x3f380f66; BYTE $0xdf // pmaxud xmm3, xmm7 - LONG $0x10c08348 // add rax, 16 - LONG $0x02c28349 // add r10, 2 - JNE LBB5_9 - LONG $0x01c0f641 // test r8b, 1 - JE LBB5_12 - -LBB5_11: - LONG $0x246f0ff3; BYTE $0x87 // movdqu xmm4, oword [rdi + 4*rax] - LONG $0x6c6f0ff3; WORD $0x1087 // movdqu xmm5, oword [rdi + 4*rax + 16] - LONG $0x3f380f66; BYTE $0xdd // pmaxud xmm3, xmm5 - LONG $0x3f380f66; BYTE $0xcc // pmaxud xmm1, xmm4 - LONG $0x3b380f66; BYTE $0xd5 // pminud xmm2, xmm5 - LONG $0x3b380f66; BYTE $0xc4 // pminud xmm0, xmm4 - -LBB5_12: - LONG $0x3b380f66; BYTE $0xc2 // pminud xmm0, xmm2 - LONG $0x3f380f66; BYTE $0xcb // pmaxud xmm1, xmm3 - LONG $0xd1700f66; BYTE $0x4e // pshufd xmm2, xmm1, 78 - LONG $0x3f380f66; BYTE $0xd1 // pmaxud xmm2, xmm1 - LONG $0xca700f66; BYTE $0xe5 // pshufd xmm1, xmm2, 229 - LONG $0x3f380f66; BYTE $0xca // pmaxud xmm1, xmm2 - LONG $0xce7e0f66 // movd esi, xmm1 - LONG $0xc8700f66; BYTE $0x4e // pshufd xmm1, xmm0, 78 - LONG $0x3b380f66; BYTE $0xc8 // pminud xmm1, xmm0 - LONG $0xc1700f66; BYTE $0xe5 // pshufd xmm0, xmm1, 229 - LONG $0x3b380f66; BYTE $0xc1 // pminud xmm0, xmm1 - LONG $0x7e0f4166; BYTE $0xc0 // movd r8d, xmm0 - WORD $0x394d; BYTE $0xcb // cmp r11, r9 - JE LBB5_13 - -LBB5_4: - WORD $0xf089 // mov eax, esi - -LBB5_5: - LONG $0x9f348b42 // mov esi, dword [rdi + 4*r11] - WORD $0x3941; BYTE $0xf0 // cmp r8d, esi - LONG $0xc6430f44 // cmovae r8d, esi - WORD $0xf039 // cmp eax, esi - WORD $0x470f; BYTE $0xf0 // cmova esi, eax - LONG $0x01c38349 // add r11, 1 - WORD $0xf089 // mov eax, esi - WORD $0x394d; BYTE $0xd9 // cmp r9, r11 - JNE LBB5_5 - -LBB5_13: - WORD $0x3189 // mov dword [rcx], esi - WORD $0x8944; BYTE $0x02 // mov dword [rdx], r8d - RET - -LBB5_7: - LONG $0xc9ef0f66 // pxor xmm1, xmm1 - LONG $0xc0760f66 // pcmpeqd xmm0, xmm0 - WORD $0xc031 // xor eax, eax - LONG $0xd2760f66 // pcmpeqd xmm2, xmm2 - LONG $0xdbef0f66 // pxor xmm3, xmm3 - LONG $0x01c0f641 // test r8b, 1 - JNE LBB5_11 - JMP LBB5_12 - -DATA LCDATA4<>+0x000(SB)/8, $0x8000000000000000 -DATA LCDATA4<>+0x008(SB)/8, $0x8000000000000000 -DATA LCDATA4<>+0x010(SB)/8, $0x7fffffffffffffff -DATA LCDATA4<>+0x018(SB)/8, $0x7fffffffffffffff -GLOBL LCDATA4<>(SB), 8, $32 - -TEXT ·_int64_max_min_sse4(SB), $0-32 - - MOVQ values+0(FP), DI - MOVQ length+8(FP), SI - MOVQ minout+16(FP), DX - MOVQ maxout+24(FP), CX - LEAQ LCDATA4<>(SB), BP - - QUAD $0xffffffffffffb849; WORD $0x7fff // mov r8, 9223372036854775807 - WORD $0xf685 // test esi, esi - JLE LBB6_1 - WORD $0x8941; BYTE $0xf1 // mov r9d, esi - WORD $0xfe83; BYTE $0x03 // cmp esi, 3 - JA LBB6_6 - LONG $0x01708d49 // lea rsi, [r8 + 1] - WORD $0x3145; BYTE $0xdb // xor r11d, r11d - JMP LBB6_4 - -LBB6_1: - LONG $0x01708d49 // lea rsi, [r8 + 1] - JMP LBB6_13 - -LBB6_6: - WORD $0x8945; BYTE $0xcb // mov r11d, r9d - LONG $0xfce38341 // and r11d, -4 - LONG $0xfc438d49 // lea rax, [r11 - 4] - WORD $0x8949; BYTE $0xc0 // mov r8, rax - LONG $0x02e8c149 // shr r8, 2 - LONG $0x01c08349 // add r8, 1 - WORD $0x8548; BYTE $0xc0 // test rax, rax - JE LBB6_7 - WORD $0x894d; BYTE $0xc2 // mov r10, r8 - LONG $0xfee28349 // and r10, -2 - WORD $0xf749; BYTE $0xda // neg r10 - LONG $0x6f0f4466; WORD $0x004d // movdqa xmm9, oword 0[rbp] /* [rip + .LCPI6_0] */ - LONG $0x6f0f4466; WORD $0x1045 // movdqa xmm8, oword 16[rbp] /* [rip + .LCPI6_1] */ - WORD $0xc031 // xor eax, eax - LONG $0x6f0f4166; BYTE $0xd0 // movdqa xmm2, xmm8 - LONG $0x6f0f4166; BYTE $0xf1 // movdqa xmm6, xmm9 - -LBB6_9: - LONG $0x3c6f0ff3; BYTE $0xc7 // movdqu xmm7, oword [rdi + 8*rax] - LONG $0xc76f0f66 // movdqa xmm0, xmm7 - LONG $0x380f4166; WORD $0xc037 // pcmpgtq xmm0, xmm8 - LONG $0xe76f0f66 // movdqa xmm4, xmm7 - LONG $0x380f4166; WORD $0xe015 // blendvpd xmm4, xmm8, xmm0 - LONG $0x4c6f0ff3; WORD $0x10c7 // movdqu xmm1, oword [rdi + 8*rax + 16] - LONG $0xc16f0f66 // movdqa xmm0, xmm1 - LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2 - LONG $0xe96f0f66 // movdqa xmm5, xmm1 - LONG $0x15380f66; BYTE $0xea // blendvpd xmm5, xmm2, xmm0 - LONG $0x6f0f4166; BYTE $0xc1 // movdqa xmm0, xmm9 - LONG $0x37380f66; BYTE $0xc7 // pcmpgtq xmm0, xmm7 - LONG $0x380f4166; WORD $0xf915 // blendvpd xmm7, xmm9, xmm0 - LONG $0xc66f0f66 // movdqa xmm0, xmm6 - LONG $0x37380f66; BYTE $0xc1 // pcmpgtq xmm0, xmm1 - LONG $0x15380f66; BYTE $0xce // blendvpd xmm1, xmm6, xmm0 - LONG $0x5c6f0ff3; WORD $0x20c7 // movdqu xmm3, oword [rdi + 8*rax + 32] - LONG $0xc36f0f66 // movdqa xmm0, xmm3 - LONG $0x37380f66; BYTE $0xc4 // pcmpgtq xmm0, xmm4 - LONG $0x6f0f4466; BYTE $0xc3 // movdqa xmm8, xmm3 - LONG $0x380f4466; WORD $0xc415 // blendvpd xmm8, xmm4, xmm0 - LONG $0x646f0ff3; WORD $0x30c7 // movdqu xmm4, oword [rdi + 8*rax + 48] - LONG $0xc46f0f66 // movdqa xmm0, xmm4 - LONG $0x37380f66; BYTE $0xc5 // pcmpgtq xmm0, xmm5 - LONG $0xd46f0f66 // movdqa xmm2, xmm4 - LONG $0x15380f66; BYTE $0xd5 // blendvpd xmm2, xmm5, xmm0 - LONG $0xc7280f66 // movapd xmm0, xmm7 - LONG $0x37380f66; BYTE $0xc3 // pcmpgtq xmm0, xmm3 - LONG $0x15380f66; BYTE $0xdf // blendvpd xmm3, xmm7, xmm0 - LONG $0xc1280f66 // movapd xmm0, xmm1 - LONG $0x37380f66; BYTE $0xc4 // pcmpgtq xmm0, xmm4 - LONG $0x15380f66; BYTE $0xe1 // blendvpd xmm4, xmm1, xmm0 - LONG $0x08c08348 // add rax, 8 - LONG $0x280f4466; BYTE $0xcb // movapd xmm9, xmm3 - LONG $0xf4280f66 // movapd xmm6, xmm4 - LONG $0x02c28349 // add r10, 2 - JNE LBB6_9 - LONG $0x01c0f641 // test r8b, 1 - JE LBB6_12 - -LBB6_11: - LONG $0x4c6f0ff3; WORD $0x10c7 // movdqu xmm1, oword [rdi + 8*rax + 16] - LONG $0xc4280f66 // movapd xmm0, xmm4 - LONG $0x37380f66; BYTE $0xc1 // pcmpgtq xmm0, xmm1 - LONG $0xe96f0f66 // movdqa xmm5, xmm1 - LONG $0x15380f66; BYTE $0xec // blendvpd xmm5, xmm4, xmm0 - LONG $0x246f0ff3; BYTE $0xc7 // movdqu xmm4, oword [rdi + 8*rax] - LONG $0xc3280f66 // movapd xmm0, xmm3 - LONG $0x37380f66; BYTE $0xc4 // pcmpgtq xmm0, xmm4 - LONG $0xf46f0f66 // movdqa xmm6, xmm4 - LONG $0x15380f66; BYTE $0xf3 // blendvpd xmm6, xmm3, xmm0 - LONG $0xc16f0f66 // movdqa xmm0, xmm1 - LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2 - LONG $0x15380f66; BYTE $0xca // blendvpd xmm1, xmm2, xmm0 - LONG $0xc46f0f66 // movdqa xmm0, xmm4 - LONG $0x380f4166; WORD $0xc037 // pcmpgtq xmm0, xmm8 - LONG $0x380f4166; WORD $0xe015 // blendvpd xmm4, xmm8, xmm0 - LONG $0x280f4466; BYTE $0xc4 // movapd xmm8, xmm4 - LONG $0xd1280f66 // movapd xmm2, xmm1 - LONG $0xde280f66 // movapd xmm3, xmm6 - LONG $0xe5280f66 // movapd xmm4, xmm5 - -LBB6_12: - LONG $0xc3280f66 // movapd xmm0, xmm3 - LONG $0x37380f66; BYTE $0xc4 // pcmpgtq xmm0, xmm4 - LONG $0x15380f66; BYTE $0xe3 // blendvpd xmm4, xmm3, xmm0 - LONG $0xcc700f66; BYTE $0x4e // pshufd xmm1, xmm4, 78 - LONG $0xc46f0f66 // movdqa xmm0, xmm4 - LONG $0x37380f66; BYTE $0xc1 // pcmpgtq xmm0, xmm1 - LONG $0x15380f66; BYTE $0xcc // blendvpd xmm1, xmm4, xmm0 - LONG $0x7e0f4866; BYTE $0xce // movq rsi, xmm1 - LONG $0xc26f0f66 // movdqa xmm0, xmm2 - LONG $0x380f4166; WORD $0xc037 // pcmpgtq xmm0, xmm8 - LONG $0x380f4166; WORD $0xd015 // blendvpd xmm2, xmm8, xmm0 - LONG $0xca700f66; BYTE $0x4e // pshufd xmm1, xmm2, 78 - LONG $0xc16f0f66 // movdqa xmm0, xmm1 - LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2 - LONG $0x15380f66; BYTE $0xca // blendvpd xmm1, xmm2, xmm0 - LONG $0x7e0f4966; BYTE $0xc8 // movq r8, xmm1 - WORD $0x394d; BYTE $0xcb // cmp r11, r9 - JE LBB6_13 - -LBB6_4: - WORD $0x8948; BYTE $0xf0 // mov rax, rsi - -LBB6_5: - LONG $0xdf348b4a // mov rsi, qword [rdi + 8*r11] - WORD $0x3949; BYTE $0xf0 // cmp r8, rsi - LONG $0xc64f0f4c // cmovg r8, rsi - WORD $0x3948; BYTE $0xf0 // cmp rax, rsi - LONG $0xf04d0f48 // cmovge rsi, rax - LONG $0x01c38349 // add r11, 1 - WORD $0x8948; BYTE $0xf0 // mov rax, rsi - WORD $0x394d; BYTE $0xd9 // cmp r9, r11 - JNE LBB6_5 - -LBB6_13: - WORD $0x8948; BYTE $0x31 // mov qword [rcx], rsi - WORD $0x894c; BYTE $0x02 // mov qword [rdx], r8 - RET - -LBB6_7: - LONG $0x5d280f66; BYTE $0x00 // movapd xmm3, oword 0[rbp] /* [rip + .LCPI6_0] */ - LONG $0x6f0f4466; WORD $0x1045 // movdqa xmm8, oword 16[rbp] /* [rip + .LCPI6_1] */ - WORD $0xc031 // xor eax, eax - LONG $0x6f0f4166; BYTE $0xd0 // movdqa xmm2, xmm8 - LONG $0xe3280f66 // movapd xmm4, xmm3 - LONG $0x01c0f641 // test r8b, 1 - JNE LBB6_11 - JMP LBB6_12 - -DATA LCDATA5<>+0x000(SB)/8, $0x8000000000000000 -DATA LCDATA5<>+0x008(SB)/8, $0x8000000000000000 -GLOBL LCDATA5<>(SB), 8, $16 - -TEXT ·_uint64_max_min_sse4(SB), $0-32 - - MOVQ values+0(FP), DI - MOVQ length+8(FP), SI - MOVQ minout+16(FP), DX - MOVQ maxout+24(FP), CX - LEAQ LCDATA5<>(SB), BP - - WORD $0xf685 // test esi, esi - JLE LBB7_1 - WORD $0x8941; BYTE $0xf1 // mov r9d, esi - WORD $0xfe83; BYTE $0x03 // cmp esi, 3 - JA LBB7_6 - LONG $0xffc0c749; WORD $0xffff; BYTE $0xff // mov r8, -1 - WORD $0x3145; BYTE $0xdb // xor r11d, r11d - WORD $0xc031 // xor eax, eax - JMP LBB7_4 - -LBB7_1: - LONG $0xffc0c749; WORD $0xffff; BYTE $0xff // mov r8, -1 - WORD $0xc031 // xor eax, eax - JMP LBB7_13 - -LBB7_6: - WORD $0x8945; BYTE $0xcb // mov r11d, r9d - LONG $0xfce38341 // and r11d, -4 - LONG $0xfc438d49 // lea rax, [r11 - 4] - WORD $0x8949; BYTE $0xc0 // mov r8, rax - LONG $0x02e8c149 // shr r8, 2 - LONG $0x01c08349 // add r8, 1 - WORD $0x8548; BYTE $0xc0 // test rax, rax - JE LBB7_7 - WORD $0x894d; BYTE $0xc2 // mov r10, r8 - LONG $0xfee28349 // and r10, -2 - WORD $0xf749; BYTE $0xda // neg r10 - LONG $0xef0f4566; BYTE $0xc9 // pxor xmm9, xmm9 - LONG $0x760f4566; BYTE $0xd2 // pcmpeqd xmm10, xmm10 - WORD $0xc031 // xor eax, eax - LONG $0x6f0f4466; WORD $0x0045 // movdqa xmm8, oword 0[rbp] /* [rip + .LCPI7_0] */ - LONG $0x760f4566; BYTE $0xdb // pcmpeqd xmm11, xmm11 - LONG $0xef0f4566; BYTE $0xe4 // pxor xmm12, xmm12 - -LBB7_9: - LONG $0x6f0f4166; BYTE $0xd2 // movdqa xmm2, xmm10 - LONG $0xef0f4166; BYTE $0xd0 // pxor xmm2, xmm8 - LONG $0x246f0ff3; BYTE $0xc7 // movdqu xmm4, oword [rdi + 8*rax] - LONG $0x6c6f0ff3; WORD $0x10c7 // movdqu xmm5, oword [rdi + 8*rax + 16] - LONG $0x6f0f44f3; WORD $0xc76c; BYTE $0x20 // movdqu xmm13, oword [rdi + 8*rax + 32] - LONG $0xc46f0f66 // movdqa xmm0, xmm4 - LONG $0xef0f4166; BYTE $0xc0 // pxor xmm0, xmm8 - LONG $0x6f0f4166; BYTE $0xc9 // movdqa xmm1, xmm9 - LONG $0xef0f4166; BYTE $0xc8 // pxor xmm1, xmm8 - LONG $0x37380f66; BYTE $0xc8 // pcmpgtq xmm1, xmm0 - LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2 - LONG $0xdc6f0f66 // movdqa xmm3, xmm4 - LONG $0x380f4166; WORD $0xda15 // blendvpd xmm3, xmm10, xmm0 - LONG $0x746f0ff3; WORD $0x30c7 // movdqu xmm6, oword [rdi + 8*rax + 48] - LONG $0x6f0f4166; BYTE $0xfb // movdqa xmm7, xmm11 - LONG $0xef0f4166; BYTE $0xf8 // pxor xmm7, xmm8 - LONG $0xc56f0f66 // movdqa xmm0, xmm5 - LONG $0xef0f4166; BYTE $0xc0 // pxor xmm0, xmm8 - LONG $0x6f0f4166; BYTE $0xd4 // movdqa xmm2, xmm12 - LONG $0xef0f4166; BYTE $0xd0 // pxor xmm2, xmm8 - LONG $0x37380f66; BYTE $0xd0 // pcmpgtq xmm2, xmm0 - LONG $0x37380f66; BYTE $0xc7 // pcmpgtq xmm0, xmm7 - LONG $0xfd6f0f66 // movdqa xmm7, xmm5 - LONG $0x380f4166; WORD $0xfb15 // blendvpd xmm7, xmm11, xmm0 - LONG $0xc16f0f66 // movdqa xmm0, xmm1 - LONG $0x380f4166; WORD $0xe115 // blendvpd xmm4, xmm9, xmm0 - LONG $0xc26f0f66 // movdqa xmm0, xmm2 - LONG $0x380f4166; WORD $0xec15 // blendvpd xmm5, xmm12, xmm0 - LONG $0xd3280f66 // movapd xmm2, xmm3 - LONG $0x570f4166; BYTE $0xd0 // xorpd xmm2, xmm8 - LONG $0x6f0f4166; BYTE $0xc5 // movdqa xmm0, xmm13 - LONG $0xef0f4166; BYTE $0xc0 // pxor xmm0, xmm8 - LONG $0xcc280f66 // movapd xmm1, xmm4 - LONG $0x570f4166; BYTE $0xc8 // xorpd xmm1, xmm8 - LONG $0x37380f66; BYTE $0xc8 // pcmpgtq xmm1, xmm0 - LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2 - LONG $0x6f0f4566; BYTE $0xd5 // movdqa xmm10, xmm13 - LONG $0x380f4466; WORD $0xd315 // blendvpd xmm10, xmm3, xmm0 - LONG $0xdf280f66 // movapd xmm3, xmm7 - LONG $0x570f4166; BYTE $0xd8 // xorpd xmm3, xmm8 - LONG $0xc66f0f66 // movdqa xmm0, xmm6 - LONG $0xef0f4166; BYTE $0xc0 // pxor xmm0, xmm8 - LONG $0xd5280f66 // movapd xmm2, xmm5 - LONG $0x570f4166; BYTE $0xd0 // xorpd xmm2, xmm8 - LONG $0x37380f66; BYTE $0xd0 // pcmpgtq xmm2, xmm0 - LONG $0x37380f66; BYTE $0xc3 // pcmpgtq xmm0, xmm3 - LONG $0x6f0f4466; BYTE $0xde // movdqa xmm11, xmm6 - LONG $0x380f4466; WORD $0xdf15 // blendvpd xmm11, xmm7, xmm0 - LONG $0xc16f0f66 // movdqa xmm0, xmm1 - LONG $0x380f4466; WORD $0xec15 // blendvpd xmm13, xmm4, xmm0 - LONG $0xc26f0f66 // movdqa xmm0, xmm2 - LONG $0x15380f66; BYTE $0xf5 // blendvpd xmm6, xmm5, xmm0 - LONG $0x08c08348 // add rax, 8 - LONG $0x280f4566; BYTE $0xcd // movapd xmm9, xmm13 - LONG $0x280f4466; BYTE $0xe6 // movapd xmm12, xmm6 - LONG $0x02c28349 // add r10, 2 - JNE LBB7_9 - LONG $0x01c0f641 // test r8b, 1 - JE LBB7_12 - -LBB7_11: - LONG $0x24100f66; BYTE $0xc7 // movupd xmm4, oword [rdi + 8*rax] - LONG $0x5c100f66; WORD $0x10c7 // movupd xmm3, oword [rdi + 8*rax + 16] - LONG $0x6d280f66; BYTE $0x00 // movapd xmm5, oword 0[rbp] /* [rip + .LCPI7_0] */ - LONG $0xc6280f66 // movapd xmm0, xmm6 - LONG $0xc5570f66 // xorpd xmm0, xmm5 - LONG $0xcb280f66 // movapd xmm1, xmm3 - LONG $0xcd570f66 // xorpd xmm1, xmm5 - LONG $0x37380f66; BYTE $0xc1 // pcmpgtq xmm0, xmm1 - LONG $0xfb280f66 // movapd xmm7, xmm3 - LONG $0x15380f66; BYTE $0xfe // blendvpd xmm7, xmm6, xmm0 - LONG $0x280f4166; BYTE $0xc5 // movapd xmm0, xmm13 - LONG $0xc5570f66 // xorpd xmm0, xmm5 - LONG $0xd4280f66 // movapd xmm2, xmm4 - LONG $0xd5570f66 // xorpd xmm2, xmm5 - LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2 - LONG $0xf4280f66 // movapd xmm6, xmm4 - LONG $0x380f4166; WORD $0xf515 // blendvpd xmm6, xmm13, xmm0 - LONG $0x280f4166; BYTE $0xc3 // movapd xmm0, xmm11 - LONG $0xc5570f66 // xorpd xmm0, xmm5 - LONG $0x37380f66; BYTE $0xc8 // pcmpgtq xmm1, xmm0 - LONG $0xc16f0f66 // movdqa xmm0, xmm1 - LONG $0x380f4166; WORD $0xdb15 // blendvpd xmm3, xmm11, xmm0 - LONG $0x570f4166; BYTE $0xea // xorpd xmm5, xmm10 - LONG $0x37380f66; BYTE $0xd5 // pcmpgtq xmm2, xmm5 - LONG $0xc26f0f66 // movdqa xmm0, xmm2 - LONG $0x380f4166; WORD $0xe215 // blendvpd xmm4, xmm10, xmm0 - LONG $0x280f4466; BYTE $0xd4 // movapd xmm10, xmm4 - LONG $0x280f4466; BYTE $0xdb // movapd xmm11, xmm3 - LONG $0x280f4466; BYTE $0xee // movapd xmm13, xmm6 - LONG $0xf7280f66 // movapd xmm6, xmm7 - -LBB7_12: - LONG $0x4d280f66; BYTE $0x00 // movapd xmm1, oword 0[rbp] /* [rip + .LCPI7_0] */ - LONG $0xd6280f66 // movapd xmm2, xmm6 - LONG $0xd1570f66 // xorpd xmm2, xmm1 - LONG $0x280f4166; BYTE $0xc5 // movapd xmm0, xmm13 - LONG $0xc1570f66 // xorpd xmm0, xmm1 - LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2 - LONG $0x380f4166; WORD $0xf515 // blendvpd xmm6, xmm13, xmm0 - LONG $0xd6700f66; BYTE $0x4e // pshufd xmm2, xmm6, 78 - LONG $0xc6280f66 // movapd xmm0, xmm6 - LONG $0xc1570f66 // xorpd xmm0, xmm1 - LONG $0xda6f0f66 // movdqa xmm3, xmm2 - LONG $0xd9ef0f66 // pxor xmm3, xmm1 - LONG $0x37380f66; BYTE $0xc3 // pcmpgtq xmm0, xmm3 - LONG $0x15380f66; BYTE $0xd6 // blendvpd xmm2, xmm6, xmm0 - LONG $0x7e0f4866; BYTE $0xd0 // movq rax, xmm2 - LONG $0x6f0f4166; BYTE $0xd2 // movdqa xmm2, xmm10 - LONG $0xd1ef0f66 // pxor xmm2, xmm1 - LONG $0x6f0f4166; BYTE $0xc3 // movdqa xmm0, xmm11 - LONG $0xc1ef0f66 // pxor xmm0, xmm1 - LONG $0x37380f66; BYTE $0xc2 // pcmpgtq xmm0, xmm2 - LONG $0x380f4566; WORD $0xda15 // blendvpd xmm11, xmm10, xmm0 - LONG $0x700f4166; WORD $0x4ed3 // pshufd xmm2, xmm11, 78 - LONG $0x6f0f4166; BYTE $0xc3 // movdqa xmm0, xmm11 - LONG $0xc1ef0f66 // pxor xmm0, xmm1 - LONG $0xcaef0f66 // pxor xmm1, xmm2 - LONG $0x37380f66; BYTE $0xc8 // pcmpgtq xmm1, xmm0 - LONG $0xc16f0f66 // movdqa xmm0, xmm1 - LONG $0x380f4166; WORD $0xd315 // blendvpd xmm2, xmm11, xmm0 - LONG $0x7e0f4966; BYTE $0xd0 // movq r8, xmm2 - WORD $0x394d; BYTE $0xcb // cmp r11, r9 - JE LBB7_13 - -LBB7_4: - WORD $0x8948; BYTE $0xc6 // mov rsi, rax - -LBB7_5: - LONG $0xdf048b4a // mov rax, qword [rdi + 8*r11] - WORD $0x3949; BYTE $0xc0 // cmp r8, rax - LONG $0xc0430f4c // cmovae r8, rax - WORD $0x3948; BYTE $0xc6 // cmp rsi, rax - LONG $0xc6470f48 // cmova rax, rsi - LONG $0x01c38349 // add r11, 1 - WORD $0x8948; BYTE $0xc6 // mov rsi, rax - WORD $0x394d; BYTE $0xd9 // cmp r9, r11 - JNE LBB7_5 - -LBB7_13: - WORD $0x8948; BYTE $0x01 // mov qword [rcx], rax - WORD $0x894c; BYTE $0x02 // mov qword [rdx], r8 - RET - -LBB7_7: - LONG $0x570f4566; BYTE $0xed // xorpd xmm13, xmm13 - LONG $0x760f4566; BYTE $0xd2 // pcmpeqd xmm10, xmm10 - WORD $0xc031 // xor eax, eax - LONG $0x760f4566; BYTE $0xdb // pcmpeqd xmm11, xmm11 - LONG $0xf6570f66 // xorpd xmm6, xmm6 - LONG $0x01c0f641 // test r8b, 1 - JNE LBB7_11 - JMP LBB7_12 diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints.go deleted file mode 100644 index 1666df129..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints.go +++ /dev/null @@ -1,407 +0,0 @@ -// Code generated by transpose_ints.go.tmpl. DO NOT EDIT. - -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package utils - -// when we upgrade to support go1.18, this can be massively simplified by using -// Go Generics, but since we aren't supporting go1.18 yet, I didn't want to use -// them here so we can maintain the backwards compatibility. - -func transposeInt8Int8(src []int8, dest []int8, transposeMap []int32) { - for i, s := range src { - dest[i] = int8(transposeMap[s]) - } -} - -func transposeInt8Uint8(src []int8, dest []uint8, transposeMap []int32) { - for i, s := range src { - dest[i] = uint8(transposeMap[s]) - } -} - -func transposeInt8Int16(src []int8, dest []int16, transposeMap []int32) { - for i, s := range src { - dest[i] = int16(transposeMap[s]) - } -} - -func transposeInt8Uint16(src []int8, dest []uint16, transposeMap []int32) { - for i, s := range src { - dest[i] = uint16(transposeMap[s]) - } -} - -func transposeInt8Int32(src []int8, dest []int32, transposeMap []int32) { - for i, s := range src { - dest[i] = int32(transposeMap[s]) - } -} - -func transposeInt8Uint32(src []int8, dest []uint32, transposeMap []int32) { - for i, s := range src { - dest[i] = uint32(transposeMap[s]) - } -} - -func transposeInt8Int64(src []int8, dest []int64, transposeMap []int32) { - for i, s := range src { - dest[i] = int64(transposeMap[s]) - } -} - -func transposeInt8Uint64(src []int8, dest []uint64, transposeMap []int32) { - for i, s := range src { - dest[i] = uint64(transposeMap[s]) - } -} - -func transposeUint8Int8(src []uint8, dest []int8, transposeMap []int32) { - for i, s := range src { - dest[i] = int8(transposeMap[s]) - } -} - -func transposeUint8Uint8(src []uint8, dest []uint8, transposeMap []int32) { - for i, s := range src { - dest[i] = uint8(transposeMap[s]) - } -} - -func transposeUint8Int16(src []uint8, dest []int16, transposeMap []int32) { - for i, s := range src { - dest[i] = int16(transposeMap[s]) - } -} - -func transposeUint8Uint16(src []uint8, dest []uint16, transposeMap []int32) { - for i, s := range src { - dest[i] = uint16(transposeMap[s]) - } -} - -func transposeUint8Int32(src []uint8, dest []int32, transposeMap []int32) { - for i, s := range src { - dest[i] = int32(transposeMap[s]) - } -} - -func transposeUint8Uint32(src []uint8, dest []uint32, transposeMap []int32) { - for i, s := range src { - dest[i] = uint32(transposeMap[s]) - } -} - -func transposeUint8Int64(src []uint8, dest []int64, transposeMap []int32) { - for i, s := range src { - dest[i] = int64(transposeMap[s]) - } -} - -func transposeUint8Uint64(src []uint8, dest []uint64, transposeMap []int32) { - for i, s := range src { - dest[i] = uint64(transposeMap[s]) - } -} - -func transposeInt16Int8(src []int16, dest []int8, transposeMap []int32) { - for i, s := range src { - dest[i] = int8(transposeMap[s]) - } -} - -func transposeInt16Uint8(src []int16, dest []uint8, transposeMap []int32) { - for i, s := range src { - dest[i] = uint8(transposeMap[s]) - } -} - -func transposeInt16Int16(src []int16, dest []int16, transposeMap []int32) { - for i, s := range src { - dest[i] = int16(transposeMap[s]) - } -} - -func transposeInt16Uint16(src []int16, dest []uint16, transposeMap []int32) { - for i, s := range src { - dest[i] = uint16(transposeMap[s]) - } -} - -func transposeInt16Int32(src []int16, dest []int32, transposeMap []int32) { - for i, s := range src { - dest[i] = int32(transposeMap[s]) - } -} - -func transposeInt16Uint32(src []int16, dest []uint32, transposeMap []int32) { - for i, s := range src { - dest[i] = uint32(transposeMap[s]) - } -} - -func transposeInt16Int64(src []int16, dest []int64, transposeMap []int32) { - for i, s := range src { - dest[i] = int64(transposeMap[s]) - } -} - -func transposeInt16Uint64(src []int16, dest []uint64, transposeMap []int32) { - for i, s := range src { - dest[i] = uint64(transposeMap[s]) - } -} - -func transposeUint16Int8(src []uint16, dest []int8, transposeMap []int32) { - for i, s := range src { - dest[i] = int8(transposeMap[s]) - } -} - -func transposeUint16Uint8(src []uint16, dest []uint8, transposeMap []int32) { - for i, s := range src { - dest[i] = uint8(transposeMap[s]) - } -} - -func transposeUint16Int16(src []uint16, dest []int16, transposeMap []int32) { - for i, s := range src { - dest[i] = int16(transposeMap[s]) - } -} - -func transposeUint16Uint16(src []uint16, dest []uint16, transposeMap []int32) { - for i, s := range src { - dest[i] = uint16(transposeMap[s]) - } -} - -func transposeUint16Int32(src []uint16, dest []int32, transposeMap []int32) { - for i, s := range src { - dest[i] = int32(transposeMap[s]) - } -} - -func transposeUint16Uint32(src []uint16, dest []uint32, transposeMap []int32) { - for i, s := range src { - dest[i] = uint32(transposeMap[s]) - } -} - -func transposeUint16Int64(src []uint16, dest []int64, transposeMap []int32) { - for i, s := range src { - dest[i] = int64(transposeMap[s]) - } -} - -func transposeUint16Uint64(src []uint16, dest []uint64, transposeMap []int32) { - for i, s := range src { - dest[i] = uint64(transposeMap[s]) - } -} - -func transposeInt32Int8(src []int32, dest []int8, transposeMap []int32) { - for i, s := range src { - dest[i] = int8(transposeMap[s]) - } -} - -func transposeInt32Uint8(src []int32, dest []uint8, transposeMap []int32) { - for i, s := range src { - dest[i] = uint8(transposeMap[s]) - } -} - -func transposeInt32Int16(src []int32, dest []int16, transposeMap []int32) { - for i, s := range src { - dest[i] = int16(transposeMap[s]) - } -} - -func transposeInt32Uint16(src []int32, dest []uint16, transposeMap []int32) { - for i, s := range src { - dest[i] = uint16(transposeMap[s]) - } -} - -func transposeInt32Int32(src []int32, dest []int32, transposeMap []int32) { - for i, s := range src { - dest[i] = int32(transposeMap[s]) - } -} - -func transposeInt32Uint32(src []int32, dest []uint32, transposeMap []int32) { - for i, s := range src { - dest[i] = uint32(transposeMap[s]) - } -} - -func transposeInt32Int64(src []int32, dest []int64, transposeMap []int32) { - for i, s := range src { - dest[i] = int64(transposeMap[s]) - } -} - -func transposeInt32Uint64(src []int32, dest []uint64, transposeMap []int32) { - for i, s := range src { - dest[i] = uint64(transposeMap[s]) - } -} - -func transposeUint32Int8(src []uint32, dest []int8, transposeMap []int32) { - for i, s := range src { - dest[i] = int8(transposeMap[s]) - } -} - -func transposeUint32Uint8(src []uint32, dest []uint8, transposeMap []int32) { - for i, s := range src { - dest[i] = uint8(transposeMap[s]) - } -} - -func transposeUint32Int16(src []uint32, dest []int16, transposeMap []int32) { - for i, s := range src { - dest[i] = int16(transposeMap[s]) - } -} - -func transposeUint32Uint16(src []uint32, dest []uint16, transposeMap []int32) { - for i, s := range src { - dest[i] = uint16(transposeMap[s]) - } -} - -func transposeUint32Int32(src []uint32, dest []int32, transposeMap []int32) { - for i, s := range src { - dest[i] = int32(transposeMap[s]) - } -} - -func transposeUint32Uint32(src []uint32, dest []uint32, transposeMap []int32) { - for i, s := range src { - dest[i] = uint32(transposeMap[s]) - } -} - -func transposeUint32Int64(src []uint32, dest []int64, transposeMap []int32) { - for i, s := range src { - dest[i] = int64(transposeMap[s]) - } -} - -func transposeUint32Uint64(src []uint32, dest []uint64, transposeMap []int32) { - for i, s := range src { - dest[i] = uint64(transposeMap[s]) - } -} - -func transposeInt64Int8(src []int64, dest []int8, transposeMap []int32) { - for i, s := range src { - dest[i] = int8(transposeMap[s]) - } -} - -func transposeInt64Uint8(src []int64, dest []uint8, transposeMap []int32) { - for i, s := range src { - dest[i] = uint8(transposeMap[s]) - } -} - -func transposeInt64Int16(src []int64, dest []int16, transposeMap []int32) { - for i, s := range src { - dest[i] = int16(transposeMap[s]) - } -} - -func transposeInt64Uint16(src []int64, dest []uint16, transposeMap []int32) { - for i, s := range src { - dest[i] = uint16(transposeMap[s]) - } -} - -func transposeInt64Int32(src []int64, dest []int32, transposeMap []int32) { - for i, s := range src { - dest[i] = int32(transposeMap[s]) - } -} - -func transposeInt64Uint32(src []int64, dest []uint32, transposeMap []int32) { - for i, s := range src { - dest[i] = uint32(transposeMap[s]) - } -} - -func transposeInt64Int64(src []int64, dest []int64, transposeMap []int32) { - for i, s := range src { - dest[i] = int64(transposeMap[s]) - } -} - -func transposeInt64Uint64(src []int64, dest []uint64, transposeMap []int32) { - for i, s := range src { - dest[i] = uint64(transposeMap[s]) - } -} - -func transposeUint64Int8(src []uint64, dest []int8, transposeMap []int32) { - for i, s := range src { - dest[i] = int8(transposeMap[s]) - } -} - -func transposeUint64Uint8(src []uint64, dest []uint8, transposeMap []int32) { - for i, s := range src { - dest[i] = uint8(transposeMap[s]) - } -} - -func transposeUint64Int16(src []uint64, dest []int16, transposeMap []int32) { - for i, s := range src { - dest[i] = int16(transposeMap[s]) - } -} - -func transposeUint64Uint16(src []uint64, dest []uint16, transposeMap []int32) { - for i, s := range src { - dest[i] = uint16(transposeMap[s]) - } -} - -func transposeUint64Int32(src []uint64, dest []int32, transposeMap []int32) { - for i, s := range src { - dest[i] = int32(transposeMap[s]) - } -} - -func transposeUint64Uint32(src []uint64, dest []uint32, transposeMap []int32) { - for i, s := range src { - dest[i] = uint32(transposeMap[s]) - } -} - -func transposeUint64Int64(src []uint64, dest []int64, transposeMap []int32) { - for i, s := range src { - dest[i] = int64(transposeMap[s]) - } -} - -func transposeUint64Uint64(src []uint64, dest []uint64, transposeMap []int32) { - for i, s := range src { - dest[i] = uint64(transposeMap[s]) - } -} diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints.go.tmpl b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints.go.tmpl deleted file mode 100644 index 680ae1ee7..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints.go.tmpl +++ /dev/null @@ -1,34 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package utils - -{{ $typelist := .In }} -{{range .In}} -{{ $src := .Type }} -{{ $srcName := .Name }} -{{ range $typelist }} -{{ $dest := .Type }} -{{ $destName := .Name }} - -func transpose{{ $srcName }}{{ $destName }}(src []{{$src}}, dest []{{$dest}}, transposeMap []int32) { - for i, s := range src { - dest[i] = {{ $dest }}(transposeMap[s]) - } -} - -{{ end }} -{{ end }} diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints.tmpldata b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints.tmpldata deleted file mode 100644 index 72eaf300c..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints.tmpldata +++ /dev/null @@ -1,34 +0,0 @@ -[ - { - "Name": "Int8", - "Type": "int8" - }, - { - "Name": "Uint8", - "Type": "uint8" - }, - { - "Name": "Int16", - "Type": "int16" - }, - { - "Name": "Uint16", - "Type": "uint16" - }, - { - "Name": "Int32", - "Type": "int32" - }, - { - "Name": "Uint32", - "Type": "uint32" - }, - { - "Name": "Int64", - "Type": "int64" - }, - { - "Name": "Uint64", - "Type": "uint64" - } -] diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_amd64.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_amd64.go deleted file mode 100644 index d4433d368..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_amd64.go +++ /dev/null @@ -1,325 +0,0 @@ -// Code generated by transpose_ints_amd64.go.tmpl. DO NOT EDIT. - -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build !noasm - -package utils - -import ( - "golang.org/x/sys/cpu" -) - -var ( - TransposeInt8Int8 func([]int8, []int8, []int32) - TransposeInt8Uint8 func([]int8, []uint8, []int32) - TransposeInt8Int16 func([]int8, []int16, []int32) - TransposeInt8Uint16 func([]int8, []uint16, []int32) - TransposeInt8Int32 func([]int8, []int32, []int32) - TransposeInt8Uint32 func([]int8, []uint32, []int32) - TransposeInt8Int64 func([]int8, []int64, []int32) - TransposeInt8Uint64 func([]int8, []uint64, []int32) - - TransposeUint8Int8 func([]uint8, []int8, []int32) - TransposeUint8Uint8 func([]uint8, []uint8, []int32) - TransposeUint8Int16 func([]uint8, []int16, []int32) - TransposeUint8Uint16 func([]uint8, []uint16, []int32) - TransposeUint8Int32 func([]uint8, []int32, []int32) - TransposeUint8Uint32 func([]uint8, []uint32, []int32) - TransposeUint8Int64 func([]uint8, []int64, []int32) - TransposeUint8Uint64 func([]uint8, []uint64, []int32) - - TransposeInt16Int8 func([]int16, []int8, []int32) - TransposeInt16Uint8 func([]int16, []uint8, []int32) - TransposeInt16Int16 func([]int16, []int16, []int32) - TransposeInt16Uint16 func([]int16, []uint16, []int32) - TransposeInt16Int32 func([]int16, []int32, []int32) - TransposeInt16Uint32 func([]int16, []uint32, []int32) - TransposeInt16Int64 func([]int16, []int64, []int32) - TransposeInt16Uint64 func([]int16, []uint64, []int32) - - TransposeUint16Int8 func([]uint16, []int8, []int32) - TransposeUint16Uint8 func([]uint16, []uint8, []int32) - TransposeUint16Int16 func([]uint16, []int16, []int32) - TransposeUint16Uint16 func([]uint16, []uint16, []int32) - TransposeUint16Int32 func([]uint16, []int32, []int32) - TransposeUint16Uint32 func([]uint16, []uint32, []int32) - TransposeUint16Int64 func([]uint16, []int64, []int32) - TransposeUint16Uint64 func([]uint16, []uint64, []int32) - - TransposeInt32Int8 func([]int32, []int8, []int32) - TransposeInt32Uint8 func([]int32, []uint8, []int32) - TransposeInt32Int16 func([]int32, []int16, []int32) - TransposeInt32Uint16 func([]int32, []uint16, []int32) - TransposeInt32Int32 func([]int32, []int32, []int32) - TransposeInt32Uint32 func([]int32, []uint32, []int32) - TransposeInt32Int64 func([]int32, []int64, []int32) - TransposeInt32Uint64 func([]int32, []uint64, []int32) - - TransposeUint32Int8 func([]uint32, []int8, []int32) - TransposeUint32Uint8 func([]uint32, []uint8, []int32) - TransposeUint32Int16 func([]uint32, []int16, []int32) - TransposeUint32Uint16 func([]uint32, []uint16, []int32) - TransposeUint32Int32 func([]uint32, []int32, []int32) - TransposeUint32Uint32 func([]uint32, []uint32, []int32) - TransposeUint32Int64 func([]uint32, []int64, []int32) - TransposeUint32Uint64 func([]uint32, []uint64, []int32) - - TransposeInt64Int8 func([]int64, []int8, []int32) - TransposeInt64Uint8 func([]int64, []uint8, []int32) - TransposeInt64Int16 func([]int64, []int16, []int32) - TransposeInt64Uint16 func([]int64, []uint16, []int32) - TransposeInt64Int32 func([]int64, []int32, []int32) - TransposeInt64Uint32 func([]int64, []uint32, []int32) - TransposeInt64Int64 func([]int64, []int64, []int32) - TransposeInt64Uint64 func([]int64, []uint64, []int32) - - TransposeUint64Int8 func([]uint64, []int8, []int32) - TransposeUint64Uint8 func([]uint64, []uint8, []int32) - TransposeUint64Int16 func([]uint64, []int16, []int32) - TransposeUint64Uint16 func([]uint64, []uint16, []int32) - TransposeUint64Int32 func([]uint64, []int32, []int32) - TransposeUint64Uint32 func([]uint64, []uint32, []int32) - TransposeUint64Int64 func([]uint64, []int64, []int32) - TransposeUint64Uint64 func([]uint64, []uint64, []int32) -) - -func init() { - if cpu.X86.HasAVX2 { - - TransposeInt8Int8 = transposeInt8Int8avx2 - TransposeInt8Uint8 = transposeInt8Uint8avx2 - TransposeInt8Int16 = transposeInt8Int16avx2 - TransposeInt8Uint16 = transposeInt8Uint16avx2 - TransposeInt8Int32 = transposeInt8Int32avx2 - TransposeInt8Uint32 = transposeInt8Uint32avx2 - TransposeInt8Int64 = transposeInt8Int64avx2 - TransposeInt8Uint64 = transposeInt8Uint64avx2 - - TransposeUint8Int8 = transposeUint8Int8avx2 - TransposeUint8Uint8 = transposeUint8Uint8avx2 - TransposeUint8Int16 = transposeUint8Int16avx2 - TransposeUint8Uint16 = transposeUint8Uint16avx2 - TransposeUint8Int32 = transposeUint8Int32avx2 - TransposeUint8Uint32 = transposeUint8Uint32avx2 - TransposeUint8Int64 = transposeUint8Int64avx2 - TransposeUint8Uint64 = transposeUint8Uint64avx2 - - TransposeInt16Int8 = transposeInt16Int8avx2 - TransposeInt16Uint8 = transposeInt16Uint8avx2 - TransposeInt16Int16 = transposeInt16Int16avx2 - TransposeInt16Uint16 = transposeInt16Uint16avx2 - TransposeInt16Int32 = transposeInt16Int32avx2 - TransposeInt16Uint32 = transposeInt16Uint32avx2 - TransposeInt16Int64 = transposeInt16Int64avx2 - TransposeInt16Uint64 = transposeInt16Uint64avx2 - - TransposeUint16Int8 = transposeUint16Int8avx2 - TransposeUint16Uint8 = transposeUint16Uint8avx2 - TransposeUint16Int16 = transposeUint16Int16avx2 - TransposeUint16Uint16 = transposeUint16Uint16avx2 - TransposeUint16Int32 = transposeUint16Int32avx2 - TransposeUint16Uint32 = transposeUint16Uint32avx2 - TransposeUint16Int64 = transposeUint16Int64avx2 - TransposeUint16Uint64 = transposeUint16Uint64avx2 - - TransposeInt32Int8 = transposeInt32Int8avx2 - TransposeInt32Uint8 = transposeInt32Uint8avx2 - TransposeInt32Int16 = transposeInt32Int16avx2 - TransposeInt32Uint16 = transposeInt32Uint16avx2 - TransposeInt32Int32 = transposeInt32Int32avx2 - TransposeInt32Uint32 = transposeInt32Uint32avx2 - TransposeInt32Int64 = transposeInt32Int64avx2 - TransposeInt32Uint64 = transposeInt32Uint64avx2 - - TransposeUint32Int8 = transposeUint32Int8avx2 - TransposeUint32Uint8 = transposeUint32Uint8avx2 - TransposeUint32Int16 = transposeUint32Int16avx2 - TransposeUint32Uint16 = transposeUint32Uint16avx2 - TransposeUint32Int32 = transposeUint32Int32avx2 - TransposeUint32Uint32 = transposeUint32Uint32avx2 - TransposeUint32Int64 = transposeUint32Int64avx2 - TransposeUint32Uint64 = transposeUint32Uint64avx2 - - TransposeInt64Int8 = transposeInt64Int8avx2 - TransposeInt64Uint8 = transposeInt64Uint8avx2 - TransposeInt64Int16 = transposeInt64Int16avx2 - TransposeInt64Uint16 = transposeInt64Uint16avx2 - TransposeInt64Int32 = transposeInt64Int32avx2 - TransposeInt64Uint32 = transposeInt64Uint32avx2 - TransposeInt64Int64 = transposeInt64Int64avx2 - TransposeInt64Uint64 = transposeInt64Uint64avx2 - - TransposeUint64Int8 = transposeUint64Int8avx2 - TransposeUint64Uint8 = transposeUint64Uint8avx2 - TransposeUint64Int16 = transposeUint64Int16avx2 - TransposeUint64Uint16 = transposeUint64Uint16avx2 - TransposeUint64Int32 = transposeUint64Int32avx2 - TransposeUint64Uint32 = transposeUint64Uint32avx2 - TransposeUint64Int64 = transposeUint64Int64avx2 - TransposeUint64Uint64 = transposeUint64Uint64avx2 - - } else if cpu.X86.HasSSE42 { - - TransposeInt8Int8 = transposeInt8Int8sse4 - TransposeInt8Uint8 = transposeInt8Uint8sse4 - TransposeInt8Int16 = transposeInt8Int16sse4 - TransposeInt8Uint16 = transposeInt8Uint16sse4 - TransposeInt8Int32 = transposeInt8Int32sse4 - TransposeInt8Uint32 = transposeInt8Uint32sse4 - TransposeInt8Int64 = transposeInt8Int64sse4 - TransposeInt8Uint64 = transposeInt8Uint64sse4 - - TransposeUint8Int8 = transposeUint8Int8sse4 - TransposeUint8Uint8 = transposeUint8Uint8sse4 - TransposeUint8Int16 = transposeUint8Int16sse4 - TransposeUint8Uint16 = transposeUint8Uint16sse4 - TransposeUint8Int32 = transposeUint8Int32sse4 - TransposeUint8Uint32 = transposeUint8Uint32sse4 - TransposeUint8Int64 = transposeUint8Int64sse4 - TransposeUint8Uint64 = transposeUint8Uint64sse4 - - TransposeInt16Int8 = transposeInt16Int8sse4 - TransposeInt16Uint8 = transposeInt16Uint8sse4 - TransposeInt16Int16 = transposeInt16Int16sse4 - TransposeInt16Uint16 = transposeInt16Uint16sse4 - TransposeInt16Int32 = transposeInt16Int32sse4 - TransposeInt16Uint32 = transposeInt16Uint32sse4 - TransposeInt16Int64 = transposeInt16Int64sse4 - TransposeInt16Uint64 = transposeInt16Uint64sse4 - - TransposeUint16Int8 = transposeUint16Int8sse4 - TransposeUint16Uint8 = transposeUint16Uint8sse4 - TransposeUint16Int16 = transposeUint16Int16sse4 - TransposeUint16Uint16 = transposeUint16Uint16sse4 - TransposeUint16Int32 = transposeUint16Int32sse4 - TransposeUint16Uint32 = transposeUint16Uint32sse4 - TransposeUint16Int64 = transposeUint16Int64sse4 - TransposeUint16Uint64 = transposeUint16Uint64sse4 - - TransposeInt32Int8 = transposeInt32Int8sse4 - TransposeInt32Uint8 = transposeInt32Uint8sse4 - TransposeInt32Int16 = transposeInt32Int16sse4 - TransposeInt32Uint16 = transposeInt32Uint16sse4 - TransposeInt32Int32 = transposeInt32Int32sse4 - TransposeInt32Uint32 = transposeInt32Uint32sse4 - TransposeInt32Int64 = transposeInt32Int64sse4 - TransposeInt32Uint64 = transposeInt32Uint64sse4 - - TransposeUint32Int8 = transposeUint32Int8sse4 - TransposeUint32Uint8 = transposeUint32Uint8sse4 - TransposeUint32Int16 = transposeUint32Int16sse4 - TransposeUint32Uint16 = transposeUint32Uint16sse4 - TransposeUint32Int32 = transposeUint32Int32sse4 - TransposeUint32Uint32 = transposeUint32Uint32sse4 - TransposeUint32Int64 = transposeUint32Int64sse4 - TransposeUint32Uint64 = transposeUint32Uint64sse4 - - TransposeInt64Int8 = transposeInt64Int8sse4 - TransposeInt64Uint8 = transposeInt64Uint8sse4 - TransposeInt64Int16 = transposeInt64Int16sse4 - TransposeInt64Uint16 = transposeInt64Uint16sse4 - TransposeInt64Int32 = transposeInt64Int32sse4 - TransposeInt64Uint32 = transposeInt64Uint32sse4 - TransposeInt64Int64 = transposeInt64Int64sse4 - TransposeInt64Uint64 = transposeInt64Uint64sse4 - - TransposeUint64Int8 = transposeUint64Int8sse4 - TransposeUint64Uint8 = transposeUint64Uint8sse4 - TransposeUint64Int16 = transposeUint64Int16sse4 - TransposeUint64Uint16 = transposeUint64Uint16sse4 - TransposeUint64Int32 = transposeUint64Int32sse4 - TransposeUint64Uint32 = transposeUint64Uint32sse4 - TransposeUint64Int64 = transposeUint64Int64sse4 - TransposeUint64Uint64 = transposeUint64Uint64sse4 - - } else { - - TransposeInt8Int8 = transposeInt8Int8 - TransposeInt8Uint8 = transposeInt8Uint8 - TransposeInt8Int16 = transposeInt8Int16 - TransposeInt8Uint16 = transposeInt8Uint16 - TransposeInt8Int32 = transposeInt8Int32 - TransposeInt8Uint32 = transposeInt8Uint32 - TransposeInt8Int64 = transposeInt8Int64 - TransposeInt8Uint64 = transposeInt8Uint64 - - TransposeUint8Int8 = transposeUint8Int8 - TransposeUint8Uint8 = transposeUint8Uint8 - TransposeUint8Int16 = transposeUint8Int16 - TransposeUint8Uint16 = transposeUint8Uint16 - TransposeUint8Int32 = transposeUint8Int32 - TransposeUint8Uint32 = transposeUint8Uint32 - TransposeUint8Int64 = transposeUint8Int64 - TransposeUint8Uint64 = transposeUint8Uint64 - - TransposeInt16Int8 = transposeInt16Int8 - TransposeInt16Uint8 = transposeInt16Uint8 - TransposeInt16Int16 = transposeInt16Int16 - TransposeInt16Uint16 = transposeInt16Uint16 - TransposeInt16Int32 = transposeInt16Int32 - TransposeInt16Uint32 = transposeInt16Uint32 - TransposeInt16Int64 = transposeInt16Int64 - TransposeInt16Uint64 = transposeInt16Uint64 - - TransposeUint16Int8 = transposeUint16Int8 - TransposeUint16Uint8 = transposeUint16Uint8 - TransposeUint16Int16 = transposeUint16Int16 - TransposeUint16Uint16 = transposeUint16Uint16 - TransposeUint16Int32 = transposeUint16Int32 - TransposeUint16Uint32 = transposeUint16Uint32 - TransposeUint16Int64 = transposeUint16Int64 - TransposeUint16Uint64 = transposeUint16Uint64 - - TransposeInt32Int8 = transposeInt32Int8 - TransposeInt32Uint8 = transposeInt32Uint8 - TransposeInt32Int16 = transposeInt32Int16 - TransposeInt32Uint16 = transposeInt32Uint16 - TransposeInt32Int32 = transposeInt32Int32 - TransposeInt32Uint32 = transposeInt32Uint32 - TransposeInt32Int64 = transposeInt32Int64 - TransposeInt32Uint64 = transposeInt32Uint64 - - TransposeUint32Int8 = transposeUint32Int8 - TransposeUint32Uint8 = transposeUint32Uint8 - TransposeUint32Int16 = transposeUint32Int16 - TransposeUint32Uint16 = transposeUint32Uint16 - TransposeUint32Int32 = transposeUint32Int32 - TransposeUint32Uint32 = transposeUint32Uint32 - TransposeUint32Int64 = transposeUint32Int64 - TransposeUint32Uint64 = transposeUint32Uint64 - - TransposeInt64Int8 = transposeInt64Int8 - TransposeInt64Uint8 = transposeInt64Uint8 - TransposeInt64Int16 = transposeInt64Int16 - TransposeInt64Uint16 = transposeInt64Uint16 - TransposeInt64Int32 = transposeInt64Int32 - TransposeInt64Uint32 = transposeInt64Uint32 - TransposeInt64Int64 = transposeInt64Int64 - TransposeInt64Uint64 = transposeInt64Uint64 - - TransposeUint64Int8 = transposeUint64Int8 - TransposeUint64Uint8 = transposeUint64Uint8 - TransposeUint64Int16 = transposeUint64Int16 - TransposeUint64Uint16 = transposeUint64Uint16 - TransposeUint64Int32 = transposeUint64Int32 - TransposeUint64Uint32 = transposeUint64Uint32 - TransposeUint64Int64 = transposeUint64Int64 - TransposeUint64Uint64 = transposeUint64Uint64 - - } -} diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_amd64.go.tmpl b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_amd64.go.tmpl deleted file mode 100644 index eac0208e5..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_amd64.go.tmpl +++ /dev/null @@ -1,75 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build !noasm -// +build !noasm - -package utils - -import ( - "golang.org/x/sys/cpu" -) - -var ( -{{ $typelist := .In }} -{{range .In}} -{{ $src := .Type -}} -{{ $srcName := .Name -}} -{{ range $typelist -}} -{{ $dest := .Type -}} -{{ $destName := .Name -}} - Transpose{{$srcName}}{{$destName}} func([]{{$src}}, []{{$dest}}, []int32) -{{end}} -{{end}} -) - - -func init() { - if cpu.X86.HasAVX2 { -{{ $typelist := .In }} -{{range .In}} -{{ $src := .Type -}} -{{ $srcName := .Name -}} -{{ range $typelist -}} -{{ $dest := .Type -}} -{{ $destName := .Name -}} - Transpose{{$srcName}}{{$destName}} = transpose{{ $srcName }}{{ $destName }}avx2 -{{end}} -{{end}} - } else if cpu.X86.HasSSE42 { -{{ $typelist := .In }} -{{range .In}} -{{ $src := .Type -}} -{{ $srcName := .Name -}} -{{ range $typelist -}} -{{ $dest := .Type -}} -{{ $destName := .Name -}} - Transpose{{$srcName}}{{$destName}} = transpose{{ $srcName }}{{ $destName }}sse4 -{{end}} -{{end}} - } else { -{{ $typelist := .In }} -{{range .In}} -{{ $src := .Type -}} -{{ $srcName := .Name -}} -{{ range $typelist -}} -{{ $dest := .Type -}} -{{ $destName := .Name -}} - Transpose{{$srcName}}{{$destName}} = transpose{{ $srcName }}{{ $destName }} -{{end}} -{{end}} - } -} diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_arm64.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_arm64.go deleted file mode 100644 index cc957cdaa..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_arm64.go +++ /dev/null @@ -1,96 +0,0 @@ -// Code generated by transpose_ints_s390x.go.tmpl. DO NOT EDIT. - -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build !noasm - -package utils - -// if building with the 'noasm' tag, then point to the pure go implementations -var ( - TransposeInt8Int8 = transposeInt8Int8 - TransposeInt8Uint8 = transposeInt8Uint8 - TransposeInt8Int16 = transposeInt8Int16 - TransposeInt8Uint16 = transposeInt8Uint16 - TransposeInt8Int32 = transposeInt8Int32 - TransposeInt8Uint32 = transposeInt8Uint32 - TransposeInt8Int64 = transposeInt8Int64 - TransposeInt8Uint64 = transposeInt8Uint64 - - TransposeUint8Int8 = transposeUint8Int8 - TransposeUint8Uint8 = transposeUint8Uint8 - TransposeUint8Int16 = transposeUint8Int16 - TransposeUint8Uint16 = transposeUint8Uint16 - TransposeUint8Int32 = transposeUint8Int32 - TransposeUint8Uint32 = transposeUint8Uint32 - TransposeUint8Int64 = transposeUint8Int64 - TransposeUint8Uint64 = transposeUint8Uint64 - - TransposeInt16Int8 = transposeInt16Int8 - TransposeInt16Uint8 = transposeInt16Uint8 - TransposeInt16Int16 = transposeInt16Int16 - TransposeInt16Uint16 = transposeInt16Uint16 - TransposeInt16Int32 = transposeInt16Int32 - TransposeInt16Uint32 = transposeInt16Uint32 - TransposeInt16Int64 = transposeInt16Int64 - TransposeInt16Uint64 = transposeInt16Uint64 - - TransposeUint16Int8 = transposeUint16Int8 - TransposeUint16Uint8 = transposeUint16Uint8 - TransposeUint16Int16 = transposeUint16Int16 - TransposeUint16Uint16 = transposeUint16Uint16 - TransposeUint16Int32 = transposeUint16Int32 - TransposeUint16Uint32 = transposeUint16Uint32 - TransposeUint16Int64 = transposeUint16Int64 - TransposeUint16Uint64 = transposeUint16Uint64 - - TransposeInt32Int8 = transposeInt32Int8 - TransposeInt32Uint8 = transposeInt32Uint8 - TransposeInt32Int16 = transposeInt32Int16 - TransposeInt32Uint16 = transposeInt32Uint16 - TransposeInt32Int32 = transposeInt32Int32 - TransposeInt32Uint32 = transposeInt32Uint32 - TransposeInt32Int64 = transposeInt32Int64 - TransposeInt32Uint64 = transposeInt32Uint64 - - TransposeUint32Int8 = transposeUint32Int8 - TransposeUint32Uint8 = transposeUint32Uint8 - TransposeUint32Int16 = transposeUint32Int16 - TransposeUint32Uint16 = transposeUint32Uint16 - TransposeUint32Int32 = transposeUint32Int32 - TransposeUint32Uint32 = transposeUint32Uint32 - TransposeUint32Int64 = transposeUint32Int64 - TransposeUint32Uint64 = transposeUint32Uint64 - - TransposeInt64Int8 = transposeInt64Int8 - TransposeInt64Uint8 = transposeInt64Uint8 - TransposeInt64Int16 = transposeInt64Int16 - TransposeInt64Uint16 = transposeInt64Uint16 - TransposeInt64Int32 = transposeInt64Int32 - TransposeInt64Uint32 = transposeInt64Uint32 - TransposeInt64Int64 = transposeInt64Int64 - TransposeInt64Uint64 = transposeInt64Uint64 - - TransposeUint64Int8 = transposeUint64Int8 - TransposeUint64Uint8 = transposeUint64Uint8 - TransposeUint64Int16 = transposeUint64Int16 - TransposeUint64Uint16 = transposeUint64Uint16 - TransposeUint64Int32 = transposeUint64Int32 - TransposeUint64Uint32 = transposeUint64Uint32 - TransposeUint64Int64 = transposeUint64Int64 - TransposeUint64Uint64 = transposeUint64Uint64 -) diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_avx2_amd64.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_avx2_amd64.go deleted file mode 100644 index f1421ddf5..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_avx2_amd64.go +++ /dev/null @@ -1,473 +0,0 @@ -// Code generated by transpose_ints_simd.go.tmpl. DO NOT EDIT. - -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build !noasm - -package utils - -import ( - "unsafe" -) - -//go:noescape -func _transpose_int8_int8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt8Int8avx2(src []int8, dest []int8, transposeMap []int32) { - _transpose_int8_int8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int8_uint8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt8Uint8avx2(src []int8, dest []uint8, transposeMap []int32) { - _transpose_int8_uint8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int8_int16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt8Int16avx2(src []int8, dest []int16, transposeMap []int32) { - _transpose_int8_int16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int8_uint16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt8Uint16avx2(src []int8, dest []uint16, transposeMap []int32) { - _transpose_int8_uint16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int8_int32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt8Int32avx2(src []int8, dest []int32, transposeMap []int32) { - _transpose_int8_int32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int8_uint32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt8Uint32avx2(src []int8, dest []uint32, transposeMap []int32) { - _transpose_int8_uint32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int8_int64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt8Int64avx2(src []int8, dest []int64, transposeMap []int32) { - _transpose_int8_int64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int8_uint64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt8Uint64avx2(src []int8, dest []uint64, transposeMap []int32) { - _transpose_int8_uint64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint8_int8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint8Int8avx2(src []uint8, dest []int8, transposeMap []int32) { - _transpose_uint8_int8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint8_uint8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint8Uint8avx2(src []uint8, dest []uint8, transposeMap []int32) { - _transpose_uint8_uint8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint8_int16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint8Int16avx2(src []uint8, dest []int16, transposeMap []int32) { - _transpose_uint8_int16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint8_uint16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint8Uint16avx2(src []uint8, dest []uint16, transposeMap []int32) { - _transpose_uint8_uint16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint8_int32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint8Int32avx2(src []uint8, dest []int32, transposeMap []int32) { - _transpose_uint8_int32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint8_uint32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint8Uint32avx2(src []uint8, dest []uint32, transposeMap []int32) { - _transpose_uint8_uint32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint8_int64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint8Int64avx2(src []uint8, dest []int64, transposeMap []int32) { - _transpose_uint8_int64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint8_uint64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint8Uint64avx2(src []uint8, dest []uint64, transposeMap []int32) { - _transpose_uint8_uint64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int16_int8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt16Int8avx2(src []int16, dest []int8, transposeMap []int32) { - _transpose_int16_int8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int16_uint8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt16Uint8avx2(src []int16, dest []uint8, transposeMap []int32) { - _transpose_int16_uint8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int16_int16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt16Int16avx2(src []int16, dest []int16, transposeMap []int32) { - _transpose_int16_int16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int16_uint16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt16Uint16avx2(src []int16, dest []uint16, transposeMap []int32) { - _transpose_int16_uint16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int16_int32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt16Int32avx2(src []int16, dest []int32, transposeMap []int32) { - _transpose_int16_int32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int16_uint32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt16Uint32avx2(src []int16, dest []uint32, transposeMap []int32) { - _transpose_int16_uint32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int16_int64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt16Int64avx2(src []int16, dest []int64, transposeMap []int32) { - _transpose_int16_int64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int16_uint64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt16Uint64avx2(src []int16, dest []uint64, transposeMap []int32) { - _transpose_int16_uint64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint16_int8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint16Int8avx2(src []uint16, dest []int8, transposeMap []int32) { - _transpose_uint16_int8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint16_uint8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint16Uint8avx2(src []uint16, dest []uint8, transposeMap []int32) { - _transpose_uint16_uint8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint16_int16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint16Int16avx2(src []uint16, dest []int16, transposeMap []int32) { - _transpose_uint16_int16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint16_uint16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint16Uint16avx2(src []uint16, dest []uint16, transposeMap []int32) { - _transpose_uint16_uint16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint16_int32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint16Int32avx2(src []uint16, dest []int32, transposeMap []int32) { - _transpose_uint16_int32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint16_uint32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint16Uint32avx2(src []uint16, dest []uint32, transposeMap []int32) { - _transpose_uint16_uint32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint16_int64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint16Int64avx2(src []uint16, dest []int64, transposeMap []int32) { - _transpose_uint16_int64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint16_uint64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint16Uint64avx2(src []uint16, dest []uint64, transposeMap []int32) { - _transpose_uint16_uint64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int32_int8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt32Int8avx2(src []int32, dest []int8, transposeMap []int32) { - _transpose_int32_int8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int32_uint8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt32Uint8avx2(src []int32, dest []uint8, transposeMap []int32) { - _transpose_int32_uint8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int32_int16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt32Int16avx2(src []int32, dest []int16, transposeMap []int32) { - _transpose_int32_int16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int32_uint16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt32Uint16avx2(src []int32, dest []uint16, transposeMap []int32) { - _transpose_int32_uint16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int32_int32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt32Int32avx2(src []int32, dest []int32, transposeMap []int32) { - _transpose_int32_int32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int32_uint32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt32Uint32avx2(src []int32, dest []uint32, transposeMap []int32) { - _transpose_int32_uint32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int32_int64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt32Int64avx2(src []int32, dest []int64, transposeMap []int32) { - _transpose_int32_int64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int32_uint64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt32Uint64avx2(src []int32, dest []uint64, transposeMap []int32) { - _transpose_int32_uint64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint32_int8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint32Int8avx2(src []uint32, dest []int8, transposeMap []int32) { - _transpose_uint32_int8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint32_uint8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint32Uint8avx2(src []uint32, dest []uint8, transposeMap []int32) { - _transpose_uint32_uint8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint32_int16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint32Int16avx2(src []uint32, dest []int16, transposeMap []int32) { - _transpose_uint32_int16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint32_uint16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint32Uint16avx2(src []uint32, dest []uint16, transposeMap []int32) { - _transpose_uint32_uint16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint32_int32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint32Int32avx2(src []uint32, dest []int32, transposeMap []int32) { - _transpose_uint32_int32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint32_uint32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint32Uint32avx2(src []uint32, dest []uint32, transposeMap []int32) { - _transpose_uint32_uint32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint32_int64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint32Int64avx2(src []uint32, dest []int64, transposeMap []int32) { - _transpose_uint32_int64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint32_uint64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint32Uint64avx2(src []uint32, dest []uint64, transposeMap []int32) { - _transpose_uint32_uint64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int64_int8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt64Int8avx2(src []int64, dest []int8, transposeMap []int32) { - _transpose_int64_int8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int64_uint8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt64Uint8avx2(src []int64, dest []uint8, transposeMap []int32) { - _transpose_int64_uint8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int64_int16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt64Int16avx2(src []int64, dest []int16, transposeMap []int32) { - _transpose_int64_int16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int64_uint16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt64Uint16avx2(src []int64, dest []uint16, transposeMap []int32) { - _transpose_int64_uint16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int64_int32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt64Int32avx2(src []int64, dest []int32, transposeMap []int32) { - _transpose_int64_int32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int64_uint32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt64Uint32avx2(src []int64, dest []uint32, transposeMap []int32) { - _transpose_int64_uint32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int64_int64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt64Int64avx2(src []int64, dest []int64, transposeMap []int32) { - _transpose_int64_int64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int64_uint64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt64Uint64avx2(src []int64, dest []uint64, transposeMap []int32) { - _transpose_int64_uint64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint64_int8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint64Int8avx2(src []uint64, dest []int8, transposeMap []int32) { - _transpose_uint64_int8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint64_uint8_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint64Uint8avx2(src []uint64, dest []uint8, transposeMap []int32) { - _transpose_uint64_uint8_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint64_int16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint64Int16avx2(src []uint64, dest []int16, transposeMap []int32) { - _transpose_uint64_int16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint64_uint16_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint64Uint16avx2(src []uint64, dest []uint16, transposeMap []int32) { - _transpose_uint64_uint16_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint64_int32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint64Int32avx2(src []uint64, dest []int32, transposeMap []int32) { - _transpose_uint64_int32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint64_uint32_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint64Uint32avx2(src []uint64, dest []uint32, transposeMap []int32) { - _transpose_uint64_uint32_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint64_int64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint64Int64avx2(src []uint64, dest []int64, transposeMap []int32) { - _transpose_uint64_int64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint64_uint64_avx2(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint64Uint64avx2(src []uint64, dest []uint64, transposeMap []int32) { - _transpose_uint64_uint64_avx2(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_avx2_amd64.s b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_avx2_amd64.s deleted file mode 100644 index fbcc101eb..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_avx2_amd64.s +++ /dev/null @@ -1,3074 +0,0 @@ -//+build !noasm !appengine -// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT - -TEXT ·_transpose_uint8_uint8_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB0_1 - -LBB0_5: - WORD $0xd089 // mov eax, edx - WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - LONG $0x0157b60f // movzx edx, byte [rdi + 1] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - LONG $0x0257b60f // movzx edx, byte [rdi + 2] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - LONG $0x0357b60f // movzx edx, byte [rdi + 3] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB0_5 - -LBB0_1: - WORD $0xd285 // test edx, edx - JLE LBB0_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB0_3: - LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB0_3 - -LBB0_4: - RET - -TEXT ·_transpose_int8_uint8_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB1_1 - -LBB1_5: - WORD $0xd089 // mov eax, edx - LONG $0x17be0f48 // movsx rdx, byte [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB1_5 - -LBB1_1: - WORD $0xd285 // test edx, edx - JLE LBB1_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB1_3: - LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB1_3 - -LBB1_4: - RET - -TEXT ·_transpose_uint16_uint8_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB2_1 - -LBB2_5: - WORD $0xd089 // mov eax, edx - WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - LONG $0x0257b70f // movzx edx, word [rdi + 2] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - LONG $0x0457b70f // movzx edx, word [rdi + 4] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - LONG $0x0657b70f // movzx edx, word [rdi + 6] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB2_5 - -LBB2_1: - WORD $0xd285 // test edx, edx - JLE LBB2_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB2_3: - LONG $0x04b70f42; BYTE $0x47 // movzx eax, word [rdi + 2*r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB2_3 - -LBB2_4: - RET - -TEXT ·_transpose_int16_uint8_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB3_1 - -LBB3_5: - WORD $0xd089 // mov eax, edx - LONG $0x17bf0f48 // movsx rdx, word [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB3_5 - -LBB3_1: - WORD $0xd285 // test edx, edx - JLE LBB3_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB3_3: - LONG $0x04bf0f4a; BYTE $0x47 // movsx rax, word [rdi + 2*r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB3_3 - -LBB3_4: - RET - -TEXT ·_transpose_uint32_uint8_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB4_1 - -LBB4_5: - WORD $0xd089 // mov eax, edx - WORD $0x178b // mov edx, dword [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB4_5 - -LBB4_1: - WORD $0xd285 // test edx, edx - JLE LBB4_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB4_3: - LONG $0x87048b42 // mov eax, dword [rdi + 4*r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB4_3 - -LBB4_4: - RET - -TEXT ·_transpose_int32_uint8_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB5_1 - -LBB5_5: - WORD $0xd089 // mov eax, edx - WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - LONG $0x04576348 // movsxd rdx, dword [rdi + 4] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - LONG $0x08576348 // movsxd rdx, dword [rdi + 8] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - LONG $0x0c576348 // movsxd rdx, dword [rdi + 12] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB5_5 - -LBB5_1: - WORD $0xd285 // test edx, edx - JLE LBB5_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB5_3: - LONG $0x8704634a // movsxd rax, dword [rdi + 4*r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB5_3 - -LBB5_4: - RET - -TEXT ·_transpose_uint64_uint8_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB6_1 - -LBB6_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB6_5 - -LBB6_1: - WORD $0xd285 // test edx, edx - JLE LBB6_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB6_3: - LONG $0xc7048b4a // mov rax, qword [rdi + 8*r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB6_3 - -LBB6_4: - RET - -TEXT ·_transpose_int64_uint8_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB7_1 - -LBB7_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB7_5 - -LBB7_1: - WORD $0xd285 // test edx, edx - JLE LBB7_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB7_3: - LONG $0xc7048b4a // mov rax, qword [rdi + 8*r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB7_3 - -LBB7_4: - RET - -TEXT ·_transpose_uint8_int8_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB8_1 - -LBB8_5: - WORD $0xd089 // mov eax, edx - WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - LONG $0x0157b60f // movzx edx, byte [rdi + 1] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - LONG $0x0257b60f // movzx edx, byte [rdi + 2] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - LONG $0x0357b60f // movzx edx, byte [rdi + 3] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB8_5 - -LBB8_1: - WORD $0xd285 // test edx, edx - JLE LBB8_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB8_3: - LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB8_3 - -LBB8_4: - RET - -TEXT ·_transpose_int8_int8_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB9_1 - -LBB9_5: - WORD $0xd089 // mov eax, edx - LONG $0x17be0f48 // movsx rdx, byte [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB9_5 - -LBB9_1: - WORD $0xd285 // test edx, edx - JLE LBB9_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB9_3: - LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB9_3 - -LBB9_4: - RET - -TEXT ·_transpose_uint16_int8_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB10_1 - -LBB10_5: - WORD $0xd089 // mov eax, edx - WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - LONG $0x0257b70f // movzx edx, word [rdi + 2] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - LONG $0x0457b70f // movzx edx, word [rdi + 4] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - LONG $0x0657b70f // movzx edx, word [rdi + 6] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB10_5 - -LBB10_1: - WORD $0xd285 // test edx, edx - JLE LBB10_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB10_3: - LONG $0x04b70f42; BYTE $0x47 // movzx eax, word [rdi + 2*r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB10_3 - -LBB10_4: - RET - -TEXT ·_transpose_int16_int8_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB11_1 - -LBB11_5: - WORD $0xd089 // mov eax, edx - LONG $0x17bf0f48 // movsx rdx, word [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB11_5 - -LBB11_1: - WORD $0xd285 // test edx, edx - JLE LBB11_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB11_3: - LONG $0x04bf0f4a; BYTE $0x47 // movsx rax, word [rdi + 2*r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB11_3 - -LBB11_4: - RET - -TEXT ·_transpose_uint32_int8_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB12_1 - -LBB12_5: - WORD $0xd089 // mov eax, edx - WORD $0x178b // mov edx, dword [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB12_5 - -LBB12_1: - WORD $0xd285 // test edx, edx - JLE LBB12_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB12_3: - LONG $0x87048b42 // mov eax, dword [rdi + 4*r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB12_3 - -LBB12_4: - RET - -TEXT ·_transpose_int32_int8_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB13_1 - -LBB13_5: - WORD $0xd089 // mov eax, edx - WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - LONG $0x04576348 // movsxd rdx, dword [rdi + 4] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - LONG $0x08576348 // movsxd rdx, dword [rdi + 8] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - LONG $0x0c576348 // movsxd rdx, dword [rdi + 12] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB13_5 - -LBB13_1: - WORD $0xd285 // test edx, edx - JLE LBB13_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB13_3: - LONG $0x8704634a // movsxd rax, dword [rdi + 4*r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB13_3 - -LBB13_4: - RET - -TEXT ·_transpose_uint64_int8_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB14_1 - -LBB14_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB14_5 - -LBB14_1: - WORD $0xd285 // test edx, edx - JLE LBB14_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB14_3: - LONG $0xc7048b4a // mov rax, qword [rdi + 8*r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB14_3 - -LBB14_4: - RET - -TEXT ·_transpose_int64_int8_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB15_1 - -LBB15_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB15_5 - -LBB15_1: - WORD $0xd285 // test edx, edx - JLE LBB15_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB15_3: - LONG $0xc7048b4a // mov rax, qword [rdi + 8*r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB15_3 - -LBB15_4: - RET - -TEXT ·_transpose_uint8_uint16_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB16_1 - -LBB16_5: - WORD $0xd089 // mov eax, edx - WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - LONG $0x0157b60f // movzx edx, byte [rdi + 1] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - LONG $0x0257b60f // movzx edx, byte [rdi + 2] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - LONG $0x0357b60f // movzx edx, byte [rdi + 3] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB16_5 - -LBB16_1: - WORD $0xd285 // test edx, edx - JLE LBB16_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB16_3: - LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x46 // mov word [rsi + 2*r8], ax - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB16_3 - -LBB16_4: - RET - -TEXT ·_transpose_int8_uint16_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB17_1 - -LBB17_5: - WORD $0xd089 // mov eax, edx - LONG $0x17be0f48 // movsx rdx, byte [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB17_5 - -LBB17_1: - WORD $0xd285 // test edx, edx - JLE LBB17_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB17_3: - LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x46 // mov word [rsi + 2*r8], ax - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB17_3 - -LBB17_4: - RET - -TEXT ·_transpose_uint16_uint16_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB18_1 - -LBB18_5: - WORD $0xd089 // mov eax, edx - WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - LONG $0x0257b70f // movzx edx, word [rdi + 2] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - LONG $0x0457b70f // movzx edx, word [rdi + 4] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - LONG $0x0657b70f // movzx edx, word [rdi + 6] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB18_5 - -LBB18_1: - WORD $0xd285 // test edx, edx - JLE LBB18_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB18_3: - LONG $0x04b70f42; BYTE $0x07 // movzx eax, word [rdi + r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB18_3 - -LBB18_4: - RET - -TEXT ·_transpose_int16_uint16_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB19_1 - -LBB19_5: - WORD $0xd089 // mov eax, edx - LONG $0x17bf0f48 // movsx rdx, word [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB19_5 - -LBB19_1: - WORD $0xd285 // test edx, edx - JLE LBB19_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB19_3: - LONG $0x04bf0f4a; BYTE $0x07 // movsx rax, word [rdi + r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB19_3 - -LBB19_4: - RET - -TEXT ·_transpose_uint32_uint16_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB20_1 - -LBB20_5: - WORD $0xd089 // mov eax, edx - WORD $0x178b // mov edx, dword [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB20_5 - -LBB20_1: - WORD $0xd285 // test edx, edx - JLE LBB20_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB20_3: - LONG $0x47048b42 // mov eax, dword [rdi + 2*r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB20_3 - -LBB20_4: - RET - -TEXT ·_transpose_int32_uint16_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB21_1 - -LBB21_5: - WORD $0xd089 // mov eax, edx - WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - LONG $0x04576348 // movsxd rdx, dword [rdi + 4] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - LONG $0x08576348 // movsxd rdx, dword [rdi + 8] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - LONG $0x0c576348 // movsxd rdx, dword [rdi + 12] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB21_5 - -LBB21_1: - WORD $0xd285 // test edx, edx - JLE LBB21_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB21_3: - LONG $0x4704634a // movsxd rax, dword [rdi + 2*r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB21_3 - -LBB21_4: - RET - -TEXT ·_transpose_uint64_uint16_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB22_1 - -LBB22_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB22_5 - -LBB22_1: - WORD $0xd285 // test edx, edx - JLE LBB22_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB22_3: - LONG $0x87048b4a // mov rax, qword [rdi + 4*r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB22_3 - -LBB22_4: - RET - -TEXT ·_transpose_int64_uint16_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB23_1 - -LBB23_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB23_5 - -LBB23_1: - WORD $0xd285 // test edx, edx - JLE LBB23_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB23_3: - LONG $0x87048b4a // mov rax, qword [rdi + 4*r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB23_3 - -LBB23_4: - RET - -TEXT ·_transpose_uint8_int16_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB24_1 - -LBB24_5: - WORD $0xd089 // mov eax, edx - WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - LONG $0x0157b60f // movzx edx, byte [rdi + 1] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - LONG $0x0257b60f // movzx edx, byte [rdi + 2] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - LONG $0x0357b60f // movzx edx, byte [rdi + 3] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB24_5 - -LBB24_1: - WORD $0xd285 // test edx, edx - JLE LBB24_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB24_3: - LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x46 // mov word [rsi + 2*r8], ax - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB24_3 - -LBB24_4: - RET - -TEXT ·_transpose_int8_int16_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB25_1 - -LBB25_5: - WORD $0xd089 // mov eax, edx - LONG $0x17be0f48 // movsx rdx, byte [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB25_5 - -LBB25_1: - WORD $0xd285 // test edx, edx - JLE LBB25_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB25_3: - LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x46 // mov word [rsi + 2*r8], ax - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB25_3 - -LBB25_4: - RET - -TEXT ·_transpose_uint16_int16_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB26_1 - -LBB26_5: - WORD $0xd089 // mov eax, edx - WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - LONG $0x0257b70f // movzx edx, word [rdi + 2] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - LONG $0x0457b70f // movzx edx, word [rdi + 4] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - LONG $0x0657b70f // movzx edx, word [rdi + 6] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB26_5 - -LBB26_1: - WORD $0xd285 // test edx, edx - JLE LBB26_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB26_3: - LONG $0x04b70f42; BYTE $0x07 // movzx eax, word [rdi + r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB26_3 - -LBB26_4: - RET - -TEXT ·_transpose_int16_int16_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB27_1 - -LBB27_5: - WORD $0xd089 // mov eax, edx - LONG $0x17bf0f48 // movsx rdx, word [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB27_5 - -LBB27_1: - WORD $0xd285 // test edx, edx - JLE LBB27_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB27_3: - LONG $0x04bf0f4a; BYTE $0x07 // movsx rax, word [rdi + r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB27_3 - -LBB27_4: - RET - -TEXT ·_transpose_uint32_int16_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB28_1 - -LBB28_5: - WORD $0xd089 // mov eax, edx - WORD $0x178b // mov edx, dword [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB28_5 - -LBB28_1: - WORD $0xd285 // test edx, edx - JLE LBB28_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB28_3: - LONG $0x47048b42 // mov eax, dword [rdi + 2*r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB28_3 - -LBB28_4: - RET - -TEXT ·_transpose_int32_int16_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB29_1 - -LBB29_5: - WORD $0xd089 // mov eax, edx - WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - LONG $0x04576348 // movsxd rdx, dword [rdi + 4] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - LONG $0x08576348 // movsxd rdx, dword [rdi + 8] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - LONG $0x0c576348 // movsxd rdx, dword [rdi + 12] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB29_5 - -LBB29_1: - WORD $0xd285 // test edx, edx - JLE LBB29_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB29_3: - LONG $0x4704634a // movsxd rax, dword [rdi + 2*r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB29_3 - -LBB29_4: - RET - -TEXT ·_transpose_uint64_int16_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB30_1 - -LBB30_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB30_5 - -LBB30_1: - WORD $0xd285 // test edx, edx - JLE LBB30_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB30_3: - LONG $0x87048b4a // mov rax, qword [rdi + 4*r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB30_3 - -LBB30_4: - RET - -TEXT ·_transpose_int64_int16_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB31_1 - -LBB31_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB31_5 - -LBB31_1: - WORD $0xd285 // test edx, edx - JLE LBB31_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB31_3: - LONG $0x87048b4a // mov rax, qword [rdi + 4*r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB31_3 - -LBB31_4: - RET - -TEXT ·_transpose_uint8_uint32_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB32_1 - -LBB32_5: - WORD $0xd089 // mov eax, edx - WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - LONG $0x0157b60f // movzx edx, byte [rdi + 1] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - LONG $0x0257b60f // movzx edx, byte [rdi + 2] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - LONG $0x0357b60f // movzx edx, byte [rdi + 3] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB32_5 - -LBB32_1: - WORD $0xd285 // test edx, edx - JLE LBB32_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB32_3: - LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x86048942 // mov dword [rsi + 4*r8], eax - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB32_3 - -LBB32_4: - RET - -TEXT ·_transpose_int8_uint32_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB33_1 - -LBB33_5: - WORD $0xd089 // mov eax, edx - LONG $0x17be0f48 // movsx rdx, byte [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB33_5 - -LBB33_1: - WORD $0xd285 // test edx, edx - JLE LBB33_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB33_3: - LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x86048942 // mov dword [rsi + 4*r8], eax - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB33_3 - -LBB33_4: - RET - -TEXT ·_transpose_uint16_uint32_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB34_1 - -LBB34_5: - WORD $0xd089 // mov eax, edx - WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - LONG $0x0257b70f // movzx edx, word [rdi + 2] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - LONG $0x0457b70f // movzx edx, word [rdi + 4] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - LONG $0x0657b70f // movzx edx, word [rdi + 6] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB34_5 - -LBB34_1: - WORD $0xd285 // test edx, edx - JLE LBB34_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB34_3: - LONG $0x04b70f42; BYTE $0x07 // movzx eax, word [rdi + r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x46048942 // mov dword [rsi + 2*r8], eax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB34_3 - -LBB34_4: - RET - -TEXT ·_transpose_int16_uint32_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB35_1 - -LBB35_5: - WORD $0xd089 // mov eax, edx - LONG $0x17bf0f48 // movsx rdx, word [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB35_5 - -LBB35_1: - WORD $0xd285 // test edx, edx - JLE LBB35_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB35_3: - LONG $0x04bf0f4a; BYTE $0x07 // movsx rax, word [rdi + r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x46048942 // mov dword [rsi + 2*r8], eax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB35_3 - -LBB35_4: - RET - -TEXT ·_transpose_uint32_uint32_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB36_1 - -LBB36_5: - WORD $0xd089 // mov eax, edx - WORD $0x178b // mov edx, dword [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB36_5 - -LBB36_1: - WORD $0xd285 // test edx, edx - JLE LBB36_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB36_3: - LONG $0x07048b42 // mov eax, dword [rdi + r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x06048942 // mov dword [rsi + r8], eax - LONG $0x04c08349 // add r8, 4 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB36_3 - -LBB36_4: - RET - -TEXT ·_transpose_int32_uint32_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB37_1 - -LBB37_5: - WORD $0xd089 // mov eax, edx - WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - LONG $0x04576348 // movsxd rdx, dword [rdi + 4] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - LONG $0x08576348 // movsxd rdx, dword [rdi + 8] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - LONG $0x0c576348 // movsxd rdx, dword [rdi + 12] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB37_5 - -LBB37_1: - WORD $0xd285 // test edx, edx - JLE LBB37_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB37_3: - LONG $0x0704634a // movsxd rax, dword [rdi + r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x06048942 // mov dword [rsi + r8], eax - LONG $0x04c08349 // add r8, 4 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB37_3 - -LBB37_4: - RET - -TEXT ·_transpose_uint64_uint32_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB38_1 - -LBB38_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB38_5 - -LBB38_1: - WORD $0xd285 // test edx, edx - JLE LBB38_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB38_3: - LONG $0x47048b4a // mov rax, qword [rdi + 2*r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x06048942 // mov dword [rsi + r8], eax - LONG $0x04c08349 // add r8, 4 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB38_3 - -LBB38_4: - RET - -TEXT ·_transpose_int64_uint32_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB39_1 - -LBB39_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB39_5 - -LBB39_1: - WORD $0xd285 // test edx, edx - JLE LBB39_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB39_3: - LONG $0x47048b4a // mov rax, qword [rdi + 2*r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x06048942 // mov dword [rsi + r8], eax - LONG $0x04c08349 // add r8, 4 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB39_3 - -LBB39_4: - RET - -TEXT ·_transpose_uint8_int32_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB40_1 - -LBB40_5: - WORD $0xd089 // mov eax, edx - WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - LONG $0x0157b60f // movzx edx, byte [rdi + 1] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - LONG $0x0257b60f // movzx edx, byte [rdi + 2] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - LONG $0x0357b60f // movzx edx, byte [rdi + 3] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB40_5 - -LBB40_1: - WORD $0xd285 // test edx, edx - JLE LBB40_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB40_3: - LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x86048942 // mov dword [rsi + 4*r8], eax - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB40_3 - -LBB40_4: - RET - -TEXT ·_transpose_int8_int32_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB41_1 - -LBB41_5: - WORD $0xd089 // mov eax, edx - LONG $0x17be0f48 // movsx rdx, byte [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB41_5 - -LBB41_1: - WORD $0xd285 // test edx, edx - JLE LBB41_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB41_3: - LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x86048942 // mov dword [rsi + 4*r8], eax - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB41_3 - -LBB41_4: - RET - -TEXT ·_transpose_uint16_int32_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB42_1 - -LBB42_5: - WORD $0xd089 // mov eax, edx - WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - LONG $0x0257b70f // movzx edx, word [rdi + 2] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - LONG $0x0457b70f // movzx edx, word [rdi + 4] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - LONG $0x0657b70f // movzx edx, word [rdi + 6] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB42_5 - -LBB42_1: - WORD $0xd285 // test edx, edx - JLE LBB42_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB42_3: - LONG $0x04b70f42; BYTE $0x07 // movzx eax, word [rdi + r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x46048942 // mov dword [rsi + 2*r8], eax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB42_3 - -LBB42_4: - RET - -TEXT ·_transpose_int16_int32_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB43_1 - -LBB43_5: - WORD $0xd089 // mov eax, edx - LONG $0x17bf0f48 // movsx rdx, word [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB43_5 - -LBB43_1: - WORD $0xd285 // test edx, edx - JLE LBB43_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB43_3: - LONG $0x04bf0f4a; BYTE $0x07 // movsx rax, word [rdi + r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x46048942 // mov dword [rsi + 2*r8], eax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB43_3 - -LBB43_4: - RET - -TEXT ·_transpose_uint32_int32_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB44_1 - -LBB44_5: - WORD $0xd089 // mov eax, edx - WORD $0x178b // mov edx, dword [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB44_5 - -LBB44_1: - WORD $0xd285 // test edx, edx - JLE LBB44_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB44_3: - LONG $0x07048b42 // mov eax, dword [rdi + r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x06048942 // mov dword [rsi + r8], eax - LONG $0x04c08349 // add r8, 4 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB44_3 - -LBB44_4: - RET - -TEXT ·_transpose_int32_int32_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB45_1 - -LBB45_5: - WORD $0xd089 // mov eax, edx - WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - LONG $0x04576348 // movsxd rdx, dword [rdi + 4] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - LONG $0x08576348 // movsxd rdx, dword [rdi + 8] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - LONG $0x0c576348 // movsxd rdx, dword [rdi + 12] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB45_5 - -LBB45_1: - WORD $0xd285 // test edx, edx - JLE LBB45_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB45_3: - LONG $0x0704634a // movsxd rax, dword [rdi + r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x06048942 // mov dword [rsi + r8], eax - LONG $0x04c08349 // add r8, 4 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB45_3 - -LBB45_4: - RET - -TEXT ·_transpose_uint64_int32_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB46_1 - -LBB46_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB46_5 - -LBB46_1: - WORD $0xd285 // test edx, edx - JLE LBB46_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB46_3: - LONG $0x47048b4a // mov rax, qword [rdi + 2*r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x06048942 // mov dword [rsi + r8], eax - LONG $0x04c08349 // add r8, 4 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB46_3 - -LBB46_4: - RET - -TEXT ·_transpose_int64_int32_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB47_1 - -LBB47_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB47_5 - -LBB47_1: - WORD $0xd285 // test edx, edx - JLE LBB47_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB47_3: - LONG $0x47048b4a // mov rax, qword [rdi + 2*r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x06048942 // mov dword [rsi + r8], eax - LONG $0x04c08349 // add r8, 4 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB47_3 - -LBB47_4: - RET - -TEXT ·_transpose_uint8_uint64_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB48_1 - -LBB48_5: - WORD $0xd089 // mov eax, edx - WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - LONG $0x0157b60f // movzx edx, byte [rdi + 1] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - LONG $0x0257b60f // movzx edx, byte [rdi + 2] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - LONG $0x0357b60f // movzx edx, byte [rdi + 3] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB48_5 - -LBB48_1: - WORD $0xd285 // test edx, edx - JLE LBB48_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB48_3: - LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0xc604894a // mov qword [rsi + 8*r8], rax - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB48_3 - -LBB48_4: - RET - -TEXT ·_transpose_int8_uint64_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB49_1 - -LBB49_5: - WORD $0xd089 // mov eax, edx - LONG $0x17be0f48 // movsx rdx, byte [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB49_5 - -LBB49_1: - WORD $0xd285 // test edx, edx - JLE LBB49_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB49_3: - LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0xc604894a // mov qword [rsi + 8*r8], rax - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB49_3 - -LBB49_4: - RET - -TEXT ·_transpose_uint16_uint64_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB50_1 - -LBB50_5: - WORD $0xd089 // mov eax, edx - WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - LONG $0x0257b70f // movzx edx, word [rdi + 2] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - LONG $0x0457b70f // movzx edx, word [rdi + 4] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - LONG $0x0657b70f // movzx edx, word [rdi + 6] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB50_5 - -LBB50_1: - WORD $0xd285 // test edx, edx - JLE LBB50_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB50_3: - LONG $0x04b70f42; BYTE $0x07 // movzx eax, word [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0x8604894a // mov qword [rsi + 4*r8], rax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB50_3 - -LBB50_4: - RET - -TEXT ·_transpose_int16_uint64_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB51_1 - -LBB51_5: - WORD $0xd089 // mov eax, edx - LONG $0x17bf0f48 // movsx rdx, word [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB51_5 - -LBB51_1: - WORD $0xd285 // test edx, edx - JLE LBB51_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB51_3: - LONG $0x04bf0f4a; BYTE $0x07 // movsx rax, word [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0x8604894a // mov qword [rsi + 4*r8], rax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB51_3 - -LBB51_4: - RET - -TEXT ·_transpose_uint32_uint64_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB52_1 - -LBB52_5: - WORD $0xd089 // mov eax, edx - WORD $0x178b // mov edx, dword [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB52_5 - -LBB52_1: - WORD $0xd285 // test edx, edx - JLE LBB52_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB52_3: - LONG $0x07048b42 // mov eax, dword [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0x4604894a // mov qword [rsi + 2*r8], rax - LONG $0x04c08349 // add r8, 4 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB52_3 - -LBB52_4: - RET - -TEXT ·_transpose_int32_uint64_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB53_1 - -LBB53_5: - WORD $0xd089 // mov eax, edx - WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - LONG $0x04576348 // movsxd rdx, dword [rdi + 4] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - LONG $0x08576348 // movsxd rdx, dword [rdi + 8] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - LONG $0x0c576348 // movsxd rdx, dword [rdi + 12] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB53_5 - -LBB53_1: - WORD $0xd285 // test edx, edx - JLE LBB53_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB53_3: - LONG $0x0704634a // movsxd rax, dword [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0x4604894a // mov qword [rsi + 2*r8], rax - LONG $0x04c08349 // add r8, 4 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB53_3 - -LBB53_4: - RET - -TEXT ·_transpose_uint64_uint64_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB54_1 - -LBB54_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB54_5 - -LBB54_1: - WORD $0xd285 // test edx, edx - JLE LBB54_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB54_3: - LONG $0x07048b4a // mov rax, qword [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0x0604894a // mov qword [rsi + r8], rax - LONG $0x08c08349 // add r8, 8 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB54_3 - -LBB54_4: - RET - -TEXT ·_transpose_int64_uint64_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB55_1 - -LBB55_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB55_5 - -LBB55_1: - WORD $0xd285 // test edx, edx - JLE LBB55_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB55_3: - LONG $0x07048b4a // mov rax, qword [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0x0604894a // mov qword [rsi + r8], rax - LONG $0x08c08349 // add r8, 8 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB55_3 - -LBB55_4: - RET - -TEXT ·_transpose_uint8_int64_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB56_1 - -LBB56_5: - WORD $0xd089 // mov eax, edx - WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - LONG $0x0157b60f // movzx edx, byte [rdi + 1] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - LONG $0x0257b60f // movzx edx, byte [rdi + 2] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - LONG $0x0357b60f // movzx edx, byte [rdi + 3] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB56_5 - -LBB56_1: - WORD $0xd285 // test edx, edx - JLE LBB56_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB56_3: - LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0xc604894a // mov qword [rsi + 8*r8], rax - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB56_3 - -LBB56_4: - RET - -TEXT ·_transpose_int8_int64_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB57_1 - -LBB57_5: - WORD $0xd089 // mov eax, edx - LONG $0x17be0f48 // movsx rdx, byte [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB57_5 - -LBB57_1: - WORD $0xd285 // test edx, edx - JLE LBB57_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB57_3: - LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0xc604894a // mov qword [rsi + 8*r8], rax - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB57_3 - -LBB57_4: - RET - -TEXT ·_transpose_uint16_int64_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB58_1 - -LBB58_5: - WORD $0xd089 // mov eax, edx - WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - LONG $0x0257b70f // movzx edx, word [rdi + 2] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - LONG $0x0457b70f // movzx edx, word [rdi + 4] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - LONG $0x0657b70f // movzx edx, word [rdi + 6] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB58_5 - -LBB58_1: - WORD $0xd285 // test edx, edx - JLE LBB58_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB58_3: - LONG $0x04b70f42; BYTE $0x07 // movzx eax, word [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0x8604894a // mov qword [rsi + 4*r8], rax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB58_3 - -LBB58_4: - RET - -TEXT ·_transpose_int16_int64_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB59_1 - -LBB59_5: - WORD $0xd089 // mov eax, edx - LONG $0x17bf0f48 // movsx rdx, word [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB59_5 - -LBB59_1: - WORD $0xd285 // test edx, edx - JLE LBB59_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB59_3: - LONG $0x04bf0f4a; BYTE $0x07 // movsx rax, word [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0x8604894a // mov qword [rsi + 4*r8], rax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB59_3 - -LBB59_4: - RET - -TEXT ·_transpose_uint32_int64_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB60_1 - -LBB60_5: - WORD $0xd089 // mov eax, edx - WORD $0x178b // mov edx, dword [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB60_5 - -LBB60_1: - WORD $0xd285 // test edx, edx - JLE LBB60_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB60_3: - LONG $0x07048b42 // mov eax, dword [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0x4604894a // mov qword [rsi + 2*r8], rax - LONG $0x04c08349 // add r8, 4 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB60_3 - -LBB60_4: - RET - -TEXT ·_transpose_int32_int64_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB61_1 - -LBB61_5: - WORD $0xd089 // mov eax, edx - WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - LONG $0x04576348 // movsxd rdx, dword [rdi + 4] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - LONG $0x08576348 // movsxd rdx, dword [rdi + 8] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - LONG $0x0c576348 // movsxd rdx, dword [rdi + 12] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB61_5 - -LBB61_1: - WORD $0xd285 // test edx, edx - JLE LBB61_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB61_3: - LONG $0x0704634a // movsxd rax, dword [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0x4604894a // mov qword [rsi + 2*r8], rax - LONG $0x04c08349 // add r8, 4 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB61_3 - -LBB61_4: - RET - -TEXT ·_transpose_uint64_int64_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB62_1 - -LBB62_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB62_5 - -LBB62_1: - WORD $0xd285 // test edx, edx - JLE LBB62_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB62_3: - LONG $0x07048b4a // mov rax, qword [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0x0604894a // mov qword [rsi + r8], rax - LONG $0x08c08349 // add r8, 8 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB62_3 - -LBB62_4: - RET - -TEXT ·_transpose_int64_int64_avx2(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB63_1 - -LBB63_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB63_5 - -LBB63_1: - WORD $0xd285 // test edx, edx - JLE LBB63_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB63_3: - LONG $0x07048b4a // mov rax, qword [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0x0604894a // mov qword [rsi + r8], rax - LONG $0x08c08349 // add r8, 8 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB63_3 - -LBB63_4: - RET diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_def.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_def.go deleted file mode 100644 index cc3b0abb5..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_def.go +++ /dev/null @@ -1,227 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package utils - -import ( - "errors" - - "github.com/apache/arrow/go/v14/arrow" -) - -//go:generate go run ../../arrow/_tools/tmpl -i -data=transpose_ints.tmpldata -d arch=avx2 transpose_ints_simd.go.tmpl=transpose_ints_avx2_amd64.go -//go:generate go run ../../arrow/_tools/tmpl -i -data=transpose_ints.tmpldata -d arch=sse4 transpose_ints_simd.go.tmpl=transpose_ints_sse4_amd64.go -//go:generate go run ../../arrow/_tools/tmpl -i -data=transpose_ints.tmpldata transpose_ints_s390x.go.tmpl=transpose_ints_s390x.go -//go:generate go run ../../arrow/_tools/tmpl -i -data=transpose_ints.tmpldata transpose_ints_s390x.go.tmpl=transpose_ints_arm64.go -//go:generate go run ../../arrow/_tools/tmpl -i -data=transpose_ints.tmpldata transpose_ints_noasm.go.tmpl=transpose_ints_noasm.go -//go:generate go run ../../arrow/_tools/tmpl -i -data=transpose_ints.tmpldata transpose_ints.go.tmpl=transpose_ints.go - -func bufToTyped(typ arrow.DataType, buf []byte, offset, length int) (interface{}, error) { - switch typ.ID() { - case arrow.INT8: - return arrow.Int8Traits.CastFromBytes(buf)[offset : offset+length], nil - case arrow.INT16: - return arrow.Int16Traits.CastFromBytes(buf)[offset : offset+length], nil - case arrow.INT32: - return arrow.Int32Traits.CastFromBytes(buf)[offset : offset+length], nil - case arrow.INT64: - return arrow.Int64Traits.CastFromBytes(buf)[offset : offset+length], nil - case arrow.UINT8: - return arrow.Uint8Traits.CastFromBytes(buf)[offset : offset+length], nil - case arrow.UINT16: - return arrow.Uint16Traits.CastFromBytes(buf)[offset : offset+length], nil - case arrow.UINT32: - return arrow.Uint32Traits.CastFromBytes(buf)[offset : offset+length], nil - case arrow.UINT64: - return arrow.Uint64Traits.CastFromBytes(buf)[offset : offset+length], nil - } - return nil, errors.New("only accepts integral types") -} - -// TransposeIntsBuffers takes the data-types, byte buffers, and offsets of a source and destination -// buffer to perform TransposeInts on with the provided mapping data. -func TransposeIntsBuffers(inType, outType arrow.DataType, indata, outdata []byte, inOffset, outOffset int, length int, transposeMap []int32) error { - src, err := bufToTyped(inType, indata, inOffset, length) - if err != nil { - return err - } - dest, err := bufToTyped(outType, outdata, outOffset, length) - if err != nil { - return err - } - - return TransposeInts(src, dest, transposeMap) -} - -// TransposeInts expects two integral slices and the values they map to. Returning -// an error if either src or dest are not an integral type. -func TransposeInts(src, dest interface{}, mapping []int32) error { - switch s := src.(type) { - case []int8: - switch d := dest.(type) { - case []int8: - TransposeInt8Int8(s, d, mapping) - case []int16: - TransposeInt8Int16(s, d, mapping) - case []int32: - TransposeInt8Int32(s, d, mapping) - case []int64: - TransposeInt8Int64(s, d, mapping) - case []uint8: - TransposeInt8Uint8(s, d, mapping) - case []uint16: - TransposeInt8Uint16(s, d, mapping) - case []uint32: - TransposeInt8Uint32(s, d, mapping) - case []uint64: - TransposeInt8Uint64(s, d, mapping) - } - case []int16: - switch d := dest.(type) { - case []int8: - TransposeInt16Int8(s, d, mapping) - case []int16: - TransposeInt16Int16(s, d, mapping) - case []int32: - TransposeInt16Int32(s, d, mapping) - case []int64: - TransposeInt16Int64(s, d, mapping) - case []uint8: - TransposeInt16Uint8(s, d, mapping) - case []uint16: - TransposeInt16Uint16(s, d, mapping) - case []uint32: - TransposeInt16Uint32(s, d, mapping) - case []uint64: - TransposeInt16Uint64(s, d, mapping) - } - case []int32: - switch d := dest.(type) { - case []int8: - TransposeInt32Int8(s, d, mapping) - case []int16: - TransposeInt32Int16(s, d, mapping) - case []int32: - TransposeInt32Int32(s, d, mapping) - case []int64: - TransposeInt32Int64(s, d, mapping) - case []uint8: - TransposeInt32Uint8(s, d, mapping) - case []uint16: - TransposeInt32Uint16(s, d, mapping) - case []uint32: - TransposeInt32Uint32(s, d, mapping) - case []uint64: - TransposeInt32Uint64(s, d, mapping) - } - case []int64: - switch d := dest.(type) { - case []int8: - TransposeInt64Int8(s, d, mapping) - case []int16: - TransposeInt64Int16(s, d, mapping) - case []int32: - TransposeInt64Int32(s, d, mapping) - case []int64: - TransposeInt64Int64(s, d, mapping) - case []uint8: - TransposeInt64Uint8(s, d, mapping) - case []uint16: - TransposeInt64Uint16(s, d, mapping) - case []uint32: - TransposeInt64Uint32(s, d, mapping) - case []uint64: - TransposeInt64Uint64(s, d, mapping) - } - case []uint8: - switch d := dest.(type) { - case []int8: - TransposeUint8Int8(s, d, mapping) - case []int16: - TransposeUint8Int16(s, d, mapping) - case []int32: - TransposeUint8Int32(s, d, mapping) - case []int64: - TransposeUint8Int64(s, d, mapping) - case []uint8: - TransposeUint8Uint8(s, d, mapping) - case []uint16: - TransposeUint8Uint16(s, d, mapping) - case []uint32: - TransposeUint8Uint32(s, d, mapping) - case []uint64: - TransposeUint8Uint64(s, d, mapping) - } - case []uint16: - switch d := dest.(type) { - case []int8: - TransposeUint16Int8(s, d, mapping) - case []int16: - TransposeUint16Int16(s, d, mapping) - case []int32: - TransposeUint16Int32(s, d, mapping) - case []int64: - TransposeUint16Int64(s, d, mapping) - case []uint8: - TransposeUint16Uint8(s, d, mapping) - case []uint16: - TransposeUint16Uint16(s, d, mapping) - case []uint32: - TransposeUint16Uint32(s, d, mapping) - case []uint64: - TransposeUint16Uint64(s, d, mapping) - } - case []uint32: - switch d := dest.(type) { - case []int8: - TransposeUint32Int8(s, d, mapping) - case []int16: - TransposeUint32Int16(s, d, mapping) - case []int32: - TransposeUint32Int32(s, d, mapping) - case []int64: - TransposeUint32Int64(s, d, mapping) - case []uint8: - TransposeUint32Uint8(s, d, mapping) - case []uint16: - TransposeUint32Uint16(s, d, mapping) - case []uint32: - TransposeUint32Uint32(s, d, mapping) - case []uint64: - TransposeUint32Uint64(s, d, mapping) - } - case []uint64: - switch d := dest.(type) { - case []int8: - TransposeUint64Int8(s, d, mapping) - case []int16: - TransposeUint64Int16(s, d, mapping) - case []int32: - TransposeUint64Int32(s, d, mapping) - case []int64: - TransposeUint64Int64(s, d, mapping) - case []uint8: - TransposeUint64Uint8(s, d, mapping) - case []uint16: - TransposeUint64Uint16(s, d, mapping) - case []uint32: - TransposeUint64Uint32(s, d, mapping) - case []uint64: - TransposeUint64Uint64(s, d, mapping) - } - } - return nil -} diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_noasm.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_noasm.go deleted file mode 100644 index 461aaf31f..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_noasm.go +++ /dev/null @@ -1,96 +0,0 @@ -// Code generated by transpose_ints_noasm.go.tmpl. DO NOT EDIT. - -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build noasm || (!amd64 && !arm64 && !s390x && !ppc64le) - -package utils - -// if building with the 'noasm' tag, then point to the pure go implementations -var ( - TransposeInt8Int8 = transposeInt8Int8 - TransposeInt8Uint8 = transposeInt8Uint8 - TransposeInt8Int16 = transposeInt8Int16 - TransposeInt8Uint16 = transposeInt8Uint16 - TransposeInt8Int32 = transposeInt8Int32 - TransposeInt8Uint32 = transposeInt8Uint32 - TransposeInt8Int64 = transposeInt8Int64 - TransposeInt8Uint64 = transposeInt8Uint64 - - TransposeUint8Int8 = transposeUint8Int8 - TransposeUint8Uint8 = transposeUint8Uint8 - TransposeUint8Int16 = transposeUint8Int16 - TransposeUint8Uint16 = transposeUint8Uint16 - TransposeUint8Int32 = transposeUint8Int32 - TransposeUint8Uint32 = transposeUint8Uint32 - TransposeUint8Int64 = transposeUint8Int64 - TransposeUint8Uint64 = transposeUint8Uint64 - - TransposeInt16Int8 = transposeInt16Int8 - TransposeInt16Uint8 = transposeInt16Uint8 - TransposeInt16Int16 = transposeInt16Int16 - TransposeInt16Uint16 = transposeInt16Uint16 - TransposeInt16Int32 = transposeInt16Int32 - TransposeInt16Uint32 = transposeInt16Uint32 - TransposeInt16Int64 = transposeInt16Int64 - TransposeInt16Uint64 = transposeInt16Uint64 - - TransposeUint16Int8 = transposeUint16Int8 - TransposeUint16Uint8 = transposeUint16Uint8 - TransposeUint16Int16 = transposeUint16Int16 - TransposeUint16Uint16 = transposeUint16Uint16 - TransposeUint16Int32 = transposeUint16Int32 - TransposeUint16Uint32 = transposeUint16Uint32 - TransposeUint16Int64 = transposeUint16Int64 - TransposeUint16Uint64 = transposeUint16Uint64 - - TransposeInt32Int8 = transposeInt32Int8 - TransposeInt32Uint8 = transposeInt32Uint8 - TransposeInt32Int16 = transposeInt32Int16 - TransposeInt32Uint16 = transposeInt32Uint16 - TransposeInt32Int32 = transposeInt32Int32 - TransposeInt32Uint32 = transposeInt32Uint32 - TransposeInt32Int64 = transposeInt32Int64 - TransposeInt32Uint64 = transposeInt32Uint64 - - TransposeUint32Int8 = transposeUint32Int8 - TransposeUint32Uint8 = transposeUint32Uint8 - TransposeUint32Int16 = transposeUint32Int16 - TransposeUint32Uint16 = transposeUint32Uint16 - TransposeUint32Int32 = transposeUint32Int32 - TransposeUint32Uint32 = transposeUint32Uint32 - TransposeUint32Int64 = transposeUint32Int64 - TransposeUint32Uint64 = transposeUint32Uint64 - - TransposeInt64Int8 = transposeInt64Int8 - TransposeInt64Uint8 = transposeInt64Uint8 - TransposeInt64Int16 = transposeInt64Int16 - TransposeInt64Uint16 = transposeInt64Uint16 - TransposeInt64Int32 = transposeInt64Int32 - TransposeInt64Uint32 = transposeInt64Uint32 - TransposeInt64Int64 = transposeInt64Int64 - TransposeInt64Uint64 = transposeInt64Uint64 - - TransposeUint64Int8 = transposeUint64Int8 - TransposeUint64Uint8 = transposeUint64Uint8 - TransposeUint64Int16 = transposeUint64Int16 - TransposeUint64Uint16 = transposeUint64Uint16 - TransposeUint64Int32 = transposeUint64Int32 - TransposeUint64Uint32 = transposeUint64Uint32 - TransposeUint64Int64 = transposeUint64Int64 - TransposeUint64Uint64 = transposeUint64Uint64 -) diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_noasm.go.tmpl b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_noasm.go.tmpl deleted file mode 100644 index faffdce35..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_noasm.go.tmpl +++ /dev/null @@ -1,34 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build noasm -// +build noasm - -package utils - -// if building with the 'noasm' tag, then point to the pure go implementations -var ( -{{ $typelist := .In }} -{{range .In}} -{{ $src := .Type -}} -{{ $srcName := .Name -}} -{{ range $typelist -}} -{{ $dest := .Type -}} -{{ $destName := .Name -}} - Transpose{{$srcName}}{{$destName}} = transpose{{$srcName}}{{$destName}} -{{end}} -{{end}} -) diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_ppc64le.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_ppc64le.go deleted file mode 100644 index cc957cdaa..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_ppc64le.go +++ /dev/null @@ -1,96 +0,0 @@ -// Code generated by transpose_ints_s390x.go.tmpl. DO NOT EDIT. - -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build !noasm - -package utils - -// if building with the 'noasm' tag, then point to the pure go implementations -var ( - TransposeInt8Int8 = transposeInt8Int8 - TransposeInt8Uint8 = transposeInt8Uint8 - TransposeInt8Int16 = transposeInt8Int16 - TransposeInt8Uint16 = transposeInt8Uint16 - TransposeInt8Int32 = transposeInt8Int32 - TransposeInt8Uint32 = transposeInt8Uint32 - TransposeInt8Int64 = transposeInt8Int64 - TransposeInt8Uint64 = transposeInt8Uint64 - - TransposeUint8Int8 = transposeUint8Int8 - TransposeUint8Uint8 = transposeUint8Uint8 - TransposeUint8Int16 = transposeUint8Int16 - TransposeUint8Uint16 = transposeUint8Uint16 - TransposeUint8Int32 = transposeUint8Int32 - TransposeUint8Uint32 = transposeUint8Uint32 - TransposeUint8Int64 = transposeUint8Int64 - TransposeUint8Uint64 = transposeUint8Uint64 - - TransposeInt16Int8 = transposeInt16Int8 - TransposeInt16Uint8 = transposeInt16Uint8 - TransposeInt16Int16 = transposeInt16Int16 - TransposeInt16Uint16 = transposeInt16Uint16 - TransposeInt16Int32 = transposeInt16Int32 - TransposeInt16Uint32 = transposeInt16Uint32 - TransposeInt16Int64 = transposeInt16Int64 - TransposeInt16Uint64 = transposeInt16Uint64 - - TransposeUint16Int8 = transposeUint16Int8 - TransposeUint16Uint8 = transposeUint16Uint8 - TransposeUint16Int16 = transposeUint16Int16 - TransposeUint16Uint16 = transposeUint16Uint16 - TransposeUint16Int32 = transposeUint16Int32 - TransposeUint16Uint32 = transposeUint16Uint32 - TransposeUint16Int64 = transposeUint16Int64 - TransposeUint16Uint64 = transposeUint16Uint64 - - TransposeInt32Int8 = transposeInt32Int8 - TransposeInt32Uint8 = transposeInt32Uint8 - TransposeInt32Int16 = transposeInt32Int16 - TransposeInt32Uint16 = transposeInt32Uint16 - TransposeInt32Int32 = transposeInt32Int32 - TransposeInt32Uint32 = transposeInt32Uint32 - TransposeInt32Int64 = transposeInt32Int64 - TransposeInt32Uint64 = transposeInt32Uint64 - - TransposeUint32Int8 = transposeUint32Int8 - TransposeUint32Uint8 = transposeUint32Uint8 - TransposeUint32Int16 = transposeUint32Int16 - TransposeUint32Uint16 = transposeUint32Uint16 - TransposeUint32Int32 = transposeUint32Int32 - TransposeUint32Uint32 = transposeUint32Uint32 - TransposeUint32Int64 = transposeUint32Int64 - TransposeUint32Uint64 = transposeUint32Uint64 - - TransposeInt64Int8 = transposeInt64Int8 - TransposeInt64Uint8 = transposeInt64Uint8 - TransposeInt64Int16 = transposeInt64Int16 - TransposeInt64Uint16 = transposeInt64Uint16 - TransposeInt64Int32 = transposeInt64Int32 - TransposeInt64Uint32 = transposeInt64Uint32 - TransposeInt64Int64 = transposeInt64Int64 - TransposeInt64Uint64 = transposeInt64Uint64 - - TransposeUint64Int8 = transposeUint64Int8 - TransposeUint64Uint8 = transposeUint64Uint8 - TransposeUint64Int16 = transposeUint64Int16 - TransposeUint64Uint16 = transposeUint64Uint16 - TransposeUint64Int32 = transposeUint64Int32 - TransposeUint64Uint32 = transposeUint64Uint32 - TransposeUint64Int64 = transposeUint64Int64 - TransposeUint64Uint64 = transposeUint64Uint64 -) diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_s390x.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_s390x.go deleted file mode 100644 index cc957cdaa..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_s390x.go +++ /dev/null @@ -1,96 +0,0 @@ -// Code generated by transpose_ints_s390x.go.tmpl. DO NOT EDIT. - -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build !noasm - -package utils - -// if building with the 'noasm' tag, then point to the pure go implementations -var ( - TransposeInt8Int8 = transposeInt8Int8 - TransposeInt8Uint8 = transposeInt8Uint8 - TransposeInt8Int16 = transposeInt8Int16 - TransposeInt8Uint16 = transposeInt8Uint16 - TransposeInt8Int32 = transposeInt8Int32 - TransposeInt8Uint32 = transposeInt8Uint32 - TransposeInt8Int64 = transposeInt8Int64 - TransposeInt8Uint64 = transposeInt8Uint64 - - TransposeUint8Int8 = transposeUint8Int8 - TransposeUint8Uint8 = transposeUint8Uint8 - TransposeUint8Int16 = transposeUint8Int16 - TransposeUint8Uint16 = transposeUint8Uint16 - TransposeUint8Int32 = transposeUint8Int32 - TransposeUint8Uint32 = transposeUint8Uint32 - TransposeUint8Int64 = transposeUint8Int64 - TransposeUint8Uint64 = transposeUint8Uint64 - - TransposeInt16Int8 = transposeInt16Int8 - TransposeInt16Uint8 = transposeInt16Uint8 - TransposeInt16Int16 = transposeInt16Int16 - TransposeInt16Uint16 = transposeInt16Uint16 - TransposeInt16Int32 = transposeInt16Int32 - TransposeInt16Uint32 = transposeInt16Uint32 - TransposeInt16Int64 = transposeInt16Int64 - TransposeInt16Uint64 = transposeInt16Uint64 - - TransposeUint16Int8 = transposeUint16Int8 - TransposeUint16Uint8 = transposeUint16Uint8 - TransposeUint16Int16 = transposeUint16Int16 - TransposeUint16Uint16 = transposeUint16Uint16 - TransposeUint16Int32 = transposeUint16Int32 - TransposeUint16Uint32 = transposeUint16Uint32 - TransposeUint16Int64 = transposeUint16Int64 - TransposeUint16Uint64 = transposeUint16Uint64 - - TransposeInt32Int8 = transposeInt32Int8 - TransposeInt32Uint8 = transposeInt32Uint8 - TransposeInt32Int16 = transposeInt32Int16 - TransposeInt32Uint16 = transposeInt32Uint16 - TransposeInt32Int32 = transposeInt32Int32 - TransposeInt32Uint32 = transposeInt32Uint32 - TransposeInt32Int64 = transposeInt32Int64 - TransposeInt32Uint64 = transposeInt32Uint64 - - TransposeUint32Int8 = transposeUint32Int8 - TransposeUint32Uint8 = transposeUint32Uint8 - TransposeUint32Int16 = transposeUint32Int16 - TransposeUint32Uint16 = transposeUint32Uint16 - TransposeUint32Int32 = transposeUint32Int32 - TransposeUint32Uint32 = transposeUint32Uint32 - TransposeUint32Int64 = transposeUint32Int64 - TransposeUint32Uint64 = transposeUint32Uint64 - - TransposeInt64Int8 = transposeInt64Int8 - TransposeInt64Uint8 = transposeInt64Uint8 - TransposeInt64Int16 = transposeInt64Int16 - TransposeInt64Uint16 = transposeInt64Uint16 - TransposeInt64Int32 = transposeInt64Int32 - TransposeInt64Uint32 = transposeInt64Uint32 - TransposeInt64Int64 = transposeInt64Int64 - TransposeInt64Uint64 = transposeInt64Uint64 - - TransposeUint64Int8 = transposeUint64Int8 - TransposeUint64Uint8 = transposeUint64Uint8 - TransposeUint64Int16 = transposeUint64Int16 - TransposeUint64Uint16 = transposeUint64Uint16 - TransposeUint64Int32 = transposeUint64Int32 - TransposeUint64Uint32 = transposeUint64Uint32 - TransposeUint64Int64 = transposeUint64Int64 - TransposeUint64Uint64 = transposeUint64Uint64 -) diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_s390x.go.tmpl b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_s390x.go.tmpl deleted file mode 100644 index d93c8779c..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_s390x.go.tmpl +++ /dev/null @@ -1,34 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build !noasm -// +build !noasm - -package utils - -// if building with the 'noasm' tag, then point to the pure go implementations -var ( -{{ $typelist := .In }} -{{range .In}} -{{ $src := .Type -}} -{{ $srcName := .Name -}} -{{ range $typelist -}} -{{ $dest := .Type -}} -{{ $destName := .Name -}} - Transpose{{$srcName}}{{$destName}} = transpose{{$srcName}}{{$destName}} -{{end}} -{{end}} -) diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_simd.go.tmpl b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_simd.go.tmpl deleted file mode 100644 index 034d0e9d2..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_simd.go.tmpl +++ /dev/null @@ -1,42 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build !noasm -// +build !noasm - -package utils - -import ( - "unsafe" -) - -{{ $arch := .D.arch}} -{{ $typelist := .In}} -{{range .In}} -{{ $src := .Type }} -{{ $srcName := .Name }} -{{ range $typelist}} -{{ $dest := .Type }} -{{ $destName := .Name }} - -//go:noescape -func _transpose_{{printf "%s_%s_%s" $src $dest $arch}}(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transpose{{ $srcName }}{{ $destName }}{{ $arch }}(src []{{$src}}, dest []{{$dest}}, transposeMap []int32) { - _transpose_{{printf "%s_%s_%s" $src $dest $arch}}(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} -{{ end }} -{{ end }} diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_sse4_amd64.go b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_sse4_amd64.go deleted file mode 100644 index 241ca74a7..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_sse4_amd64.go +++ /dev/null @@ -1,473 +0,0 @@ -// Code generated by transpose_ints_simd.go.tmpl. DO NOT EDIT. - -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//go:build !noasm - -package utils - -import ( - "unsafe" -) - -//go:noescape -func _transpose_int8_int8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt8Int8sse4(src []int8, dest []int8, transposeMap []int32) { - _transpose_int8_int8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int8_uint8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt8Uint8sse4(src []int8, dest []uint8, transposeMap []int32) { - _transpose_int8_uint8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int8_int16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt8Int16sse4(src []int8, dest []int16, transposeMap []int32) { - _transpose_int8_int16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int8_uint16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt8Uint16sse4(src []int8, dest []uint16, transposeMap []int32) { - _transpose_int8_uint16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int8_int32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt8Int32sse4(src []int8, dest []int32, transposeMap []int32) { - _transpose_int8_int32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int8_uint32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt8Uint32sse4(src []int8, dest []uint32, transposeMap []int32) { - _transpose_int8_uint32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int8_int64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt8Int64sse4(src []int8, dest []int64, transposeMap []int32) { - _transpose_int8_int64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int8_uint64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt8Uint64sse4(src []int8, dest []uint64, transposeMap []int32) { - _transpose_int8_uint64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint8_int8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint8Int8sse4(src []uint8, dest []int8, transposeMap []int32) { - _transpose_uint8_int8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint8_uint8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint8Uint8sse4(src []uint8, dest []uint8, transposeMap []int32) { - _transpose_uint8_uint8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint8_int16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint8Int16sse4(src []uint8, dest []int16, transposeMap []int32) { - _transpose_uint8_int16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint8_uint16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint8Uint16sse4(src []uint8, dest []uint16, transposeMap []int32) { - _transpose_uint8_uint16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint8_int32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint8Int32sse4(src []uint8, dest []int32, transposeMap []int32) { - _transpose_uint8_int32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint8_uint32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint8Uint32sse4(src []uint8, dest []uint32, transposeMap []int32) { - _transpose_uint8_uint32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint8_int64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint8Int64sse4(src []uint8, dest []int64, transposeMap []int32) { - _transpose_uint8_int64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint8_uint64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint8Uint64sse4(src []uint8, dest []uint64, transposeMap []int32) { - _transpose_uint8_uint64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int16_int8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt16Int8sse4(src []int16, dest []int8, transposeMap []int32) { - _transpose_int16_int8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int16_uint8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt16Uint8sse4(src []int16, dest []uint8, transposeMap []int32) { - _transpose_int16_uint8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int16_int16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt16Int16sse4(src []int16, dest []int16, transposeMap []int32) { - _transpose_int16_int16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int16_uint16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt16Uint16sse4(src []int16, dest []uint16, transposeMap []int32) { - _transpose_int16_uint16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int16_int32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt16Int32sse4(src []int16, dest []int32, transposeMap []int32) { - _transpose_int16_int32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int16_uint32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt16Uint32sse4(src []int16, dest []uint32, transposeMap []int32) { - _transpose_int16_uint32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int16_int64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt16Int64sse4(src []int16, dest []int64, transposeMap []int32) { - _transpose_int16_int64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int16_uint64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt16Uint64sse4(src []int16, dest []uint64, transposeMap []int32) { - _transpose_int16_uint64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint16_int8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint16Int8sse4(src []uint16, dest []int8, transposeMap []int32) { - _transpose_uint16_int8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint16_uint8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint16Uint8sse4(src []uint16, dest []uint8, transposeMap []int32) { - _transpose_uint16_uint8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint16_int16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint16Int16sse4(src []uint16, dest []int16, transposeMap []int32) { - _transpose_uint16_int16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint16_uint16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint16Uint16sse4(src []uint16, dest []uint16, transposeMap []int32) { - _transpose_uint16_uint16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint16_int32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint16Int32sse4(src []uint16, dest []int32, transposeMap []int32) { - _transpose_uint16_int32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint16_uint32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint16Uint32sse4(src []uint16, dest []uint32, transposeMap []int32) { - _transpose_uint16_uint32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint16_int64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint16Int64sse4(src []uint16, dest []int64, transposeMap []int32) { - _transpose_uint16_int64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint16_uint64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint16Uint64sse4(src []uint16, dest []uint64, transposeMap []int32) { - _transpose_uint16_uint64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int32_int8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt32Int8sse4(src []int32, dest []int8, transposeMap []int32) { - _transpose_int32_int8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int32_uint8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt32Uint8sse4(src []int32, dest []uint8, transposeMap []int32) { - _transpose_int32_uint8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int32_int16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt32Int16sse4(src []int32, dest []int16, transposeMap []int32) { - _transpose_int32_int16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int32_uint16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt32Uint16sse4(src []int32, dest []uint16, transposeMap []int32) { - _transpose_int32_uint16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int32_int32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt32Int32sse4(src []int32, dest []int32, transposeMap []int32) { - _transpose_int32_int32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int32_uint32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt32Uint32sse4(src []int32, dest []uint32, transposeMap []int32) { - _transpose_int32_uint32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int32_int64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt32Int64sse4(src []int32, dest []int64, transposeMap []int32) { - _transpose_int32_int64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int32_uint64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt32Uint64sse4(src []int32, dest []uint64, transposeMap []int32) { - _transpose_int32_uint64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint32_int8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint32Int8sse4(src []uint32, dest []int8, transposeMap []int32) { - _transpose_uint32_int8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint32_uint8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint32Uint8sse4(src []uint32, dest []uint8, transposeMap []int32) { - _transpose_uint32_uint8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint32_int16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint32Int16sse4(src []uint32, dest []int16, transposeMap []int32) { - _transpose_uint32_int16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint32_uint16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint32Uint16sse4(src []uint32, dest []uint16, transposeMap []int32) { - _transpose_uint32_uint16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint32_int32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint32Int32sse4(src []uint32, dest []int32, transposeMap []int32) { - _transpose_uint32_int32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint32_uint32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint32Uint32sse4(src []uint32, dest []uint32, transposeMap []int32) { - _transpose_uint32_uint32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint32_int64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint32Int64sse4(src []uint32, dest []int64, transposeMap []int32) { - _transpose_uint32_int64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint32_uint64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint32Uint64sse4(src []uint32, dest []uint64, transposeMap []int32) { - _transpose_uint32_uint64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int64_int8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt64Int8sse4(src []int64, dest []int8, transposeMap []int32) { - _transpose_int64_int8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int64_uint8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt64Uint8sse4(src []int64, dest []uint8, transposeMap []int32) { - _transpose_int64_uint8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int64_int16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt64Int16sse4(src []int64, dest []int16, transposeMap []int32) { - _transpose_int64_int16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int64_uint16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt64Uint16sse4(src []int64, dest []uint16, transposeMap []int32) { - _transpose_int64_uint16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int64_int32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt64Int32sse4(src []int64, dest []int32, transposeMap []int32) { - _transpose_int64_int32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int64_uint32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt64Uint32sse4(src []int64, dest []uint32, transposeMap []int32) { - _transpose_int64_uint32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int64_int64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt64Int64sse4(src []int64, dest []int64, transposeMap []int32) { - _transpose_int64_int64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_int64_uint64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeInt64Uint64sse4(src []int64, dest []uint64, transposeMap []int32) { - _transpose_int64_uint64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint64_int8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint64Int8sse4(src []uint64, dest []int8, transposeMap []int32) { - _transpose_uint64_int8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint64_uint8_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint64Uint8sse4(src []uint64, dest []uint8, transposeMap []int32) { - _transpose_uint64_uint8_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint64_int16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint64Int16sse4(src []uint64, dest []int16, transposeMap []int32) { - _transpose_uint64_int16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint64_uint16_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint64Uint16sse4(src []uint64, dest []uint16, transposeMap []int32) { - _transpose_uint64_uint16_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint64_int32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint64Int32sse4(src []uint64, dest []int32, transposeMap []int32) { - _transpose_uint64_int32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint64_uint32_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint64Uint32sse4(src []uint64, dest []uint32, transposeMap []int32) { - _transpose_uint64_uint32_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint64_int64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint64Int64sse4(src []uint64, dest []int64, transposeMap []int32) { - _transpose_uint64_int64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} - -//go:noescape -func _transpose_uint64_uint64_sse4(src, dest unsafe.Pointer, length int, transposeMap unsafe.Pointer) - -func transposeUint64Uint64sse4(src []uint64, dest []uint64, transposeMap []int32) { - _transpose_uint64_uint64_sse4(unsafe.Pointer(&src[0]), unsafe.Pointer(&dest[0]), len(dest), unsafe.Pointer(&transposeMap[0])) -} diff --git a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_sse4_amd64.s b/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_sse4_amd64.s deleted file mode 100644 index ee5199a5a..000000000 --- a/vendor/github.com/apache/arrow/go/v14/internal/utils/transpose_ints_sse4_amd64.s +++ /dev/null @@ -1,3074 +0,0 @@ -//+build !noasm !appengine -// AUTO-GENERATED BY C2GOASM -- DO NOT EDIT - -TEXT ·_transpose_uint8_uint8_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB0_1 - -LBB0_5: - WORD $0xd089 // mov eax, edx - WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - LONG $0x0157b60f // movzx edx, byte [rdi + 1] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - LONG $0x0257b60f // movzx edx, byte [rdi + 2] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - LONG $0x0357b60f // movzx edx, byte [rdi + 3] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB0_5 - -LBB0_1: - WORD $0xd285 // test edx, edx - JLE LBB0_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB0_3: - LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB0_3 - -LBB0_4: - RET - -TEXT ·_transpose_int8_uint8_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB1_1 - -LBB1_5: - WORD $0xd089 // mov eax, edx - LONG $0x17be0f48 // movsx rdx, byte [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB1_5 - -LBB1_1: - WORD $0xd285 // test edx, edx - JLE LBB1_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB1_3: - LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB1_3 - -LBB1_4: - RET - -TEXT ·_transpose_uint16_uint8_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB2_1 - -LBB2_5: - WORD $0xd089 // mov eax, edx - WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - LONG $0x0257b70f // movzx edx, word [rdi + 2] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - LONG $0x0457b70f // movzx edx, word [rdi + 4] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - LONG $0x0657b70f // movzx edx, word [rdi + 6] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB2_5 - -LBB2_1: - WORD $0xd285 // test edx, edx - JLE LBB2_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB2_3: - LONG $0x04b70f42; BYTE $0x47 // movzx eax, word [rdi + 2*r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB2_3 - -LBB2_4: - RET - -TEXT ·_transpose_int16_uint8_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB3_1 - -LBB3_5: - WORD $0xd089 // mov eax, edx - LONG $0x17bf0f48 // movsx rdx, word [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB3_5 - -LBB3_1: - WORD $0xd285 // test edx, edx - JLE LBB3_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB3_3: - LONG $0x04bf0f4a; BYTE $0x47 // movsx rax, word [rdi + 2*r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB3_3 - -LBB3_4: - RET - -TEXT ·_transpose_uint32_uint8_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB4_1 - -LBB4_5: - WORD $0xd089 // mov eax, edx - WORD $0x178b // mov edx, dword [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB4_5 - -LBB4_1: - WORD $0xd285 // test edx, edx - JLE LBB4_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB4_3: - LONG $0x87048b42 // mov eax, dword [rdi + 4*r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB4_3 - -LBB4_4: - RET - -TEXT ·_transpose_int32_uint8_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB5_1 - -LBB5_5: - WORD $0xd089 // mov eax, edx - WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - LONG $0x04576348 // movsxd rdx, dword [rdi + 4] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - LONG $0x08576348 // movsxd rdx, dword [rdi + 8] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - LONG $0x0c576348 // movsxd rdx, dword [rdi + 12] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB5_5 - -LBB5_1: - WORD $0xd285 // test edx, edx - JLE LBB5_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB5_3: - LONG $0x8704634a // movsxd rax, dword [rdi + 4*r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB5_3 - -LBB5_4: - RET - -TEXT ·_transpose_uint64_uint8_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB6_1 - -LBB6_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB6_5 - -LBB6_1: - WORD $0xd285 // test edx, edx - JLE LBB6_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB6_3: - LONG $0xc7048b4a // mov rax, qword [rdi + 8*r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB6_3 - -LBB6_4: - RET - -TEXT ·_transpose_int64_uint8_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB7_1 - -LBB7_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB7_5 - -LBB7_1: - WORD $0xd285 // test edx, edx - JLE LBB7_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB7_3: - LONG $0xc7048b4a // mov rax, qword [rdi + 8*r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB7_3 - -LBB7_4: - RET - -TEXT ·_transpose_uint8_int8_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB8_1 - -LBB8_5: - WORD $0xd089 // mov eax, edx - WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - LONG $0x0157b60f // movzx edx, byte [rdi + 1] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - LONG $0x0257b60f // movzx edx, byte [rdi + 2] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - LONG $0x0357b60f // movzx edx, byte [rdi + 3] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB8_5 - -LBB8_1: - WORD $0xd285 // test edx, edx - JLE LBB8_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB8_3: - LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB8_3 - -LBB8_4: - RET - -TEXT ·_transpose_int8_int8_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB9_1 - -LBB9_5: - WORD $0xd089 // mov eax, edx - LONG $0x17be0f48 // movsx rdx, byte [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB9_5 - -LBB9_1: - WORD $0xd285 // test edx, edx - JLE LBB9_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB9_3: - LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB9_3 - -LBB9_4: - RET - -TEXT ·_transpose_uint16_int8_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB10_1 - -LBB10_5: - WORD $0xd089 // mov eax, edx - WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - LONG $0x0257b70f // movzx edx, word [rdi + 2] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - LONG $0x0457b70f // movzx edx, word [rdi + 4] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - LONG $0x0657b70f // movzx edx, word [rdi + 6] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB10_5 - -LBB10_1: - WORD $0xd285 // test edx, edx - JLE LBB10_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB10_3: - LONG $0x04b70f42; BYTE $0x47 // movzx eax, word [rdi + 2*r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB10_3 - -LBB10_4: - RET - -TEXT ·_transpose_int16_int8_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB11_1 - -LBB11_5: - WORD $0xd089 // mov eax, edx - LONG $0x17bf0f48 // movsx rdx, word [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB11_5 - -LBB11_1: - WORD $0xd285 // test edx, edx - JLE LBB11_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB11_3: - LONG $0x04bf0f4a; BYTE $0x47 // movsx rax, word [rdi + 2*r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB11_3 - -LBB11_4: - RET - -TEXT ·_transpose_uint32_int8_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB12_1 - -LBB12_5: - WORD $0xd089 // mov eax, edx - WORD $0x178b // mov edx, dword [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB12_5 - -LBB12_1: - WORD $0xd285 // test edx, edx - JLE LBB12_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB12_3: - LONG $0x87048b42 // mov eax, dword [rdi + 4*r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB12_3 - -LBB12_4: - RET - -TEXT ·_transpose_int32_int8_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB13_1 - -LBB13_5: - WORD $0xd089 // mov eax, edx - WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - LONG $0x04576348 // movsxd rdx, dword [rdi + 4] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - LONG $0x08576348 // movsxd rdx, dword [rdi + 8] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - LONG $0x0c576348 // movsxd rdx, dword [rdi + 12] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB13_5 - -LBB13_1: - WORD $0xd285 // test edx, edx - JLE LBB13_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB13_3: - LONG $0x8704634a // movsxd rax, dword [rdi + 4*r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB13_3 - -LBB13_4: - RET - -TEXT ·_transpose_uint64_int8_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB14_1 - -LBB14_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB14_5 - -LBB14_1: - WORD $0xd285 // test edx, edx - JLE LBB14_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB14_3: - LONG $0xc7048b4a // mov rax, qword [rdi + 8*r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB14_3 - -LBB14_4: - RET - -TEXT ·_transpose_int64_int8_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB15_1 - -LBB15_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x1688 // mov byte [rsi], dl - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x01 // mov byte [rsi + 1], dl - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x02 // mov byte [rsi + 2], dl - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - LONG $0x9114b60f // movzx edx, byte [rcx + 4*rdx] - WORD $0x5688; BYTE $0x03 // mov byte [rsi + 3], dl - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x04c68348 // add rsi, 4 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB15_5 - -LBB15_1: - WORD $0xd285 // test edx, edx - JLE LBB15_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB15_3: - LONG $0xc7048b4a // mov rax, qword [rdi + 8*r8] - LONG $0x8104b60f // movzx eax, byte [rcx + 4*rax] - LONG $0x06048842 // mov byte [rsi + r8], al - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB15_3 - -LBB15_4: - RET - -TEXT ·_transpose_uint8_uint16_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB16_1 - -LBB16_5: - WORD $0xd089 // mov eax, edx - WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - LONG $0x0157b60f // movzx edx, byte [rdi + 1] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - LONG $0x0257b60f // movzx edx, byte [rdi + 2] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - LONG $0x0357b60f // movzx edx, byte [rdi + 3] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB16_5 - -LBB16_1: - WORD $0xd285 // test edx, edx - JLE LBB16_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB16_3: - LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x46 // mov word [rsi + 2*r8], ax - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB16_3 - -LBB16_4: - RET - -TEXT ·_transpose_int8_uint16_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB17_1 - -LBB17_5: - WORD $0xd089 // mov eax, edx - LONG $0x17be0f48 // movsx rdx, byte [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB17_5 - -LBB17_1: - WORD $0xd285 // test edx, edx - JLE LBB17_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB17_3: - LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x46 // mov word [rsi + 2*r8], ax - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB17_3 - -LBB17_4: - RET - -TEXT ·_transpose_uint16_uint16_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB18_1 - -LBB18_5: - WORD $0xd089 // mov eax, edx - WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - LONG $0x0257b70f // movzx edx, word [rdi + 2] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - LONG $0x0457b70f // movzx edx, word [rdi + 4] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - LONG $0x0657b70f // movzx edx, word [rdi + 6] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB18_5 - -LBB18_1: - WORD $0xd285 // test edx, edx - JLE LBB18_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB18_3: - LONG $0x04b70f42; BYTE $0x07 // movzx eax, word [rdi + r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB18_3 - -LBB18_4: - RET - -TEXT ·_transpose_int16_uint16_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB19_1 - -LBB19_5: - WORD $0xd089 // mov eax, edx - LONG $0x17bf0f48 // movsx rdx, word [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB19_5 - -LBB19_1: - WORD $0xd285 // test edx, edx - JLE LBB19_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB19_3: - LONG $0x04bf0f4a; BYTE $0x07 // movsx rax, word [rdi + r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB19_3 - -LBB19_4: - RET - -TEXT ·_transpose_uint32_uint16_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB20_1 - -LBB20_5: - WORD $0xd089 // mov eax, edx - WORD $0x178b // mov edx, dword [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB20_5 - -LBB20_1: - WORD $0xd285 // test edx, edx - JLE LBB20_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB20_3: - LONG $0x47048b42 // mov eax, dword [rdi + 2*r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB20_3 - -LBB20_4: - RET - -TEXT ·_transpose_int32_uint16_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB21_1 - -LBB21_5: - WORD $0xd089 // mov eax, edx - WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - LONG $0x04576348 // movsxd rdx, dword [rdi + 4] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - LONG $0x08576348 // movsxd rdx, dword [rdi + 8] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - LONG $0x0c576348 // movsxd rdx, dword [rdi + 12] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB21_5 - -LBB21_1: - WORD $0xd285 // test edx, edx - JLE LBB21_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB21_3: - LONG $0x4704634a // movsxd rax, dword [rdi + 2*r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB21_3 - -LBB21_4: - RET - -TEXT ·_transpose_uint64_uint16_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB22_1 - -LBB22_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB22_5 - -LBB22_1: - WORD $0xd285 // test edx, edx - JLE LBB22_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB22_3: - LONG $0x87048b4a // mov rax, qword [rdi + 4*r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB22_3 - -LBB22_4: - RET - -TEXT ·_transpose_int64_uint16_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB23_1 - -LBB23_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB23_5 - -LBB23_1: - WORD $0xd285 // test edx, edx - JLE LBB23_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB23_3: - LONG $0x87048b4a // mov rax, qword [rdi + 4*r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB23_3 - -LBB23_4: - RET - -TEXT ·_transpose_uint8_int16_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB24_1 - -LBB24_5: - WORD $0xd089 // mov eax, edx - WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - LONG $0x0157b60f // movzx edx, byte [rdi + 1] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - LONG $0x0257b60f // movzx edx, byte [rdi + 2] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - LONG $0x0357b60f // movzx edx, byte [rdi + 3] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB24_5 - -LBB24_1: - WORD $0xd285 // test edx, edx - JLE LBB24_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB24_3: - LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x46 // mov word [rsi + 2*r8], ax - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB24_3 - -LBB24_4: - RET - -TEXT ·_transpose_int8_int16_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB25_1 - -LBB25_5: - WORD $0xd089 // mov eax, edx - LONG $0x17be0f48 // movsx rdx, byte [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB25_5 - -LBB25_1: - WORD $0xd285 // test edx, edx - JLE LBB25_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB25_3: - LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x46 // mov word [rsi + 2*r8], ax - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB25_3 - -LBB25_4: - RET - -TEXT ·_transpose_uint16_int16_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB26_1 - -LBB26_5: - WORD $0xd089 // mov eax, edx - WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - LONG $0x0257b70f // movzx edx, word [rdi + 2] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - LONG $0x0457b70f // movzx edx, word [rdi + 4] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - LONG $0x0657b70f // movzx edx, word [rdi + 6] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB26_5 - -LBB26_1: - WORD $0xd285 // test edx, edx - JLE LBB26_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB26_3: - LONG $0x04b70f42; BYTE $0x07 // movzx eax, word [rdi + r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB26_3 - -LBB26_4: - RET - -TEXT ·_transpose_int16_int16_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB27_1 - -LBB27_5: - WORD $0xd089 // mov eax, edx - LONG $0x17bf0f48 // movsx rdx, word [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB27_5 - -LBB27_1: - WORD $0xd285 // test edx, edx - JLE LBB27_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB27_3: - LONG $0x04bf0f4a; BYTE $0x07 // movsx rax, word [rdi + r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB27_3 - -LBB27_4: - RET - -TEXT ·_transpose_uint32_int16_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB28_1 - -LBB28_5: - WORD $0xd089 // mov eax, edx - WORD $0x178b // mov edx, dword [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB28_5 - -LBB28_1: - WORD $0xd285 // test edx, edx - JLE LBB28_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB28_3: - LONG $0x47048b42 // mov eax, dword [rdi + 2*r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB28_3 - -LBB28_4: - RET - -TEXT ·_transpose_int32_int16_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB29_1 - -LBB29_5: - WORD $0xd089 // mov eax, edx - WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - LONG $0x04576348 // movsxd rdx, dword [rdi + 4] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - LONG $0x08576348 // movsxd rdx, dword [rdi + 8] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - LONG $0x0c576348 // movsxd rdx, dword [rdi + 12] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB29_5 - -LBB29_1: - WORD $0xd285 // test edx, edx - JLE LBB29_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB29_3: - LONG $0x4704634a // movsxd rax, dword [rdi + 2*r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB29_3 - -LBB29_4: - RET - -TEXT ·_transpose_uint64_int16_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB30_1 - -LBB30_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB30_5 - -LBB30_1: - WORD $0xd285 // test edx, edx - JLE LBB30_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB30_3: - LONG $0x87048b4a // mov rax, qword [rdi + 4*r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB30_3 - -LBB30_4: - RET - -TEXT ·_transpose_int64_int16_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB31_1 - -LBB31_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - WORD $0x8966; BYTE $0x16 // mov word [rsi], dx - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x02568966 // mov word [rsi + 2], dx - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x04568966 // mov word [rsi + 4], dx - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - LONG $0x9114b70f // movzx edx, word [rcx + 4*rdx] - LONG $0x06568966 // mov word [rsi + 6], dx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x08c68348 // add rsi, 8 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB31_5 - -LBB31_1: - WORD $0xd285 // test edx, edx - JLE LBB31_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB31_3: - LONG $0x87048b4a // mov rax, qword [rdi + 4*r8] - LONG $0x8104b70f // movzx eax, word [rcx + 4*rax] - LONG $0x04894266; BYTE $0x06 // mov word [rsi + r8], ax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB31_3 - -LBB31_4: - RET - -TEXT ·_transpose_uint8_uint32_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB32_1 - -LBB32_5: - WORD $0xd089 // mov eax, edx - WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - LONG $0x0157b60f // movzx edx, byte [rdi + 1] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - LONG $0x0257b60f // movzx edx, byte [rdi + 2] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - LONG $0x0357b60f // movzx edx, byte [rdi + 3] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB32_5 - -LBB32_1: - WORD $0xd285 // test edx, edx - JLE LBB32_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB32_3: - LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x86048942 // mov dword [rsi + 4*r8], eax - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB32_3 - -LBB32_4: - RET - -TEXT ·_transpose_int8_uint32_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB33_1 - -LBB33_5: - WORD $0xd089 // mov eax, edx - LONG $0x17be0f48 // movsx rdx, byte [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB33_5 - -LBB33_1: - WORD $0xd285 // test edx, edx - JLE LBB33_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB33_3: - LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x86048942 // mov dword [rsi + 4*r8], eax - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB33_3 - -LBB33_4: - RET - -TEXT ·_transpose_uint16_uint32_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB34_1 - -LBB34_5: - WORD $0xd089 // mov eax, edx - WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - LONG $0x0257b70f // movzx edx, word [rdi + 2] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - LONG $0x0457b70f // movzx edx, word [rdi + 4] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - LONG $0x0657b70f // movzx edx, word [rdi + 6] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB34_5 - -LBB34_1: - WORD $0xd285 // test edx, edx - JLE LBB34_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB34_3: - LONG $0x04b70f42; BYTE $0x07 // movzx eax, word [rdi + r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x46048942 // mov dword [rsi + 2*r8], eax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB34_3 - -LBB34_4: - RET - -TEXT ·_transpose_int16_uint32_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB35_1 - -LBB35_5: - WORD $0xd089 // mov eax, edx - LONG $0x17bf0f48 // movsx rdx, word [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB35_5 - -LBB35_1: - WORD $0xd285 // test edx, edx - JLE LBB35_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB35_3: - LONG $0x04bf0f4a; BYTE $0x07 // movsx rax, word [rdi + r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x46048942 // mov dword [rsi + 2*r8], eax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB35_3 - -LBB35_4: - RET - -TEXT ·_transpose_uint32_uint32_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB36_1 - -LBB36_5: - WORD $0xd089 // mov eax, edx - WORD $0x178b // mov edx, dword [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB36_5 - -LBB36_1: - WORD $0xd285 // test edx, edx - JLE LBB36_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB36_3: - LONG $0x07048b42 // mov eax, dword [rdi + r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x06048942 // mov dword [rsi + r8], eax - LONG $0x04c08349 // add r8, 4 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB36_3 - -LBB36_4: - RET - -TEXT ·_transpose_int32_uint32_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB37_1 - -LBB37_5: - WORD $0xd089 // mov eax, edx - WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - LONG $0x04576348 // movsxd rdx, dword [rdi + 4] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - LONG $0x08576348 // movsxd rdx, dword [rdi + 8] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - LONG $0x0c576348 // movsxd rdx, dword [rdi + 12] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB37_5 - -LBB37_1: - WORD $0xd285 // test edx, edx - JLE LBB37_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB37_3: - LONG $0x0704634a // movsxd rax, dword [rdi + r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x06048942 // mov dword [rsi + r8], eax - LONG $0x04c08349 // add r8, 4 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB37_3 - -LBB37_4: - RET - -TEXT ·_transpose_uint64_uint32_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB38_1 - -LBB38_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB38_5 - -LBB38_1: - WORD $0xd285 // test edx, edx - JLE LBB38_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB38_3: - LONG $0x47048b4a // mov rax, qword [rdi + 2*r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x06048942 // mov dword [rsi + r8], eax - LONG $0x04c08349 // add r8, 4 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB38_3 - -LBB38_4: - RET - -TEXT ·_transpose_int64_uint32_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB39_1 - -LBB39_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB39_5 - -LBB39_1: - WORD $0xd285 // test edx, edx - JLE LBB39_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB39_3: - LONG $0x47048b4a // mov rax, qword [rdi + 2*r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x06048942 // mov dword [rsi + r8], eax - LONG $0x04c08349 // add r8, 4 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB39_3 - -LBB39_4: - RET - -TEXT ·_transpose_uint8_int32_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB40_1 - -LBB40_5: - WORD $0xd089 // mov eax, edx - WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - LONG $0x0157b60f // movzx edx, byte [rdi + 1] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - LONG $0x0257b60f // movzx edx, byte [rdi + 2] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - LONG $0x0357b60f // movzx edx, byte [rdi + 3] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB40_5 - -LBB40_1: - WORD $0xd285 // test edx, edx - JLE LBB40_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB40_3: - LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x86048942 // mov dword [rsi + 4*r8], eax - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB40_3 - -LBB40_4: - RET - -TEXT ·_transpose_int8_int32_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB41_1 - -LBB41_5: - WORD $0xd089 // mov eax, edx - LONG $0x17be0f48 // movsx rdx, byte [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB41_5 - -LBB41_1: - WORD $0xd285 // test edx, edx - JLE LBB41_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB41_3: - LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x86048942 // mov dword [rsi + 4*r8], eax - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB41_3 - -LBB41_4: - RET - -TEXT ·_transpose_uint16_int32_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB42_1 - -LBB42_5: - WORD $0xd089 // mov eax, edx - WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - LONG $0x0257b70f // movzx edx, word [rdi + 2] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - LONG $0x0457b70f // movzx edx, word [rdi + 4] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - LONG $0x0657b70f // movzx edx, word [rdi + 6] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB42_5 - -LBB42_1: - WORD $0xd285 // test edx, edx - JLE LBB42_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB42_3: - LONG $0x04b70f42; BYTE $0x07 // movzx eax, word [rdi + r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x46048942 // mov dword [rsi + 2*r8], eax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB42_3 - -LBB42_4: - RET - -TEXT ·_transpose_int16_int32_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB43_1 - -LBB43_5: - WORD $0xd089 // mov eax, edx - LONG $0x17bf0f48 // movsx rdx, word [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB43_5 - -LBB43_1: - WORD $0xd285 // test edx, edx - JLE LBB43_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB43_3: - LONG $0x04bf0f4a; BYTE $0x07 // movsx rax, word [rdi + r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x46048942 // mov dword [rsi + 2*r8], eax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB43_3 - -LBB43_4: - RET - -TEXT ·_transpose_uint32_int32_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB44_1 - -LBB44_5: - WORD $0xd089 // mov eax, edx - WORD $0x178b // mov edx, dword [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB44_5 - -LBB44_1: - WORD $0xd285 // test edx, edx - JLE LBB44_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB44_3: - LONG $0x07048b42 // mov eax, dword [rdi + r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x06048942 // mov dword [rsi + r8], eax - LONG $0x04c08349 // add r8, 4 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB44_3 - -LBB44_4: - RET - -TEXT ·_transpose_int32_int32_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB45_1 - -LBB45_5: - WORD $0xd089 // mov eax, edx - WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - LONG $0x04576348 // movsxd rdx, dword [rdi + 4] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - LONG $0x08576348 // movsxd rdx, dword [rdi + 8] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - LONG $0x0c576348 // movsxd rdx, dword [rdi + 12] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB45_5 - -LBB45_1: - WORD $0xd285 // test edx, edx - JLE LBB45_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB45_3: - LONG $0x0704634a // movsxd rax, dword [rdi + r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x06048942 // mov dword [rsi + r8], eax - LONG $0x04c08349 // add r8, 4 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB45_3 - -LBB45_4: - RET - -TEXT ·_transpose_uint64_int32_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB46_1 - -LBB46_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB46_5 - -LBB46_1: - WORD $0xd285 // test edx, edx - JLE LBB46_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB46_3: - LONG $0x47048b4a // mov rax, qword [rdi + 2*r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x06048942 // mov dword [rsi + r8], eax - LONG $0x04c08349 // add r8, 4 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB46_3 - -LBB46_4: - RET - -TEXT ·_transpose_int64_int32_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB47_1 - -LBB47_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x1689 // mov dword [rsi], edx - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x04 // mov dword [rsi + 4], edx - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x08 // mov dword [rsi + 8], edx - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - WORD $0x148b; BYTE $0x91 // mov edx, dword [rcx + 4*rdx] - WORD $0x5689; BYTE $0x0c // mov dword [rsi + 12], edx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x10c68348 // add rsi, 16 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB47_5 - -LBB47_1: - WORD $0xd285 // test edx, edx - JLE LBB47_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB47_3: - LONG $0x47048b4a // mov rax, qword [rdi + 2*r8] - WORD $0x048b; BYTE $0x81 // mov eax, dword [rcx + 4*rax] - LONG $0x06048942 // mov dword [rsi + r8], eax - LONG $0x04c08349 // add r8, 4 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB47_3 - -LBB47_4: - RET - -TEXT ·_transpose_uint8_uint64_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB48_1 - -LBB48_5: - WORD $0xd089 // mov eax, edx - WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - LONG $0x0157b60f // movzx edx, byte [rdi + 1] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - LONG $0x0257b60f // movzx edx, byte [rdi + 2] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - LONG $0x0357b60f // movzx edx, byte [rdi + 3] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB48_5 - -LBB48_1: - WORD $0xd285 // test edx, edx - JLE LBB48_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB48_3: - LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0xc604894a // mov qword [rsi + 8*r8], rax - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB48_3 - -LBB48_4: - RET - -TEXT ·_transpose_int8_uint64_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB49_1 - -LBB49_5: - WORD $0xd089 // mov eax, edx - LONG $0x17be0f48 // movsx rdx, byte [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB49_5 - -LBB49_1: - WORD $0xd285 // test edx, edx - JLE LBB49_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB49_3: - LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0xc604894a // mov qword [rsi + 8*r8], rax - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB49_3 - -LBB49_4: - RET - -TEXT ·_transpose_uint16_uint64_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB50_1 - -LBB50_5: - WORD $0xd089 // mov eax, edx - WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - LONG $0x0257b70f // movzx edx, word [rdi + 2] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - LONG $0x0457b70f // movzx edx, word [rdi + 4] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - LONG $0x0657b70f // movzx edx, word [rdi + 6] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB50_5 - -LBB50_1: - WORD $0xd285 // test edx, edx - JLE LBB50_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB50_3: - LONG $0x04b70f42; BYTE $0x07 // movzx eax, word [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0x8604894a // mov qword [rsi + 4*r8], rax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB50_3 - -LBB50_4: - RET - -TEXT ·_transpose_int16_uint64_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB51_1 - -LBB51_5: - WORD $0xd089 // mov eax, edx - LONG $0x17bf0f48 // movsx rdx, word [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB51_5 - -LBB51_1: - WORD $0xd285 // test edx, edx - JLE LBB51_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB51_3: - LONG $0x04bf0f4a; BYTE $0x07 // movsx rax, word [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0x8604894a // mov qword [rsi + 4*r8], rax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB51_3 - -LBB51_4: - RET - -TEXT ·_transpose_uint32_uint64_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB52_1 - -LBB52_5: - WORD $0xd089 // mov eax, edx - WORD $0x178b // mov edx, dword [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB52_5 - -LBB52_1: - WORD $0xd285 // test edx, edx - JLE LBB52_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB52_3: - LONG $0x07048b42 // mov eax, dword [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0x4604894a // mov qword [rsi + 2*r8], rax - LONG $0x04c08349 // add r8, 4 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB52_3 - -LBB52_4: - RET - -TEXT ·_transpose_int32_uint64_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB53_1 - -LBB53_5: - WORD $0xd089 // mov eax, edx - WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - LONG $0x04576348 // movsxd rdx, dword [rdi + 4] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - LONG $0x08576348 // movsxd rdx, dword [rdi + 8] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - LONG $0x0c576348 // movsxd rdx, dword [rdi + 12] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB53_5 - -LBB53_1: - WORD $0xd285 // test edx, edx - JLE LBB53_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB53_3: - LONG $0x0704634a // movsxd rax, dword [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0x4604894a // mov qword [rsi + 2*r8], rax - LONG $0x04c08349 // add r8, 4 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB53_3 - -LBB53_4: - RET - -TEXT ·_transpose_uint64_uint64_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB54_1 - -LBB54_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB54_5 - -LBB54_1: - WORD $0xd285 // test edx, edx - JLE LBB54_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB54_3: - LONG $0x07048b4a // mov rax, qword [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0x0604894a // mov qword [rsi + r8], rax - LONG $0x08c08349 // add r8, 8 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB54_3 - -LBB54_4: - RET - -TEXT ·_transpose_int64_uint64_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB55_1 - -LBB55_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB55_5 - -LBB55_1: - WORD $0xd285 // test edx, edx - JLE LBB55_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB55_3: - LONG $0x07048b4a // mov rax, qword [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0x0604894a // mov qword [rsi + r8], rax - LONG $0x08c08349 // add r8, 8 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB55_3 - -LBB55_4: - RET - -TEXT ·_transpose_uint8_int64_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB56_1 - -LBB56_5: - WORD $0xd089 // mov eax, edx - WORD $0xb60f; BYTE $0x17 // movzx edx, byte [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - LONG $0x0157b60f // movzx edx, byte [rdi + 1] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - LONG $0x0257b60f // movzx edx, byte [rdi + 2] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - LONG $0x0357b60f // movzx edx, byte [rdi + 3] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB56_5 - -LBB56_1: - WORD $0xd285 // test edx, edx - JLE LBB56_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB56_3: - LONG $0x04b60f42; BYTE $0x07 // movzx eax, byte [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0xc604894a // mov qword [rsi + 8*r8], rax - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB56_3 - -LBB56_4: - RET - -TEXT ·_transpose_int8_int64_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB57_1 - -LBB57_5: - WORD $0xd089 // mov eax, edx - LONG $0x17be0f48 // movsx rdx, byte [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - LONG $0x57be0f48; BYTE $0x01 // movsx rdx, byte [rdi + 1] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - LONG $0x57be0f48; BYTE $0x02 // movsx rdx, byte [rdi + 2] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - LONG $0x57be0f48; BYTE $0x03 // movsx rdx, byte [rdi + 3] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x04c78348 // add rdi, 4 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB57_5 - -LBB57_1: - WORD $0xd285 // test edx, edx - JLE LBB57_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB57_3: - LONG $0x04be0f4a; BYTE $0x07 // movsx rax, byte [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0xc604894a // mov qword [rsi + 8*r8], rax - LONG $0x01c08349 // add r8, 1 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB57_3 - -LBB57_4: - RET - -TEXT ·_transpose_uint16_int64_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB58_1 - -LBB58_5: - WORD $0xd089 // mov eax, edx - WORD $0xb70f; BYTE $0x17 // movzx edx, word [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - LONG $0x0257b70f // movzx edx, word [rdi + 2] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - LONG $0x0457b70f // movzx edx, word [rdi + 4] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - LONG $0x0657b70f // movzx edx, word [rdi + 6] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB58_5 - -LBB58_1: - WORD $0xd285 // test edx, edx - JLE LBB58_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB58_3: - LONG $0x04b70f42; BYTE $0x07 // movzx eax, word [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0x8604894a // mov qword [rsi + 4*r8], rax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB58_3 - -LBB58_4: - RET - -TEXT ·_transpose_int16_int64_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB59_1 - -LBB59_5: - WORD $0xd089 // mov eax, edx - LONG $0x17bf0f48 // movsx rdx, word [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - LONG $0x57bf0f48; BYTE $0x02 // movsx rdx, word [rdi + 2] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - LONG $0x57bf0f48; BYTE $0x04 // movsx rdx, word [rdi + 4] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - LONG $0x57bf0f48; BYTE $0x06 // movsx rdx, word [rdi + 6] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x08c78348 // add rdi, 8 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB59_5 - -LBB59_1: - WORD $0xd285 // test edx, edx - JLE LBB59_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB59_3: - LONG $0x04bf0f4a; BYTE $0x07 // movsx rax, word [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0x8604894a // mov qword [rsi + 4*r8], rax - LONG $0x02c08349 // add r8, 2 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB59_3 - -LBB59_4: - RET - -TEXT ·_transpose_uint32_int64_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB60_1 - -LBB60_5: - WORD $0xd089 // mov eax, edx - WORD $0x178b // mov edx, dword [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - WORD $0x578b; BYTE $0x04 // mov edx, dword [rdi + 4] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - WORD $0x578b; BYTE $0x08 // mov edx, dword [rdi + 8] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - WORD $0x578b; BYTE $0x0c // mov edx, dword [rdi + 12] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB60_5 - -LBB60_1: - WORD $0xd285 // test edx, edx - JLE LBB60_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB60_3: - LONG $0x07048b42 // mov eax, dword [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0x4604894a // mov qword [rsi + 2*r8], rax - LONG $0x04c08349 // add r8, 4 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB60_3 - -LBB60_4: - RET - -TEXT ·_transpose_int32_int64_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB61_1 - -LBB61_5: - WORD $0xd089 // mov eax, edx - WORD $0x6348; BYTE $0x17 // movsxd rdx, dword [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - LONG $0x04576348 // movsxd rdx, dword [rdi + 4] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - LONG $0x08576348 // movsxd rdx, dword [rdi + 8] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - LONG $0x0c576348 // movsxd rdx, dword [rdi + 12] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x10c78348 // add rdi, 16 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB61_5 - -LBB61_1: - WORD $0xd285 // test edx, edx - JLE LBB61_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB61_3: - LONG $0x0704634a // movsxd rax, dword [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0x4604894a // mov qword [rsi + 2*r8], rax - LONG $0x04c08349 // add r8, 4 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB61_3 - -LBB61_4: - RET - -TEXT ·_transpose_uint64_int64_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB62_1 - -LBB62_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB62_5 - -LBB62_1: - WORD $0xd285 // test edx, edx - JLE LBB62_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB62_3: - LONG $0x07048b4a // mov rax, qword [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0x0604894a // mov qword [rsi + r8], rax - LONG $0x08c08349 // add r8, 8 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB62_3 - -LBB62_4: - RET - -TEXT ·_transpose_int64_int64_sse4(SB), $0-32 - - MOVQ src+0(FP), DI - MOVQ dest+8(FP), SI - MOVQ length+16(FP), DX - MOVQ transposeMap+24(FP), CX - - WORD $0xfa83; BYTE $0x04 // cmp edx, 4 - JL LBB63_1 - -LBB63_5: - WORD $0xd089 // mov eax, edx - WORD $0x8b48; BYTE $0x17 // mov rdx, qword [rdi] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - WORD $0x8948; BYTE $0x16 // mov qword [rsi], rdx - LONG $0x08578b48 // mov rdx, qword [rdi + 8] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x08568948 // mov qword [rsi + 8], rdx - LONG $0x10578b48 // mov rdx, qword [rdi + 16] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x10568948 // mov qword [rsi + 16], rdx - LONG $0x18578b48 // mov rdx, qword [rdi + 24] - LONG $0x91146348 // movsxd rdx, dword [rcx + 4*rdx] - LONG $0x18568948 // mov qword [rsi + 24], rdx - WORD $0x508d; BYTE $0xfc // lea edx, [rax - 4] - LONG $0x20c78348 // add rdi, 32 - LONG $0x20c68348 // add rsi, 32 - WORD $0xf883; BYTE $0x07 // cmp eax, 7 - JG LBB63_5 - -LBB63_1: - WORD $0xd285 // test edx, edx - JLE LBB63_4 - WORD $0xc283; BYTE $0x01 // add edx, 1 - WORD $0x3145; BYTE $0xc0 // xor r8d, r8d - -LBB63_3: - LONG $0x07048b4a // mov rax, qword [rdi + r8] - LONG $0x81046348 // movsxd rax, dword [rcx + 4*rax] - LONG $0x0604894a // mov qword [rsi + r8], rax - LONG $0x08c08349 // add r8, 8 - WORD $0xc283; BYTE $0xff // add edx, -1 - WORD $0xfa83; BYTE $0x01 // cmp edx, 1 - JG LBB63_3 - -LBB63_4: - RET -- cgit mrf-deployment