diff --git a/cmd/gost/vendor/github.com/codahale/chacha20/LICENSE b/cmd/gost/vendor/github.com/codahale/chacha20/LICENSE
deleted file mode 100644
index f9835c2..0000000
--- a/cmd/gost/vendor/github.com/codahale/chacha20/LICENSE
+++ /dev/null
@@ -1,21 +0,0 @@
-The MIT License (MIT)
-
-Copyright (c) 2014 Coda Hale
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
diff --git a/cmd/gost/vendor/github.com/codahale/chacha20/README.md b/cmd/gost/vendor/github.com/codahale/chacha20/README.md
deleted file mode 100644
index e0cc2ec..0000000
--- a/cmd/gost/vendor/github.com/codahale/chacha20/README.md
+++ /dev/null
@@ -1,8 +0,0 @@
-chacha20
-========
-
-[](https://travis-ci.org/codahale/chacha20)
-
-A pure Go implementation of the ChaCha20 stream cipher.
-
-For documentation, check [godoc](http://godoc.org/github.com/codahale/chacha20).
diff --git a/cmd/gost/vendor/github.com/codahale/chacha20/chacha20.go b/cmd/gost/vendor/github.com/codahale/chacha20/chacha20.go
deleted file mode 100644
index ae671bc..0000000
--- a/cmd/gost/vendor/github.com/codahale/chacha20/chacha20.go
+++ /dev/null
@@ -1,235 +0,0 @@
-// Package chacha20 provides a pure Go implementation of ChaCha20, a fast,
-// secure stream cipher.
-//
-// From Bernstein, Daniel J. "ChaCha, a variant of Salsa20." Workshop Record of
-// SASC. 2008. (http://cr.yp.to/chacha/chacha-20080128.pdf):
-//
-// ChaCha8 is a 256-bit stream cipher based on the 8-round cipher Salsa20/8.
-// The changes from Salsa20/8 to ChaCha8 are designed to improve diffusion per
-// round, conjecturally increasing resistance to cryptanalysis, while
-// preserving -- and often improving -- time per round. ChaCha12 and ChaCha20
-// are analogous modifications of the 12-round and 20-round ciphers Salsa20/12
-// and Salsa20/20. This paper presents the ChaCha family and explains the
-// differences between Salsa20 and ChaCha.
-//
-// For more information, see http://cr.yp.to/chacha.html
-package chacha20
-
-import (
- "crypto/cipher"
- "encoding/binary"
- "errors"
- "unsafe"
-)
-
-const (
- // KeySize is the length of ChaCha20 keys, in bytes.
- KeySize = 32
- // NonceSize is the length of ChaCha20 nonces, in bytes.
- NonceSize = 8
- // XNonceSize is the length of XChaCha20 nonces, in bytes.
- XNonceSize = 24
-)
-
-var (
- // ErrInvalidKey is returned when the provided key is not 256 bits long.
- ErrInvalidKey = errors.New("invalid key length (must be 256 bits)")
- // ErrInvalidNonce is returned when the provided nonce is not 64 bits long.
- ErrInvalidNonce = errors.New("invalid nonce length (must be 64 bits)")
- // ErrInvalidXNonce is returned when the provided nonce is not 192 bits
- // long.
- ErrInvalidXNonce = errors.New("invalid nonce length (must be 192 bits)")
- // ErrInvalidRounds is returned when the provided rounds is not
- // 8, 12, or 20.
- ErrInvalidRounds = errors.New("invalid rounds number (must be 8, 12, or 20)")
-)
-
-// New creates and returns a new cipher.Stream. The key argument must be 256
-// bits long, and the nonce argument must be 64 bits long. The nonce must be
-// randomly generated or used only once. This Stream instance must not be used
-// to encrypt more than 2^70 bytes (~1 zettabyte).
-func New(key []byte, nonce []byte) (cipher.Stream, error) {
- return NewWithRounds(key, nonce, 20)
-}
-
-// NewWithRounds creates and returns a new cipher.Stream just like New but
-// the rounds number of 8, 12, or 20 can be specified.
-func NewWithRounds(key []byte, nonce []byte, rounds uint8) (cipher.Stream, error) {
- if len(key) != KeySize {
- return nil, ErrInvalidKey
- }
-
- if len(nonce) != NonceSize {
- return nil, ErrInvalidNonce
- }
-
- if (rounds != 8) && (rounds != 12) && (rounds != 20) {
- return nil, ErrInvalidRounds
- }
-
- s := new(stream)
- s.init(key, nonce, rounds)
- s.advance()
-
- return s, nil
-}
-
-// NewXChaCha creates and returns a new cipher.Stream. The key argument must be
-// 256 bits long, and the nonce argument must be 192 bits long. The nonce must
-// be randomly generated or only used once. This Stream instance must not be
-// used to encrypt more than 2^70 bytes (~1 zetta byte).
-func NewXChaCha(key []byte, nonce []byte) (cipher.Stream, error) {
- return NewXChaChaWithRounds(key, nonce, 20)
-}
-
-// NewXChaChaWithRounds creates and returns a new cipher.Stream just like
-// NewXChaCha but the rounds number of 8, 12, or 20 can be specified.
-func NewXChaChaWithRounds(key []byte, nonce []byte, rounds uint8) (cipher.Stream, error) {
- if len(key) != KeySize {
- return nil, ErrInvalidKey
- }
-
- if len(nonce) != XNonceSize {
- return nil, ErrInvalidXNonce
- }
-
- if (rounds != 8) && (rounds != 12) && (rounds != 20) {
- return nil, ErrInvalidRounds
- }
-
- s := new(stream)
- s.init(key, nonce, rounds)
-
- // Call HChaCha to derive the subkey using the key and the first 16 bytes
- // of the nonce, and re-initialize the state using the subkey and the
- // remaining nonce.
- blockArr := (*[stateSize]uint32)(unsafe.Pointer(&s.block))
- core(&s.state, blockArr, s.rounds, true)
- copy(s.state[4:8], blockArr[0:4])
- copy(s.state[8:12], blockArr[12:16])
- s.state[12] = 0
- s.state[13] = 0
- s.state[14] = binary.LittleEndian.Uint32(nonce[16:])
- s.state[15] = binary.LittleEndian.Uint32(nonce[20:])
-
- s.advance()
-
- return s, nil
-}
-
-type stream struct {
- state [stateSize]uint32 // the state as an array of 16 32-bit words
- block [blockSize]byte // the keystream as an array of 64 bytes
- offset int // the offset of used bytes in block
- rounds uint8
-}
-
-func (s *stream) XORKeyStream(dst, src []byte) {
- // Stride over the input in 64-byte blocks, minus the amount of keystream
- // previously used. This will produce best results when processing blocks
- // of a size evenly divisible by 64.
- i := 0
- max := len(src)
- for i < max {
- gap := blockSize - s.offset
-
- limit := i + gap
- if limit > max {
- limit = max
- }
-
- o := s.offset
- for j := i; j < limit; j++ {
- dst[j] = src[j] ^ s.block[o]
- o++
- }
-
- i += gap
- s.offset = o
-
- if o == blockSize {
- s.advance()
- }
- }
-}
-
-func (s *stream) init(key []byte, nonce []byte, rounds uint8) {
- // the magic constants for 256-bit keys
- s.state[0] = 0x61707865
- s.state[1] = 0x3320646e
- s.state[2] = 0x79622d32
- s.state[3] = 0x6b206574
-
- s.state[4] = binary.LittleEndian.Uint32(key[0:])
- s.state[5] = binary.LittleEndian.Uint32(key[4:])
- s.state[6] = binary.LittleEndian.Uint32(key[8:])
- s.state[7] = binary.LittleEndian.Uint32(key[12:])
- s.state[8] = binary.LittleEndian.Uint32(key[16:])
- s.state[9] = binary.LittleEndian.Uint32(key[20:])
- s.state[10] = binary.LittleEndian.Uint32(key[24:])
- s.state[11] = binary.LittleEndian.Uint32(key[28:])
-
- switch len(nonce) {
- case NonceSize:
- // ChaCha20 uses 8 byte nonces.
- s.state[12] = 0
- s.state[13] = 0
- s.state[14] = binary.LittleEndian.Uint32(nonce[0:])
- s.state[15] = binary.LittleEndian.Uint32(nonce[4:])
- case XNonceSize:
- // XChaCha20 derives the subkey via HChaCha initialized
- // with the first 16 bytes of the nonce.
- s.state[12] = binary.LittleEndian.Uint32(nonce[0:])
- s.state[13] = binary.LittleEndian.Uint32(nonce[4:])
- s.state[14] = binary.LittleEndian.Uint32(nonce[8:])
- s.state[15] = binary.LittleEndian.Uint32(nonce[12:])
- default:
- // Never happens, both ctors validate the nonce length.
- panic("invalid nonce size")
- }
-
- s.rounds = rounds
-}
-
-// BUG(codahale): Totally untested on big-endian CPUs. Would very much
-// appreciate someone with an ARM device giving this a swing.
-
-// advances the keystream
-func (s *stream) advance() {
- core(&s.state, (*[stateSize]uint32)(unsafe.Pointer(&s.block)), s.rounds, false)
-
- if bigEndian {
- j := blockSize - 1
- for i := 0; i < blockSize/2; i++ {
- s.block[j], s.block[i] = s.block[i], s.block[j]
- j--
- }
- }
-
- s.offset = 0
- i := s.state[12] + 1
- s.state[12] = i
- if i == 0 {
- s.state[13]++
- }
-}
-
-const (
- wordSize = 4 // the size of ChaCha20's words
- stateSize = 16 // the size of ChaCha20's state, in words
- blockSize = stateSize * wordSize // the size of ChaCha20's block, in bytes
-)
-
-var (
- bigEndian bool // whether or not we're running on a bigEndian CPU
-)
-
-// Do some up-front bookkeeping on what sort of CPU we're using. ChaCha20 treats
-// its state as a little-endian byte array when it comes to generating the
-// keystream, which allows for a zero-copy approach to the core transform. On
-// big-endian architectures, we have to take a hit to reverse the bytes.
-func init() {
- x := uint32(0x04030201)
- y := [4]byte{0x1, 0x2, 0x3, 0x4}
- bigEndian = *(*[4]byte)(unsafe.Pointer(&x)) != y
-}
diff --git a/cmd/gost/vendor/github.com/codahale/chacha20/core_ref.go b/cmd/gost/vendor/github.com/codahale/chacha20/core_ref.go
deleted file mode 100644
index 84f5e6c..0000000
--- a/cmd/gost/vendor/github.com/codahale/chacha20/core_ref.go
+++ /dev/null
@@ -1,166 +0,0 @@
-// The ChaCha20 core transform.
-// An unrolled and inlined implementation in pure Go.
-
-package chacha20
-
-func core(input, output *[stateSize]uint32, rounds uint8, hchacha bool) {
- var (
- x00 = input[0]
- x01 = input[1]
- x02 = input[2]
- x03 = input[3]
- x04 = input[4]
- x05 = input[5]
- x06 = input[6]
- x07 = input[7]
- x08 = input[8]
- x09 = input[9]
- x10 = input[10]
- x11 = input[11]
- x12 = input[12]
- x13 = input[13]
- x14 = input[14]
- x15 = input[15]
- )
-
- var x uint32
-
- // Unrolling all 20 rounds kills performance on modern Intel processors
- // (Tested on a i5 Haswell, likely applies to Sandy Bridge+), due to uop
- // cache thrashing. The straight forward 2 rounds per loop implementation
- // of this has double the performance of the fully unrolled version.
- for i := uint8(0); i < rounds; i += 2 {
- x00 += x04
- x = x12 ^ x00
- x12 = (x << 16) | (x >> 16)
- x08 += x12
- x = x04 ^ x08
- x04 = (x << 12) | (x >> 20)
- x00 += x04
- x = x12 ^ x00
- x12 = (x << 8) | (x >> 24)
- x08 += x12
- x = x04 ^ x08
- x04 = (x << 7) | (x >> 25)
- x01 += x05
- x = x13 ^ x01
- x13 = (x << 16) | (x >> 16)
- x09 += x13
- x = x05 ^ x09
- x05 = (x << 12) | (x >> 20)
- x01 += x05
- x = x13 ^ x01
- x13 = (x << 8) | (x >> 24)
- x09 += x13
- x = x05 ^ x09
- x05 = (x << 7) | (x >> 25)
- x02 += x06
- x = x14 ^ x02
- x14 = (x << 16) | (x >> 16)
- x10 += x14
- x = x06 ^ x10
- x06 = (x << 12) | (x >> 20)
- x02 += x06
- x = x14 ^ x02
- x14 = (x << 8) | (x >> 24)
- x10 += x14
- x = x06 ^ x10
- x06 = (x << 7) | (x >> 25)
- x03 += x07
- x = x15 ^ x03
- x15 = (x << 16) | (x >> 16)
- x11 += x15
- x = x07 ^ x11
- x07 = (x << 12) | (x >> 20)
- x03 += x07
- x = x15 ^ x03
- x15 = (x << 8) | (x >> 24)
- x11 += x15
- x = x07 ^ x11
- x07 = (x << 7) | (x >> 25)
- x00 += x05
- x = x15 ^ x00
- x15 = (x << 16) | (x >> 16)
- x10 += x15
- x = x05 ^ x10
- x05 = (x << 12) | (x >> 20)
- x00 += x05
- x = x15 ^ x00
- x15 = (x << 8) | (x >> 24)
- x10 += x15
- x = x05 ^ x10
- x05 = (x << 7) | (x >> 25)
- x01 += x06
- x = x12 ^ x01
- x12 = (x << 16) | (x >> 16)
- x11 += x12
- x = x06 ^ x11
- x06 = (x << 12) | (x >> 20)
- x01 += x06
- x = x12 ^ x01
- x12 = (x << 8) | (x >> 24)
- x11 += x12
- x = x06 ^ x11
- x06 = (x << 7) | (x >> 25)
- x02 += x07
- x = x13 ^ x02
- x13 = (x << 16) | (x >> 16)
- x08 += x13
- x = x07 ^ x08
- x07 = (x << 12) | (x >> 20)
- x02 += x07
- x = x13 ^ x02
- x13 = (x << 8) | (x >> 24)
- x08 += x13
- x = x07 ^ x08
- x07 = (x << 7) | (x >> 25)
- x03 += x04
- x = x14 ^ x03
- x14 = (x << 16) | (x >> 16)
- x09 += x14
- x = x04 ^ x09
- x04 = (x << 12) | (x >> 20)
- x03 += x04
- x = x14 ^ x03
- x14 = (x << 8) | (x >> 24)
- x09 += x14
- x = x04 ^ x09
- x04 = (x << 7) | (x >> 25)
- }
-
- if !hchacha {
- output[0] = x00 + input[0]
- output[1] = x01 + input[1]
- output[2] = x02 + input[2]
- output[3] = x03 + input[3]
- output[4] = x04 + input[4]
- output[5] = x05 + input[5]
- output[6] = x06 + input[6]
- output[7] = x07 + input[7]
- output[8] = x08 + input[8]
- output[9] = x09 + input[9]
- output[10] = x10 + input[10]
- output[11] = x11 + input[11]
- output[12] = x12 + input[12]
- output[13] = x13 + input[13]
- output[14] = x14 + input[14]
- output[15] = x15 + input[15]
- } else {
- output[0] = x00
- output[1] = x01
- output[2] = x02
- output[3] = x03
- output[4] = x04
- output[5] = x05
- output[6] = x06
- output[7] = x07
- output[8] = x08
- output[9] = x09
- output[10] = x10
- output[11] = x11
- output[12] = x12
- output[13] = x13
- output[14] = x14
- output[15] = x15
- }
-}
diff --git a/cmd/gost/vendor/github.com/klauspost/crc32/LICENSE b/cmd/gost/vendor/github.com/klauspost/crc32/LICENSE
deleted file mode 100644
index 4fd5963..0000000
--- a/cmd/gost/vendor/github.com/klauspost/crc32/LICENSE
+++ /dev/null
@@ -1,28 +0,0 @@
-Copyright (c) 2012 The Go Authors. All rights reserved.
-Copyright (c) 2015 Klaus Post
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
- * Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
-copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the
-distribution.
- * Neither the name of Google Inc. nor the names of its
-contributors may be used to endorse or promote products derived from
-this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/cmd/gost/vendor/github.com/klauspost/crc32/README.md b/cmd/gost/vendor/github.com/klauspost/crc32/README.md
deleted file mode 100644
index 029625d..0000000
--- a/cmd/gost/vendor/github.com/klauspost/crc32/README.md
+++ /dev/null
@@ -1,87 +0,0 @@
-# crc32
-CRC32 hash with x64 optimizations
-
-This package is a drop-in replacement for the standard library `hash/crc32` package, that features SSE 4.2 optimizations on x64 platforms, for a 10x speedup.
-
-[](https://travis-ci.org/klauspost/crc32)
-
-# usage
-
-Install using `go get github.com/klauspost/crc32`. This library is based on Go 1.5 code and requires Go 1.3 or newer.
-
-Replace `import "hash/crc32"` with `import "github.com/klauspost/crc32"` and you are good to go.
-
-# changes
-* Oct 20, 2016: Changes have been merged to upstream Go. Package updated to match.
-* Dec 4, 2015: Uses the "slice-by-8" trick more extensively, which gives a 1.5 to 2.5x speedup if assembler is unavailable.
-
-
-# performance
-
-For *Go 1.7* performance is equivalent to the standard library. So if you use this package for Go 1.7 you can switch back.
-
-
-For IEEE tables (the most common), there is approximately a factor 10 speedup with "CLMUL" (Carryless multiplication) instruction:
-```
-benchmark old ns/op new ns/op delta
-BenchmarkCrc32KB 99955 10258 -89.74%
-
-benchmark old MB/s new MB/s speedup
-BenchmarkCrc32KB 327.83 3194.20 9.74x
-```
-
-For other tables and "CLMUL" capable machines the performance is the same as the standard library.
-
-Here are some detailed benchmarks, comparing to go 1.5 standard library with and without assembler enabled.
-
-```
-Std: Standard Go 1.5 library
-Crc: Indicates IEEE type CRC.
-40B: Size of each slice encoded.
-NoAsm: Assembler was disabled (ie. not an AMD64 or SSE 4.2+ capable machine).
-Castagnoli: Castagnoli CRC type.
-
-BenchmarkStdCrc40B-4 10000000 158 ns/op 252.88 MB/s
-BenchmarkCrc40BNoAsm-4 20000000 105 ns/op 377.38 MB/s (slice8)
-BenchmarkCrc40B-4 20000000 105 ns/op 378.77 MB/s (slice8)
-
-BenchmarkStdCrc1KB-4 500000 3604 ns/op 284.10 MB/s
-BenchmarkCrc1KBNoAsm-4 1000000 1463 ns/op 699.79 MB/s (slice8)
-BenchmarkCrc1KB-4 3000000 396 ns/op 2583.69 MB/s (asm)
-
-BenchmarkStdCrc8KB-4 200000 11417 ns/op 717.48 MB/s (slice8)
-BenchmarkCrc8KBNoAsm-4 200000 11317 ns/op 723.85 MB/s (slice8)
-BenchmarkCrc8KB-4 500000 2919 ns/op 2805.73 MB/s (asm)
-
-BenchmarkStdCrc32KB-4 30000 45749 ns/op 716.24 MB/s (slice8)
-BenchmarkCrc32KBNoAsm-4 30000 45109 ns/op 726.42 MB/s (slice8)
-BenchmarkCrc32KB-4 100000 11497 ns/op 2850.09 MB/s (asm)
-
-BenchmarkStdNoAsmCastagnol40B-4 10000000 161 ns/op 246.94 MB/s
-BenchmarkStdCastagnoli40B-4 50000000 28.4 ns/op 1410.69 MB/s (asm)
-BenchmarkCastagnoli40BNoAsm-4 20000000 100 ns/op 398.01 MB/s (slice8)
-BenchmarkCastagnoli40B-4 50000000 28.2 ns/op 1419.54 MB/s (asm)
-
-BenchmarkStdNoAsmCastagnoli1KB-4 500000 3622 ns/op 282.67 MB/s
-BenchmarkStdCastagnoli1KB-4 10000000 144 ns/op 7099.78 MB/s (asm)
-BenchmarkCastagnoli1KBNoAsm-4 1000000 1475 ns/op 694.14 MB/s (slice8)
-BenchmarkCastagnoli1KB-4 10000000 146 ns/op 6993.35 MB/s (asm)
-
-BenchmarkStdNoAsmCastagnoli8KB-4 50000 28781 ns/op 284.63 MB/s
-BenchmarkStdCastagnoli8KB-4 1000000 1029 ns/op 7957.89 MB/s (asm)
-BenchmarkCastagnoli8KBNoAsm-4 200000 11410 ns/op 717.94 MB/s (slice8)
-BenchmarkCastagnoli8KB-4 1000000 1000 ns/op 8188.71 MB/s (asm)
-
-BenchmarkStdNoAsmCastagnoli32KB-4 10000 115426 ns/op 283.89 MB/s
-BenchmarkStdCastagnoli32KB-4 300000 4065 ns/op 8059.13 MB/s (asm)
-BenchmarkCastagnoli32KBNoAsm-4 30000 45171 ns/op 725.41 MB/s (slice8)
-BenchmarkCastagnoli32KB-4 500000 4077 ns/op 8035.89 MB/s (asm)
-```
-
-The IEEE assembler optimizations has been submitted and will be part of the Go 1.6 standard library.
-
-However, the improved use of slice-by-8 has not, but will probably be submitted for Go 1.7.
-
-# license
-
-Standard Go license. Changes are Copyright (c) 2015 Klaus Post under same conditions.
diff --git a/cmd/gost/vendor/github.com/klauspost/crc32/crc32.go b/cmd/gost/vendor/github.com/klauspost/crc32/crc32.go
deleted file mode 100644
index 8aa91b1..0000000
--- a/cmd/gost/vendor/github.com/klauspost/crc32/crc32.go
+++ /dev/null
@@ -1,207 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package crc32 implements the 32-bit cyclic redundancy check, or CRC-32,
-// checksum. See http://en.wikipedia.org/wiki/Cyclic_redundancy_check for
-// information.
-//
-// Polynomials are represented in LSB-first form also known as reversed representation.
-//
-// See http://en.wikipedia.org/wiki/Mathematics_of_cyclic_redundancy_checks#Reversed_representations_and_reciprocal_polynomials
-// for information.
-package crc32
-
-import (
- "hash"
- "sync"
-)
-
-// The size of a CRC-32 checksum in bytes.
-const Size = 4
-
-// Predefined polynomials.
-const (
- // IEEE is by far and away the most common CRC-32 polynomial.
- // Used by ethernet (IEEE 802.3), v.42, fddi, gzip, zip, png, ...
- IEEE = 0xedb88320
-
- // Castagnoli's polynomial, used in iSCSI.
- // Has better error detection characteristics than IEEE.
- // http://dx.doi.org/10.1109/26.231911
- Castagnoli = 0x82f63b78
-
- // Koopman's polynomial.
- // Also has better error detection characteristics than IEEE.
- // http://dx.doi.org/10.1109/DSN.2002.1028931
- Koopman = 0xeb31d82e
-)
-
-// Table is a 256-word table representing the polynomial for efficient processing.
-type Table [256]uint32
-
-// This file makes use of functions implemented in architecture-specific files.
-// The interface that they implement is as follows:
-//
-// // archAvailableIEEE reports whether an architecture-specific CRC32-IEEE
-// // algorithm is available.
-// archAvailableIEEE() bool
-//
-// // archInitIEEE initializes the architecture-specific CRC3-IEEE algorithm.
-// // It can only be called if archAvailableIEEE() returns true.
-// archInitIEEE()
-//
-// // archUpdateIEEE updates the given CRC32-IEEE. It can only be called if
-// // archInitIEEE() was previously called.
-// archUpdateIEEE(crc uint32, p []byte) uint32
-//
-// // archAvailableCastagnoli reports whether an architecture-specific
-// // CRC32-C algorithm is available.
-// archAvailableCastagnoli() bool
-//
-// // archInitCastagnoli initializes the architecture-specific CRC32-C
-// // algorithm. It can only be called if archAvailableCastagnoli() returns
-// // true.
-// archInitCastagnoli()
-//
-// // archUpdateCastagnoli updates the given CRC32-C. It can only be called
-// // if archInitCastagnoli() was previously called.
-// archUpdateCastagnoli(crc uint32, p []byte) uint32
-
-// castagnoliTable points to a lazily initialized Table for the Castagnoli
-// polynomial. MakeTable will always return this value when asked to make a
-// Castagnoli table so we can compare against it to find when the caller is
-// using this polynomial.
-var castagnoliTable *Table
-var castagnoliTable8 *slicing8Table
-var castagnoliArchImpl bool
-var updateCastagnoli func(crc uint32, p []byte) uint32
-var castagnoliOnce sync.Once
-
-func castagnoliInit() {
- castagnoliTable = simpleMakeTable(Castagnoli)
- castagnoliArchImpl = archAvailableCastagnoli()
-
- if castagnoliArchImpl {
- archInitCastagnoli()
- updateCastagnoli = archUpdateCastagnoli
- } else {
- // Initialize the slicing-by-8 table.
- castagnoliTable8 = slicingMakeTable(Castagnoli)
- updateCastagnoli = func(crc uint32, p []byte) uint32 {
- return slicingUpdate(crc, castagnoliTable8, p)
- }
- }
-}
-
-// IEEETable is the table for the IEEE polynomial.
-var IEEETable = simpleMakeTable(IEEE)
-
-// ieeeTable8 is the slicing8Table for IEEE
-var ieeeTable8 *slicing8Table
-var ieeeArchImpl bool
-var updateIEEE func(crc uint32, p []byte) uint32
-var ieeeOnce sync.Once
-
-func ieeeInit() {
- ieeeArchImpl = archAvailableIEEE()
-
- if ieeeArchImpl {
- archInitIEEE()
- updateIEEE = archUpdateIEEE
- } else {
- // Initialize the slicing-by-8 table.
- ieeeTable8 = slicingMakeTable(IEEE)
- updateIEEE = func(crc uint32, p []byte) uint32 {
- return slicingUpdate(crc, ieeeTable8, p)
- }
- }
-}
-
-// MakeTable returns a Table constructed from the specified polynomial.
-// The contents of this Table must not be modified.
-func MakeTable(poly uint32) *Table {
- switch poly {
- case IEEE:
- ieeeOnce.Do(ieeeInit)
- return IEEETable
- case Castagnoli:
- castagnoliOnce.Do(castagnoliInit)
- return castagnoliTable
- }
- return simpleMakeTable(poly)
-}
-
-// digest represents the partial evaluation of a checksum.
-type digest struct {
- crc uint32
- tab *Table
-}
-
-// New creates a new hash.Hash32 computing the CRC-32 checksum
-// using the polynomial represented by the Table.
-// Its Sum method will lay the value out in big-endian byte order.
-func New(tab *Table) hash.Hash32 {
- if tab == IEEETable {
- ieeeOnce.Do(ieeeInit)
- }
- return &digest{0, tab}
-}
-
-// NewIEEE creates a new hash.Hash32 computing the CRC-32 checksum
-// using the IEEE polynomial.
-// Its Sum method will lay the value out in big-endian byte order.
-func NewIEEE() hash.Hash32 { return New(IEEETable) }
-
-func (d *digest) Size() int { return Size }
-
-func (d *digest) BlockSize() int { return 1 }
-
-func (d *digest) Reset() { d.crc = 0 }
-
-// Update returns the result of adding the bytes in p to the crc.
-func Update(crc uint32, tab *Table, p []byte) uint32 {
- switch tab {
- case castagnoliTable:
- return updateCastagnoli(crc, p)
- case IEEETable:
- // Unfortunately, because IEEETable is exported, IEEE may be used without a
- // call to MakeTable. We have to make sure it gets initialized in that case.
- ieeeOnce.Do(ieeeInit)
- return updateIEEE(crc, p)
- default:
- return simpleUpdate(crc, tab, p)
- }
-}
-
-func (d *digest) Write(p []byte) (n int, err error) {
- switch d.tab {
- case castagnoliTable:
- d.crc = updateCastagnoli(d.crc, p)
- case IEEETable:
- // We only create digest objects through New() which takes care of
- // initialization in this case.
- d.crc = updateIEEE(d.crc, p)
- default:
- d.crc = simpleUpdate(d.crc, d.tab, p)
- }
- return len(p), nil
-}
-
-func (d *digest) Sum32() uint32 { return d.crc }
-
-func (d *digest) Sum(in []byte) []byte {
- s := d.Sum32()
- return append(in, byte(s>>24), byte(s>>16), byte(s>>8), byte(s))
-}
-
-// Checksum returns the CRC-32 checksum of data
-// using the polynomial represented by the Table.
-func Checksum(data []byte, tab *Table) uint32 { return Update(0, tab, data) }
-
-// ChecksumIEEE returns the CRC-32 checksum of data
-// using the IEEE polynomial.
-func ChecksumIEEE(data []byte) uint32 {
- ieeeOnce.Do(ieeeInit)
- return updateIEEE(0, data)
-}
diff --git a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_amd64.go b/cmd/gost/vendor/github.com/klauspost/crc32/crc32_amd64.go
deleted file mode 100644
index af2a0b8..0000000
--- a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_amd64.go
+++ /dev/null
@@ -1,230 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !appengine,!gccgo
-
-// AMD64-specific hardware-assisted CRC32 algorithms. See crc32.go for a
-// description of the interface that each architecture-specific file
-// implements.
-
-package crc32
-
-import "unsafe"
-
-// This file contains the code to call the SSE 4.2 version of the Castagnoli
-// and IEEE CRC.
-
-// haveSSE41/haveSSE42/haveCLMUL are defined in crc_amd64.s and use
-// CPUID to test for SSE 4.1, 4.2 and CLMUL support.
-func haveSSE41() bool
-func haveSSE42() bool
-func haveCLMUL() bool
-
-// castagnoliSSE42 is defined in crc32_amd64.s and uses the SSE4.2 CRC32
-// instruction.
-//go:noescape
-func castagnoliSSE42(crc uint32, p []byte) uint32
-
-// castagnoliSSE42Triple is defined in crc32_amd64.s and uses the SSE4.2 CRC32
-// instruction.
-//go:noescape
-func castagnoliSSE42Triple(
- crcA, crcB, crcC uint32,
- a, b, c []byte,
- rounds uint32,
-) (retA uint32, retB uint32, retC uint32)
-
-// ieeeCLMUL is defined in crc_amd64.s and uses the PCLMULQDQ
-// instruction as well as SSE 4.1.
-//go:noescape
-func ieeeCLMUL(crc uint32, p []byte) uint32
-
-var sse42 = haveSSE42()
-var useFastIEEE = haveCLMUL() && haveSSE41()
-
-const castagnoliK1 = 168
-const castagnoliK2 = 1344
-
-type sse42Table [4]Table
-
-var castagnoliSSE42TableK1 *sse42Table
-var castagnoliSSE42TableK2 *sse42Table
-
-func archAvailableCastagnoli() bool {
- return sse42
-}
-
-func archInitCastagnoli() {
- if !sse42 {
- panic("arch-specific Castagnoli not available")
- }
- castagnoliSSE42TableK1 = new(sse42Table)
- castagnoliSSE42TableK2 = new(sse42Table)
- // See description in updateCastagnoli.
- // t[0][i] = CRC(i000, O)
- // t[1][i] = CRC(0i00, O)
- // t[2][i] = CRC(00i0, O)
- // t[3][i] = CRC(000i, O)
- // where O is a sequence of K zeros.
- var tmp [castagnoliK2]byte
- for b := 0; b < 4; b++ {
- for i := 0; i < 256; i++ {
- val := uint32(i) << uint32(b*8)
- castagnoliSSE42TableK1[b][i] = castagnoliSSE42(val, tmp[:castagnoliK1])
- castagnoliSSE42TableK2[b][i] = castagnoliSSE42(val, tmp[:])
- }
- }
-}
-
-// castagnoliShift computes the CRC32-C of K1 or K2 zeroes (depending on the
-// table given) with the given initial crc value. This corresponds to
-// CRC(crc, O) in the description in updateCastagnoli.
-func castagnoliShift(table *sse42Table, crc uint32) uint32 {
- return table[3][crc>>24] ^
- table[2][(crc>>16)&0xFF] ^
- table[1][(crc>>8)&0xFF] ^
- table[0][crc&0xFF]
-}
-
-func archUpdateCastagnoli(crc uint32, p []byte) uint32 {
- if !sse42 {
- panic("not available")
- }
-
- // This method is inspired from the algorithm in Intel's white paper:
- // "Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction"
- // The same strategy of splitting the buffer in three is used but the
- // combining calculation is different; the complete derivation is explained
- // below.
- //
- // -- The basic idea --
- //
- // The CRC32 instruction (available in SSE4.2) can process 8 bytes at a
- // time. In recent Intel architectures the instruction takes 3 cycles;
- // however the processor can pipeline up to three instructions if they
- // don't depend on each other.
- //
- // Roughly this means that we can process three buffers in about the same
- // time we can process one buffer.
- //
- // The idea is then to split the buffer in three, CRC the three pieces
- // separately and then combine the results.
- //
- // Combining the results requires precomputed tables, so we must choose a
- // fixed buffer length to optimize. The longer the length, the faster; but
- // only buffers longer than this length will use the optimization. We choose
- // two cutoffs and compute tables for both:
- // - one around 512: 168*3=504
- // - one around 4KB: 1344*3=4032
- //
- // -- The nitty gritty --
- //
- // Let CRC(I, X) be the non-inverted CRC32-C of the sequence X (with
- // initial non-inverted CRC I). This function has the following properties:
- // (a) CRC(I, AB) = CRC(CRC(I, A), B)
- // (b) CRC(I, A xor B) = CRC(I, A) xor CRC(0, B)
- //
- // Say we want to compute CRC(I, ABC) where A, B, C are three sequences of
- // K bytes each, where K is a fixed constant. Let O be the sequence of K zero
- // bytes.
- //
- // CRC(I, ABC) = CRC(I, ABO xor C)
- // = CRC(I, ABO) xor CRC(0, C)
- // = CRC(CRC(I, AB), O) xor CRC(0, C)
- // = CRC(CRC(I, AO xor B), O) xor CRC(0, C)
- // = CRC(CRC(I, AO) xor CRC(0, B), O) xor CRC(0, C)
- // = CRC(CRC(CRC(I, A), O) xor CRC(0, B), O) xor CRC(0, C)
- //
- // The castagnoliSSE42Triple function can compute CRC(I, A), CRC(0, B),
- // and CRC(0, C) efficiently. We just need to find a way to quickly compute
- // CRC(uvwx, O) given a 4-byte initial value uvwx. We can precompute these
- // values; since we can't have a 32-bit table, we break it up into four
- // 8-bit tables:
- //
- // CRC(uvwx, O) = CRC(u000, O) xor
- // CRC(0v00, O) xor
- // CRC(00w0, O) xor
- // CRC(000x, O)
- //
- // We can compute tables corresponding to the four terms for all 8-bit
- // values.
-
- crc = ^crc
-
- // If a buffer is long enough to use the optimization, process the first few
- // bytes to align the buffer to an 8 byte boundary (if necessary).
- if len(p) >= castagnoliK1*3 {
- delta := int(uintptr(unsafe.Pointer(&p[0])) & 7)
- if delta != 0 {
- delta = 8 - delta
- crc = castagnoliSSE42(crc, p[:delta])
- p = p[delta:]
- }
- }
-
- // Process 3*K2 at a time.
- for len(p) >= castagnoliK2*3 {
- // Compute CRC(I, A), CRC(0, B), and CRC(0, C).
- crcA, crcB, crcC := castagnoliSSE42Triple(
- crc, 0, 0,
- p, p[castagnoliK2:], p[castagnoliK2*2:],
- castagnoliK2/24)
-
- // CRC(I, AB) = CRC(CRC(I, A), O) xor CRC(0, B)
- crcAB := castagnoliShift(castagnoliSSE42TableK2, crcA) ^ crcB
- // CRC(I, ABC) = CRC(CRC(I, AB), O) xor CRC(0, C)
- crc = castagnoliShift(castagnoliSSE42TableK2, crcAB) ^ crcC
- p = p[castagnoliK2*3:]
- }
-
- // Process 3*K1 at a time.
- for len(p) >= castagnoliK1*3 {
- // Compute CRC(I, A), CRC(0, B), and CRC(0, C).
- crcA, crcB, crcC := castagnoliSSE42Triple(
- crc, 0, 0,
- p, p[castagnoliK1:], p[castagnoliK1*2:],
- castagnoliK1/24)
-
- // CRC(I, AB) = CRC(CRC(I, A), O) xor CRC(0, B)
- crcAB := castagnoliShift(castagnoliSSE42TableK1, crcA) ^ crcB
- // CRC(I, ABC) = CRC(CRC(I, AB), O) xor CRC(0, C)
- crc = castagnoliShift(castagnoliSSE42TableK1, crcAB) ^ crcC
- p = p[castagnoliK1*3:]
- }
-
- // Use the simple implementation for what's left.
- crc = castagnoliSSE42(crc, p)
- return ^crc
-}
-
-func archAvailableIEEE() bool {
- return useFastIEEE
-}
-
-var archIeeeTable8 *slicing8Table
-
-func archInitIEEE() {
- if !useFastIEEE {
- panic("not available")
- }
- // We still use slicing-by-8 for small buffers.
- archIeeeTable8 = slicingMakeTable(IEEE)
-}
-
-func archUpdateIEEE(crc uint32, p []byte) uint32 {
- if !useFastIEEE {
- panic("not available")
- }
-
- if len(p) >= 64 {
- left := len(p) & 15
- do := len(p) - left
- crc = ^ieeeCLMUL(^crc, p[:do])
- p = p[do:]
- }
- if len(p) == 0 {
- return crc
- }
- return slicingUpdate(crc, archIeeeTable8, p)
-}
diff --git a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_amd64.s b/cmd/gost/vendor/github.com/klauspost/crc32/crc32_amd64.s
deleted file mode 100644
index e8a7941..0000000
--- a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_amd64.s
+++ /dev/null
@@ -1,319 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build gc
-
-#define NOSPLIT 4
-#define RODATA 8
-
-// castagnoliSSE42 updates the (non-inverted) crc with the given buffer.
-//
-// func castagnoliSSE42(crc uint32, p []byte) uint32
-TEXT ·castagnoliSSE42(SB), NOSPLIT, $0
- MOVL crc+0(FP), AX // CRC value
- MOVQ p+8(FP), SI // data pointer
- MOVQ p_len+16(FP), CX // len(p)
-
- // If there are fewer than 8 bytes to process, skip alignment.
- CMPQ CX, $8
- JL less_than_8
-
- MOVQ SI, BX
- ANDQ $7, BX
- JZ aligned
-
- // Process the first few bytes to 8-byte align the input.
-
- // BX = 8 - BX. We need to process this many bytes to align.
- SUBQ $1, BX
- XORQ $7, BX
-
- BTQ $0, BX
- JNC align_2
-
- CRC32B (SI), AX
- DECQ CX
- INCQ SI
-
-align_2:
- BTQ $1, BX
- JNC align_4
-
- // CRC32W (SI), AX
- BYTE $0x66; BYTE $0xf2; BYTE $0x0f; BYTE $0x38; BYTE $0xf1; BYTE $0x06
-
- SUBQ $2, CX
- ADDQ $2, SI
-
-align_4:
- BTQ $2, BX
- JNC aligned
-
- // CRC32L (SI), AX
- BYTE $0xf2; BYTE $0x0f; BYTE $0x38; BYTE $0xf1; BYTE $0x06
-
- SUBQ $4, CX
- ADDQ $4, SI
-
-aligned:
- // The input is now 8-byte aligned and we can process 8-byte chunks.
- CMPQ CX, $8
- JL less_than_8
-
- CRC32Q (SI), AX
- ADDQ $8, SI
- SUBQ $8, CX
- JMP aligned
-
-less_than_8:
- // We may have some bytes left over; process 4 bytes, then 2, then 1.
- BTQ $2, CX
- JNC less_than_4
-
- // CRC32L (SI), AX
- BYTE $0xf2; BYTE $0x0f; BYTE $0x38; BYTE $0xf1; BYTE $0x06
- ADDQ $4, SI
-
-less_than_4:
- BTQ $1, CX
- JNC less_than_2
-
- // CRC32W (SI), AX
- BYTE $0x66; BYTE $0xf2; BYTE $0x0f; BYTE $0x38; BYTE $0xf1; BYTE $0x06
- ADDQ $2, SI
-
-less_than_2:
- BTQ $0, CX
- JNC done
-
- CRC32B (SI), AX
-
-done:
- MOVL AX, ret+32(FP)
- RET
-
-// castagnoliSSE42Triple updates three (non-inverted) crcs with (24*rounds)
-// bytes from each buffer.
-//
-// func castagnoliSSE42Triple(
-// crc1, crc2, crc3 uint32,
-// a, b, c []byte,
-// rounds uint32,
-// ) (retA uint32, retB uint32, retC uint32)
-TEXT ·castagnoliSSE42Triple(SB), NOSPLIT, $0
- MOVL crcA+0(FP), AX
- MOVL crcB+4(FP), CX
- MOVL crcC+8(FP), DX
-
- MOVQ a+16(FP), R8 // data pointer
- MOVQ b+40(FP), R9 // data pointer
- MOVQ c+64(FP), R10 // data pointer
-
- MOVL rounds+88(FP), R11
-
-loop:
- CRC32Q (R8), AX
- CRC32Q (R9), CX
- CRC32Q (R10), DX
-
- CRC32Q 8(R8), AX
- CRC32Q 8(R9), CX
- CRC32Q 8(R10), DX
-
- CRC32Q 16(R8), AX
- CRC32Q 16(R9), CX
- CRC32Q 16(R10), DX
-
- ADDQ $24, R8
- ADDQ $24, R9
- ADDQ $24, R10
-
- DECQ R11
- JNZ loop
-
- MOVL AX, retA+96(FP)
- MOVL CX, retB+100(FP)
- MOVL DX, retC+104(FP)
- RET
-
-// func haveSSE42() bool
-TEXT ·haveSSE42(SB), NOSPLIT, $0
- XORQ AX, AX
- INCL AX
- CPUID
- SHRQ $20, CX
- ANDQ $1, CX
- MOVB CX, ret+0(FP)
- RET
-
-// func haveCLMUL() bool
-TEXT ·haveCLMUL(SB), NOSPLIT, $0
- XORQ AX, AX
- INCL AX
- CPUID
- SHRQ $1, CX
- ANDQ $1, CX
- MOVB CX, ret+0(FP)
- RET
-
-// func haveSSE41() bool
-TEXT ·haveSSE41(SB), NOSPLIT, $0
- XORQ AX, AX
- INCL AX
- CPUID
- SHRQ $19, CX
- ANDQ $1, CX
- MOVB CX, ret+0(FP)
- RET
-
-// CRC32 polynomial data
-//
-// These constants are lifted from the
-// Linux kernel, since they avoid the costly
-// PSHUFB 16 byte reversal proposed in the
-// original Intel paper.
-DATA r2r1kp<>+0(SB)/8, $0x154442bd4
-DATA r2r1kp<>+8(SB)/8, $0x1c6e41596
-DATA r4r3kp<>+0(SB)/8, $0x1751997d0
-DATA r4r3kp<>+8(SB)/8, $0x0ccaa009e
-DATA rupolykp<>+0(SB)/8, $0x1db710641
-DATA rupolykp<>+8(SB)/8, $0x1f7011641
-DATA r5kp<>+0(SB)/8, $0x163cd6124
-
-GLOBL r2r1kp<>(SB), RODATA, $16
-GLOBL r4r3kp<>(SB), RODATA, $16
-GLOBL rupolykp<>(SB), RODATA, $16
-GLOBL r5kp<>(SB), RODATA, $8
-
-// Based on http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
-// len(p) must be at least 64, and must be a multiple of 16.
-
-// func ieeeCLMUL(crc uint32, p []byte) uint32
-TEXT ·ieeeCLMUL(SB), NOSPLIT, $0
- MOVL crc+0(FP), X0 // Initial CRC value
- MOVQ p+8(FP), SI // data pointer
- MOVQ p_len+16(FP), CX // len(p)
-
- MOVOU (SI), X1
- MOVOU 16(SI), X2
- MOVOU 32(SI), X3
- MOVOU 48(SI), X4
- PXOR X0, X1
- ADDQ $64, SI // buf+=64
- SUBQ $64, CX // len-=64
- CMPQ CX, $64 // Less than 64 bytes left
- JB remain64
-
- MOVOA r2r1kp<>+0(SB), X0
-
-loopback64:
- MOVOA X1, X5
- MOVOA X2, X6
- MOVOA X3, X7
- MOVOA X4, X8
-
- PCLMULQDQ $0, X0, X1
- PCLMULQDQ $0, X0, X2
- PCLMULQDQ $0, X0, X3
- PCLMULQDQ $0, X0, X4
-
- // Load next early
- MOVOU (SI), X11
- MOVOU 16(SI), X12
- MOVOU 32(SI), X13
- MOVOU 48(SI), X14
-
- PCLMULQDQ $0x11, X0, X5
- PCLMULQDQ $0x11, X0, X6
- PCLMULQDQ $0x11, X0, X7
- PCLMULQDQ $0x11, X0, X8
-
- PXOR X5, X1
- PXOR X6, X2
- PXOR X7, X3
- PXOR X8, X4
-
- PXOR X11, X1
- PXOR X12, X2
- PXOR X13, X3
- PXOR X14, X4
-
- ADDQ $0x40, DI
- ADDQ $64, SI // buf+=64
- SUBQ $64, CX // len-=64
- CMPQ CX, $64 // Less than 64 bytes left?
- JGE loopback64
-
- // Fold result into a single register (X1)
-remain64:
- MOVOA r4r3kp<>+0(SB), X0
-
- MOVOA X1, X5
- PCLMULQDQ $0, X0, X1
- PCLMULQDQ $0x11, X0, X5
- PXOR X5, X1
- PXOR X2, X1
-
- MOVOA X1, X5
- PCLMULQDQ $0, X0, X1
- PCLMULQDQ $0x11, X0, X5
- PXOR X5, X1
- PXOR X3, X1
-
- MOVOA X1, X5
- PCLMULQDQ $0, X0, X1
- PCLMULQDQ $0x11, X0, X5
- PXOR X5, X1
- PXOR X4, X1
-
- // If there is less than 16 bytes left we are done
- CMPQ CX, $16
- JB finish
-
- // Encode 16 bytes
-remain16:
- MOVOU (SI), X10
- MOVOA X1, X5
- PCLMULQDQ $0, X0, X1
- PCLMULQDQ $0x11, X0, X5
- PXOR X5, X1
- PXOR X10, X1
- SUBQ $16, CX
- ADDQ $16, SI
- CMPQ CX, $16
- JGE remain16
-
-finish:
- // Fold final result into 32 bits and return it
- PCMPEQB X3, X3
- PCLMULQDQ $1, X1, X0
- PSRLDQ $8, X1
- PXOR X0, X1
-
- MOVOA X1, X2
- MOVQ r5kp<>+0(SB), X0
-
- // Creates 32 bit mask. Note that we don't care about upper half.
- PSRLQ $32, X3
-
- PSRLDQ $4, X2
- PAND X3, X1
- PCLMULQDQ $0, X0, X1
- PXOR X2, X1
-
- MOVOA rupolykp<>+0(SB), X0
-
- MOVOA X1, X2
- PAND X3, X1
- PCLMULQDQ $0x10, X0, X1
- PAND X3, X1
- PCLMULQDQ $0, X0, X1
- PXOR X2, X1
-
- // PEXTRD $1, X1, AX (SSE 4.1)
- BYTE $0x66; BYTE $0x0f; BYTE $0x3a
- BYTE $0x16; BYTE $0xc8; BYTE $0x01
- MOVL AX, ret+32(FP)
-
- RET
diff --git a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_amd64p32.go b/cmd/gost/vendor/github.com/klauspost/crc32/crc32_amd64p32.go
deleted file mode 100644
index 3222b06..0000000
--- a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_amd64p32.go
+++ /dev/null
@@ -1,43 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !appengine,!gccgo
-
-package crc32
-
-// This file contains the code to call the SSE 4.2 version of the Castagnoli
-// CRC.
-
-// haveSSE42 is defined in crc32_amd64p32.s and uses CPUID to test for SSE 4.2
-// support.
-func haveSSE42() bool
-
-// castagnoliSSE42 is defined in crc32_amd64p32.s and uses the SSE4.2 CRC32
-// instruction.
-//go:noescape
-func castagnoliSSE42(crc uint32, p []byte) uint32
-
-var sse42 = haveSSE42()
-
-func archAvailableCastagnoli() bool {
- return sse42
-}
-
-func archInitCastagnoli() {
- if !sse42 {
- panic("not available")
- }
- // No initialization necessary.
-}
-
-func archUpdateCastagnoli(crc uint32, p []byte) uint32 {
- if !sse42 {
- panic("not available")
- }
- return castagnoliSSE42(crc, p)
-}
-
-func archAvailableIEEE() bool { return false }
-func archInitIEEE() { panic("not available") }
-func archUpdateIEEE(crc uint32, p []byte) uint32 { panic("not available") }
diff --git a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_amd64p32.s b/cmd/gost/vendor/github.com/klauspost/crc32/crc32_amd64p32.s
deleted file mode 100644
index a578d68..0000000
--- a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_amd64p32.s
+++ /dev/null
@@ -1,67 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build gc
-
-#define NOSPLIT 4
-#define RODATA 8
-
-// func castagnoliSSE42(crc uint32, p []byte) uint32
-TEXT ·castagnoliSSE42(SB), NOSPLIT, $0
- MOVL crc+0(FP), AX // CRC value
- MOVL p+4(FP), SI // data pointer
- MOVL p_len+8(FP), CX // len(p)
-
- NOTL AX
-
- // If there's less than 8 bytes to process, we do it byte-by-byte.
- CMPQ CX, $8
- JL cleanup
-
- // Process individual bytes until the input is 8-byte aligned.
-startup:
- MOVQ SI, BX
- ANDQ $7, BX
- JZ aligned
-
- CRC32B (SI), AX
- DECQ CX
- INCQ SI
- JMP startup
-
-aligned:
- // The input is now 8-byte aligned and we can process 8-byte chunks.
- CMPQ CX, $8
- JL cleanup
-
- CRC32Q (SI), AX
- ADDQ $8, SI
- SUBQ $8, CX
- JMP aligned
-
-cleanup:
- // We may have some bytes left over that we process one at a time.
- CMPQ CX, $0
- JE done
-
- CRC32B (SI), AX
- INCQ SI
- DECQ CX
- JMP cleanup
-
-done:
- NOTL AX
- MOVL AX, ret+16(FP)
- RET
-
-// func haveSSE42() bool
-TEXT ·haveSSE42(SB), NOSPLIT, $0
- XORQ AX, AX
- INCL AX
- CPUID
- SHRQ $20, CX
- ANDQ $1, CX
- MOVB CX, ret+0(FP)
- RET
-
diff --git a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_generic.go b/cmd/gost/vendor/github.com/klauspost/crc32/crc32_generic.go
deleted file mode 100644
index abacbb6..0000000
--- a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_generic.go
+++ /dev/null
@@ -1,89 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This file contains CRC32 algorithms that are not specific to any architecture
-// and don't use hardware acceleration.
-//
-// The simple (and slow) CRC32 implementation only uses a 256*4 bytes table.
-//
-// The slicing-by-8 algorithm is a faster implementation that uses a bigger
-// table (8*256*4 bytes).
-
-package crc32
-
-// simpleMakeTable allocates and constructs a Table for the specified
-// polynomial. The table is suitable for use with the simple algorithm
-// (simpleUpdate).
-func simpleMakeTable(poly uint32) *Table {
- t := new(Table)
- simplePopulateTable(poly, t)
- return t
-}
-
-// simplePopulateTable constructs a Table for the specified polynomial, suitable
-// for use with simpleUpdate.
-func simplePopulateTable(poly uint32, t *Table) {
- for i := 0; i < 256; i++ {
- crc := uint32(i)
- for j := 0; j < 8; j++ {
- if crc&1 == 1 {
- crc = (crc >> 1) ^ poly
- } else {
- crc >>= 1
- }
- }
- t[i] = crc
- }
-}
-
-// simpleUpdate uses the simple algorithm to update the CRC, given a table that
-// was previously computed using simpleMakeTable.
-func simpleUpdate(crc uint32, tab *Table, p []byte) uint32 {
- crc = ^crc
- for _, v := range p {
- crc = tab[byte(crc)^v] ^ (crc >> 8)
- }
- return ^crc
-}
-
-// Use slicing-by-8 when payload >= this value.
-const slicing8Cutoff = 16
-
-// slicing8Table is array of 8 Tables, used by the slicing-by-8 algorithm.
-type slicing8Table [8]Table
-
-// slicingMakeTable constructs a slicing8Table for the specified polynomial. The
-// table is suitable for use with the slicing-by-8 algorithm (slicingUpdate).
-func slicingMakeTable(poly uint32) *slicing8Table {
- t := new(slicing8Table)
- simplePopulateTable(poly, &t[0])
- for i := 0; i < 256; i++ {
- crc := t[0][i]
- for j := 1; j < 8; j++ {
- crc = t[0][crc&0xFF] ^ (crc >> 8)
- t[j][i] = crc
- }
- }
- return t
-}
-
-// slicingUpdate uses the slicing-by-8 algorithm to update the CRC, given a
-// table that was previously computed using slicingMakeTable.
-func slicingUpdate(crc uint32, tab *slicing8Table, p []byte) uint32 {
- if len(p) >= slicing8Cutoff {
- crc = ^crc
- for len(p) > 8 {
- crc ^= uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24
- crc = tab[0][p[7]] ^ tab[1][p[6]] ^ tab[2][p[5]] ^ tab[3][p[4]] ^
- tab[4][crc>>24] ^ tab[5][(crc>>16)&0xFF] ^
- tab[6][(crc>>8)&0xFF] ^ tab[7][crc&0xFF]
- p = p[8:]
- }
- crc = ^crc
- }
- if len(p) == 0 {
- return crc
- }
- return simpleUpdate(crc, &tab[0], p)
-}
diff --git a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_otherarch.go b/cmd/gost/vendor/github.com/klauspost/crc32/crc32_otherarch.go
deleted file mode 100644
index cc96076..0000000
--- a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_otherarch.go
+++ /dev/null
@@ -1,15 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !amd64,!amd64p32,!s390x
-
-package crc32
-
-func archAvailableIEEE() bool { return false }
-func archInitIEEE() { panic("not available") }
-func archUpdateIEEE(crc uint32, p []byte) uint32 { panic("not available") }
-
-func archAvailableCastagnoli() bool { return false }
-func archInitCastagnoli() { panic("not available") }
-func archUpdateCastagnoli(crc uint32, p []byte) uint32 { panic("not available") }
diff --git a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_s390x.go b/cmd/gost/vendor/github.com/klauspost/crc32/crc32_s390x.go
deleted file mode 100644
index ce96f03..0000000
--- a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_s390x.go
+++ /dev/null
@@ -1,91 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build s390x
-
-package crc32
-
-const (
- vxMinLen = 64
- vxAlignMask = 15 // align to 16 bytes
-)
-
-// hasVectorFacility reports whether the machine has the z/Architecture
-// vector facility installed and enabled.
-func hasVectorFacility() bool
-
-var hasVX = hasVectorFacility()
-
-// vectorizedCastagnoli implements CRC32 using vector instructions.
-// It is defined in crc32_s390x.s.
-//go:noescape
-func vectorizedCastagnoli(crc uint32, p []byte) uint32
-
-// vectorizedIEEE implements CRC32 using vector instructions.
-// It is defined in crc32_s390x.s.
-//go:noescape
-func vectorizedIEEE(crc uint32, p []byte) uint32
-
-func archAvailableCastagnoli() bool {
- return hasVX
-}
-
-var archCastagnoliTable8 *slicing8Table
-
-func archInitCastagnoli() {
- if !hasVX {
- panic("not available")
- }
- // We still use slicing-by-8 for small buffers.
- archCastagnoliTable8 = slicingMakeTable(Castagnoli)
-}
-
-// archUpdateCastagnoli calculates the checksum of p using
-// vectorizedCastagnoli.
-func archUpdateCastagnoli(crc uint32, p []byte) uint32 {
- if !hasVX {
- panic("not available")
- }
- // Use vectorized function if data length is above threshold.
- if len(p) >= vxMinLen {
- aligned := len(p) & ^vxAlignMask
- crc = vectorizedCastagnoli(crc, p[:aligned])
- p = p[aligned:]
- }
- if len(p) == 0 {
- return crc
- }
- return slicingUpdate(crc, archCastagnoliTable8, p)
-}
-
-func archAvailableIEEE() bool {
- return hasVX
-}
-
-var archIeeeTable8 *slicing8Table
-
-func archInitIEEE() {
- if !hasVX {
- panic("not available")
- }
- // We still use slicing-by-8 for small buffers.
- archIeeeTable8 = slicingMakeTable(IEEE)
-}
-
-// archUpdateIEEE calculates the checksum of p using vectorizedIEEE.
-func archUpdateIEEE(crc uint32, p []byte) uint32 {
- if !hasVX {
- panic("not available")
- }
- // Use vectorized function if data length is above threshold.
- if len(p) >= vxMinLen {
- aligned := len(p) & ^vxAlignMask
- crc = vectorizedIEEE(crc, p[:aligned])
- p = p[aligned:]
- }
- if len(p) == 0 {
- return crc
- }
- return slicingUpdate(crc, archIeeeTable8, p)
-}
diff --git a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_s390x.s b/cmd/gost/vendor/github.com/klauspost/crc32/crc32_s390x.s
deleted file mode 100644
index e980ca2..0000000
--- a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_s390x.s
+++ /dev/null
@@ -1,249 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build s390x
-
-#include "textflag.h"
-
-// Vector register range containing CRC-32 constants
-
-#define CONST_PERM_LE2BE V9
-#define CONST_R2R1 V10
-#define CONST_R4R3 V11
-#define CONST_R5 V12
-#define CONST_RU_POLY V13
-#define CONST_CRC_POLY V14
-
-// The CRC-32 constant block contains reduction constants to fold and
-// process particular chunks of the input data stream in parallel.
-//
-// Note that the constant definitions below are extended in order to compute
-// intermediate results with a single VECTOR GALOIS FIELD MULTIPLY instruction.
-// The rightmost doubleword can be 0 to prevent contribution to the result or
-// can be multiplied by 1 to perform an XOR without the need for a separate
-// VECTOR EXCLUSIVE OR instruction.
-//
-// The polynomials used are bit-reflected:
-//
-// IEEE: P'(x) = 0x0edb88320
-// Castagnoli: P'(x) = 0x082f63b78
-
-// IEEE polynomial constants
-DATA ·crcleconskp+0(SB)/8, $0x0F0E0D0C0B0A0908 // LE-to-BE mask
-DATA ·crcleconskp+8(SB)/8, $0x0706050403020100
-DATA ·crcleconskp+16(SB)/8, $0x00000001c6e41596 // R2
-DATA ·crcleconskp+24(SB)/8, $0x0000000154442bd4 // R1
-DATA ·crcleconskp+32(SB)/8, $0x00000000ccaa009e // R4
-DATA ·crcleconskp+40(SB)/8, $0x00000001751997d0 // R3
-DATA ·crcleconskp+48(SB)/8, $0x0000000000000000
-DATA ·crcleconskp+56(SB)/8, $0x0000000163cd6124 // R5
-DATA ·crcleconskp+64(SB)/8, $0x0000000000000000
-DATA ·crcleconskp+72(SB)/8, $0x00000001F7011641 // u'
-DATA ·crcleconskp+80(SB)/8, $0x0000000000000000
-DATA ·crcleconskp+88(SB)/8, $0x00000001DB710641 // P'(x) << 1
-
-GLOBL ·crcleconskp(SB), RODATA, $144
-
-// Castagonli Polynomial constants
-DATA ·crccleconskp+0(SB)/8, $0x0F0E0D0C0B0A0908 // LE-to-BE mask
-DATA ·crccleconskp+8(SB)/8, $0x0706050403020100
-DATA ·crccleconskp+16(SB)/8, $0x000000009e4addf8 // R2
-DATA ·crccleconskp+24(SB)/8, $0x00000000740eef02 // R1
-DATA ·crccleconskp+32(SB)/8, $0x000000014cd00bd6 // R4
-DATA ·crccleconskp+40(SB)/8, $0x00000000f20c0dfe // R3
-DATA ·crccleconskp+48(SB)/8, $0x0000000000000000
-DATA ·crccleconskp+56(SB)/8, $0x00000000dd45aab8 // R5
-DATA ·crccleconskp+64(SB)/8, $0x0000000000000000
-DATA ·crccleconskp+72(SB)/8, $0x00000000dea713f1 // u'
-DATA ·crccleconskp+80(SB)/8, $0x0000000000000000
-DATA ·crccleconskp+88(SB)/8, $0x0000000105ec76f0 // P'(x) << 1
-
-GLOBL ·crccleconskp(SB), RODATA, $144
-
-// func hasVectorFacility() bool
-TEXT ·hasVectorFacility(SB), NOSPLIT, $24-1
- MOVD $x-24(SP), R1
- XC $24, 0(R1), 0(R1) // clear the storage
- MOVD $2, R0 // R0 is the number of double words stored -1
- WORD $0xB2B01000 // STFLE 0(R1)
- XOR R0, R0 // reset the value of R0
- MOVBZ z-8(SP), R1
- AND $0x40, R1
- BEQ novector
-
-vectorinstalled:
- // check if the vector instruction has been enabled
- VLEIB $0, $0xF, V16
- VLGVB $0, V16, R1
- CMPBNE R1, $0xF, novector
- MOVB $1, ret+0(FP) // have vx
- RET
-
-novector:
- MOVB $0, ret+0(FP) // no vx
- RET
-
-// The CRC-32 function(s) use these calling conventions:
-//
-// Parameters:
-//
-// R2: Initial CRC value, typically ~0; and final CRC (return) value.
-// R3: Input buffer pointer, performance might be improved if the
-// buffer is on a doubleword boundary.
-// R4: Length of the buffer, must be 64 bytes or greater.
-//
-// Register usage:
-//
-// R5: CRC-32 constant pool base pointer.
-// V0: Initial CRC value and intermediate constants and results.
-// V1..V4: Data for CRC computation.
-// V5..V8: Next data chunks that are fetched from the input buffer.
-//
-// V9..V14: CRC-32 constants.
-
-// func vectorizedIEEE(crc uint32, p []byte) uint32
-TEXT ·vectorizedIEEE(SB), NOSPLIT, $0
- MOVWZ crc+0(FP), R2 // R2 stores the CRC value
- MOVD p+8(FP), R3 // data pointer
- MOVD p_len+16(FP), R4 // len(p)
-
- MOVD $·crcleconskp(SB), R5
- BR vectorizedBody<>(SB)
-
-// func vectorizedCastagnoli(crc uint32, p []byte) uint32
-TEXT ·vectorizedCastagnoli(SB), NOSPLIT, $0
- MOVWZ crc+0(FP), R2 // R2 stores the CRC value
- MOVD p+8(FP), R3 // data pointer
- MOVD p_len+16(FP), R4 // len(p)
-
- // R5: crc-32 constant pool base pointer, constant is used to reduce crc
- MOVD $·crccleconskp(SB), R5
- BR vectorizedBody<>(SB)
-
-TEXT vectorizedBody<>(SB), NOSPLIT, $0
- XOR $0xffffffff, R2 // NOTW R2
- VLM 0(R5), CONST_PERM_LE2BE, CONST_CRC_POLY
-
- // Load the initial CRC value into the rightmost word of V0
- VZERO V0
- VLVGF $3, R2, V0
-
- // Crash if the input size is less than 64-bytes.
- CMP R4, $64
- BLT crash
-
- // Load a 64-byte data chunk and XOR with CRC
- VLM 0(R3), V1, V4 // 64-bytes into V1..V4
-
- // Reflect the data if the CRC operation is in the bit-reflected domain
- VPERM V1, V1, CONST_PERM_LE2BE, V1
- VPERM V2, V2, CONST_PERM_LE2BE, V2
- VPERM V3, V3, CONST_PERM_LE2BE, V3
- VPERM V4, V4, CONST_PERM_LE2BE, V4
-
- VX V0, V1, V1 // V1 ^= CRC
- ADD $64, R3 // BUF = BUF + 64
- ADD $(-64), R4
-
- // Check remaining buffer size and jump to proper folding method
- CMP R4, $64
- BLT less_than_64bytes
-
-fold_64bytes_loop:
- // Load the next 64-byte data chunk into V5 to V8
- VLM 0(R3), V5, V8
- VPERM V5, V5, CONST_PERM_LE2BE, V5
- VPERM V6, V6, CONST_PERM_LE2BE, V6
- VPERM V7, V7, CONST_PERM_LE2BE, V7
- VPERM V8, V8, CONST_PERM_LE2BE, V8
-
- // Perform a GF(2) multiplication of the doublewords in V1 with
- // the reduction constants in V0. The intermediate result is
- // then folded (accumulated) with the next data chunk in V5 and
- // stored in V1. Repeat this step for the register contents
- // in V2, V3, and V4 respectively.
-
- VGFMAG CONST_R2R1, V1, V5, V1
- VGFMAG CONST_R2R1, V2, V6, V2
- VGFMAG CONST_R2R1, V3, V7, V3
- VGFMAG CONST_R2R1, V4, V8, V4
-
- // Adjust buffer pointer and length for next loop
- ADD $64, R3 // BUF = BUF + 64
- ADD $(-64), R4 // LEN = LEN - 64
-
- CMP R4, $64
- BGE fold_64bytes_loop
-
-less_than_64bytes:
- // Fold V1 to V4 into a single 128-bit value in V1
- VGFMAG CONST_R4R3, V1, V2, V1
- VGFMAG CONST_R4R3, V1, V3, V1
- VGFMAG CONST_R4R3, V1, V4, V1
-
- // Check whether to continue with 64-bit folding
- CMP R4, $16
- BLT final_fold
-
-fold_16bytes_loop:
- VL 0(R3), V2 // Load next data chunk
- VPERM V2, V2, CONST_PERM_LE2BE, V2
-
- VGFMAG CONST_R4R3, V1, V2, V1 // Fold next data chunk
-
- // Adjust buffer pointer and size for folding next data chunk
- ADD $16, R3
- ADD $-16, R4
-
- // Process remaining data chunks
- CMP R4, $16
- BGE fold_16bytes_loop
-
-final_fold:
- VLEIB $7, $0x40, V9
- VSRLB V9, CONST_R4R3, V0
- VLEIG $0, $1, V0
-
- VGFMG V0, V1, V1
-
- VLEIB $7, $0x20, V9 // Shift by words
- VSRLB V9, V1, V2 // Store remaining bits in V2
- VUPLLF V1, V1 // Split rightmost doubleword
- VGFMAG CONST_R5, V1, V2, V1 // V1 = (V1 * R5) XOR V2
-
- // The input values to the Barret reduction are the degree-63 polynomial
- // in V1 (R(x)), degree-32 generator polynomial, and the reduction
- // constant u. The Barret reduction result is the CRC value of R(x) mod
- // P(x).
- //
- // The Barret reduction algorithm is defined as:
- //
- // 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u
- // 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x)
- // 3. C(x) = R(x) XOR T2(x) mod x^32
- //
- // Note: To compensate the division by x^32, use the vector unpack
- // instruction to move the leftmost word into the leftmost doubleword
- // of the vector register. The rightmost doubleword is multiplied
- // with zero to not contribute to the intermedate results.
-
- // T1(x) = floor( R(x) / x^32 ) GF2MUL u
- VUPLLF V1, V2
- VGFMG CONST_RU_POLY, V2, V2
-
- // Compute the GF(2) product of the CRC polynomial in VO with T1(x) in
- // V2 and XOR the intermediate result, T2(x), with the value in V1.
- // The final result is in the rightmost word of V2.
-
- VUPLLF V2, V2
- VGFMAG CONST_CRC_POLY, V2, V1, V2
-
-done:
- VLGVF $2, V2, R2
- XOR $0xffffffff, R2 // NOTW R2
- MOVWZ R2, ret + 32(FP)
- RET
-
-crash:
- MOVD $0, (R0) // input size is less than 64-bytes
diff --git a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/README.md b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/README.md
index 3f34820..c8139b3 100644
--- a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/README.md
+++ b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/README.md
@@ -1,4 +1,5 @@
-# kcp-go
+
+
[![GoDoc][1]][2] [![Powered][9]][10] [![MIT licensed][11]][12] [![Build Status][3]][4] [![Go Report Card][5]][6] [![Coverage Statusd][7]][8]
@@ -19,12 +20,12 @@
## Introduction
-kcp-go is a full-featured ***reliable-UDP*** library for golang. It provides ***reliable, ordered, and error-checked*** delivery of a stream of octets between applications running on hosts communicating over an IP network.
+kcp-go is a full-featured ***Reliable-UDP*** library for golang. It provides ***reliable, ordered, and error-checked*** delivery of a stream of octets between applications running on hosts communicating over an IP network.
## Features
-1. Optimized for ***Real-Time Strategy Game***.
-1. Compatible with [skywind3000's](https://github.com/skywind3000) C version with modifications.
+1. Optimized for ***Online Games, Audio/Video Streaming***.
+1. Compatible with [skywind3000's](https://github.com/skywind3000) C version with optimizations.
1. ***Cache friendly*** and ***Memory optimized*** design in golang.
1. Compatible with [net.Conn](https://golang.org/pkg/net/#Conn) and [net.Listener](https://golang.org/pkg/net/#Listener).
1. [FEC(Forward Error Correction)](https://en.wikipedia.org/wiki/Forward_error_correction) Support with [Reed-Solomon Codes](https://en.wikipedia.org/wiki/Reed%E2%80%93Solomon_error_correction)
@@ -40,7 +41,7 @@ For complete documentation, see the associated [Godoc](https://godoc.org/github.
## Specification
-#
+
## Usage
@@ -75,14 +76,14 @@ PASS
ok github.com/xtaci/kcp-go 0.600s
```
+## Who is using this?
+
+1. https://github.com/xtaci/kcptun
+2. https://github.com/getlantern/lantern
+3. https://github.com/smallnest/rpcx
+
## Links
-1. https://github.com/xtaci/libkcp -- Official client library for iOS/Android(C++11)
+1. https://github.com/xtaci/libkcp -- FEC enhanced KCP session library for iOS/Android in C++
2. https://github.com/skywind3000/kcp -- A Fast and Reliable ARQ Protocol
3. https://github.com/klauspost/reedsolomon -- Reed-Solomon Erasure Coding in Go
-
-## Donation
-
-
-
-All donations on this project will be used to support the development of [gonet/2](http://gonet2.github.io/).
diff --git a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/crypt.go b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/crypt.go
index df85278..2e456b8 100644
--- a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/crypt.go
+++ b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/crypt.go
@@ -20,7 +20,9 @@ var (
saltxor = `sH3CIVoF#rWLtJo6`
)
-// BlockCrypt defines encryption/decryption methods for a given byte slice
+// BlockCrypt defines encryption/decryption methods for a given byte slice.
+// Notes on implementing: the data to be encrypted contains a builtin
+// nonce at the first 16 bytes
type BlockCrypt interface {
// Encrypt encrypts the whole block in src into dst.
// Dst and src may point at the same memory.
@@ -31,40 +33,35 @@ type BlockCrypt interface {
Decrypt(dst, src []byte)
}
-// Salsa20BlockCrypt implements BlockCrypt
-type Salsa20BlockCrypt struct {
+type salsa20BlockCrypt struct {
key [32]byte
}
-// NewSalsa20BlockCrypt initates BlockCrypt by the given key
+// NewSalsa20BlockCrypt https://en.wikipedia.org/wiki/Salsa20
func NewSalsa20BlockCrypt(key []byte) (BlockCrypt, error) {
- c := new(Salsa20BlockCrypt)
+ c := new(salsa20BlockCrypt)
copy(c.key[:], key)
return c, nil
}
-// Encrypt implements Encrypt interface
-func (c *Salsa20BlockCrypt) Encrypt(dst, src []byte) {
+func (c *salsa20BlockCrypt) Encrypt(dst, src []byte) {
+ salsa20.XORKeyStream(dst[8:], src[8:], src[:8], &c.key)
+ copy(dst[:8], src[:8])
+}
+func (c *salsa20BlockCrypt) Decrypt(dst, src []byte) {
salsa20.XORKeyStream(dst[8:], src[8:], src[:8], &c.key)
copy(dst[:8], src[:8])
}
-// Decrypt implements Decrypt interface
-func (c *Salsa20BlockCrypt) Decrypt(dst, src []byte) {
- salsa20.XORKeyStream(dst[8:], src[8:], src[:8], &c.key)
- copy(dst[:8], src[:8])
-}
-
-// TwofishBlockCrypt implements BlockCrypt
-type TwofishBlockCrypt struct {
+type twofishBlockCrypt struct {
encbuf []byte
decbuf []byte
block cipher.Block
}
-// NewTwofishBlockCrypt initates BlockCrypt by the given key
+// NewTwofishBlockCrypt https://en.wikipedia.org/wiki/Twofish
func NewTwofishBlockCrypt(key []byte) (BlockCrypt, error) {
- c := new(TwofishBlockCrypt)
+ c := new(twofishBlockCrypt)
block, err := twofish.NewCipher(key)
if err != nil {
return nil, err
@@ -75,22 +72,18 @@ func NewTwofishBlockCrypt(key []byte) (BlockCrypt, error) {
return c, nil
}
-// Encrypt implements Encrypt interface
-func (c *TwofishBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
+func (c *twofishBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
+func (c *twofishBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
-// Decrypt implements Decrypt interface
-func (c *TwofishBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
-
-// TripleDESBlockCrypt implements BlockCrypt
-type TripleDESBlockCrypt struct {
+type tripleDESBlockCrypt struct {
encbuf []byte
decbuf []byte
block cipher.Block
}
-// NewTripleDESBlockCrypt initates BlockCrypt by the given key
+// NewTripleDESBlockCrypt https://en.wikipedia.org/wiki/Triple_DES
func NewTripleDESBlockCrypt(key []byte) (BlockCrypt, error) {
- c := new(TripleDESBlockCrypt)
+ c := new(tripleDESBlockCrypt)
block, err := des.NewTripleDESCipher(key)
if err != nil {
return nil, err
@@ -101,22 +94,18 @@ func NewTripleDESBlockCrypt(key []byte) (BlockCrypt, error) {
return c, nil
}
-// Encrypt implements Encrypt interface
-func (c *TripleDESBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
+func (c *tripleDESBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
+func (c *tripleDESBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
-// Decrypt implements Decrypt interface
-func (c *TripleDESBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
-
-// Cast5BlockCrypt implements BlockCrypt
-type Cast5BlockCrypt struct {
+type cast5BlockCrypt struct {
encbuf []byte
decbuf []byte
block cipher.Block
}
-// NewCast5BlockCrypt initates BlockCrypt by the given key
+// NewCast5BlockCrypt https://en.wikipedia.org/wiki/CAST-128
func NewCast5BlockCrypt(key []byte) (BlockCrypt, error) {
- c := new(Cast5BlockCrypt)
+ c := new(cast5BlockCrypt)
block, err := cast5.NewCipher(key)
if err != nil {
return nil, err
@@ -127,22 +116,18 @@ func NewCast5BlockCrypt(key []byte) (BlockCrypt, error) {
return c, nil
}
-// Encrypt implements Encrypt interface
-func (c *Cast5BlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
+func (c *cast5BlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
+func (c *cast5BlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
-// Decrypt implements Decrypt interface
-func (c *Cast5BlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
-
-// BlowfishBlockCrypt implements BlockCrypt
-type BlowfishBlockCrypt struct {
+type blowfishBlockCrypt struct {
encbuf []byte
decbuf []byte
block cipher.Block
}
-// NewBlowfishBlockCrypt initates BlockCrypt by the given key
+// NewBlowfishBlockCrypt https://en.wikipedia.org/wiki/Blowfish_(cipher)
func NewBlowfishBlockCrypt(key []byte) (BlockCrypt, error) {
- c := new(BlowfishBlockCrypt)
+ c := new(blowfishBlockCrypt)
block, err := blowfish.NewCipher(key)
if err != nil {
return nil, err
@@ -153,22 +138,18 @@ func NewBlowfishBlockCrypt(key []byte) (BlockCrypt, error) {
return c, nil
}
-// Encrypt implements Encrypt interface
-func (c *BlowfishBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
+func (c *blowfishBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
+func (c *blowfishBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
-// Decrypt implements Decrypt interface
-func (c *BlowfishBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
-
-// AESBlockCrypt implements BlockCrypt
-type AESBlockCrypt struct {
+type aesBlockCrypt struct {
encbuf []byte
decbuf []byte
block cipher.Block
}
-// NewAESBlockCrypt initates BlockCrypt by the given key
+// NewAESBlockCrypt https://en.wikipedia.org/wiki/Advanced_Encryption_Standard
func NewAESBlockCrypt(key []byte) (BlockCrypt, error) {
- c := new(AESBlockCrypt)
+ c := new(aesBlockCrypt)
block, err := aes.NewCipher(key)
if err != nil {
return nil, err
@@ -179,22 +160,18 @@ func NewAESBlockCrypt(key []byte) (BlockCrypt, error) {
return c, nil
}
-// Encrypt implements Encrypt interface
-func (c *AESBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
+func (c *aesBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
+func (c *aesBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
-// Decrypt implements Decrypt interface
-func (c *AESBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
-
-// TEABlockCrypt implements BlockCrypt
-type TEABlockCrypt struct {
+type teaBlockCrypt struct {
encbuf []byte
decbuf []byte
block cipher.Block
}
-// NewTEABlockCrypt initate BlockCrypt by the given key
+// NewTEABlockCrypt https://en.wikipedia.org/wiki/Tiny_Encryption_Algorithm
func NewTEABlockCrypt(key []byte) (BlockCrypt, error) {
- c := new(TEABlockCrypt)
+ c := new(teaBlockCrypt)
block, err := tea.NewCipherWithRounds(key, 16)
if err != nil {
return nil, err
@@ -205,22 +182,18 @@ func NewTEABlockCrypt(key []byte) (BlockCrypt, error) {
return c, nil
}
-// Encrypt implements Encrypt interface
-func (c *TEABlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
+func (c *teaBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
+func (c *teaBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
-// Decrypt implements Decrypt interface
-func (c *TEABlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
-
-// XTEABlockCrypt implements BlockCrypt
-type XTEABlockCrypt struct {
+type xteaBlockCrypt struct {
encbuf []byte
decbuf []byte
block cipher.Block
}
-// NewXTEABlockCrypt initate BlockCrypt by the given key
+// NewXTEABlockCrypt https://en.wikipedia.org/wiki/XTEA
func NewXTEABlockCrypt(key []byte) (BlockCrypt, error) {
- c := new(XTEABlockCrypt)
+ c := new(xteaBlockCrypt)
block, err := xtea.NewCipher(key)
if err != nil {
return nil, err
@@ -231,43 +204,32 @@ func NewXTEABlockCrypt(key []byte) (BlockCrypt, error) {
return c, nil
}
-// Encrypt implements Encrypt interface
-func (c *XTEABlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
+func (c *xteaBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) }
+func (c *xteaBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
-// Decrypt implements Decrypt interface
-func (c *XTEABlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) }
-
-// SimpleXORBlockCrypt implements BlockCrypt
-type SimpleXORBlockCrypt struct {
+type simpleXORBlockCrypt struct {
xortbl []byte
}
-// NewSimpleXORBlockCrypt initate BlockCrypt by the given key
+// NewSimpleXORBlockCrypt returns a BlockCrypt that XORs data with a table expanded from key via PBKDF2
func NewSimpleXORBlockCrypt(key []byte) (BlockCrypt, error) {
- c := new(SimpleXORBlockCrypt)
+ c := new(simpleXORBlockCrypt)
c.xortbl = pbkdf2.Key(key, []byte(saltxor), 32, mtuLimit, sha1.New)
return c, nil
}
-// Encrypt implements Encrypt interface
-func (c *SimpleXORBlockCrypt) Encrypt(dst, src []byte) { xorBytes(dst, src, c.xortbl) }
+func (c *simpleXORBlockCrypt) Encrypt(dst, src []byte) { xorBytes(dst, src, c.xortbl) }
+func (c *simpleXORBlockCrypt) Decrypt(dst, src []byte) { xorBytes(dst, src, c.xortbl) }
-// Decrypt implements Decrypt interface
-func (c *SimpleXORBlockCrypt) Decrypt(dst, src []byte) { xorBytes(dst, src, c.xortbl) }
+type noneBlockCrypt struct{}
-// NoneBlockCrypt simple returns the plaintext
-type NoneBlockCrypt struct{}
-
-// NewNoneBlockCrypt initate by the given key
+// NewNoneBlockCrypt returns a BlockCrypt that performs no encryption; it only copies src to dst
func NewNoneBlockCrypt(key []byte) (BlockCrypt, error) {
- return new(NoneBlockCrypt), nil
+ return new(noneBlockCrypt), nil
}
-// Encrypt implements Encrypt interface
-func (c *NoneBlockCrypt) Encrypt(dst, src []byte) { copy(dst, src) }
-
-// Decrypt implements Decrypt interface
-func (c *NoneBlockCrypt) Decrypt(dst, src []byte) { copy(dst, src) }
+func (c *noneBlockCrypt) Encrypt(dst, src []byte) { copy(dst, src) }
+func (c *noneBlockCrypt) Decrypt(dst, src []byte) { copy(dst, src) }
// packet encryption with local CFB mode
func encrypt(block cipher.Block, dst, src, buf []byte) {
diff --git a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/fec.go b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/fec.go
index 10ad1c0..25201bb 100644
--- a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/fec.go
+++ b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/fec.go
@@ -2,7 +2,7 @@ package kcp
import (
"encoding/binary"
- "sync"
+ "sync/atomic"
"github.com/klauspost/reedsolomon"
)
@@ -26,10 +26,10 @@ type (
next uint32 // next seqid
enc reedsolomon.Encoder
shards [][]byte
+ shards2 [][]byte // for calcECC
shardsflag []bool
paws uint32 // Protect Against Wrapped Sequence numbers
lastCheck uint32
- xmitBuf sync.Pool
}
fecPacket struct {
@@ -60,11 +60,8 @@ func newFEC(rxlimit, dataShards, parityShards int) *FEC {
}
fec.enc = enc
fec.shards = make([][]byte, fec.shardSize)
+ fec.shards2 = make([][]byte, fec.shardSize)
fec.shardsflag = make([]bool, fec.shardSize)
- fec.xmitBuf.New = func() interface{} {
- return make([]byte, mtuLimit)
- }
-
return fec
}
@@ -75,9 +72,8 @@ func (fec *FEC) decode(data []byte) fecPacket {
pkt.flag = binary.LittleEndian.Uint16(data[4:])
pkt.ts = currentMs()
// allocate memory & copy
- buf := fec.xmitBuf.Get().([]byte)
- n := copy(buf, data[6:])
- xorBytes(buf[n:], buf[n:], buf[n:])
+ buf := xmitBuf.Get().([]byte)[:len(data)-6]
+ copy(buf, data[6:])
pkt.data = buf
return pkt
}
@@ -107,7 +103,7 @@ func (fec *FEC) input(pkt fecPacket) (recovered [][]byte) {
if now-fec.rx[k].ts < fecExpire {
rx = append(rx, fec.rx[k])
} else {
- fec.xmitBuf.Put(fec.rx[k].data)
+ xmitBuf.Put(fec.rx[k].data)
}
}
fec.rx = rx
@@ -119,7 +115,7 @@ func (fec *FEC) input(pkt fecPacket) (recovered [][]byte) {
insertIdx := 0
for i := n; i >= 0; i-- {
if pkt.seqid == fec.rx[i].seqid { // de-duplicate
- fec.xmitBuf.Put(pkt.data)
+ xmitBuf.Put(pkt.data)
return nil
} else if pkt.seqid > fec.rx[i].seqid { // insertion
insertIdx = i + 1
@@ -184,7 +180,7 @@ func (fec *FEC) input(pkt fecPacket) (recovered [][]byte) {
if numDataShard == fec.dataShards { // no lost
for i := first; i < first+numshard; i++ { // free
- fec.xmitBuf.Put(fec.rx[i].data)
+ xmitBuf.Put(fec.rx[i].data)
}
copy(fec.rx[first:], fec.rx[first+numshard:])
for i := 0; i < numshard; i++ { // dereference
@@ -194,7 +190,9 @@ func (fec *FEC) input(pkt fecPacket) (recovered [][]byte) {
} else if numshard >= fec.dataShards { // recoverable
for k := range shards {
if shards[k] != nil {
+ dlen := len(shards[k])
shards[k] = shards[k][:maxlen]
+ xorBytes(shards[k][dlen:], shards[k][dlen:], shards[k][dlen:])
}
}
if err := fec.enc.Reconstruct(shards); err == nil {
@@ -206,7 +204,7 @@ func (fec *FEC) input(pkt fecPacket) (recovered [][]byte) {
}
for i := first; i < first+numshard; i++ { // free
- fec.xmitBuf.Put(fec.rx[i].data)
+ xmitBuf.Put(fec.rx[i].data)
}
copy(fec.rx[first:], fec.rx[first+numshard:])
for i := 0; i < numshard; i++ { // dereference
@@ -218,7 +216,10 @@ func (fec *FEC) input(pkt fecPacket) (recovered [][]byte) {
// keep rxlimit
if len(fec.rx) > fec.rxlimit {
- fec.xmitBuf.Put(fec.rx[0].data) // free
+ if fec.rx[0].flag == typeData { // record unrecoverable data
+ atomic.AddUint64(&DefaultSnmp.FECShortShards, 1)
+ }
+ xmitBuf.Put(fec.rx[0].data) // free
fec.rx[0].data = nil
fec.rx = fec.rx[1:]
}
@@ -229,7 +230,7 @@ func (fec *FEC) calcECC(data [][]byte, offset, maxlen int) (ecc [][]byte) {
if len(data) != fec.shardSize {
return nil
}
- shards := make([][]byte, fec.shardSize)
+ shards := fec.shards2
for k := range shards {
shards[k] = data[k][offset:maxlen]
}
diff --git a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/frame.png b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/frame.png
index 7952e4a..0b0aefd 100644
Binary files a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/frame.png and b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/frame.png differ
diff --git a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/kcp-go.png b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/kcp-go.png
new file mode 100644
index 0000000..151b7c4
Binary files /dev/null and b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/kcp-go.png differ
diff --git a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/kcp.go b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/kcp.go
index 78ccf26..f53e834 100644
--- a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/kcp.go
+++ b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/kcp.go
@@ -2,7 +2,6 @@
package kcp
import (
- "container/heap"
"encoding/binary"
"sync/atomic"
)
@@ -123,13 +122,6 @@ func (seg *Segment) encode(ptr []byte) []byte {
return ptr
}
-// NewSegment creates a KCP segment
-func NewSegment(size int) *Segment {
- seg := new(Segment)
- seg.data = make([]byte, size)
- return seg
-}
-
// KCP defines a single KCP connection
type KCP struct {
conv, mtu, mss, state uint32
@@ -137,7 +129,7 @@ type KCP struct {
ssthresh uint32
rx_rttval, rx_srtt, rx_rto, rx_minrto uint32
snd_wnd, rcv_wnd, rmt_wnd, cwnd, probe uint32
- current, interval, ts_flush, xmit uint32
+ interval, ts_flush, xmit uint32
nodelay, updated uint32
ts_probe, probe_wait uint32
dead_link, incr uint32
@@ -150,33 +142,17 @@ type KCP struct {
snd_buf []Segment
rcv_buf []Segment
- acklist ACKList
+ acklist []ackItem
buffer []byte
output Output
}
-// ACK packet to return
-type ACK struct {
+type ackItem struct {
sn uint32
ts uint32
}
-// ACKList is heapified
-type ACKList []ACK
-
-func (l ACKList) Len() int { return len(l) }
-func (l ACKList) Less(i, j int) bool { return l[i].sn < l[j].sn }
-func (l ACKList) Swap(i, j int) { l[i], l[j] = l[j], l[i] }
-func (l *ACKList) Push(x interface{}) { *l = append(*l, x.(ACK)) }
-func (l *ACKList) Pop() interface{} {
- old := *l
- n := len(old)
- x := old[n-1]
- *l = old[0 : n-1]
- return x
-}
-
// NewKCP create a new kcp control object, 'conv' must equal in two endpoint
// from the same connection.
func NewKCP(conv uint32, output Output) *KCP {
@@ -198,6 +174,18 @@ func NewKCP(conv uint32, output Output) *KCP {
return kcp
}
+// newSegment creates a KCP segment
+func (kcp *KCP) newSegment(size int) *Segment {
+ seg := new(Segment)
+ seg.data = xmitBuf.Get().([]byte)[:size]
+ return seg
+}
+
+// delSegment recycles a KCP segment
+func (kcp *KCP) delSegment(seg *Segment) {
+ xmitBuf.Put(seg.data)
+}
+
// PeekSize checks the size of next message in the recv queue
func (kcp *KCP) PeekSize() (length int) {
if len(kcp.rcv_queue) == 0 {
@@ -251,7 +239,7 @@ func (kcp *KCP) Recv(buffer []byte) (n int) {
buffer = buffer[len(seg.data):]
n += len(seg.data)
count++
- seg.data = nil
+ kcp.delSegment(seg)
if seg.frg == 0 {
break
}
@@ -263,14 +251,13 @@ func (kcp *KCP) Recv(buffer []byte) (n int) {
for k := range kcp.rcv_buf {
seg := &kcp.rcv_buf[k]
if seg.sn == kcp.rcv_nxt && len(kcp.rcv_queue) < int(kcp.rcv_wnd) {
- kcp.rcv_queue = append(kcp.rcv_queue, *seg)
kcp.rcv_nxt++
count++
- seg.data = nil
} else {
break
}
}
+ kcp.rcv_queue = append(kcp.rcv_queue, kcp.rcv_buf[:count]...)
kcp.rcv_buf = kcp.rcv_buf[count:]
// fast recover
@@ -300,11 +287,12 @@ func (kcp *KCP) Send(buffer []byte) int {
if len(buffer) < capacity {
extend = len(buffer)
}
- seg := NewSegment(len(old.data) + extend)
+ seg := kcp.newSegment(len(old.data) + extend)
seg.frg = 0
copy(seg.data, old.data)
copy(seg.data[len(old.data):], buffer)
buffer = buffer[extend:]
+ kcp.delSegment(old)
kcp.snd_queue[n-1] = *seg
}
}
@@ -335,7 +323,7 @@ func (kcp *KCP) Send(buffer []byte) int {
} else {
size = len(buffer)
}
- seg := NewSegment(size)
+ seg := kcp.newSegment(size)
copy(seg.data, buffer[:size])
if kcp.stream == 0 { // message mode
seg.frg = uint32(count - i - 1)
@@ -348,8 +336,8 @@ func (kcp *KCP) Send(buffer []byte) int {
return 0
}
-// https://tools.ietf.org/html/rfc6298
func (kcp *KCP) update_ack(rtt int32) {
+ // https://tools.ietf.org/html/rfc6298
var rto uint32
if kcp.rx_srtt == 0 {
kcp.rx_srtt = uint32(rtt)
@@ -365,7 +353,7 @@ func (kcp *KCP) update_ack(rtt int32) {
kcp.rx_srtt = 1
}
}
- rto = kcp.rx_srtt + _imax_(1, 4*kcp.rx_rttval)
+ rto = kcp.rx_srtt + _imax_(kcp.interval, 4*kcp.rx_rttval)
kcp.rx_rto = _ibound_(kcp.rx_minrto, rto, IKCP_RTO_MAX)
}
@@ -386,6 +374,7 @@ func (kcp *KCP) parse_ack(sn uint32) {
for k := range kcp.snd_buf {
seg := &kcp.snd_buf[k]
if sn == seg.sn {
+ kcp.delSegment(seg)
copy(kcp.snd_buf[k:], kcp.snd_buf[k+1:])
kcp.snd_buf[len(kcp.snd_buf)-1] = Segment{}
kcp.snd_buf = kcp.snd_buf[:len(kcp.snd_buf)-1]
@@ -417,8 +406,8 @@ func (kcp *KCP) parse_una(una uint32) {
for k := range kcp.snd_buf {
seg := &kcp.snd_buf[k]
if _itimediff(una, seg.sn) > 0 {
+ kcp.delSegment(seg)
count++
- seg.data = nil
} else {
break
}
@@ -428,14 +417,14 @@ func (kcp *KCP) parse_una(una uint32) {
// ack append
func (kcp *KCP) ack_push(sn, ts uint32) {
- heap.Push(&kcp.acklist, ACK{sn, ts})
+ kcp.acklist = append(kcp.acklist, ackItem{sn, ts})
}
func (kcp *KCP) parse_data(newseg *Segment) {
sn := newseg.sn
if _itimediff(sn, kcp.rcv_nxt+kcp.rcv_wnd) >= 0 ||
_itimediff(sn, kcp.rcv_nxt) < 0 {
- atomic.AddUint64(&DefaultSnmp.RepeatSegs, 1)
+ kcp.delSegment(newseg)
return
}
@@ -463,6 +452,8 @@ func (kcp *KCP) parse_data(newseg *Segment) {
copy(kcp.rcv_buf[insert_idx+1:], kcp.rcv_buf[insert_idx:])
kcp.rcv_buf[insert_idx] = *newseg
}
+ } else {
+ kcp.delSegment(newseg)
}
// move available data from rcv_buf -> rcv_queue
@@ -470,14 +461,13 @@ func (kcp *KCP) parse_data(newseg *Segment) {
for k := range kcp.rcv_buf {
seg := &kcp.rcv_buf[k]
if seg.sn == kcp.rcv_nxt && len(kcp.rcv_queue) < int(kcp.rcv_wnd) {
- kcp.rcv_queue = append(kcp.rcv_queue, kcp.rcv_buf[k])
kcp.rcv_nxt++
count++
- seg.data = nil
} else {
break
}
}
+ kcp.rcv_queue = append(kcp.rcv_queue, kcp.rcv_buf[:count]...)
kcp.rcv_buf = kcp.rcv_buf[count:]
}
@@ -489,7 +479,9 @@ func (kcp *KCP) Input(data []byte, update_ack bool) int {
}
var maxack uint32
+ var recentack uint32
var flag int
+
for {
var ts, sn, length, una, conv uint32
var wnd uint16
@@ -525,9 +517,6 @@ func (kcp *KCP) Input(data []byte, update_ack bool) int {
kcp.shrink_buf()
if cmd == IKCP_CMD_ACK {
- if update_ack && _itimediff(kcp.current, ts) >= 0 {
- kcp.update_ack(_itimediff(kcp.current, ts))
- }
kcp.parse_ack(sn)
kcp.shrink_buf()
if flag == 0 {
@@ -536,11 +525,12 @@ func (kcp *KCP) Input(data []byte, update_ack bool) int {
} else if _itimediff(sn, maxack) > 0 {
maxack = sn
}
+ recentack = ts
} else if cmd == IKCP_CMD_PUSH {
if _itimediff(sn, kcp.rcv_nxt+kcp.rcv_wnd) < 0 {
kcp.ack_push(sn, ts)
if _itimediff(sn, kcp.rcv_nxt) >= 0 {
- seg := NewSegment(int(length))
+ seg := kcp.newSegment(int(length))
seg.conv = conv
seg.cmd = uint32(cmd)
seg.frg = uint32(frg)
@@ -550,7 +540,11 @@ func (kcp *KCP) Input(data []byte, update_ack bool) int {
seg.una = una
copy(seg.data, data[:length])
kcp.parse_data(seg)
+ } else {
+ atomic.AddUint64(&DefaultSnmp.RepeatSegs, 1)
}
+ } else {
+ atomic.AddUint64(&DefaultSnmp.RepeatSegs, 1)
}
} else if cmd == IKCP_CMD_WASK {
// ready to send back IKCP_CMD_WINS in Ikcp_flush
@@ -565,8 +559,12 @@ func (kcp *KCP) Input(data []byte, update_ack bool) int {
data = data[length:]
}
+ current := currentMs()
if flag != 0 && update_ack {
kcp.parse_fastack(maxack)
+ if _itimediff(current, recentack) >= 0 {
+ kcp.update_ack(_itimediff(current, recentack))
+ }
}
if _itimediff(kcp.snd_una, una) > 0 {
@@ -603,14 +601,10 @@ func (kcp *KCP) wnd_unused() int32 {
// flush pending data
func (kcp *KCP) flush() {
- current := kcp.current
buffer := kcp.buffer
change := 0
lost := false
- if kcp.updated == 0 {
- return
- }
var seg Segment
seg.conv = kcp.conv
seg.cmd = IKCP_CMD_ACK
@@ -619,25 +613,28 @@ func (kcp *KCP) flush() {
// flush acknowledges
ptr := buffer
- for kcp.acklist.Len() > 0 {
+ for i, ack := range kcp.acklist {
size := len(buffer) - len(ptr)
if size+IKCP_OVERHEAD > int(kcp.mtu) {
kcp.output(buffer, size)
ptr = buffer
}
- ack := heap.Pop(&kcp.acklist).(ACK)
- seg.sn, seg.ts = ack.sn, ack.ts
- ptr = seg.encode(ptr)
+ // filter jitters caused by bufferbloat
+ if ack.sn >= kcp.rcv_nxt || len(kcp.acklist)-1 == i {
+ seg.sn, seg.ts = ack.sn, ack.ts
+ ptr = seg.encode(ptr)
+ }
}
kcp.acklist = nil
+ current := currentMs()
// probe window size (if remote window size equals zero)
if kcp.rmt_wnd == 0 {
if kcp.probe_wait == 0 {
kcp.probe_wait = IKCP_PROBE_INIT
- kcp.ts_probe = kcp.current + kcp.probe_wait
+ kcp.ts_probe = current + kcp.probe_wait
} else {
- if _itimediff(kcp.current, kcp.ts_probe) >= 0 {
+ if _itimediff(current, kcp.ts_probe) >= 0 {
if kcp.probe_wait < IKCP_PROBE_INIT {
kcp.probe_wait = IKCP_PROBE_INIT
}
@@ -645,7 +642,7 @@ func (kcp *KCP) flush() {
if kcp.probe_wait > IKCP_PROBE_LIMIT {
kcp.probe_wait = IKCP_PROBE_LIMIT
}
- kcp.ts_probe = kcp.current + kcp.probe_wait
+ kcp.ts_probe = current + kcp.probe_wait
kcp.probe |= IKCP_ASK_SEND
}
}
@@ -684,6 +681,7 @@ func (kcp *KCP) flush() {
cwnd = _imin_(kcp.cwnd, cwnd)
}
+ // sliding window, controlled by snd_nxt && snd_una+cwnd
count := 0
for k := range kcp.snd_queue {
if _itimediff(kcp.snd_nxt, kcp.snd_una+cwnd) >= 0 {
@@ -696,10 +694,8 @@ func (kcp *KCP) flush() {
newseg.ts = current
newseg.sn = kcp.snd_nxt
newseg.una = kcp.rcv_nxt
- newseg.resendts = current
+ newseg.resendts = newseg.ts
newseg.rto = kcp.rx_rto
- newseg.fastack = 0
- newseg.xmit = 0
kcp.snd_buf = append(kcp.snd_buf, newseg)
kcp.snd_nxt++
count++
@@ -707,27 +703,29 @@ func (kcp *KCP) flush() {
}
kcp.snd_queue = kcp.snd_queue[count:]
+ // flag pending data
+ hasPending := false
+ if count > 0 {
+ hasPending = true
+ }
+
// calculate resent
resent := uint32(kcp.fastresend)
if kcp.fastresend <= 0 {
resent = 0xffffffff
}
- rtomin := (kcp.rx_rto >> 3)
- if kcp.nodelay != 0 {
- rtomin = 0
- }
// flush data segments
- nque := len(kcp.snd_queue)
var lostSegs, fastRetransSegs, earlyRetransSegs uint64
for k := range kcp.snd_buf {
+ current := currentMs()
segment := &kcp.snd_buf[k]
needsend := false
if segment.xmit == 0 {
needsend = true
segment.xmit++
segment.rto = kcp.rx_rto
- segment.resendts = current + segment.rto + rtomin
+ segment.resendts = current + segment.rto
} else if _itimediff(current, segment.resendts) >= 0 {
needsend = true
segment.xmit++
@@ -740,21 +738,26 @@ func (kcp *KCP) flush() {
segment.resendts = current + segment.rto
lost = true
lostSegs++
- } else if segment.fastack >= resent {
- needsend = true
- segment.xmit++
- segment.fastack = 0
- segment.resendts = current + segment.rto
- change++
- fastRetransSegs++
- } else if segment.fastack > 0 && nque == 0 {
- // early retransmit
- needsend = true
- segment.xmit++
- segment.fastack = 0
- segment.resendts = current + segment.rto
- change++
- earlyRetransSegs++
+ } else if segment.fastack >= resent { // fast retransmit
+ lastsend := segment.resendts - segment.rto
+ if _itimediff(current, lastsend) >= int32(kcp.rx_rto/4) {
+ needsend = true
+ segment.xmit++
+ segment.fastack = 0
+ segment.resendts = current + segment.rto
+ change++
+ fastRetransSegs++
+ }
+ } else if segment.fastack > 0 && !hasPending { // early retransmit
+ lastsend := segment.resendts - segment.rto
+ if _itimediff(current, lastsend) >= int32(kcp.rx_rto/4) {
+ needsend = true
+ segment.xmit++
+ segment.fastack = 0
+ segment.resendts = current + segment.rto
+ change++
+ earlyRetransSegs++
+ }
}
if needsend {
@@ -822,27 +825,26 @@ func (kcp *KCP) flush() {
// Update updates state (call it repeatedly, every 10ms-100ms), or you can ask
// ikcp_check when to call it again (without ikcp_input/_send calling).
// 'current' - current timestamp in millisec.
-func (kcp *KCP) Update(current uint32) {
+func (kcp *KCP) Update() {
var slap int32
- kcp.current = current
-
+ current := currentMs()
if kcp.updated == 0 {
kcp.updated = 1
- kcp.ts_flush = kcp.current
+ kcp.ts_flush = current
}
- slap = _itimediff(kcp.current, kcp.ts_flush)
+ slap = _itimediff(current, kcp.ts_flush)
if slap >= 10000 || slap < -10000 {
- kcp.ts_flush = kcp.current
+ kcp.ts_flush = current
slap = 0
}
if slap >= 0 {
kcp.ts_flush += kcp.interval
- if _itimediff(kcp.current, kcp.ts_flush) >= 0 {
- kcp.ts_flush = kcp.current + kcp.interval
+ if _itimediff(current, kcp.ts_flush) >= 0 {
+ kcp.ts_flush = current + kcp.interval
}
kcp.flush()
}
@@ -855,7 +857,8 @@ func (kcp *KCP) Update(current uint32) {
// Important to reduce unnacessary ikcp_update invoking. use it to
// schedule ikcp_update (eg. implementing an epoll-like mechanism,
// or optimize ikcp_update when handling massive kcp connections)
-func (kcp *KCP) Check(current uint32) uint32 {
+func (kcp *KCP) Check() uint32 {
+ current := currentMs()
ts_flush := kcp.ts_flush
tm_flush := int32(0x7fffffff)
tm_packet := int32(0x7fffffff)
diff --git a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/sess.go b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/sess.go
index 737b99d..4879e2a 100644
--- a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/sess.go
+++ b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/sess.go
@@ -3,6 +3,7 @@ package kcp
import (
"crypto/rand"
"encoding/binary"
+ "hash/crc32"
"io"
"net"
"sync"
@@ -10,20 +11,9 @@ import (
"time"
"github.com/pkg/errors"
-
- "github.com/klauspost/crc32"
-
"golang.org/x/net/ipv4"
)
-// Option defines extra options
-type Option interface{}
-
-// OptionWithConvId defines conversation id
-type OptionWithConvId struct {
- Id uint32
-}
-
type errTimeout struct {
error
}
@@ -38,11 +28,26 @@ const (
crcSize = 4 // 4bytes packet checksum
cryptHeaderSize = nonceSize + crcSize
mtuLimit = 2048
- txQueueLimit = 8192
- rxFecLimit = 8192
- defaultKeepAliveInterval = 10 * time.Second
+ rxQueueLimit = 8192
+ rxFECMulti = 3 // FEC keeps rxFECMulti* (dataShard+parityShard) ordered packets in memory
+ defaultKeepAliveInterval = 10
)
+const (
+ errBrokenPipe = "broken pipe"
+ errInvalidOperation = "invalid operation"
+)
+
+var (
+ xmitBuf sync.Pool
+)
+
+func init() {
+ xmitBuf.New = func() interface{} {
+ return make([]byte, mtuLimit)
+ }
+}
+
type (
// UDPSession defines a KCP session implemented by UDP
UDPSession struct {
@@ -58,14 +63,13 @@ type (
die chan struct{}
chReadEvent chan struct{}
chWriteEvent chan struct{}
- chTicker chan time.Time
chUDPOutput chan []byte
headerSize int
ackNoDelay bool
isClosed bool
- keepAliveInterval time.Duration
- xmitBuf sync.Pool
+ keepAliveInterval int32
mu sync.Mutex
+ updateInterval int32
}
setReadBuffer interface {
@@ -80,8 +84,7 @@ type (
// newUDPSession create a new udp session for client or server
func newUDPSession(conv uint32, dataShards, parityShards int, l *Listener, conn net.PacketConn, remote net.Addr, block BlockCrypt) *UDPSession {
sess := new(UDPSession)
- sess.chTicker = make(chan time.Time, 1)
- sess.chUDPOutput = make(chan []byte, txQueueLimit)
+ sess.chUDPOutput = make(chan []byte)
sess.die = make(chan struct{})
sess.chReadEvent = make(chan struct{}, 1)
sess.chWriteEvent = make(chan struct{}, 1)
@@ -90,10 +93,7 @@ func newUDPSession(conv uint32, dataShards, parityShards int, l *Listener, conn
sess.keepAliveInterval = defaultKeepAliveInterval
sess.l = l
sess.block = block
- sess.fec = newFEC(rxFecLimit, dataShards, parityShards)
- sess.xmitBuf.New = func() interface{} {
- return make([]byte, mtuLimit)
- }
+ sess.fec = newFEC(rxFECMulti*(dataShards+parityShards), dataShards, parityShards)
// calculate header size
if sess.block != nil {
sess.headerSize += cryptHeaderSize
@@ -104,7 +104,7 @@ func newUDPSession(conv uint32, dataShards, parityShards int, l *Listener, conn
sess.kcp = NewKCP(conv, func(buf []byte, size int) {
if size >= IKCP_OVERHEAD {
- ext := sess.xmitBuf.Get().([]byte)[:sess.headerSize+size]
+ ext := xmitBuf.Get().([]byte)[:sess.headerSize+size]
copy(ext[sess.headerSize:], buf)
select {
case sess.chUDPOutput <- ext:
@@ -145,7 +145,7 @@ func (s *UDPSession) Read(b []byte) (n int, err error) {
if s.isClosed {
s.mu.Unlock()
- return 0, errors.New("broken pipe")
+ return 0, errors.New(errBrokenPipe)
}
if !s.rd.IsZero() {
@@ -169,19 +169,25 @@ func (s *UDPSession) Read(b []byte) (n int, err error) {
return n, nil
}
- var timeout <-chan time.Time
+ var timeout *time.Timer
+ var c <-chan time.Time
if !s.rd.IsZero() {
delay := s.rd.Sub(time.Now())
- timeout = time.After(delay)
+ timeout = time.NewTimer(delay)
+ c = timeout.C
}
s.mu.Unlock()
// wait for read event or timeout
select {
case <-s.chReadEvent:
- case <-timeout:
+ case <-c:
case <-s.die:
}
+
+ if timeout != nil {
+ timeout.Stop()
+ }
}
}
@@ -191,7 +197,7 @@ func (s *UDPSession) Write(b []byte) (n int, err error) {
s.mu.Lock()
if s.isClosed {
s.mu.Unlock()
- return 0, errors.New("broken pipe")
+ return 0, errors.New(errBrokenPipe)
}
if !s.wd.IsZero() {
@@ -201,7 +207,7 @@ func (s *UDPSession) Write(b []byte) (n int, err error) {
}
}
- if s.kcp.WaitSnd() < 2*int(s.kcp.snd_wnd) {
+ if s.kcp.WaitSnd() < int(s.kcp.snd_wnd) {
n = len(b)
max := s.kcp.mss << 8
for {
@@ -213,26 +219,31 @@ func (s *UDPSession) Write(b []byte) (n int, err error) {
b = b[max:]
}
}
- s.kcp.current = currentMs()
s.kcp.flush()
s.mu.Unlock()
atomic.AddUint64(&DefaultSnmp.BytesSent, uint64(n))
return n, nil
}
- var timeout <-chan time.Time
+ var timeout *time.Timer
+ var c <-chan time.Time
if !s.wd.IsZero() {
delay := s.wd.Sub(time.Now())
- timeout = time.After(delay)
+ timeout = time.NewTimer(delay)
+ c = timeout.C
}
s.mu.Unlock()
// wait for write event or timeout
select {
case <-s.chWriteEvent:
- case <-timeout:
+ case <-c:
case <-s.die:
}
+
+ if timeout != nil {
+ timeout.Stop()
+ }
}
}
@@ -241,7 +252,7 @@ func (s *UDPSession) Close() error {
s.mu.Lock()
defer s.mu.Unlock()
if s.isClosed {
- return errors.New("broken pipe")
+ return errors.New(errBrokenPipe)
}
close(s.die)
s.isClosed = true
@@ -321,6 +332,7 @@ func (s *UDPSession) SetNoDelay(nodelay, interval, resend, nc int) {
s.mu.Lock()
defer s.mu.Unlock()
s.kcp.NoDelay(nodelay, interval, resend, nc)
+ atomic.StoreInt32(&s.updateInterval, int32(interval))
}
// SetDSCP sets the 6bit DSCP field of IP header, no effect if it's accepted from Listener
@@ -328,11 +340,13 @@ func (s *UDPSession) SetDSCP(dscp int) error {
s.mu.Lock()
defer s.mu.Unlock()
if s.l == nil {
- if nc, ok := s.conn.(net.Conn); ok {
+ if nc, ok := s.conn.(*ConnectedUDPConn); ok {
+ return ipv4.NewConn(nc.Conn).SetTOS(dscp << 2)
+ } else if nc, ok := s.conn.(net.Conn); ok {
return ipv4.NewConn(nc).SetTOS(dscp << 2)
}
}
- return nil
+ return errors.New(errInvalidOperation)
}
// SetReadBuffer sets the socket read buffer, no effect if it's accepted from Listener
@@ -344,7 +358,7 @@ func (s *UDPSession) SetReadBuffer(bytes int) error {
return nc.SetReadBuffer(bytes)
}
}
- return nil
+ return errors.New(errInvalidOperation)
}
// SetWriteBuffer sets the socket write buffer, no effect if it's accepted from Listener
@@ -356,24 +370,12 @@ func (s *UDPSession) SetWriteBuffer(bytes int) error {
return nc.SetWriteBuffer(bytes)
}
}
- return nil
+ return errors.New(errInvalidOperation)
}
// SetKeepAlive changes per-connection NAT keepalive interval; 0 to disable, default to 10s
func (s *UDPSession) SetKeepAlive(interval int) {
- s.mu.Lock()
- defer s.mu.Unlock()
- s.keepAliveInterval = time.Duration(interval) * time.Second
-}
-
-// writeTo wraps write method for client & listener
-func (s *UDPSession) writeTo(b []byte, addr net.Addr) (int, error) {
- if s.l == nil {
- if nc, ok := s.conn.(io.Writer); ok {
- return nc.Write(b)
- }
- }
- return s.conn.WriteTo(b, addr)
+ atomic.StoreInt32(&s.keepAliveInterval, int32(interval))
}
func (s *UDPSession) outputTask() {
@@ -385,13 +387,15 @@ func (s *UDPSession) outputTask() {
szOffset := fecOffset + fecHeaderSize
// fec data group
+ var cacheLine []byte
var fecGroup [][]byte
var fecCnt int
var fecMaxSize int
if s.fec != nil {
+ cacheLine = make([]byte, s.fec.shardSize*mtuLimit)
fecGroup = make([][]byte, s.fec.shardSize)
for k := range fecGroup {
- fecGroup[k] = make([]byte, mtuLimit)
+ fecGroup[k] = cacheLine[k*mtuLimit : (k+1)*mtuLimit]
}
}
@@ -402,23 +406,31 @@ func (s *UDPSession) outputTask() {
for {
select {
+ // receive from a synchronous channel
+ // buffered channel must be avoided, because of "bufferbloat"
case ext := <-s.chUDPOutput:
var ecc [][]byte
if s.fec != nil {
s.fec.markData(ext[fecOffset:])
- // explicit size
+ // explicit size, including 2bytes size itself.
binary.LittleEndian.PutUint16(ext[szOffset:], uint16(len(ext[szOffset:])))
// copy data to fec group
- xorBytes(fecGroup[fecCnt], fecGroup[fecCnt], fecGroup[fecCnt])
+ sz := len(ext)
+ fecGroup[fecCnt] = fecGroup[fecCnt][:sz]
copy(fecGroup[fecCnt], ext)
fecCnt++
- if len(ext) > fecMaxSize {
- fecMaxSize = len(ext)
+ if sz > fecMaxSize {
+ fecMaxSize = sz
}
// calculate Reed-Solomon Erasure Code
if fecCnt == s.fec.dataShards {
+ for i := 0; i < s.fec.dataShards; i++ {
+ shard := fecGroup[i]
+ slen := len(shard)
+ xorBytes(shard[slen:fecMaxSize], shard[slen:fecMaxSize], shard[slen:fecMaxSize])
+ }
ecc = s.fec.calcECC(fecGroup, szOffset, fecMaxSize)
for k := range ecc {
s.fec.markFEC(ecc[k][fecOffset:])
@@ -445,38 +457,36 @@ func (s *UDPSession) outputTask() {
}
}
- //if rand.Intn(100) < 80 {
- if n, err := s.writeTo(ext, s.remote); err == nil {
- atomic.AddUint64(&DefaultSnmp.OutSegs, 1)
- atomic.AddUint64(&DefaultSnmp.OutBytes, uint64(n))
+ nbytes := 0
+ nsegs := 0
+ // if mrand.Intn(100) < 50 {
+ if n, err := s.conn.WriteTo(ext, s.remote); err == nil {
+ nbytes += n
+ nsegs++
}
- //}
+ // }
if ecc != nil {
for k := range ecc {
- if n, err := s.writeTo(ecc[k], s.remote); err == nil {
- atomic.AddUint64(&DefaultSnmp.OutSegs, 1)
- atomic.AddUint64(&DefaultSnmp.OutBytes, uint64(n))
+ if n, err := s.conn.WriteTo(ecc[k], s.remote); err == nil {
+ nbytes += n
+ nsegs++
}
}
}
- xorBytes(ext, ext, ext)
- s.xmitBuf.Put(ext)
+ atomic.AddUint64(&DefaultSnmp.OutSegs, uint64(nsegs))
+ atomic.AddUint64(&DefaultSnmp.OutBytes, uint64(nbytes))
+ xmitBuf.Put(ext)
case <-ticker.C: // NAT keep-alive
- if len(s.chUDPOutput) == 0 {
- s.mu.Lock()
- interval := s.keepAliveInterval
- s.mu.Unlock()
- if interval > 0 && time.Now().After(lastPing.Add(interval)) {
- buf := make([]byte, 2)
- io.ReadFull(rand.Reader, buf)
- rnd := int(binary.LittleEndian.Uint16(buf))
- sz := rnd%(IKCP_MTU_DEF-s.headerSize-IKCP_OVERHEAD) + s.headerSize + IKCP_OVERHEAD
- ping := make([]byte, sz)
- io.ReadFull(rand.Reader, ping)
- s.writeTo(ping, s.remote)
- lastPing = time.Now()
- }
+ interval := time.Duration(atomic.LoadInt32(&s.keepAliveInterval)) * time.Second
+ if interval > 0 && time.Now().After(lastPing.Add(interval)) {
+ var rnd uint16
+ binary.Read(rand.Reader, binary.LittleEndian, &rnd)
+ sz := int(rnd)%(IKCP_MTU_DEF-s.headerSize-IKCP_OVERHEAD) + s.headerSize + IKCP_OVERHEAD
+ ping := make([]byte, sz) // randomized ping packet
+ io.ReadFull(rand.Reader, ping)
+ s.conn.WriteTo(ping, s.remote)
+ lastPing = time.Now()
}
case <-s.die:
return
@@ -486,25 +496,18 @@ func (s *UDPSession) outputTask() {
// kcp update, input loop
func (s *UDPSession) updateTask() {
- var tc <-chan time.Time
- if s.l == nil { // client
- ticker := time.NewTicker(10 * time.Millisecond)
- tc = ticker.C
- defer ticker.Stop()
- } else {
- tc = s.chTicker
- }
+ tc := time.After(time.Duration(atomic.LoadInt32(&s.updateInterval)) * time.Millisecond)
for {
select {
case <-tc:
s.mu.Lock()
- current := currentMs()
- s.kcp.Update(current)
- if s.kcp.WaitSnd() < 2*int(s.kcp.snd_wnd) {
+ s.kcp.flush()
+ if s.kcp.WaitSnd() < int(s.kcp.snd_wnd) {
s.notifyWriteEvent()
}
s.mu.Unlock()
+ tc = time.After(time.Duration(atomic.LoadInt32(&s.updateInterval)) * time.Millisecond)
case <-s.die:
if s.l != nil { // has listener
select {
@@ -537,58 +540,84 @@ func (s *UDPSession) notifyWriteEvent() {
}
func (s *UDPSession) kcpInput(data []byte) {
- current := currentMs()
+ var kcpInErrors, fecErrs, fecRecovered, fecSegs uint64
+
if s.fec != nil {
f := s.fec.decode(data)
+ s.mu.Lock()
+ if f.flag == typeData {
+ if ret := s.kcp.Input(data[fecHeaderSizePlus2:], true); ret != 0 {
+ kcpInErrors++
+ }
+ }
+
if f.flag == typeData || f.flag == typeFEC {
if f.flag == typeFEC {
- atomic.AddUint64(&DefaultSnmp.FECSegs, 1)
+ fecSegs++
}
if recovers := s.fec.input(f); recovers != nil {
- s.mu.Lock()
- s.kcp.current = current
- for k := range recovers {
- sz := binary.LittleEndian.Uint16(recovers[k])
- if int(sz) <= len(recovers[k]) && sz >= 2 {
- s.kcp.Input(recovers[k][2:sz], false)
+ for _, r := range recovers {
+ if len(r) >= 2 { // must be larger than 2bytes
+ sz := binary.LittleEndian.Uint16(r)
+ if int(sz) <= len(r) && sz >= 2 {
+ if ret := s.kcp.Input(r[2:sz], false); ret == 0 {
+ fecRecovered++
+ } else {
+ kcpInErrors++
+ }
+ } else {
+ fecErrs++
+ }
} else {
- atomic.AddUint64(&DefaultSnmp.FECErrs, 1)
+ fecErrs++
}
}
- s.mu.Unlock()
- atomic.AddUint64(&DefaultSnmp.FECRecovered, uint64(len(recovers)))
}
}
- if f.flag == typeData {
- s.mu.Lock()
- s.kcp.current = current
- s.kcp.Input(data[fecHeaderSizePlus2:], true)
- s.mu.Unlock()
+
+ // notify reader
+ if n := s.kcp.PeekSize(); n > 0 {
+ s.notifyReadEvent()
}
+ if s.ackNoDelay {
+ s.kcp.flush()
+ }
+ s.mu.Unlock()
} else {
s.mu.Lock()
- s.kcp.current = current
- s.kcp.Input(data, true)
+ if ret := s.kcp.Input(data, true); ret != 0 {
+ kcpInErrors++
+ }
+ // notify reader
+ if n := s.kcp.PeekSize(); n > 0 {
+ s.notifyReadEvent()
+ }
+ if s.ackNoDelay {
+ s.kcp.flush()
+ }
s.mu.Unlock()
}
- // notify reader
- s.mu.Lock()
- if n := s.kcp.PeekSize(); n > 0 {
- s.notifyReadEvent()
- }
- if s.ackNoDelay {
- s.kcp.current = current
- s.kcp.flush()
- }
- s.mu.Unlock()
atomic.AddUint64(&DefaultSnmp.InSegs, 1)
+ atomic.AddUint64(&DefaultSnmp.InBytes, uint64(len(data)))
+ if fecSegs > 0 {
+ atomic.AddUint64(&DefaultSnmp.FECSegs, fecSegs)
+ }
+ if kcpInErrors > 0 {
+ atomic.AddUint64(&DefaultSnmp.KCPInErrors, kcpInErrors)
+ }
+ if fecErrs > 0 {
+ atomic.AddUint64(&DefaultSnmp.FECErrs, fecErrs)
+ }
+ if fecRecovered > 0 {
+ atomic.AddUint64(&DefaultSnmp.FECRecovered, fecRecovered)
+ }
}
func (s *UDPSession) receiver(ch chan []byte) {
for {
- data := s.xmitBuf.Get().([]byte)[:mtuLimit]
+ data := xmitBuf.Get().([]byte)[:mtuLimit]
if n, _, err := s.conn.ReadFrom(data); err == nil && n >= s.headerSize+IKCP_OVERHEAD {
select {
case ch <- data[:n]:
@@ -604,7 +633,7 @@ func (s *UDPSession) receiver(ch chan []byte) {
// read loop for client session
func (s *UDPSession) readLoop() {
- chPacket := make(chan []byte, txQueueLimit)
+ chPacket := make(chan []byte, rxQueueLimit)
go s.receiver(chPacket)
for {
@@ -629,8 +658,7 @@ func (s *UDPSession) readLoop() {
if dataValid {
s.kcpInput(data)
}
- xorBytes(raw, raw, raw)
- s.xmitBuf.Put(raw)
+ xmitBuf.Put(raw)
case <-s.die:
return
}
@@ -662,10 +690,8 @@ type (
// monitor incoming data for all connections of server
func (l *Listener) monitor() {
- chPacket := make(chan packet, txQueueLimit)
+ chPacket := make(chan packet, rxQueueLimit)
go l.receiver(chPacket)
- ticker := time.NewTicker(10 * time.Millisecond)
- defer ticker.Stop()
for {
select {
case p := <-chPacket:
@@ -715,20 +741,11 @@ func (l *Listener) monitor() {
}
}
- xorBytes(raw, raw, raw)
l.rxbuf.Put(raw)
case deadlink := <-l.chDeadlinks:
delete(l.sessions, deadlink.String())
case <-l.die:
return
- case <-ticker.C:
- now := time.Now()
- for _, s := range l.sessions {
- select {
- case s.chTicker <- now:
- default:
- }
- }
}
}
}
@@ -751,7 +768,7 @@ func (l *Listener) SetReadBuffer(bytes int) error {
if nc, ok := l.conn.(setReadBuffer); ok {
return nc.SetReadBuffer(bytes)
}
- return nil
+ return errors.New(errInvalidOperation)
}
// SetWriteBuffer sets the socket write buffer for the Listener
@@ -759,7 +776,7 @@ func (l *Listener) SetWriteBuffer(bytes int) error {
if nc, ok := l.conn.(setWriteBuffer); ok {
return nc.SetWriteBuffer(bytes)
}
- return nil
+ return errors.New(errInvalidOperation)
}
// SetDSCP sets the 6bit DSCP field of IP header
@@ -767,7 +784,7 @@ func (l *Listener) SetDSCP(dscp int) error {
if nc, ok := l.conn.(net.Conn); ok {
return ipv4.NewConn(nc).SetTOS(dscp << 2)
}
- return nil
+ return errors.New(errInvalidOperation)
}
// Accept implements the Accept method in the Listener interface; it waits for the next call and returns a generic Conn.
@@ -788,7 +805,7 @@ func (l *Listener) AcceptKCP() (*UDPSession, error) {
case c := <-l.chAccepts:
return c, nil
case <-l.die:
- return nil, errors.New("listener stopped")
+ return nil, errors.New(errBrokenPipe)
}
}
@@ -823,7 +840,7 @@ func (l *Listener) Addr() net.Addr {
}
// Listen listens for incoming KCP packets addressed to the local address laddr on the network "udp",
-func Listen(laddr string) (*Listener, error) {
+func Listen(laddr string) (net.Listener, error) {
return ListenWithOptions(laddr, nil, 0, 0)
}
@@ -839,6 +856,11 @@ func ListenWithOptions(laddr string, block BlockCrypt, dataShards, parityShards
return nil, errors.Wrap(err, "net.ListenUDP")
}
+ return ServeConn(block, dataShards, parityShards, conn)
+}
+
+// ServeConn serves KCP protocol for a single packet connection.
+func ServeConn(block BlockCrypt, dataShards, parityShards int, conn net.PacketConn) (*Listener, error) {
l := new(Listener)
l.conn = conn
l.sessions = make(map[string]*UDPSession)
@@ -848,7 +870,7 @@ func ListenWithOptions(laddr string, block BlockCrypt, dataShards, parityShards
l.dataShards = dataShards
l.parityShards = parityShards
l.block = block
- l.fec = newFEC(rxFecLimit, dataShards, parityShards)
+ l.fec = newFEC(rxFECMulti*(dataShards+parityShards), dataShards, parityShards)
l.rxbuf.New = func() interface{} {
return make([]byte, mtuLimit)
}
@@ -866,12 +888,12 @@ func ListenWithOptions(laddr string, block BlockCrypt, dataShards, parityShards
}
// Dial connects to the remote address "raddr" on the network "udp"
-func Dial(raddr string) (*UDPSession, error) {
+func Dial(raddr string) (net.Conn, error) {
return DialWithOptions(raddr, nil, 0, 0)
}
// DialWithOptions connects to the remote address "raddr" on the network "udp" with packet encryption
-func DialWithOptions(raddr string, block BlockCrypt, dataShards, parityShards int, opts ...Option) (*UDPSession, error) {
+func DialWithOptions(raddr string, block BlockCrypt, dataShards, parityShards int) (*UDPSession, error) {
udpaddr, err := net.ResolveUDPAddr("udp", raddr)
if err != nil {
return nil, errors.Wrap(err, "net.ResolveUDPAddr")
@@ -882,20 +904,34 @@ func DialWithOptions(raddr string, block BlockCrypt, dataShards, parityShards in
return nil, errors.Wrap(err, "net.DialUDP")
}
- buf := make([]byte, 4)
- io.ReadFull(rand.Reader, buf)
- convid := binary.LittleEndian.Uint32(buf)
- for k := range opts {
- switch opt := opts[k].(type) {
- case OptionWithConvId:
- convid = opt.Id
- default:
- return nil, errors.New("unrecognized option")
- }
+ return NewConn(raddr, block, dataShards, parityShards, &ConnectedUDPConn{udpconn, udpconn})
+}
+
+// NewConn establishes a session and talks KCP protocol over a packet connection.
+func NewConn(raddr string, block BlockCrypt, dataShards, parityShards int, conn net.PacketConn) (*UDPSession, error) {
+ udpaddr, err := net.ResolveUDPAddr("udp", raddr)
+ if err != nil {
+ return nil, errors.Wrap(err, "net.ResolveUDPAddr")
}
- return newUDPSession(convid, dataShards, parityShards, nil, udpconn, udpaddr, block), nil
+
+ var convid uint32
+ binary.Read(rand.Reader, binary.LittleEndian, &convid)
+ return newUDPSession(convid, dataShards, parityShards, nil, conn, udpaddr, block), nil
}
func currentMs() uint32 {
return uint32(time.Now().UnixNano() / int64(time.Millisecond))
}
+
+// ConnectedUDPConn is a wrapper for net.UDPConn which converts WriteTo syscalls
+// to Write syscalls that are 4 times faster on some OS'es. This should only be
+// used for connections that were produced by a net.Dial* call.
+type ConnectedUDPConn struct {
+ *net.UDPConn
+ Conn net.Conn // underlying connection if any
+}
+
+// WriteTo redirects all writes to the Write syscall, which is 4 times faster.
+func (c *ConnectedUDPConn) WriteTo(b []byte, addr net.Addr) (int, error) {
+ return c.Write(b)
+}
diff --git a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/snmp.go b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/snmp.go
index 997b163..e8ab194 100644
--- a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/snmp.go
+++ b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/snmp.go
@@ -1,34 +1,95 @@
package kcp
-import "sync/atomic"
+import (
+ "fmt"
+ "sync/atomic"
+)
// Snmp defines network statistics indicator
type Snmp struct {
- BytesSent uint64 // payload bytes sent
+ BytesSent uint64 // raw bytes sent
BytesReceived uint64
MaxConn uint64
ActiveOpens uint64
PassiveOpens uint64
- CurrEstab uint64
- InErrs uint64
- InCsumErrors uint64 // checksum errors
+ CurrEstab uint64 // count of connections for now
+ InErrs uint64 // udp read errors
+ InCsumErrors uint64 // checksum errors from CRC32
+ KCPInErrors uint64 // packet iput errors from kcp
InSegs uint64
OutSegs uint64
+ InBytes uint64 // udp bytes received
OutBytes uint64 // udp bytes sent
RetransSegs uint64
FastRetransSegs uint64
EarlyRetransSegs uint64
- LostSegs uint64
- RepeatSegs uint64
- FECRecovered uint64
- FECErrs uint64
- FECSegs uint64 // fec segments received
+ LostSegs uint64 // number of segs infered as lost
+ RepeatSegs uint64 // number of segs duplicated
+ FECRecovered uint64 // correct packets recovered from FEC
+ FECErrs uint64 // incorrect packets recovered from FEC
+ FECSegs uint64 // FEC segments received
+ FECShortShards uint64 // number of data shards that's not enough for recovery
}
func newSnmp() *Snmp {
return new(Snmp)
}
+func (s *Snmp) Header() []string {
+ return []string{
+ "BytesSent",
+ "BytesReceived",
+ "MaxConn",
+ "ActiveOpens",
+ "PassiveOpens",
+ "CurrEstab",
+ "InErrs",
+ "InCsumErrors",
+ "KCPInErrors",
+ "InSegs",
+ "OutSegs",
+ "InBytes",
+ "OutBytes",
+ "RetransSegs",
+ "FastRetransSegs",
+ "EarlyRetransSegs",
+ "LostSegs",
+ "RepeatSegs",
+ "FECSegs",
+ "FECErrs",
+ "FECRecovered",
+ "FECShortShards",
+ }
+}
+
+func (s *Snmp) ToSlice() []string {
+ snmp := s.Copy()
+ return []string{
+ fmt.Sprint(snmp.BytesSent),
+ fmt.Sprint(snmp.BytesReceived),
+ fmt.Sprint(snmp.MaxConn),
+ fmt.Sprint(snmp.ActiveOpens),
+ fmt.Sprint(snmp.PassiveOpens),
+ fmt.Sprint(snmp.CurrEstab),
+ fmt.Sprint(snmp.InErrs),
+ fmt.Sprint(snmp.InCsumErrors),
+ fmt.Sprint(snmp.KCPInErrors),
+ fmt.Sprint(snmp.InSegs),
+ fmt.Sprint(snmp.OutSegs),
+ fmt.Sprint(snmp.InBytes),
+ fmt.Sprint(snmp.OutBytes),
+ fmt.Sprint(snmp.RetransSegs),
+ fmt.Sprint(snmp.FastRetransSegs),
+ fmt.Sprint(snmp.EarlyRetransSegs),
+ fmt.Sprint(snmp.LostSegs),
+ fmt.Sprint(snmp.RepeatSegs),
+ fmt.Sprint(snmp.FECSegs),
+ fmt.Sprint(snmp.FECErrs),
+ fmt.Sprint(snmp.FECRecovered),
+ fmt.Sprint(snmp.FECShortShards),
+ }
+}
+
// Copy make a copy of current snmp snapshot
func (s *Snmp) Copy() *Snmp {
d := newSnmp()
@@ -40,8 +101,10 @@ func (s *Snmp) Copy() *Snmp {
d.CurrEstab = atomic.LoadUint64(&s.CurrEstab)
d.InErrs = atomic.LoadUint64(&s.InErrs)
d.InCsumErrors = atomic.LoadUint64(&s.InCsumErrors)
+ d.KCPInErrors = atomic.LoadUint64(&s.KCPInErrors)
d.InSegs = atomic.LoadUint64(&s.InSegs)
d.OutSegs = atomic.LoadUint64(&s.OutSegs)
+ d.InBytes = atomic.LoadUint64(&s.InBytes)
d.OutBytes = atomic.LoadUint64(&s.OutBytes)
d.RetransSegs = atomic.LoadUint64(&s.RetransSegs)
d.FastRetransSegs = atomic.LoadUint64(&s.FastRetransSegs)
@@ -51,9 +114,36 @@ func (s *Snmp) Copy() *Snmp {
d.FECSegs = atomic.LoadUint64(&s.FECSegs)
d.FECErrs = atomic.LoadUint64(&s.FECErrs)
d.FECRecovered = atomic.LoadUint64(&s.FECRecovered)
+ d.FECShortShards = atomic.LoadUint64(&s.FECShortShards)
return d
}
+// Reset values to zero
+func (s *Snmp) Reset() {
+ atomic.StoreUint64(&s.BytesSent, 0)
+ atomic.StoreUint64(&s.BytesReceived, 0)
+ atomic.StoreUint64(&s.MaxConn, 0)
+ atomic.StoreUint64(&s.ActiveOpens, 0)
+ atomic.StoreUint64(&s.PassiveOpens, 0)
+ atomic.StoreUint64(&s.CurrEstab, 0)
+ atomic.StoreUint64(&s.InErrs, 0)
+ atomic.StoreUint64(&s.InCsumErrors, 0)
+ atomic.StoreUint64(&s.KCPInErrors, 0)
+ atomic.StoreUint64(&s.InSegs, 0)
+ atomic.StoreUint64(&s.OutSegs, 0)
+ atomic.StoreUint64(&s.InBytes, 0)
+ atomic.StoreUint64(&s.OutBytes, 0)
+ atomic.StoreUint64(&s.RetransSegs, 0)
+ atomic.StoreUint64(&s.FastRetransSegs, 0)
+ atomic.StoreUint64(&s.EarlyRetransSegs, 0)
+ atomic.StoreUint64(&s.LostSegs, 0)
+ atomic.StoreUint64(&s.RepeatSegs, 0)
+ atomic.StoreUint64(&s.FECSegs, 0)
+ atomic.StoreUint64(&s.FECErrs, 0)
+ atomic.StoreUint64(&s.FECRecovered, 0)
+ atomic.StoreUint64(&s.FECShortShards, 0)
+}
+
// DefaultSnmp is the global KCP connection statistics collector
var DefaultSnmp *Snmp
diff --git a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/xor.go b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/xor.go
index 5d21095..20fa2e4 100644
--- a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/xor.go
+++ b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/xor.go
@@ -44,15 +44,18 @@ func safeXORBytes(dst, a, b []byte) int {
}
for i := ex; i < n; i += 8 {
- dst[i] = a[i] ^ b[i]
- dst[i+1] = a[i+1] ^ b[i+1]
- dst[i+2] = a[i+2] ^ b[i+2]
- dst[i+3] = a[i+3] ^ b[i+3]
+ _dst := dst[i : i+8]
+ _a := a[i : i+8]
+ _b := b[i : i+8]
+ _dst[0] = _a[0] ^ _b[0]
+ _dst[1] = _a[1] ^ _b[1]
+ _dst[2] = _a[2] ^ _b[2]
+ _dst[3] = _a[3] ^ _b[3]
- dst[i+4] = a[i+4] ^ b[i+4]
- dst[i+5] = a[i+5] ^ b[i+5]
- dst[i+6] = a[i+6] ^ b[i+6]
- dst[i+7] = a[i+7] ^ b[i+7]
+ _dst[4] = _a[4] ^ _b[4]
+ _dst[5] = _a[5] ^ _b[5]
+ _dst[6] = _a[6] ^ _b[6]
+ _dst[7] = _a[7] ^ _b[7]
}
return n
}
@@ -85,14 +88,17 @@ func fastXORWords(dst, a, b []byte) {
}
for i := ex; i < n; i += 8 {
- dw[i] = aw[i] ^ bw[i]
- dw[i+1] = aw[i+1] ^ bw[i+1]
- dw[i+2] = aw[i+2] ^ bw[i+2]
- dw[i+3] = aw[i+3] ^ bw[i+3]
- dw[i+4] = aw[i+4] ^ bw[i+4]
- dw[i+5] = aw[i+5] ^ bw[i+5]
- dw[i+6] = aw[i+6] ^ bw[i+6]
- dw[i+7] = aw[i+7] ^ bw[i+7]
+ _dw := dw[i : i+8]
+ _aw := aw[i : i+8]
+ _bw := bw[i : i+8]
+ _dw[0] = _aw[0] ^ _bw[0]
+ _dw[1] = _aw[1] ^ _bw[1]
+ _dw[2] = _aw[2] ^ _bw[2]
+ _dw[3] = _aw[3] ^ _bw[3]
+ _dw[4] = _aw[4] ^ _bw[4]
+ _dw[5] = _aw[5] ^ _bw[5]
+ _dw[6] = _aw[6] ^ _bw[6]
+ _dw[7] = _aw[7] ^ _bw[7]
}
}
diff --git a/cmd/gost/vendor/gopkg.in/xtaci/smux.v1/README.md b/cmd/gost/vendor/gopkg.in/xtaci/smux.v1/README.md
index 15e9871..9a760d2 100644
--- a/cmd/gost/vendor/gopkg.in/xtaci/smux.v1/README.md
+++ b/cmd/gost/vendor/gopkg.in/xtaci/smux.v1/README.md
@@ -62,7 +62,7 @@ func client() {
panic(err)
}
- // Stream implements net.Conn
+ // Stream implements io.ReadWriteCloser
stream.Write([]byte("ping"))
}
@@ -94,4 +94,4 @@ func server() {
## Status
-Beta
+Stable
diff --git a/cmd/gost/vendor/gopkg.in/xtaci/smux.v1/session.go b/cmd/gost/vendor/gopkg.in/xtaci/smux.v1/session.go
index a06f2ec..5c759c0 100644
--- a/cmd/gost/vendor/gopkg.in/xtaci/smux.v1/session.go
+++ b/cmd/gost/vendor/gopkg.in/xtaci/smux.v1/session.go
@@ -16,10 +16,19 @@ const (
const (
errBrokenPipe = "broken pipe"
- errConnReset = "connection reset by peer"
errInvalidProtocol = "invalid protocol version"
)
+type writeRequest struct {
+ frame Frame
+ result chan writeResult
+}
+
+type writeResult struct {
+ n int
+ err error
+}
+
// Session defines a multiplexed connection for streams
type Session struct {
conn io.ReadWriteCloser
@@ -38,7 +47,12 @@ type Session struct {
dieLock sync.Mutex
chAccepts chan *Stream
+ xmitPool sync.Pool
dataReady int32 // flag data has arrived
+
+ deadline atomic.Value
+
+ writes chan writeRequest
}
func newSession(config *Config, conn io.ReadWriteCloser, client bool) *Session {
@@ -50,12 +64,18 @@ func newSession(config *Config, conn io.ReadWriteCloser, client bool) *Session {
s.chAccepts = make(chan *Stream, defaultAcceptBacklog)
s.bucket = int32(config.MaxReceiveBuffer)
s.bucketCond = sync.NewCond(&sync.Mutex{})
+ s.xmitPool.New = func() interface{} {
+ return make([]byte, (1<<16)+headerSize)
+ }
+ s.writes = make(chan writeRequest)
+
if client {
s.nextStreamID = 1
} else {
s.nextStreamID = 2
}
go s.recvLoop()
+ go s.sendLoop()
go s.keepalive()
return s
}
@@ -82,9 +102,17 @@ func (s *Session) OpenStream() (*Stream, error) {
// AcceptStream is used to block until the next available stream
// is ready to be accepted.
func (s *Session) AcceptStream() (*Stream, error) {
+ var deadline <-chan time.Time
+ if d, ok := s.deadline.Load().(time.Time); ok && !d.IsZero() {
+ timer := time.NewTimer(d.Sub(time.Now()))
+ defer timer.Stop()
+ deadline = timer.C
+ }
select {
case stream := <-s.chAccepts:
return stream, nil
+ case <-deadline:
+ return nil, errTimeout
case <-s.die:
return nil, errors.New(errBrokenPipe)
}
@@ -93,13 +121,14 @@ func (s *Session) AcceptStream() (*Stream, error) {
// Close is used to close the session and all streams.
func (s *Session) Close() (err error) {
s.dieLock.Lock()
- defer s.dieLock.Unlock()
select {
case <-s.die:
+ s.dieLock.Unlock()
return errors.New(errBrokenPipe)
default:
close(s.die)
+ s.dieLock.Unlock()
s.streamLock.Lock()
for k := range s.streams {
s.streams[k].sessionClose()
@@ -130,6 +159,13 @@ func (s *Session) NumStreams() int {
return len(s.streams)
}
+// SetDeadline sets a deadline used by Accept* calls.
+// A zero time value disables the deadline.
+func (s *Session) SetDeadline(t time.Time) error {
+ s.deadline.Store(t)
+ return nil
+}
+
// notify the session that a stream has closed
func (s *Session) streamClosed(sid uint32) {
s.streamLock.Lock()
@@ -144,9 +180,12 @@ func (s *Session) streamClosed(sid uint32) {
// returnTokens is called by stream to return token after read
func (s *Session) returnTokens(n int) {
- if atomic.AddInt32(&s.bucket, int32(n)) > 0 {
+ oldvalue := atomic.LoadInt32(&s.bucket)
+ newvalue := atomic.AddInt32(&s.bucket, int32(n))
+ if oldvalue <= 0 && newvalue > 0 {
s.bucketCond.Signal()
}
+
}
// session read a frame from underlying connection
@@ -250,26 +289,56 @@ func (s *Session) keepalive() {
}
}
+func (s *Session) sendLoop() {
+ for {
+ select {
+ case <-s.die:
+ return
+ case request, ok := <-s.writes:
+ if !ok {
+ continue
+ }
+ buf := s.xmitPool.Get().([]byte)
+ buf[0] = request.frame.ver
+ buf[1] = request.frame.cmd
+ binary.LittleEndian.PutUint16(buf[2:], uint16(len(request.frame.data)))
+ binary.LittleEndian.PutUint32(buf[4:], request.frame.sid)
+ copy(buf[headerSize:], request.frame.data)
+
+ s.writeLock.Lock()
+ n, err := s.conn.Write(buf[:headerSize+len(request.frame.data)])
+ s.writeLock.Unlock()
+ s.xmitPool.Put(buf)
+
+ n -= headerSize
+ if n < 0 {
+ n = 0
+ }
+
+ result := writeResult{
+ n: n,
+ err: err,
+ }
+
+ request.result <- result
+ close(request.result)
+ }
+ }
+}
+
// writeFrame writes the frame to the underlying connection
// and returns the number of bytes written if successful
func (s *Session) writeFrame(f Frame) (n int, err error) {
- buf := make([]byte, headerSize+len(f.data))
- buf[0] = f.ver
- buf[1] = f.cmd
- binary.LittleEndian.PutUint16(buf[2:], uint16(len(f.data)))
- binary.LittleEndian.PutUint32(buf[4:], f.sid)
- copy(buf[headerSize:], f.data)
+ req := writeRequest{
+ frame: f,
+ result: make(chan writeResult, 1),
+ }
+ select {
+ case <-s.die:
+ return 0, errors.New(errBrokenPipe)
+ case s.writes <- req:
+ }
- s.writeLock.Lock()
- n, err = s.conn.Write(buf)
- s.writeLock.Unlock()
- return n, err
-}
-
-// writeBinary writes the byte slice to the underlying connection
-func (s *Session) writeBinary(bts []byte) (n int, err error) {
- s.writeLock.Lock()
- n, err = s.conn.Write(bts)
- s.writeLock.Unlock()
- return n, err
+ result := <-req.result
+ return result.n, result.err
}
diff --git a/cmd/gost/vendor/gopkg.in/xtaci/smux.v1/stream.go b/cmd/gost/vendor/gopkg.in/xtaci/smux.v1/stream.go
index 8c44dd8..34e4abb 100644
--- a/cmd/gost/vendor/gopkg.in/xtaci/smux.v1/stream.go
+++ b/cmd/gost/vendor/gopkg.in/xtaci/smux.v1/stream.go
@@ -2,24 +2,28 @@ package smux
import (
"bytes"
- "encoding/binary"
+ "io"
+ "net"
"sync"
"sync/atomic"
+ "time"
"github.com/pkg/errors"
)
// Stream implements io.ReadWriteCloser
type Stream struct {
- id uint32
- rstflag int32
- sess *Session
- buffer bytes.Buffer
- bufferLock sync.Mutex
- frameSize int
- chReadEvent chan struct{} // notify a read event
- die chan struct{} // flag the stream has closed
- dieLock sync.Mutex
+ id uint32
+ rstflag int32
+ sess *Session
+ buffer bytes.Buffer
+ bufferLock sync.Mutex
+ frameSize int
+ chReadEvent chan struct{} // notify a read event
+ die chan struct{} // flag the stream has closed
+ dieLock sync.Mutex
+ readDeadline atomic.Value
+ writeDeadline atomic.Value
}
// newStream initiates a Stream struct
@@ -35,10 +39,19 @@ func newStream(id uint32, frameSize int, sess *Session) *Stream {
// Read implements io.ReadWriteCloser
func (s *Stream) Read(b []byte) (n int, err error) {
+ var deadline <-chan time.Time
+ if d, ok := s.readDeadline.Load().(time.Time); ok && !d.IsZero() {
+ timer := time.NewTimer(d.Sub(time.Now()))
+ defer timer.Stop()
+ deadline = timer.C
+ }
+
READ:
select {
case <-s.die:
return 0, errors.New(errBrokenPipe)
+ case <-deadline:
+ return n, errTimeout
default:
}
@@ -51,12 +64,14 @@ READ:
return n, nil
} else if atomic.LoadInt32(&s.rstflag) == 1 {
_ = s.Close()
- return 0, errors.New(errConnReset)
+ return 0, io.EOF
}
select {
case <-s.chReadEvent:
goto READ
+ case <-deadline:
+ return n, errTimeout
case <-s.die:
return 0, errors.New(errBrokenPipe)
}
@@ -64,6 +79,13 @@ READ:
// Write implements io.ReadWriteCloser
func (s *Stream) Write(b []byte) (n int, err error) {
+ var deadline <-chan time.Time
+ if d, ok := s.writeDeadline.Load().(time.Time); ok && !d.IsZero() {
+ timer := time.NewTimer(d.Sub(time.Now()))
+ defer timer.Stop()
+ deadline = timer.C
+ }
+
select {
case <-s.die:
return 0, errors.New(errBrokenPipe)
@@ -71,42 +93,82 @@ func (s *Stream) Write(b []byte) (n int, err error) {
}
frames := s.split(b, cmdPSH, s.id)
- // preallocate buffer
- buffer := make([]byte, len(frames)*headerSize+len(b))
- bts := buffer
-
- // combine frames into a large blob
+ sent := 0
for k := range frames {
- bts[0] = version
- bts[1] = frames[k].cmd
- binary.LittleEndian.PutUint16(bts[2:], uint16(len(frames[k].data)))
- binary.LittleEndian.PutUint32(bts[4:], frames[k].sid)
- copy(bts[headerSize:], frames[k].data)
- bts = bts[len(frames[k].data)+headerSize:]
- }
+ req := writeRequest{
+ frame: frames[k],
+ result: make(chan writeResult, 1),
+ }
- if _, err = s.sess.writeBinary(buffer); err != nil {
- return 0, err
+ select {
+ case s.sess.writes <- req:
+ case <-s.die:
+ return sent, errors.New(errBrokenPipe)
+ case <-deadline:
+ return sent, errTimeout
+ }
+
+ select {
+ case result := <-req.result:
+ sent += result.n
+ if result.err != nil {
+ return sent, result.err
+ }
+ case <-s.die:
+ return sent, errors.New(errBrokenPipe)
+ case <-deadline:
+ return sent, errTimeout
+ }
}
- return len(b), nil
+ return sent, nil
}
// Close implements io.ReadWriteCloser
func (s *Stream) Close() error {
s.dieLock.Lock()
- defer s.dieLock.Unlock()
select {
case <-s.die:
+ s.dieLock.Unlock()
return errors.New(errBrokenPipe)
default:
close(s.die)
+ s.dieLock.Unlock()
s.sess.streamClosed(s.id)
_, err := s.sess.writeFrame(newFrame(cmdRST, s.id))
return err
}
}
+// SetReadDeadline sets the read deadline as defined by
+// net.Conn.SetReadDeadline.
+// A zero time value disables the deadline.
+func (s *Stream) SetReadDeadline(t time.Time) error {
+ s.readDeadline.Store(t)
+ return nil
+}
+
+// SetWriteDeadline sets the write deadline as defined by
+// net.Conn.SetWriteDeadline.
+// A zero time value disables the deadline.
+func (s *Stream) SetWriteDeadline(t time.Time) error {
+ s.writeDeadline.Store(t)
+ return nil
+}
+
+// SetDeadline sets both read and write deadlines as defined by
+// net.Conn.SetDeadline.
+// A zero time value disables the deadlines.
+func (s *Stream) SetDeadline(t time.Time) error {
+ if err := s.SetReadDeadline(t); err != nil {
+ return err
+ }
+ if err := s.SetWriteDeadline(t); err != nil {
+ return err
+ }
+ return nil
+}
+
// session closes the stream
func (s *Stream) sessionClose() {
s.dieLock.Lock()
@@ -119,6 +181,26 @@ func (s *Stream) sessionClose() {
}
}
+// LocalAddr satisfies net.Conn interface
+func (s *Stream) LocalAddr() net.Addr {
+ if ts, ok := s.sess.conn.(interface {
+ LocalAddr() net.Addr
+ }); ok {
+ return ts.LocalAddr()
+ }
+ return nil
+}
+
+// RemoteAddr satisfies net.Conn interface
+func (s *Stream) RemoteAddr() net.Addr {
+ if ts, ok := s.sess.conn.(interface {
+ RemoteAddr() net.Addr
+ }); ok {
+ return ts.RemoteAddr()
+ }
+ return nil
+}
+
// pushBytes a slice into buffer
func (s *Stream) pushBytes(p []byte) {
s.bufferLock.Lock()
@@ -164,3 +246,11 @@ func (s *Stream) notifyReadEvent() {
func (s *Stream) markRST() {
atomic.StoreInt32(&s.rstflag, 1)
}
+
+var errTimeout error = &timeoutError{}
+
+type timeoutError struct{}
+
+func (e *timeoutError) Error() string { return "i/o timeout" }
+func (e *timeoutError) Timeout() bool { return true }
+func (e *timeoutError) Temporary() bool { return true }
diff --git a/cmd/gost/vendor/vendor.json b/cmd/gost/vendor/vendor.json
index 51f6d88..57223ca 100644
--- a/cmd/gost/vendor/vendor.json
+++ b/cmd/gost/vendor/vendor.json
@@ -8,12 +8,6 @@
"revision": "c91e78db502ff629614837aacb7aa4efa61c651a",
"revisionTime": "2016-04-30T09:49:23Z"
},
- {
- "checksumSHA1": "QPs3L3mjPoi+a9GJCjW8HhyJczM=",
- "path": "github.com/codahale/chacha20",
- "revision": "ec07b4f69a3f70b1dd2a8ad77230deb1ba5d6953",
- "revisionTime": "2015-11-07T02:50:05Z"
- },
{
"checksumSHA1": "aIhLeVAIrsjs63CwqmU3+GU8yT4=",
"path": "github.com/ginuerzh/gosocks4",
@@ -68,12 +62,6 @@
"revision": "09cded8978dc9e80714c4d85b0322337b0a1e5e0",
"revisionTime": "2016-03-02T07:53:16Z"
},
- {
- "checksumSHA1": "BM6ZlNJmtKy3GBoWwg2X55gnZ4A=",
- "path": "github.com/klauspost/crc32",
- "revision": "cb6bfca970f6908083f26f39a79009d608efd5cd",
- "revisionTime": "2016-10-16T15:41:25Z"
- },
{
"checksumSHA1": "dwSGkUfh3A2h0VkXndzBX/27hVc=",
"path": "github.com/klauspost/reedsolomon",
@@ -291,16 +279,16 @@
"revisionTime": "2016-12-15T22:53:35Z"
},
{
- "checksumSHA1": "nkIlj9QTxHQ78Vb+VgjhXZ4rZ3E=",
+ "checksumSHA1": "SbBORpjEg3VfPfdSrW82pa3f9Io=",
"path": "gopkg.in/xtaci/kcp-go.v2",
- "revision": "6610d527ea5c4890cf593796ff8ff1f10486bb68",
- "revisionTime": "2016-09-08T14:44:41Z"
+ "revision": "6da5044c742f24f05b00db9214b57b2ac943c9ab",
+ "revisionTime": "2017-01-20T08:43:10Z"
},
{
- "checksumSHA1": "aIqXwA82JxLOXcgmuVSgcRqdJvU=",
+ "checksumSHA1": "EutBuLS2elfcDCMifXNMGj9farQ=",
"path": "gopkg.in/xtaci/smux.v1",
- "revision": "9f2b528a60917e6446273926f4c676cac759d2b0",
- "revisionTime": "2016-09-22T10:26:45Z"
+ "revision": "427dd804ce9fb0a9e7b27a628f68a124fb0d67a6",
+ "revisionTime": "2016-11-29T15:03:00Z"
}
],
"rootPath": "github.com/ginuerzh/gost/cmd/gost"