diff --git a/cmd/gost/vendor/github.com/codahale/chacha20/LICENSE b/cmd/gost/vendor/github.com/codahale/chacha20/LICENSE deleted file mode 100644 index f9835c2..0000000 --- a/cmd/gost/vendor/github.com/codahale/chacha20/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2014 Coda Hale - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff --git a/cmd/gost/vendor/github.com/codahale/chacha20/README.md b/cmd/gost/vendor/github.com/codahale/chacha20/README.md deleted file mode 100644 index e0cc2ec..0000000 --- a/cmd/gost/vendor/github.com/codahale/chacha20/README.md +++ /dev/null @@ -1,8 +0,0 @@ -chacha20 -======== - -[![Build Status](https://travis-ci.org/codahale/chacha20.png?branch=master)](https://travis-ci.org/codahale/chacha20) - -A pure Go implementation of the ChaCha20 stream cipher. - -For documentation, check [godoc](http://godoc.org/github.com/codahale/chacha20). diff --git a/cmd/gost/vendor/github.com/codahale/chacha20/chacha20.go b/cmd/gost/vendor/github.com/codahale/chacha20/chacha20.go deleted file mode 100644 index ae671bc..0000000 --- a/cmd/gost/vendor/github.com/codahale/chacha20/chacha20.go +++ /dev/null @@ -1,235 +0,0 @@ -// Package chacha20 provides a pure Go implementation of ChaCha20, a fast, -// secure stream cipher. -// -// From Bernstein, Daniel J. "ChaCha, a variant of Salsa20." Workshop Record of -// SASC. 2008. (http://cr.yp.to/chacha/chacha-20080128.pdf): -// -// ChaCha8 is a 256-bit stream cipher based on the 8-round cipher Salsa20/8. -// The changes from Salsa20/8 to ChaCha8 are designed to improve diffusion per -// round, conjecturally increasing resistance to cryptanalysis, while -// preserving -- and often improving -- time per round. ChaCha12 and ChaCha20 -// are analogous modifications of the 12-round and 20-round ciphers Salsa20/12 -// and Salsa20/20. This paper presents the ChaCha family and explains the -// differences between Salsa20 and ChaCha. -// -// For more information, see http://cr.yp.to/chacha.html -package chacha20 - -import ( - "crypto/cipher" - "encoding/binary" - "errors" - "unsafe" -) - -const ( - // KeySize is the length of ChaCha20 keys, in bytes. - KeySize = 32 - // NonceSize is the length of ChaCha20 nonces, in bytes. - NonceSize = 8 - // XNonceSize is the length of XChaCha20 nonces, in bytes. - XNonceSize = 24 -) - -var ( - // ErrInvalidKey is returned when the provided key is not 256 bits long. - ErrInvalidKey = errors.New("invalid key length (must be 256 bits)") - // ErrInvalidNonce is returned when the provided nonce is not 64 bits long. - ErrInvalidNonce = errors.New("invalid nonce length (must be 64 bits)") - // ErrInvalidXNonce is returned when the provided nonce is not 192 bits - // long. - ErrInvalidXNonce = errors.New("invalid nonce length (must be 192 bits)") - // ErrInvalidRounds is returned when the provided rounds is not - // 8, 12, or 20. - ErrInvalidRounds = errors.New("invalid rounds number (must be 8, 12, or 20)") -) - -// New creates and returns a new cipher.Stream. The key argument must be 256 -// bits long, and the nonce argument must be 64 bits long. The nonce must be -// randomly generated or used only once. This Stream instance must not be used -// to encrypt more than 2^70 bytes (~1 zettabyte). -func New(key []byte, nonce []byte) (cipher.Stream, error) { - return NewWithRounds(key, nonce, 20) -} - -// NewWithRounds creates and returns a new cipher.Stream just like New but -// the rounds number of 8, 12, or 20 can be specified. -func NewWithRounds(key []byte, nonce []byte, rounds uint8) (cipher.Stream, error) { - if len(key) != KeySize { - return nil, ErrInvalidKey - } - - if len(nonce) != NonceSize { - return nil, ErrInvalidNonce - } - - if (rounds != 8) && (rounds != 12) && (rounds != 20) { - return nil, ErrInvalidRounds - } - - s := new(stream) - s.init(key, nonce, rounds) - s.advance() - - return s, nil -} - -// NewXChaCha creates and returns a new cipher.Stream. The key argument must be -// 256 bits long, and the nonce argument must be 192 bits long. The nonce must -// be randomly generated or only used once. This Stream instance must not be -// used to encrypt more than 2^70 bytes (~1 zetta byte). -func NewXChaCha(key []byte, nonce []byte) (cipher.Stream, error) { - return NewXChaChaWithRounds(key, nonce, 20) -} - -// NewXChaChaWithRounds creates and returns a new cipher.Stream just like -// NewXChaCha but the rounds number of 8, 12, or 20 can be specified. -func NewXChaChaWithRounds(key []byte, nonce []byte, rounds uint8) (cipher.Stream, error) { - if len(key) != KeySize { - return nil, ErrInvalidKey - } - - if len(nonce) != XNonceSize { - return nil, ErrInvalidXNonce - } - - if (rounds != 8) && (rounds != 12) && (rounds != 20) { - return nil, ErrInvalidRounds - } - - s := new(stream) - s.init(key, nonce, rounds) - - // Call HChaCha to derive the subkey using the key and the first 16 bytes - // of the nonce, and re-initialize the state using the subkey and the - // remaining nonce. - blockArr := (*[stateSize]uint32)(unsafe.Pointer(&s.block)) - core(&s.state, blockArr, s.rounds, true) - copy(s.state[4:8], blockArr[0:4]) - copy(s.state[8:12], blockArr[12:16]) - s.state[12] = 0 - s.state[13] = 0 - s.state[14] = binary.LittleEndian.Uint32(nonce[16:]) - s.state[15] = binary.LittleEndian.Uint32(nonce[20:]) - - s.advance() - - return s, nil -} - -type stream struct { - state [stateSize]uint32 // the state as an array of 16 32-bit words - block [blockSize]byte // the keystream as an array of 64 bytes - offset int // the offset of used bytes in block - rounds uint8 -} - -func (s *stream) XORKeyStream(dst, src []byte) { - // Stride over the input in 64-byte blocks, minus the amount of keystream - // previously used. This will produce best results when processing blocks - // of a size evenly divisible by 64. - i := 0 - max := len(src) - for i < max { - gap := blockSize - s.offset - - limit := i + gap - if limit > max { - limit = max - } - - o := s.offset - for j := i; j < limit; j++ { - dst[j] = src[j] ^ s.block[o] - o++ - } - - i += gap - s.offset = o - - if o == blockSize { - s.advance() - } - } -} - -func (s *stream) init(key []byte, nonce []byte, rounds uint8) { - // the magic constants for 256-bit keys - s.state[0] = 0x61707865 - s.state[1] = 0x3320646e - s.state[2] = 0x79622d32 - s.state[3] = 0x6b206574 - - s.state[4] = binary.LittleEndian.Uint32(key[0:]) - s.state[5] = binary.LittleEndian.Uint32(key[4:]) - s.state[6] = binary.LittleEndian.Uint32(key[8:]) - s.state[7] = binary.LittleEndian.Uint32(key[12:]) - s.state[8] = binary.LittleEndian.Uint32(key[16:]) - s.state[9] = binary.LittleEndian.Uint32(key[20:]) - s.state[10] = binary.LittleEndian.Uint32(key[24:]) - s.state[11] = binary.LittleEndian.Uint32(key[28:]) - - switch len(nonce) { - case NonceSize: - // ChaCha20 uses 8 byte nonces. - s.state[12] = 0 - s.state[13] = 0 - s.state[14] = binary.LittleEndian.Uint32(nonce[0:]) - s.state[15] = binary.LittleEndian.Uint32(nonce[4:]) - case XNonceSize: - // XChaCha20 derives the subkey via HChaCha initialized - // with the first 16 bytes of the nonce. - s.state[12] = binary.LittleEndian.Uint32(nonce[0:]) - s.state[13] = binary.LittleEndian.Uint32(nonce[4:]) - s.state[14] = binary.LittleEndian.Uint32(nonce[8:]) - s.state[15] = binary.LittleEndian.Uint32(nonce[12:]) - default: - // Never happens, both ctors validate the nonce length. - panic("invalid nonce size") - } - - s.rounds = rounds -} - -// BUG(codahale): Totally untested on big-endian CPUs. Would very much -// appreciate someone with an ARM device giving this a swing. - -// advances the keystream -func (s *stream) advance() { - core(&s.state, (*[stateSize]uint32)(unsafe.Pointer(&s.block)), s.rounds, false) - - if bigEndian { - j := blockSize - 1 - for i := 0; i < blockSize/2; i++ { - s.block[j], s.block[i] = s.block[i], s.block[j] - j-- - } - } - - s.offset = 0 - i := s.state[12] + 1 - s.state[12] = i - if i == 0 { - s.state[13]++ - } -} - -const ( - wordSize = 4 // the size of ChaCha20's words - stateSize = 16 // the size of ChaCha20's state, in words - blockSize = stateSize * wordSize // the size of ChaCha20's block, in bytes -) - -var ( - bigEndian bool // whether or not we're running on a bigEndian CPU -) - -// Do some up-front bookkeeping on what sort of CPU we're using. ChaCha20 treats -// its state as a little-endian byte array when it comes to generating the -// keystream, which allows for a zero-copy approach to the core transform. On -// big-endian architectures, we have to take a hit to reverse the bytes. -func init() { - x := uint32(0x04030201) - y := [4]byte{0x1, 0x2, 0x3, 0x4} - bigEndian = *(*[4]byte)(unsafe.Pointer(&x)) != y -} diff --git a/cmd/gost/vendor/github.com/codahale/chacha20/core_ref.go b/cmd/gost/vendor/github.com/codahale/chacha20/core_ref.go deleted file mode 100644 index 84f5e6c..0000000 --- a/cmd/gost/vendor/github.com/codahale/chacha20/core_ref.go +++ /dev/null @@ -1,166 +0,0 @@ -// The ChaCha20 core transform. -// An unrolled and inlined implementation in pure Go. - -package chacha20 - -func core(input, output *[stateSize]uint32, rounds uint8, hchacha bool) { - var ( - x00 = input[0] - x01 = input[1] - x02 = input[2] - x03 = input[3] - x04 = input[4] - x05 = input[5] - x06 = input[6] - x07 = input[7] - x08 = input[8] - x09 = input[9] - x10 = input[10] - x11 = input[11] - x12 = input[12] - x13 = input[13] - x14 = input[14] - x15 = input[15] - ) - - var x uint32 - - // Unrolling all 20 rounds kills performance on modern Intel processors - // (Tested on a i5 Haswell, likely applies to Sandy Bridge+), due to uop - // cache thrashing. The straight forward 2 rounds per loop implementation - // of this has double the performance of the fully unrolled version. - for i := uint8(0); i < rounds; i += 2 { - x00 += x04 - x = x12 ^ x00 - x12 = (x << 16) | (x >> 16) - x08 += x12 - x = x04 ^ x08 - x04 = (x << 12) | (x >> 20) - x00 += x04 - x = x12 ^ x00 - x12 = (x << 8) | (x >> 24) - x08 += x12 - x = x04 ^ x08 - x04 = (x << 7) | (x >> 25) - x01 += x05 - x = x13 ^ x01 - x13 = (x << 16) | (x >> 16) - x09 += x13 - x = x05 ^ x09 - x05 = (x << 12) | (x >> 20) - x01 += x05 - x = x13 ^ x01 - x13 = (x << 8) | (x >> 24) - x09 += x13 - x = x05 ^ x09 - x05 = (x << 7) | (x >> 25) - x02 += x06 - x = x14 ^ x02 - x14 = (x << 16) | (x >> 16) - x10 += x14 - x = x06 ^ x10 - x06 = (x << 12) | (x >> 20) - x02 += x06 - x = x14 ^ x02 - x14 = (x << 8) | (x >> 24) - x10 += x14 - x = x06 ^ x10 - x06 = (x << 7) | (x >> 25) - x03 += x07 - x = x15 ^ x03 - x15 = (x << 16) | (x >> 16) - x11 += x15 - x = x07 ^ x11 - x07 = (x << 12) | (x >> 20) - x03 += x07 - x = x15 ^ x03 - x15 = (x << 8) | (x >> 24) - x11 += x15 - x = x07 ^ x11 - x07 = (x << 7) | (x >> 25) - x00 += x05 - x = x15 ^ x00 - x15 = (x << 16) | (x >> 16) - x10 += x15 - x = x05 ^ x10 - x05 = (x << 12) | (x >> 20) - x00 += x05 - x = x15 ^ x00 - x15 = (x << 8) | (x >> 24) - x10 += x15 - x = x05 ^ x10 - x05 = (x << 7) | (x >> 25) - x01 += x06 - x = x12 ^ x01 - x12 = (x << 16) | (x >> 16) - x11 += x12 - x = x06 ^ x11 - x06 = (x << 12) | (x >> 20) - x01 += x06 - x = x12 ^ x01 - x12 = (x << 8) | (x >> 24) - x11 += x12 - x = x06 ^ x11 - x06 = (x << 7) | (x >> 25) - x02 += x07 - x = x13 ^ x02 - x13 = (x << 16) | (x >> 16) - x08 += x13 - x = x07 ^ x08 - x07 = (x << 12) | (x >> 20) - x02 += x07 - x = x13 ^ x02 - x13 = (x << 8) | (x >> 24) - x08 += x13 - x = x07 ^ x08 - x07 = (x << 7) | (x >> 25) - x03 += x04 - x = x14 ^ x03 - x14 = (x << 16) | (x >> 16) - x09 += x14 - x = x04 ^ x09 - x04 = (x << 12) | (x >> 20) - x03 += x04 - x = x14 ^ x03 - x14 = (x << 8) | (x >> 24) - x09 += x14 - x = x04 ^ x09 - x04 = (x << 7) | (x >> 25) - } - - if !hchacha { - output[0] = x00 + input[0] - output[1] = x01 + input[1] - output[2] = x02 + input[2] - output[3] = x03 + input[3] - output[4] = x04 + input[4] - output[5] = x05 + input[5] - output[6] = x06 + input[6] - output[7] = x07 + input[7] - output[8] = x08 + input[8] - output[9] = x09 + input[9] - output[10] = x10 + input[10] - output[11] = x11 + input[11] - output[12] = x12 + input[12] - output[13] = x13 + input[13] - output[14] = x14 + input[14] - output[15] = x15 + input[15] - } else { - output[0] = x00 - output[1] = x01 - output[2] = x02 - output[3] = x03 - output[4] = x04 - output[5] = x05 - output[6] = x06 - output[7] = x07 - output[8] = x08 - output[9] = x09 - output[10] = x10 - output[11] = x11 - output[12] = x12 - output[13] = x13 - output[14] = x14 - output[15] = x15 - } -} diff --git a/cmd/gost/vendor/github.com/klauspost/crc32/LICENSE b/cmd/gost/vendor/github.com/klauspost/crc32/LICENSE deleted file mode 100644 index 4fd5963..0000000 --- a/cmd/gost/vendor/github.com/klauspost/crc32/LICENSE +++ /dev/null @@ -1,28 +0,0 @@ -Copyright (c) 2012 The Go Authors. All rights reserved. -Copyright (c) 2015 Klaus Post - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/cmd/gost/vendor/github.com/klauspost/crc32/README.md b/cmd/gost/vendor/github.com/klauspost/crc32/README.md deleted file mode 100644 index 029625d..0000000 --- a/cmd/gost/vendor/github.com/klauspost/crc32/README.md +++ /dev/null @@ -1,87 +0,0 @@ -# crc32 -CRC32 hash with x64 optimizations - -This package is a drop-in replacement for the standard library `hash/crc32` package, that features SSE 4.2 optimizations on x64 platforms, for a 10x speedup. - -[![Build Status](https://travis-ci.org/klauspost/crc32.svg?branch=master)](https://travis-ci.org/klauspost/crc32) - -# usage - -Install using `go get github.com/klauspost/crc32`. This library is based on Go 1.5 code and requires Go 1.3 or newer. - -Replace `import "hash/crc32"` with `import "github.com/klauspost/crc32"` and you are good to go. - -# changes -* Oct 20, 2016: Changes have been merged to upstream Go. Package updated to match. -* Dec 4, 2015: Uses the "slice-by-8" trick more extensively, which gives a 1.5 to 2.5x speedup if assembler is unavailable. - - -# performance - -For *Go 1.7* performance is equivalent to the standard library. So if you use this package for Go 1.7 you can switch back. - - -For IEEE tables (the most common), there is approximately a factor 10 speedup with "CLMUL" (Carryless multiplication) instruction: -``` -benchmark old ns/op new ns/op delta -BenchmarkCrc32KB 99955 10258 -89.74% - -benchmark old MB/s new MB/s speedup -BenchmarkCrc32KB 327.83 3194.20 9.74x -``` - -For other tables and "CLMUL" capable machines the performance is the same as the standard library. - -Here are some detailed benchmarks, comparing to go 1.5 standard library with and without assembler enabled. - -``` -Std: Standard Go 1.5 library -Crc: Indicates IEEE type CRC. -40B: Size of each slice encoded. -NoAsm: Assembler was disabled (ie. not an AMD64 or SSE 4.2+ capable machine). -Castagnoli: Castagnoli CRC type. - -BenchmarkStdCrc40B-4 10000000 158 ns/op 252.88 MB/s -BenchmarkCrc40BNoAsm-4 20000000 105 ns/op 377.38 MB/s (slice8) -BenchmarkCrc40B-4 20000000 105 ns/op 378.77 MB/s (slice8) - -BenchmarkStdCrc1KB-4 500000 3604 ns/op 284.10 MB/s -BenchmarkCrc1KBNoAsm-4 1000000 1463 ns/op 699.79 MB/s (slice8) -BenchmarkCrc1KB-4 3000000 396 ns/op 2583.69 MB/s (asm) - -BenchmarkStdCrc8KB-4 200000 11417 ns/op 717.48 MB/s (slice8) -BenchmarkCrc8KBNoAsm-4 200000 11317 ns/op 723.85 MB/s (slice8) -BenchmarkCrc8KB-4 500000 2919 ns/op 2805.73 MB/s (asm) - -BenchmarkStdCrc32KB-4 30000 45749 ns/op 716.24 MB/s (slice8) -BenchmarkCrc32KBNoAsm-4 30000 45109 ns/op 726.42 MB/s (slice8) -BenchmarkCrc32KB-4 100000 11497 ns/op 2850.09 MB/s (asm) - -BenchmarkStdNoAsmCastagnol40B-4 10000000 161 ns/op 246.94 MB/s -BenchmarkStdCastagnoli40B-4 50000000 28.4 ns/op 1410.69 MB/s (asm) -BenchmarkCastagnoli40BNoAsm-4 20000000 100 ns/op 398.01 MB/s (slice8) -BenchmarkCastagnoli40B-4 50000000 28.2 ns/op 1419.54 MB/s (asm) - -BenchmarkStdNoAsmCastagnoli1KB-4 500000 3622 ns/op 282.67 MB/s -BenchmarkStdCastagnoli1KB-4 10000000 144 ns/op 7099.78 MB/s (asm) -BenchmarkCastagnoli1KBNoAsm-4 1000000 1475 ns/op 694.14 MB/s (slice8) -BenchmarkCastagnoli1KB-4 10000000 146 ns/op 6993.35 MB/s (asm) - -BenchmarkStdNoAsmCastagnoli8KB-4 50000 28781 ns/op 284.63 MB/s -BenchmarkStdCastagnoli8KB-4 1000000 1029 ns/op 7957.89 MB/s (asm) -BenchmarkCastagnoli8KBNoAsm-4 200000 11410 ns/op 717.94 MB/s (slice8) -BenchmarkCastagnoli8KB-4 1000000 1000 ns/op 8188.71 MB/s (asm) - -BenchmarkStdNoAsmCastagnoli32KB-4 10000 115426 ns/op 283.89 MB/s -BenchmarkStdCastagnoli32KB-4 300000 4065 ns/op 8059.13 MB/s (asm) -BenchmarkCastagnoli32KBNoAsm-4 30000 45171 ns/op 725.41 MB/s (slice8) -BenchmarkCastagnoli32KB-4 500000 4077 ns/op 8035.89 MB/s (asm) -``` - -The IEEE assembler optimizations has been submitted and will be part of the Go 1.6 standard library. - -However, the improved use of slice-by-8 has not, but will probably be submitted for Go 1.7. - -# license - -Standard Go license. Changes are Copyright (c) 2015 Klaus Post under same conditions. diff --git a/cmd/gost/vendor/github.com/klauspost/crc32/crc32.go b/cmd/gost/vendor/github.com/klauspost/crc32/crc32.go deleted file mode 100644 index 8aa91b1..0000000 --- a/cmd/gost/vendor/github.com/klauspost/crc32/crc32.go +++ /dev/null @@ -1,207 +0,0 @@ -// Copyright 2009 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package crc32 implements the 32-bit cyclic redundancy check, or CRC-32, -// checksum. See http://en.wikipedia.org/wiki/Cyclic_redundancy_check for -// information. -// -// Polynomials are represented in LSB-first form also known as reversed representation. -// -// See http://en.wikipedia.org/wiki/Mathematics_of_cyclic_redundancy_checks#Reversed_representations_and_reciprocal_polynomials -// for information. -package crc32 - -import ( - "hash" - "sync" -) - -// The size of a CRC-32 checksum in bytes. -const Size = 4 - -// Predefined polynomials. -const ( - // IEEE is by far and away the most common CRC-32 polynomial. - // Used by ethernet (IEEE 802.3), v.42, fddi, gzip, zip, png, ... - IEEE = 0xedb88320 - - // Castagnoli's polynomial, used in iSCSI. - // Has better error detection characteristics than IEEE. - // http://dx.doi.org/10.1109/26.231911 - Castagnoli = 0x82f63b78 - - // Koopman's polynomial. - // Also has better error detection characteristics than IEEE. - // http://dx.doi.org/10.1109/DSN.2002.1028931 - Koopman = 0xeb31d82e -) - -// Table is a 256-word table representing the polynomial for efficient processing. -type Table [256]uint32 - -// This file makes use of functions implemented in architecture-specific files. -// The interface that they implement is as follows: -// -// // archAvailableIEEE reports whether an architecture-specific CRC32-IEEE -// // algorithm is available. -// archAvailableIEEE() bool -// -// // archInitIEEE initializes the architecture-specific CRC3-IEEE algorithm. -// // It can only be called if archAvailableIEEE() returns true. -// archInitIEEE() -// -// // archUpdateIEEE updates the given CRC32-IEEE. It can only be called if -// // archInitIEEE() was previously called. -// archUpdateIEEE(crc uint32, p []byte) uint32 -// -// // archAvailableCastagnoli reports whether an architecture-specific -// // CRC32-C algorithm is available. -// archAvailableCastagnoli() bool -// -// // archInitCastagnoli initializes the architecture-specific CRC32-C -// // algorithm. It can only be called if archAvailableCastagnoli() returns -// // true. -// archInitCastagnoli() -// -// // archUpdateCastagnoli updates the given CRC32-C. It can only be called -// // if archInitCastagnoli() was previously called. -// archUpdateCastagnoli(crc uint32, p []byte) uint32 - -// castagnoliTable points to a lazily initialized Table for the Castagnoli -// polynomial. MakeTable will always return this value when asked to make a -// Castagnoli table so we can compare against it to find when the caller is -// using this polynomial. -var castagnoliTable *Table -var castagnoliTable8 *slicing8Table -var castagnoliArchImpl bool -var updateCastagnoli func(crc uint32, p []byte) uint32 -var castagnoliOnce sync.Once - -func castagnoliInit() { - castagnoliTable = simpleMakeTable(Castagnoli) - castagnoliArchImpl = archAvailableCastagnoli() - - if castagnoliArchImpl { - archInitCastagnoli() - updateCastagnoli = archUpdateCastagnoli - } else { - // Initialize the slicing-by-8 table. - castagnoliTable8 = slicingMakeTable(Castagnoli) - updateCastagnoli = func(crc uint32, p []byte) uint32 { - return slicingUpdate(crc, castagnoliTable8, p) - } - } -} - -// IEEETable is the table for the IEEE polynomial. -var IEEETable = simpleMakeTable(IEEE) - -// ieeeTable8 is the slicing8Table for IEEE -var ieeeTable8 *slicing8Table -var ieeeArchImpl bool -var updateIEEE func(crc uint32, p []byte) uint32 -var ieeeOnce sync.Once - -func ieeeInit() { - ieeeArchImpl = archAvailableIEEE() - - if ieeeArchImpl { - archInitIEEE() - updateIEEE = archUpdateIEEE - } else { - // Initialize the slicing-by-8 table. - ieeeTable8 = slicingMakeTable(IEEE) - updateIEEE = func(crc uint32, p []byte) uint32 { - return slicingUpdate(crc, ieeeTable8, p) - } - } -} - -// MakeTable returns a Table constructed from the specified polynomial. -// The contents of this Table must not be modified. -func MakeTable(poly uint32) *Table { - switch poly { - case IEEE: - ieeeOnce.Do(ieeeInit) - return IEEETable - case Castagnoli: - castagnoliOnce.Do(castagnoliInit) - return castagnoliTable - } - return simpleMakeTable(poly) -} - -// digest represents the partial evaluation of a checksum. -type digest struct { - crc uint32 - tab *Table -} - -// New creates a new hash.Hash32 computing the CRC-32 checksum -// using the polynomial represented by the Table. -// Its Sum method will lay the value out in big-endian byte order. -func New(tab *Table) hash.Hash32 { - if tab == IEEETable { - ieeeOnce.Do(ieeeInit) - } - return &digest{0, tab} -} - -// NewIEEE creates a new hash.Hash32 computing the CRC-32 checksum -// using the IEEE polynomial. -// Its Sum method will lay the value out in big-endian byte order. -func NewIEEE() hash.Hash32 { return New(IEEETable) } - -func (d *digest) Size() int { return Size } - -func (d *digest) BlockSize() int { return 1 } - -func (d *digest) Reset() { d.crc = 0 } - -// Update returns the result of adding the bytes in p to the crc. -func Update(crc uint32, tab *Table, p []byte) uint32 { - switch tab { - case castagnoliTable: - return updateCastagnoli(crc, p) - case IEEETable: - // Unfortunately, because IEEETable is exported, IEEE may be used without a - // call to MakeTable. We have to make sure it gets initialized in that case. - ieeeOnce.Do(ieeeInit) - return updateIEEE(crc, p) - default: - return simpleUpdate(crc, tab, p) - } -} - -func (d *digest) Write(p []byte) (n int, err error) { - switch d.tab { - case castagnoliTable: - d.crc = updateCastagnoli(d.crc, p) - case IEEETable: - // We only create digest objects through New() which takes care of - // initialization in this case. - d.crc = updateIEEE(d.crc, p) - default: - d.crc = simpleUpdate(d.crc, d.tab, p) - } - return len(p), nil -} - -func (d *digest) Sum32() uint32 { return d.crc } - -func (d *digest) Sum(in []byte) []byte { - s := d.Sum32() - return append(in, byte(s>>24), byte(s>>16), byte(s>>8), byte(s)) -} - -// Checksum returns the CRC-32 checksum of data -// using the polynomial represented by the Table. -func Checksum(data []byte, tab *Table) uint32 { return Update(0, tab, data) } - -// ChecksumIEEE returns the CRC-32 checksum of data -// using the IEEE polynomial. -func ChecksumIEEE(data []byte) uint32 { - ieeeOnce.Do(ieeeInit) - return updateIEEE(0, data) -} diff --git a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_amd64.go b/cmd/gost/vendor/github.com/klauspost/crc32/crc32_amd64.go deleted file mode 100644 index af2a0b8..0000000 --- a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_amd64.go +++ /dev/null @@ -1,230 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !appengine,!gccgo - -// AMD64-specific hardware-assisted CRC32 algorithms. See crc32.go for a -// description of the interface that each architecture-specific file -// implements. - -package crc32 - -import "unsafe" - -// This file contains the code to call the SSE 4.2 version of the Castagnoli -// and IEEE CRC. - -// haveSSE41/haveSSE42/haveCLMUL are defined in crc_amd64.s and use -// CPUID to test for SSE 4.1, 4.2 and CLMUL support. -func haveSSE41() bool -func haveSSE42() bool -func haveCLMUL() bool - -// castagnoliSSE42 is defined in crc32_amd64.s and uses the SSE4.2 CRC32 -// instruction. -//go:noescape -func castagnoliSSE42(crc uint32, p []byte) uint32 - -// castagnoliSSE42Triple is defined in crc32_amd64.s and uses the SSE4.2 CRC32 -// instruction. -//go:noescape -func castagnoliSSE42Triple( - crcA, crcB, crcC uint32, - a, b, c []byte, - rounds uint32, -) (retA uint32, retB uint32, retC uint32) - -// ieeeCLMUL is defined in crc_amd64.s and uses the PCLMULQDQ -// instruction as well as SSE 4.1. -//go:noescape -func ieeeCLMUL(crc uint32, p []byte) uint32 - -var sse42 = haveSSE42() -var useFastIEEE = haveCLMUL() && haveSSE41() - -const castagnoliK1 = 168 -const castagnoliK2 = 1344 - -type sse42Table [4]Table - -var castagnoliSSE42TableK1 *sse42Table -var castagnoliSSE42TableK2 *sse42Table - -func archAvailableCastagnoli() bool { - return sse42 -} - -func archInitCastagnoli() { - if !sse42 { - panic("arch-specific Castagnoli not available") - } - castagnoliSSE42TableK1 = new(sse42Table) - castagnoliSSE42TableK2 = new(sse42Table) - // See description in updateCastagnoli. - // t[0][i] = CRC(i000, O) - // t[1][i] = CRC(0i00, O) - // t[2][i] = CRC(00i0, O) - // t[3][i] = CRC(000i, O) - // where O is a sequence of K zeros. - var tmp [castagnoliK2]byte - for b := 0; b < 4; b++ { - for i := 0; i < 256; i++ { - val := uint32(i) << uint32(b*8) - castagnoliSSE42TableK1[b][i] = castagnoliSSE42(val, tmp[:castagnoliK1]) - castagnoliSSE42TableK2[b][i] = castagnoliSSE42(val, tmp[:]) - } - } -} - -// castagnoliShift computes the CRC32-C of K1 or K2 zeroes (depending on the -// table given) with the given initial crc value. This corresponds to -// CRC(crc, O) in the description in updateCastagnoli. -func castagnoliShift(table *sse42Table, crc uint32) uint32 { - return table[3][crc>>24] ^ - table[2][(crc>>16)&0xFF] ^ - table[1][(crc>>8)&0xFF] ^ - table[0][crc&0xFF] -} - -func archUpdateCastagnoli(crc uint32, p []byte) uint32 { - if !sse42 { - panic("not available") - } - - // This method is inspired from the algorithm in Intel's white paper: - // "Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction" - // The same strategy of splitting the buffer in three is used but the - // combining calculation is different; the complete derivation is explained - // below. - // - // -- The basic idea -- - // - // The CRC32 instruction (available in SSE4.2) can process 8 bytes at a - // time. In recent Intel architectures the instruction takes 3 cycles; - // however the processor can pipeline up to three instructions if they - // don't depend on each other. - // - // Roughly this means that we can process three buffers in about the same - // time we can process one buffer. - // - // The idea is then to split the buffer in three, CRC the three pieces - // separately and then combine the results. - // - // Combining the results requires precomputed tables, so we must choose a - // fixed buffer length to optimize. The longer the length, the faster; but - // only buffers longer than this length will use the optimization. We choose - // two cutoffs and compute tables for both: - // - one around 512: 168*3=504 - // - one around 4KB: 1344*3=4032 - // - // -- The nitty gritty -- - // - // Let CRC(I, X) be the non-inverted CRC32-C of the sequence X (with - // initial non-inverted CRC I). This function has the following properties: - // (a) CRC(I, AB) = CRC(CRC(I, A), B) - // (b) CRC(I, A xor B) = CRC(I, A) xor CRC(0, B) - // - // Say we want to compute CRC(I, ABC) where A, B, C are three sequences of - // K bytes each, where K is a fixed constant. Let O be the sequence of K zero - // bytes. - // - // CRC(I, ABC) = CRC(I, ABO xor C) - // = CRC(I, ABO) xor CRC(0, C) - // = CRC(CRC(I, AB), O) xor CRC(0, C) - // = CRC(CRC(I, AO xor B), O) xor CRC(0, C) - // = CRC(CRC(I, AO) xor CRC(0, B), O) xor CRC(0, C) - // = CRC(CRC(CRC(I, A), O) xor CRC(0, B), O) xor CRC(0, C) - // - // The castagnoliSSE42Triple function can compute CRC(I, A), CRC(0, B), - // and CRC(0, C) efficiently. We just need to find a way to quickly compute - // CRC(uvwx, O) given a 4-byte initial value uvwx. We can precompute these - // values; since we can't have a 32-bit table, we break it up into four - // 8-bit tables: - // - // CRC(uvwx, O) = CRC(u000, O) xor - // CRC(0v00, O) xor - // CRC(00w0, O) xor - // CRC(000x, O) - // - // We can compute tables corresponding to the four terms for all 8-bit - // values. - - crc = ^crc - - // If a buffer is long enough to use the optimization, process the first few - // bytes to align the buffer to an 8 byte boundary (if necessary). - if len(p) >= castagnoliK1*3 { - delta := int(uintptr(unsafe.Pointer(&p[0])) & 7) - if delta != 0 { - delta = 8 - delta - crc = castagnoliSSE42(crc, p[:delta]) - p = p[delta:] - } - } - - // Process 3*K2 at a time. - for len(p) >= castagnoliK2*3 { - // Compute CRC(I, A), CRC(0, B), and CRC(0, C). - crcA, crcB, crcC := castagnoliSSE42Triple( - crc, 0, 0, - p, p[castagnoliK2:], p[castagnoliK2*2:], - castagnoliK2/24) - - // CRC(I, AB) = CRC(CRC(I, A), O) xor CRC(0, B) - crcAB := castagnoliShift(castagnoliSSE42TableK2, crcA) ^ crcB - // CRC(I, ABC) = CRC(CRC(I, AB), O) xor CRC(0, C) - crc = castagnoliShift(castagnoliSSE42TableK2, crcAB) ^ crcC - p = p[castagnoliK2*3:] - } - - // Process 3*K1 at a time. - for len(p) >= castagnoliK1*3 { - // Compute CRC(I, A), CRC(0, B), and CRC(0, C). - crcA, crcB, crcC := castagnoliSSE42Triple( - crc, 0, 0, - p, p[castagnoliK1:], p[castagnoliK1*2:], - castagnoliK1/24) - - // CRC(I, AB) = CRC(CRC(I, A), O) xor CRC(0, B) - crcAB := castagnoliShift(castagnoliSSE42TableK1, crcA) ^ crcB - // CRC(I, ABC) = CRC(CRC(I, AB), O) xor CRC(0, C) - crc = castagnoliShift(castagnoliSSE42TableK1, crcAB) ^ crcC - p = p[castagnoliK1*3:] - } - - // Use the simple implementation for what's left. - crc = castagnoliSSE42(crc, p) - return ^crc -} - -func archAvailableIEEE() bool { - return useFastIEEE -} - -var archIeeeTable8 *slicing8Table - -func archInitIEEE() { - if !useFastIEEE { - panic("not available") - } - // We still use slicing-by-8 for small buffers. - archIeeeTable8 = slicingMakeTable(IEEE) -} - -func archUpdateIEEE(crc uint32, p []byte) uint32 { - if !useFastIEEE { - panic("not available") - } - - if len(p) >= 64 { - left := len(p) & 15 - do := len(p) - left - crc = ^ieeeCLMUL(^crc, p[:do]) - p = p[do:] - } - if len(p) == 0 { - return crc - } - return slicingUpdate(crc, archIeeeTable8, p) -} diff --git a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_amd64.s b/cmd/gost/vendor/github.com/klauspost/crc32/crc32_amd64.s deleted file mode 100644 index e8a7941..0000000 --- a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_amd64.s +++ /dev/null @@ -1,319 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build gc - -#define NOSPLIT 4 -#define RODATA 8 - -// castagnoliSSE42 updates the (non-inverted) crc with the given buffer. -// -// func castagnoliSSE42(crc uint32, p []byte) uint32 -TEXT ·castagnoliSSE42(SB), NOSPLIT, $0 - MOVL crc+0(FP), AX // CRC value - MOVQ p+8(FP), SI // data pointer - MOVQ p_len+16(FP), CX // len(p) - - // If there are fewer than 8 bytes to process, skip alignment. - CMPQ CX, $8 - JL less_than_8 - - MOVQ SI, BX - ANDQ $7, BX - JZ aligned - - // Process the first few bytes to 8-byte align the input. - - // BX = 8 - BX. We need to process this many bytes to align. - SUBQ $1, BX - XORQ $7, BX - - BTQ $0, BX - JNC align_2 - - CRC32B (SI), AX - DECQ CX - INCQ SI - -align_2: - BTQ $1, BX - JNC align_4 - - // CRC32W (SI), AX - BYTE $0x66; BYTE $0xf2; BYTE $0x0f; BYTE $0x38; BYTE $0xf1; BYTE $0x06 - - SUBQ $2, CX - ADDQ $2, SI - -align_4: - BTQ $2, BX - JNC aligned - - // CRC32L (SI), AX - BYTE $0xf2; BYTE $0x0f; BYTE $0x38; BYTE $0xf1; BYTE $0x06 - - SUBQ $4, CX - ADDQ $4, SI - -aligned: - // The input is now 8-byte aligned and we can process 8-byte chunks. - CMPQ CX, $8 - JL less_than_8 - - CRC32Q (SI), AX - ADDQ $8, SI - SUBQ $8, CX - JMP aligned - -less_than_8: - // We may have some bytes left over; process 4 bytes, then 2, then 1. - BTQ $2, CX - JNC less_than_4 - - // CRC32L (SI), AX - BYTE $0xf2; BYTE $0x0f; BYTE $0x38; BYTE $0xf1; BYTE $0x06 - ADDQ $4, SI - -less_than_4: - BTQ $1, CX - JNC less_than_2 - - // CRC32W (SI), AX - BYTE $0x66; BYTE $0xf2; BYTE $0x0f; BYTE $0x38; BYTE $0xf1; BYTE $0x06 - ADDQ $2, SI - -less_than_2: - BTQ $0, CX - JNC done - - CRC32B (SI), AX - -done: - MOVL AX, ret+32(FP) - RET - -// castagnoliSSE42Triple updates three (non-inverted) crcs with (24*rounds) -// bytes from each buffer. -// -// func castagnoliSSE42Triple( -// crc1, crc2, crc3 uint32, -// a, b, c []byte, -// rounds uint32, -// ) (retA uint32, retB uint32, retC uint32) -TEXT ·castagnoliSSE42Triple(SB), NOSPLIT, $0 - MOVL crcA+0(FP), AX - MOVL crcB+4(FP), CX - MOVL crcC+8(FP), DX - - MOVQ a+16(FP), R8 // data pointer - MOVQ b+40(FP), R9 // data pointer - MOVQ c+64(FP), R10 // data pointer - - MOVL rounds+88(FP), R11 - -loop: - CRC32Q (R8), AX - CRC32Q (R9), CX - CRC32Q (R10), DX - - CRC32Q 8(R8), AX - CRC32Q 8(R9), CX - CRC32Q 8(R10), DX - - CRC32Q 16(R8), AX - CRC32Q 16(R9), CX - CRC32Q 16(R10), DX - - ADDQ $24, R8 - ADDQ $24, R9 - ADDQ $24, R10 - - DECQ R11 - JNZ loop - - MOVL AX, retA+96(FP) - MOVL CX, retB+100(FP) - MOVL DX, retC+104(FP) - RET - -// func haveSSE42() bool -TEXT ·haveSSE42(SB), NOSPLIT, $0 - XORQ AX, AX - INCL AX - CPUID - SHRQ $20, CX - ANDQ $1, CX - MOVB CX, ret+0(FP) - RET - -// func haveCLMUL() bool -TEXT ·haveCLMUL(SB), NOSPLIT, $0 - XORQ AX, AX - INCL AX - CPUID - SHRQ $1, CX - ANDQ $1, CX - MOVB CX, ret+0(FP) - RET - -// func haveSSE41() bool -TEXT ·haveSSE41(SB), NOSPLIT, $0 - XORQ AX, AX - INCL AX - CPUID - SHRQ $19, CX - ANDQ $1, CX - MOVB CX, ret+0(FP) - RET - -// CRC32 polynomial data -// -// These constants are lifted from the -// Linux kernel, since they avoid the costly -// PSHUFB 16 byte reversal proposed in the -// original Intel paper. -DATA r2r1kp<>+0(SB)/8, $0x154442bd4 -DATA r2r1kp<>+8(SB)/8, $0x1c6e41596 -DATA r4r3kp<>+0(SB)/8, $0x1751997d0 -DATA r4r3kp<>+8(SB)/8, $0x0ccaa009e -DATA rupolykp<>+0(SB)/8, $0x1db710641 -DATA rupolykp<>+8(SB)/8, $0x1f7011641 -DATA r5kp<>+0(SB)/8, $0x163cd6124 - -GLOBL r2r1kp<>(SB), RODATA, $16 -GLOBL r4r3kp<>(SB), RODATA, $16 -GLOBL rupolykp<>(SB), RODATA, $16 -GLOBL r5kp<>(SB), RODATA, $8 - -// Based on http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf -// len(p) must be at least 64, and must be a multiple of 16. - -// func ieeeCLMUL(crc uint32, p []byte) uint32 -TEXT ·ieeeCLMUL(SB), NOSPLIT, $0 - MOVL crc+0(FP), X0 // Initial CRC value - MOVQ p+8(FP), SI // data pointer - MOVQ p_len+16(FP), CX // len(p) - - MOVOU (SI), X1 - MOVOU 16(SI), X2 - MOVOU 32(SI), X3 - MOVOU 48(SI), X4 - PXOR X0, X1 - ADDQ $64, SI // buf+=64 - SUBQ $64, CX // len-=64 - CMPQ CX, $64 // Less than 64 bytes left - JB remain64 - - MOVOA r2r1kp<>+0(SB), X0 - -loopback64: - MOVOA X1, X5 - MOVOA X2, X6 - MOVOA X3, X7 - MOVOA X4, X8 - - PCLMULQDQ $0, X0, X1 - PCLMULQDQ $0, X0, X2 - PCLMULQDQ $0, X0, X3 - PCLMULQDQ $0, X0, X4 - - // Load next early - MOVOU (SI), X11 - MOVOU 16(SI), X12 - MOVOU 32(SI), X13 - MOVOU 48(SI), X14 - - PCLMULQDQ $0x11, X0, X5 - PCLMULQDQ $0x11, X0, X6 - PCLMULQDQ $0x11, X0, X7 - PCLMULQDQ $0x11, X0, X8 - - PXOR X5, X1 - PXOR X6, X2 - PXOR X7, X3 - PXOR X8, X4 - - PXOR X11, X1 - PXOR X12, X2 - PXOR X13, X3 - PXOR X14, X4 - - ADDQ $0x40, DI - ADDQ $64, SI // buf+=64 - SUBQ $64, CX // len-=64 - CMPQ CX, $64 // Less than 64 bytes left? - JGE loopback64 - - // Fold result into a single register (X1) -remain64: - MOVOA r4r3kp<>+0(SB), X0 - - MOVOA X1, X5 - PCLMULQDQ $0, X0, X1 - PCLMULQDQ $0x11, X0, X5 - PXOR X5, X1 - PXOR X2, X1 - - MOVOA X1, X5 - PCLMULQDQ $0, X0, X1 - PCLMULQDQ $0x11, X0, X5 - PXOR X5, X1 - PXOR X3, X1 - - MOVOA X1, X5 - PCLMULQDQ $0, X0, X1 - PCLMULQDQ $0x11, X0, X5 - PXOR X5, X1 - PXOR X4, X1 - - // If there is less than 16 bytes left we are done - CMPQ CX, $16 - JB finish - - // Encode 16 bytes -remain16: - MOVOU (SI), X10 - MOVOA X1, X5 - PCLMULQDQ $0, X0, X1 - PCLMULQDQ $0x11, X0, X5 - PXOR X5, X1 - PXOR X10, X1 - SUBQ $16, CX - ADDQ $16, SI - CMPQ CX, $16 - JGE remain16 - -finish: - // Fold final result into 32 bits and return it - PCMPEQB X3, X3 - PCLMULQDQ $1, X1, X0 - PSRLDQ $8, X1 - PXOR X0, X1 - - MOVOA X1, X2 - MOVQ r5kp<>+0(SB), X0 - - // Creates 32 bit mask. Note that we don't care about upper half. - PSRLQ $32, X3 - - PSRLDQ $4, X2 - PAND X3, X1 - PCLMULQDQ $0, X0, X1 - PXOR X2, X1 - - MOVOA rupolykp<>+0(SB), X0 - - MOVOA X1, X2 - PAND X3, X1 - PCLMULQDQ $0x10, X0, X1 - PAND X3, X1 - PCLMULQDQ $0, X0, X1 - PXOR X2, X1 - - // PEXTRD $1, X1, AX (SSE 4.1) - BYTE $0x66; BYTE $0x0f; BYTE $0x3a - BYTE $0x16; BYTE $0xc8; BYTE $0x01 - MOVL AX, ret+32(FP) - - RET diff --git a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_amd64p32.go b/cmd/gost/vendor/github.com/klauspost/crc32/crc32_amd64p32.go deleted file mode 100644 index 3222b06..0000000 --- a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_amd64p32.go +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !appengine,!gccgo - -package crc32 - -// This file contains the code to call the SSE 4.2 version of the Castagnoli -// CRC. - -// haveSSE42 is defined in crc32_amd64p32.s and uses CPUID to test for SSE 4.2 -// support. -func haveSSE42() bool - -// castagnoliSSE42 is defined in crc32_amd64p32.s and uses the SSE4.2 CRC32 -// instruction. -//go:noescape -func castagnoliSSE42(crc uint32, p []byte) uint32 - -var sse42 = haveSSE42() - -func archAvailableCastagnoli() bool { - return sse42 -} - -func archInitCastagnoli() { - if !sse42 { - panic("not available") - } - // No initialization necessary. -} - -func archUpdateCastagnoli(crc uint32, p []byte) uint32 { - if !sse42 { - panic("not available") - } - return castagnoliSSE42(crc, p) -} - -func archAvailableIEEE() bool { return false } -func archInitIEEE() { panic("not available") } -func archUpdateIEEE(crc uint32, p []byte) uint32 { panic("not available") } diff --git a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_amd64p32.s b/cmd/gost/vendor/github.com/klauspost/crc32/crc32_amd64p32.s deleted file mode 100644 index a578d68..0000000 --- a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_amd64p32.s +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build gc - -#define NOSPLIT 4 -#define RODATA 8 - -// func castagnoliSSE42(crc uint32, p []byte) uint32 -TEXT ·castagnoliSSE42(SB), NOSPLIT, $0 - MOVL crc+0(FP), AX // CRC value - MOVL p+4(FP), SI // data pointer - MOVL p_len+8(FP), CX // len(p) - - NOTL AX - - // If there's less than 8 bytes to process, we do it byte-by-byte. - CMPQ CX, $8 - JL cleanup - - // Process individual bytes until the input is 8-byte aligned. -startup: - MOVQ SI, BX - ANDQ $7, BX - JZ aligned - - CRC32B (SI), AX - DECQ CX - INCQ SI - JMP startup - -aligned: - // The input is now 8-byte aligned and we can process 8-byte chunks. - CMPQ CX, $8 - JL cleanup - - CRC32Q (SI), AX - ADDQ $8, SI - SUBQ $8, CX - JMP aligned - -cleanup: - // We may have some bytes left over that we process one at a time. - CMPQ CX, $0 - JE done - - CRC32B (SI), AX - INCQ SI - DECQ CX - JMP cleanup - -done: - NOTL AX - MOVL AX, ret+16(FP) - RET - -// func haveSSE42() bool -TEXT ·haveSSE42(SB), NOSPLIT, $0 - XORQ AX, AX - INCL AX - CPUID - SHRQ $20, CX - ANDQ $1, CX - MOVB CX, ret+0(FP) - RET - diff --git a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_generic.go b/cmd/gost/vendor/github.com/klauspost/crc32/crc32_generic.go deleted file mode 100644 index abacbb6..0000000 --- a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_generic.go +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// This file contains CRC32 algorithms that are not specific to any architecture -// and don't use hardware acceleration. -// -// The simple (and slow) CRC32 implementation only uses a 256*4 bytes table. -// -// The slicing-by-8 algorithm is a faster implementation that uses a bigger -// table (8*256*4 bytes). - -package crc32 - -// simpleMakeTable allocates and constructs a Table for the specified -// polynomial. The table is suitable for use with the simple algorithm -// (simpleUpdate). -func simpleMakeTable(poly uint32) *Table { - t := new(Table) - simplePopulateTable(poly, t) - return t -} - -// simplePopulateTable constructs a Table for the specified polynomial, suitable -// for use with simpleUpdate. -func simplePopulateTable(poly uint32, t *Table) { - for i := 0; i < 256; i++ { - crc := uint32(i) - for j := 0; j < 8; j++ { - if crc&1 == 1 { - crc = (crc >> 1) ^ poly - } else { - crc >>= 1 - } - } - t[i] = crc - } -} - -// simpleUpdate uses the simple algorithm to update the CRC, given a table that -// was previously computed using simpleMakeTable. -func simpleUpdate(crc uint32, tab *Table, p []byte) uint32 { - crc = ^crc - for _, v := range p { - crc = tab[byte(crc)^v] ^ (crc >> 8) - } - return ^crc -} - -// Use slicing-by-8 when payload >= this value. -const slicing8Cutoff = 16 - -// slicing8Table is array of 8 Tables, used by the slicing-by-8 algorithm. -type slicing8Table [8]Table - -// slicingMakeTable constructs a slicing8Table for the specified polynomial. The -// table is suitable for use with the slicing-by-8 algorithm (slicingUpdate). -func slicingMakeTable(poly uint32) *slicing8Table { - t := new(slicing8Table) - simplePopulateTable(poly, &t[0]) - for i := 0; i < 256; i++ { - crc := t[0][i] - for j := 1; j < 8; j++ { - crc = t[0][crc&0xFF] ^ (crc >> 8) - t[j][i] = crc - } - } - return t -} - -// slicingUpdate uses the slicing-by-8 algorithm to update the CRC, given a -// table that was previously computed using slicingMakeTable. -func slicingUpdate(crc uint32, tab *slicing8Table, p []byte) uint32 { - if len(p) >= slicing8Cutoff { - crc = ^crc - for len(p) > 8 { - crc ^= uint32(p[0]) | uint32(p[1])<<8 | uint32(p[2])<<16 | uint32(p[3])<<24 - crc = tab[0][p[7]] ^ tab[1][p[6]] ^ tab[2][p[5]] ^ tab[3][p[4]] ^ - tab[4][crc>>24] ^ tab[5][(crc>>16)&0xFF] ^ - tab[6][(crc>>8)&0xFF] ^ tab[7][crc&0xFF] - p = p[8:] - } - crc = ^crc - } - if len(p) == 0 { - return crc - } - return simpleUpdate(crc, &tab[0], p) -} diff --git a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_otherarch.go b/cmd/gost/vendor/github.com/klauspost/crc32/crc32_otherarch.go deleted file mode 100644 index cc96076..0000000 --- a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_otherarch.go +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright 2011 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build !amd64,!amd64p32,!s390x - -package crc32 - -func archAvailableIEEE() bool { return false } -func archInitIEEE() { panic("not available") } -func archUpdateIEEE(crc uint32, p []byte) uint32 { panic("not available") } - -func archAvailableCastagnoli() bool { return false } -func archInitCastagnoli() { panic("not available") } -func archUpdateCastagnoli(crc uint32, p []byte) uint32 { panic("not available") } diff --git a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_s390x.go b/cmd/gost/vendor/github.com/klauspost/crc32/crc32_s390x.go deleted file mode 100644 index ce96f03..0000000 --- a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_s390x.go +++ /dev/null @@ -1,91 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build s390x - -package crc32 - -const ( - vxMinLen = 64 - vxAlignMask = 15 // align to 16 bytes -) - -// hasVectorFacility reports whether the machine has the z/Architecture -// vector facility installed and enabled. -func hasVectorFacility() bool - -var hasVX = hasVectorFacility() - -// vectorizedCastagnoli implements CRC32 using vector instructions. -// It is defined in crc32_s390x.s. -//go:noescape -func vectorizedCastagnoli(crc uint32, p []byte) uint32 - -// vectorizedIEEE implements CRC32 using vector instructions. -// It is defined in crc32_s390x.s. -//go:noescape -func vectorizedIEEE(crc uint32, p []byte) uint32 - -func archAvailableCastagnoli() bool { - return hasVX -} - -var archCastagnoliTable8 *slicing8Table - -func archInitCastagnoli() { - if !hasVX { - panic("not available") - } - // We still use slicing-by-8 for small buffers. - archCastagnoliTable8 = slicingMakeTable(Castagnoli) -} - -// archUpdateCastagnoli calculates the checksum of p using -// vectorizedCastagnoli. -func archUpdateCastagnoli(crc uint32, p []byte) uint32 { - if !hasVX { - panic("not available") - } - // Use vectorized function if data length is above threshold. - if len(p) >= vxMinLen { - aligned := len(p) & ^vxAlignMask - crc = vectorizedCastagnoli(crc, p[:aligned]) - p = p[aligned:] - } - if len(p) == 0 { - return crc - } - return slicingUpdate(crc, archCastagnoliTable8, p) -} - -func archAvailableIEEE() bool { - return hasVX -} - -var archIeeeTable8 *slicing8Table - -func archInitIEEE() { - if !hasVX { - panic("not available") - } - // We still use slicing-by-8 for small buffers. - archIeeeTable8 = slicingMakeTable(IEEE) -} - -// archUpdateIEEE calculates the checksum of p using vectorizedIEEE. -func archUpdateIEEE(crc uint32, p []byte) uint32 { - if !hasVX { - panic("not available") - } - // Use vectorized function if data length is above threshold. - if len(p) >= vxMinLen { - aligned := len(p) & ^vxAlignMask - crc = vectorizedIEEE(crc, p[:aligned]) - p = p[aligned:] - } - if len(p) == 0 { - return crc - } - return slicingUpdate(crc, archIeeeTable8, p) -} diff --git a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_s390x.s b/cmd/gost/vendor/github.com/klauspost/crc32/crc32_s390x.s deleted file mode 100644 index e980ca2..0000000 --- a/cmd/gost/vendor/github.com/klauspost/crc32/crc32_s390x.s +++ /dev/null @@ -1,249 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// +build s390x - -#include "textflag.h" - -// Vector register range containing CRC-32 constants - -#define CONST_PERM_LE2BE V9 -#define CONST_R2R1 V10 -#define CONST_R4R3 V11 -#define CONST_R5 V12 -#define CONST_RU_POLY V13 -#define CONST_CRC_POLY V14 - -// The CRC-32 constant block contains reduction constants to fold and -// process particular chunks of the input data stream in parallel. -// -// Note that the constant definitions below are extended in order to compute -// intermediate results with a single VECTOR GALOIS FIELD MULTIPLY instruction. -// The rightmost doubleword can be 0 to prevent contribution to the result or -// can be multiplied by 1 to perform an XOR without the need for a separate -// VECTOR EXCLUSIVE OR instruction. -// -// The polynomials used are bit-reflected: -// -// IEEE: P'(x) = 0x0edb88320 -// Castagnoli: P'(x) = 0x082f63b78 - -// IEEE polynomial constants -DATA ·crcleconskp+0(SB)/8, $0x0F0E0D0C0B0A0908 // LE-to-BE mask -DATA ·crcleconskp+8(SB)/8, $0x0706050403020100 -DATA ·crcleconskp+16(SB)/8, $0x00000001c6e41596 // R2 -DATA ·crcleconskp+24(SB)/8, $0x0000000154442bd4 // R1 -DATA ·crcleconskp+32(SB)/8, $0x00000000ccaa009e // R4 -DATA ·crcleconskp+40(SB)/8, $0x00000001751997d0 // R3 -DATA ·crcleconskp+48(SB)/8, $0x0000000000000000 -DATA ·crcleconskp+56(SB)/8, $0x0000000163cd6124 // R5 -DATA ·crcleconskp+64(SB)/8, $0x0000000000000000 -DATA ·crcleconskp+72(SB)/8, $0x00000001F7011641 // u' -DATA ·crcleconskp+80(SB)/8, $0x0000000000000000 -DATA ·crcleconskp+88(SB)/8, $0x00000001DB710641 // P'(x) << 1 - -GLOBL ·crcleconskp(SB), RODATA, $144 - -// Castagonli Polynomial constants -DATA ·crccleconskp+0(SB)/8, $0x0F0E0D0C0B0A0908 // LE-to-BE mask -DATA ·crccleconskp+8(SB)/8, $0x0706050403020100 -DATA ·crccleconskp+16(SB)/8, $0x000000009e4addf8 // R2 -DATA ·crccleconskp+24(SB)/8, $0x00000000740eef02 // R1 -DATA ·crccleconskp+32(SB)/8, $0x000000014cd00bd6 // R4 -DATA ·crccleconskp+40(SB)/8, $0x00000000f20c0dfe // R3 -DATA ·crccleconskp+48(SB)/8, $0x0000000000000000 -DATA ·crccleconskp+56(SB)/8, $0x00000000dd45aab8 // R5 -DATA ·crccleconskp+64(SB)/8, $0x0000000000000000 -DATA ·crccleconskp+72(SB)/8, $0x00000000dea713f1 // u' -DATA ·crccleconskp+80(SB)/8, $0x0000000000000000 -DATA ·crccleconskp+88(SB)/8, $0x0000000105ec76f0 // P'(x) << 1 - -GLOBL ·crccleconskp(SB), RODATA, $144 - -// func hasVectorFacility() bool -TEXT ·hasVectorFacility(SB), NOSPLIT, $24-1 - MOVD $x-24(SP), R1 - XC $24, 0(R1), 0(R1) // clear the storage - MOVD $2, R0 // R0 is the number of double words stored -1 - WORD $0xB2B01000 // STFLE 0(R1) - XOR R0, R0 // reset the value of R0 - MOVBZ z-8(SP), R1 - AND $0x40, R1 - BEQ novector - -vectorinstalled: - // check if the vector instruction has been enabled - VLEIB $0, $0xF, V16 - VLGVB $0, V16, R1 - CMPBNE R1, $0xF, novector - MOVB $1, ret+0(FP) // have vx - RET - -novector: - MOVB $0, ret+0(FP) // no vx - RET - -// The CRC-32 function(s) use these calling conventions: -// -// Parameters: -// -// R2: Initial CRC value, typically ~0; and final CRC (return) value. -// R3: Input buffer pointer, performance might be improved if the -// buffer is on a doubleword boundary. -// R4: Length of the buffer, must be 64 bytes or greater. -// -// Register usage: -// -// R5: CRC-32 constant pool base pointer. -// V0: Initial CRC value and intermediate constants and results. -// V1..V4: Data for CRC computation. -// V5..V8: Next data chunks that are fetched from the input buffer. -// -// V9..V14: CRC-32 constants. - -// func vectorizedIEEE(crc uint32, p []byte) uint32 -TEXT ·vectorizedIEEE(SB), NOSPLIT, $0 - MOVWZ crc+0(FP), R2 // R2 stores the CRC value - MOVD p+8(FP), R3 // data pointer - MOVD p_len+16(FP), R4 // len(p) - - MOVD $·crcleconskp(SB), R5 - BR vectorizedBody<>(SB) - -// func vectorizedCastagnoli(crc uint32, p []byte) uint32 -TEXT ·vectorizedCastagnoli(SB), NOSPLIT, $0 - MOVWZ crc+0(FP), R2 // R2 stores the CRC value - MOVD p+8(FP), R3 // data pointer - MOVD p_len+16(FP), R4 // len(p) - - // R5: crc-32 constant pool base pointer, constant is used to reduce crc - MOVD $·crccleconskp(SB), R5 - BR vectorizedBody<>(SB) - -TEXT vectorizedBody<>(SB), NOSPLIT, $0 - XOR $0xffffffff, R2 // NOTW R2 - VLM 0(R5), CONST_PERM_LE2BE, CONST_CRC_POLY - - // Load the initial CRC value into the rightmost word of V0 - VZERO V0 - VLVGF $3, R2, V0 - - // Crash if the input size is less than 64-bytes. - CMP R4, $64 - BLT crash - - // Load a 64-byte data chunk and XOR with CRC - VLM 0(R3), V1, V4 // 64-bytes into V1..V4 - - // Reflect the data if the CRC operation is in the bit-reflected domain - VPERM V1, V1, CONST_PERM_LE2BE, V1 - VPERM V2, V2, CONST_PERM_LE2BE, V2 - VPERM V3, V3, CONST_PERM_LE2BE, V3 - VPERM V4, V4, CONST_PERM_LE2BE, V4 - - VX V0, V1, V1 // V1 ^= CRC - ADD $64, R3 // BUF = BUF + 64 - ADD $(-64), R4 - - // Check remaining buffer size and jump to proper folding method - CMP R4, $64 - BLT less_than_64bytes - -fold_64bytes_loop: - // Load the next 64-byte data chunk into V5 to V8 - VLM 0(R3), V5, V8 - VPERM V5, V5, CONST_PERM_LE2BE, V5 - VPERM V6, V6, CONST_PERM_LE2BE, V6 - VPERM V7, V7, CONST_PERM_LE2BE, V7 - VPERM V8, V8, CONST_PERM_LE2BE, V8 - - // Perform a GF(2) multiplication of the doublewords in V1 with - // the reduction constants in V0. The intermediate result is - // then folded (accumulated) with the next data chunk in V5 and - // stored in V1. Repeat this step for the register contents - // in V2, V3, and V4 respectively. - - VGFMAG CONST_R2R1, V1, V5, V1 - VGFMAG CONST_R2R1, V2, V6, V2 - VGFMAG CONST_R2R1, V3, V7, V3 - VGFMAG CONST_R2R1, V4, V8, V4 - - // Adjust buffer pointer and length for next loop - ADD $64, R3 // BUF = BUF + 64 - ADD $(-64), R4 // LEN = LEN - 64 - - CMP R4, $64 - BGE fold_64bytes_loop - -less_than_64bytes: - // Fold V1 to V4 into a single 128-bit value in V1 - VGFMAG CONST_R4R3, V1, V2, V1 - VGFMAG CONST_R4R3, V1, V3, V1 - VGFMAG CONST_R4R3, V1, V4, V1 - - // Check whether to continue with 64-bit folding - CMP R4, $16 - BLT final_fold - -fold_16bytes_loop: - VL 0(R3), V2 // Load next data chunk - VPERM V2, V2, CONST_PERM_LE2BE, V2 - - VGFMAG CONST_R4R3, V1, V2, V1 // Fold next data chunk - - // Adjust buffer pointer and size for folding next data chunk - ADD $16, R3 - ADD $-16, R4 - - // Process remaining data chunks - CMP R4, $16 - BGE fold_16bytes_loop - -final_fold: - VLEIB $7, $0x40, V9 - VSRLB V9, CONST_R4R3, V0 - VLEIG $0, $1, V0 - - VGFMG V0, V1, V1 - - VLEIB $7, $0x20, V9 // Shift by words - VSRLB V9, V1, V2 // Store remaining bits in V2 - VUPLLF V1, V1 // Split rightmost doubleword - VGFMAG CONST_R5, V1, V2, V1 // V1 = (V1 * R5) XOR V2 - - // The input values to the Barret reduction are the degree-63 polynomial - // in V1 (R(x)), degree-32 generator polynomial, and the reduction - // constant u. The Barret reduction result is the CRC value of R(x) mod - // P(x). - // - // The Barret reduction algorithm is defined as: - // - // 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u - // 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x) - // 3. C(x) = R(x) XOR T2(x) mod x^32 - // - // Note: To compensate the division by x^32, use the vector unpack - // instruction to move the leftmost word into the leftmost doubleword - // of the vector register. The rightmost doubleword is multiplied - // with zero to not contribute to the intermedate results. - - // T1(x) = floor( R(x) / x^32 ) GF2MUL u - VUPLLF V1, V2 - VGFMG CONST_RU_POLY, V2, V2 - - // Compute the GF(2) product of the CRC polynomial in VO with T1(x) in - // V2 and XOR the intermediate result, T2(x), with the value in V1. - // The final result is in the rightmost word of V2. - - VUPLLF V2, V2 - VGFMAG CONST_CRC_POLY, V2, V1, V2 - -done: - VLGVF $2, V2, R2 - XOR $0xffffffff, R2 // NOTW R2 - MOVWZ R2, ret + 32(FP) - RET - -crash: - MOVD $0, (R0) // input size is less than 64-bytes diff --git a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/README.md b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/README.md index 3f34820..c8139b3 100644 --- a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/README.md +++ b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/README.md @@ -1,4 +1,5 @@ -# kcp-go +kcp-go + [![GoDoc][1]][2] [![Powered][9]][10] [![MIT licensed][11]][12] [![Build Status][3]][4] [![Go Report Card][5]][6] [![Coverage Statusd][7]][8] @@ -19,12 +20,12 @@ ## Introduction -kcp-go is a full-featured ***reliable-UDP*** library for golang. It provides ***reliable, ordered, and error-checked*** delivery of a stream of octets between applications running on hosts communicating over an IP network. +kcp-go is a full-featured ***Reliable-UDP*** library for golang. It provides ***reliable, ordered, and error-checked*** delivery of a stream of octets between applications running on hosts communicating over an IP network. ## Features -1. Optimized for ***Real-Time Strategy Game***. -1. Compatible with [skywind3000's](https://github.com/skywind3000) C version with modifications. +1. Optimized for ***Online Games, Audio/Video Streaming***. +1. Compatible with [skywind3000's](https://github.com/skywind3000) C version with optimizations. 1. ***Cache friendly*** and ***Memory optimized*** design in golang. 1. Compatible with [net.Conn](https://golang.org/pkg/net/#Conn) and [net.Listener](https://golang.org/pkg/net/#Listener). 1. [FEC(Forward Error Correction)](https://en.wikipedia.org/wiki/Forward_error_correction) Support with [Reed-Solomon Codes](https://en.wikipedia.org/wiki/Reed%E2%80%93Solomon_error_correction) @@ -40,7 +41,7 @@ For complete documentation, see the associated [Godoc](https://godoc.org/github. ## Specification -# Frame Format +Frame Format ## Usage @@ -75,14 +76,14 @@ PASS ok github.com/xtaci/kcp-go 0.600s ``` +## Who is using this? + +1. https://github.com/xtaci/kcptun +2. https://github.com/getlantern/lantern +3. https://github.com/smallnest/rpcx + ## Links -1. https://github.com/xtaci/libkcp -- Official client library for iOS/Android(C++11) +1. https://github.com/xtaci/libkcp -- FEC enhanced KCP session library for iOS/Android in C++ 2. https://github.com/skywind3000/kcp -- A Fast and Reliable ARQ Protocol 3. https://github.com/klauspost/reedsolomon -- Reed-Solomon Erasure Coding in Go - -## Donation - -![donate](donate.png) - -All donations on this project will be used to support the development of [gonet/2](http://gonet2.github.io/). diff --git a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/crypt.go b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/crypt.go index df85278..2e456b8 100644 --- a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/crypt.go +++ b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/crypt.go @@ -20,7 +20,9 @@ var ( saltxor = `sH3CIVoF#rWLtJo6` ) -// BlockCrypt defines encryption/decryption methods for a given byte slice +// BlockCrypt defines encryption/decryption methods for a given byte slice. +// Notes on implementing: the data to be encrypted contains a builtin +// nonce at the first 16 bytes type BlockCrypt interface { // Encrypt encrypts the whole block in src into dst. // Dst and src may point at the same memory. @@ -31,40 +33,35 @@ type BlockCrypt interface { Decrypt(dst, src []byte) } -// Salsa20BlockCrypt implements BlockCrypt -type Salsa20BlockCrypt struct { +type salsa20BlockCrypt struct { key [32]byte } -// NewSalsa20BlockCrypt initates BlockCrypt by the given key +// NewSalsa20BlockCrypt https://en.wikipedia.org/wiki/Salsa20 func NewSalsa20BlockCrypt(key []byte) (BlockCrypt, error) { - c := new(Salsa20BlockCrypt) + c := new(salsa20BlockCrypt) copy(c.key[:], key) return c, nil } -// Encrypt implements Encrypt interface -func (c *Salsa20BlockCrypt) Encrypt(dst, src []byte) { +func (c *salsa20BlockCrypt) Encrypt(dst, src []byte) { + salsa20.XORKeyStream(dst[8:], src[8:], src[:8], &c.key) + copy(dst[:8], src[:8]) +} +func (c *salsa20BlockCrypt) Decrypt(dst, src []byte) { salsa20.XORKeyStream(dst[8:], src[8:], src[:8], &c.key) copy(dst[:8], src[:8]) } -// Decrypt implements Decrypt interface -func (c *Salsa20BlockCrypt) Decrypt(dst, src []byte) { - salsa20.XORKeyStream(dst[8:], src[8:], src[:8], &c.key) - copy(dst[:8], src[:8]) -} - -// TwofishBlockCrypt implements BlockCrypt -type TwofishBlockCrypt struct { +type twofishBlockCrypt struct { encbuf []byte decbuf []byte block cipher.Block } -// NewTwofishBlockCrypt initates BlockCrypt by the given key +// NewTwofishBlockCrypt https://en.wikipedia.org/wiki/Twofish func NewTwofishBlockCrypt(key []byte) (BlockCrypt, error) { - c := new(TwofishBlockCrypt) + c := new(twofishBlockCrypt) block, err := twofish.NewCipher(key) if err != nil { return nil, err @@ -75,22 +72,18 @@ func NewTwofishBlockCrypt(key []byte) (BlockCrypt, error) { return c, nil } -// Encrypt implements Encrypt interface -func (c *TwofishBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) } +func (c *twofishBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) } +func (c *twofishBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) } -// Decrypt implements Decrypt interface -func (c *TwofishBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) } - -// TripleDESBlockCrypt implements BlockCrypt -type TripleDESBlockCrypt struct { +type tripleDESBlockCrypt struct { encbuf []byte decbuf []byte block cipher.Block } -// NewTripleDESBlockCrypt initates BlockCrypt by the given key +// NewTripleDESBlockCrypt https://en.wikipedia.org/wiki/Triple_DES func NewTripleDESBlockCrypt(key []byte) (BlockCrypt, error) { - c := new(TripleDESBlockCrypt) + c := new(tripleDESBlockCrypt) block, err := des.NewTripleDESCipher(key) if err != nil { return nil, err @@ -101,22 +94,18 @@ func NewTripleDESBlockCrypt(key []byte) (BlockCrypt, error) { return c, nil } -// Encrypt implements Encrypt interface -func (c *TripleDESBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) } +func (c *tripleDESBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) } +func (c *tripleDESBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) } -// Decrypt implements Decrypt interface -func (c *TripleDESBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) } - -// Cast5BlockCrypt implements BlockCrypt -type Cast5BlockCrypt struct { +type cast5BlockCrypt struct { encbuf []byte decbuf []byte block cipher.Block } -// NewCast5BlockCrypt initates BlockCrypt by the given key +// NewCast5BlockCrypt https://en.wikipedia.org/wiki/CAST-128 func NewCast5BlockCrypt(key []byte) (BlockCrypt, error) { - c := new(Cast5BlockCrypt) + c := new(cast5BlockCrypt) block, err := cast5.NewCipher(key) if err != nil { return nil, err @@ -127,22 +116,18 @@ func NewCast5BlockCrypt(key []byte) (BlockCrypt, error) { return c, nil } -// Encrypt implements Encrypt interface -func (c *Cast5BlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) } +func (c *cast5BlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) } +func (c *cast5BlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) } -// Decrypt implements Decrypt interface -func (c *Cast5BlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) } - -// BlowfishBlockCrypt implements BlockCrypt -type BlowfishBlockCrypt struct { +type blowfishBlockCrypt struct { encbuf []byte decbuf []byte block cipher.Block } -// NewBlowfishBlockCrypt initates BlockCrypt by the given key +// NewBlowfishBlockCrypt https://en.wikipedia.org/wiki/Blowfish_(cipher) func NewBlowfishBlockCrypt(key []byte) (BlockCrypt, error) { - c := new(BlowfishBlockCrypt) + c := new(blowfishBlockCrypt) block, err := blowfish.NewCipher(key) if err != nil { return nil, err @@ -153,22 +138,18 @@ func NewBlowfishBlockCrypt(key []byte) (BlockCrypt, error) { return c, nil } -// Encrypt implements Encrypt interface -func (c *BlowfishBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) } +func (c *blowfishBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) } +func (c *blowfishBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) } -// Decrypt implements Decrypt interface -func (c *BlowfishBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) } - -// AESBlockCrypt implements BlockCrypt -type AESBlockCrypt struct { +type aesBlockCrypt struct { encbuf []byte decbuf []byte block cipher.Block } -// NewAESBlockCrypt initates BlockCrypt by the given key +// NewAESBlockCrypt https://en.wikipedia.org/wiki/Advanced_Encryption_Standard func NewAESBlockCrypt(key []byte) (BlockCrypt, error) { - c := new(AESBlockCrypt) + c := new(aesBlockCrypt) block, err := aes.NewCipher(key) if err != nil { return nil, err @@ -179,22 +160,18 @@ func NewAESBlockCrypt(key []byte) (BlockCrypt, error) { return c, nil } -// Encrypt implements Encrypt interface -func (c *AESBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) } +func (c *aesBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) } +func (c *aesBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) } -// Decrypt implements Decrypt interface -func (c *AESBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) } - -// TEABlockCrypt implements BlockCrypt -type TEABlockCrypt struct { +type teaBlockCrypt struct { encbuf []byte decbuf []byte block cipher.Block } -// NewTEABlockCrypt initate BlockCrypt by the given key +// NewTEABlockCrypt https://en.wikipedia.org/wiki/Tiny_Encryption_Algorithm func NewTEABlockCrypt(key []byte) (BlockCrypt, error) { - c := new(TEABlockCrypt) + c := new(teaBlockCrypt) block, err := tea.NewCipherWithRounds(key, 16) if err != nil { return nil, err @@ -205,22 +182,18 @@ func NewTEABlockCrypt(key []byte) (BlockCrypt, error) { return c, nil } -// Encrypt implements Encrypt interface -func (c *TEABlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) } +func (c *teaBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) } +func (c *teaBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) } -// Decrypt implements Decrypt interface -func (c *TEABlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) } - -// XTEABlockCrypt implements BlockCrypt -type XTEABlockCrypt struct { +type xteaBlockCrypt struct { encbuf []byte decbuf []byte block cipher.Block } -// NewXTEABlockCrypt initate BlockCrypt by the given key +// NewXTEABlockCrypt https://en.wikipedia.org/wiki/XTEA func NewXTEABlockCrypt(key []byte) (BlockCrypt, error) { - c := new(XTEABlockCrypt) + c := new(xteaBlockCrypt) block, err := xtea.NewCipher(key) if err != nil { return nil, err @@ -231,43 +204,32 @@ func NewXTEABlockCrypt(key []byte) (BlockCrypt, error) { return c, nil } -// Encrypt implements Encrypt interface -func (c *XTEABlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) } +func (c *xteaBlockCrypt) Encrypt(dst, src []byte) { encrypt(c.block, dst, src, c.encbuf) } +func (c *xteaBlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) } -// Decrypt implements Decrypt interface -func (c *XTEABlockCrypt) Decrypt(dst, src []byte) { decrypt(c.block, dst, src, c.decbuf) } - -// SimpleXORBlockCrypt implements BlockCrypt -type SimpleXORBlockCrypt struct { +type simpleXORBlockCrypt struct { xortbl []byte } -// NewSimpleXORBlockCrypt initate BlockCrypt by the given key +// NewSimpleXORBlockCrypt simple xor with key expanding func NewSimpleXORBlockCrypt(key []byte) (BlockCrypt, error) { - c := new(SimpleXORBlockCrypt) + c := new(simpleXORBlockCrypt) c.xortbl = pbkdf2.Key(key, []byte(saltxor), 32, mtuLimit, sha1.New) return c, nil } -// Encrypt implements Encrypt interface -func (c *SimpleXORBlockCrypt) Encrypt(dst, src []byte) { xorBytes(dst, src, c.xortbl) } +func (c *simpleXORBlockCrypt) Encrypt(dst, src []byte) { xorBytes(dst, src, c.xortbl) } +func (c *simpleXORBlockCrypt) Decrypt(dst, src []byte) { xorBytes(dst, src, c.xortbl) } -// Decrypt implements Decrypt interface -func (c *SimpleXORBlockCrypt) Decrypt(dst, src []byte) { xorBytes(dst, src, c.xortbl) } +type noneBlockCrypt struct{} -// NoneBlockCrypt simple returns the plaintext -type NoneBlockCrypt struct{} - -// NewNoneBlockCrypt initate by the given key +// NewNoneBlockCrypt does nothing but copying func NewNoneBlockCrypt(key []byte) (BlockCrypt, error) { - return new(NoneBlockCrypt), nil + return new(noneBlockCrypt), nil } -// Encrypt implements Encrypt interface -func (c *NoneBlockCrypt) Encrypt(dst, src []byte) { copy(dst, src) } - -// Decrypt implements Decrypt interface -func (c *NoneBlockCrypt) Decrypt(dst, src []byte) { copy(dst, src) } +func (c *noneBlockCrypt) Encrypt(dst, src []byte) { copy(dst, src) } +func (c *noneBlockCrypt) Decrypt(dst, src []byte) { copy(dst, src) } // packet encryption with local CFB mode func encrypt(block cipher.Block, dst, src, buf []byte) { diff --git a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/fec.go b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/fec.go index 10ad1c0..25201bb 100644 --- a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/fec.go +++ b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/fec.go @@ -2,7 +2,7 @@ package kcp import ( "encoding/binary" - "sync" + "sync/atomic" "github.com/klauspost/reedsolomon" ) @@ -26,10 +26,10 @@ type ( next uint32 // next seqid enc reedsolomon.Encoder shards [][]byte + shards2 [][]byte // for calcECC shardsflag []bool paws uint32 // Protect Against Wrapped Sequence numbers lastCheck uint32 - xmitBuf sync.Pool } fecPacket struct { @@ -60,11 +60,8 @@ func newFEC(rxlimit, dataShards, parityShards int) *FEC { } fec.enc = enc fec.shards = make([][]byte, fec.shardSize) + fec.shards2 = make([][]byte, fec.shardSize) fec.shardsflag = make([]bool, fec.shardSize) - fec.xmitBuf.New = func() interface{} { - return make([]byte, mtuLimit) - } - return fec } @@ -75,9 +72,8 @@ func (fec *FEC) decode(data []byte) fecPacket { pkt.flag = binary.LittleEndian.Uint16(data[4:]) pkt.ts = currentMs() // allocate memory & copy - buf := fec.xmitBuf.Get().([]byte) - n := copy(buf, data[6:]) - xorBytes(buf[n:], buf[n:], buf[n:]) + buf := xmitBuf.Get().([]byte)[:len(data)-6] + copy(buf, data[6:]) pkt.data = buf return pkt } @@ -107,7 +103,7 @@ func (fec *FEC) input(pkt fecPacket) (recovered [][]byte) { if now-fec.rx[k].ts < fecExpire { rx = append(rx, fec.rx[k]) } else { - fec.xmitBuf.Put(fec.rx[k].data) + xmitBuf.Put(fec.rx[k].data) } } fec.rx = rx @@ -119,7 +115,7 @@ func (fec *FEC) input(pkt fecPacket) (recovered [][]byte) { insertIdx := 0 for i := n; i >= 0; i-- { if pkt.seqid == fec.rx[i].seqid { // de-duplicate - fec.xmitBuf.Put(pkt.data) + xmitBuf.Put(pkt.data) return nil } else if pkt.seqid > fec.rx[i].seqid { // insertion insertIdx = i + 1 @@ -184,7 +180,7 @@ func (fec *FEC) input(pkt fecPacket) (recovered [][]byte) { if numDataShard == fec.dataShards { // no lost for i := first; i < first+numshard; i++ { // free - fec.xmitBuf.Put(fec.rx[i].data) + xmitBuf.Put(fec.rx[i].data) } copy(fec.rx[first:], fec.rx[first+numshard:]) for i := 0; i < numshard; i++ { // dereference @@ -194,7 +190,9 @@ func (fec *FEC) input(pkt fecPacket) (recovered [][]byte) { } else if numshard >= fec.dataShards { // recoverable for k := range shards { if shards[k] != nil { + dlen := len(shards[k]) shards[k] = shards[k][:maxlen] + xorBytes(shards[k][dlen:], shards[k][dlen:], shards[k][dlen:]) } } if err := fec.enc.Reconstruct(shards); err == nil { @@ -206,7 +204,7 @@ func (fec *FEC) input(pkt fecPacket) (recovered [][]byte) { } for i := first; i < first+numshard; i++ { // free - fec.xmitBuf.Put(fec.rx[i].data) + xmitBuf.Put(fec.rx[i].data) } copy(fec.rx[first:], fec.rx[first+numshard:]) for i := 0; i < numshard; i++ { // dereference @@ -218,7 +216,10 @@ func (fec *FEC) input(pkt fecPacket) (recovered [][]byte) { // keep rxlimit if len(fec.rx) > fec.rxlimit { - fec.xmitBuf.Put(fec.rx[0].data) // free + if fec.rx[0].flag == typeData { // record unrecoverable data + atomic.AddUint64(&DefaultSnmp.FECShortShards, 1) + } + xmitBuf.Put(fec.rx[0].data) // free fec.rx[0].data = nil fec.rx = fec.rx[1:] } @@ -229,7 +230,7 @@ func (fec *FEC) calcECC(data [][]byte, offset, maxlen int) (ecc [][]byte) { if len(data) != fec.shardSize { return nil } - shards := make([][]byte, fec.shardSize) + shards := fec.shards2 for k := range shards { shards[k] = data[k][offset:maxlen] } diff --git a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/frame.png b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/frame.png index 7952e4a..0b0aefd 100644 Binary files a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/frame.png and b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/frame.png differ diff --git a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/kcp-go.png b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/kcp-go.png new file mode 100644 index 0000000..151b7c4 Binary files /dev/null and b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/kcp-go.png differ diff --git a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/kcp.go b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/kcp.go index 78ccf26..f53e834 100644 --- a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/kcp.go +++ b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/kcp.go @@ -2,7 +2,6 @@ package kcp import ( - "container/heap" "encoding/binary" "sync/atomic" ) @@ -123,13 +122,6 @@ func (seg *Segment) encode(ptr []byte) []byte { return ptr } -// NewSegment creates a KCP segment -func NewSegment(size int) *Segment { - seg := new(Segment) - seg.data = make([]byte, size) - return seg -} - // KCP defines a single KCP connection type KCP struct { conv, mtu, mss, state uint32 @@ -137,7 +129,7 @@ type KCP struct { ssthresh uint32 rx_rttval, rx_srtt, rx_rto, rx_minrto uint32 snd_wnd, rcv_wnd, rmt_wnd, cwnd, probe uint32 - current, interval, ts_flush, xmit uint32 + interval, ts_flush, xmit uint32 nodelay, updated uint32 ts_probe, probe_wait uint32 dead_link, incr uint32 @@ -150,33 +142,17 @@ type KCP struct { snd_buf []Segment rcv_buf []Segment - acklist ACKList + acklist []ackItem buffer []byte output Output } -// ACK packet to return -type ACK struct { +type ackItem struct { sn uint32 ts uint32 } -// ACKList is heapified -type ACKList []ACK - -func (l ACKList) Len() int { return len(l) } -func (l ACKList) Less(i, j int) bool { return l[i].sn < l[j].sn } -func (l ACKList) Swap(i, j int) { l[i], l[j] = l[j], l[i] } -func (l *ACKList) Push(x interface{}) { *l = append(*l, x.(ACK)) } -func (l *ACKList) Pop() interface{} { - old := *l - n := len(old) - x := old[n-1] - *l = old[0 : n-1] - return x -} - // NewKCP create a new kcp control object, 'conv' must equal in two endpoint // from the same connection. func NewKCP(conv uint32, output Output) *KCP { @@ -198,6 +174,18 @@ func NewKCP(conv uint32, output Output) *KCP { return kcp } +// newSegment creates a KCP segment +func (kcp *KCP) newSegment(size int) *Segment { + seg := new(Segment) + seg.data = xmitBuf.Get().([]byte)[:size] + return seg +} + +// delSegment recycles a KCP segment +func (kcp *KCP) delSegment(seg *Segment) { + xmitBuf.Put(seg.data) +} + // PeekSize checks the size of next message in the recv queue func (kcp *KCP) PeekSize() (length int) { if len(kcp.rcv_queue) == 0 { @@ -251,7 +239,7 @@ func (kcp *KCP) Recv(buffer []byte) (n int) { buffer = buffer[len(seg.data):] n += len(seg.data) count++ - seg.data = nil + kcp.delSegment(seg) if seg.frg == 0 { break } @@ -263,14 +251,13 @@ func (kcp *KCP) Recv(buffer []byte) (n int) { for k := range kcp.rcv_buf { seg := &kcp.rcv_buf[k] if seg.sn == kcp.rcv_nxt && len(kcp.rcv_queue) < int(kcp.rcv_wnd) { - kcp.rcv_queue = append(kcp.rcv_queue, *seg) kcp.rcv_nxt++ count++ - seg.data = nil } else { break } } + kcp.rcv_queue = append(kcp.rcv_queue, kcp.rcv_buf[:count]...) kcp.rcv_buf = kcp.rcv_buf[count:] // fast recover @@ -300,11 +287,12 @@ func (kcp *KCP) Send(buffer []byte) int { if len(buffer) < capacity { extend = len(buffer) } - seg := NewSegment(len(old.data) + extend) + seg := kcp.newSegment(len(old.data) + extend) seg.frg = 0 copy(seg.data, old.data) copy(seg.data[len(old.data):], buffer) buffer = buffer[extend:] + kcp.delSegment(old) kcp.snd_queue[n-1] = *seg } } @@ -335,7 +323,7 @@ func (kcp *KCP) Send(buffer []byte) int { } else { size = len(buffer) } - seg := NewSegment(size) + seg := kcp.newSegment(size) copy(seg.data, buffer[:size]) if kcp.stream == 0 { // message mode seg.frg = uint32(count - i - 1) @@ -348,8 +336,8 @@ func (kcp *KCP) Send(buffer []byte) int { return 0 } -// https://tools.ietf.org/html/rfc6298 func (kcp *KCP) update_ack(rtt int32) { + // https://tools.ietf.org/html/rfc6298 var rto uint32 if kcp.rx_srtt == 0 { kcp.rx_srtt = uint32(rtt) @@ -365,7 +353,7 @@ func (kcp *KCP) update_ack(rtt int32) { kcp.rx_srtt = 1 } } - rto = kcp.rx_srtt + _imax_(1, 4*kcp.rx_rttval) + rto = kcp.rx_srtt + _imax_(kcp.interval, 4*kcp.rx_rttval) kcp.rx_rto = _ibound_(kcp.rx_minrto, rto, IKCP_RTO_MAX) } @@ -386,6 +374,7 @@ func (kcp *KCP) parse_ack(sn uint32) { for k := range kcp.snd_buf { seg := &kcp.snd_buf[k] if sn == seg.sn { + kcp.delSegment(seg) copy(kcp.snd_buf[k:], kcp.snd_buf[k+1:]) kcp.snd_buf[len(kcp.snd_buf)-1] = Segment{} kcp.snd_buf = kcp.snd_buf[:len(kcp.snd_buf)-1] @@ -417,8 +406,8 @@ func (kcp *KCP) parse_una(una uint32) { for k := range kcp.snd_buf { seg := &kcp.snd_buf[k] if _itimediff(una, seg.sn) > 0 { + kcp.delSegment(seg) count++ - seg.data = nil } else { break } @@ -428,14 +417,14 @@ func (kcp *KCP) parse_una(una uint32) { // ack append func (kcp *KCP) ack_push(sn, ts uint32) { - heap.Push(&kcp.acklist, ACK{sn, ts}) + kcp.acklist = append(kcp.acklist, ackItem{sn, ts}) } func (kcp *KCP) parse_data(newseg *Segment) { sn := newseg.sn if _itimediff(sn, kcp.rcv_nxt+kcp.rcv_wnd) >= 0 || _itimediff(sn, kcp.rcv_nxt) < 0 { - atomic.AddUint64(&DefaultSnmp.RepeatSegs, 1) + kcp.delSegment(newseg) return } @@ -463,6 +452,8 @@ func (kcp *KCP) parse_data(newseg *Segment) { copy(kcp.rcv_buf[insert_idx+1:], kcp.rcv_buf[insert_idx:]) kcp.rcv_buf[insert_idx] = *newseg } + } else { + kcp.delSegment(newseg) } // move available data from rcv_buf -> rcv_queue @@ -470,14 +461,13 @@ func (kcp *KCP) parse_data(newseg *Segment) { for k := range kcp.rcv_buf { seg := &kcp.rcv_buf[k] if seg.sn == kcp.rcv_nxt && len(kcp.rcv_queue) < int(kcp.rcv_wnd) { - kcp.rcv_queue = append(kcp.rcv_queue, kcp.rcv_buf[k]) kcp.rcv_nxt++ count++ - seg.data = nil } else { break } } + kcp.rcv_queue = append(kcp.rcv_queue, kcp.rcv_buf[:count]...) kcp.rcv_buf = kcp.rcv_buf[count:] } @@ -489,7 +479,9 @@ func (kcp *KCP) Input(data []byte, update_ack bool) int { } var maxack uint32 + var recentack uint32 var flag int + for { var ts, sn, length, una, conv uint32 var wnd uint16 @@ -525,9 +517,6 @@ func (kcp *KCP) Input(data []byte, update_ack bool) int { kcp.shrink_buf() if cmd == IKCP_CMD_ACK { - if update_ack && _itimediff(kcp.current, ts) >= 0 { - kcp.update_ack(_itimediff(kcp.current, ts)) - } kcp.parse_ack(sn) kcp.shrink_buf() if flag == 0 { @@ -536,11 +525,12 @@ func (kcp *KCP) Input(data []byte, update_ack bool) int { } else if _itimediff(sn, maxack) > 0 { maxack = sn } + recentack = ts } else if cmd == IKCP_CMD_PUSH { if _itimediff(sn, kcp.rcv_nxt+kcp.rcv_wnd) < 0 { kcp.ack_push(sn, ts) if _itimediff(sn, kcp.rcv_nxt) >= 0 { - seg := NewSegment(int(length)) + seg := kcp.newSegment(int(length)) seg.conv = conv seg.cmd = uint32(cmd) seg.frg = uint32(frg) @@ -550,7 +540,11 @@ func (kcp *KCP) Input(data []byte, update_ack bool) int { seg.una = una copy(seg.data, data[:length]) kcp.parse_data(seg) + } else { + atomic.AddUint64(&DefaultSnmp.RepeatSegs, 1) } + } else { + atomic.AddUint64(&DefaultSnmp.RepeatSegs, 1) } } else if cmd == IKCP_CMD_WASK { // ready to send back IKCP_CMD_WINS in Ikcp_flush @@ -565,8 +559,12 @@ func (kcp *KCP) Input(data []byte, update_ack bool) int { data = data[length:] } + current := currentMs() if flag != 0 && update_ack { kcp.parse_fastack(maxack) + if _itimediff(current, recentack) >= 0 { + kcp.update_ack(_itimediff(current, recentack)) + } } if _itimediff(kcp.snd_una, una) > 0 { @@ -603,14 +601,10 @@ func (kcp *KCP) wnd_unused() int32 { // flush pending data func (kcp *KCP) flush() { - current := kcp.current buffer := kcp.buffer change := 0 lost := false - if kcp.updated == 0 { - return - } var seg Segment seg.conv = kcp.conv seg.cmd = IKCP_CMD_ACK @@ -619,25 +613,28 @@ func (kcp *KCP) flush() { // flush acknowledges ptr := buffer - for kcp.acklist.Len() > 0 { + for i, ack := range kcp.acklist { size := len(buffer) - len(ptr) if size+IKCP_OVERHEAD > int(kcp.mtu) { kcp.output(buffer, size) ptr = buffer } - ack := heap.Pop(&kcp.acklist).(ACK) - seg.sn, seg.ts = ack.sn, ack.ts - ptr = seg.encode(ptr) + // filter jitters caused by bufferbloat + if ack.sn >= kcp.rcv_nxt || len(kcp.acklist)-1 == i { + seg.sn, seg.ts = ack.sn, ack.ts + ptr = seg.encode(ptr) + } } kcp.acklist = nil + current := currentMs() // probe window size (if remote window size equals zero) if kcp.rmt_wnd == 0 { if kcp.probe_wait == 0 { kcp.probe_wait = IKCP_PROBE_INIT - kcp.ts_probe = kcp.current + kcp.probe_wait + kcp.ts_probe = current + kcp.probe_wait } else { - if _itimediff(kcp.current, kcp.ts_probe) >= 0 { + if _itimediff(current, kcp.ts_probe) >= 0 { if kcp.probe_wait < IKCP_PROBE_INIT { kcp.probe_wait = IKCP_PROBE_INIT } @@ -645,7 +642,7 @@ func (kcp *KCP) flush() { if kcp.probe_wait > IKCP_PROBE_LIMIT { kcp.probe_wait = IKCP_PROBE_LIMIT } - kcp.ts_probe = kcp.current + kcp.probe_wait + kcp.ts_probe = current + kcp.probe_wait kcp.probe |= IKCP_ASK_SEND } } @@ -684,6 +681,7 @@ func (kcp *KCP) flush() { cwnd = _imin_(kcp.cwnd, cwnd) } + // sliding window, controlled by snd_nxt && sna_una+cwnd count := 0 for k := range kcp.snd_queue { if _itimediff(kcp.snd_nxt, kcp.snd_una+cwnd) >= 0 { @@ -696,10 +694,8 @@ func (kcp *KCP) flush() { newseg.ts = current newseg.sn = kcp.snd_nxt newseg.una = kcp.rcv_nxt - newseg.resendts = current + newseg.resendts = newseg.ts newseg.rto = kcp.rx_rto - newseg.fastack = 0 - newseg.xmit = 0 kcp.snd_buf = append(kcp.snd_buf, newseg) kcp.snd_nxt++ count++ @@ -707,27 +703,29 @@ func (kcp *KCP) flush() { } kcp.snd_queue = kcp.snd_queue[count:] + // flag pending data + hasPending := false + if count > 0 { + hasPending = true + } + // calculate resent resent := uint32(kcp.fastresend) if kcp.fastresend <= 0 { resent = 0xffffffff } - rtomin := (kcp.rx_rto >> 3) - if kcp.nodelay != 0 { - rtomin = 0 - } // flush data segments - nque := len(kcp.snd_queue) var lostSegs, fastRetransSegs, earlyRetransSegs uint64 for k := range kcp.snd_buf { + current := currentMs() segment := &kcp.snd_buf[k] needsend := false if segment.xmit == 0 { needsend = true segment.xmit++ segment.rto = kcp.rx_rto - segment.resendts = current + segment.rto + rtomin + segment.resendts = current + segment.rto } else if _itimediff(current, segment.resendts) >= 0 { needsend = true segment.xmit++ @@ -740,21 +738,26 @@ func (kcp *KCP) flush() { segment.resendts = current + segment.rto lost = true lostSegs++ - } else if segment.fastack >= resent { - needsend = true - segment.xmit++ - segment.fastack = 0 - segment.resendts = current + segment.rto - change++ - fastRetransSegs++ - } else if segment.fastack > 0 && nque == 0 { - // early retransmit - needsend = true - segment.xmit++ - segment.fastack = 0 - segment.resendts = current + segment.rto - change++ - earlyRetransSegs++ + } else if segment.fastack >= resent { // fast retransmit + lastsend := segment.resendts - segment.rto + if _itimediff(current, lastsend) >= int32(kcp.rx_rto/4) { + needsend = true + segment.xmit++ + segment.fastack = 0 + segment.resendts = current + segment.rto + change++ + fastRetransSegs++ + } + } else if segment.fastack > 0 && !hasPending { // early retransmit + lastsend := segment.resendts - segment.rto + if _itimediff(current, lastsend) >= int32(kcp.rx_rto/4) { + needsend = true + segment.xmit++ + segment.fastack = 0 + segment.resendts = current + segment.rto + change++ + earlyRetransSegs++ + } } if needsend { @@ -822,27 +825,26 @@ func (kcp *KCP) flush() { // Update updates state (call it repeatedly, every 10ms-100ms), or you can ask // ikcp_check when to call it again (without ikcp_input/_send calling). // 'current' - current timestamp in millisec. -func (kcp *KCP) Update(current uint32) { +func (kcp *KCP) Update() { var slap int32 - kcp.current = current - + current := currentMs() if kcp.updated == 0 { kcp.updated = 1 - kcp.ts_flush = kcp.current + kcp.ts_flush = current } - slap = _itimediff(kcp.current, kcp.ts_flush) + slap = _itimediff(current, kcp.ts_flush) if slap >= 10000 || slap < -10000 { - kcp.ts_flush = kcp.current + kcp.ts_flush = current slap = 0 } if slap >= 0 { kcp.ts_flush += kcp.interval - if _itimediff(kcp.current, kcp.ts_flush) >= 0 { - kcp.ts_flush = kcp.current + kcp.interval + if _itimediff(current, kcp.ts_flush) >= 0 { + kcp.ts_flush = current + kcp.interval } kcp.flush() } @@ -855,7 +857,8 @@ func (kcp *KCP) Update(current uint32) { // Important to reduce unnacessary ikcp_update invoking. use it to // schedule ikcp_update (eg. implementing an epoll-like mechanism, // or optimize ikcp_update when handling massive kcp connections) -func (kcp *KCP) Check(current uint32) uint32 { +func (kcp *KCP) Check() uint32 { + current := currentMs() ts_flush := kcp.ts_flush tm_flush := int32(0x7fffffff) tm_packet := int32(0x7fffffff) diff --git a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/sess.go b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/sess.go index 737b99d..4879e2a 100644 --- a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/sess.go +++ b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/sess.go @@ -3,6 +3,7 @@ package kcp import ( "crypto/rand" "encoding/binary" + "hash/crc32" "io" "net" "sync" @@ -10,20 +11,9 @@ import ( "time" "github.com/pkg/errors" - - "github.com/klauspost/crc32" - "golang.org/x/net/ipv4" ) -// Option defines extra options -type Option interface{} - -// OptionWithConvId defines conversation id -type OptionWithConvId struct { - Id uint32 -} - type errTimeout struct { error } @@ -38,11 +28,26 @@ const ( crcSize = 4 // 4bytes packet checksum cryptHeaderSize = nonceSize + crcSize mtuLimit = 2048 - txQueueLimit = 8192 - rxFecLimit = 8192 - defaultKeepAliveInterval = 10 * time.Second + rxQueueLimit = 8192 + rxFECMulti = 3 // FEC keeps rxFECMulti* (dataShard+parityShard) ordered packets in memory + defaultKeepAliveInterval = 10 ) +const ( + errBrokenPipe = "broken pipe" + errInvalidOperation = "invalid operation" +) + +var ( + xmitBuf sync.Pool +) + +func init() { + xmitBuf.New = func() interface{} { + return make([]byte, mtuLimit) + } +} + type ( // UDPSession defines a KCP session implemented by UDP UDPSession struct { @@ -58,14 +63,13 @@ type ( die chan struct{} chReadEvent chan struct{} chWriteEvent chan struct{} - chTicker chan time.Time chUDPOutput chan []byte headerSize int ackNoDelay bool isClosed bool - keepAliveInterval time.Duration - xmitBuf sync.Pool + keepAliveInterval int32 mu sync.Mutex + updateInterval int32 } setReadBuffer interface { @@ -80,8 +84,7 @@ type ( // newUDPSession create a new udp session for client or server func newUDPSession(conv uint32, dataShards, parityShards int, l *Listener, conn net.PacketConn, remote net.Addr, block BlockCrypt) *UDPSession { sess := new(UDPSession) - sess.chTicker = make(chan time.Time, 1) - sess.chUDPOutput = make(chan []byte, txQueueLimit) + sess.chUDPOutput = make(chan []byte) sess.die = make(chan struct{}) sess.chReadEvent = make(chan struct{}, 1) sess.chWriteEvent = make(chan struct{}, 1) @@ -90,10 +93,7 @@ func newUDPSession(conv uint32, dataShards, parityShards int, l *Listener, conn sess.keepAliveInterval = defaultKeepAliveInterval sess.l = l sess.block = block - sess.fec = newFEC(rxFecLimit, dataShards, parityShards) - sess.xmitBuf.New = func() interface{} { - return make([]byte, mtuLimit) - } + sess.fec = newFEC(rxFECMulti*(dataShards+parityShards), dataShards, parityShards) // calculate header size if sess.block != nil { sess.headerSize += cryptHeaderSize @@ -104,7 +104,7 @@ func newUDPSession(conv uint32, dataShards, parityShards int, l *Listener, conn sess.kcp = NewKCP(conv, func(buf []byte, size int) { if size >= IKCP_OVERHEAD { - ext := sess.xmitBuf.Get().([]byte)[:sess.headerSize+size] + ext := xmitBuf.Get().([]byte)[:sess.headerSize+size] copy(ext[sess.headerSize:], buf) select { case sess.chUDPOutput <- ext: @@ -145,7 +145,7 @@ func (s *UDPSession) Read(b []byte) (n int, err error) { if s.isClosed { s.mu.Unlock() - return 0, errors.New("broken pipe") + return 0, errors.New(errBrokenPipe) } if !s.rd.IsZero() { @@ -169,19 +169,25 @@ func (s *UDPSession) Read(b []byte) (n int, err error) { return n, nil } - var timeout <-chan time.Time + var timeout *time.Timer + var c <-chan time.Time if !s.rd.IsZero() { delay := s.rd.Sub(time.Now()) - timeout = time.After(delay) + timeout = time.NewTimer(delay) + c = timeout.C } s.mu.Unlock() // wait for read event or timeout select { case <-s.chReadEvent: - case <-timeout: + case <-c: case <-s.die: } + + if timeout != nil { + timeout.Stop() + } } } @@ -191,7 +197,7 @@ func (s *UDPSession) Write(b []byte) (n int, err error) { s.mu.Lock() if s.isClosed { s.mu.Unlock() - return 0, errors.New("broken pipe") + return 0, errors.New(errBrokenPipe) } if !s.wd.IsZero() { @@ -201,7 +207,7 @@ func (s *UDPSession) Write(b []byte) (n int, err error) { } } - if s.kcp.WaitSnd() < 2*int(s.kcp.snd_wnd) { + if s.kcp.WaitSnd() < int(s.kcp.snd_wnd) { n = len(b) max := s.kcp.mss << 8 for { @@ -213,26 +219,31 @@ func (s *UDPSession) Write(b []byte) (n int, err error) { b = b[max:] } } - s.kcp.current = currentMs() s.kcp.flush() s.mu.Unlock() atomic.AddUint64(&DefaultSnmp.BytesSent, uint64(n)) return n, nil } - var timeout <-chan time.Time + var timeout *time.Timer + var c <-chan time.Time if !s.wd.IsZero() { delay := s.wd.Sub(time.Now()) - timeout = time.After(delay) + timeout = time.NewTimer(delay) + c = timeout.C } s.mu.Unlock() // wait for write event or timeout select { case <-s.chWriteEvent: - case <-timeout: + case <-c: case <-s.die: } + + if timeout != nil { + timeout.Stop() + } } } @@ -241,7 +252,7 @@ func (s *UDPSession) Close() error { s.mu.Lock() defer s.mu.Unlock() if s.isClosed { - return errors.New("broken pipe") + return errors.New(errBrokenPipe) } close(s.die) s.isClosed = true @@ -321,6 +332,7 @@ func (s *UDPSession) SetNoDelay(nodelay, interval, resend, nc int) { s.mu.Lock() defer s.mu.Unlock() s.kcp.NoDelay(nodelay, interval, resend, nc) + atomic.StoreInt32(&s.updateInterval, int32(interval)) } // SetDSCP sets the 6bit DSCP field of IP header, no effect if it's accepted from Listener @@ -328,11 +340,13 @@ func (s *UDPSession) SetDSCP(dscp int) error { s.mu.Lock() defer s.mu.Unlock() if s.l == nil { - if nc, ok := s.conn.(net.Conn); ok { + if nc, ok := s.conn.(*ConnectedUDPConn); ok { + return ipv4.NewConn(nc.Conn).SetTOS(dscp << 2) + } else if nc, ok := s.conn.(net.Conn); ok { return ipv4.NewConn(nc).SetTOS(dscp << 2) } } - return nil + return errors.New(errInvalidOperation) } // SetReadBuffer sets the socket read buffer, no effect if it's accepted from Listener @@ -344,7 +358,7 @@ func (s *UDPSession) SetReadBuffer(bytes int) error { return nc.SetReadBuffer(bytes) } } - return nil + return errors.New(errInvalidOperation) } // SetWriteBuffer sets the socket write buffer, no effect if it's accepted from Listener @@ -356,24 +370,12 @@ func (s *UDPSession) SetWriteBuffer(bytes int) error { return nc.SetWriteBuffer(bytes) } } - return nil + return errors.New(errInvalidOperation) } // SetKeepAlive changes per-connection NAT keepalive interval; 0 to disable, default to 10s func (s *UDPSession) SetKeepAlive(interval int) { - s.mu.Lock() - defer s.mu.Unlock() - s.keepAliveInterval = time.Duration(interval) * time.Second -} - -// writeTo wraps write method for client & listener -func (s *UDPSession) writeTo(b []byte, addr net.Addr) (int, error) { - if s.l == nil { - if nc, ok := s.conn.(io.Writer); ok { - return nc.Write(b) - } - } - return s.conn.WriteTo(b, addr) + atomic.StoreInt32(&s.keepAliveInterval, int32(interval)) } func (s *UDPSession) outputTask() { @@ -385,13 +387,15 @@ func (s *UDPSession) outputTask() { szOffset := fecOffset + fecHeaderSize // fec data group + var cacheLine []byte var fecGroup [][]byte var fecCnt int var fecMaxSize int if s.fec != nil { + cacheLine = make([]byte, s.fec.shardSize*mtuLimit) fecGroup = make([][]byte, s.fec.shardSize) for k := range fecGroup { - fecGroup[k] = make([]byte, mtuLimit) + fecGroup[k] = cacheLine[k*mtuLimit : (k+1)*mtuLimit] } } @@ -402,23 +406,31 @@ func (s *UDPSession) outputTask() { for { select { + // receive from a synchronous channel + // buffered channel must be avoided, because of "bufferbloat" case ext := <-s.chUDPOutput: var ecc [][]byte if s.fec != nil { s.fec.markData(ext[fecOffset:]) - // explicit size + // explicit size, including 2bytes size itself. binary.LittleEndian.PutUint16(ext[szOffset:], uint16(len(ext[szOffset:]))) // copy data to fec group - xorBytes(fecGroup[fecCnt], fecGroup[fecCnt], fecGroup[fecCnt]) + sz := len(ext) + fecGroup[fecCnt] = fecGroup[fecCnt][:sz] copy(fecGroup[fecCnt], ext) fecCnt++ - if len(ext) > fecMaxSize { - fecMaxSize = len(ext) + if sz > fecMaxSize { + fecMaxSize = sz } // calculate Reed-Solomon Erasure Code if fecCnt == s.fec.dataShards { + for i := 0; i < s.fec.dataShards; i++ { + shard := fecGroup[i] + slen := len(shard) + xorBytes(shard[slen:fecMaxSize], shard[slen:fecMaxSize], shard[slen:fecMaxSize]) + } ecc = s.fec.calcECC(fecGroup, szOffset, fecMaxSize) for k := range ecc { s.fec.markFEC(ecc[k][fecOffset:]) @@ -445,38 +457,36 @@ func (s *UDPSession) outputTask() { } } - //if rand.Intn(100) < 80 { - if n, err := s.writeTo(ext, s.remote); err == nil { - atomic.AddUint64(&DefaultSnmp.OutSegs, 1) - atomic.AddUint64(&DefaultSnmp.OutBytes, uint64(n)) + nbytes := 0 + nsegs := 0 + // if mrand.Intn(100) < 50 { + if n, err := s.conn.WriteTo(ext, s.remote); err == nil { + nbytes += n + nsegs++ } - //} + // } if ecc != nil { for k := range ecc { - if n, err := s.writeTo(ecc[k], s.remote); err == nil { - atomic.AddUint64(&DefaultSnmp.OutSegs, 1) - atomic.AddUint64(&DefaultSnmp.OutBytes, uint64(n)) + if n, err := s.conn.WriteTo(ecc[k], s.remote); err == nil { + nbytes += n + nsegs++ } } } - xorBytes(ext, ext, ext) - s.xmitBuf.Put(ext) + atomic.AddUint64(&DefaultSnmp.OutSegs, uint64(nsegs)) + atomic.AddUint64(&DefaultSnmp.OutBytes, uint64(nbytes)) + xmitBuf.Put(ext) case <-ticker.C: // NAT keep-alive - if len(s.chUDPOutput) == 0 { - s.mu.Lock() - interval := s.keepAliveInterval - s.mu.Unlock() - if interval > 0 && time.Now().After(lastPing.Add(interval)) { - buf := make([]byte, 2) - io.ReadFull(rand.Reader, buf) - rnd := int(binary.LittleEndian.Uint16(buf)) - sz := rnd%(IKCP_MTU_DEF-s.headerSize-IKCP_OVERHEAD) + s.headerSize + IKCP_OVERHEAD - ping := make([]byte, sz) - io.ReadFull(rand.Reader, ping) - s.writeTo(ping, s.remote) - lastPing = time.Now() - } + interval := time.Duration(atomic.LoadInt32(&s.keepAliveInterval)) * time.Second + if interval > 0 && time.Now().After(lastPing.Add(interval)) { + var rnd uint16 + binary.Read(rand.Reader, binary.LittleEndian, &rnd) + sz := int(rnd)%(IKCP_MTU_DEF-s.headerSize-IKCP_OVERHEAD) + s.headerSize + IKCP_OVERHEAD + ping := make([]byte, sz) // randomized ping packet + io.ReadFull(rand.Reader, ping) + s.conn.WriteTo(ping, s.remote) + lastPing = time.Now() } case <-s.die: return @@ -486,25 +496,18 @@ func (s *UDPSession) outputTask() { // kcp update, input loop func (s *UDPSession) updateTask() { - var tc <-chan time.Time - if s.l == nil { // client - ticker := time.NewTicker(10 * time.Millisecond) - tc = ticker.C - defer ticker.Stop() - } else { - tc = s.chTicker - } + tc := time.After(time.Duration(atomic.LoadInt32(&s.updateInterval)) * time.Millisecond) for { select { case <-tc: s.mu.Lock() - current := currentMs() - s.kcp.Update(current) - if s.kcp.WaitSnd() < 2*int(s.kcp.snd_wnd) { + s.kcp.flush() + if s.kcp.WaitSnd() < int(s.kcp.snd_wnd) { s.notifyWriteEvent() } s.mu.Unlock() + tc = time.After(time.Duration(atomic.LoadInt32(&s.updateInterval)) * time.Millisecond) case <-s.die: if s.l != nil { // has listener select { @@ -537,58 +540,84 @@ func (s *UDPSession) notifyWriteEvent() { } func (s *UDPSession) kcpInput(data []byte) { - current := currentMs() + var kcpInErrors, fecErrs, fecRecovered, fecSegs uint64 + if s.fec != nil { f := s.fec.decode(data) + s.mu.Lock() + if f.flag == typeData { + if ret := s.kcp.Input(data[fecHeaderSizePlus2:], true); ret != 0 { + kcpInErrors++ + } + } + if f.flag == typeData || f.flag == typeFEC { if f.flag == typeFEC { - atomic.AddUint64(&DefaultSnmp.FECSegs, 1) + fecSegs++ } if recovers := s.fec.input(f); recovers != nil { - s.mu.Lock() - s.kcp.current = current - for k := range recovers { - sz := binary.LittleEndian.Uint16(recovers[k]) - if int(sz) <= len(recovers[k]) && sz >= 2 { - s.kcp.Input(recovers[k][2:sz], false) + for _, r := range recovers { + if len(r) >= 2 { // must be larger than 2bytes + sz := binary.LittleEndian.Uint16(r) + if int(sz) <= len(r) && sz >= 2 { + if ret := s.kcp.Input(r[2:sz], false); ret == 0 { + fecRecovered++ + } else { + kcpInErrors++ + } + } else { + fecErrs++ + } } else { - atomic.AddUint64(&DefaultSnmp.FECErrs, 1) + fecErrs++ } } - s.mu.Unlock() - atomic.AddUint64(&DefaultSnmp.FECRecovered, uint64(len(recovers))) } } - if f.flag == typeData { - s.mu.Lock() - s.kcp.current = current - s.kcp.Input(data[fecHeaderSizePlus2:], true) - s.mu.Unlock() + + // notify reader + if n := s.kcp.PeekSize(); n > 0 { + s.notifyReadEvent() } + if s.ackNoDelay { + s.kcp.flush() + } + s.mu.Unlock() } else { s.mu.Lock() - s.kcp.current = current - s.kcp.Input(data, true) + if ret := s.kcp.Input(data, true); ret != 0 { + kcpInErrors++ + } + // notify reader + if n := s.kcp.PeekSize(); n > 0 { + s.notifyReadEvent() + } + if s.ackNoDelay { + s.kcp.flush() + } s.mu.Unlock() } - // notify reader - s.mu.Lock() - if n := s.kcp.PeekSize(); n > 0 { - s.notifyReadEvent() - } - if s.ackNoDelay { - s.kcp.current = current - s.kcp.flush() - } - s.mu.Unlock() atomic.AddUint64(&DefaultSnmp.InSegs, 1) + atomic.AddUint64(&DefaultSnmp.InBytes, uint64(len(data))) + if fecSegs > 0 { + atomic.AddUint64(&DefaultSnmp.FECSegs, fecSegs) + } + if kcpInErrors > 0 { + atomic.AddUint64(&DefaultSnmp.KCPInErrors, kcpInErrors) + } + if fecErrs > 0 { + atomic.AddUint64(&DefaultSnmp.FECErrs, fecErrs) + } + if fecRecovered > 0 { + atomic.AddUint64(&DefaultSnmp.FECRecovered, fecRecovered) + } } func (s *UDPSession) receiver(ch chan []byte) { for { - data := s.xmitBuf.Get().([]byte)[:mtuLimit] + data := xmitBuf.Get().([]byte)[:mtuLimit] if n, _, err := s.conn.ReadFrom(data); err == nil && n >= s.headerSize+IKCP_OVERHEAD { select { case ch <- data[:n]: @@ -604,7 +633,7 @@ func (s *UDPSession) receiver(ch chan []byte) { // read loop for client session func (s *UDPSession) readLoop() { - chPacket := make(chan []byte, txQueueLimit) + chPacket := make(chan []byte, rxQueueLimit) go s.receiver(chPacket) for { @@ -629,8 +658,7 @@ func (s *UDPSession) readLoop() { if dataValid { s.kcpInput(data) } - xorBytes(raw, raw, raw) - s.xmitBuf.Put(raw) + xmitBuf.Put(raw) case <-s.die: return } @@ -662,10 +690,8 @@ type ( // monitor incoming data for all connections of server func (l *Listener) monitor() { - chPacket := make(chan packet, txQueueLimit) + chPacket := make(chan packet, rxQueueLimit) go l.receiver(chPacket) - ticker := time.NewTicker(10 * time.Millisecond) - defer ticker.Stop() for { select { case p := <-chPacket: @@ -715,20 +741,11 @@ func (l *Listener) monitor() { } } - xorBytes(raw, raw, raw) l.rxbuf.Put(raw) case deadlink := <-l.chDeadlinks: delete(l.sessions, deadlink.String()) case <-l.die: return - case <-ticker.C: - now := time.Now() - for _, s := range l.sessions { - select { - case s.chTicker <- now: - default: - } - } } } } @@ -751,7 +768,7 @@ func (l *Listener) SetReadBuffer(bytes int) error { if nc, ok := l.conn.(setReadBuffer); ok { return nc.SetReadBuffer(bytes) } - return nil + return errors.New(errInvalidOperation) } // SetWriteBuffer sets the socket write buffer for the Listener @@ -759,7 +776,7 @@ func (l *Listener) SetWriteBuffer(bytes int) error { if nc, ok := l.conn.(setWriteBuffer); ok { return nc.SetWriteBuffer(bytes) } - return nil + return errors.New(errInvalidOperation) } // SetDSCP sets the 6bit DSCP field of IP header @@ -767,7 +784,7 @@ func (l *Listener) SetDSCP(dscp int) error { if nc, ok := l.conn.(net.Conn); ok { return ipv4.NewConn(nc).SetTOS(dscp << 2) } - return nil + return errors.New(errInvalidOperation) } // Accept implements the Accept method in the Listener interface; it waits for the next call and returns a generic Conn. @@ -788,7 +805,7 @@ func (l *Listener) AcceptKCP() (*UDPSession, error) { case c := <-l.chAccepts: return c, nil case <-l.die: - return nil, errors.New("listener stopped") + return nil, errors.New(errBrokenPipe) } } @@ -823,7 +840,7 @@ func (l *Listener) Addr() net.Addr { } // Listen listens for incoming KCP packets addressed to the local address laddr on the network "udp", -func Listen(laddr string) (*Listener, error) { +func Listen(laddr string) (net.Listener, error) { return ListenWithOptions(laddr, nil, 0, 0) } @@ -839,6 +856,11 @@ func ListenWithOptions(laddr string, block BlockCrypt, dataShards, parityShards return nil, errors.Wrap(err, "net.ListenUDP") } + return ServeConn(block, dataShards, parityShards, conn) +} + +// ServeConn serves KCP protocol for a single packet connection. +func ServeConn(block BlockCrypt, dataShards, parityShards int, conn net.PacketConn) (*Listener, error) { l := new(Listener) l.conn = conn l.sessions = make(map[string]*UDPSession) @@ -848,7 +870,7 @@ func ListenWithOptions(laddr string, block BlockCrypt, dataShards, parityShards l.dataShards = dataShards l.parityShards = parityShards l.block = block - l.fec = newFEC(rxFecLimit, dataShards, parityShards) + l.fec = newFEC(rxFECMulti*(dataShards+parityShards), dataShards, parityShards) l.rxbuf.New = func() interface{} { return make([]byte, mtuLimit) } @@ -866,12 +888,12 @@ func ListenWithOptions(laddr string, block BlockCrypt, dataShards, parityShards } // Dial connects to the remote address "raddr" on the network "udp" -func Dial(raddr string) (*UDPSession, error) { +func Dial(raddr string) (net.Conn, error) { return DialWithOptions(raddr, nil, 0, 0) } // DialWithOptions connects to the remote address "raddr" on the network "udp" with packet encryption -func DialWithOptions(raddr string, block BlockCrypt, dataShards, parityShards int, opts ...Option) (*UDPSession, error) { +func DialWithOptions(raddr string, block BlockCrypt, dataShards, parityShards int) (*UDPSession, error) { udpaddr, err := net.ResolveUDPAddr("udp", raddr) if err != nil { return nil, errors.Wrap(err, "net.ResolveUDPAddr") @@ -882,20 +904,34 @@ func DialWithOptions(raddr string, block BlockCrypt, dataShards, parityShards in return nil, errors.Wrap(err, "net.DialUDP") } - buf := make([]byte, 4) - io.ReadFull(rand.Reader, buf) - convid := binary.LittleEndian.Uint32(buf) - for k := range opts { - switch opt := opts[k].(type) { - case OptionWithConvId: - convid = opt.Id - default: - return nil, errors.New("unrecognized option") - } + return NewConn(raddr, block, dataShards, parityShards, &ConnectedUDPConn{udpconn, udpconn}) +} + +// NewConn establishes a session and talks KCP protocol over a packet connection. +func NewConn(raddr string, block BlockCrypt, dataShards, parityShards int, conn net.PacketConn) (*UDPSession, error) { + udpaddr, err := net.ResolveUDPAddr("udp", raddr) + if err != nil { + return nil, errors.Wrap(err, "net.ResolveUDPAddr") } - return newUDPSession(convid, dataShards, parityShards, nil, udpconn, udpaddr, block), nil + + var convid uint32 + binary.Read(rand.Reader, binary.LittleEndian, &convid) + return newUDPSession(convid, dataShards, parityShards, nil, conn, udpaddr, block), nil } func currentMs() uint32 { return uint32(time.Now().UnixNano() / int64(time.Millisecond)) } + +// ConnectedUDPConn is a wrapper for net.UDPConn which converts WriteTo syscalls +// to Write syscalls that are 4 times faster on some OS'es. This should only be +// used for connections that were produced by a net.Dial* call. +type ConnectedUDPConn struct { + *net.UDPConn + Conn net.Conn // underlying connection if any +} + +// WriteTo redirects all writes to the Write syscall, which is 4 times faster. +func (c *ConnectedUDPConn) WriteTo(b []byte, addr net.Addr) (int, error) { + return c.Write(b) +} diff --git a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/snmp.go b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/snmp.go index 997b163..e8ab194 100644 --- a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/snmp.go +++ b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/snmp.go @@ -1,34 +1,95 @@ package kcp -import "sync/atomic" +import ( + "fmt" + "sync/atomic" +) // Snmp defines network statistics indicator type Snmp struct { - BytesSent uint64 // payload bytes sent + BytesSent uint64 // raw bytes sent BytesReceived uint64 MaxConn uint64 ActiveOpens uint64 PassiveOpens uint64 - CurrEstab uint64 - InErrs uint64 - InCsumErrors uint64 // checksum errors + CurrEstab uint64 // count of connections for now + InErrs uint64 // udp read errors + InCsumErrors uint64 // checksum errors from CRC32 + KCPInErrors uint64 // packet iput errors from kcp InSegs uint64 OutSegs uint64 + InBytes uint64 // udp bytes received OutBytes uint64 // udp bytes sent RetransSegs uint64 FastRetransSegs uint64 EarlyRetransSegs uint64 - LostSegs uint64 - RepeatSegs uint64 - FECRecovered uint64 - FECErrs uint64 - FECSegs uint64 // fec segments received + LostSegs uint64 // number of segs infered as lost + RepeatSegs uint64 // number of segs duplicated + FECRecovered uint64 // correct packets recovered from FEC + FECErrs uint64 // incorrect packets recovered from FEC + FECSegs uint64 // FEC segments received + FECShortShards uint64 // number of data shards that's not enough for recovery } func newSnmp() *Snmp { return new(Snmp) } +func (s *Snmp) Header() []string { + return []string{ + "BytesSent", + "BytesReceived", + "MaxConn", + "ActiveOpens", + "PassiveOpens", + "CurrEstab", + "InErrs", + "InCsumErrors", + "KCPInErrors", + "InSegs", + "OutSegs", + "InBytes", + "OutBytes", + "RetransSegs", + "FastRetransSegs", + "EarlyRetransSegs", + "LostSegs", + "RepeatSegs", + "FECSegs", + "FECErrs", + "FECRecovered", + "FECShortShards", + } +} + +func (s *Snmp) ToSlice() []string { + snmp := s.Copy() + return []string{ + fmt.Sprint(snmp.BytesSent), + fmt.Sprint(snmp.BytesReceived), + fmt.Sprint(snmp.MaxConn), + fmt.Sprint(snmp.ActiveOpens), + fmt.Sprint(snmp.PassiveOpens), + fmt.Sprint(snmp.CurrEstab), + fmt.Sprint(snmp.InErrs), + fmt.Sprint(snmp.InCsumErrors), + fmt.Sprint(snmp.KCPInErrors), + fmt.Sprint(snmp.InSegs), + fmt.Sprint(snmp.OutSegs), + fmt.Sprint(snmp.InBytes), + fmt.Sprint(snmp.OutBytes), + fmt.Sprint(snmp.RetransSegs), + fmt.Sprint(snmp.FastRetransSegs), + fmt.Sprint(snmp.EarlyRetransSegs), + fmt.Sprint(snmp.LostSegs), + fmt.Sprint(snmp.RepeatSegs), + fmt.Sprint(snmp.FECSegs), + fmt.Sprint(snmp.FECErrs), + fmt.Sprint(snmp.FECRecovered), + fmt.Sprint(snmp.FECShortShards), + } +} + // Copy make a copy of current snmp snapshot func (s *Snmp) Copy() *Snmp { d := newSnmp() @@ -40,8 +101,10 @@ func (s *Snmp) Copy() *Snmp { d.CurrEstab = atomic.LoadUint64(&s.CurrEstab) d.InErrs = atomic.LoadUint64(&s.InErrs) d.InCsumErrors = atomic.LoadUint64(&s.InCsumErrors) + d.KCPInErrors = atomic.LoadUint64(&s.KCPInErrors) d.InSegs = atomic.LoadUint64(&s.InSegs) d.OutSegs = atomic.LoadUint64(&s.OutSegs) + d.InBytes = atomic.LoadUint64(&s.InBytes) d.OutBytes = atomic.LoadUint64(&s.OutBytes) d.RetransSegs = atomic.LoadUint64(&s.RetransSegs) d.FastRetransSegs = atomic.LoadUint64(&s.FastRetransSegs) @@ -51,9 +114,36 @@ func (s *Snmp) Copy() *Snmp { d.FECSegs = atomic.LoadUint64(&s.FECSegs) d.FECErrs = atomic.LoadUint64(&s.FECErrs) d.FECRecovered = atomic.LoadUint64(&s.FECRecovered) + d.FECShortShards = atomic.LoadUint64(&s.FECShortShards) return d } +// Reset values to zero +func (s *Snmp) Reset() { + atomic.StoreUint64(&s.BytesSent, 0) + atomic.StoreUint64(&s.BytesReceived, 0) + atomic.StoreUint64(&s.MaxConn, 0) + atomic.StoreUint64(&s.ActiveOpens, 0) + atomic.StoreUint64(&s.PassiveOpens, 0) + atomic.StoreUint64(&s.CurrEstab, 0) + atomic.StoreUint64(&s.InErrs, 0) + atomic.StoreUint64(&s.InCsumErrors, 0) + atomic.StoreUint64(&s.KCPInErrors, 0) + atomic.StoreUint64(&s.InSegs, 0) + atomic.StoreUint64(&s.OutSegs, 0) + atomic.StoreUint64(&s.InBytes, 0) + atomic.StoreUint64(&s.OutBytes, 0) + atomic.StoreUint64(&s.RetransSegs, 0) + atomic.StoreUint64(&s.FastRetransSegs, 0) + atomic.StoreUint64(&s.EarlyRetransSegs, 0) + atomic.StoreUint64(&s.LostSegs, 0) + atomic.StoreUint64(&s.RepeatSegs, 0) + atomic.StoreUint64(&s.FECSegs, 0) + atomic.StoreUint64(&s.FECErrs, 0) + atomic.StoreUint64(&s.FECRecovered, 0) + atomic.StoreUint64(&s.FECShortShards, 0) +} + // DefaultSnmp is the global KCP connection statistics collector var DefaultSnmp *Snmp diff --git a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/xor.go b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/xor.go index 5d21095..20fa2e4 100644 --- a/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/xor.go +++ b/cmd/gost/vendor/gopkg.in/xtaci/kcp-go.v2/xor.go @@ -44,15 +44,18 @@ func safeXORBytes(dst, a, b []byte) int { } for i := ex; i < n; i += 8 { - dst[i] = a[i] ^ b[i] - dst[i+1] = a[i+1] ^ b[i+1] - dst[i+2] = a[i+2] ^ b[i+2] - dst[i+3] = a[i+3] ^ b[i+3] + _dst := dst[i : i+8] + _a := a[i : i+8] + _b := b[i : i+8] + _dst[0] = _a[0] ^ _b[0] + _dst[1] = _a[1] ^ _b[1] + _dst[2] = _a[2] ^ _b[2] + _dst[3] = _a[3] ^ _b[3] - dst[i+4] = a[i+4] ^ b[i+4] - dst[i+5] = a[i+5] ^ b[i+5] - dst[i+6] = a[i+6] ^ b[i+6] - dst[i+7] = a[i+7] ^ b[i+7] + _dst[4] = _a[4] ^ _b[4] + _dst[5] = _a[5] ^ _b[5] + _dst[6] = _a[6] ^ _b[6] + _dst[7] = _a[7] ^ _b[7] } return n } @@ -85,14 +88,17 @@ func fastXORWords(dst, a, b []byte) { } for i := ex; i < n; i += 8 { - dw[i] = aw[i] ^ bw[i] - dw[i+1] = aw[i+1] ^ bw[i+1] - dw[i+2] = aw[i+2] ^ bw[i+2] - dw[i+3] = aw[i+3] ^ bw[i+3] - dw[i+4] = aw[i+4] ^ bw[i+4] - dw[i+5] = aw[i+5] ^ bw[i+5] - dw[i+6] = aw[i+6] ^ bw[i+6] - dw[i+7] = aw[i+7] ^ bw[i+7] + _dw := dw[i : i+8] + _aw := aw[i : i+8] + _bw := bw[i : i+8] + _dw[0] = _aw[0] ^ _bw[0] + _dw[1] = _aw[1] ^ _bw[1] + _dw[2] = _aw[2] ^ _bw[2] + _dw[3] = _aw[3] ^ _bw[3] + _dw[4] = _aw[4] ^ _bw[4] + _dw[5] = _aw[5] ^ _bw[5] + _dw[6] = _aw[6] ^ _bw[6] + _dw[7] = _aw[7] ^ _bw[7] } } diff --git a/cmd/gost/vendor/gopkg.in/xtaci/smux.v1/README.md b/cmd/gost/vendor/gopkg.in/xtaci/smux.v1/README.md index 15e9871..9a760d2 100644 --- a/cmd/gost/vendor/gopkg.in/xtaci/smux.v1/README.md +++ b/cmd/gost/vendor/gopkg.in/xtaci/smux.v1/README.md @@ -62,7 +62,7 @@ func client() { panic(err) } - // Stream implements net.Conn + // Stream implements io.ReadWriteCloser stream.Write([]byte("ping")) } @@ -94,4 +94,4 @@ func server() { ## Status -Beta +Stable diff --git a/cmd/gost/vendor/gopkg.in/xtaci/smux.v1/session.go b/cmd/gost/vendor/gopkg.in/xtaci/smux.v1/session.go index a06f2ec..5c759c0 100644 --- a/cmd/gost/vendor/gopkg.in/xtaci/smux.v1/session.go +++ b/cmd/gost/vendor/gopkg.in/xtaci/smux.v1/session.go @@ -16,10 +16,19 @@ const ( const ( errBrokenPipe = "broken pipe" - errConnReset = "connection reset by peer" errInvalidProtocol = "invalid protocol version" ) +type writeRequest struct { + frame Frame + result chan writeResult +} + +type writeResult struct { + n int + err error +} + // Session defines a multiplexed connection for streams type Session struct { conn io.ReadWriteCloser @@ -38,7 +47,12 @@ type Session struct { dieLock sync.Mutex chAccepts chan *Stream + xmitPool sync.Pool dataReady int32 // flag data has arrived + + deadline atomic.Value + + writes chan writeRequest } func newSession(config *Config, conn io.ReadWriteCloser, client bool) *Session { @@ -50,12 +64,18 @@ func newSession(config *Config, conn io.ReadWriteCloser, client bool) *Session { s.chAccepts = make(chan *Stream, defaultAcceptBacklog) s.bucket = int32(config.MaxReceiveBuffer) s.bucketCond = sync.NewCond(&sync.Mutex{}) + s.xmitPool.New = func() interface{} { + return make([]byte, (1<<16)+headerSize) + } + s.writes = make(chan writeRequest) + if client { s.nextStreamID = 1 } else { s.nextStreamID = 2 } go s.recvLoop() + go s.sendLoop() go s.keepalive() return s } @@ -82,9 +102,17 @@ func (s *Session) OpenStream() (*Stream, error) { // AcceptStream is used to block until the next available stream // is ready to be accepted. func (s *Session) AcceptStream() (*Stream, error) { + var deadline <-chan time.Time + if d, ok := s.deadline.Load().(time.Time); ok && !d.IsZero() { + timer := time.NewTimer(d.Sub(time.Now())) + defer timer.Stop() + deadline = timer.C + } select { case stream := <-s.chAccepts: return stream, nil + case <-deadline: + return nil, errTimeout case <-s.die: return nil, errors.New(errBrokenPipe) } @@ -93,13 +121,14 @@ func (s *Session) AcceptStream() (*Stream, error) { // Close is used to close the session and all streams. func (s *Session) Close() (err error) { s.dieLock.Lock() - defer s.dieLock.Unlock() select { case <-s.die: + s.dieLock.Unlock() return errors.New(errBrokenPipe) default: close(s.die) + s.dieLock.Unlock() s.streamLock.Lock() for k := range s.streams { s.streams[k].sessionClose() @@ -130,6 +159,13 @@ func (s *Session) NumStreams() int { return len(s.streams) } +// SetDeadline sets a deadline used by Accept* calls. +// A zero time value disables the deadline. +func (s *Session) SetDeadline(t time.Time) error { + s.deadline.Store(t) + return nil +} + // notify the session that a stream has closed func (s *Session) streamClosed(sid uint32) { s.streamLock.Lock() @@ -144,9 +180,12 @@ func (s *Session) streamClosed(sid uint32) { // returnTokens is called by stream to return token after read func (s *Session) returnTokens(n int) { - if atomic.AddInt32(&s.bucket, int32(n)) > 0 { + oldvalue := atomic.LoadInt32(&s.bucket) + newvalue := atomic.AddInt32(&s.bucket, int32(n)) + if oldvalue <= 0 && newvalue > 0 { s.bucketCond.Signal() } + } // session read a frame from underlying connection @@ -250,26 +289,56 @@ func (s *Session) keepalive() { } } +func (s *Session) sendLoop() { + for { + select { + case <-s.die: + return + case request, ok := <-s.writes: + if !ok { + continue + } + buf := s.xmitPool.Get().([]byte) + buf[0] = request.frame.ver + buf[1] = request.frame.cmd + binary.LittleEndian.PutUint16(buf[2:], uint16(len(request.frame.data))) + binary.LittleEndian.PutUint32(buf[4:], request.frame.sid) + copy(buf[headerSize:], request.frame.data) + + s.writeLock.Lock() + n, err := s.conn.Write(buf[:headerSize+len(request.frame.data)]) + s.writeLock.Unlock() + s.xmitPool.Put(buf) + + n -= headerSize + if n < 0 { + n = 0 + } + + result := writeResult{ + n: n, + err: err, + } + + request.result <- result + close(request.result) + } + } +} + // writeFrame writes the frame to the underlying connection // and returns the number of bytes written if successful func (s *Session) writeFrame(f Frame) (n int, err error) { - buf := make([]byte, headerSize+len(f.data)) - buf[0] = f.ver - buf[1] = f.cmd - binary.LittleEndian.PutUint16(buf[2:], uint16(len(f.data))) - binary.LittleEndian.PutUint32(buf[4:], f.sid) - copy(buf[headerSize:], f.data) + req := writeRequest{ + frame: f, + result: make(chan writeResult, 1), + } + select { + case <-s.die: + return 0, errors.New(errBrokenPipe) + case s.writes <- req: + } - s.writeLock.Lock() - n, err = s.conn.Write(buf) - s.writeLock.Unlock() - return n, err -} - -// writeBinary writes the byte slice to the underlying connection -func (s *Session) writeBinary(bts []byte) (n int, err error) { - s.writeLock.Lock() - n, err = s.conn.Write(bts) - s.writeLock.Unlock() - return n, err + result := <-req.result + return result.n, result.err } diff --git a/cmd/gost/vendor/gopkg.in/xtaci/smux.v1/stream.go b/cmd/gost/vendor/gopkg.in/xtaci/smux.v1/stream.go index 8c44dd8..34e4abb 100644 --- a/cmd/gost/vendor/gopkg.in/xtaci/smux.v1/stream.go +++ b/cmd/gost/vendor/gopkg.in/xtaci/smux.v1/stream.go @@ -2,24 +2,28 @@ package smux import ( "bytes" - "encoding/binary" + "io" + "net" "sync" "sync/atomic" + "time" "github.com/pkg/errors" ) // Stream implements io.ReadWriteCloser type Stream struct { - id uint32 - rstflag int32 - sess *Session - buffer bytes.Buffer - bufferLock sync.Mutex - frameSize int - chReadEvent chan struct{} // notify a read event - die chan struct{} // flag the stream has closed - dieLock sync.Mutex + id uint32 + rstflag int32 + sess *Session + buffer bytes.Buffer + bufferLock sync.Mutex + frameSize int + chReadEvent chan struct{} // notify a read event + die chan struct{} // flag the stream has closed + dieLock sync.Mutex + readDeadline atomic.Value + writeDeadline atomic.Value } // newStream initiates a Stream struct @@ -35,10 +39,19 @@ func newStream(id uint32, frameSize int, sess *Session) *Stream { // Read implements io.ReadWriteCloser func (s *Stream) Read(b []byte) (n int, err error) { + var deadline <-chan time.Time + if d, ok := s.readDeadline.Load().(time.Time); ok && !d.IsZero() { + timer := time.NewTimer(d.Sub(time.Now())) + defer timer.Stop() + deadline = timer.C + } + READ: select { case <-s.die: return 0, errors.New(errBrokenPipe) + case <-deadline: + return n, errTimeout default: } @@ -51,12 +64,14 @@ READ: return n, nil } else if atomic.LoadInt32(&s.rstflag) == 1 { _ = s.Close() - return 0, errors.New(errConnReset) + return 0, io.EOF } select { case <-s.chReadEvent: goto READ + case <-deadline: + return n, errTimeout case <-s.die: return 0, errors.New(errBrokenPipe) } @@ -64,6 +79,13 @@ READ: // Write implements io.ReadWriteCloser func (s *Stream) Write(b []byte) (n int, err error) { + var deadline <-chan time.Time + if d, ok := s.writeDeadline.Load().(time.Time); ok && !d.IsZero() { + timer := time.NewTimer(d.Sub(time.Now())) + defer timer.Stop() + deadline = timer.C + } + select { case <-s.die: return 0, errors.New(errBrokenPipe) @@ -71,42 +93,82 @@ func (s *Stream) Write(b []byte) (n int, err error) { } frames := s.split(b, cmdPSH, s.id) - // preallocate buffer - buffer := make([]byte, len(frames)*headerSize+len(b)) - bts := buffer - - // combine frames into a large blob + sent := 0 for k := range frames { - bts[0] = version - bts[1] = frames[k].cmd - binary.LittleEndian.PutUint16(bts[2:], uint16(len(frames[k].data))) - binary.LittleEndian.PutUint32(bts[4:], frames[k].sid) - copy(bts[headerSize:], frames[k].data) - bts = bts[len(frames[k].data)+headerSize:] - } + req := writeRequest{ + frame: frames[k], + result: make(chan writeResult, 1), + } - if _, err = s.sess.writeBinary(buffer); err != nil { - return 0, err + select { + case s.sess.writes <- req: + case <-s.die: + return sent, errors.New(errBrokenPipe) + case <-deadline: + return sent, errTimeout + } + + select { + case result := <-req.result: + sent += result.n + if result.err != nil { + return sent, result.err + } + case <-s.die: + return sent, errors.New(errBrokenPipe) + case <-deadline: + return sent, errTimeout + } } - return len(b), nil + return sent, nil } // Close implements io.ReadWriteCloser func (s *Stream) Close() error { s.dieLock.Lock() - defer s.dieLock.Unlock() select { case <-s.die: + s.dieLock.Unlock() return errors.New(errBrokenPipe) default: close(s.die) + s.dieLock.Unlock() s.sess.streamClosed(s.id) _, err := s.sess.writeFrame(newFrame(cmdRST, s.id)) return err } } +// SetReadDeadline sets the read deadline as defined by +// net.Conn.SetReadDeadline. +// A zero time value disables the deadline. +func (s *Stream) SetReadDeadline(t time.Time) error { + s.readDeadline.Store(t) + return nil +} + +// SetWriteDeadline sets the write deadline as defined by +// net.Conn.SetWriteDeadline. +// A zero time value disables the deadline. +func (s *Stream) SetWriteDeadline(t time.Time) error { + s.writeDeadline.Store(t) + return nil +} + +// SetDeadline sets both read and write deadlines as defined by +// net.Conn.SetDeadline. +// A zero time value disables the deadlines. +func (s *Stream) SetDeadline(t time.Time) error { + if err := s.SetReadDeadline(t); err != nil { + return err + } + if err := s.SetWriteDeadline(t); err != nil { + return err + } + return nil +} + // session closes the stream func (s *Stream) sessionClose() { s.dieLock.Lock() @@ -119,6 +181,26 @@ func (s *Stream) sessionClose() { } } +// LocalAddr satisfies net.Conn interface +func (s *Stream) LocalAddr() net.Addr { + if ts, ok := s.sess.conn.(interface { + LocalAddr() net.Addr + }); ok { + return ts.LocalAddr() + } + return nil +} + +// RemoteAddr satisfies net.Conn interface +func (s *Stream) RemoteAddr() net.Addr { + if ts, ok := s.sess.conn.(interface { + RemoteAddr() net.Addr + }); ok { + return ts.RemoteAddr() + } + return nil +} + // pushBytes a slice into buffer func (s *Stream) pushBytes(p []byte) { s.bufferLock.Lock() @@ -164,3 +246,11 @@ func (s *Stream) notifyReadEvent() { func (s *Stream) markRST() { atomic.StoreInt32(&s.rstflag, 1) } + +var errTimeout error = &timeoutError{} + +type timeoutError struct{} + +func (e *timeoutError) Error() string { return "i/o timeout" } +func (e *timeoutError) Timeout() bool { return true } +func (e *timeoutError) Temporary() bool { return true } diff --git a/cmd/gost/vendor/vendor.json b/cmd/gost/vendor/vendor.json index 51f6d88..57223ca 100644 --- a/cmd/gost/vendor/vendor.json +++ b/cmd/gost/vendor/vendor.json @@ -8,12 +8,6 @@ "revision": "c91e78db502ff629614837aacb7aa4efa61c651a", "revisionTime": "2016-04-30T09:49:23Z" }, - { - "checksumSHA1": "QPs3L3mjPoi+a9GJCjW8HhyJczM=", - "path": "github.com/codahale/chacha20", - "revision": "ec07b4f69a3f70b1dd2a8ad77230deb1ba5d6953", - "revisionTime": "2015-11-07T02:50:05Z" - }, { "checksumSHA1": "aIhLeVAIrsjs63CwqmU3+GU8yT4=", "path": "github.com/ginuerzh/gosocks4", @@ -68,12 +62,6 @@ "revision": "09cded8978dc9e80714c4d85b0322337b0a1e5e0", "revisionTime": "2016-03-02T07:53:16Z" }, - { - "checksumSHA1": "BM6ZlNJmtKy3GBoWwg2X55gnZ4A=", - "path": "github.com/klauspost/crc32", - "revision": "cb6bfca970f6908083f26f39a79009d608efd5cd", - "revisionTime": "2016-10-16T15:41:25Z" - }, { "checksumSHA1": "dwSGkUfh3A2h0VkXndzBX/27hVc=", "path": "github.com/klauspost/reedsolomon", @@ -291,16 +279,16 @@ "revisionTime": "2016-12-15T22:53:35Z" }, { - "checksumSHA1": "nkIlj9QTxHQ78Vb+VgjhXZ4rZ3E=", + "checksumSHA1": "SbBORpjEg3VfPfdSrW82pa3f9Io=", "path": "gopkg.in/xtaci/kcp-go.v2", - "revision": "6610d527ea5c4890cf593796ff8ff1f10486bb68", - "revisionTime": "2016-09-08T14:44:41Z" + "revision": "6da5044c742f24f05b00db9214b57b2ac943c9ab", + "revisionTime": "2017-01-20T08:43:10Z" }, { - "checksumSHA1": "aIqXwA82JxLOXcgmuVSgcRqdJvU=", + "checksumSHA1": "EutBuLS2elfcDCMifXNMGj9farQ=", "path": "gopkg.in/xtaci/smux.v1", - "revision": "9f2b528a60917e6446273926f4c676cac759d2b0", - "revisionTime": "2016-09-22T10:26:45Z" + "revision": "427dd804ce9fb0a9e7b27a628f68a124fb0d67a6", + "revisionTime": "2016-11-29T15:03:00Z" } ], "rootPath": "github.com/ginuerzh/gost/cmd/gost"