diff --git a/src/libs/Tiktoken.Core/CoreBPE.cs b/src/libs/Tiktoken.Core/CoreBPE.cs index cda896b..c10bd1c 100644 --- a/src/libs/Tiktoken.Core/CoreBPE.cs +++ b/src/libs/Tiktoken.Core/CoreBPE.cs @@ -45,7 +45,19 @@ public CoreBpe( Encoder = encoder; FastEncoder = Encoder .ToDictionary( - static x => new string(x.Key.Select(y => (char) y).ToArray()), +#if NETSTANDARD2_1_OR_GREATER || NET6_0_OR_GREATER + static x => + { + Span chars = stackalloc char[x.Key.Length]; + for (var i = 0; i < x.Key.Length; i++) + { + chars[i] = (char)x.Key[i]; + } + return new string(chars); + }, +#else + static x => new string(x.Key.Select(static y => (char) y).ToArray()), +#endif static x => x.Value); SpecialTokensEncoder = specialTokensEncoder; diff --git a/src/libs/Tiktoken.Encodings.Abstractions/EncodingLoader.cs b/src/libs/Tiktoken.Encodings.Abstractions/EncodingLoader.cs index 86ed49f..153295e 100644 --- a/src/libs/Tiktoken.Encodings.Abstractions/EncodingLoader.cs +++ b/src/libs/Tiktoken.Encodings.Abstractions/EncodingLoader.cs @@ -30,7 +30,7 @@ public static Dictionary LoadEncodingFromManifestResource( assembly.GetManifestResourceStream(resourcePath) ?? throw new InvalidOperationException("Resource not found."); using var reader = new StreamReader(stream); - + var lines = new List(); while (reader.ReadLine() is { } line) { @@ -53,7 +53,11 @@ public static Dictionary LoadEncodingFromLines( string name) { lines = lines ?? throw new ArgumentNullException(nameof(lines)); - + +#if NET7_0_OR_GREATER + Span tokens = stackalloc Range[3]; + Span bytes = stackalloc byte[256]; +#endif var dictionary = new Dictionary(new ByteArrayComparer()); foreach (var line in lines) { @@ -62,14 +66,28 @@ public static Dictionary LoadEncodingFromLines( continue; } +#if NET7_0_OR_GREATER + var splitCount = line.AsSpan().Split(tokens, ' '); + if (splitCount != 2) + { + throw new FormatException($"Invalid file format: {name}"); + } +#else var tokens = line.Split(' '); if (tokens.Length != 2) { throw new FormatException($"Invalid file format: {name}"); } +#endif +#if NET7_0_OR_GREATER + Convert.TryFromBase64Chars(line.AsSpan(tokens[0]), bytes, out var bytesWritten); + var tokenBytes = bytes.Slice(0, bytesWritten).ToArray(); + var rank = int.Parse(line.AsSpan(tokens[1]), CultureInfo.InvariantCulture); +#else var tokenBytes = Convert.FromBase64String(tokens[0]); var rank = int.Parse(tokens[1], CultureInfo.InvariantCulture); +#endif dictionary[tokenBytes] = rank; }