
Microsoft.KernelMemory version 0.68+ compatibility fix #862

Merged
2 changes: 1 addition & 1 deletion LLama.KernelMemory/LLamaSharp.KernelMemory.csproj
@@ -27,7 +27,7 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Microsoft.KernelMemory.Abstractions" Version="0.66.240709.1" />
<PackageReference Include="Microsoft.KernelMemory.Abstractions" Version="0.68.240716.1" />
</ItemGroup>

<ItemGroup>
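For downstream projects updating to match this bump, the equivalent NuGet CLI invocation (a standard dotnet command, shown here only for illustration) would be:

    dotnet add package Microsoft.KernelMemory.Abstractions --version 0.68.240716.1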
20 changes: 19 additions & 1 deletion LLama.KernelMemory/LLamaSharpTextEmbeddingGenerator.cs
@@ -1,6 +1,5 @@
using LLama;
using LLama.Common;
using LLama.Native;
using Microsoft.KernelMemory;
using Microsoft.KernelMemory.AI;

@@ -112,5 +111,24 @@ public async Task<Embedding> GenerateEmbeddingAsync(string text, CancellationTok

/// <inheritdoc/>
public int CountTokens(string text) => _embedder.Context.Tokenize(text, special: true).Length;

/// <summary>
/// Get the list of tokens for the input text
/// </summary>
/// <param name="text">Input string to be tokenized</param>
/// <returns>Read-only list of tokens for the input text</returns>
/// <remarks>
/// Throws if text is null. The result includes an empty string for the BOS token, since addBos is left true to be consistent with the CountTokens implementation.</remarks>
/// <seealso cref="CountTokens(string)"/>
public IReadOnlyList<string> GetTokens(string text)
{
/* see relevant unit tests for important implementation notes regarding unicode */
var context = _embedder.Context;
var numericTokens = context.Tokenize(text, special: true);
var decoder = new StreamingTokenDecoder(context);
return numericTokens
.Select(x => { decoder.Add(x); return decoder.Read(); })
.ToList();
}
}
}
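Callers reach the new method through Microsoft.KernelMemory's ITextTokenizer interface. A minimal sketch of the intended usage, assuming a hypothetical local GGUF model path and only the types and members shown in this diff:

using System;
using LLamaSharp.KernelMemory;

public static class GetTokensDemo
{
    public static void Main()
    {
        // Hypothetical model path - substitute any local GGUF model.
        var config = new LLamaSharpConfig("Models/llama-2-7b-chat.Q3_K_S.gguf");
        using var generator = new LLamaSharpTextEmbeddingGenerator(config);

        // Each element is the text decoded from one numeric token; joining
        // them reproduces the input, preceded by the space added for BOS.
        var tokens = generator.GetTokens("The quick brown fox");
        Console.WriteLine(string.Join("|", tokens));

        // GetTokens is kept consistent with CountTokens by design.
        Console.WriteLine(generator.CountTokens("The quick brown fox") == tokens.Count); // True
    }
}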
18 changes: 18 additions & 0 deletions LLama.KernelMemory/LlamaSharpTextGenerator.cs
@@ -106,5 +106,23 @@ private static InferenceParams OptionsToParams(TextGenerationOptions options, In

/// <inheritdoc/>
public int CountTokens(string text) => _context.Tokenize(text, special: true).Length;

/// <summary>
/// Get the list of tokens for the input text
/// </summary>
/// <param name="text">Input string to be tokenized</param>
/// <returns>Read-only list of tokens for the input text</returns>
/// <remarks>
/// Throws if text is null. The result includes an empty string for the BOS token, since addBos is left true to be consistent with the CountTokens implementation.</remarks>
/// <seealso cref="CountTokens(string)"/>
public IReadOnlyList<string> GetTokens(string text)
{
/* see relevant unit tests for important implementation notes regarding unicode */
var numericTokens = _context.Tokenize(text, special: true);
var decoder = new StreamingTokenDecoder(_context);
return numericTokens
.Select(x => { decoder.Add(x); return decoder.Read(); })
.ToList();
}
}
}
117 changes: 117 additions & 0 deletions LLama.Unittest/KernelMemory/ITextTokenizerTests.cs
@@ -0,0 +1,117 @@
using LLama.Common;
using LLamaSharp.KernelMemory;
using Microsoft.KernelMemory.AI;
using Xunit.Abstractions;

namespace LLama.Unittest.KernelMemory
{

public abstract class ITextTokenizerTests
{
private readonly ITestOutputHelper _testOutputHelper;

#pragma warning disable KMEXP00 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
protected ITextTokenizer? _generator;
#pragma warning restore KMEXP00 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.

protected InferenceParams _infParams;
protected LLamaSharpConfig _lsConfig;

public ITextTokenizerTests(ITestOutputHelper testOutputHelper)
{
_testOutputHelper = testOutputHelper;

_infParams = new() { AntiPrompts = ["\n\n"] };
_lsConfig = new(Constants.GenerativeModelPath) { DefaultInferenceParams = _infParams };

testOutputHelper.WriteLine($"Using model {Path.GetFileName(_lsConfig.ModelPath)}");
}


[Theory]
[InlineData("The quick brown fox jumps over the lazy dog")]
[InlineData("Well, here're some special characters!!!")]
[InlineData("...___---")]
[InlineData("15 + 6 = 21 && 68 * 75 = 5100")]
[InlineData(" \n \r\n \t ")]
public void GetTokens_ShouldReturnListOfTokensForInputString(string? text)
{
var tokens = _generator!.GetTokens(text);
var tokensCount = _generator.CountTokens(text);

var expected = " " + text; // the placement of the space corresponding to BOS will vary by model tokenizer
var actual = string.Join("", tokens);

_testOutputHelper.WriteLine($"Tokens for '{text}':");
_testOutputHelper.WriteLine(string.Join("", tokens.Select(x => $"({x})")));

Assert.Equal(expected, actual);
Assert.Equal(tokensCount, tokens.Count);
}

/* This is exactly the same test as the non-unicode cases. However, there are reasons why this
* should be made a special case and may deviate in the future:
*
* As of now there appears to be no final word as to how characters that consist of more than one
* numeric token should correspond to textual tokens, and results vary according to different
* models' tokenizers. For example, given a character 'Z' that corresponds to the numeric tokens {1,2,3}
* some (llama-2) will pad the length of the total number of tokens by returning spaces as tokens
* (i.e. ' ', ' ', 'Z') while others (GPT4Tokenizer) will pad with the character itself (i.e. 'Z','Z','Z').
*
* This is very evident when tokenizing ideograms and emojis, but can arise with various unicode characters
* as well. See pull request for more relevant discussion https://github.com/SciSharp/LLamaSharp/pull/862
*
* Currently the method will remain consistent with the output of ITextTokenizer.CountTokens, meaning
* any redundant tokens will not be omitted as long as they are counted by CountTokens.
*
* StreamingTokenDecoder, while sufficiently useful for this task, was not designed with producing
* output for one numeric token at a time in mind, so ITextTokenizer.GetTokens should not be considered
* an example of proper use.
*
* Note: if this message is removed, also remove references to it in LLamaSharpTextEmbeddingGenerator.GetTokens
* and LLamaSharpTextGenerator.GetTokens
*/
[Theory]
[InlineData("And a little bit of unicode για να κρατήσουμε τα πράγματα ενδιαφέροντα")]
[InlineData("猫坐在垫子上 😀🤨🤐😏")]
public void GetTokens_Unicode_ShouldReturnListOfTokensForInputString(string? text)
{
var tokens = _generator!.GetTokens(text);
var tokensCount = _generator.CountTokens(text);

var expected = " " + text; // the placement of the space corresponding to BOS will vary by model tokenizer
var actual = string.Join("", tokens);

_testOutputHelper.WriteLine($"Tokens for '{text}':");
_testOutputHelper.WriteLine(string.Join("", tokens.Select(x => $"({x})")));

Assert.Equal(expected, actual);
Assert.Equal(tokensCount, tokens.Count);
}

[Fact]
public void GetToken_ShouldThrowForNull()
{
string? text = null;

Assert.Throws<ArgumentNullException>(() => { _generator!.GetTokens(text!); });
}

[Fact]
public void GetToken_EmptyStringYieldsOneEmptyToken()
{
var text = "";
var expected = "";

var tokens = _generator!.GetTokens(text);
var tokensCount = _generator.CountTokens(text);
var actual = tokens.Single();

_testOutputHelper.WriteLine($"Tokens for '{text}':");
_testOutputHelper.WriteLine(string.Join("", tokens.Select(x => $"({x})")));

Assert.Equal(expected, actual);
Assert.Equal(tokensCount, tokens.Count);
}
}
}
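The per-token behavior the unicode comment above describes can be reproduced directly with StreamingTokenDecoder. A rough sketch, assuming an already-loaded LLamaContext named context (as held by the generators in this PR):

// Feed one token at a time: for multi-byte characters the decoder may
// return an empty string until enough bytes accumulate to complete a character.
var numericTokens = context.Tokenize("猫", special: true);
var decoder = new StreamingTokenDecoder(context);
foreach (var token in numericTokens)
{
    decoder.Add(token);
    Console.WriteLine($"[{decoder.Read()}]"); // early reads may print []
}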
30 changes: 30 additions & 0 deletions LLama.Unittest/KernelMemory/LLamaSharpTextEmbeddingGeneratorTests.cs
@@ -0,0 +1,30 @@
using LLama.Common;
using LLamaSharp.KernelMemory;
using Microsoft.KernelMemory.AI;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using Xunit.Abstractions;

namespace LLama.Unittest.KernelMemory
{
public class LLamaSharpTextEmbeddingGeneratorTests : ITextTokenizerTests, IDisposable
{
private readonly LLamaSharpTextEmbeddingGenerator _embeddingGenerator;

public LLamaSharpTextEmbeddingGeneratorTests(ITestOutputHelper testOutputHelper) : base(testOutputHelper)
{
_embeddingGenerator = new LLamaSharpTextEmbeddingGenerator(_lsConfig);

_generator = _embeddingGenerator;
}

public void Dispose()
{
_embeddingGenerator.Dispose();
}
}
}
34 changes: 34 additions & 0 deletions LLama.Unittest/KernelMemory/LlamaSharpTextGeneratorTests.cs
@@ -0,0 +1,34 @@
using LLama.Common;
using LLamaSharp.KernelMemory;
using Microsoft.KernelMemory.AI;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Reflection.Emit;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using Xunit.Abstractions;
using Xunit.Sdk;
using static System.Net.Mime.MediaTypeNames;

namespace LLama.Unittest.KernelMemory
{
public class LlamaSharpTextGeneratorTests : ITextTokenizerTests, IDisposable
{
private readonly LlamaSharpTextGenerator _textGenerator;

public LlamaSharpTextGeneratorTests(ITestOutputHelper testOutputHelper) : base(testOutputHelper)
{
_textGenerator = new LlamaSharpTextGenerator(_lsConfig);

_generator = _textGenerator;
}

public void Dispose()
{
_textGenerator.Dispose();
}
}
}
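To run only these new tokenizer tests locally (standard xunit filtering through the dotnet CLI; the test project downloads the required GGUF models during build):

    dotnet test LLama.Unittest --filter "FullyQualifiedName~KernelMemory"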
30 changes: 6 additions & 24 deletions LLama.Unittest/LLama.Unittest.csproj
@@ -1,4 +1,4 @@
<Project Sdk="Microsoft.NET.Sdk">
<Project Sdk="Microsoft.NET.Sdk">
<Import Project="..\LLama\LLamaSharp.Runtime.targets" />
<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
@@ -29,31 +29,16 @@

<Target Name="DownloadContentFilesInner">

- <DownloadFile
-   SourceUrl="https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q3_K_S.gguf"
-   DestinationFolder="Models"
-   DestinationFileName="llama-2-7b-chat.Q3_K_S.gguf"
-   SkipUnchangedFiles="true">
+ <DownloadFile SourceUrl="https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q3_K_S.gguf" DestinationFolder="Models" DestinationFileName="llama-2-7b-chat.Q3_K_S.gguf" SkipUnchangedFiles="true">
</DownloadFile>

- <DownloadFile
-   SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/llava-v1.6-mistral-7b.Q3_K_XS.gguf"
-   DestinationFolder="Models" DestinationFileName="llava-v1.6-mistral-7b.Q3_K_XS.gguf"
-   SkipUnchangedFiles="true">
+ <DownloadFile SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/llava-v1.6-mistral-7b.Q3_K_XS.gguf" DestinationFolder="Models" DestinationFileName="llava-v1.6-mistral-7b.Q3_K_XS.gguf" SkipUnchangedFiles="true">
</DownloadFile>

- <DownloadFile
-   SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/mmproj-model-f16.gguf"
-   DestinationFolder="Models"
-   DestinationFileName="mmproj-model-f16.gguf"
-   SkipUnchangedFiles="true">
+ <DownloadFile SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/mmproj-model-f16.gguf" DestinationFolder="Models" DestinationFileName="mmproj-model-f16.gguf" SkipUnchangedFiles="true">
</DownloadFile>

- <DownloadFile
-   SourceUrl="https://huggingface.co/leliuga/all-MiniLM-L12-v2-GGUF/resolve/main/all-MiniLM-L12-v2.Q8_0.gguf"
-   DestinationFolder="Models"
-   DestinationFileName="all-MiniLM-L12-v2.Q8_0.gguf"
-   SkipUnchangedFiles="true">
+ <DownloadFile SourceUrl="https://huggingface.co/leliuga/all-MiniLM-L12-v2-GGUF/resolve/main/all-MiniLM-L12-v2.Q8_0.gguf" DestinationFolder="Models" DestinationFileName="all-MiniLM-L12-v2.Q8_0.gguf" SkipUnchangedFiles="true">
</DownloadFile>

</Target>
@@ -63,14 +48,11 @@
</Target>

<ItemGroup>
<ProjectReference Include="..\LLama.KernelMemory\LLamaSharp.KernelMemory.csproj" />
<ProjectReference Include="..\LLama.SemanticKernel\LLamaSharp.SemanticKernel.csproj" />
<ProjectReference Include="..\LLama\LLamaSharp.csproj" />
</ItemGroup>

- <ItemGroup>
-   <Folder Include="Models\" />
- </ItemGroup>

<ItemGroup>
<None Update="Models\all-MiniLM-L12-v2.Q8_0.gguf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>