Skip to content

Commit

Permalink
Merge pull request #833 from SciSharp/july-2024-binaries
Browse files Browse the repository at this point in the history
July 2024 binaries
  • Loading branch information
martindevans authored Jul 11, 2024
2 parents 3a7fd37 + fc6d4dd commit 0882fce
Show file tree
Hide file tree
Showing 70 changed files with 510 additions and 6,617 deletions.
166 changes: 120 additions & 46 deletions .github/workflows/compile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@ jobs:
matrix:
include:
- build: 'noavx'
defines: '-DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF'
defines: '-DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
- build: 'avx2'
defines: ''
- build: 'avx'
defines: '-DLLAMA_AVX2=OFF'
defines: '-DGGML_AVX2=OFF'
- build: 'avx512'
defines: '-DLLAMA_AVX512=ON'
defines: '-DGGML_AVX512=ON'
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v4
Expand All @@ -54,6 +54,11 @@ jobs:
path: ./build/src/libllama.so
name: llama-bin-linux-${{ matrix.build }}-x64.so
if-no-files-found: error
- uses: actions/upload-artifact@v4
with:
path: ./build/ggml/src/libggml.so
name: ggml-bin-linux-${{ matrix.build }}-x64.so
if-no-files-found: error
- name: Upload Llava
uses: actions/upload-artifact@v4
with:
Expand All @@ -68,13 +73,13 @@ jobs:
matrix:
include:
- build: 'noavx'
defines: '-DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF'
defines: '-DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
- build: 'avx2'
defines: ''
- build: 'avx'
defines: '-DLLAMA_AVX2=OFF'
defines: '-DGGML_AVX2=OFF'
- build: 'avx512'
defines: '-DLLAMA_AVX512=ON -DLLAMA_AVX512_VBMI=ON -DLLAMA_AVX512_VNNI=ON'
defines: '-DGGML_AVX512=ON -DGGML_AVX512_VBMI=ON -DGGML_AVX512_VNNI=ON'
runs-on: windows-latest
steps:
- uses: actions/checkout@v4
Expand All @@ -90,15 +95,22 @@ jobs:
cd build
cmake .. ${{ env.COMMON_DEFINE }} ${{ matrix.defines }}
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
tree /f
- name: Upload artifacts
- name: Upload artifacts (llama)
uses: actions/upload-artifact@v4
with:
path: .\build\bin\Release\llama.dll
name: llama-bin-win-${{ matrix.build }}-x64.dll
if-no-files-found: error
- name: Upload artifacts (ggml)
uses: actions/upload-artifact@v4
with:
path: .\build\bin\Release\ggml.dll
name: ggml-bin-win-${{ matrix.build }}-x64.dll
if-no-files-found: error

- name: Upload Llava
- name: Upload artifacts (llava)
uses: actions/upload-artifact@v4
with:
path: .\build\bin\Release\llava_shared.dll
Expand Down Expand Up @@ -147,15 +159,15 @@ jobs:
run: |
mkdir build
cd build
cmake .. ${{ env.COMMON_DEFINE }} -DLLAMA_VULKAN=ON
cmake .. ${{ env.COMMON_DEFINE }} -DGGML_VULKAN=ON
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
ls -R
- name: Build
if: ${{ matrix.os == 'ubuntu-22.04' }}
run: |
mkdir build
cd build
cmake .. ${{ env.COMMON_DEFINE }} -DLLAMA_VULKAN=ON
cmake .. ${{ env.COMMON_DEFINE }} -DGGML_VULKAN=ON
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
ls -R
- name: Upload llama artifacts (Windows)
Expand All @@ -165,6 +177,13 @@ jobs:
path: .\build\bin\Release\llama.dll
name: llama-bin-win-vulkan-x64.dll
if-no-files-found: error
      - name: Upload ggml artifacts (Windows)
if: ${{ matrix.os == 'windows-latest' }}
uses: actions/upload-artifact@v4
with:
path: .\build\bin\Release\ggml.dll
name: ggml-bin-win-vulkan-x64.dll
if-no-files-found: error
- name: Upload llava artifacts (Windows)
if: ${{ matrix.os == 'windows-latest' }}
uses: actions/upload-artifact@v4
Expand All @@ -179,6 +198,13 @@ jobs:
path: ./build/src/libllama.so
name: llama-bin-linux-vulkan-x64.so
if-no-files-found: error
- name: Upload ggml artifacts (Linux)
if: ${{ matrix.os == 'ubuntu-22.04' }}
uses: actions/upload-artifact@v4
with:
path: ./build/ggml/src/libggml.so
name: ggml-bin-linux-vulkan-x64.so
if-no-files-found: error
- name: Upload llava artifacts (Linux)
if: ${{ matrix.os == 'ubuntu-22.04' }}
uses: actions/upload-artifact@v4
Expand Down Expand Up @@ -236,6 +262,13 @@ jobs:
path: .\build\bin\Release\llama.dll
name: llama-bin-win-cublas-cu${{ matrix.cuda }}-x64.dll
if-no-files-found: error
- name: Upload artifacts (ggml)
if: ${{ matrix.os == 'windows-2019' }}
uses: actions/upload-artifact@v4
with:
path: .\build\bin\Release\ggml.dll
name: ggml-bin-win-cublas-cu${{ matrix.cuda }}-x64.dll
if-no-files-found: error
- name: Upload llava artifacts (Windows)
if: ${{ matrix.os == 'windows-2019' }}
uses: actions/upload-artifact@v4
Expand All @@ -250,6 +283,13 @@ jobs:
path: ./build/src/libllama.so
name: llama-bin-linux-cublas-cu${{ matrix.cuda }}-x64.so
if-no-files-found: error
      - name: Upload ggml artifacts (Linux)
if: ${{ matrix.os == 'ubuntu-20.04' }}
uses: actions/upload-artifact@v4
with:
path: ./build/ggml/src/libggml.so
name: ggml-bin-linux-cublas-cu${{ matrix.cuda }}-x64.so
if-no-files-found: error
- name: Upload llava artifacts (Linux)
if: ${{ matrix.os == 'ubuntu-20.04' }}
uses: actions/upload-artifact@v4
Expand All @@ -268,9 +308,9 @@ jobs:
- build: 'arm64'
defines: '-DCMAKE_OSX_ARCHITECTURES=arm64 -DGGML_METAL_EMBED_LIBRARY=ON'
- build: 'x64'
defines: '-DCMAKE_OSX_ARCHITECTURES=x86_64 -DGGML_METAL=OFF -DLLAMA_AVX=ON -DLLAMA_AVX2=ON'
defines: '-DCMAKE_OSX_ARCHITECTURES=x86_64 -DGGML_METAL=OFF -DGGML_AVX=ON -DGGML_AVX2=ON'
- build: 'x64-rosetta2'
defines: '-DCMAKE_OSX_ARCHITECTURES=x86_64 -DGGML_METAL=OFF -DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF'
defines: '-DCMAKE_OSX_ARCHITECTURES=x86_64 -DGGML_METAL=OFF -DGGML_AVX=OFF -DGGML_AVX2=OFF'
runs-on: macos-latest
steps:
- uses: actions/checkout@v4
Expand All @@ -289,7 +329,13 @@ jobs:
cd build
cmake .. ${{ env.COMMON_DEFINE }} ${{ matrix.defines }}
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
- name: Upload artifacts
- name: Upload ggml
uses: actions/upload-artifact@v4
with:
path: ./build/ggml/src/libggml.dylib
name: ggml-bin-osx-${{ matrix.build }}.dylib
if-no-files-found: error
- name: Upload llama
uses: actions/upload-artifact@v4
with:
path: ./build/src/libllama.dylib
Expand Down Expand Up @@ -331,54 +377,81 @@ jobs:
# Make all directories at once
mkdir --parents deps/{avx,avx2,avx512,osx-arm64,osx-x64,osx-x64-rosetta2,cu11.7.1,cu12.2.0,vulkan}
cp artifacts/llama-bin-linux-noavx-x64.so/libllama.so deps/libllama.so
cp artifacts/llama-bin-linux-avx-x64.so/libllama.so deps/avx/libllama.so
cp artifacts/llama-bin-linux-avx2-x64.so/libllama.so deps/avx2/libllama.so
cp artifacts/llama-bin-linux-avx512-x64.so/libllama.so deps/avx512/libllama.so
cp artifacts/llama-bin-win-noavx-x64.dll/llama.dll deps/llama.dll
cp artifacts/llama-bin-win-avx-x64.dll/llama.dll deps/avx/llama.dll
cp artifacts/llama-bin-win-avx2-x64.dll/llama.dll deps/avx2/llama.dll
cp artifacts/llama-bin-win-avx512-x64.dll/llama.dll deps/avx512/llama.dll
# Linux
cp artifacts/ggml-bin-linux-noavx-x64.so/libggml.so deps/libggml.so
cp artifacts/llama-bin-linux-noavx-x64.so/libllama.so deps/libllama.so
cp artifacts/llava-bin-linux-noavx-x64.so/libllava_shared.so deps/libllava_shared.so
cp artifacts/ggml-bin-linux-avx-x64.so/libggml.so deps/avx/libggml.so
cp artifacts/llama-bin-linux-avx-x64.so/libllama.so deps/avx/libllama.so
cp artifacts/llava-bin-linux-avx-x64.so/libllava_shared.so deps/avx/libllava_shared.so
cp artifacts/ggml-bin-linux-avx2-x64.so/libggml.so deps/avx2/libggml.so
cp artifacts/llama-bin-linux-avx2-x64.so/libllama.so deps/avx2/libllama.so
cp artifacts/llava-bin-linux-avx2-x64.so/libllava_shared.so deps/avx2/libllava_shared.so
cp artifacts/ggml-bin-linux-avx512-x64.so/libggml.so deps/avx512/libggml.so
cp artifacts/llama-bin-linux-avx512-x64.so/libllama.so deps/avx512/libllama.so
cp artifacts/llava-bin-linux-avx512-x64.so/libllava_shared.so deps/avx512/libllava_shared.so
cp artifacts/llava-bin-win-noavx-x64.dll/llava_shared.dll deps/llava_shared.dll
cp artifacts/llava-bin-win-avx-x64.dll/llava_shared.dll deps/avx/llava_shared.dll
cp artifacts/llava-bin-win-avx2-x64.dll/llava_shared.dll deps/avx2/llava_shared.dll
cp artifacts/llava-bin-win-avx512-x64.dll/llava_shared.dll deps/avx512/llava_shared.dll
# Windows
cp artifacts/ggml-bin-win-noavx-x64.dll/ggml.dll deps/ggml.dll
cp artifacts/llama-bin-win-noavx-x64.dll/llama.dll deps/llama.dll
cp artifacts/llava-bin-win-noavx-x64.dll/llava_shared.dll deps/llava_shared.dll
cp artifacts/ggml-bin-win-avx-x64.dll/ggml.dll deps/avx/ggml.dll
cp artifacts/llama-bin-win-avx-x64.dll/llama.dll deps/avx/llama.dll
cp artifacts/llava-bin-win-avx-x64.dll/llava_shared.dll deps/avx/llava_shared.dll
cp artifacts/llama-bin-osx-arm64.dylib/libllama.dylib deps/osx-arm64/libllama.dylib
cp artifacts/llava-bin-osx-arm64.dylib/libllava_shared.dylib deps/osx-arm64/libllava_shared.dylib
cp artifacts/ggml-metal.metal/ggml-metal.metal deps/osx-arm64/ggml-metal.metal
cp artifacts/ggml-bin-win-avx2-x64.dll/ggml.dll deps/avx2/ggml.dll
cp artifacts/llama-bin-win-avx2-x64.dll/llama.dll deps/avx2/llama.dll
cp artifacts/llava-bin-win-avx2-x64.dll/llava_shared.dll deps/avx2/llava_shared.dll
cp artifacts/llama-bin-osx-x64.dylib/libllama.dylib deps/osx-x64/libllama.dylib
cp artifacts/llava-bin-osx-x64.dylib/libllava_shared.dylib deps/osx-x64/libllava_shared.dylib
cp artifacts/ggml-bin-win-avx512-x64.dll/ggml.dll deps/avx512/ggml.dll
cp artifacts/llama-bin-win-avx512-x64.dll/llama.dll deps/avx512/llama.dll
cp artifacts/llava-bin-win-avx512-x64.dll/llava_shared.dll deps/avx512/llava_shared.dll
cp artifacts/llama-bin-osx-x64-rosetta2.dylib/libllama.dylib deps/osx-x64-rosetta2/libllama.dylib
# MacOS
cp artifacts/ggml-bin-osx-arm64.dylib/libggml.dylib deps/osx-arm64/libggml.dylib
cp artifacts/llama-bin-osx-arm64.dylib/libllama.dylib deps/osx-arm64/libllama.dylib
cp artifacts/llava-bin-osx-arm64.dylib/libllava_shared.dylib deps/osx-arm64/libllava_shared.dylib
cp artifacts/ggml-metal.metal/ggml-metal.metal deps/osx-arm64/ggml-metal.metal
cp artifacts/ggml-bin-osx-x64.dylib/libggml.dylib deps/osx-x64/libggml.dylib
cp artifacts/llama-bin-osx-x64.dylib/libllama.dylib deps/osx-x64/libllama.dylib
cp artifacts/llava-bin-osx-x64.dylib/libllava_shared.dylib deps/osx-x64/libllava_shared.dylib
cp artifacts/ggml-bin-osx-x64-rosetta2.dylib/libggml.dylib deps/osx-x64-rosetta2/libggml.dylib
cp artifacts/llama-bin-osx-x64-rosetta2.dylib/libllama.dylib deps/osx-x64-rosetta2/libllama.dylib
cp artifacts/llava-bin-osx-x64-rosetta2.dylib/libllava_shared.dylib deps/osx-x64-rosetta2/libllava_shared.dylib
cp artifacts/llama-bin-win-cublas-cu11.7.1-x64.dll/llama.dll deps/cu11.7.1/llama.dll
cp artifacts/llava-bin-win-cublas-cu11.7.1-x64.dll/llava_shared.dll deps/cu11.7.1/llava_shared.dll
cp artifacts/llama-bin-linux-cublas-cu11.7.1-x64.so/libllama.so deps/cu11.7.1/libllama.so
cp artifacts/llava-bin-linux-cublas-cu11.7.1-x64.so/libllava_shared.so deps/cu11.7.1/libllava_shared.so
# Windows CUDA
cp artifacts/ggml-bin-win-cublas-cu11.7.1-x64.dll/ggml.dll deps/cu11.7.1/ggml.dll
cp artifacts/llama-bin-win-cublas-cu11.7.1-x64.dll/llama.dll deps/cu11.7.1/llama.dll
cp artifacts/llava-bin-win-cublas-cu11.7.1-x64.dll/llava_shared.dll deps/cu11.7.1/llava_shared.dll
cp artifacts/llama-bin-win-cublas-cu12.2.0-x64.dll/llama.dll deps/cu12.2.0/llama.dll
cp artifacts/llava-bin-win-cublas-cu12.2.0-x64.dll/llava_shared.dll deps/cu12.2.0/llava_shared.dll
cp artifacts/ggml-bin-win-cublas-cu12.2.0-x64.dll/ggml.dll deps/cu12.2.0/ggml.dll
cp artifacts/llama-bin-win-cublas-cu12.2.0-x64.dll/llama.dll deps/cu12.2.0/llama.dll
cp artifacts/llava-bin-win-cublas-cu12.2.0-x64.dll/llava_shared.dll deps/cu12.2.0/llava_shared.dll
# Linux CUDA
cp artifacts/ggml-bin-linux-cublas-cu11.7.1-x64.so/libggml.so deps/cu11.7.1/libggml.so
cp artifacts/llama-bin-linux-cublas-cu11.7.1-x64.so/libllama.so deps/cu11.7.1/libllama.so
cp artifacts/llava-bin-linux-cublas-cu11.7.1-x64.so/libllava_shared.so deps/cu11.7.1/libllava_shared.so
cp artifacts/llama-bin-linux-cublas-cu12.2.0-x64.so/libllama.so deps/cu12.2.0/libllama.so
cp artifacts/llava-bin-linux-cublas-cu12.2.0-x64.so/libllava_shared.so deps/cu12.2.0/libllava_shared.so
cp artifacts/ggml-bin-linux-cublas-cu12.2.0-x64.so/libggml.so deps/cu12.2.0/libggml.so
cp artifacts/llama-bin-linux-cublas-cu12.2.0-x64.so/libllama.so deps/cu12.2.0/libllama.so
cp artifacts/llava-bin-linux-cublas-cu12.2.0-x64.so/libllava_shared.so deps/cu12.2.0/libllava_shared.so
cp artifacts/llama-bin-win-vulkan-x64.dll/llama.dll deps/vulkan/llama.dll
cp artifacts/llava-bin-win-vulkan-x64.dll/llava_shared.dll deps/vulkan/llava_shared.dll
# Windows Vulkan
cp artifacts/ggml-bin-win-vulkan-x64.dll/ggml.dll deps/vulkan/ggml.dll
cp artifacts/llama-bin-win-vulkan-x64.dll/llama.dll deps/vulkan/llama.dll
cp artifacts/llava-bin-win-vulkan-x64.dll/llava_shared.dll deps/vulkan/llava_shared.dll
cp artifacts/llama-bin-linux-vulkan-x64.so/libllama.so deps/vulkan/libllama.so
cp artifacts/llava-bin-linux-vulkan-x64.so/libllava_shared.so deps/vulkan/libllava_shared.so
# Linux Vulkan
cp artifacts/ggml-bin-linux-vulkan-x64.so/libggml.so deps/vulkan/libggml.so
cp artifacts/llama-bin-linux-vulkan-x64.so/libllama.so deps/vulkan/libllama.so
cp artifacts/llava-bin-linux-vulkan-x64.so/libllava_shared.so deps/vulkan/libllava_shared.so
- name: Upload artifacts
uses: actions/upload-artifact@v4
Expand All @@ -394,3 +467,4 @@ jobs:
llama-*
llava-*
*.metal
ggml-*
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -350,3 +350,5 @@ site/
/LLama.Benchmark/Models/*.gguf

**/appsettings.Local.json
/LLama/runtimes/deps
/LLama/runtimes/deps.zip
6 changes: 3 additions & 3 deletions LLama.Unittest/GrammarTest.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using LLama.Common;
using LLama.Common;
using LLama.Grammars;
using LLama.Native;

Expand Down Expand Up @@ -86,9 +86,9 @@ public async Task SampleWithTrivialGrammar()
Grammar = grammarInstance2,
};

var result = await executor.InferAsync("Q. 7 + 12\nA. ", inferenceParams).ToListAsync();
var result = string.Join("", await executor.InferAsync("Q. 7 + 12\nA. ", inferenceParams).ToListAsync());

Assert.Equal("cat", result[0]);
Assert.Equal("cat", result);
}

//this test is flakey - it reproduces an error which appears to be a bug in llama.cpp
Expand Down
38 changes: 32 additions & 6 deletions LLama.Unittest/LLama.Unittest.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,39 @@
</PackageReference>
</ItemGroup>

<Target Name="DownloadContentFiles" BeforeTargets="Build">
<DownloadFile SourceUrl="https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q3_K_S.gguf" DestinationFolder="Models" DestinationFileName="llama-2-7b-chat.Q3_K_S.gguf" SkipUnchangedFiles="true"></DownloadFile>
<DownloadFile SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/llava-v1.6-mistral-7b.Q3_K_XS.gguf" DestinationFolder="Models" DestinationFileName="llava-v1.6-mistral-7b.Q3_K_XS.gguf" SkipUnchangedFiles="true"></DownloadFile>
<DownloadFile SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/mmproj-model-f16.gguf" DestinationFolder="Models" DestinationFileName="mmproj-model-f16.gguf" SkipUnchangedFiles="true"></DownloadFile>
<DownloadFile SourceUrl="https://huggingface.co/leliuga/all-MiniLM-L12-v2-GGUF/resolve/main/all-MiniLM-L12-v2.Q8_0.gguf" DestinationFolder="Models" DestinationFileName="all-MiniLM-L12-v2.Q8_0.gguf" SkipUnchangedFiles="true"></DownloadFile>

<Target Name="DownloadContentFilesInner">

<DownloadFile
SourceUrl="https://huggingface.co/TheBloke/Llama-2-7b-Chat-GGUF/resolve/main/llama-2-7b-chat.Q3_K_S.gguf"
DestinationFolder="Models"
DestinationFileName="llama-2-7b-chat.Q3_K_S.gguf"
SkipUnchangedFiles="true">
</DownloadFile>

<DownloadFile
SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/llava-v1.6-mistral-7b.Q3_K_XS.gguf"
DestinationFolder="Models" DestinationFileName="llava-v1.6-mistral-7b.Q3_K_XS.gguf"
SkipUnchangedFiles="true">
</DownloadFile>

<DownloadFile
SourceUrl="https://huggingface.co/cjpais/llava-1.6-mistral-7b-gguf/resolve/main/mmproj-model-f16.gguf"
DestinationFolder="Models"
DestinationFileName="mmproj-model-f16.gguf"
SkipUnchangedFiles="true">
</DownloadFile>

<DownloadFile
SourceUrl="https://huggingface.co/leliuga/all-MiniLM-L12-v2-GGUF/resolve/main/all-MiniLM-L12-v2.Q8_0.gguf"
DestinationFolder="Models"
DestinationFileName="all-MiniLM-L12-v2.Q8_0.gguf"
SkipUnchangedFiles="true">
</DownloadFile>

</Target>

<Target Name="DownloadContentFiles" BeforeTargets="DispatchToInnerBuilds;BeforeBuild">
<MSBuild Projects="$(MSBuildProjectFile)" Targets="DownloadContentFilesInner" Properties="TargetFramework=once" />
</Target>

<ItemGroup>
Expand Down
2 changes: 1 addition & 1 deletion LLama.Unittest/TemplateTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -262,4 +262,4 @@ public void EndOSpeechToken_ReturnsExpected()
{
Assert.Equal("</s>", _model.Tokens.EndOfSpeechToken);
}
}
}
7 changes: 7 additions & 0 deletions LLama/LLamaContext.cs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@ public uint BatchThreads
/// Get the maximum batch size for this context
/// </summary>
public uint BatchSize => NativeHandle.BatchSize;

/// <summary>
/// Get the special tokens for the model associated with this context
/// </summary>
public SafeLlamaModelHandle.ModelTokens Tokens { get; }

private LLamaTokenData[]? _samplingBuffer;

Expand All @@ -99,6 +104,8 @@ public LLamaContext(LLamaWeights model, IContextParams @params, ILogger? logger

@params.ToLlamaContextParams(out var lparams);
NativeHandle = SafeLLamaContextHandle.Create(model.NativeHandle, lparams);

Tokens = model.Tokens;
}

/// <summary>
Expand Down
Loading

0 comments on commit 0882fce

Please sign in to comment.