# Update Binaries
#
# Workflow file for run #87 of "Update Binaries".

# Workflow header: triggers, concurrency policy and shared CMake defines.
name: Update Binaries

on:
  # Manual trigger; the caller chooses which llama.cpp revision to build.
  workflow_dispatch:
    inputs:
      llama_cpp_commit:
        description: 'Branch, tag, or commit to use for llama.cpp'
        required: true
        default: 'master'
  # Also triggered by pushes to the cron_job branch.
  push:
    branches:
      - cron_job
  #schedule:
  #  - cron: "22 22 * * 2"

# Runs are grouped by workflow, ref and requested llama.cpp revision; a newer
# run in the same group cancels the one still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event.inputs.llama_cpp_commit }}
  cancel-in-progress: true

env:
  # Compiler defines common to all platforms
  # (folded scalar: the lines below are joined with single spaces).
  COMMON_DEFINE: >-
    -DGGML_NATIVE=OFF
    -DLLAMA_BUILD_TESTS=OFF
    -DLLAMA_BUILD_EXAMPLES=ON
    -DLLAMA_BUILD_SERVER=OFF
    -DBUILD_SHARED_LIBS=ON
    -DGGML_RPC=ON

jobs:
# Job: compile-linux - CPU-only Linux builds, one per SIMD feature level.
  compile-linux:
    name: Compile (Linux)
    strategy:
      fail-fast: true
      matrix:
        include:
          - build: 'noavx'
            defines: '-DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
          - build: 'avx2'
            defines: ''
          - build: 'avx'
            defines: '-DGGML_AVX2=OFF'
          - build: 'avx512'
            defines: '-DGGML_AVX512=ON'
    # NOTE(review): GitHub has announced retirement of the ubuntu-20.04 hosted
    # image - confirm this label is still served before relying on it.
    runs-on: ubuntu-20.04
    steps:
      # Checks out llama.cpp (not this repository) at the requested revision.
      - uses: actions/checkout@v4
        with:
          repository: ggerganov/llama.cpp
          fetch-depth: 0
          ref: '${{ github.event.inputs.llama_cpp_commit }}'

      - name: Build
        id: cmake_build
        # This step runs under bash. The previous `${env:NUMBER_OF_PROCESSORS}`
        # is PowerShell syntax; in bash it expands to an empty string, leaving
        # a bare `-j` that hands the job count to the native build tool
        # instead of matching the runner's CPU count. `nproc` gives the count.
        run: |
          mkdir build
          cd build
          cmake .. ${{ env.COMMON_DEFINE }} ${{ matrix.defines }}
          cmake --build . --config Release -j "$(nproc)"
          ls -R

      # One artifact per library; the artifact name encodes the matrix build.
      - name: Upload llama
        uses: actions/upload-artifact@v4
        with:
          path: ./build/src/libllama.so
          name: llama-bin-linux-${{ matrix.build }}-x64.so
          if-no-files-found: error

      - name: Upload ggml
        uses: actions/upload-artifact@v4
        with:
          path: ./build/ggml/src/libggml.so
          name: ggml-bin-linux-${{ matrix.build }}-x64.so
          if-no-files-found: error

      - name: Upload Llava
        uses: actions/upload-artifact@v4
        with:
          path: ./build/examples/llava/libllava_shared.so
          name: llava-bin-linux-${{ matrix.build }}-x64.so
          if-no-files-found: error
# Job: compile-windows - CPU-only Windows builds, one per SIMD feature level.
  compile-windows:
    name: Compile (Windows)
    runs-on: windows-latest
    strategy:
      fail-fast: true
      matrix:
        include:
          - build: 'noavx'
            defines: '-DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
          - build: 'avx2'
            defines: ''
          - build: 'avx'
            defines: '-DGGML_AVX2=OFF'
          - build: 'avx512'
            defines: '-DGGML_AVX512=ON -DGGML_AVX512_VBMI=ON -DGGML_AVX512_VNNI=ON'
    steps:
      # Checks out llama.cpp (not this repository) at the requested revision.
      - uses: actions/checkout@v4
        with:
          repository: ggerganov/llama.cpp
          fetch-depth: 0
          ref: '${{ github.event.inputs.llama_cpp_commit }}'

      # Configure and build in ./build, then print the resulting tree.
      # `${env:NUMBER_OF_PROCESSORS}` is PowerShell environment-variable syntax.
      - name: Build
        id: cmake_build
        run: |
          mkdir build
          cd build
          cmake .. ${{ env.COMMON_DEFINE }} ${{ matrix.defines }}
          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
          tree /f

      # One artifact per DLL; the artifact name encodes the matrix build.
      - name: Upload artifacts (llama)
        uses: actions/upload-artifact@v4
        with:
          name: llama-bin-win-${{ matrix.build }}-x64.dll
          path: '.\build\bin\Release\llama.dll'
          if-no-files-found: error

      - name: Upload artifacts (ggml)
        uses: actions/upload-artifact@v4
        with:
          name: ggml-bin-win-${{ matrix.build }}-x64.dll
          path: '.\build\bin\Release\ggml.dll'
          if-no-files-found: error

      - name: Upload artifacts (llava)
        uses: actions/upload-artifact@v4
        with:
          name: llava-bin-win-${{ matrix.build }}-x64.dll
          path: '.\build\bin\Release\llava_shared.dll'
          if-no-files-found: error
# Job: compile-vulkan - Vulkan-backend builds for Linux and Windows.
  compile-vulkan:
    name: Compile (vulkan) - ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os:
          - ubuntu-22.04
          - windows-latest
    env:
      # Vulkan SDK release downloaded by the Windows dependency step.
      VULKAN_VERSION: 1.3.261.1
    runs-on: ${{ matrix.os }}
    steps:
      # Checks out llama.cpp (not this repository) at the requested revision.
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          repository: ggerganov/llama.cpp
          fetch-depth: 0
          ref: '${{ github.event.inputs.llama_cpp_commit }}'

      # Registers the LunarG apt repository and installs the Vulkan SDK.
      # The source list is fetched over https (it was plain http), and `-y`
      # keeps the install non-interactive without relying on runner defaults.
      - name: Download dependencies - Linux
        if: ${{ matrix.os == 'ubuntu-22.04' }}
        run: |
          wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc
          sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
          sudo apt update
          sudo apt install -y vulkan-sdk

      # Downloads and runs the Vulkan SDK installer, then exposes the SDK
      # location (VULKAN_SDK) and its bin directory (PATH) to later steps.
      - name: Download dependencies - Windows
        id: get_vulkan
        if: ${{ matrix.os == 'windows-latest' }}
        run: |
          curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
          & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
          Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
          Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"

      # `${env:NUMBER_OF_PROCESSORS}` is PowerShell environment-variable syntax.
      - name: Build (Windows)
        id: cmake_build
        if: ${{ matrix.os == 'windows-latest' }}
        run: |
          mkdir build
          cd build
          cmake .. ${{ env.COMMON_DEFINE }} -DGGML_VULKAN=ON
          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
          ls -R

      # This step runs under bash, where the PowerShell-style
      # `${env:NUMBER_OF_PROCESSORS}` expands to nothing and leaves a bare
      # `-j`; `nproc` supplies the actual CPU count instead.
      - name: Build (Linux)
        if: ${{ matrix.os == 'ubuntu-22.04' }}
        run: |
          mkdir build
          cd build
          cmake .. ${{ env.COMMON_DEFINE }} -DGGML_VULKAN=ON
          cmake --build . --config Release -j "$(nproc)"
          ls -R

      - name: Upload llama artifacts (Windows)
        if: ${{ matrix.os == 'windows-latest' }}
        uses: actions/upload-artifact@v4
        with:
          path: .\build\bin\Release\llama.dll
          name: llama-bin-win-vulkan-x64.dll
          if-no-files-found: error

      # Previously labelled "Upload llama artifacts (Windows)" although it
      # uploads ggml.dll.
      - name: Upload ggml artifacts (Windows)
        if: ${{ matrix.os == 'windows-latest' }}
        uses: actions/upload-artifact@v4
        with:
          path: .\build\bin\Release\ggml.dll
          name: ggml-bin-win-vulkan-x64.dll
          if-no-files-found: error

      - name: Upload llava artifacts (Windows)
        if: ${{ matrix.os == 'windows-latest' }}
        uses: actions/upload-artifact@v4
        with:
          path: .\build\bin\Release\llava_shared.dll
          name: llava-bin-win-vulkan-x64.dll
          if-no-files-found: error

      - name: Upload llama artifacts (Linux)
        if: ${{ matrix.os == 'ubuntu-22.04' }}
        uses: actions/upload-artifact@v4
        with:
          path: ./build/src/libllama.so
          name: llama-bin-linux-vulkan-x64.so
          if-no-files-found: error

      - name: Upload ggml artifacts (Linux)
        if: ${{ matrix.os == 'ubuntu-22.04' }}
        uses: actions/upload-artifact@v4
        with:
          path: ./build/ggml/src/libggml.so
          name: ggml-bin-linux-vulkan-x64.so
          if-no-files-found: error

      - name: Upload llava artifacts (Linux)
        if: ${{ matrix.os == 'ubuntu-22.04' }}
        uses: actions/upload-artifact@v4
        with:
          path: ./build/examples/llava/libllava_shared.so
          name: llava-bin-linux-vulkan-x64.so
          if-no-files-found: error
# Job: compile-cublas - CUDA builds for every OS x CUDA-version combination.
  compile-cublas:
    name: Compile (cublas)
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-20.04, windows-2019]
        cuda: ['12.2.0', '11.7.1']
    runs-on: ${{ matrix.os }}
    steps:
      # Checks out llama.cpp (not this repository) at the requested revision.
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          repository: ggerganov/llama.cpp
          fetch-depth: 0
          ref: '${{ github.event.inputs.llama_cpp_commit }}'

      # NOTE(review): the action reference was mangled by e-mail obfuscation
      # ("Jimver/[email protected]"). The repository is Jimver/cuda-toolkit;
      # the tag below is a best guess - TODO confirm the intended version.
      - uses: Jimver/cuda-toolkit@v0.2.15
        if: runner.os == 'Windows'
        id: cuda-toolkit-windows
        with:
          cuda: ${{ matrix.cuda }}
          method: 'network'
          sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "thrust", "visual_studio_integration"]'

      # Exports CMAKE_CXX_FLAGS=/Zm1000 as an environment variable for the
      # following steps.
      # NOTE(review): CMake seeds CMAKE_CXX_FLAGS from the CXXFLAGS environment
      # variable, so this is presumably not picked up by the cmake call below -
      # confirm whether /Zm1000 is actually applied.
      - name: "Configure"
        if: runner.os == 'Windows'
        run: |
          echo "CMAKE_CXX_FLAGS=/Zm1000" | Out-File -FilePath $env:GITHUB_ENV -Append

      # NOTE(review): same mangled reference as above - TODO confirm the tag.
      - uses: Jimver/cuda-toolkit@v0.2.15
        if: runner.os == 'Linux'
        id: cuda-toolkit-linux
        with:
          cuda: ${{ matrix.cuda }}
          method: 'network'
          linux-local-args: '["--toolkit"]'

      # The build used to be one step shared by both shells, but its script
      # mixed bash-only and PowerShell-only syntax. It is split per OS so each
      # shell gets valid syntax.
      - name: Build (Windows)
        id: cmake_build
        if: runner.os == 'Windows'
        # PowerShell: environment variables are read through `$env:`.
        run: |
          echo "CMAKE_CXX_FLAGS is $env:CMAKE_CXX_FLAGS"
          mkdir build
          cd build
          cmake .. ${{ env.COMMON_DEFINE }} -DGGML_CUDA=ON
          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
          ls -R

      - name: Build (Linux)
        if: runner.os == 'Linux'
        # bash: `${env:NUMBER_OF_PROCESSORS}` expands to nothing here and left
        # a bare `-j`; `nproc` supplies the actual CPU count instead.
        run: |
          echo "CMAKE_CXX_FLAGS is $CMAKE_CXX_FLAGS"
          mkdir build
          cd build
          cmake .. ${{ env.COMMON_DEFINE }} -DGGML_CUDA=ON
          cmake --build . --config Release -j "$(nproc)"
          ls -R

      - name: Upload llama artifacts (Windows)
        if: ${{ matrix.os == 'windows-2019' }}
        uses: actions/upload-artifact@v4
        with:
          path: .\build\bin\Release\llama.dll
          name: llama-bin-win-cublas-cu${{ matrix.cuda }}-x64.dll
          if-no-files-found: error

      - name: Upload ggml artifacts (Windows)
        if: ${{ matrix.os == 'windows-2019' }}
        uses: actions/upload-artifact@v4
        with:
          path: .\build\bin\Release\ggml.dll
          name: ggml-bin-win-cublas-cu${{ matrix.cuda }}-x64.dll
          if-no-files-found: error

      - name: Upload llava artifacts (Windows)
        if: ${{ matrix.os == 'windows-2019' }}
        uses: actions/upload-artifact@v4
        with:
          path: .\build\bin\Release\llava_shared.dll
          name: llava-bin-win-cublas-cu${{ matrix.cuda }}-x64.dll
          if-no-files-found: error

      - name: Upload llama artifacts (Linux)
        if: ${{ matrix.os == 'ubuntu-20.04' }}
        uses: actions/upload-artifact@v4
        with:
          path: ./build/src/libllama.so
          name: llama-bin-linux-cublas-cu${{ matrix.cuda }}-x64.so
          if-no-files-found: error

      - name: Upload ggml artifacts (Linux)
        if: ${{ matrix.os == 'ubuntu-20.04' }}
        uses: actions/upload-artifact@v4
        with:
          path: ./build/ggml/src/libggml.so
          name: ggml-bin-linux-cublas-cu${{ matrix.cuda }}-x64.so
          if-no-files-found: error

      - name: Upload llava artifacts (Linux)
        if: ${{ matrix.os == 'ubuntu-20.04' }}
        uses: actions/upload-artifact@v4
        with:
          path: ./build/examples/llava/libllava_shared.so
          name: llava-bin-linux-cublas-cu${{ matrix.cuda }}-x64.so
          if-no-files-found: error
# Job: compile-macos - macOS builds for arm64, x64 and x64 without AVX/AVX2.
  compile-macos:
    name: Compile (MacOS)
    strategy:
      fail-fast: true
      matrix:
        include:
          - build: 'arm64'
            defines: '-DCMAKE_OSX_ARCHITECTURES=arm64 -DGGML_METAL_EMBED_LIBRARY=ON'
          - build: 'x64'
            defines: '-DCMAKE_OSX_ARCHITECTURES=x86_64 -DGGML_METAL=OFF -DGGML_AVX=ON -DGGML_AVX2=ON'
          - build: 'x64-rosetta2'
            defines: '-DCMAKE_OSX_ARCHITECTURES=x86_64 -DGGML_METAL=OFF -DGGML_AVX=OFF -DGGML_AVX2=OFF'
    runs-on: macos-latest
    steps:
      # Checks out llama.cpp (not this repository) at the requested revision.
      - uses: actions/checkout@v4
        with:
          repository: ggerganov/llama.cpp
          fetch-depth: 0
          ref: '${{ github.event.inputs.llama_cpp_commit }}'

      # Best effort: a failing `brew update` does not fail the job.
      - name: Dependencies
        continue-on-error: true
        run: |
          brew update

      - name: Build
        id: cmake_build
        # This step runs under bash, where the PowerShell-style
        # `${env:NUMBER_OF_PROCESSORS}` expands to nothing and left a bare
        # `-j`. macOS has no `nproc`, so the CPU count comes from sysctl.
        run: |
          mkdir build
          cd build
          cmake .. ${{ env.COMMON_DEFINE }} ${{ matrix.defines }}
          cmake --build . --config Release -j "$(sysctl -n hw.logicalcpu)"

      - name: Upload ggml
        uses: actions/upload-artifact@v4
        with:
          path: ./build/ggml/src/libggml.dylib
          name: ggml-bin-osx-${{ matrix.build }}.dylib
          if-no-files-found: error

      - name: Upload llama
        uses: actions/upload-artifact@v4
        with:
          path: ./build/src/libllama.dylib
          name: llama-bin-osx-${{ matrix.build }}.dylib
          if-no-files-found: error

      - name: Upload Llava
        uses: actions/upload-artifact@v4
        with:
          path: ./build/examples/llava/libllava_shared.dylib
          name: llava-bin-osx-${{ matrix.build }}.dylib
          if-no-files-found: error

      # The Metal shader source is uploaded only from the arm64 build.
      - name: Upload Metal
        if: ${{ matrix.build == 'arm64' }}
        uses: actions/upload-artifact@v4
        with:
          path: ./build/bin/ggml-metal.metal
          name: ggml-metal.metal
          if-no-files-found: error
# Job: compile-android - Android NDK builds per ABI (currently disabled).
  compile-android:
    # Disable android build
    if: false
    strategy:
      fail-fast: true
      matrix:
        include:
          - build: 'x86'
            defines: '-DANDROID_ABI=x86'
          - build: 'x86_64'
            defines: '-DANDROID_ABI=x86_64'
          - build: 'arm64-v8a'
            defines: '-DANDROID_ABI=arm64-v8a'
    runs-on: ubuntu-20.04
    steps:
      # Checks out llama.cpp (not this repository) at the requested revision.
      - uses: actions/checkout@v4
        with:
          repository: ggerganov/llama.cpp
          fetch-depth: 0
          ref: '${{ github.event.inputs.llama_cpp_commit }}'

      # Installs the NDK; its path is consumed below via the step output.
      - uses: nttld/setup-ndk@v1
        id: setup-ndk
        with:
          ndk-version: r26d
          add-to-path: false

      - name: Build
        id: cmake_build
        env:
          # Points CMake at the NDK toolchain file and targets android-23.
          CMAKE_FLAGS: '-DCMAKE_TOOLCHAIN_FILE=${{ steps.setup-ndk.outputs.ndk-path }}/build/cmake/android.toolchain.cmake -DANDROID_PLATFORM=android-23'
        # This step runs under bash, where the PowerShell-style
        # `${env:NUMBER_OF_PROCESSORS}` expands to nothing and left a bare
        # `-j`; `nproc` supplies the actual CPU count instead.
        run: |
          mkdir build
          cd build
          cmake .. ${{ env.COMMON_DEFINE }} ${{ env.CMAKE_FLAGS }} ${{ matrix.defines }}
          cmake --build . --config Release -j "$(nproc)"
          cd ..
          ls -R

      # All three uploads now fail on a missing file, matching the ggml upload
      # and the other compile jobs (llama and llava only warned before).
      - name: Upload Llama
        uses: actions/upload-artifact@v4
        with:
          path: ./build/src/libllama.so
          name: llama-bin-android-${{ matrix.build }}.so
          if-no-files-found: error

      - name: Upload ggml
        uses: actions/upload-artifact@v4
        with:
          path: ./build/ggml/src/libggml.so
          name: ggml-bin-android-${{ matrix.build }}.so
          if-no-files-found: error

      - name: Upload Llava
        uses: actions/upload-artifact@v4
        with:
          path: ./build/examples/llava/libllava_shared.so
          name: llava-bin-android-${{ matrix.build }}.so
          if-no-files-found: error
# Job: build-deps - collects every compiled library into one `deps` artifact.
  build-deps:
    runs-on: ubuntu-latest
    name: "Gather Binaries"
    # A status-check function is needed because compile-android is disabled
    # (`if: false`) and a skipped dependency would otherwise skip this job.
    # `!cancelled()` is used instead of `always()` so that a run superseded
    # through `cancel-in-progress` does not still start this job and fail on
    # artifacts that were never produced.
    if: ${{ !cancelled() }}
    needs:
      - compile-linux
      - compile-windows
      - compile-vulkan
      - compile-cublas
      - compile-macos
      - compile-android
    steps:
      # With no `name` given, every artifact of the run is downloaded, each
      # into its own directory under ./artifacts.
      - uses: actions/download-artifact@v4
        with:
          path: artifacts

      - name: List Files
        run: ls -R

      # Copies each library from its per-artifact directory into the deps/
      # layout: noavx builds at the root, everything else in a sub-directory
      # named after the variant.
      - name: Rearrange Files
        run: |
          # Make all directories at once
          mkdir --parents deps/{avx,avx2,avx512,osx-arm64,osx-x64,osx-x64-rosetta2,cu11.7.1,cu12.2.0,vulkan,android-arm64-v8a,android-x86,android-x86_64}

          # Linux
          cp artifacts/ggml-bin-linux-noavx-x64.so/libggml.so deps/libggml.so
          cp artifacts/llama-bin-linux-noavx-x64.so/libllama.so deps/libllama.so
          cp artifacts/llava-bin-linux-noavx-x64.so/libllava_shared.so deps/libllava_shared.so
          cp artifacts/ggml-bin-linux-avx-x64.so/libggml.so deps/avx/libggml.so
          cp artifacts/llama-bin-linux-avx-x64.so/libllama.so deps/avx/libllama.so
          cp artifacts/llava-bin-linux-avx-x64.so/libllava_shared.so deps/avx/libllava_shared.so
          cp artifacts/ggml-bin-linux-avx2-x64.so/libggml.so deps/avx2/libggml.so
          cp artifacts/llama-bin-linux-avx2-x64.so/libllama.so deps/avx2/libllama.so
          cp artifacts/llava-bin-linux-avx2-x64.so/libllava_shared.so deps/avx2/libllava_shared.so
          cp artifacts/ggml-bin-linux-avx512-x64.so/libggml.so deps/avx512/libggml.so
          cp artifacts/llama-bin-linux-avx512-x64.so/libllama.so deps/avx512/libllama.so
          cp artifacts/llava-bin-linux-avx512-x64.so/libllava_shared.so deps/avx512/libllava_shared.so

          # Windows
          cp artifacts/ggml-bin-win-noavx-x64.dll/ggml.dll deps/ggml.dll
          cp artifacts/llama-bin-win-noavx-x64.dll/llama.dll deps/llama.dll
          cp artifacts/llava-bin-win-noavx-x64.dll/llava_shared.dll deps/llava_shared.dll
          cp artifacts/ggml-bin-win-avx-x64.dll/ggml.dll deps/avx/ggml.dll
          cp artifacts/llama-bin-win-avx-x64.dll/llama.dll deps/avx/llama.dll
          cp artifacts/llava-bin-win-avx-x64.dll/llava_shared.dll deps/avx/llava_shared.dll
          cp artifacts/ggml-bin-win-avx2-x64.dll/ggml.dll deps/avx2/ggml.dll
          cp artifacts/llama-bin-win-avx2-x64.dll/llama.dll deps/avx2/llama.dll
          cp artifacts/llava-bin-win-avx2-x64.dll/llava_shared.dll deps/avx2/llava_shared.dll
          cp artifacts/ggml-bin-win-avx512-x64.dll/ggml.dll deps/avx512/ggml.dll
          cp artifacts/llama-bin-win-avx512-x64.dll/llama.dll deps/avx512/llama.dll
          cp artifacts/llava-bin-win-avx512-x64.dll/llava_shared.dll deps/avx512/llava_shared.dll

          # MacOS
          cp artifacts/ggml-bin-osx-arm64.dylib/libggml.dylib deps/osx-arm64/libggml.dylib
          cp artifacts/llama-bin-osx-arm64.dylib/libllama.dylib deps/osx-arm64/libllama.dylib
          cp artifacts/llava-bin-osx-arm64.dylib/libllava_shared.dylib deps/osx-arm64/libllava_shared.dylib
          cp artifacts/ggml-metal.metal/ggml-metal.metal deps/osx-arm64/ggml-metal.metal
          cp artifacts/ggml-bin-osx-x64.dylib/libggml.dylib deps/osx-x64/libggml.dylib
          cp artifacts/llama-bin-osx-x64.dylib/libllama.dylib deps/osx-x64/libllama.dylib
          cp artifacts/llava-bin-osx-x64.dylib/libllava_shared.dylib deps/osx-x64/libllava_shared.dylib
          cp artifacts/ggml-bin-osx-x64-rosetta2.dylib/libggml.dylib deps/osx-x64-rosetta2/libggml.dylib
          cp artifacts/llama-bin-osx-x64-rosetta2.dylib/libllama.dylib deps/osx-x64-rosetta2/libllama.dylib
          cp artifacts/llava-bin-osx-x64-rosetta2.dylib/libllava_shared.dylib deps/osx-x64-rosetta2/libllava_shared.dylib

          # Android
          #cp artifacts/ggml-bin-android-arm64-v8a.so/libggml.so deps/android-arm64-v8a/libggml.so
          #cp artifacts/llama-bin-android-arm64-v8a.so/libllama.so deps/android-arm64-v8a/libllama.so
          #cp artifacts/llava-bin-android-arm64-v8a.so/libllava_shared.so deps/android-arm64-v8a/libllava_shared.so
          #cp artifacts/ggml-bin-android-x86.so/libggml.so deps/android-x86/libggml.so
          #cp artifacts/llama-bin-android-x86.so/libllama.so deps/android-x86/libllama.so
          #cp artifacts/llava-bin-android-x86.so/libllava_shared.so deps/android-x86/libllava_shared.so
          #cp artifacts/ggml-bin-android-x86_64.so/libggml.so deps/android-x86_64/libggml.so
          #cp artifacts/llama-bin-android-x86_64.so/libllama.so deps/android-x86_64/libllama.so
          #cp artifacts/llava-bin-android-x86_64.so/libllava_shared.so deps/android-x86_64/libllava_shared.so

          # Windows CUDA
          cp artifacts/ggml-bin-win-cublas-cu11.7.1-x64.dll/ggml.dll deps/cu11.7.1/ggml.dll
          cp artifacts/llama-bin-win-cublas-cu11.7.1-x64.dll/llama.dll deps/cu11.7.1/llama.dll
          cp artifacts/llava-bin-win-cublas-cu11.7.1-x64.dll/llava_shared.dll deps/cu11.7.1/llava_shared.dll
          cp artifacts/ggml-bin-win-cublas-cu12.2.0-x64.dll/ggml.dll deps/cu12.2.0/ggml.dll
          cp artifacts/llama-bin-win-cublas-cu12.2.0-x64.dll/llama.dll deps/cu12.2.0/llama.dll
          cp artifacts/llava-bin-win-cublas-cu12.2.0-x64.dll/llava_shared.dll deps/cu12.2.0/llava_shared.dll

          # Linux CUDA
          cp artifacts/ggml-bin-linux-cublas-cu11.7.1-x64.so/libggml.so deps/cu11.7.1/libggml.so
          cp artifacts/llama-bin-linux-cublas-cu11.7.1-x64.so/libllama.so deps/cu11.7.1/libllama.so
          cp artifacts/llava-bin-linux-cublas-cu11.7.1-x64.so/libllava_shared.so deps/cu11.7.1/libllava_shared.so
          cp artifacts/ggml-bin-linux-cublas-cu12.2.0-x64.so/libggml.so deps/cu12.2.0/libggml.so
          cp artifacts/llama-bin-linux-cublas-cu12.2.0-x64.so/libllama.so deps/cu12.2.0/libllama.so
          cp artifacts/llava-bin-linux-cublas-cu12.2.0-x64.so/libllava_shared.so deps/cu12.2.0/libllava_shared.so

          # Windows Vulkan
          cp artifacts/ggml-bin-win-vulkan-x64.dll/ggml.dll deps/vulkan/ggml.dll
          cp artifacts/llama-bin-win-vulkan-x64.dll/llama.dll deps/vulkan/llama.dll
          cp artifacts/llava-bin-win-vulkan-x64.dll/llava_shared.dll deps/vulkan/llava_shared.dll

          # Linux Vulkan
          cp artifacts/ggml-bin-linux-vulkan-x64.so/libggml.so deps/vulkan/libggml.so
          cp artifacts/llama-bin-linux-vulkan-x64.so/libllama.so deps/vulkan/libllama.so
          cp artifacts/llava-bin-linux-vulkan-x64.so/libllava_shared.so deps/vulkan/libllava_shared.so

      # Publishes the assembled tree; fails if deps/ ended up with no files,
      # matching the per-library uploads in the compile jobs.
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          path: deps/
          name: deps
          if-no-files-found: error

      # Deletes the per-library artifacts matched by these patterns.
      - name: Remove Artifacts
        uses: geekyeggo/delete-artifact@v5
        with:
          name: |
            llama-*
            llava-*
            *.metal
            ggml-*