Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for GGUF machine learning model files #698

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
274 changes: 274 additions & 0 deletions ml/gguf.ksy
Original file line number Diff line number Diff line change
@@ -0,0 +1,274 @@
meta:
id: gguf
title: GGML model file
file-extension: gguf
license: CC0-1.0
ks-version: 0.10
endian: le
bit-endian: le
doc: |
GGUF is a file format for storing machine learning models for inference with
the GGML library, or executors based on GGML. Models are typically developed
using PyTorch or some other framework, and then converted to GGUF for use in
GGML.

GGUF is a successor file format to GGML, GGMF and GGJT, and is designed
to be unambiguous by containing all the information needed to load a
model. It is also designed to be extensible, so that new information
can be added to models without breaking compatibility.
doc-ref:
- https://github.com/ggerganov/ggml/blob/master/docs/gguf.md
seq:
- id: magic
contents: GGUF
doc: GGUF file magic
- id: version
type: u4
doc: File format version
- id: num_infos
type: u8
doc: The number of tensors in the file
- id: num_kv
type: u8
doc: The number of key-value pairs in the file header
- id: kv
type: gguf_kv
repeat: expr
repeat-expr: num_kv
doc: Key-value pairs
- id: infos
type: gguf_tensor_info
repeat: expr
repeat-expr: num_infos
doc: Tensor metadata
- id: data
type: gguf_tensor_data(_io.pos)
doc: Tensor data

types:
gguf_value:
-webide-representation: '{value}'
params:
- id: type
type: u4
enum: gguf_type
seq:
- id: value
type:
switch-on: type
cases:
'gguf_type::gguf_type_uint8': gguf_uint8
'gguf_type::gguf_type_int8': gguf_int8
'gguf_type::gguf_type_uint16': gguf_uint16
'gguf_type::gguf_type_int16': gguf_int16
'gguf_type::gguf_type_uint32': gguf_uint32
'gguf_type::gguf_type_int32': gguf_int32
'gguf_type::gguf_type_float32': gguf_float32
'gguf_type::gguf_type_bool': gguf_bool
'gguf_type::gguf_type_string': gguf_str
'gguf_type::gguf_type_array': gguf_array
'gguf_type::gguf_type_uint64': gguf_uint64
'gguf_type::gguf_type_int64': gguf_int64
'gguf_type::gguf_type_float64': gguf_float64

gguf_kv:
-webide-representation: '{key}: {value}'
seq:
- id: key
type: gguf_str
- id: type
type: u4
enum: gguf_type
- id: value
type: gguf_value(type)

gguf_str:
-webide-representation: '"{data}"'
seq:
- id: size
type: u8
- id: data
size: size
type: str
encoding: ascii

gguf_array:
# Note that this is more permissive than the parser defined
# by the GGML library which does not permit nested arrays.
-webide-representation: '[{elems}]'
seq:
- id: type
type: u4
enum: gguf_type
- id: num_elems
type: u8
- id: elems
type: gguf_value(type) # Allows for nested arrays
repeat: expr
repeat-expr: num_elems

gguf_bool:
# This type is used as a work-around for languages (like C++)
# that do not support variant types.
-webide-representation: '{value}'
seq:
- id: value
type: b1

gguf_uint8:
# This type is used as a work-around for languages (like C++)
# that do not support variant types.
-webide-representation: '{value:dec}'
seq:
- id: value
type: u1

gguf_int8:
# This type is used as a work-around for languages (like C++)
# that do not support variant types.
-webide-representation: '{value:dec}'
seq:
- id: value
type: s1

gguf_uint16:
# This type is used as a work-around for languages (like C++)
# that do not support variant types.
-webide-representation: '{value:dec}'
seq:
- id: value
type: u2

gguf_int16:
# This type is used as a work-around for languages (like C++)
# that do not support variant types.
-webide-representation: '{value:dec}'
seq:
- id: value
type: s2

gguf_uint32:
# This type is used as a work-around for languages (like C++)
# that do not support variant types.
-webide-representation: '{value:dec}'
seq:
- id: value
type: u4

gguf_int32:
# This type is used as a work-around for languages (like C++)
# that do not support variant types.
-webide-representation: '{value:dec}'
seq:
- id: value
type: s4

gguf_uint64:
# This type is used as a work-around for languages (like C++)
# that do not support variant types.
-webide-representation: '{value:dec}'
seq:
- id: value
type: u8

gguf_int64:
# This type is used as a work-around for languages (like C++)
# that do not support variant types.
-webide-representation: '{value:dec}'
seq:
- id: value
type: s8

gguf_float32:
# This type is used as a work-around for languages (like C++)
# that do not support variant types.
-webide-representation: '{value}'
seq:
- id: value
type: f4

gguf_float64:
# This type is used as a work-around for languages (like C++)
# that do not support variant types.
-webide-representation: '{value}'
seq:
- id: value
type: f8

gguf_tensor_info:
-webide-representation: '{type}[{dims}] {name}'
seq:
- id: name
type: gguf_str
- id: num_dims
type: u4
- id: dims
type: gguf_uint64
repeat: expr
repeat-expr: num_dims
- id: type
type: u4
enum: ggml_type
- id: offset
type: u8

gguf_tensor_data:
params:
- id: offset
type: u8
instances:
padding:
# This hardcodes the default GGUF file alignment (32).
pos: offset
size: 32 - (offset % 32)
data:
pos: offset + padding.size
size-eos: true

enums:
gguf_type:
0: gguf_type_uint8
1: gguf_type_int8
2: gguf_type_uint16
3: gguf_type_int16
4: gguf_type_uint32
5: gguf_type_int32
6: gguf_type_float32
7: gguf_type_bool
8: gguf_type_string
9: gguf_type_array
10: gguf_type_uint64
11: gguf_type_int64
12: gguf_type_float64

ggml_type:
0: ggml_type_f32
1: ggml_type_f16
2: ggml_type_q4_0
3: ggml_type_q4_1
4: ggml_type_q4_2
5: ggml_type_q4_3
6: ggml_type_q5_0
7: ggml_type_q5_1
8: ggml_type_q8_0
9: ggml_type_q8_1
10: ggml_type_q2_k
11: ggml_type_q3_k
12: ggml_type_q4_k
13: ggml_type_q5_k
14: ggml_type_q6_k
15: ggml_type_q8_k
16: ggml_type_iq2_xxs
17: ggml_type_iq2_xs
18: ggml_type_iq3_xxs
19: ggml_type_iq1_s
20: ggml_type_iq4_nl
21: ggml_type_iq3_s
22: ggml_type_iq2_s
23: ggml_type_iq4_xs
24: ggml_type_i8
25: ggml_type_i16
26: ggml_type_i32
27: ggml_type_i64
28: ggml_type_f64
29: ggml_type_iq1_m