Convert directly from llama3 #4268

Merged 9 commits on May 21, 2024

2 changes: 1 addition & 1 deletion cmd/cmd.go
@@ -208,7 +208,7 @@ func tempZipFiles(path string) (string, error) {
// pytorch files might also be unresolved git lfs references; skip if they are
// covers pytorch_model-x-of-y.bin, pytorch_model.fp32-x-of-y.bin, pytorch_model.bin
files = append(files, pt...)
-	} else if pt, _ := glob(filepath.Join(path, "consolidated*.pth"), "application/octet-stream"); len(pt) > 0 {
+	} else if pt, _ := glob(filepath.Join(path, "consolidated*.pth"), "application/zip"); len(pt) > 0 {
// pytorch files might also be unresolved git lfs references; skip if they are
// covers consolidated.x.pth, consolidated.pth
files = append(files, pt...)
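
Checkpoints written by modern torch.save are zip archives, so consolidated*.pth files match the application/zip content type rather than application/octet-stream.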
20 changes: 16 additions & 4 deletions convert/convert.go
@@ -18,6 +18,16 @@ import (
"github.com/ollama/ollama/llm"
)

+const (
+	_ int32 = iota
+	tokenTypeNormal
+	tokenTypeUnknown
+	tokenTypeControl
+	tokenTypeUserDefined
+	tokenTypeUnused
+	tokenTypeByte
+)

type Params struct {
Architectures []string `json:"architectures"`
VocabSize int `json:"vocab_size"`
@@ -37,6 +47,8 @@ type Params struct {
Experts int `json:"num_local_experts"`
ExpertsUsed int `json:"num_experts_per_tok"`

+	PreTokenizer string

ByteOrder
}

@@ -74,10 +86,9 @@ func GetModelFormat(dirname string) (ModelFormat, error) {
}

for _, fn := range files {
-		slog.Debug(fmt.Sprintf("file = %s", fn))
if strings.HasSuffix(fn, ".safetensors") {
return &SafetensorFormat{}, nil
-		} else if strings.HasSuffix(fn, ".bin") {
+		} else if strings.HasSuffix(fn, ".bin") || strings.HasSuffix(fn, ".pth") {
slog.Debug("model is torch")
return &TorchFormat{}, nil
}
@@ -92,6 +103,7 @@ type Vocab struct {
Tokens []string
Scores []float32
Types []int32
+	Merges []string
}

func LoadSentencePieceTokens(dirpath string, params *Params) (*Vocab, error) {
@@ -170,7 +182,7 @@ func LoadSentencePieceTokens(dirpath string, params *Params) (*Vocab, error) {
}
v.Tokens = append(v.Tokens, t.key)
v.Scores = append(v.Scores, -1000.0)
-		v.Types = append(v.Types, int32(llm.GGUFTokenUserDefined))
+		v.Types = append(v.Types, tokenTypeUserDefined)
}
slog.Info(fmt.Sprintf("vocab size w/ extra tokens: %d", len(v.Tokens)))

@@ -180,7 +192,7 @@
for cnt := 0; cnt < missingTokens; cnt++ {
v.Tokens = append(v.Tokens, fmt.Sprintf("<dummy%05d>", cnt+1))
v.Scores = append(v.Scores, -1)
-			v.Types = append(v.Types, int32(llm.GGUFTokenUserDefined))
+			v.Types = append(v.Types, tokenTypeUserDefined)
}
}

103 changes: 103 additions & 0 deletions convert/convert_test.go
Contributor: This test is intended to run locally; it runs only when built with -tags slow and the model testdata exists.
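
A typical local invocation, assuming the model directories listed in TestConvertFull are present under convert/testdata:

go test -tags slow ./convert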

@@ -0,0 +1,103 @@
//go:build slow

package convert

import (
"os"
"path/filepath"
"testing"

"github.com/ollama/ollama/llm"
)

func convertFull(t *testing.T, p string) (llm.KV, llm.Tensors) {
Contributor: There's room for improvement here. Ideally there would be a single function call to convert, or at least to set up writing the binary. I missed calling GetTensors on the first pass, but the write succeeded without writing out any tensors. (See the sketch after this function.)

t.Helper()

mf, err := GetModelFormat(p)
if err != nil {
t.Fatal(err)
}

params, err := mf.GetParams(p)
if err != nil {
t.Fatal(err)
}

arch, err := mf.GetModelArch("", p, params)
if err != nil {
t.Fatal(err)
}

if err := arch.LoadVocab(); err != nil {
t.Fatal(err)
}

if err := arch.GetTensors(); err != nil {
t.Fatal(err)
}

f, err := os.CreateTemp(t.TempDir(), "f16")
if err != nil {
t.Fatal(err)
}
defer f.Close()

if err := arch.WriteGGUF(f); err != nil {
t.Fatal(err)
}

r, err := os.Open(f.Name())
if err != nil {
t.Fatal(err)
}
defer r.Close()

m, _, err := llm.DecodeGGML(r)
if err != nil {
t.Fatal(err)
}

return m.KV(), m.Tensors()
}
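
A sketch of the consolidation suggested in the comment above. Convert is a hypothetical helper name; the calls mirror the ones convertFull makes, and the ordering constraint is the pitfall noted in the comment:

func Convert(path string, ws io.WriteSeeker) error {
	mf, err := GetModelFormat(path)
	if err != nil {
		return err
	}

	params, err := mf.GetParams(path)
	if err != nil {
		return err
	}

	arch, err := mf.GetModelArch("", path, params)
	if err != nil {
		return err
	}

	if err := arch.LoadVocab(); err != nil {
		return err
	}

	// GetTensors must run before WriteGGUF: skipping it still yields a
	// "successful" write, just one with no tensors in the output file.
	if err := arch.GetTensors(); err != nil {
		return err
	}

	return arch.WriteGGUF(ws)
}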

func TestConvertFull(t *testing.T) {
cases := []struct {
path string
arch string
tensors int
layers int
}{
{"Meta-Llama-3-8B-Instruct", "llama", 291, 35},
Contributor (author): How does the test data get populated?

Contributor: I symlinked it into the directory since I have it in another directory, but you can clone or unpack the models into convert/testdata as well.

{"Mistral-7B-Instruct-v0.2", "llama", 291, 35},
{"Mixtral-8x7B-Instruct-v0.1", "llama", 291, 35},
{"gemma-2b-it", "gemma", 164, 20},
}

for _, tt := range cases {
t.Run(tt.path, func(t *testing.T) {
p := filepath.Join("testdata", tt.path)
if _, err := os.Stat(p); err != nil {
t.Skipf("%s not found", p)
}

kv, tensors := convertFull(t, p)

if kv.Architecture() != tt.arch {
t.Fatalf("expected llama, got %s", kv.Architecture())
}

if kv.FileType().String() != "F16" {
t.Fatalf("expected F16, got %s", kv.FileType())
}

if len(tensors) != tt.tensors {
t.Fatalf("expected %d tensors, got %d", tt.tensors, len(tensors))
}

layers := tensors.Layers()
if len(layers) != tt.layers {
t.Fatalf("expected %d layers, got %d", tt.layers, len(layers))
}
})
}
}
51 changes: 14 additions & 37 deletions convert/gemma.go
@@ -1,14 +1,11 @@
package convert

import (
"encoding/binary"
"fmt"
"io"
"log/slog"
"os"
"strings"

"github.com/d4l3k/go-bfloat16"
"github.com/pdevine/tensor"
"github.com/pdevine/tensor/native"

@@ -19,49 +16,27 @@
type GemmaModel struct {
ModelData
}

-func gemmaLayerHandler(w io.Writer, r safetensorWriterTo, f *os.File) error {
-	slog.Debug(fmt.Sprintf("converting '%s'", r.t.Name))
-
-	data := make([]byte, r.end-r.start)
-	if err := binary.Read(f, r.bo, data); err != nil {
-		return err
-	}
-
-	tDataF32 := bfloat16.DecodeFloat32(data)
-
-	var err error
-	tDataF32, err = addOnes(tDataF32, int(r.t.Shape[0]))
-	if err != nil {
-		return err
-	}
-
-	if err := binary.Write(w, r.bo, tDataF32); err != nil {
-		return err
-	}
-	return nil
-}

func addOnes(data []float32, vectorSize int) ([]float32, error) {
n := tensor.New(tensor.WithShape(vectorSize), tensor.WithBacking(data))
ones := tensor.Ones(tensor.Float32, vectorSize)

-	var err error
-	n, err = n.Add(ones)
+	n, err := n.Add(ones)
if err != nil {
-		return []float32{}, err
+		return nil, err
}

-	newN, err := native.SelectF32(n, 0)
+	ts, err := native.SelectF32(n, 0)
if err != nil {
-		return []float32{}, err
+		return nil, err
}

-	var fullTensor []float32
-	for _, v := range newN {
-		fullTensor = append(fullTensor, v...)
+	var f32s []float32
+	for _, t := range ts {
+		f32s = append(f32s, t...)
}

-	return fullTensor, nil
+	return f32s, nil
}

func (m *GemmaModel) GetTensors() error {
@@ -71,12 +46,10 @@ func (m *GemmaModel) GetTensors() error {
}

slog.Debug(fmt.Sprintf("Total tensors: %d", len(t)))

m.Tensors = []llm.Tensor{}
for _, l := range t {
if strings.HasSuffix(l.Name, "norm.weight") {
wt := l.WriterTo.(safetensorWriterTo)
-			wt.handler = gemmaLayerHandler
+			wt.repacker = m.Repack
l.WriterTo = wt
}
m.Tensors = append(m.Tensors, l)
@@ -94,6 +67,10 @@ func (m *GemmaModel) LoadVocab() error {
return nil
}

+func (m *GemmaModel) Repack(_ string, data []float32, shape []uint64) ([]float32, error) {
+	return addOnes(data, int(shape[0]))
+}
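
Gemma's RMSNorm scales by 1 + weight, so checkpoints store each norm weight offset by one; Repack adds the one back before the tensor is written out. A minimal illustration with assumed values (the tensor name is hypothetical; Repack ignores it):

repacked, _ := m.Repack("blk.0.attn_norm.weight", []float32{-0.5, 0, 0.5}, []uint64{3})
// repacked == []float32{0.5, 1, 1.5}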

func (m *GemmaModel) WriteGGUF(ws io.WriteSeeker) error {
kv := llm.KV{
"general.architecture": "gemma",