Skip to content

Commit

Permalink
fix the cpu estimatedTotal memory + get the expiry time for loading models (#4461)
Browse files Browse the repository at this point in the history
  • Loading branch information
pdevine committed May 15, 2024
1 parent 5fa36a0 commit d1692fd
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 0 deletions.
1 change: 1 addition & 0 deletions llm/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr

cpuRunner = serverForCpu()
gpuCount = 0
_, _, estimatedTotal = EstimateGPULayers(gpus, ggml, projectors, opts)
} else {
if gpus[0].Library == "metal" {
memInfo, err := gpu.GetCPUMem()
Expand Down
8 changes: 8 additions & 0 deletions server/routes.go
Original file line number Diff line number Diff line change
Expand Up @@ -1161,6 +1161,14 @@ func (s *Server) ProcessHandler(c *gin.Context) {
Details: modelDetails,
ExpiresAt: v.expiresAt,
}
// The scheduler waits to set expiresAt, so while a model is still loading
// it may hold the zero time.Time value (which is not the Unix epoch). In
// that case, report the current time plus sessionDuration instead.
var epoch time.Time
if v.expiresAt == epoch {
mr.ExpiresAt = time.Now().Add(v.sessionDuration)
}

models = append(models, mr)
}

Expand Down

0 comments on commit d1692fd

Please sign in to comment.