// Package docker wraps the Docker Engine API client to pull an image, run the
// configured container, and report container health.
package docker

import (
	"context"
	"encoding/base64"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"strings"
	"time"

	"github.com/docker/docker/api/types/container"
	"github.com/docker/docker/api/types/filters"
	"github.com/docker/docker/api/types/image"
	"github.com/docker/docker/api/types/registry"
	"github.com/docker/docker/client"

	"sentinel/pkg/config"
	"sentinel/pkg/device"
	"sentinel/pkg/log"
)

// DockerManager bundles a Docker API client with the registry address,
// credentials and timeout copied from the config package.
type DockerManager struct {
	registry string
	username string
	password string
	cli      *client.Client
	// timeout is populated from config.DOCKER_TIME_OUT; none of the methods
	// visible in this file read it.
	timeout time.Duration
}

// NewDockerManager builds a DockerManager whose client is configured from the
// environment with API version negotiation enabled. It returns the client
// construction error unchanged.
func NewDockerManager() (*DockerManager, error) {
	cli, err := client.NewClientWithOpts(
		client.FromEnv,
		client.WithAPIVersionNegotiation(),
	)
	if err != nil {
		return nil, err
	}

	return &DockerManager{
		cli:      cli,
		registry: config.DOCKER_REGISTRY,
		username: config.DOCKER_USERNAME,
		password: config.DOCKER_PASSWORD,
		timeout:  config.DOCKER_TIME_OUT,
	}, nil
}

// PullImage pulls refStr using the manager's registry credentials and blocks
// until the daemon has finished streaming progress.
//
// It returns an error when the credentials cannot be encoded, when the pull
// request is rejected, when the progress stream cannot be read, or when the
// daemon reports a failure inside the progress stream.
func (d *DockerManager) PullImage(ctx context.Context, refStr string) error {
	authBytes, err := json.Marshal(registry.AuthConfig{
		ServerAddress: d.registry,
		Username:      d.username,
		Password:      d.password,
	})
	if err != nil {
		return fmt.Errorf("encoding registry auth: %w", err)
	}

	reader, err := d.cli.ImagePull(ctx, refStr, image.PullOptions{
		// The X-Registry-Auth header is base64url-encoded; the standard
		// alphabet can emit '+' and '/' which the daemon does not accept.
		RegistryAuth: base64.URLEncoding.EncodeToString(authBytes),
	})
	if err != nil {
		return err
	}
	defer reader.Close()

	// The stream must be consumed for the pull to complete, and it is also
	// the only place where mid-pull failures are reported.
	return drainPullStream(reader)
}

// drainPullStream reads the JSON progress messages of an image pull until EOF
// and returns the first error message found in the stream, if any.
func drainPullStream(r io.Reader) error {
	dec := json.NewDecoder(r)
	for {
		var msg struct {
			Error string `json:"error"`
		}
		if err := dec.Decode(&msg); err != nil {
			if errors.Is(err, io.EOF) {
				return nil
			}
			return fmt.Errorf("reading image pull progress: %w", err)
		}
		if msg.Error != "" {
			return errors.New(msg.Error)
		}
	}
}

// RunContainer replaces any existing container named
// config.DOCKER_CONTAINER_NAME with a fresh one created from
// config.DOCKER_IMAGE, then starts it.
//
// The container is created privileged, on the host network, with the
// "nvidia" runtime, a set of read-only library bind mounts plus the argus
// socket, and a shell health check. Any error from the stop/remove, create or
// start step is returned unchanged.
func (d *DockerManager) RunContainer(ctx context.Context) error {
	// Stop and remove any existing container first so the name is free.
	if err := d.StopAndRemoveContainer(ctx); err != nil {
		return err
	}

	// Create the container.
	log.Println("正在启动名为<", config.DOCKER_CONTAINER_NAME, ">的容器")
	resp, err := d.cli.ContainerCreate(
		ctx,
		&container.Config{
			Image: config.DOCKER_IMAGE,
			Env: []string{
				"DEVICE_ID=" + device.GetDeviceID(),
				"LD_LIBRARY_PATH=/opt/nvidia/deepstream/deepstream/lib" +
					":/opt/nvidia/deepstream/deepstream/lib/triton" +
					":/opt/nvidia/deepstream/deepstream/lib/rivermax" +
					":/opt/nvidia/vpi3/lib/aarch64-linux-gnu" +
					":/usr/lib/aarch64-linux-gnu" +
					":/usr/lib/aarch64-linux-gnu/nvidia" +
					":/usr/local/cuda-12.6/lib64",
				"GST_PLUGIN_PATH=/opt/nvidia/deepstream/deepstream/lib/gst-plugins",
			},
			Healthcheck: &container.HealthConfig{
				Test:     []string{"CMD-SHELL", "echo ok"},
				Interval: 5 * time.Second,
				Timeout:  2 * time.Second,
				Retries:  3,
			},
		},
		&container.HostConfig{
			Runtime:    "nvidia",
			Privileged: true,
			Binds: []string{
				"/usr/lib/aarch64-linux-gnu:/usr/lib/aarch64-linux-gnu:ro",
				"/opt/nvidia/deepstream/deepstream/lib:/opt/nvidia/deepstream/deepstream/lib:ro",
				"/opt/nvidia/vpi3/lib/aarch64-linux-gnu/:/opt/nvidia/vpi3/lib/aarch64-linux-gnu/:ro",
				"/usr/local/cuda-12.6/lib64/:/usr/local/cuda-12.6/lib64/:ro",
				"/tmp/argus_socket:/tmp/argus_socket",
			},
			NetworkMode: "host",
		},
		nil,
		nil,
		config.DOCKER_CONTAINER_NAME,
	)
	if err != nil {
		return err
	}

	// Start the container.
	if err := d.cli.ContainerStart(ctx, resp.ID, container.StartOptions{}); err != nil {
		return err
	}

	log.Println("容器已成功运行")
	return nil
}

// StopAndRemoveContainer stops and removes every container whose name is
// exactly config.DOCKER_CONTAINER_NAME, including stopped ones.
//
// It returns the first list, stop or remove error encountered; when no
// matching container exists it returns nil.
func (d *DockerManager) StopAndRemoveContainer(ctx context.Context) error {
	containers, err := d.cli.ContainerList(ctx, container.ListOptions{
		All: true,
		Filters: filters.NewArgs(
			filters.KeyValuePair{Key: "name", Value: config.DOCKER_CONTAINER_NAME},
		),
	})
	if err != nil {
		return err
	}

	stopTimeout := 10 // seconds the daemon waits before killing the container
	for _, c := range containers {
		// The daemon's name filter also matches partial names, so confirm
		// the exact name before touching the container.
		if !hasExactName(c.Names, config.DOCKER_CONTAINER_NAME) {
			continue
		}

		log.Println("正在停止名为<", config.DOCKER_CONTAINER_NAME, ">的容器:", c.ID)

		if err := d.cli.ContainerStop(ctx, c.ID, container.StopOptions{
			Timeout: &stopTimeout,
		}); err != nil {
			log.Println(fmt.Sprintf("Failed to stop container %s: %v", c.ID, err))
			return err
		}

		if err := d.cli.ContainerRemove(ctx, c.ID, container.RemoveOptions{}); err != nil {
			log.Println(fmt.Sprintf("Failed to remove container %s: %v", c.ID, err))
			return err
		}
	}

	return nil
}

// CheckContainerHealth returns the health status reported by the daemon for
// the container named exactly containerName, or "unknown" when the inspect
// result carries no health information.
//
// It returns an error when containerName is empty, when listing or inspecting
// fails, or when no container with that exact name exists.
func (d *DockerManager) CheckContainerHealth(ctx context.Context, containerName string) (string, error) {
	if containerName == "" {
		return "", errors.New("containerName must not be empty")
	}

	containers, err := d.cli.ContainerList(ctx, container.ListOptions{
		All: true,
		Filters: filters.NewArgs(
			filters.KeyValuePair{Key: "name", Value: containerName},
		),
	})
	if err != nil {
		return "", err
	}

	for _, c := range containers {
		// Skip partial-name matches returned by the daemon's name filter.
		if !hasExactName(c.Names, containerName) {
			continue
		}

		inspect, err := d.cli.ContainerInspect(ctx, c.ID)
		if err != nil {
			return "", err
		}
		if inspect.State != nil && inspect.State.Health != nil {
			return inspect.State.Health.Status, nil
		}
		return "unknown", nil
	}

	return "", errors.New("container not found")
}

// hasExactName reports whether any entry in names equals want, ignoring the
// leading slash on either side.
func hasExactName(names []string, want string) bool {
	want = strings.TrimPrefix(want, "/")
	for _, n := range names {
		if strings.TrimPrefix(n, "/") == want {
			return true
		}
	}
	return false
}