lxd gpu设备发现:

// nvidiaGpuCards describes the per-card NVIDIA device node, i.e. a path
// matching /dev/nvidia[0-9]+.
type nvidiaGpuCards struct {
	path  string // device node path, e.g. /dev/nvidia0
	major int    // device major number
	minor int    // device minor number
	id    string // decimal string form of minor
}

// nvidiaGpuDevices describes an NVIDIA device node that is not a per-card
// node, e.g. one of {/dev/nvidiactl, /dev/nvidia-uvm, ...}.
type nvidiaGpuDevices struct {
	path  string // device node path
	major int    // device major number
	minor int    // device minor number
}

// gpuDevice describes one DRM node, e.g. /dev/dri/card0. If we detect that
// vendor == nvidia, then nvidia will contain the corresponding nvidia card,
// e.g. {/dev/dri/card1 --> /dev/nvidia1}.
type gpuDevice struct {
	vendorid  string // PCI vendor id with the "0x" prefix removed
	productid string // PCI device id with the "0x" prefix removed
	id        string // card id e.g. 0

	// If related devices have the same PCI address as the GPU we should
	// mount them all. Meaning if we detect /dev/dri/card0,
	// /dev/dri/controlD64, and /dev/dri/renderD128 with the same PCI
	// address, then they should all be made available in the container.
	pci string

	nvidia nvidiaGpuCards // zero value when no /dev/nvidiaN node was matched
	path   string         // DRM node path, e.g. /dev/dri/card0
	major  int            // major number read from the node's sysfs "dev" file
	minor  int            // minor number read from the node's sysfs "dev" file
}

// isNvidiaGpu reports whether the PCI vendor id is 10de, compared
// case-insensitively.
func (g *gpuDevice) isNvidiaGpu() bool {
	return strings.EqualFold(g.vendorid, "10de")
}

// cardIds pairs a card id with the PCI address the card was found on.
type cardIds struct {
	id  string
	pci string
}

// Compiled once instead of on every loop iteration. The patterns are the ones
// the discovery code has always used (note that neither is anchored at the
// end of the name).
var (
	gpuCardNameRe      = regexp.MustCompile("^card[0-9]+")
	gpuNvidiaCtlNameRe = regexp.MustCompile(`^nvidia[^0-9]+`)
)

// gpuDevNumbers splits a Linux dev_t value (as found in Stat_t.Rdev) into its
// major and minor numbers. Unlike a plain "/ 256" and "% 256" it also decodes
// the extended bits used by minors above 255 and majors above 4095.
func gpuDevNumbers(rdev uint64) (int, int) {
	major := (rdev & 0x00000000000fff00) >> 8
	major |= (rdev & 0xfffff00000000000) >> 32
	minor := rdev & 0x00000000000000ff
	minor |= (rdev & 0x00000ffffff00000) >> 12
	return int(major), int(minor)
}

// gpuStatCharDevice stats path and returns its major and minor numbers. ok is
// false when the path cannot be stat'ed or is not a character device; Rdev is
// only meaningful for device nodes, so directories and regular files are
// rejected rather than reported as device 0:0.
func gpuStatCharDevice(path string) (major int, minor int, ok bool) {
	var st syscall.Stat_t
	if err := syscall.Stat(path, &st); err != nil {
		return 0, 0, false
	}
	if st.Mode&syscall.S_IFMT != syscall.S_IFCHR {
		return 0, 0, false
	}
	major, minor = gpuDevNumbers(uint64(st.Rdev))
	return major, minor, true
}

// gpuReadPCIID reads a sysfs id file such as "vendor" or "device" and returns
// its content with surrounding whitespace and a leading "0x" removed.
func gpuReadPCIID(path string) (string, error) {
	raw, err := ioutil.ReadFile(path)
	if err != nil {
		return "", err
	}
	return strings.TrimPrefix(strings.TrimSpace(string(raw)), "0x"), nil
}

// gpuParseMajorMinor parses the "major:minor" content of a sysfs "dev" file.
// ok is false unless there are exactly two decimal fields.
func gpuParseMajorMinor(s string) (major int, minor int, ok bool) {
	fields := strings.Split(strings.TrimSpace(s), ":")
	if len(fields) != 2 {
		return 0, 0, false
	}
	major, err := strconv.Atoi(fields[0])
	if err != nil {
		return 0, 0, false
	}
	minor, err = strconv.Atoi(fields[1])
	if err != nil {
		return 0, 0, false
	}
	return major, minor, true
}

// deviceLoadGpu discovers the GPUs of the host.
//
// It returns one gpuDevice per DRM node found below /sys/bus/pci/devices and,
// when at least one NVIDIA card was seen, the NVIDIA nodes in /dev whose name
// matches ^nvidia[^0-9]+. A missing /sys/bus/pci/devices is not an error: all
// three results are nil in that case.
func deviceLoadGpu() ([]gpuDevice, []nvidiaGpuDevices, error) {
	return deviceLoadGpuFrom("/sys/bus/pci/devices", "/dev")
}

// deviceLoadGpuFrom implements deviceLoadGpu for an arbitrary sysfs PCI
// directory (pciRoot) and device directory (devRoot).
func deviceLoadGpuFrom(pciRoot string, devRoot string) ([]gpuDevice, []nvidiaGpuDevices, error) {
	ents, err := ioutil.ReadDir(pciRoot)
	if err != nil {
		if os.IsNotExist(err) {
			return nil, nil, nil
		}
		return nil, nil, err
	}

	var gpus []gpuDevice
	var nvidiaDevices []nvidiaGpuDevices
	var cards []cardIds
	isNvidia := false

	for _, ent := range ents {
		// The pci address == the name of the directory. So let's use
		// this cheap way of retrieving it.
		pciAddr := ent.Name()

		// Make sure that we are dealing with a GPU by looking whether
		// the "drm" subfolder exists. Any read failure skips the PCI
		// device: without the listing there is nothing to record.
		drm := filepath.Join(pciRoot, pciAddr, "drm")
		drmEnts, err := ioutil.ReadDir(drm)
		if err != nil {
			continue
		}

		// Retrieve vendor and device ID. A device whose ids cannot be
		// read is skipped instead of being recorded with empty ids.
		vendorID, err := gpuReadPCIID(filepath.Join(pciRoot, pciAddr, "vendor"))
		if err != nil {
			continue
		}
		productID, err := gpuReadPCIID(filepath.Join(pciRoot, pciAddr, "device"))
		if err != nil {
			continue
		}

		// Store all associated subdevices, e.g. controlD64, renderD128.
		// The name of the directory == the last part of the
		// /dev/dri/controlD64 path.
		for _, drmEnt := range drmEnts {
			name := drmEnt.Name()

			raw, err := ioutil.ReadFile(filepath.Join(drm, name, "dev"))
			if err != nil {
				continue
			}
			major, minor, ok := gpuParseMajorMinor(string(raw))
			if !ok {
				continue
			}

			gpu := gpuDevice{
				pci:       pciAddr,
				vendorid:  vendorID,
				productid: productID,
				path:      filepath.Join(devRoot, "dri", name),
				major:     major,
				minor:     minor,
			}

			if gpuCardNameRe.MatchString(name) {
				// If it is a card its minor number will be its id.
				gpu.id = strconv.Itoa(minor)
				cards = append(cards, cardIds{id: gpu.id, pci: gpu.pci})

				// Find matching /dev/nvidia* entry for /dev/dri/card*.
				if gpu.isNvidiaGpu() {
					isNvidia = true
					nvidiaPath := filepath.Join(devRoot, "nvidia"+strconv.Itoa(minor))
					nvMajor, nvMinor, ok := gpuStatCharDevice(nvidiaPath)
					if ok {
						gpu.nvidia = nvidiaGpuCards{
							path:  nvidiaPath,
							major: nvMajor,
							minor: nvMinor,
							id:    strconv.Itoa(nvMinor),
						}
					}
					// When the node is missing the DRM card is still
					// recorded, with an empty gpu.nvidia. Dropping it
					// here would leave the render node of the same
					// PCI address in the result without its card.
				}
			}

			gpus = append(gpus, gpu)
		}
	}

	// We detected a Nvidia card, so let's collect all other nvidia devices
	// that are not /dev/nvidia[0-9]+.
	if isNvidia {
		devEnts, err := ioutil.ReadDir(devRoot)
		if err != nil {
			return nil, nil, err
		}
		for _, devEnt := range devEnts {
			if !gpuNvidiaCtlNameRe.MatchString(devEnt.Name()) {
				continue
			}
			nvidiaPath := filepath.Join(devRoot, devEnt.Name())
			major, minor, ok := gpuStatCharDevice(nvidiaPath)
			if !ok {
				continue
			}
			nvidiaDevices = append(nvidiaDevices, nvidiaGpuDevices{
				path:  nvidiaPath,
				major: major,
				minor: minor,
			})
		}
	}

	// Since we'll give users the ability to specify an id we need to group
	// devices on the same PCI that belong to the same card by id.
	for _, card := range cards {
		for i := range gpus {
			if gpus[i].pci == card.pci {
				gpus[i].id = card.id
			}
		}
	}

	return gpus, nvidiaDevices, nil
}

lxd gpu设备加载:由下可见

最终是否加载取决于两点:通过 REST 接口创建容器时,request body 中 config.devices 下设备的 type 是否为 gpu;以及其中指定的属性(vendorid、productid、pci、id)是否与发现到的 GPU 一致。那么客户端又是如何知道 vendorid、pci 等信息的?实际中一般需要建立 GPU 资源池,GPU 元数据由上层管理,并按一定的调度规则指定。GPU 资源本身则可以通过类似上面的函数发现,或者通过 lspci 命令发现。

else if m["type"] == "gpu" {
// Branch taken for a device entry m whose "type" is "gpu". This is an excerpt:
// k, m, gpus, nvidiaDevices and err are declared outside the visible code.

			if gpus == nil {
// Run discovery only while gpus is still nil.
gpus, nvidiaDevices, err = deviceLoadGpu()
if err != nil {
return "", err
}
} sawNvidia := false
for _, gpu := range gpus {
// Skip this GPU unless every property set in m (vendorid, pci, productid, id)
// equals the discovered value; an empty property matches any GPU.
// Per the original note, m is presumably an entry of config.devices in the
// REST request body that creates the container - TODO confirm against caller.
if (m["vendorid"] != "" && gpu.vendorid != m["vendorid"]) ||
(m["pci"] != "" && gpu.pci != m["pci"]) ||
(m["productid"] != "" && gpu.productid != m["productid"]) ||
(m["id"] != "" && gpu.id != m["id"]) {
continue
} err := c.setupUnixDevice(k, m, gpu.major, gpu.minor, gpu.path, true)
if err != nil {
return "", err
} if gpu.nvidia.path == "" {
// No per-card NVIDIA node was recorded for this GPU: nothing more to set up.
continue
} err = c.setupUnixDevice(k, m, gpu.nvidia.major, gpu.nvidia.minor, gpu.nvidia.path, true)
if err != nil {
return "", err
} sawNvidia = true
} if sawNvidia {
// At least one per-card NVIDIA node was set up, so every entry of
// nvidiaDevices is set up as well.
for _, gpu := range nvidiaDevices {
err := c.setupUnixDevice(k, m, gpu.major, gpu.minor, gpu.path, true)
if err != nil {
return "", err
}
}
}
}

  

// setupUnixDevice() creates the unix device and sets up the necessary low-level
// liblxc configuration items.
func (c *containerLXC) setupUnixDevice(devType string, dev types.Device, major int, minor int, path string, createMustSucceed bool) error {
if c.IsPrivileged() && !runningInUserns && cgDevicesController {
         //设置设备访问白名单
err := lxcSetConfigItem(c.c, "lxc.cgroup.devices.allow", fmt.Sprintf("c %d:%d rwm", major, minor))
if err != nil {
return err
}
} temp := types.Device{}
if err := shared.DeepCopy(&dev, &temp); err != nil {
return err
} temp["major"] = fmt.Sprintf("%d", major)
temp["minor"] = fmt.Sprintf("%d", minor)
temp["path"] = path paths, err := c.createUnixDevice(temp)
if err != nil {
shared.LogDebug("failed to create device", log.Ctx{"err": err, "device": devType})
if createMustSucceed {
return err
}
return nil
}
devPath := paths[0]
tgtPath := paths[1]
     //设置挂载对象
err = lxcSetConfigItem(c.c, "lxc.mount.entry", fmt.Sprintf("%s %s none bind,create=file", devPath, tgtPath))
if err != nil {
return err
}
return nil
}

  

// Unix devices handling
func (c *containerLXC) createUnixDevice(m types.Device) ([]string, error) {
var err error
var major, minor int // Our device paths
srcPath := m["path"]
tgtPath := strings.TrimPrefix(srcPath, "/")
devName := fmt.Sprintf("unix.%s", strings.Replace(tgtPath, "/", "-", -1))
devPath := filepath.Join(c.DevicesPath(), devName)//var/lib/lxd/devices/容器名称/xxxx // Extra checks for nesting
if runningInUserns {
for key, value := range m {
if shared.StringInSlice(key, []string{"major", "minor", "mode", "uid", "gid"}) && value != "" {
return nil, fmt.Errorf("The \"%s\" property may not be set when adding a device to a nested container", key)
}
}
} // Get the major/minor of the device we want to create
if m["major"] == "" && m["minor"] == "" {
// If no major and minor are set, use those from the device on the host
_, major, minor, err = deviceGetAttributes(srcPath)
if err != nil {
return nil, fmt.Errorf("Failed to get device attributes for %s: %s", m["path"], err)
}
} else if m["major"] == "" || m["minor"] == "" {
return nil, fmt.Errorf("Both major and minor must be supplied for device: %s", m["path"])
} else {
major, err = strconv.Atoi(m["major"])
if err != nil {
return nil, fmt.Errorf("Bad major %s in device %s", m["major"], m["path"])
} minor, err = strconv.Atoi(m["minor"])
if err != nil {
return nil, fmt.Errorf("Bad minor %s in device %s", m["minor"], m["path"])
}
} // Get the device mode
mode := os.FileMode(0660)
if m["mode"] != "" {
tmp, err := deviceModeOct(m["mode"])
if err != nil {
return nil, fmt.Errorf("Bad mode %s in device %s", m["mode"], m["path"])
}
mode = os.FileMode(tmp)
} if m["type"] == "unix-block" {
mode |= syscall.S_IFBLK
} else {
mode |= syscall.S_IFCHR
} // Get the device owner
uid := 0
gid := 0 if m["uid"] != "" {
uid, err = strconv.Atoi(m["uid"])
if err != nil {
return nil, fmt.Errorf("Invalid uid %s in device %s", m["uid"], m["path"])
}
} if m["gid"] != "" {
gid, err = strconv.Atoi(m["gid"])
if err != nil {
return nil, fmt.Errorf("Invalid gid %s in device %s", m["gid"], m["path"])
}
} // Create the devices directory if missing
if !shared.PathExists(c.DevicesPath()) {
os.Mkdir(c.DevicesPath(), 0711)
if err != nil {
return nil, fmt.Errorf("Failed to create devices path: %s", err)
}
} // Clean any existing entry
if shared.PathExists(devPath) {
if runningInUserns {
syscall.Unmount(devPath, syscall.MNT_DETACH)
} err = os.Remove(devPath)
if err != nil {
return nil, fmt.Errorf("Failed to remove existing entry: %s", err)
}
} // Create the new entry
if !runningInUserns {
if err := syscall.Mknod(devPath, uint32(mode), minor|(major<<8)); err != nil {
return nil, fmt.Errorf("Failed to create device %s for %s: %s", devPath, m["path"], err)
} if err := os.Chown(devPath, uid, gid); err != nil {
return nil, fmt.Errorf("Failed to chown device %s: %s", devPath, err)
} // Needed as mknod respects the umask
if err := os.Chmod(devPath, mode); err != nil {
return nil, fmt.Errorf("Failed to chmod device %s: %s", devPath, err)
} if c.idmapset != nil {
if err := c.idmapset.ShiftFile(devPath); err != nil {
// uidshift failing is weird, but not a big problem. Log and proceed
shared.LogDebugf("Failed to uidshift device %s: %s\n", m["path"], err)
}
}
} else {
f, err := os.Create(devPath)
if err != nil {
return nil, err
}
f.Close() err = deviceMountDisk(srcPath, devPath, false, false)
if err != nil {
return nil, err
}
} return []string{devPath, tgtPath}, nil
}

  

func deviceMountDisk(srcPath string, dstPath string, readonly bool, recursive bool) error {
var err error // Prepare the mount flags
flags := 0
if readonly {
flags |= syscall.MS_RDONLY
} // Detect the filesystem
fstype := "none"
if deviceIsBlockdev(srcPath) {
fstype, err = shared.BlockFsDetect(srcPath)
if err != nil {
return err
}
} else {
flags |= syscall.MS_BIND
if recursive {
flags |= syscall.MS_REC
}
} // Mount the filesystem
if err = syscall.Mount(srcPath, dstPath, fstype, uintptr(flags), ""); err != nil {
return fmt.Errorf("Unable to mount %s at %s: %s", srcPath, dstPath, err)
} flags = syscall.MS_REC | syscall.MS_SLAVE
if err = syscall.Mount("", dstPath, "", uintptr(flags), ""); err != nil {
return fmt.Errorf("unable to make mount %s private: %s", dstPath, err)
} return nil
}

  

lxd容器之GPU发现和加载的更多相关文章

  1. 使用Pytorch在多GPU下保存和加载训练模型参数遇到的问题

    最近使用Pytorch在学习一个深度学习项目,在模型保存和加载过程中遇到了问题,最终通过在网上查找资料得以解决,故以此记之,以备忘却. 首先,是在使用多GPU进行模型训练的过程中,在保存模型参数时,应 ...

  2. Java并发编程:并发容器之CopyOnWriteArrayList(转载)

    Java并发编程:并发容器之CopyOnWriteArrayList(转载) 原文链接: http://ifeve.com/java-copy-on-write/ Copy-On-Write简称COW ...

  3. Java并发编程:并发容器之CopyOnWriteArrayList

    转载: Java并发编程:并发容器之CopyOnWriteArrayList Copy-On-Write简称COW,是一种用于程序设计中的优化策略.其基本思路是,从一开始大家都在共享同一个内容,当某个 ...

  4. linux 编译,链接和加载

    1.   序 最近在折腾各种.so,碰到了一些问题,一开始对于很多错误也没有头绪,茫然不知所措.索性化了一天多时间将<<程序员的自我修养—链接.装载与库>>中部分内容略读了一遍 ...

  5. 源码跟读,Spring是如何解析和加载xml中配置的beans

    Spring版本基于: 跟踪代码源码基于: https://github.com/deng-cc/KeepLearning commit id:c009ce47bd19e1faf9e07f12086c ...

  6. ionic 在windows环境下更换logo和加载图片的问题

    做用自己的电脑做ionic项目时,更换logo和加载图片时,无论使用哪种命令,发现都上传不了,并且报错 最后发现,需要将 icon和splash两个文件改为.ai格式才能上传成功. 这是最终生成后的文 ...

  7. 【转】Java并发编程:并发容器之CopyOnWriteArrayList

    Copy-On-Write简称COW,是一种用于程序设计中的优化策略.其基本思路是,从一开始大家都在共享同一个内容,当某个人想要修改这个内容的时候,才会真正把内容Copy出去形成一个新的内容然后再改, ...

  8. Kettle实现数据抽取、转换、装入和加载数据-数据转移ETL工具

    原文地址:http://www.xue51.com/soft/5341.html Kettle是来自国外的一款开源的ETL工具,纯java编写,可以在Window.Linux.Unix上运行,绿色无需 ...

  9. spring揭密学习笔记(3)-spring ioc容器:Spring的IoC容器之BeanFactory

    1. Spring的IoC容器和IoC Service Provider的关系 Spring的IoC容器和IoC Service Provider所提供的服务之间存在一定的交集,二者的关系如图4-1所 ...

随机推荐

  1. drag file upload xhr 拖拽异步上传文件

    <div id="droptarget" style="width: 500px; height: 200px; background: silver"& ...

  2. 请使用支持 JDBC 4.0 的 sqljdbc4.jar 类库

    转载请使用支持 JDBC 4.0 的 sqljdbc4.jar 类库 1.下载最新的JDBC(2012/3/6) http://www.microsoft.com/downloads/zh-cn/de ...

  3. MYSQL 函数复习

    数学函数    ABS(X)    返回X的绝对值    SQRT(x)        返回非负数X的二次方根    MOD(x,y)    返回x被y除后的余数    CEIL(x)         ...

  4. word中利用宏替换标点标点全角与半角

    Alt+F11,然后插入-模块: 复制下面代码到编辑窗口: Sub 半角标点符号转换为全角标点符号() '中英互译文档中将中文段落中的英文标点符号替换为中文标点符号 Dim i As Paragrap ...

  5. Spring AOP切面的时候参数的传递

    Spring AOP切面的时候参数的传递 Xml: <?xml version="1.0" encoding="UTF-8"?> <beans ...

  6. FMS+NGINX打造高带宽利用率的流媒体(音频+视频)环境

    fms自身已经拥有了httpd,用来给客户端访问用,例如通过http的音频播放.众所周知,非专业的httpd自然有不专业之处,例如我遇到的情况就是经常http服务假死,或者在访问量庞大的时候会无缘无故 ...

  7. 《深度探索C++对象模型》笔记——Data语意学

    Data Member的绑定 inline member functin躯体之内的一个data member绑定操作会在整个class声明完成之后才发生. argument list中的名称还是会在它 ...

  8. DataTables学习:从最基本的入门静态页面,使用ajax调用Json本地数据源实现前端开发深入学习,根据后台数据接口替换掉本地的json本地数据,以及报错的处理地方,8个例子(显示行附加信息,回调使用api,动态显示和隐藏列...),详细教程

    一.DataTables  个人觉得学习一门新的插件或者技术时候,官方文档是最根本的,入门最快的地方,但是有时候看完官方文档,一步步的动手写例子,总会出现各种莫名其妙的错误,需要我们很好的进行研究出错 ...

  9. Java自然语言处理NLP工具包

    1. Java自然语言处理 LingPipe LingPipe是一个自然语言处理的Java开源工具包.LingPipe目前已有很丰富的功能,包括主题分类(Top Classification).命名实 ...

  10. linux 中用python实现终端命令行命令

    在python代码中实现和在终端中输入的命令行一样的效果,以命令(audacious -p &)为例,该代码实现用audacious在后台播放音乐的功能,当然前提是安装了audacious. ...