lxd gpu设备发现:

// nvidiaGpuCards describes a per-card NVIDIA character device, i.e. a node
// matching /dev/nvidia[0-9]+.
type nvidiaGpuCards struct {
	path  string
	major int
	minor int
	id    string
}

// nvidiaGpuDevices describes an NVIDIA device node that is not a numbered
// card, e.g. {/dev/nvidiactl, /dev/nvidia-uvm, ...}.
type nvidiaGpuDevices struct {
	path  string
	major int
	minor int
}

// gpuDevice describes one DRM node such as /dev/dri/card0. If we detect that
// vendor == nvidia, then nvidia will contain the corresponding nvidia card,
// e.g. {/dev/dri/card1 --> /dev/nvidia1}.
type gpuDevice struct {
	vendorid  string
	productid string
	id        string // card id e.g. 0
	// If related devices have the same PCI address as the GPU we should
	// mount them all. Meaning if we detect /dev/dri/card0,
	// /dev/dri/controlD64, and /dev/dri/renderD128 with the same PCI
	// address, then they should all be made available in the container.
	pci    string
	nvidia nvidiaGpuCards
	path   string
	major  int
	minor  int
}

// isNvidiaGpu reports whether the PCI vendor id is NVIDIA's (10de),
// compared case-insensitively.
func (g *gpuDevice) isNvidiaGpu() bool {
	return strings.EqualFold(g.vendorid, "10de")
}

// cardIds pairs a card id with the PCI address the card was found under.
type cardIds struct {
	id  string
	pci string
}

// Compiled once instead of on every DRM entry.
var (
	// gpuCardNameRe matches DRM entries that are cards (card0, card1, ...).
	gpuCardNameRe = regexp.MustCompile("^card[0-9]+")
	// gpuNvidiaAuxNameRe matches /dev entries named nvidia followed by
	// something other than a digit, i.e. not /dev/nvidia[0-9]+.
	gpuNvidiaAuxNameRe = regexp.MustCompile(`^nvidia[^0-9]+`)
)

// gpuDeviceNumbers splits a Linux dev_t value into its major and minor
// numbers. Plain "/ 256" and "% 256" arithmetic is only correct while the
// minor is below 256; this follows the full Linux encoding.
func gpuDeviceNumbers(rdev uint64) (int, int) {
	major := ((rdev >> 8) & 0xfff) | ((rdev >> 32) &^ 0xfff)
	minor := (rdev & 0xff) | ((rdev >> 12) &^ 0xff)
	return int(major), int(minor)
}

// gpuParseMajMin parses the "major:minor" content of a sysfs "dev" file.
// The boolean is false when the content is not two integers separated by ':'.
func gpuParseMajMin(content string) (int, int, bool) {
	fields := strings.Split(strings.TrimSpace(content), ":")
	if len(fields) != 2 {
		return 0, 0, false
	}

	major, err := strconv.Atoi(fields[0])
	if err != nil {
		return 0, 0, false
	}

	minor, err := strconv.Atoi(fields[1])
	if err != nil {
		return 0, 0, false
	}

	return major, minor, true
}

// gpuReadSysfsID reads a sysfs id file (such as "vendor" or "device") and
// returns its content without surrounding whitespace and without the "0x"
// prefix.
func gpuReadSysfsID(path string) (string, error) {
	raw, err := ioutil.ReadFile(path)
	if err != nil {
		return "", err
	}

	return strings.TrimPrefix(strings.TrimSpace(string(raw)), "0x"), nil
}

// deviceLoadGpu scans /sys/bus/pci/devices for PCI devices that have a "drm"
// subdirectory and returns one gpuDevice per DRM node found there, plus the
// NVIDIA device nodes in /dev that are not numbered cards (only collected
// when at least one NVIDIA card was seen).
//
// It returns (nil, nil, nil) when /sys/bus/pci/devices does not exist.
// PCI devices or DRM nodes whose sysfs files cannot be read or parsed are
// skipped. An error is returned when /sys/bus/pci/devices or /dev cannot be
// listed.
func deviceLoadGpu() ([]gpuDevice, []nvidiaGpuDevices, error) {
	const pciDevicesPath = "/sys/bus/pci/devices"

	var gpus []gpuDevice
	var nvidiaDevices []nvidiaGpuDevices
	var cards []cardIds

	ents, err := ioutil.ReadDir(pciDevicesPath)
	if err != nil {
		if os.IsNotExist(err) {
			return nil, nil, nil
		}
		return nil, nil, err
	}

	sawNvidia := false
	for _, ent := range ents {
		// The pci address == the name of the directory. So let's use
		// this cheap way of retrieving it.
		pciAddr := ent.Name()

		// Make sure that we are dealing with a GPU by looking whether
		// the "drm" subfolder exists. Any read failure means there is
		// nothing usable here, so skip the device.
		drm := filepath.Join(pciDevicesPath, pciAddr, "drm")
		drmEnts, err := ioutil.ReadDir(drm)
		if err != nil {
			continue
		}

		// Retrieve vendor and device IDs. A device whose IDs cannot be
		// read is skipped rather than recorded with empty IDs.
		vendorID, err := gpuReadSysfsID(filepath.Join(pciDevicesPath, pciAddr, "vendor"))
		if err != nil {
			continue
		}

		productID, err := gpuReadSysfsID(filepath.Join(pciDevicesPath, pciAddr, "device"))
		if err != nil {
			continue
		}

		// Store all associated subdevices, e.g. controlD64, renderD128.
		// The name of the directory == the last part of the
		// /dev/dri/controlD64 path. So drmEnt.Name() will give us
		// controlD64.
		for _, drmEnt := range drmEnts {
			name := drmEnt.Name()

			majMinByte, err := ioutil.ReadFile(filepath.Join(drm, name, "dev"))
			if err != nil {
				continue
			}

			major, minor, ok := gpuParseMajMin(string(majMinByte))
			if !ok {
				continue
			}

			tmpGpu := gpuDevice{
				pci:       pciAddr,
				vendorid:  vendorID,
				productid: productID,
				path:      filepath.Join("/dev/dri", name),
				major:     major,
				minor:     minor,
			}

			isCard := gpuCardNameRe.MatchString(name)
			if isCard {
				// If it is a card its minor number will be its id.
				tmpGpu.id = strconv.Itoa(minor)
				cards = append(cards, cardIds{id: tmpGpu.id, pci: pciAddr})
			}

			// Find matching /dev/nvidia* entry for /dev/dri/card*
			if isCard && tmpGpu.isNvidiaGpu() {
				sawNvidia = true

				nvidiaPath := "/dev/nvidia" + strconv.Itoa(minor)
				stat := syscall.Stat_t{}
				// NOTE(review): when the nvidia node cannot be
				// stat'ed the DRI card itself is not recorded
				// either (original behavior) — confirm intended.
				if err := syscall.Stat(nvidiaPath, &stat); err != nil {
					continue
				}

				tmpGpu.nvidia.path = nvidiaPath
				tmpGpu.nvidia.major, tmpGpu.nvidia.minor = gpuDeviceNumbers(uint64(stat.Rdev))
				tmpGpu.nvidia.id = strconv.Itoa(tmpGpu.nvidia.minor)
			}

			gpus = append(gpus, tmpGpu)
		}
	}

	// We detected a Nvidia card, so let's collect all other nvidia devices
	// that are not /dev/nvidia[0-9]+.
	if sawNvidia {
		devEnts, err := ioutil.ReadDir("/dev")
		if err != nil {
			return nil, nil, err
		}

		for _, devEnt := range devEnts {
			if !gpuNvidiaAuxNameRe.MatchString(devEnt.Name()) {
				continue
			}

			nvidiaPath := filepath.Join("/dev", devEnt.Name())
			stat := syscall.Stat_t{}
			if err := syscall.Stat(nvidiaPath, &stat); err != nil {
				continue
			}

			major, minor := gpuDeviceNumbers(uint64(stat.Rdev))
			nvidiaDevices = append(nvidiaDevices, nvidiaGpuDevices{
				path:  nvidiaPath,
				major: major,
				minor: minor,
			})
		}
	}

	// Since we'll give users the ability to specify an id we need to group
	// devices on the same PCI that belong to the same card by id. When
	// several cards share a PCI address the last one found wins.
	idByPci := make(map[string]string, len(cards))
	for _, card := range cards {
		idByPci[card.pci] = card.id
	}

	for i := range gpus {
		if id, ok := idByPci[gpus[i].pci]; ok {
			gpus[i].id = id
		}
	}

	return gpus, nvidiaDevices, nil
}

lxd gpu设备加载:由下可见

最终是否加载,取决于通过 REST 接口创建时 request body 中 config.devices 里设备的 type 是否为 gpu,以及所指定的属性(vendorid、productid、pci、id)是否与发现到的 GPU 一致。那么客户端又是如何知道 vendorid、pci 等信息的呢?实际中一般需要建立 GPU 资源池,GPU 元数据由上层管理,并通过一定的调度规则来指定。至于 GPU 资源的发现,可以通过类似上面的函数完成,也可以通过 lspci 命令完成。

else if m["type"] == "gpu" {
// Excerpt: handling of a device entry whose type is "gpu". The enclosing
// function and the declarations of gpus, nvidiaDevices, err, k and m are
// outside this excerpt.

// Run GPU discovery only if no GPU list has been loaded yet.
			if gpus == nil {
gpus, nvidiaDevices, err = deviceLoadGpu()
if err != nil {
return "", err
}
} sawNvidia := false
for _, gpu := range gpus {
                  // Whether a GPU ends up being loaded depends on whether config.devices.type in the request body created through the REST API is "gpu" and whether the specified properties match what was discovered.
                  // A property left empty in m is not used as a filter; a GPU is skipped as soon as one specified property differs.
if (m["vendorid"] != "" && gpu.vendorid != m["vendorid"]) ||
(m["pci"] != "" && gpu.pci != m["pci"]) ||
(m["productid"] != "" && gpu.productid != m["productid"]) ||
(m["id"] != "" && gpu.id != m["id"]) {
continue
} err := c.setupUnixDevice(k, m, gpu.major, gpu.minor, gpu.path, true)
if err != nil {
return "", err
} if gpu.nvidia.path == "" {
continue
} err = c.setupUnixDevice(k, m, gpu.nvidia.major, gpu.nvidia.minor, gpu.nvidia.path, true)
if err != nil {
return "", err
} sawNvidia = true
} if sawNvidia {
// At least one matching GPU had an associated nvidia card node, so also
// set up every entry of nvidiaDevices.
for _, gpu := range nvidiaDevices {
err := c.setupUnixDevice(k, m, gpu.major, gpu.minor, gpu.path, true)
if err != nil {
return "", err
}
}
}
}

  

// setupUnixDevice() creates the unix device and sets up the necessary low-level
// liblxc configuration items.
func (c *containerLXC) setupUnixDevice(devType string, dev types.Device, major int, minor int, path string, createMustSucceed bool) error {
if c.IsPrivileged() && !runningInUserns && cgDevicesController {
         //设置设备访问白名单
err := lxcSetConfigItem(c.c, "lxc.cgroup.devices.allow", fmt.Sprintf("c %d:%d rwm", major, minor))
if err != nil {
return err
}
} temp := types.Device{}
if err := shared.DeepCopy(&dev, &temp); err != nil {
return err
} temp["major"] = fmt.Sprintf("%d", major)
temp["minor"] = fmt.Sprintf("%d", minor)
temp["path"] = path paths, err := c.createUnixDevice(temp)
if err != nil {
shared.LogDebug("failed to create device", log.Ctx{"err": err, "device": devType})
if createMustSucceed {
return err
}
return nil
}
devPath := paths[0]
tgtPath := paths[1]
     //设置挂载对象
err = lxcSetConfigItem(c.c, "lxc.mount.entry", fmt.Sprintf("%s %s none bind,create=file", devPath, tgtPath))
if err != nil {
return err
}
return nil
}

  

// Unix devices handling
func (c *containerLXC) createUnixDevice(m types.Device) ([]string, error) {
var err error
var major, minor int // Our device paths
srcPath := m["path"]
tgtPath := strings.TrimPrefix(srcPath, "/")
devName := fmt.Sprintf("unix.%s", strings.Replace(tgtPath, "/", "-", -1))
devPath := filepath.Join(c.DevicesPath(), devName)//var/lib/lxd/devices/容器名称/xxxx // Extra checks for nesting
if runningInUserns {
for key, value := range m {
if shared.StringInSlice(key, []string{"major", "minor", "mode", "uid", "gid"}) && value != "" {
return nil, fmt.Errorf("The \"%s\" property may not be set when adding a device to a nested container", key)
}
}
} // Get the major/minor of the device we want to create
if m["major"] == "" && m["minor"] == "" {
// If no major and minor are set, use those from the device on the host
_, major, minor, err = deviceGetAttributes(srcPath)
if err != nil {
return nil, fmt.Errorf("Failed to get device attributes for %s: %s", m["path"], err)
}
} else if m["major"] == "" || m["minor"] == "" {
return nil, fmt.Errorf("Both major and minor must be supplied for device: %s", m["path"])
} else {
major, err = strconv.Atoi(m["major"])
if err != nil {
return nil, fmt.Errorf("Bad major %s in device %s", m["major"], m["path"])
} minor, err = strconv.Atoi(m["minor"])
if err != nil {
return nil, fmt.Errorf("Bad minor %s in device %s", m["minor"], m["path"])
}
} // Get the device mode
mode := os.FileMode(0660)
if m["mode"] != "" {
tmp, err := deviceModeOct(m["mode"])
if err != nil {
return nil, fmt.Errorf("Bad mode %s in device %s", m["mode"], m["path"])
}
mode = os.FileMode(tmp)
} if m["type"] == "unix-block" {
mode |= syscall.S_IFBLK
} else {
mode |= syscall.S_IFCHR
} // Get the device owner
uid := 0
gid := 0 if m["uid"] != "" {
uid, err = strconv.Atoi(m["uid"])
if err != nil {
return nil, fmt.Errorf("Invalid uid %s in device %s", m["uid"], m["path"])
}
} if m["gid"] != "" {
gid, err = strconv.Atoi(m["gid"])
if err != nil {
return nil, fmt.Errorf("Invalid gid %s in device %s", m["gid"], m["path"])
}
} // Create the devices directory if missing
if !shared.PathExists(c.DevicesPath()) {
os.Mkdir(c.DevicesPath(), 0711)
if err != nil {
return nil, fmt.Errorf("Failed to create devices path: %s", err)
}
} // Clean any existing entry
if shared.PathExists(devPath) {
if runningInUserns {
syscall.Unmount(devPath, syscall.MNT_DETACH)
} err = os.Remove(devPath)
if err != nil {
return nil, fmt.Errorf("Failed to remove existing entry: %s", err)
}
} // Create the new entry
if !runningInUserns {
if err := syscall.Mknod(devPath, uint32(mode), minor|(major<<8)); err != nil {
return nil, fmt.Errorf("Failed to create device %s for %s: %s", devPath, m["path"], err)
} if err := os.Chown(devPath, uid, gid); err != nil {
return nil, fmt.Errorf("Failed to chown device %s: %s", devPath, err)
} // Needed as mknod respects the umask
if err := os.Chmod(devPath, mode); err != nil {
return nil, fmt.Errorf("Failed to chmod device %s: %s", devPath, err)
} if c.idmapset != nil {
if err := c.idmapset.ShiftFile(devPath); err != nil {
// uidshift failing is weird, but not a big problem. Log and proceed
shared.LogDebugf("Failed to uidshift device %s: %s\n", m["path"], err)
}
}
} else {
f, err := os.Create(devPath)
if err != nil {
return nil, err
}
f.Close() err = deviceMountDisk(srcPath, devPath, false, false)
if err != nil {
return nil, err
}
} return []string{devPath, tgtPath}, nil
}

  

func deviceMountDisk(srcPath string, dstPath string, readonly bool, recursive bool) error {
var err error // Prepare the mount flags
flags := 0
if readonly {
flags |= syscall.MS_RDONLY
} // Detect the filesystem
fstype := "none"
if deviceIsBlockdev(srcPath) {
fstype, err = shared.BlockFsDetect(srcPath)
if err != nil {
return err
}
} else {
flags |= syscall.MS_BIND
if recursive {
flags |= syscall.MS_REC
}
} // Mount the filesystem
if err = syscall.Mount(srcPath, dstPath, fstype, uintptr(flags), ""); err != nil {
return fmt.Errorf("Unable to mount %s at %s: %s", srcPath, dstPath, err)
} flags = syscall.MS_REC | syscall.MS_SLAVE
if err = syscall.Mount("", dstPath, "", uintptr(flags), ""); err != nil {
return fmt.Errorf("unable to make mount %s private: %s", dstPath, err)
} return nil
}

  

lxd容器之GPU发现和加载的更多相关文章

  1. 使用Pytorch在多GPU下保存和加载训练模型参数遇到的问题

    最近使用Pytorch在学习一个深度学习项目,在模型保存和加载过程中遇到了问题,最终通过在网上查找资料得以解决,故以此记之,以备忘却. 首先,是在使用多GPU进行模型训练的过程中,在保存模型参数时,应 ...

  2. Java并发编程:并发容器之CopyOnWriteArrayList(转载)

    Java并发编程:并发容器之CopyOnWriteArrayList(转载) 原文链接: http://ifeve.com/java-copy-on-write/ Copy-On-Write简称COW ...

  3. Java并发编程:并发容器之CopyOnWriteArrayList

    转载: Java并发编程:并发容器之CopyOnWriteArrayList Copy-On-Write简称COW,是一种用于程序设计中的优化策略.其基本思路是,从一开始大家都在共享同一个内容,当某个 ...

  4. linux 编译,链接和加载

    1.   序 最近在折腾各种.so,碰到了一些问题,一开始对于很多错误也没有头绪,茫然不知所措.索性花了一天多时间将《程序员的自我修养—链接、装载与库》中部分内容略读了一遍 ...

  5. 源码跟读,Spring是如何解析和加载xml中配置的beans

    Spring版本基于: 跟踪代码源码基于: https://github.com/deng-cc/KeepLearning commit id:c009ce47bd19e1faf9e07f12086c ...

  6. ionic 在windows环境下更换logo和加载图片的问题

    做用自己的电脑做ionic项目时,更换logo和加载图片时,无论使用哪种命令,发现都上传不了,并且报错 最后发现,需要将 icon和splash两个文件改为.ai格式才能上传成功. 这是最终生成后的文 ...

  7. 【转】Java并发编程:并发容器之CopyOnWriteArrayList

    Copy-On-Write简称COW,是一种用于程序设计中的优化策略.其基本思路是,从一开始大家都在共享同一个内容,当某个人想要修改这个内容的时候,才会真正把内容Copy出去形成一个新的内容然后再改, ...

  8. Kettle实现数据抽取、转换、装入和加载数据-数据转移ETL工具

    原文地址:http://www.xue51.com/soft/5341.html Kettle是来自国外的一款开源的ETL工具,纯java编写,可以在Window.Linux.Unix上运行,绿色无需 ...

  9. spring揭密学习笔记(3)-spring ioc容器:Spring的IoC容器之BeanFactory

    1. Spring的IoC容器和IoC Service Provider的关系 Spring的IoC容器和IoC Service Provider所提供的服务之间存在一定的交集,二者的关系如图4-1所 ...

随机推荐

  1. SQL四种语言

    1.DDL(Data Definition Language)数据库定义语言statements are used to define the database structure or schema ...

  2. Selenium2(java)TestNG的使用 七

      TestNG,即Testing Next Generation,下一代测试技术,是一套根据JUnit和NUnit思想而构建的利用注释来强化测试功能的一个测试框架,即可以用来做单元测试,也可以用来做 ...

  3. Delphi中String类型原理介绍

    Delphi中字符串的操作很简单,但幕后情况却相当复杂.Pascal传统的字符串操作方法与Windows不同,Windows吸取了C语言的字符串操作方法.32位Delphi中增加了长字符串类型,该类型 ...

  4. --@angularJS--自定义服务与后台数据交互小实例

    1.myService.html: <!DOCTYPE HTML><html ng-app="app"><head>    <title& ...

  5. CI Weekly #12 | 微信小程序的自动化测试进阶

    岁末将至,站在年终冲刺的尾巴上,flow.ci 新增了个人和团队设置的功能: 上线团队功能,注册时默认创建一个与用户名一致的团队,可设置:团队名称.增减团队成员,后续会不断完善: 增加个人设置,可修改 ...

  6. 故障排查实战案例——某电器ERP系统日志暴增

    前言 本篇文章写在新春佳节前夕,也是给IT运维朋友一个警醒,在春节长假前请妥善体检自己的系统安心过个年. 千里之堤毁于蚁穴,一条看似简单的语句就能拖垮整个系统,您的SQL Server很久没体检了吧? ...

  7. ArcGIS10.2直连PostgreSQL存在问题

    现象: 将下载到的libeay32.dll, libiconv-2.dll, libintl-8.dll, libpq.dll, ssleay32.dll文件拷贝到Desktop 安装目录的bin目录 ...

  8. 游戏音频技术备忘 (三) 集成Wwise到Unreal Engine

    当前受众较广的商业游戏引擎有 Unreal Engine.Unity.cocos2d-x等,在音频领域的第三方中间件则有Wwise.FMOD.Criware等,言多且烦,我们首先集成Wwise到 Un ...

  9. 最近一年多我总结的常用mate标签-常用mate标签

    昨天开始上班  ,今天晚上不是太忙 ,来写篇博客了 meta元素共有三个可选属性(http-equiv.name和scheme)和一个必选属性(content),content定义与 http-equ ...

  10. js实现淘宝首页图片轮播效果

    原文:http://ce.sysu.edu.cn/hope2008/Education/ShowArticle.asp?ArticleID=10585 <!DOCTYPE html> &l ...