基本概念入门:

Device Manager Proposal

Device plugin official Doc(中文)

device-plugins official Doc(En)

Go through Intel FPGA Plugin code

1.  cmd/fpga_plugin/fpga_plugin.go

生成一个新的plugin, plugin传入的信息包括sysfs,devfs和mode(共两种mode: af 或 region)

// Build the device plugin from the sysfs/devfs directories and the selected mode.
plugin, err := newDevicePlugin(sysfsDirectory, devfsDirectory, mode)
if err != nil {
	fatal(err)
}

fmt.Println("FPGA device plugin started in ", mode, " mode")

// Wrap the plugin in a Manager for this namespace and run the manager.
manager := dpapi.NewManager(namespace, plugin)
manager.Run()

2.  internal/deviceplugin/manager.go

会生成一个server, 然后run, 主要就是devicePlugin.Scan (具体到某个device),扫描设备信息,然后启动grpc Serve(handleUpdate)

// Manager manages life cycle of device plugins and handles the scan results
// received from them.
type Manager struct {
	// devicePlugin is the Scanner whose Scan method produces the updates
	// consumed by Run.
	devicePlugin Scanner
	// namespace is handed to every server's Serve call.
	namespace string
	// servers maps a device type to the server created for it.
	servers map[string]devicePluginServer
	// createServer builds the server for a device type; NewManager sets it
	// to newServer.
	createServer func(string, func(*pluginapi.AllocateResponse) error) devicePluginServer
}

// NewManager creates a new instance of Manager
func NewManager(namespace string, devicePlugin Scanner) *Manager {
	// Start from the zero value and fill in each field explicitly.
	mgr := new(Manager)
	mgr.namespace = namespace
	mgr.devicePlugin = devicePlugin
	// No servers exist until the first scan result is handled.
	mgr.servers = map[string]devicePluginServer{}
	mgr.createServer = newServer
	return mgr
}

// Run prepares and launches event loop for updates from Scanner
func (m *Manager) Run() {
updatesCh := make(chan updateInfo) go func() {
err := m.devicePlugin.Scan(newNotifier(updatesCh))
if err != nil {
fmt.Printf("Device scan failed: %+v\n", err)
os.Exit(1)
}
close(updatesCh)
}() for update := range updatesCh {
m.handleUpdate(update)
}
}

handleUpdate 启动grpc 服务 m.servers[dt].Serve(m.namespace)

func (m *Manager) handleUpdate(update updateInfo) {
debug.Print("Received dev updates:", update)
for devType, devices := range update.Added {
var postAllocate func(*pluginapi.AllocateResponse) error if postAllocator, ok := m.devicePlugin.(PostAllocator); ok {
postAllocate = postAllocator.PostAllocate
} m.servers[devType] = m.createServer(devType, postAllocate)
go func(dt string) {
err := m.servers[dt].Serve(m.namespace)
if err != nil {
fmt.Printf("Failed to serve %s/%s: %+v\n", m.namespace, dt, err)
os.Exit(1)
}
}(devType)
m.servers[devType].Update(devices)
}
for devType, devices := range update.Updated {
m.servers[devType].Update(devices)
}
for devType := range update.Removed {
m.servers[devType].Stop()
delete(m.servers, devType)
}
}

3. cmd/fpga_plugin/fpga_plugin.go

获得Device的具体信息

// Scan starts scanning FPGA devices on the host
func (dp *devicePlugin) Scan(notifier dpapi.Notifier) error {
for {
devTree, err := dp.scanFPGAs()
if err != nil {
return err
} notifier.Notify(devTree) time.Sleep(5 * time.Second)
}
}

4. 启动GRPC 服务

// Serve runs the device plugin gRPC server for namespace, using
// pluginapi.DevicePluginPath as the socket directory and
// pluginapi.KubeletSocket as the kubelet endpoint.
func (srv *server) Serve(namespace string) error {
	pluginDir, kubelet := pluginapi.DevicePluginPath, pluginapi.KubeletSocket
	return srv.setupAndServe(namespace, pluginDir, kubelet)
}
// setupAndServe binds given gRPC server to device manager, starts it and registers it with kubelet.
func (srv *server) setupAndServe(namespace string, devicePluginPath string, kubeletSocket string) error {
resourceName := namespace + "/" + srv.devType
pluginPrefix := namespace + "-" + srv.devType for {
pluginEndpoint := pluginPrefix + ".sock"
pluginSocket := path.Join(devicePluginPath, pluginEndpoint) if err := waitForServer(pluginSocket, time.Second); err == nil {
return errors.Errorf("Socket %s is already in use", pluginSocket)
}
os.Remove(pluginSocket) lis, err := net.Listen("unix", pluginSocket)
if err != nil {
return errors.Wrap(err, "Failed to listen to plugin socket")
} srv.grpcServer = grpc.NewServer()
pluginapi.RegisterDevicePluginServer(srv.grpcServer, srv) // Starts device plugin service.
go func() {
fmt.Printf("Start server for %s at: %s\n", srv.devType, pluginSocket)
srv.grpcServer.Serve(lis)
}() // Wait for the server to start
if err = waitForServer(pluginSocket, 10*time.Second); err != nil {
return err
} // Register with Kubelet.
err = registerWithKubelet(kubeletSocket, pluginEndpoint, resourceName)
if err != nil {
return err
}
fmt.Printf("Device plugin for %s registered\n", srv.devType) // Kubelet removes plugin socket when it (re)starts
// plugin must restart in this case
if err = watchFile(pluginSocket); err != nil {
return err
}
fmt.Printf("Socket %s removed, restarting\n", pluginSocket) srv.grpcServer.Stop()
os.Remove(pluginSocket)
}
}

  

5. 注册GRPC server

vendor/k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1/api.pb.go

// RegisterRegistrationServer registers srv on s as the implementation of the
// service described by _Registration_serviceDesc.
func RegisterRegistrationServer(s *grpc.Server, srv RegistrationServer) {
	s.RegisterService(&_Registration_serviceDesc, srv)
}

"vendor/google.golang.org/grpc/server.go"

// RegisterService registers a service and its implementation to the gRPC
// server. It is called from the IDL generated code. This must be called before
// invoking Serve.
func (s *Server) RegisterService(sd *ServiceDesc, ss interface{}) {
ht := reflect.TypeOf(sd.HandlerType).Elem()
st := reflect.TypeOf(ss)
if !st.Implements(ht) {
grpclog.Fatalf("grpc: Server.RegisterService found the handler of type %v that does not satisfy %v", st, ht)
}
s.register(sd, ss)
} func (s *Server) register(sd *ServiceDesc, ss interface{}) {
s.mu.Lock()
defer s.mu.Unlock()
s.printf("RegisterService(%q)", sd.ServiceName)
if s.serve {
grpclog.Fatalf("grpc: Server.RegisterService after Server.Serve for %q", sd.ServiceName)
}
if _, ok := s.m[sd.ServiceName]; ok {
grpclog.Fatalf("grpc: Server.RegisterService found duplicate service registration for %q", sd.ServiceName)
}
srv := &service{
server: ss,
md: make(map[string]*MethodDesc),
sd: make(map[string]*StreamDesc),
mdata: sd.Metadata,
}
for i := range sd.Methods {
d := &sd.Methods[i]
srv.md[d.MethodName] = d
}
for i := range sd.Streams {
d := &sd.Streams[i]
srv.sd[d.StreamName] = d
}
s.m[sd.ServiceName] = srv
}

(s *Server) Serve

// Serve accepts incoming connections on the listener lis, creating a new
// ServerTransport and service goroutine for each. The service goroutines
// read gRPC requests and then call the registered handlers to reply to them.
// Serve returns when lis.Accept fails with fatal errors. lis will be closed when
// this method returns.
// Serve will return a non-nil error unless Stop or GracefulStop is called.
func (s *Server) Serve(lis net.Listener) error {
s.mu.Lock()
s.printf("serving")
s.serve = true
if s.lis == nil {
// Serve called after Stop or GracefulStop.
s.mu.Unlock()
lis.Close()
return ErrServerStopped
} s.serveWG.Add(1)
defer func() {
s.serveWG.Done()
select {
// Stop or GracefulStop called; block until done and return nil.
case <-s.quit:
<-s.done
default:
}
}() ls := &listenSocket{Listener: lis}
s.lis[ls] = true if channelz.IsOn() {
ls.channelzID = channelz.RegisterListenSocket(ls, s.channelzID, lis.Addr().String())
}
s.mu.Unlock() defer func() {
s.mu.Lock()
if s.lis != nil && s.lis[ls] {
ls.Close()
delete(s.lis, ls)
}
s.mu.Unlock()
}() var tempDelay time.Duration // how long to sleep on accept failure for {
rawConn, err := lis.Accept()
if err != nil {
if ne, ok := err.(interface {
Temporary() bool
}); ok && ne.Temporary() {
if tempDelay == 0 {
tempDelay = 5 * time.Millisecond
} else {
tempDelay *= 2
}
if max := 1 * time.Second; tempDelay > max {
tempDelay = max
}
s.mu.Lock()
s.printf("Accept error: %v; retrying in %v", err, tempDelay)
s.mu.Unlock()
timer := time.NewTimer(tempDelay)
select {
case <-timer.C:
case <-s.quit:
timer.Stop()
return nil
}
continue
}
s.mu.Lock()
s.printf("done serving; Accept = %v", err)
s.mu.Unlock() select {
case <-s.quit:
return nil
default:
}
return err
}
tempDelay = 0
// Start a new goroutine to deal with rawConn so we don't stall this Accept
// loop goroutine.
//
// Make sure we account for the goroutine so GracefulStop doesn't nil out
// s.conns before this conn can be added.
s.serveWG.Add(1)
go func() {
s.handleRawConn(rawConn)
s.serveWG.Done()
}()
}
}

gRPC tutorial

6.  注册kubelet

func registerWithKubelet(kubeletSocket, pluginEndPoint, resourceName string) error {
conn, err := grpc.Dial(kubeletSocket, grpc.WithInsecure(),
grpc.WithDialer(func(addr string, timeout time.Duration) (net.Conn, error) {
return net.DialTimeout("unix", addr, timeout)
}))
if err != nil {
return errors.Wrap(err, "Cannot connect to kubelet service")
}
defer conn.Close()
client := pluginapi.NewRegistrationClient(conn)
reqt := &pluginapi.RegisterRequest{
Version: pluginapi.Version,
Endpoint: pluginEndPoint,
ResourceName: resourceName,
} _, err = client.Register(context.Background(), reqt)
if err != nil {
return errors.Wrap(err, "Cannot register to kubelet service")
} return nil
}

7. 定义  DevicePluginServer interface   

"vendor/k8s.io/kubernetes/pkg/kubelet/apis/deviceplugin/v1beta1/api.pb.go"  

// Server API for DevicePlugin service

type DevicePluginServer interface {
	// GetDevicePluginOptions returns options to be communicated with Device
	// Manager
	GetDevicePluginOptions(context.Context, *Empty) (*DevicePluginOptions, error)
	// ListAndWatch returns a stream of List of Devices
	// Whenever a Device state changes or a Device disappears, ListAndWatch
	// returns the new list
	ListAndWatch(*Empty, DevicePlugin_ListAndWatchServer) error
	// Allocate is called during container creation so that the Device
	// Plugin can run device specific operations and instruct Kubelet
	// of the steps to make the Device available in the container
	Allocate(context.Context, *AllocateRequest) (*AllocateResponse, error)
	// PreStartContainer is called, if indicated by Device Plugin during registration phase,
	// before each container start. Device plugin can run device specific operations
	// such as resetting the device before making devices available to the container
	PreStartContainer(context.Context, *PreStartContainerRequest) (*PreStartContainerResponse, error)
}

具体实现

"internal/deviceplugin/server.go"  

参考===============================

prepare

Kubernetes的Device Plugin设计解读

深入浅出kubernetes之device-plugins

kubernetes调度gpu

KubeVirt:通过CRD扩展Kubernetes实现虚拟机管理

kubernetes系列之十四:Kubernetes CRD(CustomResourceDefinition)概览

Extend the Kubernetes API with CustomResourceDefinitions

用户资源定义(基本上所有的项目都用到了这个)

example

Kubernetes CRD (CustomResourceDefinition) 自定义资源类型

REF:

k8s 基本概念

k8s 系列介绍

API Extensions

Schedule GPUs

中文分析

KUBERNETES ON NVIDIA GPUS

RDMA device plugin for Kubernetes

intel-device-plugins-for-kubernetes

  

概念:

1. Opaque Integer Resources (OIRs)

Scheduling • Opaque Integer Resources (OIRs) 目前已棄用,也將在 v1.9 版本移除。 • Extended Resources (ERs) 成為 OIRs 的替代 Resource。 • 使用者能夠使用 kubernetes.io/ domain 之外的任何域名前綴,不再是使用 pod.alpha.kubernetes.io/opaque-int-resource- prefix。

k8s device plugin的更多相关文章

  1. 从零开始入门 K8s | GPU 管理和 Device Plugin 工作机制

    作者 | 车漾  阿里巴巴高级技术专家 本文整理自<CNCF x Alibaba 云原生技术公开课>第 20 讲. 关注"阿里巴巴云原生"公众号,回复关键词" ...

  2. 第20 章 : GPU 管理和 Device Plugin 工作机制

    GPU 管理和 Device Plugin 工作机制 本文将主要分享以下几个方面的内容: 需求来源 GPU 的容器化 Kubernetes 的 GPU 管理 工作原理 课后思考与实践 需求来源 201 ...

  3. 如何掌握 Kubernetes ?系统学习 k8s 的大纲一份

    深度剖析 Kubernetes 深度剖析 k8s 如何学习 Kubernetes ?如何入门 Kubernetes? 为了帮帮初学者,2018 年 InfoQ 旗下(就是你知道的那个 InfoQ 哇) ...

  4. NVIDIA-GPU归入K8S集群管理的安装文档--第二版

    一,nvidia K80驱动安装 1,  查看服务器上的Nvidia(英伟达)显卡信息,命令lspci |grep NVIDIA 2,  按下来,进行显卡驱动程序的安装,驱动程序可到nvidia的官网 ...

  5. k8s gpu 资源设置

    将所有相同型号显卡的node打上 相同的label kubectl label node ogs-gpu02 gpu_type=k20m 启动device plugin 和app 时: nodeSel ...

  6. Kubernetes Device Plugins

    The gRPC server that the device plugin must implement is expected to be advertised on a unix socket ...

  7. image management in kubernet

    Image How can I edit an existing docker image metadata? docker-copyedit Registry Disk kubevirtis a g ...

  8. Apache Spark 3.0 将内置支持 GPU 调度

    如今大数据和机器学习已经有了很大的结合,在机器学习里面,因为计算迭代的时间可能会很长,开发人员一般会选择使用 GPU.FPGA 或 TPU 来加速计算.在 Apache Hadoop 3.1 版本里面 ...

  9. kubeadm安装kubernetes 1.13.1集群完整部署记录

    k8s是什么 Kubernetes简称为k8s,它是 Google 开源的容器集群管理系统.在 Docker 技术的基础上,为容器化的应用提供部署运行.资源调度.服务发现和动态伸缩等一系列完整功能,提 ...

随机推荐

  1. windows server配置ftp服务器以及外网访问

    在Server2012打开 服务器管理器,选择 添加角色与功能,添加Web服务下的FTP服务器 安装后. 我们现在C盘创建一个名字为FTP的文件夹,里面创建一个ftp的文件,做测试用,如图 打开服务器 ...

  2. cocos2d CCNode类(节点属性大全)

    1 CCNode是cocos2d-x中一个很重要的类,CCNode是场景.层.菜单.精灵等的父类.而我们在使用cocos2d-x时,接触最多的就是场景.层.菜单.精灵等.所以有必要先弄懂CCNode类 ...

  3. CefGlue在WinXP下闪退的排查方法

    用CefGlue开发的程序部署到多台机器上,运行正常.本以为没有问题了,下午突然接到客户电话说:运行程序时,闪一下就退出,没有任何错误提示!远程连接到客户机器上,看了下果然如此!cef没有记录任何日志 ...

  4. JavaScript数组去重方法总结

    一.双重遍历去重 function onlyFigure(arr) { let newarr = []; const length = arr.length for (let i = 0; i < ...

  5. JS实例2

    进度条 <head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8 ...

  6. 一个获取本机ip地址的正则

    ifconfig|grep -oP '(?<=inet addr:)(?=(?!127\.0\.0\.1))\d+(\.\d+){3}'

  7. linux中安装oracle数据库

    1. 执行 ./runInstaller 提示 /tmp 的空间过小执行 mount -o remount,size=1G,noatime /tmp重新设置 /tmp 的大小 2. 安装完成数据库之后 ...

  8. big and little endian

    总是容易搞混big endian 和 little endian,但是找到一篇文章,其解释让人耳目一新. 文章链接:http://www.cs.umd.edu/class/sum2003/cmsc31 ...

  9. html5-字体css

    #div1{font-size: 50px;}#div2{font-size: 50%;}#div3{font-size: 300%}#div4{font-size: 3em;}#div5{font- ...

  10. 概念、DW介绍

    网页设计知识点大致分为五个部分,分别是: 1.概念.DW介绍: 2.标签: 3.样式表CSS: 4.JQuery: 5.JavaScript 概念.DW介绍: 一.网页的基本结构 <!--文档声 ...