Linux3.10.0块IO子系统流程(4)-- 为请求构造SCSI命令
首先来看scsi_prep_fn
/*
 * Default prepare function for a SCSI request queue.
 *
 * Only REQ_TYPE_BLOCK_PC requests are prepared here (through
 * scsi_setup_blk_pc_cmnd()); every other request type is given the
 * BLKPREP_KILL status.  The resulting status is then passed to
 * scsi_prep_return(), whose result is returned to the caller.
 */
int scsi_prep_fn(struct request_queue *q, struct request *req)
{
	struct scsi_device *device = q->queuedata;
	int status;

	if (req->cmd_type != REQ_TYPE_BLOCK_PC)
		status = BLKPREP_KILL;
	else
		status = scsi_setup_blk_pc_cmnd(device, req);

	return scsi_prep_return(q, req, status);
}
scsi_prep_fn只能处理来自SCSI公共层的命令(REQ_TYPE_BLOCK_PC),其他类型的请求直接以BLKPREP_KILL结束。在scsi_setup_blk_pc_cmnd函数返回后,根据返回值调用scsi_prep_return进行相应的处理。
/*
 * Set up the SCSI command for a REQ_TYPE_BLOCK_PC request.
 *
 * @sdev: SCSI device the request is addressed to
 * @req:  block-layer request to prepare
 *
 * Returns BLKPREP_OK when the command is ready, BLKPREP_DEFER when no
 * scsi_cmnd could be obtained, or the non-OK status reported by
 * scsi_prep_state_check() / scsi_init_io().
 */
int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req)
{
	struct scsi_cmnd *cmd;
	int ret;

	/*
	 * Preliminary check based on the request flags and the state of
	 * the SCSI device.
	 */
	ret = scsi_prep_state_check(sdev, req);
	if (ret != BLKPREP_OK)
		return ret;

	/*
	 * Allocate a new scsi_cmnd descriptor and record it in the
	 * request's special field; if that field already points to an
	 * existing scsi_cmnd descriptor, it is used directly.
	 */
	cmd = scsi_get_cmd_from_req(sdev, req);
	if (unlikely(!cmd))
		return BLKPREP_DEFER;

	/*
	 * BLOCK_PC requests may transfer data, in which case they must
	 * have a bio attached to them.  Or they might contain a SCSI
	 * command that does not transfer data, in which case they may
	 * optionally submit a request without an attached bio.
	 *
	 * Although the request comes from the SCSI common layer it can
	 * still involve a data transfer: the data described by the bio
	 * has to be mapped into the data buffer of the SCSI command,
	 * which is the job of scsi_init_io().  When no data is
	 * transferred the command's data buffer descriptor is cleared.
	 */
	if (req->bio) {
		BUG_ON(!req->nr_phys_segments);

		ret = scsi_init_io(cmd, GFP_ATOMIC);
		if (unlikely(ret))
			return ret;
	} else {
		BUG_ON(blk_rq_bytes(req));

		memset(&cmd->sdb, 0, sizeof(cmd->sdb));
		req->buffer = NULL;
	}

	cmd->cmd_len = req->cmd_len;

	/* Derive the DMA direction from the payload size and direction. */
	if (!blk_rq_bytes(req))
		cmd->sc_data_direction = DMA_NONE;
	else if (rq_data_dir(req) == WRITE)
		cmd->sc_data_direction = DMA_TO_DEVICE;
	else
		cmd->sc_data_direction = DMA_FROM_DEVICE;

	cmd->transfersize = blk_rq_bytes(req);
	cmd->allowed = req->retries;
	return BLKPREP_OK;
}
sd_prep_fn函数从request结构中的信息构造SCSI(读或写)命令,将结果保存在request的special域。sd_prep_fn只能处理来自上层的请求(REQ_TYPE_FS),以及来自SCSI层的(REQ_TYPE_BLOCK_PC)请求。有一种请求比较特殊,即所谓的DISCARD请求。这个请求来自上层,但需要被转换成SCSI请求来处理;从下面的代码可以看到,带有REQ_WRITE_SAME和REQ_FLUSH标志的请求也由各自的函数单独构造命令。
/**
* sd_prep_fn - build a scsi (read or write) command from
* information in the request structure.
* @SCpnt: pointer to mid-level's per scsi command structure that
* contains request and into which the scsi command is written
*
* Returns 1 if successful and 0 if error (or cannot be done now).
**/
static int sd_prep_fn(struct request_queue *q, struct request *rq)
{
struct scsi_cmnd *SCpnt;
struct scsi_device *sdp = q->queuedata;
struct gendisk *disk = rq->rq_disk;
struct scsi_disk *sdkp;
sector_t block = blk_rq_pos(rq);
sector_t threshold;
unsigned int this_count = blk_rq_sectors(rq);
int ret, host_dif;
unsigned char protect; /*
* Discard request come in as REQ_TYPE_FS but we turn them into
* block PC requests to make life easier.
*/
if (rq->cmd_flags & REQ_DISCARD) {
ret = sd_setup_discard_cmnd(sdp, rq);
goto out;
} else if (rq->cmd_flags & REQ_WRITE_SAME) {
ret = sd_setup_write_same_cmnd(sdp, rq);
goto out;
} else if (rq->cmd_flags & REQ_FLUSH) {
ret = scsi_setup_flush_cmnd(sdp, rq);
goto out;
} else if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
ret = scsi_setup_blk_pc_cmnd(sdp, rq);
goto out;
} else if (rq->cmd_type != REQ_TYPE_FS) {
ret = BLKPREP_KILL;
goto out;
}
ret = scsi_setup_fs_cmnd(sdp, rq);
if (ret != BLKPREP_OK)
goto out;
SCpnt = rq->special;
sdkp = scsi_disk(disk); /* from here on until we're complete, any goto out
* is used for a killable error condition */
ret = BLKPREP_KILL; SCSI_LOG_HLQUEUE(, scmd_printk(KERN_INFO, SCpnt,
"sd_prep_fn: block=%llu, "
"count=%d\n",
(unsigned long long)block,
this_count)); /*
* 以下几种情况直接结束命令:
* 1.SCSI不在线
* 2.请求数据超出了设备容量
* 3.磁盘介质发生了变化
*/
if (!sdp || !scsi_device_online(sdp) ||
block + blk_rq_sectors(rq) > get_capacity(disk)) {
SCSI_LOG_HLQUEUE(, scmd_printk(KERN_INFO, SCpnt,
"Finishing %u sectors\n",
blk_rq_sectors(rq)));
SCSI_LOG_HLQUEUE(, scmd_printk(KERN_INFO, SCpnt,
"Retry with 0x%p\n", SCpnt));
goto out;
} if (sdp->changed) {
/*
* quietly refuse to do anything to a changed disc until
* the changed bit has been reset
*/
/* printk("SCSI disk has been changed or is not present. Prohibiting further I/O.\n"); */
goto out;
} /*
* Some SD card readers can't handle multi-sector accesses which touch the last one or two hardware sectors. Split accesses as needed.
* 某些设备(如SD卡)不能多扇区访问最后的部分扇区,需分割访问
*/
threshold = get_capacity(disk) - SD_LAST_BUGGY_SECTORS *
(sdp->sector_size / ); if (unlikely(sdp->last_sector_bug && block + this_count > threshold)) {
if (block < threshold) {
/* Access up to the threshold but not beyond */
this_count = threshold - block;
} else {
/* Access only a single hardware sector */
this_count = sdp->sector_size / ;
}
} SCSI_LOG_HLQUEUE(, scmd_printk(KERN_INFO, SCpnt, "block=%llu\n",
(unsigned long long)block)); /*
* If we have a 1K hardware sectorsize, prevent access to single 512 byte sectors.
* In theory we could handle this - in fact the scsi cdrom driver must be able to handle this because
* we typically use 1K blocksizes, and cdroms typically have 2K hardware sectorsizes.
* Of course, things are simpler with the cdrom, since it is read-only. For performance reasons,
* the filesystems should be able to handle this and not force the scsi disk driver to use bounce buffers for this.
* 磁盘的硬件扇区长度可能不是512,而是1024/2048或4096
*/
if (sdp->sector_size == ) {
if ((block & ) || (blk_rq_sectors(rq) & )) {
scmd_printk(KERN_ERR, SCpnt,
"Bad block number requested\n");
goto out;
} else {
block = block >> ;
this_count = this_count >> ;
}
}
if (sdp->sector_size == ) {
if ((block & ) || (blk_rq_sectors(rq) & )) {
scmd_printk(KERN_ERR, SCpnt,
"Bad block number requested\n");
goto out;
} else {
block = block >> ;
this_count = this_count >> ;
}
}
if (sdp->sector_size == ) {
if ((block & ) || (blk_rq_sectors(rq) & )) {
scmd_printk(KERN_ERR, SCpnt,
"Bad block number requested\n");
goto out;
} else {
block = block >> ;
this_count = this_count >> ;
}
}
if (rq_data_dir(rq) == WRITE) {
if (!sdp->writeable) {
goto out;
}
SCpnt->cmnd[] = WRITE_6;
SCpnt->sc_data_direction = DMA_TO_DEVICE; if (blk_integrity_rq(rq))
sd_dif_prepare(rq, block, sdp->sector_size); } else if (rq_data_dir(rq) == READ) {
SCpnt->cmnd[] = READ_6;
SCpnt->sc_data_direction = DMA_FROM_DEVICE;
} else {
scmd_printk(KERN_ERR, SCpnt, "Unknown command %x\n", rq->cmd_flags);
goto out;
} SCSI_LOG_HLQUEUE(, scmd_printk(KERN_INFO, SCpnt,
"%s %d/%u 512 byte blocks.\n",
(rq_data_dir(rq) == WRITE) ?
"writing" : "reading", this_count,
blk_rq_sectors(rq))); /* Set RDPROTECT/WRPROTECT if disk is formatted with DIF */
host_dif = scsi_host_dif_capable(sdp->host, sdkp->protection_type);
if (host_dif)
protect = << ;
else
protect = ; if (host_dif == SD_DIF_TYPE2_PROTECTION) {
SCpnt->cmnd = mempool_alloc(sd_cdb_pool, GFP_ATOMIC); if (unlikely(SCpnt->cmnd == NULL)) {
ret = BLKPREP_DEFER;
goto out;
} SCpnt->cmd_len = SD_EXT_CDB_SIZE;
memset(SCpnt->cmnd, , SCpnt->cmd_len);
SCpnt->cmnd[] = VARIABLE_LENGTH_CMD;
SCpnt->cmnd[] = 0x18;
SCpnt->cmnd[] = (rq_data_dir(rq) == READ) ? READ_32 : WRITE_32;
SCpnt->cmnd[] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : ); /* LBA */
SCpnt->cmnd[] = sizeof(block) > ? (unsigned char) (block >> ) & 0xff : ;
SCpnt->cmnd[] = sizeof(block) > ? (unsigned char) (block >> ) & 0xff : ;
SCpnt->cmnd[] = sizeof(block) > ? (unsigned char) (block >> ) & 0xff : ;
SCpnt->cmnd[] = sizeof(block) > ? (unsigned char) (block >> ) & 0xff : ;
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) block & 0xff; /* Expected Indirect LBA */
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) block & 0xff; /* Transfer length */
SCpnt->cmnd[] = (unsigned char) (this_count >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (this_count >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (this_count >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) this_count & 0xff;
} else if (sdp->use_16_for_rw) {
SCpnt->cmnd[] += READ_16 - READ_6;
SCpnt->cmnd[] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : );
SCpnt->cmnd[] = sizeof(block) > ? (unsigned char) (block >> ) & 0xff : ;
SCpnt->cmnd[] = sizeof(block) > ? (unsigned char) (block >> ) & 0xff : ;
SCpnt->cmnd[] = sizeof(block) > ? (unsigned char) (block >> ) & 0xff : ;
SCpnt->cmnd[] = sizeof(block) > ? (unsigned char) (block >> ) & 0xff : ;
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) block & 0xff;
SCpnt->cmnd[] = (unsigned char) (this_count >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (this_count >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (this_count >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) this_count & 0xff;
SCpnt->cmnd[] = SCpnt->cmnd[] = ;
} else if ((this_count > 0xff) || (block > 0x1fffff) ||
scsi_device_protection(SCpnt->device) ||
SCpnt->device->use_10_for_rw) {
if (this_count > 0xffff)
this_count = 0xffff; SCpnt->cmnd[] += READ_10 - READ_6;
SCpnt->cmnd[] = protect | ((rq->cmd_flags & REQ_FUA) ? 0x8 : );
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) (block >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) block & 0xff;
SCpnt->cmnd[] = SCpnt->cmnd[] = ;
SCpnt->cmnd[] = (unsigned char) (this_count >> ) & 0xff;
SCpnt->cmnd[] = (unsigned char) this_count & 0xff;
} else {
if (unlikely(rq->cmd_flags & REQ_FUA)) {
/*
* This happens only if this drive failed
* 10byte rw command with ILLEGAL_REQUEST
* during operation and thus turned off
* use_10_for_rw.
*/
scmd_printk(KERN_ERR, SCpnt,
"FUA write on READ/WRITE(6) drive\n");
goto out;
} SCpnt->cmnd[] |= (unsigned char) ((block >> ) & 0x1f);
SCpnt->cmnd[] = (unsigned char) ((block >> ) & 0xff);
SCpnt->cmnd[] = (unsigned char) block & 0xff;
SCpnt->cmnd[] = (unsigned char) this_count;
SCpnt->cmnd[] = ;
}
SCpnt->sdb.length = this_count * sdp->sector_size; /* If DIF or DIX is enabled, tell HBA how to handle request */
if (host_dif || scsi_prot_sg_count(SCpnt))
sd_prot_op(SCpnt, host_dif); /*
* We shouldn't disconnect in the middle of a sector, so with a dumb
* host adapter, it's safe to assume that we can at least transfer
* this many bytes between each connect / disconnect.
*/
SCpnt->transfersize = sdp->sector_size;
SCpnt->underflow = this_count << ;
SCpnt->allowed = SD_MAX_RETRIES; /*
* This indicates that the command is ready from our end to be
* queued.
*/
ret = BLKPREP_OK;
out:
return scsi_prep_return(q, rq, ret);
}
/*
 * Setup a REQ_TYPE_FS command.  These are simple read/write request
 * from filesystems that still need to be translated to SCSI CDBs from
 * the ULD.
 *
 * @sdev: SCSI device the request is addressed to
 * @req:  filesystem request to prepare
 *
 * Returns the non-OK status of scsi_prep_state_check() or of the device
 * handler's prep_fn when either rejects the request, BLKPREP_DEFER when
 * no scsi_cmnd could be obtained, and otherwise the result of
 * scsi_init_io().
 */
int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req)
{
	struct scsi_cmnd *cmd;
	int ret = scsi_prep_state_check(sdev, req);

	if (ret != BLKPREP_OK)
		return ret;

	/* Give an attached device handler the chance to veto the request. */
	if (unlikely(sdev->scsi_dh_data && sdev->scsi_dh_data->scsi_dh
			 && sdev->scsi_dh_data->scsi_dh->prep_fn)) {
		ret = sdev->scsi_dh_data->scsi_dh->prep_fn(sdev, req);
		if (ret != BLKPREP_OK)
			return ret;
	}

	/*
	 * Filesystem requests must transfer data.
	 */
	BUG_ON(!req->nr_phys_segments);

	cmd = scsi_get_cmd_from_req(sdev, req);
	if (unlikely(!cmd))
		return BLKPREP_DEFER;

	/*
	 * All the information of a request coming from the upper layer
	 * lives in its bios.  Unlike a request from the SCSI common
	 * layer, the SCSI command defined by the SCSI specification has
	 * to be built from scratch for it.  The result is stored in the
	 * cmnd field of the scsi_cmnd descriptor, so that command buffer
	 * is zeroed first.
	 */
	memset(cmd->cmnd, 0, BLK_MAX_CDB);
	return scsi_init_io(cmd, GFP_ATOMIC);
}
Linux3.10.0块IO子系统流程(4)-- 为请求构造SCSI命令的更多相关文章
- Linux3.10.0块IO子系统流程(6)-- 派发SCSI命令到低层驱动
在SCSI策略例程中最后调用scsi_dispatch_cmd将SCSI命令描述符派发给低层驱动进行处理 /** * scsi_dispatch_command - Dispatch a comman ...
- Linux3.10.0块IO子系统流程(5)-- 为SCSI命令准备聚散列表
SCSI数据缓冲区组织成聚散列表的形式.Linux内核中表示聚散列表的基本数据结构是scatterlist,虽然名字中有list,但它只对应一个内存缓冲区,聚散列表就是多个scatterlist的组合 ...
- Linux3.10.0块IO子系统流程(2)-- 构造、排序、合并请求
Linux块设备可以分为三类.分别针对顺序访问物理设备.随机访问物理设备和逻辑设备(即“栈式设备”) 类型 make_request_fn request_fn 备注 SCSI 设备等 从bio构 ...
- Linux3.10.0块IO子系统流程(3)-- SCSI策略例程
很长时间以来,Linux块设备使用了一种称为“蓄流/泄流”(plugging/unplugging)的技术来改进吞吐率.简单而言,这种工作方式类似浴盆排水系统的塞子.当IO被提交时,它被储存在一个队列 ...
- Linux3.10.0块IO子系统流程(0)-- 块IO子系统概述
前言:这个系列主要是记录自己学习Linux块IO子系统的过程,其中代码分析皆基于Linux3.10.0版本,如有描述错误或不妥之处,敬请指出! 参考书籍:存储技术原理分析--基于Linux 2.6内核 ...
- Linux3.10.0块IO子系统流程(7)-- 请求处理完成
和提交请求相反,完成请求的过程是从低层驱动开始的.请求处理完成分为两个部分:上半部和下半部.开始时,请求处理完成总是处在中断上下文,在这里的主要任务是将已完成的请求放到某个队列中,然后引发软中断让中断 ...
- Linux3.10.0块IO子系统流程(1)-- 上层提交请求
Linux通用块层提供给上层的接口函数是submit_bio.上层在构造好bio之后,调用submit_bio提交给通用块层处理. submit_bio函数如下: void submit_bi ...
- DPA 9.1.85 升级到DPA 10.0.352流程
SolarWinds DPA的升级其实是一件非常简单的事情,这里介绍一下从DPA 9.1.95升级到 DPA 10.0.352版本的流程.为什么要升级呢? DPA给用户发的邮件已经写的非常清楚了(如下 ...
- 【转】linux IO子系统和文件系统读写流程
原文地址:linux IO子系统和文件系统读写流程 我们含有分析的,是基于2.6.32及其后的内核. 我们在linux上总是要保存数据,数据要么保存在文件系统里(如ext3),要么就保存在裸设备里.我 ...
随机推荐
- 第 3 章 镜像 - 017 - RUN vs CMD vs ENTRYPOINT
RUN.CMD 和 ENTRYPOINT 这三个 Dockerfile 指令看上去很类似,很容易混淆. 简单的说: RUN 执行命令并创建新的镜像层,RUN 经常用于安装软件包. CMD 设置容器启动 ...
- 雷林鹏分享:XML 简介
XML 简介 XML 被设计用来传输和存储数据. HTML 被设计用来显示数据. 应该掌握的基础知识 在您继续学习之前,需要对以下知识有基本的了解: HTML JavaScript 如果您希望首先学习 ...
- Using the G711 standard
Using the G711 standard Marc Sweetgall, 28 Jul 2006 4.74 (27 votes) 1 2 ...
- BGP - 3,BGP重要概念(EBGP,IBGP,防环/黑洞/全互连/同步)
1,防环/黑洞/同步/全互连(为出现大于号,现在通常都是要下一跳可达+关同步) a)EBGP邻居传来的路由可以通过AS_PATH防环,所以收到的不会有问题,因此直接是优化的(>),也就是直接装表 ...
- 抽离amazeUI里面的弹出框
花了一些时间读了amazeUI的源码 把他的弹出框给单独抽离出来了,具体可以见源码:http://pan.baidu.com/s/1mibQ9T2
- day11-15,装饰器
day11 1.装饰器 import time # print(time.time()) # 点数前边是从1970年到现在过了多少秒 # time.sleep(10) # 让程序执行到这里停一会儿 # ...
- python记录_day09 初识函数
一.认识函数 函数:对动作或者功能的封装 格式: 函数声明 def 函数名(): 函数体 函数调用 函数名() #定义函数 def xiao(): print("你的笑像一 ...
- Python遍历文件个文件夹
在读文件的时候往往需要遍历文件夹,python的os.path包含了很多文件.文件夹操作的方法.下面列出: os.path.abspath(path) #返回绝对路径 os.path.basename ...
- shiro身份验证
身份验证,即在应用中谁能证明他就是他本人.一般提供如他们的身份ID一些标识信息来表明他就是他本人,如提供身份证,用户名/密码来证明. 在shiro中,用户需要提供principals (身份)和cre ...
- LINQ 中常用函数使用: Take TakeWhile Skip SkipWhile Reverse Distinct
1,Take 方法 Take方法用于从一个序列的开头返回指定数量的元素. string[] names = { "郭靖", "李莫愁", "欧阳晓晓& ...