nvme_reset_work函数中继续调用nvme_configure_admin_queue,其实现如下:
/*
 * nvme_configure_admin_queue - bring up the admin queue of an NVMe device.
 *
 * Disables the controller, allocates the admin queue (qid 0) if it does not
 * exist yet, programs the AQA/ASQ/ACQ registers with the queue size and DMA
 * addresses, re-enables the controller and finally requests the admin queue
 * interrupt.
 *
 * Returns 0 on success. On failure returns the error of the step that failed
 * (-ENOMEM when the admin queue cannot be allocated).
 */
static int nvme_configure_admin_queue(struct nvme_dev *dev)
{
	int result;
	u32 aqa;
	u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
	struct nvme_queue *nvmeq;

	/* dev->subsystem comes from CAP only when the VS register reports >= 1.1.0. */
	dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ?
						NVME_CAP_NSSRC(cap) : 0;

	/*
	 * If CSTS.NSSRO is set, write the bit back to CSTS
	 * (presumably write-one-to-clear; confirm against the NVMe spec).
	 */
	if (dev->subsystem &&
	    (readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO))
		writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS);

	/*
	 * The controller is disabled before the admin queue registers are
	 * touched and re-enabled (nvme_enable_ctrl) once they are programmed.
	 */
	result = nvme_disable_ctrl(&dev->ctrl, cap);
	if (result < 0)
		return result;

	/*
	 * On the first call dev->queues[0] is expected to be NULL, so the
	 * admin queue is allocated here; later calls reuse the existing one.
	 */
	nvmeq = dev->queues[0];
	if (!nvmeq) {
		nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH);
		if (!nvmeq)
			return -ENOMEM;
	}

	/* Zero-based queue depth goes into both the low and the high 16 bits. */
	aqa = nvmeq->q_depth - 1;
	aqa |= aqa << 16;

	/*
	 * Write sq_dma_addr and cq_dma_addr to the NVME_REG_ASQ and
	 * NVME_REG_ACQ offsets of the BAR; both addresses were obtained in
	 * nvme_alloc_queue().
	 */
	writel(aqa, dev->bar + NVME_REG_AQA);
	lo_hi_writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ);
	lo_hi_writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ);

	result = nvme_enable_ctrl(&dev->ctrl, cap);
	if (result)
		return result;

	/* Register the interrupt through queue_request_irq(). */
	nvmeq->cq_vector = 0;
	result = queue_request_irq(nvmeq);
	if (result)
		nvmeq->cq_vector = -1;	/* roll back: no interrupt was registered */

	return result;
}
首先看看如何disable nvme controller
/*
 * nvme_disable_ctrl - clear the enable and shutdown bits and wait for !RDY.
 *
 * Clears NVME_CC_SHN_MASK and NVME_CC_ENABLE in the cached ctrl_config,
 * writes the result to NVME_REG_CC, optionally sleeps for controllers with
 * NVME_QUIRK_DELAY_BEFORE_CHK_RDY, then polls via nvme_wait_ready().
 *
 * Returns the register-write error if the write fails, otherwise the result
 * of nvme_wait_ready().
 */
int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
{
	int err;

	ctrl->ctrl_config &= ~(NVME_CC_SHN_MASK | NVME_CC_ENABLE);

	err = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
	if (err)
		return err;

	/* Quirky controllers get a fixed delay before the ready bit is polled. */
	if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
		msleep(NVME_QUIRK_DELAY_AMOUNT);

	return nvme_wait_ready(ctrl, cap, false);
}
主要是将NVME_CC_SHN_MASK 和NVME_CC_ENABLE 这两个位域清零后,写到偏移为NVME_REG_CC的寄存器。如果设置了NVME_QUIRK_DELAY_BEFORE_CHK_RDY,则先等待NVME_QUIRK_DELAY_AMOUNT,之后通过nvme_wait_ready 判断nvme controller是否已经成功disable
/*
 * nvme_wait_ready - poll CSTS until the RDY bit matches the requested state.
 *
 * @enabled selects the state to wait for: RDY set when true, RDY clear when
 * false. The deadline is (NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2 jiffies from
 * now, and the register is re-read every 100 ms.
 *
 * Returns 0 once the bit matches, the reg_read32 error if the read fails,
 * -ENODEV if CSTS reads back as all ones or the deadline passes, and -EINTR
 * if a fatal signal is pending.
 */
static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
{
	unsigned long deadline =
		((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
	u32 want = enabled ? NVME_CSTS_RDY : 0;
	u32 csts;
	int ret;

	for (;;) {
		ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts);
		if (ret)
			return ret;

		/* An all-ones read is reported as a missing device. */
		if (csts == ~0)
			return -ENODEV;

		if ((csts & NVME_CSTS_RDY) == want)
			return 0;

		msleep(100);

		if (fatal_signal_pending(current))
			return -EINTR;

		if (time_after(jiffies, deadline)) {
			dev_err(ctrl->device,
				"Device not ready; aborting %s\n",
				enabled ? "initialisation" : "reset");
			return -ENODEV;
		}
	}
}
在nvme_wait_ready 中循环读取Controller Status(CSTS)寄存器,判断其中的NVME_CSTS_RDY位是否达到期望值(enable时为1,disable时为0),每次轮询间隔100ms。超时时间是根据从nvme 的CAP寄存器中读到的timeout字段计算出来的。如果在规定的时间内RDY位没有变为期望值,则会返回-ENODEV,并打印下面的log
dev_err(ctrl->device,"Device not ready; aborting %s\n", enabled ?"initialisation" : "reset");
回到nvme_configure_admin_queue 中看看nvme_alloc_queue的实现
/*
 * nvme_alloc_queue - allocate and initialise one nvme_queue.
 *
 * Allocates the queue structure, a zeroed coherent DMA buffer for the
 * completion entries and (through nvme_alloc_sq_cmds) the submission queue
 * memory, then fills in the bookkeeping fields and publishes the queue in
 * dev->queues[qid].
 *
 * Returns the new queue, or NULL if any allocation fails (everything
 * allocated up to that point is released again).
 */
static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
							int depth)
{
	struct nvme_queue *q;

	q = kzalloc(sizeof(*q), GFP_KERNEL);
	if (!q)
		return NULL;

	q->cqes = dma_zalloc_coherent(dev->dev, CQ_SIZE(depth),
				      &q->cq_dma_addr, GFP_KERNEL);
	if (!q->cqes)
		goto err_free_queue;

	if (nvme_alloc_sq_cmds(dev, q, qid, depth))
		goto err_free_cq;

	/* Identity and back-pointers. */
	q->dev = dev;
	q->q_dmadev = dev->dev;
	q->qid = qid;
	q->q_depth = depth;
	snprintf(q->irqname, sizeof(q->irqname), "nvme%dq%d",
		 dev->ctrl.instance, qid);

	/* Initial completion-queue state; cq_vector stays -1 until an irq is requested. */
	spin_lock_init(&q->q_lock);
	q->cq_head = 0;
	q->cq_phase = 1;
	q->cq_vector = -1;

	/* Doorbell slot for this queue: index qid * 2 * db_stride into dev->dbs. */
	q->q_db = &dev->dbs[qid * 2 * dev->db_stride];

	dev->queues[qid] = q;
	dev->queue_count++;

	return q;

err_free_cq:
	dma_free_coherent(dev->dev, CQ_SIZE(depth), (void *)q->cqes,
			  q->cq_dma_addr);
err_free_queue:
	kfree(q);
	return NULL;
}
在nvme_alloc_queue 中通过dma_zalloc_coherent 为cqes申请memory并清零,在nvme_alloc_sq_cmds 中为sq_dma_addr赋值。
static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
int qid, int depth)
{
if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) {
unsigned offset = (qid - 1) * roundup(SQ_SIZE(depth),
dev->ctrl.page_size);
nvmeq->sq_dma_addr = dev->cmb_dma_addr + offset;
nvmeq->sq_cmds_io = dev->cmb + offset;
} else {
nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
&nvmeq->sq_dma_addr, GFP_KERNEL);
if (!nvmeq->sq_cmds)
return -ENOMEM;
}
return 0;
}
回到nvme_configure_admin_queue 通过queue_request_irq 申请irq
/*
 * queue_request_irq - register the interrupt handler for @nvmeq.
 *
 * Without use_threaded_interrupts, nvme_irq is installed through
 * request_irq(). With it, nvme_irq_check and nvme_irq are both passed to
 * request_threaded_irq(). Both paths use IRQF_SHARED and the queue's
 * irqname.
 *
 * Returns whatever the underlying request call returns.
 */
static int queue_request_irq(struct nvme_queue *nvmeq)
{
	if (!use_threaded_interrupts)
		return request_irq(nvmeq_irq(nvmeq), nvme_irq, IRQF_SHARED,
				   nvmeq->irqname, nvmeq);

	return request_threaded_irq(nvmeq_irq(nvmeq), nvme_irq_check,
				    nvme_irq, IRQF_SHARED, nvmeq->irqname,
				    nvmeq);
}
如果use_threaded_interrupts为零的话,则通过request_irq 申请irq,如果use_threaded_interrupts 不为零的话,则通过request_threaded_irq 申请,而request_irq 是request_threaded_irq的一个包装,用request_irq的话,不用指定中断线程化的函数
/*
 * request_irq - convenience wrapper around request_threaded_irq().
 *
 * Forwards irq, handler, flags, name and dev unchanged and passes NULL as
 * the third argument (the thread function), so no threaded handler is
 * installed. Returns the result of request_threaded_irq(); __must_check
 * makes the compiler warn when callers ignore it.
 */
static inline int __must_check
request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags,
const char *name, void *dev)
{
return request_threaded_irq(irq, handler, NULL, flags, name, dev);
}
可以看到通过request_irq申请irq的话也是调用request_threaded_irq,但是形参irq_handler_t thread_fn为null
回到nvme_reset_work 中继续调用nvme_init_queue 来初始化在nvme_configure_admin_queue 中申请的queue。然后调用
nvme_alloc_admin_tags 初始化admin_tagset
/*
 * nvme_alloc_admin_tags - set up the blk-mq tag set and request queue used
 * for admin commands.
 *
 * When dev->ctrl.admin_q does not exist yet, the admin tag set is filled in
 * and allocated, and the admin request queue is created on top of it. When
 * it already exists, the stopped hardware queues are simply restarted.
 *
 * Returns 0 on success, -ENOMEM if the tag set or the queue cannot be
 * allocated, and -ENODEV if no reference on the new queue can be taken.
 * On every failure path dev->ctrl.admin_q is left NULL.
 */
static int nvme_alloc_admin_tags(struct nvme_dev *dev)
{
	if (!dev->ctrl.admin_q) {
		dev->admin_tagset.ops = &nvme_mq_admin_ops;
		dev->admin_tagset.nr_hw_queues = 1;

		/*
		 * Subtract one to leave an empty queue entry for 'Full Queue'
		 * condition. See NVM-Express 1.2 specification, section 4.1.2.
		 */
		dev->admin_tagset.queue_depth = NVME_AQ_BLKMQ_DEPTH - 1;
		dev->admin_tagset.timeout = ADMIN_TIMEOUT;
		dev->admin_tagset.numa_node = dev_to_node(dev->dev);
		dev->admin_tagset.cmd_size = nvme_cmd_size(dev);
		dev->admin_tagset.driver_data = dev;

		if (blk_mq_alloc_tag_set(&dev->admin_tagset))
			return -ENOMEM;

		dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset);
		if (IS_ERR(dev->ctrl.admin_q)) {
			blk_mq_free_tag_set(&dev->admin_tagset);
			/*
			 * Do not leave an ERR_PTR behind: other code (and the
			 * check at the top of this function) only tests
			 * admin_q against NULL and would otherwise treat the
			 * error value as a live queue.
			 */
			dev->ctrl.admin_q = NULL;
			return -ENOMEM;
		}

		if (!blk_get_queue(dev->ctrl.admin_q)) {
			nvme_dev_remove_admin(dev);
			dev->ctrl.admin_q = NULL;
			return -ENODEV;
		}
	} else {
		blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);
	}

	return 0;
}
回到nvme_reset_work 中继续调用nvme_init_identify->nvme_identify_ctrl 来识别nvme controller
int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
{
struct nvme_command c = { };
int error;
/* gcc-4.4.4 (at least) has issues with initializers and anon unions */
c.identify.opcode = nvme_admin_identify;
c.identifys = cpu_to_le32(NVME_ID_CNS_CTRL);
*id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
if (!*id)
return -ENOMEM;
error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
sizeof(struct nvme_id_ctrl));
if (error)
kfree(*id);
return error;
}
从nvme_identify_ctrl 中可以知道,是通过发送nvme_admin_identify(nvme_admin_identify = 0x06)命令来识别nvme controller。nvme_submit_sync_cmd->__nvme_submit_sync_cmd->blk_execute_rq 最终调用block层的处理函数发送命令给nvme controller.
nvme_identify_ctrl中成功识别到nvme controller后,会将controller的参数保存到nvme_ctrl 这个结构体中
回到nvme_reset_work,当调用nvme_start_queues/nvme_dev_add 之后就调用nvme_queue_scan(&dev->ctrl);来scan
/*
 * nvme_configure_admin_queue - bring up the admin queue of an NVMe device.
 *
 * Disables the controller, allocates the admin queue (qid 0) if it does not
 * exist yet, programs the AQA/ASQ/ACQ registers with the queue size and DMA
 * addresses, re-enables the controller and finally requests the admin queue
 * interrupt.
 *
 * Returns 0 on success. On failure returns the error of the step that failed
 * (-ENOMEM when the admin queue cannot be allocated).
 */
static int nvme_configure_admin_queue(struct nvme_dev *dev)
{
	int result;
	u32 aqa;
	u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
	struct nvme_queue *nvmeq;

	/* dev->subsystem comes from CAP only when the VS register reports >= 1.1.0. */
	dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ?
						NVME_CAP_NSSRC(cap) : 0;

	/*
	 * If CSTS.NSSRO is set, write the bit back to CSTS
	 * (presumably write-one-to-clear; confirm against the NVMe spec).
	 */
	if (dev->subsystem &&
	    (readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO))
		writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS);

	/*
	 * The controller is disabled before the admin queue registers are
	 * touched and re-enabled (nvme_enable_ctrl) once they are programmed.
	 */
	result = nvme_disable_ctrl(&dev->ctrl, cap);
	if (result < 0)
		return result;

	/*
	 * On the first call dev->queues[0] is expected to be NULL, so the
	 * admin queue is allocated here; later calls reuse the existing one.
	 */
	nvmeq = dev->queues[0];
	if (!nvmeq) {
		nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH);
		if (!nvmeq)
			return -ENOMEM;
	}

	/* Zero-based queue depth goes into both the low and the high 16 bits. */
	aqa = nvmeq->q_depth - 1;
	aqa |= aqa << 16;

	/*
	 * Write sq_dma_addr and cq_dma_addr to the NVME_REG_ASQ and
	 * NVME_REG_ACQ offsets of the BAR; both addresses were obtained in
	 * nvme_alloc_queue().
	 */
	writel(aqa, dev->bar + NVME_REG_AQA);
	lo_hi_writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ);
	lo_hi_writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ);

	result = nvme_enable_ctrl(&dev->ctrl, cap);
	if (result)
		return result;

	/* Register the interrupt through queue_request_irq(). */
	nvmeq->cq_vector = 0;
	result = queue_request_irq(nvmeq);
	if (result)
		nvmeq->cq_vector = -1;	/* roll back: no interrupt was registered */

	return result;
}
首先看看如何disable nvme controller
/*
 * nvme_disable_ctrl - clear the enable and shutdown bits and wait for !RDY.
 *
 * Clears NVME_CC_SHN_MASK and NVME_CC_ENABLE in the cached ctrl_config,
 * writes the result to NVME_REG_CC, optionally sleeps for controllers with
 * NVME_QUIRK_DELAY_BEFORE_CHK_RDY, then polls via nvme_wait_ready().
 *
 * Returns the register-write error if the write fails, otherwise the result
 * of nvme_wait_ready().
 */
int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
{
	int err;

	ctrl->ctrl_config &= ~(NVME_CC_SHN_MASK | NVME_CC_ENABLE);

	err = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
	if (err)
		return err;

	/* Quirky controllers get a fixed delay before the ready bit is polled. */
	if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
		msleep(NVME_QUIRK_DELAY_AMOUNT);

	return nvme_wait_ready(ctrl, cap, false);
}
主要是将NVME_CC_SHN_MASK 和NVME_CC_ENABLE 这两个位域清零后,写到偏移为NVME_REG_CC的寄存器。如果设置了NVME_QUIRK_DELAY_BEFORE_CHK_RDY,则先等待NVME_QUIRK_DELAY_AMOUNT,之后通过nvme_wait_ready 判断nvme controller是否已经成功disable
/*
 * nvme_wait_ready - poll CSTS until the RDY bit matches the requested state.
 *
 * @enabled selects the state to wait for: RDY set when true, RDY clear when
 * false. The deadline is (NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2 jiffies from
 * now, and the register is re-read every 100 ms.
 *
 * Returns 0 once the bit matches, the reg_read32 error if the read fails,
 * -ENODEV if CSTS reads back as all ones or the deadline passes, and -EINTR
 * if a fatal signal is pending.
 */
static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
{
	unsigned long deadline =
		((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies;
	u32 want = enabled ? NVME_CSTS_RDY : 0;
	u32 csts;
	int ret;

	for (;;) {
		ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts);
		if (ret)
			return ret;

		/* An all-ones read is reported as a missing device. */
		if (csts == ~0)
			return -ENODEV;

		if ((csts & NVME_CSTS_RDY) == want)
			return 0;

		msleep(100);

		if (fatal_signal_pending(current))
			return -EINTR;

		if (time_after(jiffies, deadline)) {
			dev_err(ctrl->device,
				"Device not ready; aborting %s\n",
				enabled ? "initialisation" : "reset");
			return -ENODEV;
		}
	}
}
在nvme_wait_ready 中循环读取Controller Status(CSTS)寄存器,判断其中的NVME_CSTS_RDY位是否达到期望值(enable时为1,disable时为0),每次轮询间隔100ms。超时时间是根据从nvme 的CAP寄存器中读到的timeout字段计算出来的。如果在规定的时间内RDY位没有变为期望值,则会返回-ENODEV,并打印下面的log
dev_err(ctrl->device,"Device not ready; aborting %s\n", enabled ?"initialisation" : "reset");
回到nvme_configure_admin_queue 中看看nvme_alloc_queue的实现
/*
 * nvme_alloc_queue - allocate and initialise one nvme_queue.
 *
 * Allocates the queue structure, a zeroed coherent DMA buffer for the
 * completion entries and (through nvme_alloc_sq_cmds) the submission queue
 * memory, then fills in the bookkeeping fields and publishes the queue in
 * dev->queues[qid].
 *
 * Returns the new queue, or NULL if any allocation fails (everything
 * allocated up to that point is released again).
 */
static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
							int depth)
{
	struct nvme_queue *q;

	q = kzalloc(sizeof(*q), GFP_KERNEL);
	if (!q)
		return NULL;

	q->cqes = dma_zalloc_coherent(dev->dev, CQ_SIZE(depth),
				      &q->cq_dma_addr, GFP_KERNEL);
	if (!q->cqes)
		goto err_free_queue;

	if (nvme_alloc_sq_cmds(dev, q, qid, depth))
		goto err_free_cq;

	/* Identity and back-pointers. */
	q->dev = dev;
	q->q_dmadev = dev->dev;
	q->qid = qid;
	q->q_depth = depth;
	snprintf(q->irqname, sizeof(q->irqname), "nvme%dq%d",
		 dev->ctrl.instance, qid);

	/* Initial completion-queue state; cq_vector stays -1 until an irq is requested. */
	spin_lock_init(&q->q_lock);
	q->cq_head = 0;
	q->cq_phase = 1;
	q->cq_vector = -1;

	/* Doorbell slot for this queue: index qid * 2 * db_stride into dev->dbs. */
	q->q_db = &dev->dbs[qid * 2 * dev->db_stride];

	dev->queues[qid] = q;
	dev->queue_count++;

	return q;

err_free_cq:
	dma_free_coherent(dev->dev, CQ_SIZE(depth), (void *)q->cqes,
			  q->cq_dma_addr);
err_free_queue:
	kfree(q);
	return NULL;
}
在nvme_alloc_queue 中通过dma_zalloc_coherent 为cqes申请memory并清零,在nvme_alloc_sq_cmds 中为sq_dma_addr赋值。
static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
int qid, int depth)
{
if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) {
unsigned offset = (qid - 1) * roundup(SQ_SIZE(depth),
dev->ctrl.page_size);
nvmeq->sq_dma_addr = dev->cmb_dma_addr + offset;
nvmeq->sq_cmds_io = dev->cmb + offset;
} else {
nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
&nvmeq->sq_dma_addr, GFP_KERNEL);
if (!nvmeq->sq_cmds)
return -ENOMEM;
}
return 0;
}
回到nvme_configure_admin_queue 通过queue_request_irq 申请irq
/*
 * queue_request_irq - register the interrupt handler for @nvmeq.
 *
 * Without use_threaded_interrupts, nvme_irq is installed through
 * request_irq(). With it, nvme_irq_check and nvme_irq are both passed to
 * request_threaded_irq(). Both paths use IRQF_SHARED and the queue's
 * irqname.
 *
 * Returns whatever the underlying request call returns.
 */
static int queue_request_irq(struct nvme_queue *nvmeq)
{
	if (!use_threaded_interrupts)
		return request_irq(nvmeq_irq(nvmeq), nvme_irq, IRQF_SHARED,
				   nvmeq->irqname, nvmeq);

	return request_threaded_irq(nvmeq_irq(nvmeq), nvme_irq_check,
				    nvme_irq, IRQF_SHARED, nvmeq->irqname,
				    nvmeq);
}
如果use_threaded_interrupts为零的话,则通过request_irq 申请irq,如果use_threaded_interrupts 不为零的话,则通过request_threaded_irq 申请,而request_irq 是request_threaded_irq的一个包装,用request_irq的话,不用指定中断线程化的函数
/*
 * request_irq - convenience wrapper around request_threaded_irq().
 *
 * Forwards irq, handler, flags, name and dev unchanged and passes NULL as
 * the third argument (the thread function), so no threaded handler is
 * installed. Returns the result of request_threaded_irq(); __must_check
 * makes the compiler warn when callers ignore it.
 */
static inline int __must_check
request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags,
const char *name, void *dev)
{
return request_threaded_irq(irq, handler, NULL, flags, name, dev);
}
可以看到通过request_irq申请irq的话也是调用request_threaded_irq,但是形参irq_handler_t thread_fn为null
回到nvme_reset_work 中继续调用nvme_init_queue 来初始化在nvme_configure_admin_queue 中申请的queue。然后调用
nvme_alloc_admin_tags 初始化admin_tagset
/*
 * nvme_alloc_admin_tags - set up the blk-mq tag set and request queue used
 * for admin commands.
 *
 * When dev->ctrl.admin_q does not exist yet, the admin tag set is filled in
 * and allocated, and the admin request queue is created on top of it. When
 * it already exists, the stopped hardware queues are simply restarted.
 *
 * Returns 0 on success, -ENOMEM if the tag set or the queue cannot be
 * allocated, and -ENODEV if no reference on the new queue can be taken.
 * On every failure path dev->ctrl.admin_q is left NULL.
 */
static int nvme_alloc_admin_tags(struct nvme_dev *dev)
{
	if (!dev->ctrl.admin_q) {
		dev->admin_tagset.ops = &nvme_mq_admin_ops;
		dev->admin_tagset.nr_hw_queues = 1;

		/*
		 * Subtract one to leave an empty queue entry for 'Full Queue'
		 * condition. See NVM-Express 1.2 specification, section 4.1.2.
		 */
		dev->admin_tagset.queue_depth = NVME_AQ_BLKMQ_DEPTH - 1;
		dev->admin_tagset.timeout = ADMIN_TIMEOUT;
		dev->admin_tagset.numa_node = dev_to_node(dev->dev);
		dev->admin_tagset.cmd_size = nvme_cmd_size(dev);
		dev->admin_tagset.driver_data = dev;

		if (blk_mq_alloc_tag_set(&dev->admin_tagset))
			return -ENOMEM;

		dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset);
		if (IS_ERR(dev->ctrl.admin_q)) {
			blk_mq_free_tag_set(&dev->admin_tagset);
			/*
			 * Do not leave an ERR_PTR behind: other code (and the
			 * check at the top of this function) only tests
			 * admin_q against NULL and would otherwise treat the
			 * error value as a live queue.
			 */
			dev->ctrl.admin_q = NULL;
			return -ENOMEM;
		}

		if (!blk_get_queue(dev->ctrl.admin_q)) {
			nvme_dev_remove_admin(dev);
			dev->ctrl.admin_q = NULL;
			return -ENODEV;
		}
	} else {
		blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);
	}

	return 0;
}
回到nvme_reset_work 中继续调用nvme_init_identify->nvme_identify_ctrl 来识别nvme controller
int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
{
struct nvme_command c = { };
int error;
/* gcc-4.4.4 (at least) has issues with initializers and anon unions */
c.identify.opcode = nvme_admin_identify;
c.identifys = cpu_to_le32(NVME_ID_CNS_CTRL);
*id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
if (!*id)
return -ENOMEM;
error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
sizeof(struct nvme_id_ctrl));
if (error)
kfree(*id);
return error;
}
从nvme_identify_ctrl 中可以知道,是通过发送nvme_admin_identify(nvme_admin_identify = 0x06)命令来识别nvme controller。nvme_submit_sync_cmd->__nvme_submit_sync_cmd->blk_execute_rq 最终调用block层的处理函数发送命令给nvme controller.
nvme_identify_ctrl中成功识别到nvme controller后,会将controller的参数保存到nvme_ctrl 这个结构体中
回到nvme_reset_work,当调用nvme_start_queues/nvme_dev_add 之后就调用nvme_queue_scan(&dev->ctrl);来scan
更多推荐
nvme_reset_work 分析2
发布评论