nvme_probe函数中继续调用nvme_configure_admin_queue
static int nvme_configure_admin_queue(struct nvme_dev *dev)
{
    int result;
    u32 aqa;
    u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
    struct nvme_queue *nvmeq;

    dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ?
                        NVME_CAP_NSSRC(cap) : 0;

    if (dev->subsystem &&
        (readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO))
        writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS);
//在对nvme controller 操作的时需要通过nvme_disable_ctrl,完成后调用nvme_enable_ctrl
    result = nvme_disable_ctrl(&dev->ctrl, cap);
    if (result < 0)
        return result;
//第一次调用nvmeq 肯定为null,因此调用nvme_alloc_queue 申请
    nvmeq = dev->queues[0];
    if (!nvmeq) {
        nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH);
        if (!nvmeq)
            return -ENOMEM;
    }

    aqa = nvmeq->q_depth - 1;
    aqa |= aqa << 16;
//将sq_dma_addr 和 cq_dma_addr 分别系到bar 空间偏移为NVME_REG_ASQ和NVME_REG_ACQ,这个地址都是在nvme_alloc_queue 中申请的.
    writel(aqa, dev->bar + NVME_REG_AQA);
    lo_hi_writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ);
    lo_hi_writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ);

    result = nvme_enable_ctrl(&dev->ctrl, cap);
    if (result)
        return result;
    nvmeq->cq_vector = 0;
通过queue_request_irq 注册中断
    result = queue_request_irq(nvmeq);
    if (result) {
        nvmeq->cq_vector = -1;
        return result;
    }

    return result;
}
首先看看如何disable nvme controller
/*
 * nvme_disable_ctrl - clear the enable bit in CC and wait for the controller.
 * @ctrl: controller whose cached ctrl_config is updated and written out.
 * @cap:  value of the CAP register, forwarded to nvme_wait_ready().
 *
 * Return: the error from the register write if it fails, otherwise the
 * result of nvme_wait_ready(ctrl, cap, false).
 */
int nvme_disable_ctrl(struct nvme_ctrl *ctrl, u64 cap)
{
    int err;

    /* Clear the NVME_CC_SHN_MASK bits and NVME_CC_ENABLE in the cached value. */
    ctrl->ctrl_config &= ~(NVME_CC_SHN_MASK | NVME_CC_ENABLE);

    err = ctrl->ops->reg_write32(ctrl, NVME_REG_CC, ctrl->ctrl_config);
    if (err != 0)
        return err;

    /* Controllers with this quirk get an extra delay before being polled. */
    if ((ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY) != 0)
        msleep(NVME_QUIRK_DELAY_AMOUNT);

    return nvme_wait_ready(ctrl, cap, false);
}
主要是将ctrl_config 中的NVME_CC_SHN_MASK 和NVME_CC_ENABLE 这些bit清零后写到偏移为NVME_REG_CC的寄存器,如果设置了NVME_QUIRK_DELAY_BEFORE_CHK_RDY,则先等待NVME_QUIRK_DELAY_AMOUNT,之后通过nvme_wait_ready 判断nvme是否已经成功disable
/*
 * nvme_wait_ready - poll CSTS until the RDY bit matches the requested state.
 * @ctrl:    controller to poll through ctrl->ops->reg_read32().
 * @cap:     value of the CAP register; its timeout field bounds the wait.
 * @enabled: true to wait for NVME_CSTS_RDY to be set, false to wait for it
 *           to be clear.
 *
 * Return: 0 once RDY has the expected value; the register read error if the
 * read fails; -ENODEV if CSTS reads back as all ones or the deadline passes;
 * -EINTR if a fatal signal is pending for the current task.
 */
static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
{
    /* Deadline is (NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2 jiffies from now. */
    unsigned long deadline =
        jiffies + ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2);
    u32 wanted = enabled ? NVME_CSTS_RDY : 0;
    u32 csts;
    int ret;

    for (;;) {
        ret = ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts);
        if (ret != 0)
            return ret;

        /* An all-ones read is treated as "device gone". */
        if (csts == ~0)
            return -ENODEV;

        if ((csts & NVME_CSTS_RDY) == wanted)
            return 0;

        msleep(100);

        if (fatal_signal_pending(current))
            return -EINTR;

        if (time_after(jiffies, deadline)) {
            dev_err(ctrl->device,
                "Device not ready; aborting %s\n", enabled ?
                        "initialisation" : "reset");
            return -ENODEV;
        }
    }
}
在nvme_wait_ready 中通过判断Controller Status(CSTS)寄存器的RDY位是否达到期望值(disable时期望为0,enable时期望为1)来确认controller的状态,等待的超时时间是根据CAP寄存器中的timeout字段计算出来的。如果在规定的时间内RDY位没有变为期望值,则会打印下面的log,并返回-ENODEV
dev_err(ctrl->device,"Device not ready; aborting %s\n", enabled ?"initialisation" : "reset");
回到nvme_configure_admin_queue 中看看nvme_alloc_queue的实现
/*
 * nvme_alloc_queue - allocate and initialise one queue structure.
 * @dev:   owning device; the new queue is stored in dev->queues[qid].
 * @qid:   index of the queue to create.
 * @depth: number of entries, used to size the CQ and SQ buffers.
 *
 * Allocates the queue structure, a zeroed coherent DMA buffer for the
 * completion entries and (through nvme_alloc_sq_cmds()) the submission
 * queue memory, then fills in the bookkeeping fields.
 *
 * Return: the new queue, or NULL if any allocation fails (everything
 * allocated so far is released again).
 */
static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
                            int depth)
{
    struct nvme_queue *nvmeq;

    nvmeq = kzalloc(sizeof(*nvmeq), GFP_KERNEL);
    if (nvmeq == NULL)
        return NULL;

    nvmeq->cqes = dma_zalloc_coherent(dev->dev, CQ_SIZE(depth),
                      &nvmeq->cq_dma_addr, GFP_KERNEL);
    if (nvmeq->cqes == NULL)
        goto err_free_queue;

    if (nvme_alloc_sq_cmds(dev, nvmeq, qid, depth) != 0)
        goto err_free_cq;

    /* Back-references and identity. */
    nvmeq->dev = dev;
    nvmeq->q_dmadev = dev->dev;
    nvmeq->qid = qid;
    nvmeq->q_depth = depth;
    snprintf(nvmeq->irqname, sizeof(nvmeq->irqname), "nvme%dq%d",
            dev->ctrl.instance, qid);

    /* Initial completion-queue state; -1 means no vector assigned yet. */
    spin_lock_init(&nvmeq->q_lock);
    nvmeq->cq_head = 0;
    nvmeq->cq_phase = 1;
    nvmeq->cq_vector = -1;

    /* Each queue id owns two slots in dev->dbs, scaled by db_stride. */
    nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];

    /* Publish the queue on the device. */
    dev->queues[qid] = nvmeq;
    dev->queue_count++;

    return nvmeq;

 err_free_cq:
    dma_free_coherent(dev->dev, CQ_SIZE(depth), (void *)nvmeq->cqes,
                            nvmeq->cq_dma_addr);
 err_free_queue:
    kfree(nvmeq);
    return NULL;
}
在nvme_alloc_queue 中通过dma_zalloc_coherent 为cqes申请memory并清零,在nvme_alloc_sq_cmds 中为sq_dma_addr赋值。
static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
                int qid, int depth)
{
    if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) {
        unsigned offset = (qid - 1) * roundup(SQ_SIZE(depth),
                              dev->ctrl.page_size);
        nvmeq->sq_dma_addr = dev->cmb_dma_addr + offset;
        nvmeq->sq_cmds_io = dev->cmb + offset;
    } else {
        nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
                    &nvmeq->sq_dma_addr, GFP_KERNEL);
        if (!nvmeq->sq_cmds)
            return -ENOMEM;
    }

    return 0;
}
回到nvme_configure_admin_queue 通过queue_request_irq 申请irq
/*
 * queue_request_irq - register the interrupt handler(s) for a queue.
 * @nvmeq: queue whose IRQ number comes from nvmeq_irq() and whose irqname
 *         is used as the handler name; also passed as the handler cookie.
 *
 * With use_threaded_interrupts set, nvme_irq_check is the primary handler
 * and nvme_irq the threaded one; otherwise nvme_irq is the only handler.
 * The line is requested with IRQF_SHARED in both cases.
 *
 * Return: the result of request_irq() / request_threaded_irq().
 */
static int queue_request_irq(struct nvme_queue *nvmeq)
{
    if (!use_threaded_interrupts)
        return request_irq(nvmeq_irq(nvmeq), nvme_irq, IRQF_SHARED,
                nvmeq->irqname, nvmeq);

    return request_threaded_irq(nvmeq_irq(nvmeq), nvme_irq_check,
            nvme_irq, IRQF_SHARED, nvmeq->irqname, nvmeq);
}
如果use_threaded_interrupts为零的话,则通过request_irq 申请irq,如果use_threaded_interrupts 不为零的话,则通过request_threaded_irq 申请,而request_irq 是request_threaded_irq的一个包装,用request_irq的话,不用指定中断线程化的函数
/*
 * request_irq - request an interrupt line without a threaded handler.
 * @irq:     interrupt number to request.
 * @handler: primary handler, forwarded unchanged.
 * @flags:   request flags, forwarded unchanged.
 * @name:    name for the request, forwarded unchanged.
 * @dev:     cookie forwarded unchanged.
 *
 * Return: whatever request_threaded_irq() returns.
 */
static inline int __must_check
request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags,
        const char *name, void *dev)
{
    /* This wrapper never supplies a thread function. */
    irq_handler_t thread_fn = NULL;

    return request_threaded_irq(irq, handler, thread_fn, flags, name, dev);
}
可以看到通过request_irq申请irq的话也是调用request_threaded_irq,但是形参irq_handler_t thread_fn为null
回到nvme_reset_work 中继续调用nvme_init_queue 来初始化在nvme_configure_admin_queue 中申请的queue。然后调用
nvme_alloc_admin_tags 初始化admin_tagset
static int nvme_alloc_admin_tags(struct nvme_dev *dev)
{
    if (!dev->ctrl.admin_q) {
        dev->admin_tagset.ops = &nvme_mq_admin_ops;
        dev->admin_tagset.nr_hw_queues = 1;

        /*
         * Subtract one to leave an empty queue entry for 'Full Queue'
         * condition. See NVM-Express 1.2 specification, section 4.1.2.
         */
        dev->admin_tagset.queue_depth = NVME_AQ_BLKMQ_DEPTH - 1;
        dev->admin_tagset.timeout = ADMIN_TIMEOUT;
        dev->admin_tagset.numa_node = dev_to_node(dev->dev);
        dev->admin_tagset.cmd_size = nvme_cmd_size(dev);
        dev->admin_tagset.driver_data = dev;

        if (blk_mq_alloc_tag_set(&dev->admin_tagset))
            return -ENOMEM;

        dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset);
        if (IS_ERR(dev->ctrl.admin_q)) {
            blk_mq_free_tag_set(&dev->admin_tagset);
            return -ENOMEM;
        }
        if (!blk_get_queue(dev->ctrl.admin_q)) {
            nvme_dev_remove_admin(dev);
            dev->ctrl.admin_q = NULL;
            return -ENODEV;
        }
    } else
        blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);

    return 0;
}
回到nvme_reset_work 中继续调用nvme_init_identify->nvme_identify_ctrl 来识别nvme controller
int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
{
    struct nvme_command c = { };
    int error;

    /* gcc-4.4.4 (at least) has issues with initializers and anon unions */
    c.identify.opcode = nvme_admin_identify;
    c.identifys = cpu_to_le32(NVME_ID_CNS_CTRL);

    *id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL);
    if (!*id)
        return -ENOMEM;

    error = nvme_submit_sync_cmd(dev->admin_q, &c, *id,
            sizeof(struct nvme_id_ctrl));
    if (error)
        kfree(*id);
    return error;
}
从nvme_identify_ctrl 中可以知道,这里通过发送nvme_admin_identify 命令(opcode 定义为 nvme_admin_identify = 0x06)来识别nvme controller。
nvme_submit_sync_cmd->__nvme_submit_sync_cmd->blk_execute_rq 最终调用block层的处理函数发送命令给nvme controller.
nvme_identify_ctrl中成功识别到nvme controller后,会将controller的参数保存到nvme_ctrl 这个结构体中
回到nvme_reset_work,当调用nvme_start_queues/nvme_dev_add 之后就调用nvme_queue_scan(&dev->ctrl);来scan

更多推荐

nvme_reset_work 分析2