SPDK 中的NVMe over PCIe驱动

SPDK 是什么?

SPDK 是 Intel 推出的高性能用户态存储软件库。该软件库的核心是一个用户态、异步、无锁、轮询方式的 NVMe 驱动。本文将结合代码分析 SPDK 是如何在用户态实现这个高性能 NVMe 驱动的。

SPDK NVMe 驱动详解

SPDK NVMe 驱动的基本结构

首先来看一下 SPDK 的基本组成结构,NVMe 驱动的代码主要位于 lib/nvme 目录下。

最关键的一个数据结构是 spdk_nvme_transport_ops。我们知道 NVMe 协议是一个应用层协议,其底层传输可以基于 PCIe、TCP 或者 RDMA。为了隐藏底层的实现细节,不同传输类型的 NVMe 设备驱动都需要实现这个结构体。接下来我们分析基于 PCIe 的 NVMe 驱动的主要实现。

该结构体的定义如下:
/*
 * Table of function pointers describing one NVMe transport.
 *
 * Per the surrounding article, every transport type (PCIe, TCP, RDMA)
 * provides its own implementation of this structure so that the generic
 * NVMe driver code does not depend on transport details.
 *
 * NOTE(review): the group notes below are read off the member names and
 * signatures only; confirm exact semantics against the SPDK headers.
 */
struct spdk_nvme_transport_ops {
	/*
	 * Transport name. The buffer holds SPDK_NVMF_TRSTRING_MAX_LEN characters
	 * plus one extra byte (presumably the NUL terminator -- confirm).
	 */
	char name[SPDK_NVMF_TRSTRING_MAX_LEN + 1];

	/* Transport type identifier. */
	enum spdk_nvme_transport_type type;

	/* --- Controller construction, scan, teardown and enable --- */

	struct spdk_nvme_ctrlr *(*ctrlr_construct)(const struct spdk_nvme_transport_id *trid,
			const struct spdk_nvme_ctrlr_opts *opts,
			void *devhandle);

	int (*ctrlr_scan)(struct spdk_nvme_probe_ctx *probe_ctx, bool direct_connect);

	int (*ctrlr_destruct)(struct spdk_nvme_ctrlr *ctrlr);

	int (*ctrlr_enable)(struct spdk_nvme_ctrlr *ctrlr);

	/*
	 * --- Register accessors ---
	 * The _4 variants carry a uint32_t value and the _8 variants a uint64_t
	 * value; getters return the value through the `value` out-pointer.
	 */

	int (*ctrlr_set_reg_4)(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value);

	int (*ctrlr_set_reg_8)(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value);

	int (*ctrlr_get_reg_4)(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t *value);

	int (*ctrlr_get_reg_8)(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t *value);

	/*
	 * --- Asynchronous register accessors ---
	 * Same offsets/values as above, but each takes a spdk_nvme_reg_cb
	 * callback and an opaque cb_arg instead of (for getters) an out-pointer.
	 */

	int (*ctrlr_set_reg_4_async)(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint32_t value,
			spdk_nvme_reg_cb cb_fn, void *cb_arg);

	int (*ctrlr_set_reg_8_async)(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset, uint64_t value,
			spdk_nvme_reg_cb cb_fn, void *cb_arg);

	int (*ctrlr_get_reg_4_async)(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset,
			spdk_nvme_reg_cb cb_fn, void *cb_arg);

	int (*ctrlr_get_reg_8_async)(struct spdk_nvme_ctrlr *ctrlr, uint32_t offset,
			spdk_nvme_reg_cb cb_fn, void *cb_arg);

	/* --- Transfer limits reported by the transport --- */

	uint32_t (*ctrlr_get_max_xfer_size)(struct spdk_nvme_ctrlr *ctrlr);

	uint16_t (*ctrlr_get_max_sges)(struct spdk_nvme_ctrlr *ctrlr);

	/*
	 * --- CMB / PMR handling ---
	 * The map callbacks return a pointer and report a size through `size`.
	 * NOTE(review): CMB/PMR presumably mean Controller Memory Buffer and
	 * Persistent Memory Region -- confirm against the NVMe specification.
	 */

	int (*ctrlr_reserve_cmb)(struct spdk_nvme_ctrlr *ctrlr);

	void *(*ctrlr_map_cmb)(struct spdk_nvme_ctrlr *ctrlr, size_t *size);

	int (*ctrlr_unmap_cmb)(struct spdk_nvme_ctrlr *ctrlr);

	int (*ctrlr_enable_pmr)(struct spdk_nvme_ctrlr *ctrlr);

	int (*ctrlr_disable_pmr)(struct spdk_nvme_ctrlr *ctrlr);

	void *(*ctrlr_map_pmr)(struct spdk_nvme_ctrlr *ctrlr, size_t *size);

	int (*ctrlr_unmap_pmr)(struct spdk_nvme_ctrlr *ctrlr);

	/* --- I/O queue pair lifecycle --- */

	struct spdk_nvme_qpair *(*ctrlr_create_io_qpair)(struct spdk_nvme_ctrlr *ctrlr, uint16_t qid,
			const struct spdk_nvme_io_qpair_opts *opts);

	int (*ctrlr_delete_io_qpair)(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair);

	int (*ctrlr_connect_qpair)(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair);

	void (*ctrlr_disconnect_qpair)(struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair);

	/* --- Per-queue-pair request submission and completion handling --- */

	void (*qpair_abort_reqs)(struct spdk_nvme_qpair *qpair, uint32_t dnr);

	int (*qpair_reset)(struct spdk_nvme_qpair *qpair);

	int (*qpair_submit_request)(struct spdk_nvme_qpair *qpair, struct nvme_request *req);

	int32_t (*qpair_process_completions)(struct spdk_nvme_qpair *qpair, uint32_t max_completions);

	/* Takes a caller-supplied iter_fn(req, arg) together with its opaque arg. */
	int (*qpair_iterate_requests)(struct spdk_nvme_qpair *qpair,
			int (*iter_fn)(struct nvme_request *req, void *arg),
			void *arg);

	void (*admin_qpair_abort_aers)(struct spdk_nvme_qpair *qpair);

	/* --- Poll group management --- */

	struct spdk_nvme_transport_poll_group *(*poll_group_create)(void);
	struct spdk_nvme_transport_poll_group *(*qpair_get_optimal_poll_group)(
			struct spdk_nvme_qpair *qpair);

	int (*poll_group_add)(struct spdk_nvme_transport_poll_group *tgroup, struct spdk_nvme_qpair *qpair);

	int (*poll_group_remove)(struct spdk_nvme_transport_poll_group *tgroup,
			struct spdk_nvme_qpair *qpair);

	int (*poll_group_connect_qpair)(struct spdk_nvme_qpair *qpair);

	int (*poll_group_disconnect_qpair)(struct spdk_nvme_qpair *qpair);

	int64_t (*poll_group_process_completions)(struct spdk_nvme_transport_poll_group *tgroup,
			uint32_t completions_per_qpair, spdk_nvme_disconnected_qpair_cb disconnected_qpair_cb);

	int (*poll_group_destroy)(struct spdk_nvme_transport_poll_group *tgroup);

	/* Statistics are handed back through `stats` and released via poll_group_free_stats. */
	int (*poll_group_get_stats)(struct spdk_nvme_transport_poll_group *tgroup,
			struct spdk_nvme_transport_poll_group_stat **stats);

	void (*poll_group_free_stats)(struct spdk_nvme_transport_poll_group *tgroup,
			struct spdk_nvme_transport_poll_group_stat *stats);

	/* --- Miscellaneous controller queries --- */

	/* `domains` is an array of pointers with room for `array_size` entries. */
	int (*ctrlr_get_memory_domains)(const struct spdk_nvme_ctrlr *ctrlr,
			struct spdk_memory_domain **domains,
			int array_size);

	int (*ctrlr_ready)(struct spdk_nvme_ctrlr *ctrlr);

	volatile struct spdk_nvme_registers *(*ctrlr_get_registers)(struct spdk_nvme_ctrlr *ctrlr);
};

nvme_pcie_ctrlr_construct

nvme_pcie_ctrlr_construct 主要完成以下几项工作:

  • spdk_pci_device_claim
  • nvme_ctrlr_construct
  • nvme_pcie_ctrlr_allocate_bars
  • nvme_pcie_ctrlr_construct_admin_qpair
  • nvme_ctrlr_add_process

nvme_pcie_ctrlr_scan 的函数签名如下(节选):
static int
nvme_pcie_ctrlr_scan(struct spdk_nvme_probe_ctx *probe_ctx,
bool direct_connect)

关于我

我是谁

一个程序员

主要关注哪些技术

虚拟化

NVMe 设备

数据库,存储技术