Commit 86839c5

Bob Liu authored and Konrad Rzeszutek Wilk committed
xen/block: add multi-page ring support
Extend xen/block to support multi-page rings, so that more requests can
be issued by using more than one page as the request ring between
blkfront and the backend. As a result, performance can improve
significantly. We measured impressive improvements on our high-end
iSCSI storage cluster backend: with 64 pages as the ring, IOPS
increased about 15x in the throughput test and more than doubled in
the latency test. The reason is that a single-page ring caps
outstanding requests at 32; in our case the iSCSI LUN was spread
across about 100 physical drives, and 32 requests were simply not
enough to keep them busy.

Changes in v2:
- Rebased to 4.0-rc6.
- Documented how the multi-page ring feature works in linux io/blkif.h.

Changes in v3:
- Removed the changes to linux io/blkif.h and followed the protocol
  defined in io/blkif.h of the Xen tree instead.
- Rebased to 4.1-rc3.

Changes in v4:
- Switched to 'ring-page-order' and 'max-ring-page-order'.
- Addressed a few comments from Roger.

Changes in v5:
- Clarified the 4K granularity in a comment.
- Addressed more comments from Roger.

Signed-off-by: Bob Liu <bob.liu@oracle.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
1 parent 8ab0144 commit 86839c5
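
The 32-requests-per-page figure in the commit message follows directly
from the ring layout: the number of slots is the largest power of two
that fits in the shared area after the ring header. The self-contained
sketch below reproduces that arithmetic; the 64-byte header and
112-byte entry size are assumed x86-64 blkif ABI values, not constants
defined by this patch.

#include <stdio.h>

/*
 * Back-of-the-envelope model of __CONST_RING_SIZE() from
 * xen/interface/io/ring.h: the slot count is the largest power of two
 * that fits after the shared header. The sizes below are assumptions
 * (x86-64 blkif ABI); verify against your headers before relying on them.
 */
#define RING_HDR_BYTES   64U    /* assumed size of the sring header */
#define RING_ENTRY_BYTES 112U   /* assumed sizeof(union blkif_sring_entry) */
#define XEN_PAGE_BYTES   4096U  /* 4KB granularity, as in the patch */

static unsigned int ring_slots(unsigned int nr_pages)
{
        unsigned int raw = (nr_pages * XEN_PAGE_BYTES - RING_HDR_BYTES) /
                           RING_ENTRY_BYTES;
        unsigned int pow2 = 1;

        while (pow2 * 2 <= raw)         /* round down to a power of two */
                pow2 *= 2;
        return pow2;
}

int main(void)
{
        printf("1 page   -> %u requests\n", ring_slots(1));   /* 32   */
        printf("64 pages -> %u requests\n", ring_slots(64));  /* 2048 */
        return 0;
}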

4 files changed (+180, -59 lines)


drivers/block/xen-blkback/blkback.c

Lines changed: 13 additions & 0 deletions
@@ -83,6 +83,13 @@ module_param_named(max_persistent_grants, xen_blkif_max_pgrants, int, 0644);
 MODULE_PARM_DESC(max_persistent_grants,
                  "Maximum number of grants to map persistently");
 
+/*
+ * Maximum order of pages to be used for the shared ring between front and
+ * backend, 4KB page granularity is used.
+ */
+unsigned int xen_blkif_max_ring_order = XENBUS_MAX_RING_PAGE_ORDER;
+module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO);
+MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
 /*
  * The LRU mechanism to clean the lists of persistent grants needs to
  * be executed periodically. The time interval between consecutive executions
@@ -1451,6 +1458,12 @@ static int __init xen_blkif_init(void)
         if (!xen_domain())
                 return -ENODEV;
 
+        if (xen_blkif_max_ring_order > XENBUS_MAX_RING_PAGE_ORDER) {
+                pr_info("Invalid max_ring_order (%d), will use default max: %d.\n",
+                        xen_blkif_max_ring_order, XENBUS_MAX_RING_PAGE_ORDER);
+                xen_blkif_max_ring_order = XENBUS_MAX_RING_PAGE_ORDER;
+        }
+
         rc = xen_blkif_interface_init();
         if (rc)
                 goto failed_init;
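
Note that the new parameter is an order, not a page count: loading
xen-blkback with, say, max_ring_page_order=2 (an illustrative value,
not from the patch) would allow rings of up to 1 << 2 = 4 pages, and
the init-time check above clamps anything beyond
XENBUS_MAX_RING_PAGE_ORDER back to the compiled-in maximum.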

drivers/block/xen-blkback/common.h

Lines changed: 2 additions & 0 deletions
@@ -44,6 +44,7 @@
 #include <xen/interface/io/blkif.h>
 #include <xen/interface/io/protocols.h>
 
+extern unsigned int xen_blkif_max_ring_order;
 /*
  * This is the maximum number of segments that would be allowed in indirect
  * requests. This value will also be passed to the frontend.
@@ -320,6 +321,7 @@ struct xen_blkif {
         struct work_struct free_work;
         /* Thread shutdown wait queue. */
         wait_queue_head_t shutdown_wq;
+        unsigned int nr_ring_pages;
 };
 
 struct seg_buf {

drivers/block/xen-blkback/xenbus.c

Lines changed: 69 additions & 20 deletions
@@ -25,6 +25,7 @@
 
 /* Enlarge the array size in order to fully show blkback name. */
 #define BLKBACK_NAME_LEN (20)
+#define RINGREF_NAME_LEN (20)
 
 struct backend_info {
         struct xenbus_device *dev;
@@ -156,16 +157,16 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
         return blkif;
 }
 
-static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref,
-                         unsigned int evtchn)
+static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t *gref,
+                         unsigned int nr_grefs, unsigned int evtchn)
 {
         int err;
 
         /* Already connected through? */
         if (blkif->irq)
                 return 0;
 
-        err = xenbus_map_ring_valloc(blkif->be->dev, &gref, 1,
+        err = xenbus_map_ring_valloc(blkif->be->dev, gref, nr_grefs,
                                      &blkif->blk_ring);
         if (err < 0)
                 return err;
@@ -175,21 +176,21 @@ static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref,
         {
                 struct blkif_sring *sring;
                 sring = (struct blkif_sring *)blkif->blk_ring;
-                BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE);
+                BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE * nr_grefs);
                 break;
         }
         case BLKIF_PROTOCOL_X86_32:
         {
                 struct blkif_x86_32_sring *sring_x86_32;
                 sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring;
-                BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE);
+                BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE * nr_grefs);
                 break;
         }
         case BLKIF_PROTOCOL_X86_64:
         {
                 struct blkif_x86_64_sring *sring_x86_64;
                 sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring;
-                BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE);
+                BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE * nr_grefs);
                 break;
         }
         default:
@@ -270,7 +271,7 @@ static void xen_blkif_free(struct xen_blkif *blkif)
                 i++;
         }
 
-        WARN_ON(i != XEN_BLKIF_REQS_PER_PAGE);
+        WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
 
         kmem_cache_free(xen_blkif_cachep, blkif);
 }
@@ -555,6 +556,11 @@ static int xen_blkbk_probe(struct xenbus_device *dev,
         if (err)
                 goto fail;
 
+        err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order", "%u",
+                            xen_blkif_max_ring_order);
+        if (err)
+                pr_warn("%s write out 'max-ring-page-order' failed\n", __func__);
+
         err = xenbus_switch_state(dev, XenbusStateInitWait);
         if (err)
                 goto fail;
@@ -818,23 +824,66 @@ static void connect(struct backend_info *be)
 static int connect_ring(struct backend_info *be)
 {
         struct xenbus_device *dev = be->dev;
-        unsigned long ring_ref;
-        unsigned int evtchn;
+        unsigned int ring_ref[XENBUS_MAX_RING_PAGES];
+        unsigned int evtchn, nr_grefs, ring_page_order;
         unsigned int pers_grants;
         char protocol[64] = "";
         struct pending_req *req, *n;
         int err, i, j;
 
         pr_debug("%s %s\n", __func__, dev->otherend);
 
-        err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu",
-                            &ring_ref, "event-channel", "%u", &evtchn, NULL);
-        if (err) {
-                xenbus_dev_fatal(dev, err,
-                                 "reading %s/ring-ref and event-channel",
+        err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u",
+                           &evtchn);
+        if (err != 1) {
+                err = -EINVAL;
+                xenbus_dev_fatal(dev, err, "reading %s/event-channel",
                                  dev->otherend);
                 return err;
         }
+        pr_info("event-channel %u\n", evtchn);
+
+        err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
+                           &ring_page_order);
+        if (err != 1) {
+                err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref",
+                                   "%u", &ring_ref[0]);
+                if (err != 1) {
+                        err = -EINVAL;
+                        xenbus_dev_fatal(dev, err, "reading %s/ring-ref",
+                                         dev->otherend);
+                        return err;
+                }
+                nr_grefs = 1;
+                pr_info("%s:using single page: ring-ref %d\n", dev->otherend,
+                        ring_ref[0]);
+        } else {
+                unsigned int i;
+
+                if (ring_page_order > xen_blkif_max_ring_order) {
+                        err = -EINVAL;
+                        xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceed max:%d",
+                                         dev->otherend, ring_page_order,
+                                         xen_blkif_max_ring_order);
+                        return err;
+                }
+
+                nr_grefs = 1 << ring_page_order;
+                for (i = 0; i < nr_grefs; i++) {
+                        char ring_ref_name[RINGREF_NAME_LEN];
+
+                        snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
+                        err = xenbus_scanf(XBT_NIL, dev->otherend, ring_ref_name,
+                                           "%u", &ring_ref[i]);
+                        if (err != 1) {
+                                err = -EINVAL;
+                                xenbus_dev_fatal(dev, err, "reading %s/%s",
                                                 dev->otherend, ring_ref_name);
+                                return err;
+                        }
+                        pr_info("ring-ref%u: %u\n", i, ring_ref[i]);
+                }
+        }
 
         be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
         err = xenbus_gather(XBT_NIL, dev->otherend, "protocol",
@@ -859,12 +908,13 @@ static int connect_ring(struct backend_info *be)
 
         be->blkif->vbd.feature_gnt_persistent = pers_grants;
         be->blkif->vbd.overflow_max_grants = 0;
+        be->blkif->nr_ring_pages = nr_grefs;
 
-        pr_info("ring-ref %ld, event-channel %d, protocol %d (%s) %s\n",
-                ring_ref, evtchn, be->blkif->blk_protocol, protocol,
+        pr_info("ring-pages:%d, event-channel %d, protocol %d (%s) %s\n",
+                nr_grefs, evtchn, be->blkif->blk_protocol, protocol,
                 pers_grants ? "persistent grants" : "");
 
-        for (i = 0; i < XEN_BLKIF_REQS_PER_PAGE; i++) {
+        for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) {
                 req = kzalloc(sizeof(*req), GFP_KERNEL);
                 if (!req)
                         goto fail;
@@ -883,10 +933,9 @@ static int connect_ring(struct backend_info *be)
         }
 
         /* Map the shared frame, irq etc. */
-        err = xen_blkif_map(be->blkif, ring_ref, evtchn);
+        err = xen_blkif_map(be->blkif, ring_ref, nr_grefs, evtchn);
         if (err) {
-                xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u",
-                                 ring_ref, evtchn);
+                xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn);
                 return err;
         }
 
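To make the xenstore handshake in connect_ring() easier to follow,
here is a minimal sketch of the frontend half (the real counterpart
lives in xen-blkfront, whose hunks are not shown on this page).
publish_ring_refs() and wanted_order are hypothetical names; the
xenbus calls and key names match what the backend reads above,
RINGREF_NAME_LEN is reused from the hunk above for brevity, and
ring_refs[] is assumed to already hold grant references created with
gnttab_grant_foreign_access().

/*
 * Sketch only, not the actual xen-blkfront code: negotiate the ring
 * size with the backend and publish the grant references.
 */
static int publish_ring_refs(struct xenbus_device *dev,
                             grant_ref_t *ring_refs,
                             unsigned int wanted_order)
{
        unsigned int backend_max = 0, order, i;
        int err;

        /* An absent "max-ring-page-order" key means a legacy backend
         * that only supports a single-page ring (order 0). */
        if (xenbus_scanf(XBT_NIL, dev->otherend,
                         "max-ring-page-order", "%u", &backend_max) != 1)
                backend_max = 0;

        order = min(wanted_order, backend_max);

        if (order == 0) /* legacy protocol: single "ring-ref" key */
                return xenbus_printf(XBT_NIL, dev->nodename,
                                     "ring-ref", "%u", ring_refs[0]);

        /* Multi-page protocol: "ring-page-order" plus one key per page. */
        err = xenbus_printf(XBT_NIL, dev->nodename,
                            "ring-page-order", "%u", order);
        for (i = 0; !err && i < (1U << order); i++) {
                char name[RINGREF_NAME_LEN];

                snprintf(name, sizeof(name), "ring-ref%u", i);
                err = xenbus_printf(XBT_NIL, dev->nodename,
                                    name, "%u", ring_refs[i]);
        }
        return err;
}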
