use std::io::Read;
use std::mem;
use std::net::Ipv4Addr;
use std::num::Wrapping;
use std::sync::{Arc, Mutex};

use libc::EAGAIN;
use log::error;
use utils::eventfd::EventFd;
use utils::net::mac::MacAddr;
use utils::{u64_to_usize, usize_to_u64};
use vm_memory::{GuestAddress, GuestMemory, GuestMemoryError};

use crate::devices::virtio::device::{DeviceState, IrqTrigger, IrqType, VirtioDevice};
use crate::devices::virtio::gen::virtio_blk::VIRTIO_F_VERSION_1;
use crate::devices::virtio::gen::virtio_net::{
    virtio_net_hdr_v1, VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, VIRTIO_NET_F_GUEST_TSO4,
    VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_MAC,
    VIRTIO_NET_F_MRG_RXBUF,
};
use crate::devices::virtio::gen::virtio_ring::VIRTIO_RING_F_EVENT_IDX;
use crate::devices::virtio::iovec::IoVecBuffer;
use crate::devices::virtio::net::tap::Tap;
use crate::devices::virtio::net::{
    gen, NetError, NetQueue, MAX_BUFFER_SIZE, NET_QUEUE_SIZES, RX_INDEX, TX_INDEX,
};
use crate::devices::virtio::queue::Queue;
use crate::devices::virtio::{ActivateError, TYPE_NET};
use crate::devices::{report_net_event_fail, DeviceError};
use crate::dumbo::pdu::arp::ETH_IPV4_FRAME_LEN;
@@ -46,16 +48,14 @@ const FRAME_HEADER_MAX_LEN: usize = PAYLOAD_OFFSET + ETH_IPV4_FRAME_LEN;
46
48
47
49
#[ derive( Debug , thiserror:: Error , displaydoc:: Display ) ]
48
50
enum FrontendError {
49
- /// Add user.
50
- AddUsed ,
51
- /// Descriptor chain too mall.
52
- DescriptorChainTooSmall ,
53
51
/// Empty queue.
54
52
EmptyQueue ,
55
53
/// Guest memory error: {0}
56
54
GuestMemory ( GuestMemoryError ) ,
57
- /// Read only descriptor.
58
- ReadOnlyDescriptor ,
55
+ /// Attempt to write an empty packet.
56
+ AttemptToWriteEmptyPacket ,
57
+ /// Attempt to use more than 1 head to process a packet without VIRTIO_NET_F_MRG_RXBUF support.
58
+ NoMrgBufSupport ,
59
59
}
60
60
61
61
pub ( crate ) const fn vnet_hdr_len ( ) -> usize {
@@ -102,6 +102,20 @@ pub struct ConfigSpace {
102
102
// SAFETY: `ConfigSpace` contains only PODs in `repr(C)` or `repr(transparent)`, without padding.
103
103
unsafe impl ByteValued for ConfigSpace { }
104
104
105
+ // This struct contains information about partially
106
+ // written packet.
107
+ #[ derive( Debug ) ]
108
+ struct PartialWrite {
109
+ // Amount of bytes written so far.
110
+ bytes_written : usize ,
111
+ // Amount of descriptor heads used for the packet.
112
+ used_heads : u16 ,
113
+ // Guest address of the first buffer used for the packet.
114
+ // This will be used to set number of descriptors heads used
115
+ // to store the whole packet.
116
+ packet_start_addr : GuestAddress ,
117
+ }
118
+
105
119
/// VirtIO network device.
106
120
///
107
121
/// It emulates a network device able to exchange L2 frames between the guest
@@ -126,6 +140,7 @@ pub struct Net {
126
140
127
141
rx_bytes_read : usize ,
128
142
rx_frame_buf : [ u8 ; MAX_BUFFER_SIZE ] ,
143
+ rx_partial_write : Option < PartialWrite > ,
129
144
130
145
tx_frame_headers : [ u8 ; frame_hdr_len ( ) ] ,
131
146
@@ -159,6 +174,7 @@ impl Net {
159
174
| 1 << VIRTIO_NET_F_HOST_TSO4
160
175
| 1 << VIRTIO_NET_F_HOST_UFO
161
176
| 1 << VIRTIO_F_VERSION_1
177
+ | 1 << VIRTIO_NET_F_MRG_RXBUF
162
178
| 1 << VIRTIO_RING_F_EVENT_IDX ;
163
179
164
180
let mut config_space = ConfigSpace :: default ( ) ;
@@ -188,6 +204,7 @@ impl Net {
188
204
rx_deferred_frame : false ,
189
205
rx_bytes_read : 0 ,
190
206
rx_frame_buf : [ 0u8 ; MAX_BUFFER_SIZE ] ,
207
+ rx_partial_write : None ,
191
208
tx_frame_headers : [ 0u8 ; frame_hdr_len ( ) ] ,
192
209
irq_trigger : IrqTrigger :: new ( ) . map_err ( NetError :: EventFd ) ?,
193
210
config_space,
@@ -316,7 +333,7 @@ impl Net {
316
333
}
317
334
318
335
// Attempt frame delivery.
319
- let success = self . write_frame_to_guest ( ) ;
336
+ let success = self . write_frame_to_guest ( ) . is_ok ( ) ;
320
337
321
338
// Undo the tokens consumption if guest delivery failed.
322
339
if !success {
@@ -327,108 +344,149 @@ impl Net {
327
344
success
328
345
}
329
346
330
- /// Write a slice in a descriptor chain
331
- ///
332
- /// # Errors
333
- ///
334
- /// Returns an error if the descriptor chain is too short or
335
- /// an inappropriate (read only) descriptor is found in the chain
336
- fn write_to_descriptor_chain (
337
- mem : & GuestMemoryMmap ,
338
- data : & [ u8 ] ,
339
- head : DescriptorChain ,
340
- net_metrics : & NetDeviceMetrics ,
341
- ) -> Result < ( ) , FrontendError > {
342
- let mut chunk = data;
343
- let mut next_descriptor = Some ( head) ;
347
+ fn write_frame_to_guest ( & mut self ) -> Result < ( ) , FrontendError > {
348
+ // This is safe since we checked in the event handler that the device is activated.
349
+ let mem = self . device_state . mem ( ) . unwrap ( ) ;
350
+
351
+ // Check if queue is empty and early error out
352
+ if self . queues [ RX_INDEX ] . is_empty ( mem) {
353
+ return Err ( FrontendError :: EmptyQueue ) ;
354
+ }
344
355
345
- while let Some ( descriptor) = & next_descriptor {
346
- if !descriptor. is_write_only ( ) {
347
- return Err ( FrontendError :: ReadOnlyDescriptor ) ;
356
+ let actual_size = self . queues [ RX_INDEX ] . actual_size ( ) ;
357
+
358
+ let ( mut slice, mut packet_start_addr, mut used_heads, mut next_used) =
359
+ if let Some ( pw) = & self . rx_partial_write {
360
+ (
361
+ & self . rx_frame_buf [ pw. bytes_written ..self . rx_bytes_read ] ,
362
+ Some ( pw. packet_start_addr ) ,
363
+ pw. used_heads ,
364
+ self . queues [ RX_INDEX ] . next_used + Wrapping ( pw. used_heads ) ,
365
+ )
366
+ } else {
367
+ (
368
+ & self . rx_frame_buf [ ..self . rx_bytes_read ] ,
369
+ None ,
370
+ 0 ,
371
+ self . queues [ RX_INDEX ] . next_used ,
372
+ )
373
+ } ;
374
+
375
+ while !self . queues [ RX_INDEX ] . is_empty ( mem) && !slice. is_empty ( ) {
376
+ used_heads += 1 ;
377
+
378
+ if !self . has_feature ( u64:: from ( VIRTIO_NET_F_MRG_RXBUF ) ) && 1 < used_heads {
379
+ return Err ( FrontendError :: NoMrgBufSupport ) ;
348
380
}
349
381
350
- let len = std:: cmp:: min ( chunk. len ( ) , descriptor. len as usize ) ;
351
- match mem. write_slice ( & chunk[ ..len] , descriptor. addr ) {
352
- Ok ( ( ) ) => {
353
- net_metrics. rx_count . inc ( ) ;
354
- chunk = & chunk[ len..] ;
355
- }
356
- Err ( err) => {
357
- error ! ( "Failed to write slice: {:?}" , err) ;
358
- if let GuestMemoryError :: PartialBuffer { .. } = err {
359
- net_metrics. rx_partial_writes . inc ( ) ;
360
- }
361
- return Err ( FrontendError :: GuestMemory ( err) ) ;
362
- }
382
+ // SAFETY:
383
+ // This should never panic as we just checked that the queue is not empty.
384
+ let mut desc = self . queues [ RX_INDEX ] . do_pop_unchecked ( mem) . unwrap ( ) ;
385
+
386
+ let head_desc_index = desc. index ;
387
+ let mut desc_len = 0 ;
388
+
389
+ // If this is the first head of the packet, save it for later
390
+ if packet_start_addr. is_none ( ) {
391
+ packet_start_addr = Some ( desc. addr )
363
392
}
364
393
365
- // If chunk is empty we are done here.
366
- if chunk. is_empty ( ) {
367
- let len = data. len ( ) as u64 ;
368
- net_metrics. rx_bytes_count . add ( len) ;
369
- net_metrics. rx_packets_count . inc ( ) ;
370
- return Ok ( ( ) ) ;
394
+ // Handle descriptor chain head
395
+ let len = slice. len ( ) . min ( desc. len as usize ) ;
396
+ mem. write_slice ( & slice[ ..len] , desc. addr )
397
+ . map_err ( FrontendError :: GuestMemory ) ?;
398
+ desc_len += len;
399
+ slice = & slice[ len..] ;
400
+
401
+ // Handle following descriptors
402
+ while desc. has_next ( ) && !slice. is_empty ( ) {
403
+ // SAFETY:
404
+ // This should never panic as we just checked there is next descriptor.
405
+ desc = desc. next_descriptor ( ) . unwrap ( ) ;
406
+
407
+ let len = slice. len ( ) . min ( desc. len as usize ) ;
408
+ mem. write_slice ( & slice[ ..len] , desc. addr )
409
+ . map_err ( FrontendError :: GuestMemory ) ?;
410
+ desc_len += len;
411
+ slice = & slice[ len..] ;
371
412
}
372
413
373
- next_descriptor = descriptor. next_descriptor ( ) ;
414
+ // Add used descriptor head to used ring
415
+ let next_used_index = next_used. 0 % actual_size;
416
+ // SAFETY:
417
+ // This should never panic as we provide index in
418
+ // correct bounds.
419
+ self . queues [ RX_INDEX ]
420
+ . write_used_ring (
421
+ mem,
422
+ next_used_index,
423
+ head_desc_index,
424
+ u32:: try_from ( desc_len) . unwrap ( ) ,
425
+ )
426
+ . unwrap ( ) ;
427
+
428
+ // We don't update queues internal next_used value just yet.
429
+ // This is done to prevent giving information to the guest
430
+ // about descriptor heads we used for partial packets.
431
+ next_used += Wrapping ( 1 ) ;
374
432
}
375
433
376
- warn ! ( "Receiving buffer is too small to hold frame of current size" ) ;
377
- Err ( FrontendError :: DescriptorChainTooSmall )
378
- }
434
+ let packet_start_addr =
435
+ packet_start_addr. ok_or ( FrontendError :: AttemptToWriteEmptyPacket ) ?;
379
436
380
- // Copies a single frame from `self.rx_frame_buf` into the guest.
381
- fn do_write_frame_to_guest ( & mut self ) -> Result < ( ) , FrontendError > {
382
- // This is safe since we checked in the event handler that the device is activated.
383
- let mem = self . device_state . mem ( ) . unwrap ( ) ;
384
-
385
- let queue = & mut self . queues [ RX_INDEX ] ;
386
- let head_descriptor = queue. pop_or_enable_notification ( mem) . ok_or_else ( || {
387
- self . metrics . no_rx_avail_buffer . inc ( ) ;
388
- FrontendError :: EmptyQueue
389
- } ) ?;
390
- let head_index = head_descriptor. index ;
437
+ if slice. is_empty ( ) {
438
+ // Packet was fully written.
439
+ self . metrics
440
+ . rx_bytes_count
441
+ . add ( usize_to_u64 ( self . rx_bytes_read ) ) ;
442
+ self . metrics . rx_packets_count . inc ( ) ;
443
+
444
+ // TODO: can be simplified by usage of `std::mem::offset_of`
445
+ // when Rust version is updated.
446
+ //
447
+ // Update number of descriptor heads used to store a packet.
448
+ // SAFETY:
449
+ // The packet_start_addr is valid guest address and we check
450
+ // memory boundaries.
451
+ #[ allow( clippy:: transmute_ptr_to_ref) ]
452
+ let header: & mut virtio_net_hdr_v1 = unsafe {
453
+ let header_slice = mem
454
+ . get_slice ( packet_start_addr, std:: mem:: size_of :: < virtio_net_hdr_v1 > ( ) )
455
+ . map_err ( FrontendError :: GuestMemory ) ?;
456
+ std:: mem:: transmute ( header_slice. ptr_guard_mut ( ) . as_ptr ( ) )
457
+ } ;
458
+ header. num_buffers = used_heads;
391
459
392
- let result = Self :: write_to_descriptor_chain (
393
- mem,
394
- & self . rx_frame_buf [ ..self . rx_bytes_read ] ,
395
- head_descriptor,
396
- & self . metrics ,
397
- ) ;
398
- // Mark the descriptor chain as used. If an error occurred, skip the descriptor chain.
399
- let used_len = if result. is_err ( ) {
400
- self . metrics . rx_fails . inc ( ) ;
401
- 0
402
- } else {
403
- // Safe to unwrap because a frame must be smaller than 2^16 bytes.
404
- u32:: try_from ( self . rx_bytes_read ) . unwrap ( )
405
- } ;
406
- queue. add_used ( mem, head_index, used_len) . map_err ( |err| {
407
- error ! ( "Failed to add available descriptor {}: {}" , head_index, err) ;
408
- FrontendError :: AddUsed
409
- } ) ?;
460
+ // Update queues internals
461
+ self . queues [ RX_INDEX ] . next_used = next_used;
462
+ self . queues [ RX_INDEX ] . num_added += Wrapping ( used_heads) ;
410
463
411
- result
412
- }
464
+ // Update used ring with what we used to process the packet
465
+ self . queues [ RX_INDEX ] . set_used_ring_idx ( next_used. 0 , mem) ;
466
+ let next_avail = self . queues [ RX_INDEX ] . next_avail . 0 ;
467
+ self . queues [ RX_INDEX ] . set_used_ring_avail_event ( next_avail, mem) ;
413
468
414
- // Copies a single frame from `self.rx_frame_buf` into the guest. In case of an error retries
415
- // the operation if possible. Returns true if the operation was successfull.
416
- fn write_frame_to_guest ( & mut self ) -> bool {
417
- let max_iterations = self . queues [ RX_INDEX ] . actual_size ( ) ;
418
- for _ in 0 ..max_iterations {
419
- match self . do_write_frame_to_guest ( ) {
420
- Ok ( ( ) ) => return true ,
421
- Err ( FrontendError :: EmptyQueue ) | Err ( FrontendError :: AddUsed ) => {
422
- return false ;
423
- }
424
- Err ( _) => {
425
- // retry
426
- continue ;
427
- }
469
+ // Clear partial write info if there was one
470
+ self . rx_partial_write = None ;
471
+ Ok ( ( ) )
472
+ } else {
473
+ // Packet could not be fully written to the guest
474
+ // Save necessary info to use it during next invocation.
475
+ self . metrics . rx_partial_writes . inc ( ) ;
476
+
477
+ if let Some ( pw) = & mut self . rx_partial_write {
478
+ pw. bytes_written = self . rx_bytes_read - slice. len ( ) ;
479
+ pw. used_heads = used_heads;
480
+ } else {
481
+ let pw = PartialWrite {
482
+ bytes_written : self . rx_bytes_read - slice. len ( ) ,
483
+ used_heads,
484
+ packet_start_addr,
485
+ } ;
486
+ self . rx_partial_write = Some ( pw) ;
428
487
}
488
+ Err ( FrontendError :: EmptyQueue )
429
489
}
430
-
431
- false
432
490
}
433
491
434
492
// Tries to detour the frame to MMDS and if MMDS doesn't accept it, sends it on the host TAP.
0 commit comments