Skip to content

Commit 71d1610

Browse files
committed
Sema: @memcpy changes
* The langspec definition of `@memcpy` has been changed so that the source and destination element types must be in-memory coercible, allowing all such calls to be raw copying operations which do not actually apply any coercions.
* Implement aliasing check for comptime `@memcpy`; a compile error will now be emitted if the arguments alias.
* Implement more efficient comptime `@memcpy` by loading and storing a whole array at once, similar to how `@memset` is implemented.
1 parent 97ccf35 commit 71d1610

File tree

5 files changed

+173
-52
lines changed

5 files changed

+173
-52
lines changed

src/Sema.zig

Lines changed: 66 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -25793,7 +25793,6 @@ fn zirMemcpy(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!void
2579325793
const src_len = try indexablePtrLenOrNone(sema, block, src_src, src_ptr);
2579425794
const pt = sema.pt;
2579525795
const zcu = pt.zcu;
25796-
const target = zcu.getTarget();
2579725796

2579825797
if (dest_ty.isConstPtr(zcu)) {
2579925798
return sema.fail(block, dest_src, "cannot memcpy to constant pointer", .{});
@@ -25814,6 +25813,30 @@ fn zirMemcpy(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!void
2581425813
return sema.failWithOwnedErrorMsg(block, msg);
2581525814
}
2581625815

25816+
const dest_elem_ty = dest_ty.indexablePtrElem(zcu);
25817+
const src_elem_ty = src_ty.indexablePtrElem(zcu);
25818+
25819+
const imc = try sema.coerceInMemoryAllowed(
25820+
block,
25821+
dest_elem_ty,
25822+
src_elem_ty,
25823+
false,
25824+
zcu.getTarget(),
25825+
dest_src,
25826+
src_src,
25827+
null,
25828+
);
25829+
if (imc != .ok) return sema.failWithOwnedErrorMsg(block, msg: {
25830+
const msg = try sema.errMsg(
25831+
src,
25832+
"pointer element type '{}' cannot coerce into element type '{}'",
25833+
.{ src_elem_ty.fmt(pt), dest_elem_ty.fmt(pt) },
25834+
);
25835+
errdefer msg.destroy(sema.gpa);
25836+
try imc.report(sema, src, msg);
25837+
break :msg msg;
25838+
});
25839+
2581725840
var len_val: ?Value = null;
2581825841

2581925842
if (dest_len != .none and src_len != .none) check: {
@@ -25855,61 +25878,52 @@ fn zirMemcpy(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!void
2585525878
}
2585625879
}
2585725880

25858-
const runtime_src = if (try sema.resolveDefinedValue(block, dest_src, dest_ptr)) |dest_ptr_val| rs: {
25881+
const runtime_src = rs: {
25882+
const dest_ptr_val = try sema.resolveDefinedValue(block, dest_src, dest_ptr) orelse break :rs dest_src;
25883+
const src_ptr_val = try sema.resolveDefinedValue(block, src_src, src_ptr) orelse break :rs src_src;
25884+
25885+
const raw_dest_ptr = if (dest_ty.isSlice(zcu)) dest_ptr_val.slicePtr(zcu) else dest_ptr_val;
25886+
const raw_src_ptr = if (src_ty.isSlice(zcu)) src_ptr_val.slicePtr(zcu) else src_ptr_val;
25887+
25888+
const len_u64 = try len_val.?.toUnsignedIntSema(pt);
25889+
25890+
if (Value.doPointersOverlap(
25891+
raw_src_ptr,
25892+
raw_dest_ptr,
25893+
len_u64,
25894+
zcu,
25895+
)) return sema.fail(block, src, "'@memcpy' arguments alias", .{});
25896+
2585925897
if (!sema.isComptimeMutablePtr(dest_ptr_val)) break :rs dest_src;
25860-
if (try sema.resolveDefinedValue(block, src_src, src_ptr)) |_| {
25861-
const len_u64 = try len_val.?.toUnsignedIntSema(pt);
25862-
const len = try sema.usizeCast(block, dest_src, len_u64);
25863-
for (0..len) |i| {
25864-
const elem_index = try pt.intRef(Type.usize, i);
25865-
const dest_elem_ptr = try sema.elemPtrOneLayerOnly(
25866-
block,
25867-
src,
25868-
dest_ptr,
25869-
elem_index,
25870-
src,
25871-
true, // init
25872-
false, // oob_safety
25873-
);
25874-
const src_elem_ptr = try sema.elemPtrOneLayerOnly(
25875-
block,
25876-
src,
25877-
src_ptr,
25878-
elem_index,
25879-
src,
25880-
false, // init
25881-
false, // oob_safety
25882-
);
25883-
const uncoerced_elem = try sema.analyzeLoad(block, src, src_elem_ptr, src_src);
25884-
try sema.storePtr2(
25885-
block,
25886-
src,
25887-
dest_elem_ptr,
25888-
dest_src,
25889-
uncoerced_elem,
25890-
src_src,
25891-
.store,
25892-
);
25893-
}
25894-
return;
25895-
} else break :rs src_src;
25896-
} else dest_src;
2589725898

25898-
// If in-memory coercion is not allowed, explode this memcpy call into a
25899-
// for loop that copies element-wise.
25900-
// Likewise if this is an iterable rather than a pointer, do the same
25901-
// lowering. The AIR instruction requires pointers with element types of
25902-
// equal ABI size.
25899+
// Because comptime pointer access is a somewhat expensive operation, we implement @memcpy
25900+
// as one load and store of an array, rather than N loads and stores of individual elements.
2590325901

25904-
if (dest_ty.zigTypeTag(zcu) != .pointer or src_ty.zigTypeTag(zcu) != .pointer) {
25905-
return sema.fail(block, src, "TODO: lower @memcpy to a for loop because the source or destination iterable is a tuple", .{});
25906-
}
25902+
const array_ty = try pt.arrayType(.{
25903+
.child = dest_elem_ty.toIntern(),
25904+
.len = len_u64,
25905+
});
2590725906

25908-
const dest_elem_ty = dest_ty.elemType2(zcu);
25909-
const src_elem_ty = src_ty.elemType2(zcu);
25910-
if (.ok != try sema.coerceInMemoryAllowed(block, dest_elem_ty, src_elem_ty, true, target, dest_src, src_src, null)) {
25911-
return sema.fail(block, src, "TODO: lower @memcpy to a for loop because the element types have different ABI sizes", .{});
25912-
}
25907+
const dest_array_ptr_ty = try pt.ptrType(info: {
25908+
var info = dest_ty.ptrInfo(zcu);
25909+
info.flags.size = .one;
25910+
info.child = array_ty.toIntern();
25911+
break :info info;
25912+
});
25913+
const src_array_ptr_ty = try pt.ptrType(info: {
25914+
var info = src_ty.ptrInfo(zcu);
25915+
info.flags.size = .one;
25916+
info.child = array_ty.toIntern();
25917+
break :info info;
25918+
});
25919+
25920+
const coerced_dest_ptr = try pt.getCoerced(raw_dest_ptr, dest_array_ptr_ty);
25921+
const coerced_src_ptr = try pt.getCoerced(raw_src_ptr, src_array_ptr_ty);
25922+
25923+
const array_val = try sema.pointerDeref(block, src_src, coerced_src_ptr, src_array_ptr_ty) orelse break :rs src_src;
25924+
try sema.storePtrVal(block, dest_src, coerced_dest_ptr, array_val, array_ty);
25925+
return;
25926+
};
2591325927

2591425928
// If the length is comptime-known, then upgrade src and destination types
2591525929
// into pointer-to-array. At this point we know they are both pointers

src/Type.zig

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2057,6 +2057,22 @@ pub fn elemType2(ty: Type, zcu: *const Zcu) Type {
20572057
};
20582058
}
20592059

2060+
/// Returns the element type of `ty`, which must be an indexable pointer type:
/// * `*[n]T` => `T`
/// * `[]T`   => `T`
/// * `[*]T`  => `T`
/// * `[*c]T` => `T`
///
/// `ty` is read as a pointer type, and the child of a single-item pointer is
/// read as an array type; callers must uphold both.
pub fn indexablePtrElem(ty: Type, zcu: *const Zcu) Type {
    const ip = &zcu.intern_pool;
    const ptr_info = ip.indexToKey(ty.toIntern()).ptr_type;
    return switch (ptr_info.flags.size) {
        // The pointer's child already is the element type.
        .many, .slice, .c => Type.fromInterned(ptr_info.child),
        // Single-item pointer: look through the pointed-to array.
        .one => Type.fromInterned(ip.indexToKey(ptr_info.child).array_type.child),
    };
}
2075+
20602076
fn shallowElemType(child_ty: Type, zcu: *const Zcu) Type {
20612077
return switch (child_ty.zigTypeTag(zcu)) {
20622078
.array, .vector => child_ty.childType(zcu),

src/Value.zig

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4755,3 +4755,70 @@ pub fn uninterpret(val: anytype, ty: Type, pt: Zcu.PerThread) error{ OutOfMemory
47554755
},
47564756
};
47574757
}
4758+
4759+
/// Returns whether `ptr_val_a[0..elem_count]` and `ptr_val_b[0..elem_count]` overlap.
/// `ptr_val_a` and `ptr_val_b` are indexable pointers (not slices) whose element types are in-memory coercible.
///
/// Returns `false` if the pointers have different base addresses, or if either pointer is
/// an `arr_elem` built on a base pointer with a non-zero byte offset (such a pointer is
/// invalid, and the subsequent memory access is left to report the failure).
pub fn doPointersOverlap(ptr_val_a: Value, ptr_val_b: Value, elem_count: u64, zcu: *const Zcu) bool {
    const ip = &zcu.intern_pool;

    const a_elem_ty = ptr_val_a.typeOf(zcu).indexablePtrElem(zcu);
    const b_elem_ty = ptr_val_b.typeOf(zcu).indexablePtrElem(zcu);

    const a_ptr = ip.indexToKey(ptr_val_a.toIntern()).ptr;
    const b_ptr = ip.indexToKey(ptr_val_b.toIntern()).ptr;

    // If `a_elem_ty` is not comptime-only, then overlapping pointers have identical
    // `base_addr`, and we just need to look at the byte offset. If it *is* comptime-only,
    // then `base_addr` may be an `arr_elem`, and we'll have to consider the element index.
    if (a_elem_ty.comptimeOnly(zcu)) {
        assert(a_elem_ty.toIntern() == b_elem_ty.toIntern()); // IMC comptime-only types are equivalent

        // Each pointer is resolved from its *own* `base_addr`. A `null` result means the
        // pointer is invalid, so just let the access fail.
        const a_base_addr, const a_idx = comptimeOnlyElemBase(a_ptr, a_elem_ty.toIntern(), zcu) orelse return false;
        const b_base_addr, const b_idx = comptimeOnlyElemBase(b_ptr, b_elem_ty.toIntern(), zcu) orelse return false;

        if (!std.meta.eql(a_base_addr, b_base_addr)) return false;
        const diff = if (a_idx >= b_idx) a_idx - b_idx else b_idx - a_idx;
        return diff < elem_count;
    } else {
        assert(a_elem_ty.abiSize(zcu) == b_elem_ty.abiSize(zcu));

        if (!std.meta.eql(a_ptr.base_addr, b_ptr.base_addr)) return false;

        const bytes_diff = if (a_ptr.byte_offset >= b_ptr.byte_offset)
            a_ptr.byte_offset - b_ptr.byte_offset
        else
            b_ptr.byte_offset - a_ptr.byte_offset;

        const need_bytes_diff = elem_count * a_elem_ty.abiSize(zcu);
        return bytes_diff < need_bytes_diff;
    }
}

/// Helper for `doPointersOverlap`: splits a pointer to a comptime-only element into the
/// base address of the containing array and the element index within it.
///
/// * If `ptr.base_addr` is an `arr_elem` whose base pointer's child type is `elem_ty`,
///   returns that base pointer's `base_addr` together with the `arr_elem` index.
/// * Otherwise returns `ptr.base_addr` unchanged with index 0.
/// * Returns `null` if the `arr_elem` base pointer has a non-zero byte offset.
fn comptimeOnlyElemBase(
    ptr: InternPool.Key.Ptr,
    elem_ty: InternPool.Index,
    zcu: *const Zcu,
) ?struct { InternPool.Key.Ptr.BaseAddr, u64 } {
    const ip = &zcu.intern_pool;
    switch (ptr.base_addr) {
        else => return .{ ptr.base_addr, 0 },
        .arr_elem => |arr_elem| {
            const base_ptr = Value.fromInterned(arr_elem.base);
            const base_child_ty = base_ptr.typeOf(zcu).childType(zcu);
            // This `arr_elem` indexes into some other type; treat the pointer as opaque.
            if (base_child_ty.toIntern() != elem_ty) return .{ ptr.base_addr, 0 };
            // This `arr_elem` is indexing into the element type we want.
            const base_ptr_info = ip.indexToKey(base_ptr.toIntern()).ptr;
            if (base_ptr_info.byte_offset != 0) return null;
            return .{ base_ptr_info.base_addr, arr_elem.index };
        },
    }
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
var arr: [10]u64 = undefined;
2+
export fn foo() void {
3+
@memcpy(arr[0..6], arr[4..10]);
4+
}
5+
6+
comptime {
7+
var types: [4]type = .{ u8, u16, u32, u64 };
8+
@memcpy(types[2..4], types[1..3]);
9+
}
10+
11+
// error
12+
//
13+
// :3:5: error: '@memcpy' arguments alias
14+
// :8:5: error: '@memcpy' arguments alias
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
const src: [10]u8 = @splat(0);
2+
var dest: [10]u16 = undefined;
3+
4+
export fn foo() void {
5+
@memcpy(&dest, &src);
6+
}
7+
8+
// error
9+
//
10+
// :5:5: error: pointer element type 'u8' cannot coerce into element type 'u16'

0 commit comments

Comments
 (0)