diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 32707fe86..2c6c77e77 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,6 +20,13 @@ jobs: pipx install ruff ruff check + typos: + name: Typos + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: crate-ci/typos@master + pylint: name: Pylint runs-on: ubuntu-latest diff --git a/MEMO b/MEMO index f4e5c34e4..5a9438811 100644 --- a/MEMO +++ b/MEMO @@ -7,7 +7,7 @@ Documentation Notes Things to consider ^^^^^^^^^^^^^^^^^^ -- Depedencies are pointwise for shared loop dimensions +- Dependencies are pointwise for shared loop dimensions and global over non-shared ones (between dependent and ancestor) - multiple insns could fight over which iname gets local axis 0 diff --git a/contrib/mem-pattern-explorer/pattern_vis.py b/contrib/mem-pattern-explorer/pattern_vis.py index 82a2b9602..bbde23174 100644 --- a/contrib/mem-pattern-explorer/pattern_vis.py +++ b/contrib/mem-pattern-explorer/pattern_vis.py @@ -76,7 +76,7 @@ def tick(self): class Array: def __init__(self, ctx, name, shape, strides, elements_per_row=None): # Each array element stores a tuple: - # (timestamp, subgroup, g0, g1, g2, ) of last acccess + # (timestamp, subgroup, g0, g1, g2, ) of last access assert len(shape) == len(strides) diff --git a/doc/misc.rst b/doc/misc.rst index 3fea6fdd4..be1c964cd 100644 --- a/doc/misc.rst +++ b/doc/misc.rst @@ -158,7 +158,7 @@ In the meantime, you can generate code simply by saying:: print(cg_result.host_code()) print(cg_result.device_code()) -Additionally, for C-based languages, header defintions are available via:: +Additionally, for C-based languages, header definitions are available via:: loopy.generate_header(knl) @@ -338,8 +338,8 @@ This list is always growing, but here are a few pointers: Use :func:`loopy.join_inames`. -In what sense does Loopy suport vectorization? ----------------------------------------------- +In what sense does Loopy support vectorization? +----------------------------------------------- There are really two ways in which the OpenCL/CUDA model of computation exposes vectorization: @@ -352,7 +352,7 @@ vectorization: e.g. ``float4``, which support arithmetic with implicit vector semantics as well as a number of 'intrinsic' functions. -Loopy suports both. The first one, SIMT, is accessible by tagging inames with, +Loopy supports both. The first one, SIMT, is accessible by tagging inames with, e.g., ``l.0```. Accessing the second one requires using both execution- and data-reshaping capabilities in loopy. To start with, you need an array that has an axis with the length of the desired vector. If that's not yet available, diff --git a/loopy/__init__.py b/loopy/__init__.py index 275d4f26e..1eebb8223 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -563,18 +563,18 @@ def make_copy_kernel(new_dim_tags, old_dim_tags=None): indices = ["i%d" % i for i in range(rank)] shape = ["n%d" % i for i in range(rank)] - commad_indices = ", ".join(indices) + command_indices = ", ".join(indices) bounds = " and ".join( f"0<={ind}<{shape_i}" for ind, shape_i in zip(indices, shape)) set_str = "{{[{}]: {} }}".format( - commad_indices, + command_indices, bounds ) result = make_kernel(set_str, "output[%s] = input[%s]" - % (commad_indices, commad_indices), + % (command_indices, command_indices), lang_version=MOST_RECENT_LANGUAGE_VERSION, default_offset=auto) diff --git a/loopy/check.py b/loopy/check.py index 17887cff5..c2b3d8cd3 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -1123,7 +1123,7 @@ def satisfy_dep_reqs_in_order(dep_reqs_to_vars, edges, order): # for each *pred*, we will calculate all the direct/indirect # instructions that can be reached. seen_successors = set() - # first let us start with direct sucessors + # first let us start with direct successors to_check = edges[pred].copy() while to_check: successor = to_check.pop() @@ -1219,7 +1219,7 @@ def check_variable_access_ordered(kernel): """Checks that between each write to a variable and all other accesses to the variable there is either: - * a direct/indirect depdendency edge, or + * a direct/indirect dependency edge, or * an explicit statement that no ordering is necessary (expressed through a bi-directional :attr:`loopy.InstructionBase.no_sync_with`) """ diff --git a/loopy/frontend/fortran/translator.py b/loopy/frontend/fortran/translator.py index 530e92678..fc9eace87 100644 --- a/loopy/frontend/fortran/translator.py +++ b/loopy/frontend/fortran/translator.py @@ -200,7 +200,7 @@ def get_type(self, name, none_ok=False): return None raise TranslationError( - "no type for '%s' found in 'implict none' routine" + "no type for '%s' found in 'implicit none' routine" % name) from None return self.implicit_types.get(name[0], np.dtype(np.int32)) @@ -426,7 +426,7 @@ def map_Implicit(self, node): scope.implicit_types = None for stmt, specs in node.items: - if scope.implict_types is None: + if scope.implict_types is None: # spellchecker: disable-line raise TranslationError("implicit decl not allowed after " "'implicit none'") tp = self.dtype_from_stmt(stmt) diff --git a/loopy/isl_helpers.py b/loopy/isl_helpers.py index 160b6415b..28aa3be30 100644 --- a/loopy/isl_helpers.py +++ b/loopy/isl_helpers.py @@ -186,7 +186,7 @@ def simplify_pw_aff(pw_aff, context=None): continue if aff_i.gist(dom_j).is_equal(aff_j): - # aff_i is sufficient to conver aff_j, eliminate aff_j + # aff_i is sufficient to cover aff_j, eliminate aff_j new_pieces = pieces[:] if i < j: new_pieces.pop(j) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index a9b3bb07e..7bf4cb845 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -164,7 +164,7 @@ class LoopKernel(Taggable): .. attribute:: loop_priority A frozenset of priority constraints to the kernel. Each such constraint - is a tuple of inames. Inames occuring in such a tuple will be scheduled + is a tuple of inames. Inames occurring in such a tuple will be scheduled earlier than any iname following in the tuple. This applies only to inames with non-parallel implementation tags. @@ -515,7 +515,7 @@ def get_leaf_domain_indices(self, inames): for iname in inames: home_domain_index = hdm[iname] if home_domain_index in domain_indices: - # nothin' new + # nothing new continue domain_path_to_root = [home_domain_index] + ppd[home_domain_index] diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index c4cc880a0..f359eec33 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -1884,7 +1884,7 @@ def add_inferred_inames(knl): # {{{ apply single-writer heuristic @for_each_kernel -def apply_single_writer_depencency_heuristic(kernel, warn_if_used=True, +def apply_single_writer_dependency_heuristic(kernel, warn_if_used=True, error_if_used=False): logger.debug("%s: default deps" % kernel.name) @@ -2023,7 +2023,7 @@ class SliceToInameReplacer(IdentityMapper): .. attribute:: subarray_ref_bounds A :class:`list` (one entry for each :class:`SubArrayRef` to be created) - of :class:`dict` instances to store the slices enountered in the + of :class:`dict` instances to store the slices encountered in the expressions as a mapping from ``iname`` to a tuple of ``(start, stop, step)``, which describes the boxy (i.e. affine) constraints imposed on the ``iname`` by the corresponding slice notation its intended to @@ -2574,7 +2574,7 @@ def make_function(domains, instructions, kernel_data=None, **kwargs): knl = guess_arg_shape_if_requested(knl, default_order) knl = apply_default_order_to_args(knl, default_order) knl = resolve_dependencies(knl) - knl = apply_single_writer_depencency_heuristic(knl, warn_if_used=False) + knl = apply_single_writer_dependency_heuristic(knl, warn_if_used=False) # ------------------------------------------------------------------------- # Ordering dependency: diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py index e3fcf108a..e81e4dafc 100644 --- a/loopy/kernel/function_interface.py +++ b/loopy/kernel/function_interface.py @@ -672,7 +672,7 @@ def is_type_specialized(self): class CallableKernel(InKernelCallable): """ - Records informations about a callee kernel. Also provides interface through + Records information about a callee kernel. Also provides interface through member methods to make the callee kernel compatible to be called from a caller kernel. diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 198b7c03f..b9b86b53b 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -934,7 +934,7 @@ class CallInstruction(MultiAssignmentBase): A tuple of `:class:loopy.Optional`. If an entry is not empty, it contains the type that will be assigned to the new temporary variable - created from the assigment. + created from the assignment. .. automethod:: __init__ """ @@ -1099,7 +1099,7 @@ def is_array_call(assignees, expression): Returns *True* is the instruction is an array call. An array call is a function call applied to array type objects. If any of - the arguemnts or assignees to the function is an array, + the arguments or assignees to the function is an array, :meth:`is_array_call` will return *True*. """ from pymbolic.primitives import Call, Subscript @@ -1460,7 +1460,7 @@ class BarrierInstruction(_DataObliviousInstruction): .. attribute:: mem_kind A string, ``"global"`` or ``"local"``. Chooses which memory type to - sychronize, for targets that require this (e.g. OpenCL) + synchronize, for targets that require this (e.g. OpenCL) The textual syntax in a :mod:`loopy` kernel is:: diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 5ed9b2ad3..0826ed010 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -263,7 +263,7 @@ def find_all_insn_inames(kernel): if insn.within_inames_is_final: continue - # {{{ depdency-based propagation + # {{{ dependency-based propagation inames_old = insn_id_to_inames[insn.id] inames_new = inames_old | guess_iname_deps_based_on_var_use( @@ -513,8 +513,8 @@ def get_dot_dependency_graph(kernel, callables_table, iname_cluster=True, """ # make sure all automatically added stuff shows up - from loopy.kernel.creation import apply_single_writer_depencency_heuristic - kernel = apply_single_writer_depencency_heuristic(kernel, warn_if_used=False) + from loopy.kernel.creation import apply_single_writer_dependency_heuristic + kernel = apply_single_writer_dependency_heuristic(kernel, warn_if_used=False) if iname_cluster and not kernel.linearization: try: @@ -1252,9 +1252,9 @@ def find_recursive_dependencies(kernel, insn_ids): for insn_id in queue: insn = kernel.id_to_insn[insn_id] - additionals = insn.depends_on - result - result.update(additionals) - new_queue.extend(additionals) + additional = insn.depends_on - result + result.update(additional) + new_queue.extend(additional) queue = new_queue @@ -1735,7 +1735,7 @@ def get_global_barrier_order(kernel): @memoize_on_first_arg def find_most_recent_global_barrier(kernel, insn_id): - """Return the id of the latest occuring global barrier which the + """Return the id of the latest occurring global barrier which the given instruction (indirectly or directly) depends on, or *None* if this instruction does not depend on a global barrier. @@ -1995,7 +1995,7 @@ def infer_args_are_input_output(kernel): elif isinstance(arg, (ConstantArg, ImageArg, ValueArg)): pass else: - raise NotImplementedError("Unkonwn argument type %s." % type(arg)) + raise NotImplementedError("Unknown argument type %s." % type(arg)) if not (arg.is_input or arg.is_output): raise LoopyError("Kernel argument must be either input or output." diff --git a/loopy/match.py b/loopy/match.py index 889f4e74f..5e409791b 100644 --- a/loopy/match.py +++ b/loopy/match.py @@ -1,4 +1,4 @@ -"""Matching functionality for instruction ids and subsitution +"""Matching functionality for instruction ids and substitution rule invocations stacks.""" diff --git a/loopy/options.py b/loopy/options.py index 9c4fa0fb4..d58421e3e 100644 --- a/loopy/options.py +++ b/loopy/options.py @@ -118,7 +118,7 @@ class Options(ImmutableRecord): .. attribute:: cl_exec_manage_array_events - Within the PyOpenCL executor, respect and udpate + Within the PyOpenCL executor, respect and update :attr:`pyopencl.array.Array.events`. Defaults to *True*. @@ -156,7 +156,7 @@ class Options(ImmutableRecord): Allow re-ordering of floating point arithmetic. Re-ordering may give different results as floating point arithmetic is not - associative in addition and mulitplication. Default is *True*. + associative in addition and multiplication. Default is *True*. Note that the implementation of this option is currently incomplete. .. attribute:: build_options diff --git a/loopy/preprocess.py b/loopy/preprocess.py index d24e14cc2..7176d9d15 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -825,8 +825,8 @@ def preprocess_program(t_unit: TranslationUnit) -> TranslationUnit: from loopy.transform.subst import expand_subst t_unit = expand_subst(t_unit) - from loopy.kernel.creation import apply_single_writer_depencency_heuristic - t_unit = apply_single_writer_depencency_heuristic(t_unit) + from loopy.kernel.creation import apply_single_writer_dependency_heuristic + t_unit = apply_single_writer_dependency_heuristic(t_unit) # Ordering restrictions: # diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index ca45521e3..6249b36ba 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -718,7 +718,7 @@ def get_insns_in_topologically_sorted_order( for dep in insn.depends_on: rev_dep_map[dep].add(insn.id) - # For breaking ties, we compare the features of an intruction + # For breaking ties, we compare the features of an instruction # so that instructions with the same set of features are lumped # together. This helps in :method:`schedule_as_many_run_insns_as_possible` # which bails after 5 insns that don't have the same feature. @@ -1196,7 +1196,7 @@ def insn_sort_key(insn_id): print( "%(warn)swarning:%(reset_all)s '%(iname)s', " "which the schedule is " - "currently stuck inside of, seems mis-nested. " + "currently stuck inside of, seems misnested. " "'%(subdep)s' must occur " "before '%(dep)s', " "but '%(subdep)s must be outside " "'%(iname)s', whereas '%(dep)s' must be back " @@ -1404,7 +1404,7 @@ def insn_sort_key(insn_id): get_priority_tiers(wanted, sched_state.kernel.loop_priority)) # Update the loop priority set, because some constraints may have - # have been contradictary. + # have been contradictory. loop_priority_set = set().union(*[set(t) for t in priority_tiers]) priority_tiers.append( diff --git a/loopy/statistics.py b/loopy/statistics.py index c9cf9d938..0bd1340c1 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -629,7 +629,7 @@ class Op(ImmutableRecord): work-group executes on a single compute unit with all work-items within the work-group sharing local memory. A sub-group is an implementation-dependent grouping of work-items within a work-group, - analagous to an NVIDIA CUDA warp. + analogous to an NVIDIA CUDA warp. .. attribute:: kernel_name @@ -723,7 +723,7 @@ class MemAccess(ImmutableRecord): work-group executes on a single compute unit with all work-items within the work-group sharing local memory. A sub-group is an implementation-dependent grouping of work-items within a work-group, - analagous to an NVIDIA CUDA warp. + analogous to an NVIDIA CUDA warp. .. attribute:: kernel_name @@ -1109,7 +1109,7 @@ def _get_lid_and_gid_strides(knl, array, index): # create lid_strides and gid_strides dicts - # strides are coefficents in flattened index, i.e., we want + # strides are coefficients in flattened index, i.e., we want # lid_strides = {0:l0, 1:l1, 2:l2, ...} and # gid_strides = {0:g0, 1:g1, 2:g2, ...}, # where l0, l1, l2, g0, g1, and g2 come from flattened index @@ -1723,7 +1723,7 @@ def get_op_map(program, count_redundant_work=False, :arg subgroup_size: (currently unused) An :class:`int`, :class:`str` ``"guess"``, or *None* that specifies the sub-group size. An OpenCL sub-group is an implementation-dependent grouping of work-items within - a work-group, analagous to an NVIDIA CUDA warp. subgroup_size is used, + a work-group, analogous to an NVIDIA CUDA warp. subgroup_size is used, e.g., when counting a :class:`MemAccess` whose count_granularity specifies that it should only be counted once per sub-group. If set to *None* an attempt to find the sub-group size using the device will be @@ -1921,7 +1921,7 @@ def get_mem_access_map(program, count_redundant_work=False, :arg subgroup_size: An :class:`int`, :class:`str` ``"guess"``, or *None* that specifies the sub-group size. An OpenCL sub-group is an implementation-dependent grouping of work-items within a work-group, - analagous to an NVIDIA CUDA warp. subgroup_size is used, e.g., when + analogous to an NVIDIA CUDA warp. subgroup_size is used, e.g., when counting a :class:`MemAccess` whose count_granularity specifies that it should only be counted once per sub-group. If set to *None* an attempt to find the sub-group size using the device will be made, if this fails @@ -2085,7 +2085,7 @@ def get_synchronization_map(program, subgroup_size=None, entrypoint=None): :arg subgroup_size: (currently unused) An :class:`int`, :class:`str` ``"guess"``, or *None* that specifies the sub-group size. An OpenCL sub-group is an implementation-dependent grouping of work-items within - a work-group, analagous to an NVIDIA CUDA warp. subgroup_size is used, + a work-group, analogous to an NVIDIA CUDA warp. subgroup_size is used, e.g., when counting a :class:`MemAccess` whose count_granularity specifies that it should only be counted once per sub-group. If set to *None* an attempt to find the sub-group size using the device will be diff --git a/loopy/symbolic.py b/loopy/symbolic.py index d56b54e79..2a1b140cc 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -683,7 +683,7 @@ class TaggedVariable(LoopyExpressionBase, p.Variable, Taggable): A :class:`frozenset` of subclasses of :class:`pytools.tag.Tag` used to provide metadata on this object. Legacy string tags are converted to :class:`~loopy.LegacyStringInstructionTag` or, if they used to carry - a functional meaning, the tag carrying that same fucntional meaning + a functional meaning, the tag carrying that same functional meaning (e.g. :class:`~loopy.UseStreamingStoreTag`). Inherits from :class:`pymbolic.primitives.Variable` @@ -737,7 +737,7 @@ class Reduction(LoopyExpressionBase): .. attribute:: allow_simultaneous A :class:`bool`. If not *True*, an iname is allowed to be used - in precisely one reduction, to avoid mis-nesting errors. + in precisely one reduction, to avoid misnesting errors. """ init_arg_names = ("operation", "inames", "expr", "allow_simultaneous") diff --git a/loopy/target/c/c_execution.py b/loopy/target/c/c_execution.py index fc3238e92..9cde501a7 100644 --- a/loopy/target/c/c_execution.py +++ b/loopy/target/c/c_execution.py @@ -94,21 +94,21 @@ def python_dtype_str_inner(self, dtype): return f"_lpy_np.dtype(_lpy_np.{name})" raise Exception(f"dtype: {dtype} not recognized") - # {{{ handle non numpy arguements + # {{{ handle non numpy arguments def handle_non_numpy_arg(self, gen, arg): pass # }}} - # {{{ handle allocation of unspecified arguements + # {{{ handle allocation of unspecified arguments def handle_alloc( self, gen: CodeGenerator, arg: ArrayArg, strify: Callable[[Union[ExpressionT, Tuple[ExpressionT]]], str], skip_arg_checks: bool) -> None: """ - Handle allocation of non-specified arguements for C-execution + Handle allocation of non-specified arguments for C-execution """ from pymbolic import var @@ -181,7 +181,7 @@ def target_specific_preamble(self, gen): def initialize_system_args(self, gen): """ - Initializes possibly empty system arguements + Initializes possibly empty system arguments """ pass @@ -238,7 +238,7 @@ class CCompiler: The general strategy here is as follows: 1. A :class:`codepy.Toolchain` is guessed from distutils. - The user may override any flags obtained therein by passing in arguements + The user may override any flags obtained therein by passing in arguments to cc, cflags, etc. 2. The kernel source is built into and object first, then made into a shared diff --git a/loopy/target/execution.py b/loopy/target/execution.py index cb081a3e5..21600c734 100644 --- a/loopy/target/execution.py +++ b/loopy/target/execution.py @@ -377,21 +377,21 @@ def generate_value_arg_check( # }}} - # {{{ handle non numpy arguements + # {{{ handle non numpy arguments def handle_non_numpy_arg(self, gen: CodeGenerator, arg): raise NotImplementedError() # }}} - # {{{ handle allocation of unspecified arguements + # {{{ handle allocation of unspecified arguments def handle_alloc( self, gen: CodeGenerator, arg: ArrayArg, strify: Callable[[Union[ExpressionT, Tuple[ExpressionT]]], str], skip_arg_checks: bool) -> None: """ - Handle allocation of non-specified arguements for C-execution + Handle allocation of non-specified arguments for C-execution """ raise NotImplementedError() @@ -647,7 +647,7 @@ def target_specific_preamble(self, gen): def initialize_system_args(self, gen): """ - Override to intialize any default system args + Override to initialize any default system args """ raise NotImplementedError() @@ -674,7 +674,7 @@ def __call__(self, program, entrypoint, codegen_result): """ Generates the wrapping python invoker for this execution target - :arg kernel: the loopy :class:`LoopKernel`(s) to be executued + :arg kernel: the loopy :class:`LoopKernel`(s) to be executed :codegen_result: the loopy :class:`CodeGenerationResult` created by code generation @@ -944,7 +944,7 @@ def __call__(self, queue, **kwargs): # }}} -# {{{ code highlighers +# {{{ code highlighters def get_highlighted_code(text, python=False): diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index ce2a150b0..31d1cfd2d 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -112,7 +112,7 @@ def map_subscript(self, expr, type_context): if (isinstance(ary, TemporaryVariable) and ary.address_space == AddressSpace.PRIVATE): - # generate access code for acccess to private-index temporaries + # generate access code for access to private-index temporaries gsize, lsize = self.kernel.get_grid_size_upper_bounds_as_exprs() if lsize: diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index e2f3ecda2..14383e54f 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -642,7 +642,7 @@ def get_function_declaration( from loopy.target.c import FunctionDeclarationWrapper assert isinstance(fdecl, FunctionDeclarationWrapper) if not codegen_state.is_entrypoint: - # auxiliary kernels need not mention opencl speicific qualifiers + # auxiliary kernels need not mention opencl specific qualifiers # for a functions signature return preambles, fdecl @@ -908,7 +908,7 @@ def emit_atomic_update(self, codegen_state, lhs_atomicity, lhs_var, # }}} -# {{{ volatile mem acccess target +# {{{ volatile mem access target class VolatileMemExpressionToOpenCLCExpressionMapper( ExpressionToOpenCLCExpressionMapper): diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index ec702e39b..ecaea9b57 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -324,7 +324,7 @@ def binary_tree_add(start, end): # -ffp-contract=fast which is the default for PTX codegen, but # for some unknown reason, clang fails to see the FMAs. # - # We need to do this only for complex as we haev temporaries + # We need to do this only for complex as we have temporaries # only in complex. For reals, the code generated looks like # # res = c + a * b @@ -879,7 +879,7 @@ def get_kernel_call( value_arg_code = generate_value_arg_setup( codegen_state.kernel, regular_arg_names) - arry_arg_code = generate_array_arg_setup( + array_arg_code = generate_array_arg_setup( codegen_state.kernel, regular_arg_names) if struct_overflow_arg_names: @@ -952,7 +952,7 @@ def get_kernel_call( "argument count of the kernel ({_lpy_knl.num_args}).'"), Line(), value_arg_code, - arry_arg_code, + array_arg_code, overflow_args_code, Assign("_lpy_evt", f"{self.target.pyopencl_module_name}.enqueue_nd_range_kernel(" @@ -1207,7 +1207,7 @@ def get_expression_to_c_expression_mapper(self, codegen_state): # }}} -# {{{ volatile mem acccess target +# {{{ volatile mem access target class VolatileMemPyOpenCLCASTBuilder(PyOpenCLCASTBuilder): def get_expression_to_c_expression_mapper(self, codegen_state): diff --git a/loopy/transform/array_buffer_map.py b/loopy/transform/array_buffer_map.py index ec3737233..7e7b6459c 100644 --- a/loopy/transform/array_buffer_map.py +++ b/loopy/transform/array_buffer_map.py @@ -413,17 +413,17 @@ def _is_access_descriptor_in_footprint_inner(self, storage_axis_exprs): except_inames=frozenset(self.primed_sweep_inames)) s2s_domain = stor2sweep.domain() - s2s_domain, aligned_g_s2s_parm_dom = isl.align_two( + s2s_domain, aligned_g_s2s_param_dom = isl.align_two( s2s_domain, global_s2s_par_dom) arg_restrictions = ( - aligned_g_s2s_parm_dom + aligned_g_s2s_param_dom .eliminate(dim_type.set, 0, - aligned_g_s2s_parm_dom.dim(dim_type.set)) + aligned_g_s2s_param_dom.dim(dim_type.set)) .remove_divs()) return (arg_restrictions & s2s_domain).is_subset( - aligned_g_s2s_parm_dom) + aligned_g_s2s_param_dom) class NoOpArrayToBufferMap(ArrayToBufferMapBase): diff --git a/loopy/transform/callable.py b/loopy/transform/callable.py index 1fe40a370..d683cbd29 100644 --- a/loopy/transform/callable.py +++ b/loopy/transform/callable.py @@ -102,7 +102,7 @@ def merge(translation_units): if (prg_i.callables_table[clbl_name] != prg_j.callables_table[clbl_name]): # TODO: generate unique names + rename for the colliding - # callables (if entrypoints are colliding that shuold still + # callables (if entrypoints are colliding that should still # be an error) raise NotImplementedError("Translation units to be merged" " must have different callable names" diff --git a/loopy/transform/data.py b/loopy/transform/data.py index 088d89643..ddfc9b5e8 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -252,7 +252,7 @@ def add_prefetch_for_single_kernel(kernel, callables_table, var_name, footprint_subscripts, var_descr) # Our _not_provided is actually a different object from the one in the - # precompute module, but precompute acutally uses that to adjust its + # precompute module, but precompute actually uses that to adjust its # warning message. from loopy.transform.precompute import precompute_for_single_kernel @@ -653,7 +653,7 @@ def set_argument_order(kernel, arg_names): :arg arg_names: A list (or comma-separated string) or argument names. All arguments must be in this list. """ - # FIXME: @inducer -- shoulld this only affect the root kernel, or should it + # FIXME: @inducer -- should this only affect the root kernel, or should it # take a within? if isinstance(arg_names, str): diff --git a/loopy/transform/diff.py b/loopy/transform/diff.py index bb828221f..6c2688d90 100644 --- a/loopy/transform/diff.py +++ b/loopy/transform/diff.py @@ -154,7 +154,7 @@ def map_call(self, expr, *args): dc = self.diff_context if expr.function.name in dc.kernel.substitutions: - # FIXME: Deal with subsitution rules + # FIXME: Deal with substitution rules # Need to use chain rule here, too. raise NotImplementedError("substitution rules in differentiation") else: @@ -382,8 +382,8 @@ def diff_kernel(kernel, diff_outputs, by, diff_iname_prefix="diff_i", assert isinstance(kernel, LoopKernel) - from loopy.kernel.creation import apply_single_writer_depencency_heuristic - kernel = apply_single_writer_depencency_heuristic(kernel, warn_if_used=True) + from loopy.kernel.creation import apply_single_writer_dependency_heuristic + kernel = apply_single_writer_dependency_heuristic(kernel, warn_if_used=True) if isinstance(diff_outputs, str): diff_outputs = [ diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 18df3dae4..b835373da 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -1540,7 +1540,7 @@ def find_unused_axis_tag(kernel, kind, insn_match=None): break if not found: - raise LoopyError("invlaid tag kind: %s" % kind) + raise LoopyError("invalid tag kind: %s" % kind) from loopy.match import parse_match match = parse_match(insn_match) @@ -2265,7 +2265,7 @@ def add_inames_for_unused_hw_axes(kernel, within=None): Current limitations: * Only one iname in the kernel may be tagged with each of the unused hw axes. - * Occurence of an ``l.auto`` tag when an instruction is missing one of the + * Occurrence of an ``l.auto`` tag when an instruction is missing one of the local hw axes. :arg within: An instruction match as understood by diff --git a/loopy/transform/instruction.py b/loopy/transform/instruction.py index 629916628..374587da5 100644 --- a/loopy/transform/instruction.py +++ b/loopy/transform/instruction.py @@ -425,7 +425,7 @@ def insns_in_conflicting_groups(insn1_id, insn2_id): if not nosync_to_add and not empty_ok: raise LoopyError("No nosync annotations were added as a result " "of this call. add_nosync will (by default) only add them to " - "accompany existing depencies or group exclusions. Maybe you want " + "accompany existing dependencies or group exclusions. Maybe you want " "to pass force=True?") new_instructions = list(kernel.instructions) diff --git a/loopy/transform/subst.py b/loopy/transform/subst.py index b5c7aa7a1..422d22568 100644 --- a/loopy/transform/subst.py +++ b/loopy/transform/subst.py @@ -327,8 +327,8 @@ def assignment_to_subst(kernel, lhs_name, extra_arguments=(), within=None, # {{{ establish the relevant definition of lhs_name for each usage site dep_kernel = expand_subst(kernel) - from loopy.kernel.creation import apply_single_writer_depencency_heuristic - dep_kernel = apply_single_writer_depencency_heuristic(dep_kernel) + from loopy.kernel.creation import apply_single_writer_dependency_heuristic + dep_kernel = apply_single_writer_dependency_heuristic(dep_kernel) assigning_insn_ids = {insn.id for insn in dep_kernel.instructions if lhs_name in insn.assignee_var_names()} @@ -354,7 +354,7 @@ def get_relevant_definition_insn_id(usage_insn_id): if len(rel_def_ids) > 1: raise LoopyError("more than one write to '%s' found in " - "depdendencies of '%s'--definition cannot be resolved " + "dependencies of '%s'--definition cannot be resolved " "(writer instructions ids: %s)" % (lhs_name, usage_insn_id, ", ".join(rel_def_ids))) @@ -433,7 +433,7 @@ def _accesses_lhs(kernel, insn, *args): for i in indices: if not isinstance(i, Variable): raise LoopyError("In defining instruction '%s': " - "asignee index '%s' is not a plain variable. " + "assignee index '%s' is not a plain variable. " "Perhaps use loopy.affine_map_inames() " "to perform substitution." % (def_id, i)) diff --git a/loopy/translation_unit.py b/loopy/translation_unit.py index c0d1b0b05..76e795b76 100644 --- a/loopy/translation_unit.py +++ b/loopy/translation_unit.py @@ -196,7 +196,7 @@ class TranslationUnit: .. attribute:: func_id_to_in_knl_callables_mappers A :class:`frozenset` of functions of the signature ``(target: - TargetBase, function_indentifier: str)`` that returns an instance + TargetBase, function_identifier: str)`` that returns an instance of :class:`loopy.kernel.function_interface.InKernelCallable` or *None*. .. automethod:: executor diff --git a/loopy/types.py b/loopy/types.py index 143715a39..a837d1c46 100644 --- a/loopy/types.py +++ b/loopy/types.py @@ -165,7 +165,7 @@ def __repr__(self): class OpaqueType(LoopyType): """An opaque data type is truly opaque - it has no allocations, no temporaries of that type, etc. The only thing allowed is to be pass in - through one ValueArg and go out to another. It is introduced to accomodate + through one ValueArg and go out to another. It is introduced to accommodate functional calls to external libraries. """ def __init__(self, name: str) -> None: diff --git a/proto-tests/test_tim.py b/proto-tests/test_tim.py index 7ee30313c..eb8125cdb 100644 --- a/proto-tests/test_tim.py +++ b/proto-tests/test_tim.py @@ -190,7 +190,7 @@ def test_tim3d(ctx_factory): knl = lp.split_iname(knl, "k", n, inner_tag="l.2") # , slabs=(0, 1)) knl = lp.split_iname(knl, "i", n, inner_tag="l.0") # , slabs=(0, 1)) -# knl = lp.tag_inames(knl, dict(k_nner="unr")) +# knl = lp.tag_inames(knl, dict(k_inner="unr")) knl = lp.tag_inames(knl, dict(o="unr")) knl = lp.tag_inames(knl, dict(m="unr")) diff --git a/pyproject.toml b/pyproject.toml index 6f7b977b6..9dadd57f5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -90,3 +90,29 @@ module = [ "IPython.*", ] ignore_missing_imports = true + +[tool.typos.default] +extend-ignore-re = [ + "(?Rm)^.*(#|//)\\s*spellchecker:\\s*disable-line$" +] + +[tool.typos.default.extend-words] +# like the numpy function, array range +arange = "arange" +# N-Dimensional +ND = "ND" +# used for 'diff_output' +dout = "dout" +# an element-wise slice of array u +ue = "ue" +# used in an ordering context, "ab" / "ba" +ba = "ba" + +"dependees" = "dependees" + +[tool.typos.files] +extend-exclude = [ + "loopy/target/c/compyte", + "notes/*/*.eps", +] + diff --git a/test/test_apps.py b/test/test_apps.py index 207bc7ee2..c4cffaee1 100644 --- a/test/test_apps.py +++ b/test/test_apps.py @@ -324,7 +324,7 @@ def test_rob_stroud_bernstein_full(): def test_stencil(ctx_factory): ctx = ctx_factory() - # n=32 causes corner case behavior in size calculations for temprorary (a + # n=32 causes corner case behavior in size calculations for temporary (a # non-unifiable, two-constant-segments PwAff as the base index) n = 256 diff --git a/test/test_c_execution.py b/test/test_c_execution.py index e703d9415..6208b9aed 100644 --- a/test/test_c_execution.py +++ b/test/test_c_execution.py @@ -95,17 +95,17 @@ def test_c_target_strides_nonsquare(): from loopy.target.c import ExecutableCTarget def __get_kernel(order="C"): - indicies = ["i", "j", "k"] - sizes = tuple(np.random.randint(1, 11, size=len(indicies))) + indices = ["i", "j", "k"] + sizes = tuple(np.random.randint(1, 11, size=len(indices))) # create domain strings domain_template = "{{ [{iname}]: 0 <= {iname} < {size} }}" domains = [] - for idx, size in zip(indicies, sizes): + for idx, size in zip(indices, sizes): domains.append(domain_template.format( iname=idx, size=size)) statement = "out[{indexed}] = 2 * a[{indexed}]".format( - indexed=", ".join(indicies)) + indexed=", ".join(indices)) return lp.make_kernel( domains, statement, @@ -142,17 +142,17 @@ def test_c_optimizations(): from loopy.target.c import ExecutableCTarget def __get_kernel(order="C"): - indicies = ["i", "j", "k"] - sizes = tuple(np.random.randint(1, 11, size=len(indicies))) + indices = ["i", "j", "k"] + sizes = tuple(np.random.randint(1, 11, size=len(indices))) # create domain strings domain_template = "{{ [{iname}]: 0 <= {iname} < {size} }}" domains = [] - for idx, size in zip(indicies, sizes): + for idx, size in zip(indices, sizes): domains.append(domain_template.format( iname=idx, size=size)) statement = "out[{indexed}] = 2 * a[{indexed}]".format( - indexed=", ".join(indicies)) + indexed=", ".join(indices)) return lp.make_kernel( domains, statement, diff --git a/test/test_callables.py b/test/test_callables.py index d58247a75..44a94e43a 100644 --- a/test/test_callables.py +++ b/test/test_callables.py @@ -1397,8 +1397,8 @@ def test_inline_deps(ctx_factory): prg = lp.merge([parent_knl, child_knl]) inlined = lp.inline_callable_kernel(prg, "func") - from loopy.kernel.creation import apply_single_writer_depencency_heuristic - apply_single_writer_depencency_heuristic(inlined, error_if_used=True) + from loopy.kernel.creation import apply_single_writer_dependency_heuristic + apply_single_writer_dependency_heuristic(inlined, error_if_used=True) _evt, (a_dev,) = inlined(cq) diff --git a/test/test_loopy.py b/test/test_loopy.py index e9aa47ef4..34310171f 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -2306,7 +2306,7 @@ def test_barrier_in_overridden_get_grid_size_expanded_kernel(): from testlib import GridOverride - # artifically expand via overridden_get_grid_sizes_for_insn_ids + # artificially expand via overridden_get_grid_sizes_for_insn_ids knl = prog["loopy_kernel"] knl = knl.copy(overridden_get_grid_sizes_for_insn_ids=GridOverride( knl.copy(), vecsize))