Skip to content

Byte index out of bounds #8681

Closed
Closed
@Jake-Shadle

Description

@Jake-Shadle

Summary

I believe this was introduced in #8450. For reference, here is the full file that triggers the ICE; unfortunately, pulling the file out on its own to try to get a minimal repro made the error go away. I was able to work around the ICE by moving the #[allow(unsafe_code)] attribute above the SAFETY: documentation.

use crate::{build_cid_raw, Cid};
use ark_file_system::PathBuf;
use bytes::{Bytes, BytesMut};
use dashmap::DashMap;
use std::{fs::File, io::Read}; // TODO: Convert to cap-std

/// A file's contents together with its content identifier and modification time.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CasFile {
    /// Content identifier, computed from `bytes` with `build_cid_raw`.
    pub cid: Cid,
    /// The file's contents.
    pub bytes: Bytes,
    /// Modification time taken from the file's metadata when it was read.
    pub last_modified: ark_time::DateTime,
}

/// Read-only content-address file system access
///
/// Results of `read` are cached per path, and file contents are de-duplicated
/// through a map keyed by content identifier.
#[derive(Default)]
pub struct CasFileSystem {
    /// Cached outcome of each `read`, keyed by path; `None` records a read that failed.
    files: DashMap<PathBuf, Option<CasFile>>,
    /// File contents keyed by content identifier, shared between files with equal content.
    blobs: DashMap<Cid, Bytes>,
}

impl CasFileSystem {
    /// Creates a `CasFileSystem` from its `Default` implementation.
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns a clone of every path that has an entry in the `files` cache.
    ///
    /// This includes paths whose read failed, since those are cached as `None`.
    pub fn file_paths(&self) -> Vec<PathBuf> {
        self.files.iter().map(|i| i.key().clone()).collect()
    }

    /// Returns a clone of the blob stored under `cid`, or `None` if there is no such blob.
    pub fn get_blob(&self, cid: &Cid) -> Option<Bytes> {
        self.blobs.get(cid).map(|b| b.value().clone())
    }

    /// Returns the file at `path`, reading it from disk on the first request and
    /// caching the outcome for later requests.
    ///
    /// Any I/O error while reading the metadata or contents is treated as "file does
    /// not exist": `None` is cached for the path and returned. Later calls with the
    /// same path return the cached outcome without reading from disk again.
    pub fn read(&self, path: PathBuf) -> Option<CasFile> {
        // get already read file
        // (a cached `None` from an earlier failed read is returned as `None` as well)
        if let Some(file) = self.files.get(&path) {
            return file.as_ref().cloned();
        }

        // try and read the file and its metadata
        // if anything fails, the file is considered to not exist
        let f = || -> std::io::Result<_> {
            let metadata = std::fs::metadata(&path)?;
            let last_modified = metadata.modified()?.into();
            let len = metadata.len();

            let mut file = File::open(&path)?;

            // simple no-unsafe version with pre-initialized data (that immediately gets overwritten)
            /*
                let mut bytes = BytesMut::new();
                bytes.resize(len as usize, 0);
                file.read_exact(&mut bytes[..])?;
            */

            // faster "unsafe" version that avoid zero initialization, ~5% faster on Windows.
            let mut bytes = BytesMut::with_capacity(len as usize);
            // SAFETY: this should be sound practice as we read in the entire file into the buffer with read_exact, so no uninitialized data can be kept.
            // we also use explicit file I/O here, not passing this to a generic Read trait that may read the uninitialized data
            #[allow(unsafe_code)]
            unsafe {
                bytes.set_len(len as usize);
            }
            file.read_exact(&mut bytes[..])?;

            Ok((bytes.freeze(), last_modified))
        };

        // store the file info and CAS blob
        let file = f()
            .map(|(bytes, last_modified)| {
                let cid = build_cid_raw(&bytes);

                // de-duplicate data with our content-addressed list of blobs
                let bytes = if let Some(r) = self.blobs.get(&cid) {
                    r.value().clone()
                } else {
                    self.blobs.insert(cid, bytes.clone());
                    bytes
                };

                CasFile {
                    cid,
                    bytes,
                    last_modified,
                }
            })
            .ok();

        // cache the outcome (including a failed read, stored as `None`)
        self.files.insert(path, file.clone());
        file
    }
}

#[cfg(test)]
mod test {
    use super::*;

    // Checks that a missing file reads as `None` and that an existing file yields its contents.
    // NOTE(review): relies on relative paths, so presumably expects to run from the crate
    // root where a `Cargo.toml` of more than 100 bytes exists — confirm.
    #[test]
    fn basic() {
        let fs = CasFileSystem::new();
        assert!(fs.read("test.test".into()).is_none());
        assert!(fs.read("Cargo.toml".into()).unwrap().bytes.len() > 100);
        //CasFileSystem::with_filter(&[PathBuf::from(".")], |_file_type, _path| -> bool { true });
    }
}

Version

rustc 1.62.0-nightly (1f7fb6413 2022-04-10)
binary: rustc
commit-hash: 1f7fb6413d6d6c0c929b223e478e44c3db991b03
commit-date: 2022-04-10
host: x86_64-unknown-linux-gnu
release: 1.62.0-nightly
LLVM version: 14.0.0

Error output

Backtrace

thread 'rustc' panicked at 'byte index 5929 is out of bounds of `use crate::{build_cid_raw, Cid};
use ark_file_system::PathBuf;
use bytes::{Bytes, BytesMut};
use dashmap::DashMap;
use std::{fs::File, io::Read}; // TODO: Convert to cap-std

#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CasFile {
  pub cid: Cid,
 `[...]', library/core/src/str/mod.rs:107:9
stack backtrace:
 0: rust_begin_unwind
           at /rustc/1f7fb6413d6d6c0c929b223e478e44c3db991b03/library/std/src/panicking.rs:584:5
 1: core::panicking::panic_fmt
           at /rustc/1f7fb6413d6d6c0c929b223e478e44c3db991b03/library/core/src/panicking.rs:142:14
 2: core::str::slice_error_fail_rt
 3: core::ops::function::FnOnce::call_once
           at /rustc/1f7fb6413d6d6c0c929b223e478e44c3db991b03/library/core/src/ops/function.rs:248:5
 4: core::intrinsics::const_eval_select
           at /rustc/1f7fb6413d6d6c0c929b223e478e44c3db991b03/library/core/src/intrinsics.rs:2370:5
 5: core::str::slice_error_fail
           at /rustc/1f7fb6413d6d6c0c929b223e478e44c3db991b03/library/core/src/str/mod.rs:86:9
 6: clippy_lints::undocumented_unsafe_blocks::text_has_safety_comment
 7: <clippy_lints::undocumented_unsafe_blocks::UndocumentedUnsafeBlocks as rustc_lint::passes::LateLintPass>::check_block
 8: <rustc_lint::late::LateLintPassObjects as rustc_lint::passes::LateLintPass>::check_block
 9: <rustc_lint::late::LateContextAndPass<rustc_lint::late::LateLintPassObjects> as rustc_hir::intravisit::Visitor>::visit_block
10: <rustc_lint::late::LateContextAndPass<rustc_lint::late::LateLintPassObjects> as rustc_hir::intravisit::Visitor>::visit_expr
11: rustc_hir::intravisit::walk_block::<rustc_lint::late::LateContextAndPass<rustc_lint::late::LateLintPassObjects>>
12: <rustc_lint::late::LateContextAndPass<rustc_lint::late::LateLintPassObjects> as rustc_hir::intravisit::Visitor>::visit_block
13: <rustc_lint::late::LateContextAndPass<rustc_lint::late::LateLintPassObjects> as rustc_hir::intravisit::Visitor>::visit_expr
14: <rustc_lint::late::LateContextAndPass<rustc_lint::late::LateLintPassObjects> as rustc_hir::intravisit::Visitor>::visit_nested_body
15: <rustc_lint::late::LateContextAndPass<rustc_lint::late::LateLintPassObjects> as rustc_hir::intravisit::Visitor>::visit_fn
16: rustc_hir::intravisit::walk_expr::<rustc_lint::late::LateContextAndPass<rustc_lint::late::LateLintPassObjects>>
17: <rustc_lint::late::LateContextAndPass<rustc_lint::late::LateLintPassObjects> as rustc_hir::intravisit::Visitor>::visit_expr
18: rustc_hir::intravisit::walk_local::<rustc_lint::late::LateContextAndPass<rustc_lint::late::LateLintPassObjects>>
19: <rustc_lint::late::LateContextAndPass<rustc_lint::late::LateLintPassObjects> as rustc_hir::intravisit::Visitor>::visit_local
20: rustc_hir::intravisit::walk_block::<rustc_lint::late::LateContextAndPass<rustc_lint::late::LateLintPassObjects>>
21: <rustc_lint::late::LateContextAndPass<rustc_lint::late::LateLintPassObjects> as rustc_hir::intravisit::Visitor>::visit_block
22: <rustc_lint::late::LateContextAndPass<rustc_lint::late::LateLintPassObjects> as rustc_hir::intravisit::Visitor>::visit_expr
23: <rustc_lint::late::LateContextAndPass<rustc_lint::late::LateLintPassObjects> as rustc_hir::intravisit::Visitor>::visit_nested_body
24: <rustc_lint::late::LateContextAndPass<rustc_lint::late::LateLintPassObjects> as rustc_hir::intravisit::Visitor>::visit_fn
25: rustc_hir::intravisit::walk_impl_item::<rustc_lint::late::LateContextAndPass<rustc_lint::late::LateLintPassObjects>>
26: <rustc_lint::late::LateContextAndPass<rustc_lint::late::LateLintPassObjects> as rustc_hir::intravisit::Visitor>::visit_nested_impl_item
27: rustc_hir::intravisit::walk_impl_item_ref::<rustc_lint::late::LateContextAndPass<rustc_lint::late::LateLintPassObjects>>
28: rustc_hir::intravisit::walk_item::<rustc_lint::late::LateContextAndPass<rustc_lint::late::LateLintPassObjects>>
29: <rustc_lint::late::LateContextAndPass<rustc_lint::late::LateLintPassObjects> as rustc_hir::intravisit::Visitor>::visit_nested_item
30: rustc_hir::intravisit::walk_mod::<rustc_lint::late::LateContextAndPass<rustc_lint::late::LateLintPassObjects>>
31: <rustc_lint::late::LateContextAndPass<rustc_lint::late::LateLintPassObjects> as rustc_hir::intravisit::Visitor>::visit_mod
32: <rustc_lint::late::LateContextAndPass<rustc_lint::late::LateLintPassObjects> as rustc_hir::intravisit::Visitor>::visit_nested_item
33: rustc_hir::intravisit::walk_mod::<rustc_lint::late::LateContextAndPass<rustc_lint::late::LateLintPassObjects>>
34: <rustc_lint::late::LateContextAndPass<rustc_lint::late::LateLintPassObjects> as rustc_hir::intravisit::Visitor>::visit_mod
35: rustc_lint::late::late_lint_pass_crate::<rustc_lint::late::LateLintPassObjects>
36: rustc_lint::late::late_lint_crate::<rustc_lint::BuiltinCombinedLateLintPass>
37: <rustc_session::session::Session>::time::<(), rustc_lint::late::check_crate<rustc_lint::BuiltinCombinedLateLintPass, rustc_interface::passes::analysis::{closure#5}::{closure#0}::{closure#2}::{closure#0}::{closure#0}>::{closure#0}::{closure#0}>
38: <rustc_session::session::Session>::time::<(), rustc_interface::passes::analysis::{closure#5}::{closure#0}::{closure#2}::{closure#0}>
39: <core::panic::unwind_safe::AssertUnwindSafe<rustc_interface::passes::analysis::{closure#5}::{closure#0}> as core::ops::function::FnOnce<()>>::call_once
40: <rustc_session::session::Session>::time::<(), rustc_interface::passes::analysis::{closure#5}>
41: rustc_interface::passes::analysis
42: <rustc_query_system::dep_graph::graph::DepGraph<rustc_middle::dep_graph::dep_node::DepKind>>::with_task::<rustc_middle::ty::context::TyCtxt, (), core::result::Result<(), rustc_errors::ErrorGuaranteed>>
43: rustc_query_system::query::plumbing::try_execute_query::<rustc_query_impl::plumbing::QueryCtxt, rustc_query_system::query::caches::DefaultCache<(), core::result::Result<(), rustc_errors::ErrorGuaranteed>>>
44: rustc_query_system::query::plumbing::get_query::<rustc_query_impl::queries::analysis, rustc_query_impl::plumbing::QueryCtxt>
45: <rustc_interface::passes::QueryContext>::enter::<rustc_driver::run_compiler::{closure#1}::{closure#2}::{closure#3}, core::result::Result<(), rustc_errors::ErrorGuaranteed>>
46: <rustc_interface::interface::Compiler>::enter::<rustc_driver::run_compiler::{closure#1}::{closure#2}, core::result::Result<core::option::Option<rustc_interface::queries::Linker>, rustc_errors::ErrorGuaranteed>>
47: rustc_span::with_source_map::<core::result::Result<(), rustc_errors::ErrorGuaranteed>, rustc_interface::interface::create_compiler_and_run<core::result::Result<(), rustc_errors::ErrorGuaranteed>, rustc_driver::run_compiler::{closure#1}>::{closure#1}>
48: <scoped_tls::ScopedKey<rustc_span::SessionGlobals>>::set::<rustc_interface::interface::run_compiler<core::result::Result<(), rustc_errors::ErrorGuaranteed>, rustc_driver::run_compiler::{closure#1}>::{closure#0}, core::result::Result<(), rustc_errors::ErrorGuaranteed>>
note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace.

error: internal compiler error: unexpected panic

note: the compiler unexpectedly panicked. this is a bug.

note: we would appreciate a bug report: https://github.com/rust-lang/rust-clippy/issues/new

note: Clippy version: clippy 0.1.62 (1f7fb64 2022-04-10)

query stack during panic:
#0 [analysis] running analysis passes on this crate
end of query stack
error: could not compile `ark-storage`

Metadata

Metadata

Assignees

No one assigned

    Labels

    C-bug (Category: Clippy is not doing the correct thing), I-ICE (Issue: Clippy panicked, giving an Internal Compilation Error (ICE) ❄️)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions