Skip to content

[commitgraph] implement basic, low-level read API #21

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 25 commits into from
Oct 1, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
d1f0e9c
[commitgraph] implement basic, low-level read API
avoidscorn Sep 15, 2020
36953e0
[commitgraph] Take `info` dir as arg, not `objects` dir.
avoidscorn Sep 17, 2020
3c92761
[commitgraph] Remove `Kind` enum.
avoidscorn Sep 17, 2020
724f391
[commitgraph] Include in `make check` target.
avoidscorn Sep 17, 2020
1ce8468
[commitgraph] Ditch pre-generated test repos.
avoidscorn Sep 17, 2020
b59bd5e
Merge from main.
avoidscorn Sep 17, 2020
7c405ab
[commitgraph] Don't re-export graph_file symbols at crate level.
avoidscorn Sep 18, 2020
d8c2007
[commitgraph] Rename CommitData -> Commit.
avoidscorn Sep 26, 2020
f451822
[commitgraph] Rename GraphFile -> File.
avoidscorn Sep 26, 2020
66588f2
[commitgraph] Remove unused error variant.
avoidscorn Sep 26, 2020
6cf5cd8
[commitgraph] Add some doc comments.
avoidscorn Sep 26, 2020
000748c
[commitgraph] Include Conor in crate manifest.
avoidscorn Sep 26, 2020
be0e845
[commitgraph] Don't export Commit symbol at crate level.
avoidscorn Sep 26, 2020
185d14b
[commitgraph] Rearrange some `use` statements.
avoidscorn Sep 26, 2020
21e4527
[commitgraph] Use crate::graph::Graph instead of crate::Graph.
avoidscorn Sep 26, 2020
5e78213
[commitgraph] Attempt to fix bash script execution on Windows.
avoidscorn Sep 26, 2020
ca5b801
Merge branch 'main' into commit-graph
Byron Sep 28, 2020
9ae1f4b
[commitgraph] Assure git doesn't try to sign commits when fixtures ar…
Byron Oct 1, 2020
7026961
[commitgraph] refactor
Byron Oct 1, 2020
2ed0037
[commitgraph] refactor
Byron Oct 1, 2020
3c8640e
[commitgraph] refactor Graph, Position, and access module
Byron Oct 1, 2020
d2eec1d
[commitgraph] refactor graph::init module
Byron Oct 1, 2020
6f90bee
[commitgraph] Rename LexPosition to 'file::Position'
Byron Oct 1, 2020
c4b14c1
[commitgraph] refactor
Byron Oct 1, 2020
8b003a0
[commitgraph] refactor file::init
Byron Oct 1, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
[commitgraph] Rename LexPosition to 'file::Position'
That way it's in line with 'graph::Position'
  • Loading branch information
Byron committed Oct 1, 2020
commit 6f90beeb418480f9cd8bb7ae3b5db678b24103cb
30 changes: 16 additions & 14 deletions git-commitgraph/src/file/access.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
use crate::file::commit::Commit;
use crate::file::{File, LexPosition, COMMIT_DATA_ENTRY_SIZE};
use crate::file::{self, commit::Commit, File, COMMIT_DATA_ENTRY_SIZE};
use git_object::{borrowed, HashKind, SHA1_SIZE};
use std::convert::{TryFrom, TryInto};
use std::fmt::{Debug, Formatter};
use std::path::Path;
use std::{
convert::{TryFrom, TryInto},
fmt::{Debug, Formatter},
path::Path,
};

/// Access
impl File {
/// Returns the commit data for the commit located at the given lex position.
///
Expand All @@ -13,7 +15,7 @@ impl File {
/// # Panics
///
/// Panics if `pos` is out of bounds.
pub fn commit_at(&self, pos: LexPosition) -> Commit<'_> {
pub fn commit_at(&self, pos: file::Position) -> Commit<'_> {
Commit::new(self, pos)
}

Expand All @@ -24,7 +26,7 @@ impl File {
// copied from git-odb/src/pack/index/access.rs
/// Returns 20 bytes sha1 at the given index in our list of (sorted) sha1 hashes.
/// The position ranges from 0 (inclusive) to `self.num_commits()` (exclusive).
pub fn id_at(&self, pos: LexPosition) -> borrowed::Id<'_> {
pub fn id_at(&self, pos: file::Position) -> borrowed::Id<'_> {
assert!(
pos.0 < self.num_commits(),
"expected lex position less than {}, got {}",
Expand All @@ -50,15 +52,15 @@ impl File {
}

pub fn iter_commits(&self) -> impl Iterator<Item = Commit<'_>> {
(0..self.num_commits()).map(move |i| self.commit_at(LexPosition(i)))
(0..self.num_commits()).map(move |i| self.commit_at(file::Position(i)))
}

pub fn iter_ids(&self) -> impl Iterator<Item = borrowed::Id<'_>> {
(0..self.num_commits()).map(move |i| self.id_at(LexPosition(i)))
(0..self.num_commits()).map(move |i| self.id_at(file::Position(i)))
}

// copied from git-odb/src/pack/index/access.rs
pub fn lookup(&self, id: borrowed::Id<'_>) -> Option<LexPosition> {
pub fn lookup(&self, id: borrowed::Id<'_>) -> Option<file::Position> {
let first_byte = id.first_byte() as usize;
let mut upper_bound = self.fan[first_byte];
let mut lower_bound = if first_byte != 0 { self.fan[first_byte - 1] } else { 0 };
Expand All @@ -69,12 +71,12 @@ impl File {
// it should not be if the bytes match up and the type has no destructor.
while lower_bound < upper_bound {
let mid = (lower_bound + upper_bound) / 2;
let mid_sha = self.id_at(LexPosition(mid));
let mid_sha = self.id_at(file::Position(mid));

use std::cmp::Ordering::*;
match id.cmp(&mid_sha) {
Less => upper_bound = mid,
Equal => return Some(LexPosition(mid)),
Equal => return Some(file::Position(mid)),
Greater => lower_bound = mid + 1,
}
}
Expand All @@ -83,7 +85,7 @@ impl File {

/// Returns the number of commits in this graph file.
///
/// The maximum valid `LexPosition` that can be used with this file is one less than
/// The maximum valid `file::Position` that can be used with this file is one less than
/// `num_commits()`.
pub fn num_commits(&self) -> u32 {
self.fan[255]
Expand All @@ -96,7 +98,7 @@ impl File {

impl File {
/// Returns the byte slice for the given commit in this file's Commit Data (CDAT) chunk.
pub(crate) fn commit_data_bytes(&self, pos: LexPosition) -> &[u8] {
pub(crate) fn commit_data_bytes(&self, pos: file::Position) -> &[u8] {
assert!(
pos.0 < self.num_commits(),
"expected lex position less than {}, got {}",
Expand Down
36 changes: 20 additions & 16 deletions git-commitgraph/src/file/commit.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
use crate::file::{File, LexPosition};
use crate::graph::Position;
use crate::{
file::{self, File},
graph,
};
use byteorder::{BigEndian, ByteOrder};
use git_object::{borrowed, owned, SHA1_SIZE};
use quick_error::quick_error;
use std::convert::{TryFrom, TryInto};
use std::fmt::{Debug, Formatter};
use std::slice::Chunks;
use std::{
convert::{TryFrom, TryInto},
fmt::{Debug, Formatter},
slice::Chunks,
};

quick_error! {
#[derive(Debug)]
Expand Down Expand Up @@ -41,7 +45,7 @@ const EXTENDED_EDGES_MASK: u32 = 0x8000_0000;

pub struct Commit<'a> {
file: &'a File,
lex_pos: LexPosition,
lex_pos: file::Position,
// We can parse the below fields lazily if needed.
commit_timestamp: u64,
generation: u32,
Expand All @@ -51,7 +55,7 @@ pub struct Commit<'a> {
}

impl<'a> Commit<'a> {
pub(crate) fn new(file: &'a File, pos: LexPosition) -> Self {
pub(crate) fn new(file: &'a File, pos: file::Position) -> Self {
let bytes = file.commit_data_bytes(pos);
Commit {
file,
Expand Down Expand Up @@ -79,7 +83,7 @@ impl<'a> Commit<'a> {
self.generation
}

pub fn iter_parents(&'a self) -> impl Iterator<Item = Result<Position, Error>> + 'a {
pub fn iter_parents(&'a self) -> impl Iterator<Item = Result<graph::Position, Error>> + 'a {
// I didn't find a combinator approach that a) was as strict as ParentIterator, b) supported
// fuse-after-first-error behavior, and c) was significantly shorter or more understandable
// than ParentIterator. So here we are.
Expand All @@ -93,7 +97,7 @@ impl<'a> Commit<'a> {
self.file.id_at(self.lex_pos)
}

pub fn parent1(&self) -> Result<Option<Position>, Error> {
pub fn parent1(&self) -> Result<Option<graph::Position>, Error> {
self.iter_parents().next().transpose()
}

Expand Down Expand Up @@ -131,7 +135,7 @@ pub struct ParentIterator<'a> {
}

impl<'a> Iterator for ParentIterator<'a> {
type Item = Result<Position, Error>;
type Item = Result<graph::Position, Error>;

fn next(&mut self) -> Option<Self::Item> {
let state = std::mem::replace(&mut self.state, ParentIteratorState::Exhausted);
Expand Down Expand Up @@ -219,7 +223,7 @@ enum ParentIteratorState<'a> {
#[derive(Clone, Copy, Debug)]
enum ParentEdge {
None,
GraphPosition(Position),
GraphPosition(graph::Position),
ExtraEdgeIndex(u32),
}

Expand All @@ -231,24 +235,24 @@ impl ParentEdge {
if raw & EXTENDED_EDGES_MASK != 0 {
ParentEdge::ExtraEdgeIndex(raw & !EXTENDED_EDGES_MASK)
} else {
ParentEdge::GraphPosition(Position(raw))
ParentEdge::GraphPosition(graph::Position(raw))
}
}
}

const LAST_EXTENDED_EDGE_MASK: u32 = 0x8000_0000;

enum ExtraEdge {
Internal(Position),
Last(Position),
Internal(graph::Position),
Last(graph::Position),
}

impl ExtraEdge {
pub fn from_raw(raw: u32) -> Self {
if raw & LAST_EXTENDED_EDGE_MASK != 0 {
Self::Last(Position(raw & !LAST_EXTENDED_EDGE_MASK))
Self::Last(graph::Position(raw & !LAST_EXTENDED_EDGE_MASK))
} else {
Self::Internal(Position(raw))
Self::Internal(graph::Position(raw))
}
}
}
8 changes: 5 additions & 3 deletions git-commitgraph/src/file/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@ use byteorder::{BigEndian, ByteOrder};
use filebuffer::FileBuffer;
use git_object::SHA1_SIZE;
use quick_error::quick_error;
use std::convert::{TryFrom, TryInto};
use std::ops::Range;
use std::path::Path;
use std::{
convert::{TryFrom, TryInto},
ops::Range,
path::Path,
};

type ChunkId = [u8; 4];

Expand Down
23 changes: 13 additions & 10 deletions git-commitgraph/src/file/mod.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
//! Operations on a single commit-graph file.
mod access;
pub mod commit;

mod init;
pub use init::Error;

pub use commit::Commit;
use filebuffer::FileBuffer;
use git_object::SHA1_SIZE;
pub use init::Error;
use std::fmt::{Display, Formatter};
use std::ops::Range;
use std::path::PathBuf;
use std::{
fmt::{Display, Formatter},
ops::Range,
path::PathBuf,
};

const COMMIT_DATA_ENTRY_SIZE: usize = SHA1_SIZE + 16;
const FAN_LEN: usize = 256;
Expand All @@ -32,16 +35,16 @@ pub struct File {

/// The position of a given commit within a graph file, starting at 0.
///
/// Commits within a graph file are sorted in lexicographical order by OID; a commit's lex position
/// Commits within a graph file are sorted in lexicographical order by OID; a commit's lexicographical position
/// is its position in this ordering. If a commit graph spans multiple files, each file's commits
/// start at lex position 0, so lex position is unique across a single file but is not unique across
/// the whole commit graph. Each commit also has a graph position (`GraphPosition`), which is unique
/// across the whole commit graph. In order to avoid accidentally mixing lex positions with graph
/// start at lexicographical position 0, so that position is unique within a single file but is not unique across
/// the whole commit graph. Each commit also has a graph position (`graph::Position`), which is unique
/// across the whole commit graph. In order to avoid accidentally mixing lexicographical positions with graph
/// positions, distinct types are used for each.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct LexPosition(pub u32);
pub struct Position(pub u32);

impl Display for LexPosition {
impl Display for Position {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
self.0.fmt(f)
}
Expand Down
24 changes: 12 additions & 12 deletions git-commitgraph/src/graph/access.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
use crate::{
file::{Commit, File, LexPosition},
graph::{Graph, Position},
file::{self, Commit, File},
graph::{self, Graph},
};
use git_object::borrowed;

/// Access convenience
/// Access
impl Graph {
pub fn commit_at(&self, pos: Position) -> Commit<'_> {
pub fn commit_at(&self, pos: graph::Position) -> Commit<'_> {
let r = self.lookup_by_pos(pos);
r.file.commit_at(r.lex_pos)
}
Expand All @@ -16,7 +16,7 @@ impl Graph {
Some(r.file.commit_at(r.lex_pos))
}

pub fn id_at(&self, pos: Position) -> borrowed::Id<'_> {
pub fn id_at(&self, pos: graph::Position) -> borrowed::Id<'_> {
let r = self.lookup_by_pos(pos);
r.file.id_at(r.lex_pos)
}
Expand All @@ -31,7 +31,7 @@ impl Graph {
self.files.iter().flat_map(|file| file.iter_ids())
}

pub fn lookup(&self, id: borrowed::Id<'_>) -> Option<Position> {
pub fn lookup(&self, id: borrowed::Id<'_>) -> Option<graph::Position> {
Some(self.lookup_by_id(id)?.graph_pos)
}

Expand All @@ -49,23 +49,23 @@ impl Graph {
return Some(LookupByIdResult {
file,
lex_pos,
graph_pos: Position(current_file_start + lex_pos.0),
graph_pos: graph::Position(current_file_start + lex_pos.0),
});
}
current_file_start += file.num_commits();
}
None
}

fn lookup_by_pos(&self, pos: Position) -> LookupByPositionResult<'_> {
fn lookup_by_pos(&self, pos: graph::Position) -> LookupByPositionResult<'_> {
let mut remaining = pos.0;
for file in &self.files {
match remaining.checked_sub(file.num_commits()) {
Some(v) => remaining = v,
None => {
return LookupByPositionResult {
file,
lex_pos: LexPosition(remaining),
lex_pos: file::Position(remaining),
}
}
}
Expand All @@ -77,12 +77,12 @@ impl Graph {
#[derive(Clone)]
struct LookupByIdResult<'a> {
pub file: &'a File,
pub graph_pos: Position,
pub lex_pos: LexPosition,
pub graph_pos: graph::Position,
pub lex_pos: file::Position,
}

#[derive(Clone)]
struct LookupByPositionResult<'a> {
pub file: &'a File,
pub lex_pos: LexPosition,
pub lex_pos: file::Position,
}
6 changes: 6 additions & 0 deletions tasks.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@
* [ ] feature-toggled support for serde
* [ ] ~~make tests depend on checked-in fixtures, instead of generating them (and depend on git on CI), making it easy to recreate them~~
* the tests currently rely on calling git, see `inspect_refs(…)`
* **Questions**
* ~~How can `Commit` return Graph positions? It doesn't seem to learn about an offset.~~
* Parent IDs are indeed specified as graph positions, not file positions, as they may be in previous commit graph files.
* **Still to be done**
* A plumbing command to extract some value from the current implementation, maybe statistics, or verification
* Application of the command above in a stress test
* **git-config**
* A complete implementation; writing the git remote configuration is needed for finalizing the clone
* **git-ref**
Expand Down