Skip to content

Commit

Permalink
Print colliding inputs (and track all inputs)
Browse files Browse the repository at this point in the history
  • Loading branch information
penguin-teal committed Nov 14, 2023
1 parent 242bc66 commit 513b415
Show file tree
Hide file tree
Showing 6 changed files with 294 additions and 62 deletions.
11 changes: 11 additions & 0 deletions include/collisions.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#ifndef COLLISIONS_H
#define COLLISIONS_H

#include <stdio.h>
#include "appArgs.h"
#include "list.h"

/*
 * Look for identical hashes in a packed hash list and report every matching
 * pair to outF.
 *
 * Nothing is done (and true is returned) unless appArgs->multi is set.
 *
 * appArgs       - application options; also forwarded when printing a hash.
 * hashListCount - length of hashList in BYTES (not the number of hashes).
 * hashSize      - size in bytes of one hash inside hashList.
 * hashList      - the hashes, stored back to back.
 * outF          - stream the report is written to.
 * allInputs     - the inputs that were hashed; entry i is used as the input
 *                 that produced hash i.
 *
 * Returns false when the internal list of collisions could not be grown,
 * true otherwise.
 */
bool reportCollisions(struct AppArgs *appArgs, uint64_t hashListCount, size_t hashSize, uint8_t *hashList, FILE *outF, string_list_T *allInputs);

#endif

22 changes: 22 additions & 0 deletions include/list.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#pragma once

#include <stddef.h>
#include <stdbool.h>
#include <stdint.h>

// Opaque growable list of byte strings; the layout lives in src/list.c.
typedef struct StringList string_list_T;

// Allocate an empty list whose initial storage is sized from
// initialCapacity (a hint for the expected number of entries).
// Returns NULL (after printing a message to stderr) if allocation fails.
string_list_T *createStringList(uint64_t initialCapacity);

// Release the list and the buffers it owns.
void destroyStringList(string_list_T *list);

// Pointer to the bytes of the entry at index. The pointer refers to the
// list's own storage, which stringListPush may reallocate. The index is not
// range-checked.
char *stringListAt(string_list_T *list, uint64_t index);

// Size in bytes of the entry at index.
// NOTE(review): the value is derived from the entry offsets, which are
// rounded up to a multiple of 8 bytes, so it can be larger than the n that
// was passed to stringListPush - confirm before relying on an exact size.
size_t stringListSizeAt(string_list_T *list, uint64_t index);

// Append a copy of n bytes starting at s. Passing n == 0 means "s is a C
// string": strlen(s) + 1 bytes are copied, so the NUL terminator is stored.
// Returns true on success.
// NOTE(review): false is meant to signal that the list could not be grown -
// confirm against the implementation.
bool stringListPush(string_list_T *list, const char *s, size_t n);

// Number of entries currently stored.
uint64_t stringListCount(string_list_T *list);

// Step through the entries: pass NULL to get the first entry, or a pointer
// previously returned for this list to get the entry after it. Returns NULL
// once there are no more entries. The index of an entry is read from the
// 8 bytes stored immediately before s, so s must point at the start of an
// entry.
char *stringListIterate(string_list_T *list, char *s);

96 changes: 96 additions & 0 deletions src/collisions.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "list.h"
#include "appArgs.h"
#include "out.h"

// Size of the buffer used to show an input next to a collision, including
// the NUL terminator (so at most 18 characters are shown).
enum { COLLISION_PREVIEW_SIZE = 19 };

// Build a NUL-terminated, printable preview of an input.
//
// preview   - destination buffer of COLLISION_PREVIEW_SIZE bytes.
// input     - start of the stored input bytes.
// inputSize - number of bytes available at input; this may include storage
//             padding after the text, so the text is taken to end at the
//             first NUL byte if there is one.
//
// Inputs that do not fit are cut short and end in "...".
static void makeInputPreview(char preview[COLLISION_PREVIEW_SIZE], const char *input, size_t inputSize)
{
    const size_t maxChars = COLLISION_PREVIEW_SIZE - 1;

    // Never look at more bytes than are needed to decide "fits or not".
    size_t scan = inputSize < COLLISION_PREVIEW_SIZE ? inputSize : COLLISION_PREVIEW_SIZE;
    const char *nul = memchr(input, '\0', scan);
    size_t len = nul ? (size_t)(nul - input) : scan;

    if(len > maxChars)
    {
        // Too long: keep the start and finish with an ellipsis
        memcpy(preview, input, maxChars - 3);
        memset(preview + maxChars - 3, '.', 3);
        len = maxChars;
    }
    else
    {
        // Copy only what the input really has; copying a fixed amount
        // would read past the end of short inputs.
        memcpy(preview, input, len);
    }

    preview[len] = '\0';
}

// Find identical hashes in hashList and report every matching pair to outF.
//
// appArgs       - application options; nothing is done unless
//                 appArgs->multi is set. Also forwarded to printOut.
// hashListCount - length of hashList in BYTES.
// hashSize      - size in bytes of a single hash.
// hashList      - the hashes, stored back to back.
// outF          - stream the report is written to.
// allInputs     - the hashed inputs; entry i is shown as the input that
//                 produced hash i.
//
// Returns true on success (including when multi is not set). Returns false
// when hashSize is 0 or when memory for the collision list could not be
// obtained; collisions found before a failed resize are still reported.
bool reportCollisions(struct AppArgs *appArgs, uint64_t hashListCount, size_t hashSize, uint8_t *hashList, FILE *outF, string_list_T *allInputs)
{
    if(!appArgs->multi) return true;

    if(hashSize == 0)
    {
        fprintf(stderr, "Cannot look for collisions with a hash size of 0.\n");
        return false;
    }

    bool ret = true;
    uint64_t collisionCount = 0;

    // Only whole hashes are compared; a trailing partial hash is ignored.
    const uint64_t hashCount = hashListCount / hashSize;

    // Each collision is stored as a pair of U64 hash indices
    const size_t pairSize = 2 * sizeof(uint64_t);

    if(hashCount > SIZE_MAX / pairSize)
    {
        fprintf(stderr, "Failed to allocate collision list.\n");
        return false;
    }

    // Start with room for one pair per hash, but never ask for 0 bytes
    size_t pairCapacity = hashCount ? (size_t)hashCount : 1;
    uint64_t *collisionIndices = malloc(pairCapacity * pairSize);
    if(!collisionIndices)
    {
        fprintf(stderr, "Failed to allocate collision list.\n");
        return false;
    }

    for(uint64_t a = 0; a < hashCount; a++)
    {
        const uint8_t *hashA = hashList + a * hashSize;
        for(uint64_t b = a + 1; b < hashCount; b++)
        {
            const uint8_t *hashB = hashList + b * hashSize;
            if(memcmp(hashA, hashB, hashSize) != 0) continue;

            if(collisionCount >= pairCapacity)
            {
                uint64_t *newArr = NULL;
                if(pairCapacity <= SIZE_MAX / pairSize / 2)
                {
                    newArr = realloc(collisionIndices, pairCapacity * 2 * pairSize);
                }
                if(!newArr)
                {
                    fprintf(stderr, "Failed to realloc collision list.\n");
                    ret = false;
                    goto ExitCollisionLoop;
                }
                collisionIndices = newArr;
                pairCapacity *= 2;
            }

            collisionIndices[collisionCount * 2] = a;
            collisionIndices[collisionCount * 2 + 1] = b;
            collisionCount++;
        }
    }
ExitCollisionLoop:

    fprintf(outF, "\n%llu Collisions\n", (unsigned long long)collisionCount);
    // The separator belongs to the report, so it goes to outF as well
    if(collisionCount > 0) fputc('\n', outF);

    for(uint64_t i = 0; i < collisionCount; i++)
    {
        uint64_t hashInxA = collisionIndices[i * 2];
        uint64_t hashInxB = collisionIndices[i * 2 + 1];

        char valA[COLLISION_PREVIEW_SIZE];
        char valB[COLLISION_PREVIEW_SIZE];
        makeInputPreview(valA, stringListAt(allInputs, hashInxA), stringListSizeAt(allInputs, hashInxA));
        makeInputPreview(valB, stringListAt(allInputs, hashInxB), stringListSizeAt(allInputs, hashInxB));

        // Hashes are numbered from 1 for the user
        fprintf(
            outF,
            "Hashes #%llu (%.18s) and #%llu (%.18s) both get this hash:\n",
            (unsigned long long)(hashInxA + 1),
            valA,
            (unsigned long long)(hashInxB + 1),
            valB
        );
        printOut(hashList + hashInxA * hashSize, outF, appArgs, hashInxA);
        fprintf(outF, "\n");
    }

    free(collisionIndices);

    return ret;
}
13 changes: 1 addition & 12 deletions src/hash.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,21 +103,10 @@ bool doHash(struct AppArgs *appArgs, FILE *outF, uint8_t *hashOut, uint64_t hash
{
uint8_t hash[16];

size_t valueLen;
if(appArgs->len)
{
valueLen = appArgs->len;
}
else
{
valueLen = strlen(appArgs->value);
if(appArgs->hashNul) valueLen++;
}

size_t hashSize = getHash(
appArgs,
(uint8_t*)appArgs->value,
valueLen,
appArgs->len,
hash,
sizeof hash
);
Expand Down
150 changes: 150 additions & 0 deletions src/list.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "list.h"

// Growable list of byte strings kept in two heap buffers: a table of
// offsets and one contiguous data area.
struct StringList
{
// One entry per stored string: the byte offset into dataBuffer at which
// that string's record begins.
uint64_t *ptrBuffer;
// Allocated size of ptrBuffer, in bytes.
size_t ptrCapacity;
// Number of strings stored (entries used in ptrBuffer).
size_t ptrLength;

// Records stored back to back. Each record is a uint64_t holding the
// string's index, followed by the string's bytes; record lengths are
// rounded up with toQWord().
char *dataBuffer;
// Allocated size of dataBuffer, in bytes.
size_t dataCapacity;
// Bytes of dataBuffer in use; the next record starts here.
size_t dataLength;
};

// Round sz up to a multiple of sizeof(uint64_t). The result is always
// strictly greater than sz: an already aligned size (0 included) still
// grows by one whole word, so the result is never 0 for small inputs.
static size_t toQWord(size_t sz)
{
    const size_t word = sizeof(uint64_t);
    const size_t wholeWords = sz / word;

    return (wholeWords + 1) * word;
}

// Make sure a heap buffer can hold at least `needed` bytes.
//
// buffer  - current allocation (as accepted by realloc).
// current - in: current capacity in bytes; out: the new capacity, or 0 if
//           the buffer had to grow and realloc failed.
// needed  - required capacity in bytes.
//
// Returns the buffer to use from now on. When growing fails a message is
// printed to stderr and the original, still valid, buffer is returned.
static void *bufferGuarantee(void *buffer, size_t *current, size_t needed)
{
    const size_t capacity = *current;

    // Already large enough: nothing changes
    if(needed <= capacity) return buffer;

    // Prefer doubling; fall back to the rounded-up request when doubling
    // would not be enough.
    const size_t doubled = capacity * 2;
    const size_t target = (doubled > needed) ? doubled : toQWord(needed);

    void *resized = realloc(buffer, target);
    if(resized == NULL)
    {
        fprintf(stderr, "Resize buffer failed.\n");
        *current = 0;
        return buffer;
    }

    *current = target;
    return resized;
}

// Allocate an empty string list.
//
// initialCapacity - expected number of strings; the offset table gets room
//                   for that many entries and the data area is sized at 16
//                   bytes per expected string (both rounded up by toQWord).
//
// Returns the new list, or NULL after printing a message to stderr when any
// of the three allocations fails. Nothing is leaked on failure.
string_list_T *createStringList(uint64_t initialCapacity)
{
    uint64_t *offsets = NULL;
    char *bytes = NULL;

    // Each step only runs if the previous allocation succeeded
    string_list_T *created = malloc(sizeof *created);
    if(created != NULL)
    {
        created->ptrCapacity = toQWord(initialCapacity * sizeof(uint64_t));
        offsets = malloc(created->ptrCapacity);
    }
    if(offsets != NULL)
    {
        created->dataCapacity = toQWord(initialCapacity * 16);
        bytes = malloc(created->dataCapacity);
    }

    if(bytes == NULL)
    {
        // bytes is only non-NULL when everything before it succeeded
        fprintf(stderr, "String list failed to allocate.\n");
        free(offsets);
        free(created);
        return NULL;
    }

    created->ptrBuffer = offsets;
    created->ptrLength = 0;
    created->dataBuffer = bytes;
    created->dataLength = 0;

    return created;
}

// Release a list created by createStringList together with both of its
// buffers. Passing NULL is allowed and does nothing (createStringList
// returns NULL on failure, so cleanup paths can call this unconditionally).
void destroyStringList(string_list_T *list)
{
    if(!list) return;

    free(list->ptrBuffer);
    free(list->dataBuffer);
    free(list);
}

// Pointer to the bytes of the string stored at index.
//
// The offset table gives the start of the string's record; the string
// itself begins right after the uint64_t at the front of that record.
// The index is not range-checked, and the returned pointer refers to the
// list's own data buffer.
char *stringListAt(string_list_T *list, uint64_t index)
{
    const uint64_t recordOffset = list->ptrBuffer[index];
    char *record = list->dataBuffer + recordOffset;

    return record + sizeof(uint64_t);
}

// Number of bytes of storage that belong to the string at index.
//
// The size is the distance from the start of the string to the start of
// the next record (or to the end of the used data for the last string).
// Records are padded to a multiple of 8 bytes when pushed, so the result
// can be larger than the length that was originally pushed.
//
// Returns 0 when index is out of range (including for an empty list).
size_t stringListSizeAt(string_list_T *list, uint64_t index)
{
    if(index >= list->ptrLength) return 0;

    // All arithmetic is done on offsets into dataBuffer
    const size_t start = (size_t)list->ptrBuffer[index] + sizeof(uint64_t);
    const size_t end = (index + 1 < list->ptrLength)
        ? (size_t)list->ptrBuffer[index + 1]
        : list->dataLength;

    return end - start;
}

// Append a copy of a string to the list.
//
// s - bytes to store.
// n - number of bytes to copy; 0 means "s is a C string", in which case
//     strlen(s) + 1 bytes are copied so the NUL terminator is stored too.
//
// Returns true on success. Returns false if either buffer could not be
// grown; the list is then left unchanged and still valid.
//
// Growing may move the data buffer, so pointers previously obtained from
// the list must not be used after a push.
bool stringListPush(string_list_T *list, const char *s, size_t n)
{
    if(n == 0) n = strlen(s) + 1;

    const uint64_t index = list->ptrLength;
    const size_t dataIndex = list->dataLength;

    // Offset table: capacity is tracked in bytes, so the request must be in
    // bytes as well.
    const size_t newPtrLen = list->ptrLength + 1;
    size_t newPtrCap = list->ptrCapacity;
    uint64_t *ptrBuffer = bufferGuarantee(list->ptrBuffer, &newPtrCap, newPtrLen * sizeof(uint64_t));
    // bufferGuarantee reports failure by zeroing the capacity it was given
    if(!newPtrCap) return false;
    list->ptrBuffer = ptrBuffer;
    list->ptrCapacity = newPtrCap;

    // Data area: room for the index header plus the bytes, padded
    const size_t newDataLen = toQWord(list->dataLength + n + sizeof(uint64_t));
    size_t newDataCap = list->dataCapacity;
    char *dataBuffer = bufferGuarantee(list->dataBuffer, &newDataCap, newDataLen);
    if(!newDataCap) return false;
    list->dataBuffer = dataBuffer;
    list->dataCapacity = newDataCap;

    // Record layout: [uint64_t index][n bytes of s][padding]
    memcpy(list->dataBuffer + dataIndex, &index, sizeof(uint64_t));
    memcpy(list->dataBuffer + dataIndex + sizeof(uint64_t), s, n);
    list->ptrBuffer[index] = dataIndex;

    list->dataLength = newDataLen;
    list->ptrLength = newPtrLen;

    return true;
}

// Number of strings currently stored in the list.
uint64_t stringListCount(string_list_T *list)
{
    const uint64_t stored = list->ptrLength;

    return stored;
}

// Step through the strings of a list.
//
// s - NULL to start from the first string, otherwise a pointer to the start
//     of a stored string; the string after it is returned.
//
// Returns a pointer to the next string, or NULL when there is none.
char *stringListIterate(string_list_T *list, char *s)
{
    uint64_t nextIndex = 0;

    if(s != NULL)
    {
        // Every string is preceded by a u64 holding its own index
        uint64_t currentIndex;
        memcpy(&currentIndex, s - sizeof currentIndex, sizeof currentIndex);
        nextIndex = currentIndex + 1;
    }

    return (nextIndex < list->ptrLength) ? stringListAt(list, nextIndex) : NULL;
}

Loading

0 comments on commit 513b415

Please sign in to comment.