How (un)ethical are instruction-centric responses of LLMs? Unveiling the vulnerabilities of safety guardrails to harmful queries
We are updating our repository. Read our paper on arXiv: https://arxiv.org/abs/2402.15302.
The TechHazardQA dataset is available on the Hugging Face Hub.
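If you want to load the dataset programmatically, a minimal sketch with the Hugging Face `datasets` library follows. The dataset identifier and split name below are assumptions; check the Hub page for the exact repository name and available splits.

```python
# Minimal sketch: load TechHazardQA from the Hugging Face Hub.
# The dataset ID and the "train" split name are assumptions --
# verify both on the dataset's Hub page before use.
from datasets import load_dataset

dataset = load_dataset("SoftMINER-Group/TechHazardQA")  # hypothetical ID
print(dataset)  # shows the available splits and features

# Inspect the first record of the (assumed) default split.
first = dataset["train"][0]
print(first)
```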
If you use this dataset, please cite our paper:
@article{DBLP:journals/corr/abs-2402-15302,
  author     = {Somnath Banerjee and
                Sayan Layek and
                Rima Hazra and
                Animesh Mukherjee},
  title      = {How (un)ethical are instruction-centric responses of LLMs? Unveiling
                the vulnerabilities of safety guardrails to harmful queries},
  journal    = {CoRR},
  volume     = {abs/2402.15302},
  year       = {2024},
  url        = {https://doi.org/10.48550/arXiv.2402.15302},
  doi        = {10.48550/ARXIV.2402.15302},
  eprinttype = {arXiv},
  eprint     = {2402.15302},
  timestamp  = {Fri, 22 Mar 2024 12:19:03 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2402-15302.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org}
}