Skip to content

Commit

Permalink
update talks, add token-bounds paper
Browse files Browse the repository at this point in the history
  • Loading branch information
bamos committed Jul 12, 2024
1 parent 0067b22 commit bb89d90
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 0 deletions.
6 changes: 6 additions & 0 deletions cv.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -507,10 +507,16 @@ repos:
desc: $\varheart$ Linux, xmonad, emacs, vim, zsh, tmux

talks:
- title: Amortized optimization for optimal transport and LLM attacks
location: ISMP
year: 2024
- title: Differentiable optimization for control and robotics
location: RSS Optimization for Robotics Workshop
url: https://sites.google.com/robotics.utias.utoronto.ca/frontiers-optimization-rss24/schedule
year: 2024
- title: Amortized optimization-based reasoning for AI
location: University of Amsterdam
year: 2024
- title: End-to-end learning geometries for graphs, dynamical systems, and regression
location: LoG New York
url: https://logmeetupnyc.github.io/
Expand Down
36 changes: 36 additions & 0 deletions publications/all.bib
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,42 @@ @inproceedings{atanackovic2024meta
}
}

@inproceedings{lotfi2024unlocking,
  title    = {Unlocking Tokens as Data Points for Generalization Bounds on Larger Language Models},
  author   = {Lotfi, Sanae and Kuang, Yilun and Finzi, Marc Anton and Amos, Brandon and Goldblum, Micah and Wilson, Andrew Gordon},
  _venue   = {ICML TF2M Workshop},
  year     = {2024},
  url      = {https://openreview.net/forum?id=cQWsTeTSkZ},
  abstract = {
    Large language models (LLMs) with billions of parameters excel at
    predicting the next token in a sequence. Recent work computes
    non-vacuous compression-based generalization bounds for LLMs, but
    these bounds are vacuous for large models at the billion-parameter
    scale. Moreover, these bounds are obtained through restrictive
    compression techniques, bounding compressed models that generate
    low-quality text. Additionally, the tightness of these existing
    bounds depends on the number of IID documents in a training set
    rather than the much larger number of non-IID constituent tokens,
    leaving untapped potential for tighter bounds. In this work, we
    instead use properties of martingales to derive generalization
    bounds that benefit from the vast number of tokens in LLM training
    sets. Since a dataset contains far more tokens than documents, our
    generalization bounds not only tolerate but actually benefit from
    far less restrictive compression schemes. With Monarch matrices,
    Kronecker factorizations, and post-training quantization, we
    achieve non-vacuous generalization bounds for LLMs as large as
    LLaMA2-70B. Unlike previous approaches, our work achieves the
    first non-vacuous bounds for models that are deployed in practice
    and generate high-quality text.
  }
}

@misc{amos2023tutorial,
title={Tutorial on amortized optimization},
author={Brandon Amos},
Expand Down

0 comments on commit bb89d90

Please sign in to comment.