LICENSE
README.md
setup.py
LongNet/__init__.py
LongNet/attend.py
LongNet/attention.py
LongNet/model.py
LongNet/training.py
LongNet/utils.py
LongNet.egg-info/PKG-INFO
LongNet.egg-info/SOURCES.txt
LongNet.egg-info/dependency_links.txt
LongNet.egg-info/requires.txt
LongNet.egg-info/top_level.txt
flash_attn/__init__.py
flash_attn/setup.py
flash_attn/flash_attn/__init__.py
flash_attn/flash_attn/bert_padding.py
flash_attn/flash_attn/flash_attention.py
flash_attn/flash_attn/flash_attn_interface.py
flash_attn/flash_attn/flash_attn_triton.py
flash_attn/flash_attn/flash_attn_triton_og.py
flash_attn/flash_attn/flash_blocksparse_attention.py
flash_attn/flash_attn/flash_blocksparse_attn_interface.py
flash_attn/flash_attn/fused_softmax.py
flash_attn/flash_attn/layers/__init__.py
flash_attn/flash_attn/layers/patch_embed.py
flash_attn/flash_attn/layers/rotary.py
flash_attn/flash_attn/losses/__init__.py
flash_attn/flash_attn/losses/cross_entropy.py
flash_attn/flash_attn/models/__init__.py
flash_attn/flash_attn/models/bert.py
flash_attn/flash_attn/models/gpt.py
flash_attn/flash_attn/models/gpt_neox.py
flash_attn/flash_attn/models/gptj.py
flash_attn/flash_attn/models/llama.py
flash_attn/flash_attn/models/opt.py
flash_attn/flash_attn/models/vit.py
flash_attn/flash_attn/modules/__init__.py
flash_attn/flash_attn/modules/block.py
flash_attn/flash_attn/modules/embedding.py
flash_attn/flash_attn/modules/mha.py
flash_attn/flash_attn/modules/mlp.py
flash_attn/flash_attn/ops/__init__.py
flash_attn/flash_attn/ops/activations.py
flash_attn/flash_attn/ops/fused_dense.py
flash_attn/flash_attn/ops/layer_norm.py
flash_attn/flash_attn/ops/rms_norm.py
flash_attn/flash_attn/utils/__init__.py
flash_attn/flash_attn/utils/benchmark.py
flash_attn/flash_attn/utils/distributed.py
flash_attn/flash_attn/utils/generation.py
flash_attn/flash_attn/utils/pretrained.py
test/test.py