LICENSE
MANIFEST.in
README.md
requirements.txt
setup.py
gptqmodel/__init__.py
gptqmodel/version.py
gptqmodel.egg-info/PKG-INFO
gptqmodel.egg-info/SOURCES.txt
gptqmodel.egg-info/dependency_links.txt
gptqmodel.egg-info/requires.txt
gptqmodel.egg-info/top_level.txt
gptqmodel/eval_tasks/__init__.py
gptqmodel/eval_tasks/_base.py
gptqmodel/eval_tasks/language_modeling_task.py
gptqmodel/eval_tasks/sequence_classification_task.py
gptqmodel/eval_tasks/text_summarization_task.py
gptqmodel/eval_tasks/_utils/__init__.py
gptqmodel/eval_tasks/_utils/classification_utils.py
gptqmodel/eval_tasks/_utils/data_utils.py
gptqmodel/eval_tasks/_utils/generation_utils.py
gptqmodel/integration/__init__.py
gptqmodel/integration/optimum/__init__.py
gptqmodel/integration/optimum/constants.py
gptqmodel/integration/optimum/data.py
gptqmodel/integration/optimum/hf_quantizer_gptq.py
gptqmodel/integration/optimum/quantizer.py
gptqmodel/integration/optimum/utils.py
gptqmodel/models/__init__.py
gptqmodel/models/_const.py
gptqmodel/models/auto.py
gptqmodel/models/base.py
gptqmodel/models/loader.py
gptqmodel/models/writer.py
gptqmodel/models/definitions/__init__.py
gptqmodel/models/definitions/baichuan.py
gptqmodel/models/definitions/bloom.py
gptqmodel/models/definitions/chatglm.py
gptqmodel/models/definitions/codegen.py
gptqmodel/models/definitions/cohere.py
gptqmodel/models/definitions/dbrx.py
gptqmodel/models/definitions/dbrx_converted.py
gptqmodel/models/definitions/decilm.py
gptqmodel/models/definitions/deepseek_v2.py
gptqmodel/models/definitions/exaone.py
gptqmodel/models/definitions/gemma.py
gptqmodel/models/definitions/gemma2.py
gptqmodel/models/definitions/gpt2.py
gptqmodel/models/definitions/gpt_bigcode.py
gptqmodel/models/definitions/gpt_neox.py
gptqmodel/models/definitions/gptj.py
gptqmodel/models/definitions/granite.py
gptqmodel/models/definitions/grinmoe.py
gptqmodel/models/definitions/internlm.py
gptqmodel/models/definitions/internlm2.py
gptqmodel/models/definitions/llama.py
gptqmodel/models/definitions/longllama.py
gptqmodel/models/definitions/minicpm.py
gptqmodel/models/definitions/minicpm3.py
gptqmodel/models/definitions/mistral.py
gptqmodel/models/definitions/mixtral.py
gptqmodel/models/definitions/mllama.py
gptqmodel/models/definitions/mobilellm.py
gptqmodel/models/definitions/moss.py
gptqmodel/models/definitions/mpt.py
gptqmodel/models/definitions/opt.py
gptqmodel/models/definitions/phi.py
gptqmodel/models/definitions/phi3.py
gptqmodel/models/definitions/qwen.py
gptqmodel/models/definitions/qwen2.py
gptqmodel/models/definitions/qwen2_moe.py
gptqmodel/models/definitions/rw.py
gptqmodel/models/definitions/stablelmepoch.py
gptqmodel/models/definitions/starcoder2.py
gptqmodel/models/definitions/xverse.py
gptqmodel/models/definitions/yi.py
gptqmodel/nn_modules/__init__.py
gptqmodel/nn_modules/qlinear/__init__.py
gptqmodel/nn_modules/qlinear/bitblas_target_detector.py
gptqmodel/nn_modules/qlinear/qlinear_bitblas.py
gptqmodel/nn_modules/qlinear/qlinear_exllamav2.py
gptqmodel/nn_modules/qlinear/qlinear_ipex.py
gptqmodel/nn_modules/qlinear/qlinear_marlin.py
gptqmodel/nn_modules/qlinear/qlinear_marlin_inference.py
gptqmodel/nn_modules/qlinear/qlinear_tritonv2.py
gptqmodel/nn_modules/triton_utils/__init__.py
gptqmodel/nn_modules/triton_utils/custom_autotune.py
gptqmodel/nn_modules/triton_utils/dequant.py
gptqmodel/nn_modules/triton_utils/kernels.py
gptqmodel/nn_modules/triton_utils/mixin.py
gptqmodel/quantization/__init__.py
gptqmodel/quantization/config.py
gptqmodel/quantization/gptq.py
gptqmodel/quantization/quantizer.py
gptqmodel/utils/__init__.py
gptqmodel/utils/backend.py
gptqmodel/utils/bitblas.py
gptqmodel/utils/data.py
gptqmodel/utils/device.py
gptqmodel/utils/importer.py
gptqmodel/utils/marlin.py
gptqmodel/utils/model.py
gptqmodel/utils/perplexity.py
gptqmodel/utils/sglang.py
gptqmodel/utils/vllm.py
gptqmodel/utils/vram.py
gptqmodel_ext/exllamav2/config.h
gptqmodel_ext/exllamav2/ext.cpp
gptqmodel_ext/exllamav2/cpp/util.h
gptqmodel_ext/exllamav2/cuda/compat.cuh
gptqmodel_ext/exllamav2/cuda/compat_gemm.cuh
gptqmodel_ext/exllamav2/cuda/matrix_view.cuh
gptqmodel_ext/exllamav2/cuda/q_gemm.cu
gptqmodel_ext/exllamav2/cuda/q_gemm.cuh
gptqmodel_ext/exllamav2/cuda/q_gemm_kernel.cuh
gptqmodel_ext/exllamav2/cuda/q_gemm_kernel_gptq.cuh
gptqmodel_ext/exllamav2/cuda/q_matrix.cu
gptqmodel_ext/exllamav2/cuda/q_matrix.cuh
gptqmodel_ext/exllamav2/cuda/util.cuh
gptqmodel_ext/exllamav2/cuda/quant/qdq_2.cuh
gptqmodel_ext/exllamav2/cuda/quant/qdq_3.cuh
gptqmodel_ext/exllamav2/cuda/quant/qdq_4.cuh
gptqmodel_ext/exllamav2/cuda/quant/qdq_5.cuh
gptqmodel_ext/exllamav2/cuda/quant/qdq_6.cuh
gptqmodel_ext/exllamav2/cuda/quant/qdq_8.cuh
gptqmodel_ext/exllamav2/cuda/quant/qdq_util.cuh
gptqmodel_ext/marlin/marlin_cuda.cpp
gptqmodel_ext/marlin/marlin_cuda_kernel.cu
gptqmodel_ext/marlin/marlin_cuda_kernel.cuh
gptqmodel_ext/marlin/marlin_repack.cu
gptqmodel_ext/marlin/marlin_repack.cuh
gptqmodel_ext/marlin_inference/marlin.cuh
gptqmodel_ext/marlin_inference/marlin_cuda.cpp
gptqmodel_ext/marlin_inference/marlin_cuda_kernel.cu
gptqmodel_ext/marlin_inference/marlin_cuda_kernel.cuh
gptqmodel_ext/marlin_inference/marlin_dtypes.cuh
gptqmodel_ext/marlin_inference/marlin_repack.cu
gptqmodel_ext/marlin_inference/marlin_repack.cuh
tests/test_dynamic.py
tests/test_estimate_vram.py
tests/test_ipex.py
tests/test_lm_eval.py
tests/test_lm_head.py
tests/test_packing.py
tests/test_perplexity.py
tests/test_pt.py
tests/test_q4_bitblas.py
tests/test_q4_exllama_v2.py
tests/test_q4_marlin.py
tests/test_q4_triton.py
tests/test_quant_batch.py
tests/test_quant_formats.py
tests/test_quant_trust_remote.py
tests/test_save_loaded_quantized_model.py
tests/test_serialization.py
tests/test_sglang.py
tests/test_sharded.py
tests/test_tgi.py
tests/test_transformers_integration.py
tests/test_triton.py
tests/test_verify_hash.py
tests/test_vllm.py