Metadata-Version: 2.1
Name: imitation
Version: 0.3.2
Summary: Implementation of modern reward and imitation learning algorithms.
Home-page: https://github.com/HumanCompatibleAI/imitation
Author: Center for Human-Compatible AI and Google
License: MIT
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
Requires-Python: >=3.8.0
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: gym[classic_control] (==0.21.0)
Requires-Dist: pyglet (==1.5.27)
Requires-Dist: matplotlib
Requires-Dist: numpy (>=1.15)
Requires-Dist: torch (>=1.4.0)
Requires-Dist: tqdm
Requires-Dist: scikit-learn (>=0.21.2)
Requires-Dist: seals (>=0.1.5)
Requires-Dist: stable-baselines3 (>=1.6.1)
Requires-Dist: chai-sacred (>=0.8.3)
Requires-Dist: tensorboard (>=1.14)
Requires-Dist: huggingface-sb3 (>=2.2.1)
Provides-Extra: atari
Requires-Dist: opencv-python ; extra == 'atari'
Requires-Dist: ale-py (==0.7.4) ; extra == 'atari'
Requires-Dist: pillow ; extra == 'atari'
Requires-Dist: autorom[accept-rom-license] (~=0.4.2) ; extra == 'atari'
Provides-Extra: dev
Requires-Dist: autopep8 ; extra == 'dev'
Requires-Dist: ipdb ; extra == 'dev'
Requires-Dist: isort (~=5.0) ; extra == 'dev'
Requires-Dist: codespell ; extra == 'dev'
Requires-Dist: sphinx-autobuild ; extra == 'dev'
Requires-Dist: black[jupyter] (~=22.6.0) ; extra == 'dev'
Requires-Dist: coverage (~=6.4.2) ; extra == 'dev'
Requires-Dist: codecov (~=2.1.12) ; extra == 'dev'
Requires-Dist: codespell (~=2.1.0) ; extra == 'dev'
Requires-Dist: darglint (~=1.8.1) ; extra == 'dev'
Requires-Dist: filelock (~=3.7.1) ; extra == 'dev'
Requires-Dist: flake8 (~=4.0.1) ; extra == 'dev'
Requires-Dist: flake8-blind-except (==0.2.1) ; extra == 'dev'
Requires-Dist: flake8-builtins (~=1.5.3) ; extra == 'dev'
Requires-Dist: flake8-commas (~=2.1.0) ; extra == 'dev'
Requires-Dist: flake8-debugger (~=4.1.2) ; extra == 'dev'
Requires-Dist: flake8-docstrings (~=1.6.0) ; extra == 'dev'
Requires-Dist: flake8-isort (~=4.1.2) ; extra == 'dev'
Requires-Dist: hypothesis (~=6.54.1) ; extra == 'dev'
Requires-Dist: ipykernel (~=6.15.1) ; extra == 'dev'
Requires-Dist: jupyter (~=1.0.0) ; extra == 'dev'
Requires-Dist: jupyter-client (~=6.1.12) ; extra == 'dev'
Requires-Dist: mypy (~=0.990) ; extra == 'dev'
Requires-Dist: pandas (~=1.4.3) ; extra == 'dev'
Requires-Dist: pytest (~=7.1.2) ; extra == 'dev'
Requires-Dist: pytest-cov (~=3.0.0) ; extra == 'dev'
Requires-Dist: pytest-notebook (==0.8.0) ; extra == 'dev'
Requires-Dist: pytest-xdist (~=2.5.0) ; extra == 'dev'
Requires-Dist: scipy (~=1.9.0) ; extra == 'dev'
Requires-Dist: wandb (==0.12.21) ; extra == 'dev'
Requires-Dist: setuptools-scm (~=7.0.5) ; extra == 'dev'
Requires-Dist: pre-commit (>=2.20.0) ; extra == 'dev'
Requires-Dist: ray[debug,tune] (~=2.0.0) ; extra == 'dev'
Requires-Dist: opencv-python ; extra == 'dev'
Requires-Dist: ale-py (==0.7.4) ; extra == 'dev'
Requires-Dist: pillow ; extra == 'dev'
Requires-Dist: autorom[accept-rom-license] (~=0.4.2) ; extra == 'dev'
Requires-Dist: pytype (==2022.7.26) ; extra == 'dev'
Requires-Dist: sphinx (~=5.1.1) ; extra == 'dev'
Requires-Dist: sphinx-autodoc-typehints (~=1.19.1) ; extra == 'dev'
Requires-Dist: sphinx-rtd-theme (~=1.0.0) ; extra == 'dev'
Requires-Dist: sphinxcontrib-napoleon (==0.7) ; extra == 'dev'
Requires-Dist: furo (==2022.6.21) ; extra == 'dev'
Requires-Dist: sphinx-copybutton (==0.5.0) ; extra == 'dev'
Requires-Dist: sphinx-github-changelog (~=1.2.0) ; extra == 'dev'
Requires-Dist: myst-nb (==0.16.0) ; extra == 'dev'
Requires-Dist: ipykernel (~=6.15.2) ; extra == 'dev'
Provides-Extra: docs
Requires-Dist: sphinx (~=5.1.1) ; extra == 'docs'
Requires-Dist: sphinx-autodoc-typehints (~=1.19.1) ; extra == 'docs'
Requires-Dist: sphinx-rtd-theme (~=1.0.0) ; extra == 'docs'
Requires-Dist: sphinxcontrib-napoleon (==0.7) ; extra == 'docs'
Requires-Dist: furo (==2022.6.21) ; extra == 'docs'
Requires-Dist: sphinx-copybutton (==0.5.0) ; extra == 'docs'
Requires-Dist: sphinx-github-changelog (~=1.2.0) ; extra == 'docs'
Requires-Dist: myst-nb (==0.16.0) ; extra == 'docs'
Requires-Dist: ipykernel (~=6.15.2) ; extra == 'docs'
Requires-Dist: opencv-python ; extra == 'docs'
Requires-Dist: ale-py (==0.7.4) ; extra == 'docs'
Requires-Dist: pillow ; extra == 'docs'
Requires-Dist: autorom[accept-rom-license] (~=0.4.2) ; extra == 'docs'
Provides-Extra: mujoco
Requires-Dist: gym[classic_control,mujoco] (==0.21.0) ; extra == 'mujoco'
Provides-Extra: parallel
Requires-Dist: ray[debug,tune] (~=2.0.0) ; extra == 'parallel'
Provides-Extra: test
Requires-Dist: black[jupyter] (~=22.6.0) ; extra == 'test'
Requires-Dist: coverage (~=6.4.2) ; extra == 'test'
Requires-Dist: codecov (~=2.1.12) ; extra == 'test'
Requires-Dist: codespell (~=2.1.0) ; extra == 'test'
Requires-Dist: darglint (~=1.8.1) ; extra == 'test'
Requires-Dist: filelock (~=3.7.1) ; extra == 'test'
Requires-Dist: flake8 (~=4.0.1) ; extra == 'test'
Requires-Dist: flake8-blind-except (==0.2.1) ; extra == 'test'
Requires-Dist: flake8-builtins (~=1.5.3) ; extra == 'test'
Requires-Dist: flake8-commas (~=2.1.0) ; extra == 'test'
Requires-Dist: flake8-debugger (~=4.1.2) ; extra == 'test'
Requires-Dist: flake8-docstrings (~=1.6.0) ; extra == 'test'
Requires-Dist: flake8-isort (~=4.1.2) ; extra == 'test'
Requires-Dist: hypothesis (~=6.54.1) ; extra == 'test'
Requires-Dist: ipykernel (~=6.15.1) ; extra == 'test'
Requires-Dist: jupyter (~=1.0.0) ; extra == 'test'
Requires-Dist: jupyter-client (~=6.1.12) ; extra == 'test'
Requires-Dist: mypy (~=0.990) ; extra == 'test'
Requires-Dist: pandas (~=1.4.3) ; extra == 'test'
Requires-Dist: pytest (~=7.1.2) ; extra == 'test'
Requires-Dist: pytest-cov (~=3.0.0) ; extra == 'test'
Requires-Dist: pytest-notebook (==0.8.0) ; extra == 'test'
Requires-Dist: pytest-xdist (~=2.5.0) ; extra == 'test'
Requires-Dist: scipy (~=1.9.0) ; extra == 'test'
Requires-Dist: wandb (==0.12.21) ; extra == 'test'
Requires-Dist: setuptools-scm (~=7.0.5) ; extra == 'test'
Requires-Dist: pre-commit (>=2.20.0) ; extra == 'test'
Requires-Dist: ray[debug,tune] (~=2.0.0) ; extra == 'test'
Requires-Dist: opencv-python ; extra == 'test'
Requires-Dist: ale-py (==0.7.4) ; extra == 'test'
Requires-Dist: pillow ; extra == 'test'
Requires-Dist: autorom[accept-rom-license] (~=0.4.2) ; extra == 'test'
Requires-Dist: pytype (==2022.7.26) ; extra == 'test'

[![CircleCI](https://circleci.com/gh/HumanCompatibleAI/imitation.svg?style=svg)](https://circleci.com/gh/HumanCompatibleAI/imitation)
[![Documentation Status](https://readthedocs.org/projects/imitation/badge/?version=latest)](https://imitation.readthedocs.io/en/latest/?badge=latest)
[![codecov](https://codecov.io/gh/HumanCompatibleAI/imitation/branch/master/graph/badge.svg)](https://codecov.io/gh/HumanCompatibleAI/imitation)
[![PyPI version](https://badge.fury.io/py/imitation.svg)](https://badge.fury.io/py/imitation)

# Imitation Learning Baseline Implementations

This project aims to provide clean implementations of imitation and reward learning algorithms.
Currently, we have implementations of the algorithms below. 'Discrete' and 'Continuous' indicate whether the algorithm supports discrete or continuous action/state spaces respectively.

| Algorithm (+ link to paper)                                                                                                       | API Docs                                                                                                                 | Discrete | Continuous |
|-----------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------|----------|------------|
| Behavioral Cloning                                                                                                                | [`algorithms.bc`](https://imitation.readthedocs.io/en/latest/algorithms/bc.html)                                         | ✅        | ✅          |
| [DAgger](https://arxiv.org/pdf/1011.0686.pdf)                                                                                     | [`algorithms.dagger`](https://imitation.readthedocs.io/en/latest/algorithms/dagger.html)                                 | ✅        | ✅          |
| Density-Based Reward Modeling                                                                                                     | [`algorithms.density`](https://imitation.readthedocs.io/en/latest/algorithms/density.html)                               | ✅        | ✅          |
| [Maximum Causal Entropy Inverse Reinforcement Learning](https://www.cs.cmu.edu/~bziebart/publications/maximum-causal-entropy.pdf) | [`algorithms.mce_irl`](https://imitation.readthedocs.io/en/latest/algorithms/mce_irl.html)                               | ✅        | ❌          |
| [Adversarial Inverse Reinforcement Learning](https://arxiv.org/abs/1710.11248)                                                    | [`algorithms.airl`](https://imitation.readthedocs.io/en/latest/algorithms/airl.html)                                     | ✅        | ✅          |
| [Generative Adversarial Imitation Learning](https://arxiv.org/abs/1606.03476)                                                     | [`algorithms.gail`](https://imitation.readthedocs.io/en/latest/algorithms/gail.html)                                     | ✅        | ✅          |
| [Deep RL from Human Preferences](https://arxiv.org/abs/1706.03741)                                                                | [`algorithms.preference_comparisons`](https://imitation.readthedocs.io/en/latest/algorithms/preference_comparisons.html) | ✅        | ✅          |


You can find [the documentation here](https://imitation.readthedocs.io/en/latest/).

## Installation

### Prerequisites

- Python 3.8+
- (Optional) OpenGL (to render Gym environments)
- (Optional) FFmpeg (to encode videos of renders)
- (Optional) MuJoCo (follow instructions to install [mujoco_py v1.5 here](https://github.com/openai/mujoco-py/tree/498b451a03fb61e5bdfcb6956d8d7c881b1098b5#install-mujoco))

### Installing PyPI release

Installing the PyPI release is the standard way to use `imitation`, and the recommended way for most users.

```
pip install imitation
```

### Install from source

If you like, you can install `imitation` from source to [contribute to the project][contributing] or to access the latest features before a stable release. You can do this by cloning the GitHub repository and running the installer directly. First run:
`git clone https://github.com/HumanCompatibleAI/imitation && cd imitation`.

Then, to install in development mode, run:

```
pip install -e ".[dev]"
```

This will run `setup.py` in development mode, and install the additional dependencies required for development. For regular use, run instead

```
pip install .
```

Additional extras are available depending on your needs. Namely, `test` for running the test suite, `docs` for building the documentation, `parallel` for parallelizing the training, and `atari` for including Atari environments. The `dev` extra already installs the `test`, `docs`, and `atari` dependencies automatically, and `test` installs the `atari` dependencies.

For macOS users, some packages are required to run experiments (see `./experiments/README.md` for details). First, install Homebrew if not available (see [Homebrew](https://brew.sh/)). Then, run:

```
brew install coreutils gnu-getopt parallel
```

## CLI Quickstart

We provide several CLI scripts as a front-end to the algorithms implemented in `imitation`. These use [Sacred](https://github.com/idsia/sacred) for configuration and replicability.

From [examples/quickstart.sh:](examples/quickstart.sh)

```bash
# Train PPO agent on pendulum and collect expert demonstrations. Tensorboard logs saved in quickstart/rl/
python -m imitation.scripts.train_rl with pendulum common.fast train.fast rl.fast fast common.log_dir=quickstart/rl/

# Train GAIL from demonstrations. Tensorboard logs saved in output/ (default log directory).
python -m imitation.scripts.train_adversarial gail with pendulum common.fast demonstrations.fast train.fast rl.fast fast demonstrations.rollout_path=quickstart/rl/rollouts/final.npz

# Train AIRL from demonstrations. Tensorboard logs saved in output/ (default log directory).
python -m imitation.scripts.train_adversarial airl with pendulum common.fast demonstrations.fast train.fast rl.fast fast demonstrations.rollout_path=quickstart/rl/rollouts/final.npz
```

Tips:

- Remove the "fast" options from the commands above to allow training to run to completion.
- `python -m imitation.scripts.train_rl print_config` will list Sacred script options. These configuration options are documented in each script's docstrings.

For more information on how to configure Sacred CLI options, see the [Sacred docs](https://sacred.readthedocs.io/en/stable/).

## Python Interface Quickstart

See [examples/quickstart.py](examples/quickstart.py) for an example script that loads CartPole-v1 demonstrations and trains BC, GAIL, and AIRL models on that data.

### Density reward baseline

We also implement a density-based reward baseline. You can find an [example notebook here](docs/tutorials/7_train_density.ipynb).

# Citations (BibTeX)

```
@misc{wang2020imitation,
  author = {Wang, Steven and Toyer, Sam and Gleave, Adam and Emmons, Scott},
  title = {The {\tt imitation} Library for Imitation Learning and Inverse Reinforcement Learning},
  year = {2020},
  publisher = {GitHub},
  journal = {GitHub repository},
  howpublished = {\url{https://github.com/HumanCompatibleAI/imitation}},
}
```

# Contributing

See [Contributing to imitation][contributing] for more information.


[contributing]: https://imitation.readthedocs.io/en/latest/development/contributing/index.html
