Metadata-Version: 2.1
Name: rtx-deep
Version: 1.3.8
Summary: Deep AI modules developed by MOGO RTX team
Home-page: UNKNOWN
Author: Andy
License: GPLv3
Platform: UNKNOWN
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
Classifier: Operating System :: POSIX :: Linux
Classifier: Operating System :: MacOS
Classifier: Operating System :: Microsoft :: Windows
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Requires-Python: >=3.10, <3.11
Description-Content-Type: text/markdown
Requires-Dist: torch (>=1.10.0)
Requires-Dist: tensorrt (>=7.0)
Requires-Dist: graphviz

## `rtx_deep`: Deep AI modules developed by the MOGO RTX team, aiming to accelerate distributed training, int8-aware distributed training, distributed evaluation and inference, model tracing and optimization, and TensorRT deployment.

#### 1 Dependencies
```text
torch>=1.10.0
tensorrt>=7.0
graphviz
```

#### 2 Installation
```bash
pip3 install graphviz
apt-get install graphviz
python3 setup.py install
```

#### 3 Examples
##### 3.1 Graph Tracing and Model Optimization
```python
import torch
import torch.nn as nn
import torch.nn.functional as F

import rtx_deep


class conv3x3_bn_relu(nn.Module):
    def __init__(self, in_planes, out_planes, stride=1, dilation=1, groups=1):
        super(conv3x3_bn_relu, self).__init__()
        self.net = nn.Sequential(
            nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=dilation, dilation=dilation, groups=groups, bias=False),
            nn.BatchNorm2d(out_planes),
            nn.ReLU(inplace=True)
        )
    
    def forward(self, x):
        x1 = self.net(x)
        return x1


class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.net = nn.Sequential(
            conv3x3_bn_relu(64, 64),
            conv3x3_bn_relu(64, 64)
        )
    
    def forward(self, x):
        x1 = self.net(x)
        return x1

model = Model()
model.eval()
model.cuda()

input_data = torch.randn(1, 64, 1024, 1024).cuda()

# graph tracing
model_fx = rtx_deep.graph_tracer.ad_trace.graph_trace(model, function_name=None)

# Model optimization
# (graph_optim_from_module performs graph tracing automatically)
model_fx_optim = rtx_deep.graph_tracer.graph_utils.graph_optim_from_module(model, function_name=None, sample_inputs=(input_data,))
```

##### 3.2 Quantization-Aware Training
```python
import torch
import torch.nn as nn
import torch.nn.functional as F

import rtx_deep


class conv3x3_bn_relu(nn.Module):
    def __init__(self, in_planes, out_planes, stride=1, dilation=1, groups=1):
        super(conv3x3_bn_relu, self).__init__()
        self.net = nn.Sequential(
            nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=dilation, dilation=dilation, groups=groups, bias=False),
            nn.BatchNorm2d(out_planes),
            nn.ReLU(inplace=True)
        )
    
    def forward(self, x):
        x1 = self.net(x)
        return x1


class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.net = nn.Sequential(
            conv3x3_bn_relu(64, 64),
            conv3x3_bn_relu(64, 64)
        )
    
    def forward(self, x):
        x1 = self.net(x)
        return x1

model = Model()
model.eval()
model.cuda()

input_data = torch.randn(1, 64, 1024, 1024).cuda()

# Model optimization
# (graph_optim_from_module performs graph tracing automatically)
model_fx_optim = rtx_deep.graph_tracer.graph_utils.graph_optim_from_module(model, function_name=None)

# Prepare the optimized model for quantization-aware training (QAT)
model_qat = rtx_deep.quant_lib.quant_utils.prepare_qat(model_fx_optim,
    sample_inputs=[input_data],
    observe_config_dic=dict(averaging_constant=0.05),
    quant_config_dic=dict(quant_min=-127, quant_max=127, is_symmetric=True, is_quant=True),
    disable_prefix=[])


# Visualize the optimized and QAT model networks
rtx_deep.graph_tracer.vis_model.vis(model_fx_optim, './model_fx_optim.png')
rtx_deep.graph_tracer.vis_model.vis(model_qat, './model_qat.png')

# qat training
...
```

##### 3.3 TensorRT Deployment
```python
import torch
import torch.nn as nn
import torch.nn.functional as F

import rtx_deep
import rtx_deep_plugin
from rtx_deep.deploy_lib.convert_trt import InputTensor, torch2trt

class conv3x3_bn_relu(nn.Module):
    def __init__(self, in_planes, out_planes, stride=1, dilation=1, groups=1):
        super(conv3x3_bn_relu, self).__init__()
        self.net = nn.Sequential(
            nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=dilation, dilation=dilation, groups=groups, bias=False),
            nn.BatchNorm2d(out_planes),
            nn.ReLU(inplace=True)
        )
    
    def forward(self, x):
        x1 = self.net(x)
        return x1


class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.net = nn.Sequential(
            conv3x3_bn_relu(64, 64),
            conv3x3_bn_relu(64, 64)
        )
    
    def forward(self, x):
        x1 = self.net(x)
        x2 = rtx_deep_plugin.max_op(x1, dim=1)
        return x2

model = Model()
model.eval()
model.cuda()

input_data = torch.randn(1, 64, 1024, 1024).cuda()

# Model optimization
# (graph_optim_from_module performs graph tracing automatically)
model_fx_optim = rtx_deep.graph_tracer.graph_utils.graph_optim_from_module(model, function_name=None, sample_inputs=(input_data,))

# TensorRT Deployment
model_trt = torch2trt(
    model=model_fx_optim,
    input_specs=[InputTensor(input_data, 'input_data')],
    output_names=['max_value', 'max_index'],
    fp16_mode=True,
    #dla_core=0,
    strict_type_constraints=True,
    explicit_precision=True
)

# Visualize the TensorRT network
rtx_deep.deploy_lib.tools.vis_trt.vis(model_trt.network, 'test.png')

error = model(input_data)[0] - model_trt(input_data)[0]
print(error.abs().max())
```

