I am initializing an inf1.6xlarge
SageMaker instance with the following setup script:
# Register the AWS Neuron YUM repository. tee runs under sudo so it can write
# into /etc/yum.repos.d; its copy of the text on stdout is discarded.
sudo tee /etc/yum.repos.d/neuron.repo > /dev/null <<EOF
[neuron]
name=Neuron YUM Repository
baseurl=https://yum.repos.neuron.amazonaws.com
enabled=1
metadata_expire=0
EOF
# Import the GPG public key published alongside the Neuron repository
sudo rpm --import https://yum.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB
# Update OS packages
sudo yum update -y
# Install OS headers matching the currently running kernel (uname -r)
sudo yum install kernel-devel-$(uname -r) kernel-headers-$(uname -r) -y
# Install git
sudo yum install git -y
# Install Neuron Driver
sudo yum install aws-neuronx-dkms-2.* -y
# Install Neuron Tools
sudo yum install aws-neuronx-tools-2.* -y
# Add the Neuron tools to PATH (applies to the current shell session only)
export PATH=/opt/aws/neuron/bin:$PATH
# Install Python venv
sudo yum install -y python3.7-venv gcc-c++
# Create the Python environment with conda; `yes` answers the confirmation prompt
yes | conda create --name aws_neuron_venv_pytorch python=3.7
# Alternative (disabled): create the environment with venv instead of conda
#python3.7 -m venv aws_neuron_venv_pytorch
# Activate the Python environment
source activate aws_neuron_venv_pytorch
# Alternative (disabled): activation command for the venv variant
#source aws_neuron_venv_pytorch/bin/activate
python -m pip install -U pip
# Install Jupyter notebook kernel
pip install ipykernel
python3.7 -m ipykernel install --user --name aws_neuron_venv_pytorch --display-name "Python (torch-neuron)"
pip install jupyter notebook
pip install environment_kernels
# Set pip repository pointing to the Neuron repository
python -m pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com
# Install PyTorch Neuron.
# The extras spec is quoted so the shell does not treat [tensorflow] as a
# glob character class (which could expand against files in the current
# directory, or be rejected outright by shells such as zsh).
python -m pip install torch-neuron "neuron-cc[tensorflow]" "protobuf" torchvision transformers sagemaker
Then I use a Jupyter notebook and run:
import os
#import tensorflow # to workaround a protobuf version conflict issue
import torch
import torch.neuron
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader, RandomSampler
from transformers import BloomTokenizerFast, BloomForCausalLM
# Checkpoint identifier shared by the tokenizer and the model below.
model_id = "bigscience/bloom-560m"

# Load the tokenizer and the causal-LM weights for that checkpoint.
tokenizer = BloomTokenizerFast.from_pretrained(model_id)
model = BloomForCausalLM.from_pretrained(model_id)
class Net(nn.Module):
    """Wrap a causal language model and sample a single next token.

    The wrapped model is called with ``input_ids=...`` and must return an
    object exposing a ``logits`` attribute that is indexed as
    ``logits[:, -1, :]`` (i.e. three dimensions, vocabulary last).
    """

    def __init__(self, pre_trained_model):
        """Store the model whose logits are sampled in ``forward``."""
        super().__init__()
        self.pre_trained_model = pre_trained_model

    def forward(self, text, attention_mask):
        """Sample one token id that follows the unpadded part of ``text``.

        Args:
            text: token ids for one sequence; only the trailing
                ``sum(attention_mask)`` entries are fed to the model.
                NOTE(review): this assumes padding sits at the front of
                the sequence - confirm the tokenizer's padding side.
            attention_mask: mask whose sum gives the number of real tokens.

        Returns:
            A 1-element tensor holding the sampled token id.
        """
        # NOTE: .item() yields a plain Python number. Under torch.jit-style
        # tracing such values are recorded as constants (see the
        # TracerWarning text), so a traced graph is specialised to the
        # example input's token count.
        token_length = int(torch.sum(attention_mask).item())
        inputs, out = text[-token_length:].unsqueeze(0), []

        model_out = self.pre_trained_model(input_ids=inputs)
        # Keep only the logits for the last position.
        logits = model_out.logits[:, -1, :]

        # Scale each logit by its own token index. The index vector is sized
        # from the logits instead of a hard-coded vocabulary size, and built
        # with arange rather than from a Python range on every call.
        token_ids = torch.arange(logits.shape[-1], device=logits.device)
        logits = logits * token_ids

        # softmax yields probabilities (not log-probabilities), which is what
        # torch.multinomial expects as sampling weights.
        probs = F.softmax(logits, dim=-1)
        input_token = torch.multinomial(probs, 1)

        out.append(input_token.item())
        print(out)
        return torch.tensor(out)
# Wrap the pretrained model and put the wrapper into evaluation mode.
n = Net(pre_trained_model=model)
n.eval()

# Example text used only to produce example inputs for tracing; the
# tokenizer pads it to max_length.
dummy_input = "Dummy input which will be padded later"
max_length = 128
embeddings = tokenizer(
    dummy_input,
    max_length=max_length,
    padding="max_length",
    return_tensors="pt",
)

# Take the first row of each returned tensor, in the (text, attention_mask)
# order that Net.forward expects.
neuron_inputs = tuple(
    embeddings[key][0] for key in ("input_ids", "attention_mask")
)

# Trace and compile the wrapper for Neuron; compiler artifacts go to ./workdir.
neuron_net = torch.neuron.trace(
    n,
    neuron_inputs,
    compiler_workdir="./workdir",
    separate_weights=True,
)
Then I get the following error:
/home/ec2-user/anaconda3/envs/aws_neuron_venv_pytorch/lib/python3.7/site-packages/ipykernel/__main__.py:8: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
/home/ec2-user/anaconda3/envs/aws_neuron_venv_pytorch/lib/python3.7/site-packages/ipykernel/__main__.py:13: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.
/home/ec2-user/anaconda3/envs/aws_neuron_venv_pytorch/lib/python3.7/site-packages/ipykernel/__main__.py:19: TracerWarning: Converting a tensor to a Python number might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
/home/ec2-user/anaconda3/envs/aws_neuron_venv_pytorch/lib/python3.7/site-packages/ipykernel/__main__.py:21: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.
[250068]
[250068]
[250068]
INFO:Neuron:There are 2 ops of 2 different types in the TorchScript that are not compiled by neuron-cc: aten::__or__, aten::embedding, (For more information see https://awsdocs-neuron.readthedocs-hosted.com/en/latest/release-notes/compiler/neuron-cc/neuron-cc-ops/neuron-cc-ops-pytorch.html)
INFO:Neuron:Number of arithmetic operators (pre-compilation) before = 1820, fused = 1774, percent fused = 97.47%
INFO:Neuron:Compiling function _NeuronGraph$2460 with neuron-cc; log file is at /home/ec2-user/SageMaker/workdir/129/graph_def.neuron-cc.log
INFO:Neuron:Compiling with command line: '/home/ec2-user/anaconda3/envs/aws_neuron_venv_pytorch/bin/neuron-cc compile /home/ec2-user/SageMaker/workdir/129/model --framework TENSORFLOW --pipeline compile SaveTemps --output /home/ec2-user/SageMaker/workdir/129/graph_def.neff --verbose 35'
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
To disable this warning, you can either:
- Avoid using `tokenizers` before the fork if possible
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
INFO:Neuron:Compile command returned: 1
WARNING:Neuron:torch.neuron.trace failed on _NeuronGraph$2460; falling back to native python function call
ERROR:Neuron:neuron-cc failed with the following command line call:
/home/ec2-user/anaconda3/envs/aws_neuron_venv_pytorch/bin/neuron-cc compile /home/ec2-user/SageMaker/workdir/129/model --framework TENSORFLOW --pipeline compile SaveTemps --output /home/ec2-user/SageMaker/workdir/129/graph_def.neff --verbose 35
Traceback (most recent call last):
File "/home/ec2-user/anaconda3/envs/aws_neuron_venv_pytorch/lib/python3.7/site-packages/torch_neuron/convert.py", line 414, in op_converter
item, inputs, compiler_workdir=sg_workdir, **kwargs)
File "/home/ec2-user/anaconda3/envs/aws_neuron_venv_pytorch/lib/python3.7/site-packages/torch_neuron/decorators.py", line 264, in trace
'neuron-cc failed with the following command line call:\n{}'.format(command))
subprocess.SubprocessError: neuron-cc failed with the following command line call:
/home/ec2-user/anaconda3/envs/aws_neuron_venv_pytorch/bin/neuron-cc compile /home/ec2-user/SageMaker/workdir/129/model --framework TENSORFLOW --pipeline compile SaveTemps --output /home/ec2-user/SageMaker/workdir/129/graph_def.neff --verbose 35
INFO:Neuron:Number of arithmetic operators (post-compilation) before = 1820, compiled = 0, percent compiled = 0.0%
INFO:Neuron:The neuron partitioner created 1 sub-graphs
INFO:Neuron:Neuron successfully compiled 0 sub-graphs, Total fused subgraphs = 1, Percent of model sub-graphs successfully compiled = 0.0%
INFO:Neuron:Compiled these operators (and operator counts) to Neuron:
INFO:Neuron:Not compiled operators (and operator counts) to Neuron:
INFO:Neuron: => aten::Int: 444 [supported]
INFO:Neuron: => aten::ScalarImplicit: 1 [supported]
INFO:Neuron: => aten::__or__: 1 [not supported]
INFO:Neuron: => aten::add: 98 [supported]
INFO:Neuron: => aten::arange: 2 [supported]
INFO:Neuron: => aten::baddbmm: 24 [supported]
INFO:Neuron: => aten::bitwise_not: 1 [supported]
INFO:Neuron: => aten::bmm: 24 [supported]
INFO:Neuron: => aten::copy_: 1 [supported]
INFO:Neuron: => aten::cumsum: 1 [supported]
INFO:Neuron: => aten::detach: 2 [supported]
INFO:Neuron: => aten::dropout: 72 [supported]
INFO:Neuron: => aten::embedding: 1 [not supported]
INFO:Neuron: => aten::empty: 1 [supported]
INFO:Neuron: => aten::expand: 2 [supported]
INFO:Neuron: => aten::floor_divide: 24 [supported]
INFO:Neuron: => aten::layer_norm: 50 [supported]
INFO:Neuron: => aten::linear: 97 [supported]
INFO:Neuron: => aten::lt: 1 [supported]
INFO:Neuron: => aten::masked_fill: 24 [supported]
INFO:Neuron: => aten::mul: 243 [supported]
INFO:Neuron: => aten::ones: 1 [supported]
INFO:Neuron: => aten::permute: 48 [supported]
INFO:Neuron: => aten::pow: 1 [supported]
INFO:Neuron: => aten::reshape: 97 [supported]
INFO:Neuron: => aten::select: 72 [supported]
INFO:Neuron: => aten::size: 174 [supported]
INFO:Neuron: => aten::slice: 83 [supported]
INFO:Neuron: => aten::softmax: 24 [supported]
INFO:Neuron: => aten::sub: 1 [supported]
INFO:Neuron: => aten::tanh: 24 [supported]
INFO:Neuron: => aten::to: 28 [supported]
INFO:Neuron: => aten::transpose: 48 [supported]
INFO:Neuron: => aten::unsqueeze: 9 [supported]
INFO:Neuron: => aten::view: 96 [supported]
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
/tmp/ipykernel_3923/821702101.py in <module>
3 # Trace a module (implicitly traces `forward`) and constructs a
4 # `ScriptModule` with a single `forward` method
----> 5 neuron_net = torch.neuron.trace(n, neuron_inputs, compiler_workdir="./workdir", separate_weights=True)
6 #neuron_net = torch.neuron.trace(n, neuron_inputs, separate_weights=True, subgraph_builder_function=subgraph_builder_function)
~/anaconda3/envs/aws_neuron_venv_pytorch/lib/python3.7/site-packages/torch_neuron/convert.py in trace(func, example_inputs, fallback, op_whitelist, minimum_segment_size, subgraph_builder_function, subgraph_inputs_pruning, skip_compiler, debug_must_trace, allow_no_ops_on_neuron, compiler_workdir, dynamic_batch_size, compiler_timeout, single_fusion_ratio_threshold, _neuron_trace, compiler_args, optimizations, separate_weights, verbose, **kwargs)
215 logger.debug("skip_inference_context - trace with fallback at {}".format(get_file_and_line()))
216 neuron_graph = cu.compile_fused_operators(neuron_graph, **compile_kwargs)
--> 217 cu.stats_post_compiler(neuron_graph)
218
219 # Wrap the compiled version of the model in a script module. Note that this is
~/anaconda3/envs/aws_neuron_venv_pytorch/lib/python3.7/site-packages/torch_neuron/convert.py in stats_post_compiler(self, neuron_graph)
529 if succesful_compilations == 0 and not self.allow_no_ops_on_neuron:
530 raise RuntimeError(
--> 531 "No operations were successfully partitioned and compiled to neuron for this model - aborting trace!")
532
533 if percent_operations_compiled < 50.0:
RuntimeError: No operations were successfully partitioned and compiled to neuron for this model - aborting trace!
Below in the first "answer" I also added some logs.
It would be great if you could tell me what I am doing wrong here. 🙂