Bedrock FM API configurations


May I know why I need to use different input configurations for different models? For example, Titan uses

body = json.dumps({
    "inputText": "\n\nHuman: explain black holes to 8th graders\n\nAssistant:",
    "textGenerationConfig": {
        "maxTokenCount": 512,
        "stopSequences": [],
        "temperature": 0.7,
        "topP": 1
    }
})

but Claude uses

body = json.dumps({
    "prompt": "\n\nHuman: explain black holes to 8th graders\n\nAssistant:",
    "max_tokens_to_sample": 300,
    "temperature": 0.1,
    "top_p": 0.9,
})

Are these models served on AWS? Thank you!!

JCJJ
asked 6 months ago · 304 views
2 Answers

Thank you for your question; it is one we see often from many customers. Currently, Amazon Bedrock provides access to foundation models (FMs) through its interface and API on a simplified cost basis. This allows you to easily switch between different FMs when building, testing, and running your application. You will notice in the boto3 documentation that there are specific methods to help you interact with Amazon Bedrock itself. Here you can do things like list and select FMs and configure AWS-specific details such as tagging and provisioned throughput.
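
For example, a minimal sketch of listing the FMs available to your account with the boto3 'bedrock' control-plane client (the region shown is just an example):

import boto3

# Control-plane client for Bedrock itself (as opposed to 'bedrock-runtime')
bedrock = boto3.client(service_name='bedrock', region_name='us-east-1')

# list_foundation_models returns a 'modelSummaries' list with one entry per FM
for summary in bedrock.list_foundation_models()['modelSummaries']:
    print(summary['modelId'], '-', summary.get('providerName'))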

When you use a specific model, the parameters passed to the model are specific to the model itself and are maintained by the provider. As an example, regardless of where the model is hosted, Anthropic Claude will always expect a set of required parameters to create a completion during inference. The same holds true for other providers: these parameters vary and are specific to the libraries maintained by each provider.
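
To make that concrete with the two bodies from your question: only the request body and the modelId change between providers, while the InvokeModel call itself stays the same. A sketch (bedrock_runtime is a boto3 'bedrock-runtime' client, as in the answer below):

import json

prompt = "explain black holes to 8th graders"

# Amazon Titan expects 'inputText' plus a 'textGenerationConfig' block ...
titan_body = json.dumps({
    "inputText": prompt,
    "textGenerationConfig": {"maxTokenCount": 512, "temperature": 0.7, "topP": 1}
})

# ... while Anthropic Claude expects 'prompt', 'max_tokens_to_sample', etc.
claude_body = json.dumps({
    "prompt": f"\n\nHuman: {prompt}\n\nAssistant:",
    "max_tokens_to_sample": 300,
    "temperature": 0.1
})

# The Bedrock call is identical either way; only modelId and body differ:
# bedrock_runtime.invoke_model(modelId='amazon.titan-text-express-v1', body=titan_body)
# bedrock_runtime.invoke_model(modelId='anthropic.claude-v2', body=claude_body)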

AWS
answered 5 months ago

Hi,

I use something similar to the following. Hope this helps:

# Helper functions for invoking various Bedrock foundation models
import argparse
import base64
import io
import json
import os
from typing import List

import boto3
from PIL import Image

REGION = 'us-west-2'
bedrock = boto3.client(service_name='bedrock', region_name=REGION)
bedrock_runtime = boto3.client(service_name='bedrock-runtime', region_name=REGION)


def invoke_jurrasic2_mid(prompt, **kwargs):
    body = {
        "prompt": prompt,
        "maxTokens": 200,
        "temperature": 0,
        "topP": 1.0,
        "countPenalty": {
            "scale": 0
        },
        "presencePenalty": {
            "scale": 0
        },
        "frequencyPenalty": {
            "scale": 0
        }
    }

    for parameter in ['maxTokens', 'temperature', 'topP', 'countPenalty', 'presencePenalty', 'frequencyPenalty']:
        if parameter in kwargs:
            body[parameter] = kwargs[parameter]

    try:
        response = bedrock_runtime.invoke_model(
            body=json.dumps(body),
            modelId='ai21.j2-mid-v1',
            accept='application/json',
            contentType='application/json'
        )
        response_body = json.loads(response.get('body').read())
        completions = response_body.get('completions')[0]
        return completions.get('data').get('text')
    except Exception as e:
        print(e)
        return e


def invoke_jurrasic2_ultra(prompt, **kwargs):
    body = {
        "prompt": prompt,
        "maxTokens": 200,
        "temperature": 0,
        "topP": 1.0,
        "countPenalty": {
            "scale": 0
        },
        "presencePenalty": {
            "scale": 0
        },
        "frequencyPenalty": {
            "scale": 0
        }
    }
    for parameter in ['maxTokens', 'temperature', 'topP', 'countPenalty', 'presencePenalty', 'frequencyPenalty']:
        if parameter in kwargs:
            body[parameter] = kwargs[parameter]

    try:
        response = bedrock_runtime.invoke_model(
            body=json.dumps(body),
            modelId='ai21.j2-ultra-v1',
            accept='application/json',
            contentType='application/json'
        )
        response_body = json.loads(response.get('body').read())
        completions = response_body.get('completions')[0]
        return completions.get('data').get('text')
    except Exception as e:
        print(e)
        return e


def invoke_titan_text_express(prompt, **kwargs):
    body = {
        "inputText": prompt,
        "textGenerationConfig": {
            "maxTokenCount": 8192,
            "stopSequences": [],
            "temperature":0,
            "topP": 1
         }
    }
    for parameter in ['maxTokenCount', 'stopSequences', 'temperature', 'topP']:
        if parameter in kwargs:
            body['textGenerationConfig'][parameter] = kwargs[parameter]

    try:
        response = bedrock_runtime.invoke_model(
            body=json.dumps(body),
            modelId='amazon.titan-text-express-v1',
            accept='application/json',
            contentType='application/json'
        )
        response_body = json.loads(response.get('body').read())
        return response_body.get('results')[0].get('outputText')
    except Exception as e:
        print(e)
        return e


def invoke_titan_text_embeddings(prompt, **kwargs):
    body = {
        "inputText": prompt
    }

    try:
        response = bedrock_runtime.invoke_model(
            body=json.dumps(body),
            modelId='amazon.titan-embed-text-v1',
            accept='application/json',
            contentType='application/json'
        )
        response_body = json.loads(response.get('body').read())
        embedding = response_body.get('embedding')
        token_count = response_body.get('inputTextTokenCount')
        return embedding, token_count
    except Exception as e:
        print(e)
        return e


def invoke_claude_instant(prompt, **kwargs):
    body = {
        "prompt": f"\n\nHuman: {prompt}\n\nAssistant:",
        "max_tokens_to_sample": 300,
        "temperature": 0.5,
        "top_k": 250,
        "top_p": 1,
        "stop_sequences": [
        "\\n\\nHuman:"
        ],
        "anthropic_version": "bedrock-2023-05-31"
    }
    for parameter in ['max_tokens_to_sample', 'temperature', 'top_k', 'top_p']:
        if parameter in kwargs:
            body[parameter] = kwargs[parameter]

    response = bedrock_runtime.invoke_model(
        modelId = "anthropic.claude-instant-v1",
        contentType = "application/json",
        accept = "application/json",
        body = json.dumps(body)
    )
    response_body = json.loads(response.get('body').read())
    return response_body['completion']


def invoke_claude_v1(prompt, **kwargs):
    body = {
        "prompt": f"\n\nHuman: {prompt}\n\nAssistant:",
        "max_tokens_to_sample": 300,
        "temperature": 0.5,
        "top_k": 250,
        "top_p": 1,
        "stop_sequences": [
            "\\n\\nHuman:"
        ],
        "anthropic_version": "bedrock-2023-05-31"
    }
    for parameter in ['max_tokens_to_sample', 'temperature', 'top_k', 'top_p']:
        if parameter in kwargs:
            body[parameter] = kwargs[parameter]

    response = bedrock_runtime.invoke_model(
        modelId = "anthropic.claude-v1",
        contentType = "application/json",
        accept = "*/*",
        body = json.dumps(body)
    )
    response_body = json.loads(response.get('body').read())
    return response_body['completion']


def invoke_claude_v2(prompt, **kwargs):
    body = {
        "prompt": f"\n\nHuman: {prompt}\n\nAssistant:",
        "max_tokens_to_sample": 300,
        "temperature": 0.5,
        "top_k": 250,
        "top_p": 1,
        "stop_sequences": [
        "\\n\\nHuman:"
        ],
        "anthropic_version": "bedrock-2023-05-31"
    }
    for parameter in ['max_tokens_to_sample', 'temperature', 'top_k', 'top_p']:
        if parameter in kwargs:
            body[parameter] = kwargs[parameter]

    response = bedrock_runtime.invoke_model(
        modelId = "anthropic.claude-v2",
        contentType = "application/json",
        accept = "*/*",
        body = json.dumps(body)
    )
    response_body = json.loads(response.get('body').read())
    return response_body['completion']


def invoke_cohere_command(prompt, **kwargs):
    body = {
        "prompt": prompt,
        "max_tokens": 100,
        "temperature": 0.8
    }
    for parameter in ['max_tokens', 'temperature']:
        if parameter in kwargs:
            body[parameter] = kwargs[parameter]

    response = bedrock_runtime.invoke_model(
        modelId = "cohere.command-text-v14",
        contentType = "application/json",
        accept = "application/json",
        body = json.dumps(body)
    )
    response_body = json.loads(response.get('body').read())
    return response_body['generations'][0]['text']


def invoke_cohere_command_light(prompt, **kwargs):
    body = {
        "prompt": prompt,
        "max_tokens": 100,
        "temperature": 0.8
    }
    for parameter in ['max_tokens', 'temperature']:
        if parameter in kwargs:
            body[parameter] = kwargs[parameter]

    response = bedrock_runtime.invoke_model(
        modelId = "cohere.command-light-text-v14",
        contentType = "application/json",
        accept = "application/json",
        body = json.dumps(body)
    )
    response_body = json.loads(response.get('body').read())
    return response_body['generations'][0]['text']


def invoke_cohere_embed_english(prompts: List[str]):
    body = {
        "texts": prompts,
        "input_type": 'search_document',
        "truncate": 'NONE'
    }

    response = bedrock_runtime.invoke_model(
        modelId = "cohere.embed-english-v3",
        contentType = "application/json",
        accept = "application/json",
        body = json.dumps(body)
    )
    response_body = json.loads(response.get('body').read())
    return response_body['embeddings']


def invoke_cohere_embed_multilingual(prompts: List[str]):
    body = {
        "texts": prompts,
        "input_type": 'search_document',
        "truncate": 'NONE'
    }

    response = bedrock_runtime.invoke_model(
        modelId = "cohere.embed-multilingual-v3",
        contentType = "application/json",
        accept = "application/json",
        body = json.dumps(body)
    )
    response_body = json.loads(response.get('body').read())
    return response_body['embeddings']


def invoke_llama2(prompt, **kwargs):
    body = {
        "prompt": prompt,
        "max_gen_len": 512,
        "top_p": 0.9,
        "temperature": 0.2
    }
    for parameter in ['max_gen_len', 'top_p', 'temperature']:
        if parameter in kwargs:
            body[parameter] = kwargs[parameter]

    response = bedrock_runtime.invoke_model(
        modelId = "meta.llama2-13b-chat-v1",
        contentType = "application/json",
        accept = "application/json",
        body = json.dumps(body)
    )
    response_body = json.loads(response.get('body').read())
    return response_body['generation']


def invoke_stable_diffusion_xl(prompt, **kwargs):
    text_prompts = [{"text": prompt, "weight": 1.0}]
    if 'negative_prompts' in kwargs:
        negative_prompts = kwargs['negative_prompts'].split(',')
        text_prompts = text_prompts + [{"text": negprompt, "weight": -1.0} for negprompt in negative_prompts]

    body = {
        "text_prompts": text_prompts,
        "cfg_scale": 10,
        "seed": 0,
        "steps": 50
    }

    for parameter in ['cfg_scale', 'seed', 'steps', 'style_preset']:
        if parameter in kwargs:
            body[parameter] = kwargs[parameter]

    response = bedrock_runtime.invoke_model(
        modelId = "stability.stable-diffusion-xl-v0",
        contentType = "application/json",
        accept = "application/json",
        body = json.dumps(body)
    )
    response_body = json.loads(response.get('body').read())
    artifacts = response_body.get('artifacts')
    base_64_img_str = artifacts[0].get('base64')
    img = Image.open(io.BytesIO(base64.decodebytes(bytes(base_64_img_str, "utf-8"))))
    
    if 'img_file' in kwargs:
        img.save(kwargs['img_file'])
    
    return img
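
You would call these helpers like any other function, for example (assuming your account has model access enabled for the model IDs used above):

if __name__ == '__main__':
    # Both take the same prompt; each builds the provider-specific body internally
    print(invoke_claude_v2("Explain black holes to 8th graders.", temperature=0.2))
    print(invoke_titan_text_express("Explain black holes to 8th graders.", maxTokenCount=512))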
AWS
answered 5 months ago
