Bedrock FM API configurations


May I know why I need to use different input configurations for different models? For example, Titan uses

body = json.dumps({
    "inputText": "\n\nHuman: explain black holes to 8th graders\n\nAssistant:",
    "textGenerationConfig": {
        "maxTokenCount": 512,
        "stopSequences": [],
        "temperature": 0.7,
        "topP": 1
    }
})

but Claude uses

body = json.dumps({
    "prompt": "\n\nHuman: explain black holes to 8th graders\n\nAssistant:",
    "max_tokens_to_sample": 300,
    "temperature": 0.1,
    "top_p": 0.9,
})

Are these models served on AWS? Thank you!!

JCJJ
asked 6 months ago · 304 views
2 Answers

Thank you for your question; it is one we see often from many customers. Currently, Amazon Bedrock provides access to foundation models (FMs) through its interface and API on a simplified cost basis. This allows you to easily switch between different FMs when building, testing, and running your application. You will notice in the boto3 documentation that there are specific methods to help you interact with Amazon Bedrock itself. Here you can do things like list and select FMs and configure AWS-specific details such as tagging and provisioned throughput.
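
For example, a minimal sketch of listing the FMs available to your account with the boto3 'bedrock' control-plane client (the region shown is just an example):

import boto3

# Control-plane client for Bedrock itself (as opposed to 'bedrock-runtime')
bedrock = boto3.client(service_name='bedrock', region_name='us-east-1')

# list_foundation_models returns a 'modelSummaries' list with one entry per FM
for summary in bedrock.list_foundation_models()['modelSummaries']:
    print(summary['modelId'], '-', summary.get('providerName'))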

When you use a specific model, the parameters passed to the model are specific to the model itself and are maintained by the provider. As an example, regardless of where the model is hosted, Anthropic Claude will always expect a set of required parameters to create a completion during inference. The same holds true for other providers: these parameters vary and are specific to the libraries maintained by each provider.
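
To make that concrete with the two bodies from your question: only the request body and the modelId change between providers, while the InvokeModel call itself stays the same. A sketch (bedrock_runtime is a boto3 'bedrock-runtime' client, as in the answer below):

import json

prompt = "explain black holes to 8th graders"

# Amazon Titan expects 'inputText' plus a 'textGenerationConfig' block ...
titan_body = json.dumps({
    "inputText": prompt,
    "textGenerationConfig": {"maxTokenCount": 512, "temperature": 0.7, "topP": 1}
})

# ... while Anthropic Claude expects 'prompt', 'max_tokens_to_sample', etc.
claude_body = json.dumps({
    "prompt": f"\n\nHuman: {prompt}\n\nAssistant:",
    "max_tokens_to_sample": 300,
    "temperature": 0.1
})

# The Bedrock call is identical either way; only modelId and body differ:
# bedrock_runtime.invoke_model(modelId='amazon.titan-text-express-v1', body=titan_body)
# bedrock_runtime.invoke_model(modelId='anthropic.claude-v2', body=claude_body)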

AWS
answered 5 months ago

Hi,

I use something similar to the following. Hope this helps:

# Helper functions for invoking various Bedrock foundation models
import argparse
import base64
import io
import json
import os
from typing import List

import boto3
from PIL import Image

REGION = 'us-west-2'
bedrock = boto3.client(service_name='bedrock', region_name=REGION)
bedrock_runtime = boto3.client(service_name='bedrock-runtime', region_name=REGION)


def invoke_jurrasic2_mid(prompt, **kwargs):
    body = {
        "prompt": prompt,
        "maxTokens": 200,
        "temperature": 0,
        "topP": 1.0,
        "countPenalty": {
            "scale": 0
        },
        "presencePenalty": {
            "scale": 0
        },
        "frequencyPenalty": {
            "scale": 0
        }
    }

    for parameter in ['maxTokens', 'temperature', 'topP', 'countPenalty', 'presencePenalty', 'frequencyPenalty']:
        if parameter in kwargs:
            body[parameter] = kwargs[parameter]

    try:
        response = bedrock_runtime.invoke_model(
            body=json.dumps(body),
            modelId='ai21.j2-mid-v1',
            accept='application/json',
            contentType='application/json'
        )
        response_body = json.loads(response.get('body').read())
        completions = response_body.get('completions')[0]
        return completions.get('data').get('text')
    except Exception as e:
        print(e)
        return e


def invoke_jurrasic2_ultra(prompt, **kwargs):
    body = {
        "prompt": prompt,
        "maxTokens": 200,
        "temperature": 0,
        "topP": 1.0,
        "countPenalty": {
            "scale": 0
        },
        "presencePenalty": {
            "scale": 0
        },
        "frequencyPenalty": {
            "scale": 0
        }
    }
    for parameter in ['maxTokens', 'temperature', 'topP', 'countPenalty', 'presencePenalty', 'frequencyPenalty']:
        if parameter in kwargs:
            body[parameter] = kwargs[parameter]

    try:
        response = bedrock_runtime.invoke_model(
            body=json.dumps(body),
            modelId='ai21.j2-ultra-v1',
            accept='application/json',
            contentType='application/json'
        )
        response_body = json.loads(response.get('body').read())
        completions = response_body.get('completions')[0]
        return completions.get('data').get('text')
    except Exception as e:
        print(e)
        return e


def invoke_titan_text_express(prompt, **kwargs):
    body = {
        "inputText": prompt,
        "textGenerationConfig": {
            "maxTokenCount": 8192,
            "stopSequences": [],
            "temperature":0,
            "topP": 1
         }
    }
    for parameter in ['maxTokenCount', 'stopSequences', 'temperature', 'topP']:
        if parameter in kwargs:
            body['textGenerationConfig'][parameter] = kwargs[parameter]

    try:
        response = bedrock_runtime.invoke_model(
            body=json.dumps(body),
            modelId='amazon.titan-text-express-v1',
            accept='application/json',
            contentType='application/json'
        )
        response_body = json.loads(response.get('body').read())
        return response_body.get('results')[0].get('outputText')
    except Exception as e:
        print(e)
        return e


def invoke_titan_text_embeddings(prompt, **kwargs):
    body = {
        "inputText": prompt
    }

    try:
        response = bedrock_runtime.invoke_model(
            body=json.dumps(body),
            modelId='amazon.titan-embed-text-v1',
            accept='application/json',
            contentType='application/json'
        )
        response_body = json.loads(response.get('body').read())
        embedding = response_body.get('embedding')
        token_count = response_body.get('inputTextTokenCount')
        return embedding, token_count
    except Exception as e:
        print(e)
        return e


def invoke_claude_instant(prompt, **kwargs):
    body = {
        "prompt": f"\n\nHuman: {prompt}\n\nAssistant:",
        "max_tokens_to_sample": 300,
        "temperature": 0.5,
        "top_k": 250,
        "top_p": 1,
        "stop_sequences": [
        "\\n\\nHuman:"
        ],
        "anthropic_version": "bedrock-2023-05-31"
    }
    for parameter in ['max_tokens_to_sample', 'temperature', 'top_k', 'top_p']:
        if parameter in kwargs:
            body[parameter] = kwargs[parameter]

    response = bedrock_runtime.invoke_model(
        modelId = "anthropic.claude-instant-v1",
        contentType = "application/json",
        accept = "application/json",
        body = json.dumps(body)
    )
    response_body = json.loads(response.get('body').read())
    return response_body['completion']


def invoke_claude_v1(prompt, **kwargs):
    body = {
        "prompt": f"\n\nHuman: {prompt}\n\nAssistant:",
        "max_tokens_to_sample": 300,
        "temperature": 0.5,
        "top_k": 250,
        "top_p": 1,
        "stop_sequences": [
            "\\n\\nHuman:"
        ],
        "anthropic_version": "bedrock-2023-05-31"
    }
    for parameter in ['max_tokens_to_sample', 'temperature', 'top_k', 'top_p']:
        if parameter in kwargs:
            body[parameter] = kwargs[parameter]

    response = bedrock_runtime.invoke_model(
        modelId = "anthropic.claude-v1",
        contentType = "application/json",
        accept = "*/*",
        body = json.dumps(body)
    )
    response_body = json.loads(response.get('body').read())
    return response_body['completion']


def invoke_claude_v2(prompt, **kwargs):
    body = {
        "prompt": f"\n\nHuman: {prompt}\n\nAssistant:",
        "max_tokens_to_sample": 300,
        "temperature": 0.5,
        "top_k": 250,
        "top_p": 1,
        "stop_sequences": [
        "\\n\\nHuman:"
        ],
        "anthropic_version": "bedrock-2023-05-31"
    }
    for parameter in ['max_tokens_to_sample', 'temperature', 'top_k', 'top_p']:
        if parameter in kwargs:
            body[parameter] = kwargs[parameter]

    response = bedrock_runtime.invoke_model(
        modelId = "anthropic.claude-v2",
        contentType = "application/json",
        accept = "*/*",
        body = json.dumps(body)
    )
    response_body = json.loads(response.get('body').read())
    return response_body['completion']


def invoke_cohere_command(prompt, **kwargs):
    body = {
        "prompt": prompt,
        "max_tokens": 100,
        "temperature": 0.8
    }
    for parameter in ['max_tokens', 'temperature']:
        if parameter in kwargs:
            body[parameter] = kwargs[parameter]

    response = bedrock_runtime.invoke_model(
        modelId = "cohere.command-text-v14",
        contentType = "application/json",
        accept = "application/json",
        body = json.dumps(body)
    )
    response_body = json.loads(response.get('body').read())
    return response_body['generations'][0]['text']


def invoke_cohere_command_light(prompt, **kwargs):
    body = {
        "prompt": prompt,
        "max_tokens": 100,
        "temperature": 0.8
    }
    for parameter in ['max_tokens', 'temperature']:
        if parameter in kwargs:
            body[parameter] = kwargs[parameter]

    response = bedrock_runtime.invoke_model(
        modelId = "cohere.command-light-text-v14",
        contentType = "application/json",
        accept = "application/json",
        body = json.dumps(body)
    )
    response_body = json.loads(response.get('body').read())
    return response_body['generations'][0]['text']


def invoke_cohere_embed_english(prompts: List[str]):
    body = {
        "texts": prompts,
        "input_type": 'search_document',
        "truncate": 'NONE'
    }

    response = bedrock_runtime.invoke_model(
        modelId = "cohere.embed-english-v3",
        contentType = "application/json",
        accept = "application/json",
        body = json.dumps(body)
    )
    response_body = json.loads(response.get('body').read())
    return response_body['embeddings']


def invoke_cohere_embed_multilingual(prompts: List[str]):
    body = {
        "texts": prompts,
        "input_type": 'search_document',
        "truncate": 'NONE'
    }

    response = bedrock_runtime.invoke_model(
        modelId = "cohere.embed-multilingual-v3",
        contentType = "application/json",
        accept = "application/json",
        body = json.dumps(body)
    )
    response_body = json.loads(response.get('body').read())
    return response_body['embeddings']


def invoke_llama2(prompt, **kwargs):
    body = {
        "prompt": prompt,
        "max_gen_len": 512,
        "top_p": 0.9,
        "temperature": 0.2
    }
    for parameter in ['max_gen_len', 'top_p', 'temperature']:
        if parameter in kwargs:
            body[parameter] = kwargs[parameter]

    response = bedrock_runtime.invoke_model(
        modelId = "meta.llama2-13b-chat-v1",
        contentType = "application/json",
        accept = "application/json",
        body = json.dumps(body)
    )
    response_body = json.loads(response.get('body').read())
    return response_body['generation']


def invoke_stable_diffusion_xl(prompt, **kwargs):
    text_prompts = [{"text": prompt, "weight": 1.0}]
    if 'negative_prompts' in kwargs:
        negative_prompts = kwargs['negative_prompts'].split(',')
        text_prompts = text_prompts + [{"text": negprompt, "weight": -1.0} for negprompt in negative_prompts]

    body = {
        "text_prompts": text_prompts,
        "cfg_scale": 10,
        "seed": 0,
        "steps": 50
    }

    for parameter in ['cfg_scale', 'seed', 'steps', 'style_preset']:
        if parameter in kwargs:
            body[parameter] = kwargs[parameter]

    response = bedrock_runtime.invoke_model(
        modelId = "stability.stable-diffusion-xl-v0",
        contentType = "application/json",
        accept = "application/json",
        body = json.dumps(body)
    )
    response_body = json.loads(response.get('body').read())
    artifacts = response_body.get('artifacts')
    base_64_img_str = artifacts[0].get('base64')
    img = Image.open(io.BytesIO(base64.decodebytes(bytes(base_64_img_str, "utf-8"))))
    
    if 'img_file' in kwargs:
        img.save(kwargs['img_file'])
    
    return img
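
You would call these helpers like any other function, for example (assuming your account has model access enabled for the model IDs used above):

if __name__ == '__main__':
    # Both take the same prompt; each builds the provider-specific body internally
    print(invoke_claude_v2("Explain black holes to 8th graders.", temperature=0.2))
    print(invoke_titan_text_express("Explain black holes to 8th graders.", maxTokenCount=512))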
AWS
answered 5 months ago
