Pre-flight cost check and post-call actual deduction, shown in multiple languages (Go, Python, TypeScript, Java, C#, JavaScript, C++, Rust, Ruby).
// 1 unit = 1 USD cent; limit: 1000 = $10/day per tenant
// NOTE(review): unitsPerCent = 100 implies 1 unit = 1/100 cent, which
// conflicts with the "1 unit = 1 cent; limit 1000" comment above — confirm
// the intended unit scale against the configured limit.
const (
	costPer1KTokens = 3   // cents charged per 1000 tokens
	unitsPerCent    = 100 // budget units per cent
)

// CallLLM enforces a per-tenant spend budget around a chat-completion call:
// it pre-checks an estimated cost with the rate limiter, performs the call,
// then records the actual cost from the provider's usage report.
// Returns ErrBudgetExhausted when the tenant's budget is spent.
func (s *LLMService) CallLLM(ctx context.Context, tenantID, prompt string) (string, error) {
	// Rough pre-flight estimate: ~1.3 tokens per whitespace-separated word.
	// Use integer CEILING arithmetic (words * 1.3 / 1000 * cost * units,
	// scaled by 10*1000 = 10000) so a small request never rounds down to
	// zero units and slips past a nearly exhausted budget — the original
	// int64(...) conversion truncated toward zero.
	words := int64(len(strings.Fields(prompt)))
	estUnits := (words*13*costPer1KTokens*unitsPerCent + 9999) / 10000
	decision, err := s.rlaas.Check(ctx, &rlaas.CheckRequest{
		UserID:   tenantID,
		Resource: "llm:spend",
		Cost:     estUnits,
	})
	if err != nil {
		return "", err
	}
	if !decision.Allowed {
		return "", ErrBudgetExhausted
	}
	resp, err := s.openai.CreateChatCompletion(ctx, ...)
	if err != nil {
		return "", err
	}
	// Deduct the provider-reported actual token count, rounded up so
	// partial units are never billed as zero.
	total := int64(resp.Usage.TotalTokens)
	actualUnits := (total*costPer1KTokens*unitsPerCent + 999) / 1000
	// Best-effort deduction: the completion already succeeded, so a failed
	// Record must not fail the request. TODO(review): a dropped Record
	// under-counts the tenant's spend — surface via log/metric.
	_ = s.rlaas.Record(ctx, &rlaas.RecordRequest{
		UserID:   tenantID,
		Resource: "llm:spend",
		Units:    actualUnits,
	})
	return resp.Choices[0].Message.Content, nil
}
# 1 unit = 1 USD cent; $10/day budget = limit 1000
# NOTE(review): UNITS_PER_CENT = 100 implies 1 unit = 1/100 cent, which
# conflicts with the "1 unit = 1 cent; limit 1000" comment above — confirm
# the intended unit scale against the configured limit.
COST_PER_1K = 3  # cents charged per 1000 tokens
UNITS_PER_CENT = 100  # budget units per cent


def call_llm(tenant_id: str, prompt: str) -> str:
    """Enforce a per-tenant spend budget around a chat-completion call.

    Pre-checks an estimated cost with the rate limiter, performs the call,
    then records the actual cost from the provider's usage report.

    Args:
        tenant_id: Tenant whose budget is checked and charged.
        prompt: The user prompt; its word count drives the cost estimate.

    Returns:
        The completion text from the first choice.

    Raises:
        BudgetExhausted: if the tenant's spend budget is exhausted.
    """
    import math

    # Rough estimate: ~1.3 tokens per whitespace-separated word. Round UP
    # so a small request never rounds to zero units and slips past a nearly
    # exhausted budget — the original int() truncated toward zero.
    est_tokens = len(prompt.split()) * 1.3
    est_units = math.ceil(est_tokens / 1000 * COST_PER_1K * UNITS_PER_CENT)
    decision = client.check(CheckRequest(
        user_id=tenant_id, resource="llm:spend", cost=est_units
    ))
    if not decision.allowed:
        raise BudgetExhausted(reset_at=decision.retry_after)
    response = openai.chat.completions.create(...)
    # Deduct the provider-reported actual token count, rounded up so
    # partial units are never billed as zero.
    actual_units = math.ceil(
        response.usage.total_tokens / 1000 * COST_PER_1K * UNITS_PER_CENT
    )
    client.record(RecordRequest(
        user_id=tenant_id, resource="llm:spend", units=actual_units
    ))
    return response.choices[0].message.content
// 1 unit = 1 USD cent; $10/day = limit 1000
const COST_PER_1K_CENTS = 3;

/**
 * Enforce a per-tenant spend budget around a chat-completion call:
 * pre-check an estimated cost, perform the call, then record the
 * actual cost from the provider's usage report.
 *
 * @throws BudgetExhausted when the tenant's daily budget is spent.
 */
async function callLLM(tenantId: string, prompt: string): Promise<string> {
  // ~1.3 tokens per word. Split on runs of whitespace and drop empty
  // strings so repeated/leading spaces (or an empty prompt) don't inflate
  // the word count the way split(' ') did.
  const words = prompt.split(/\s+/).filter(Boolean).length;
  const estTokens = words * 1.3;
  // Ceil so a small request never rounds to zero units.
  const estUnits = Math.ceil(estTokens / 1000 * COST_PER_1K_CENTS);
  const decision = await rlaas.check({
    userId: tenantId, resource: 'llm:spend', cost: estUnits,
  });
  if (!decision.allowed) {
    throw new BudgetExhausted({ tenantId, resetAt: decision.retryAfter });
  }
  const response = await openai.chat.completions.create({...});
  // Deduct the provider-reported actual token count, rounded up so
  // partial units are never billed as zero.
  const actualUnits = Math.ceil(
    response.usage.total_tokens / 1000 * COST_PER_1K_CENTS
  );
  await rlaas.record({ userId: tenantId, resource: 'llm:spend', units: actualUnits });
  return response.choices[0].message.content;
}
// pre-flight cost check and post-call deduction
import io.rlaas.sdk.RlaasClient;
import io.rlaas.sdk.model.*;
RlaasClient rlaas = new RlaasClient("http://rlaas:8080");

/**
 * Enforces a per-tenant spend budget around a chat-completion call:
 * pre-checks an estimated cost, performs the call, then records the
 * actual cost from the provider's usage report (3 cents per 1K tokens,
 * 1 unit = 1 cent).
 *
 * @throws BudgetExhaustedException when the tenant's budget is spent
 */
String callLLM(String tenantId, String prompt) throws Exception {
    // ~1.3 tokens per word; split on whitespace runs so repeated spaces
    // don't inflate the count, and round up so small prompts never
    // estimate to zero tokens.
    int estTokens = (int) Math.ceil(prompt.split("\\s+").length * 1.3);
    // BUG FIX: the original `estTokens / 1000 * 3` divided FIRST in
    // integer math, so any prompt under ~1000 tokens was estimated at
    // 0 units and bypassed the budget. Multiply first, then ceiling-divide.
    int estUnits = (estTokens * 3 + 999) / 1000;
    Decision decision = rlaas.checkLimit(new CheckRequest(tenantId, "llm:spend", estUnits));
    if (!decision.isAllowed())
        throw new BudgetExhaustedException(decision.getRetryAfter());
    var response = openAi.createChatCompletion(...);
    // Same ordering fix for the actual deduction, rounded up so partial
    // units are never billed as zero.
    int actualUnits = (response.getUsage().getTotalTokens() * 3 + 999) / 1000;
    rlaas.record(new RecordRequest(tenantId, "llm:spend", actualUnits));
    return response.getChoices().get(0).getMessage().getContent();
}
// pre-flight cost check and post-call deduction
using Rlaas.Sdk;
using Rlaas.Sdk.Models;
var rlaas = new RlaasClient("http://rlaas:8080");

/// <summary>
/// Enforces a per-tenant spend budget around a chat-completion call:
/// pre-checks an estimated cost, performs the call, then records the
/// actual cost from the provider's usage report (3 cents per 1K tokens,
/// 1 unit = 1 cent). Throws BudgetExhaustedException when the budget
/// is spent.
/// </summary>
async Task<string> CallLLMAsync(string tenantId, string prompt)
{
    // ~1.3 tokens per word; RemoveEmptyEntries so repeated spaces don't
    // inflate the count, Ceiling so small prompts never estimate to zero.
    var words = prompt.Split(' ', StringSplitOptions.RemoveEmptyEntries).Length;
    var estTokens = (int)Math.Ceiling(words * 1.3);
    // BUG FIX: the original `estTokens / 1000 * 3` divided FIRST in
    // integer math, so any prompt under ~1000 tokens was estimated at
    // 0 units and bypassed the budget. Multiply first, then ceiling-divide.
    var estUnits = (estTokens * 3 + 999) / 1000;
    var decision = await rlaas.CheckLimitAsync(
        new CheckRequest(tenantId, "llm:spend", estUnits));
    if (!decision.Allowed)
        throw new BudgetExhaustedException(decision.RetryAfter);
    var response = await openAi.CreateChatCompletionAsync(...);
    // Same ordering fix for the actual deduction, rounded up so partial
    // units are never billed as zero.
    var actualUnits = (response.Usage.TotalTokens * 3 + 999) / 1000;
    await rlaas.RecordAsync(
        new RecordRequest(tenantId, "llm:spend", actualUnits));
    return response.Choices[0].Message.Content;
}
// cost budget enforcement (Node.js)
const { RlaasClient } = require('@rlaas/node-sdk');
const client = new RlaasClient('http://rlaas:8080');
/**
 * Enforce a per-user USD spend budget around an LLM call: verify the
 * estimated cost with the rate limiter first, then record the actual
 * cost reported by the call afterwards.
 */
async function costAwareLLMCall(userId, prompt, estimatedCost) {
  const checkPayload = {
    user_id: userId,
    resource: 'ai:spend-usd',
    cost: estimatedCost,
  };
  const decision = await client.check(checkPayload);
  if (!decision.allowed) {
    throw new Error(`Budget exhausted – retry after ${decision.retry_after}s`);
  }
  const result = await callLLM(prompt);
  const recordPayload = {
    user_id: userId,
    resource: 'ai:spend-usd',
    cost: result.actual_cost,
  };
  await client.record(recordPayload);
  return result;
}
// cost budget enforcement (C++)
#include "rlaas/client.h"
rlaas::Client client("http://rlaas:8080");

// Enforce a per-user USD spend budget around an LLM call: verify the
// estimated cost with the rate limiter up front, then record the actual
// cost after the call. Throws BudgetExhausted when the budget is spent.
LLMResult cost_aware_llm_call(const std::string& user_id,
                              const std::string& prompt,
                              double estimated_cost) {
    rlaas::CheckRequest check_request;
    check_request.user_id = user_id;
    check_request.resource = "ai:spend-usd";
    check_request.cost = estimated_cost;

    const auto verdict = client.check(check_request);
    if (!verdict.allowed) {
        throw BudgetExhausted(verdict.retry_after_ms);
    }

    auto llm_result = call_llm(prompt);
    client.record(user_id, "ai:spend-usd", llm_result.actual_cost);
    return llm_result;
}
// cost budget enforcement (Rust)
use rlaas_sdk::{Client, CheckRequest};
let client = Client::new("http://rlaas:8080");
/// Enforce a per-user USD spend budget around an LLM call: verify the
/// estimated cost with the rate limiter up front, then record the actual
/// cost after the call. Errors with "Budget exhausted" when denied.
async fn cost_aware_llm_call(
    client: &Client, user_id: &str, prompt: &str, est_cost: f64,
) -> Result<LLMResult> {
    let request = CheckRequest {
        user_id: user_id.into(),
        resource: "ai:spend-usd".into(),
        cost: Some(est_cost),
        ..Default::default()
    };
    let verdict = client.check(&request).await?;
    if !verdict.allowed {
        return Err(anyhow!("Budget exhausted"));
    }
    let llm_result = call_llm(prompt).await?;
    client
        .record(user_id, "ai:spend-usd", llm_result.actual_cost)
        .await?;
    Ok(llm_result)
}
# cost budget enforcement (Ruby)
require 'rlaas_sdk'
# Shared Rlaas client. Stored in a CONSTANT because a method defined with
# `def` cannot see surrounding top-level local variables in Ruby — the
# original top-level `client` local raised NameError inside the method on
# first call.
CLIENT = Rlaas::Client.new('http://rlaas:8080')

# Enforce a per-user USD spend budget around an LLM call: verify the
# estimated cost with the rate limiter up front, then record the actual
# cost after the call. Raises BudgetExhausted when the budget is spent.
def cost_aware_llm_call(user_id, prompt, estimated_cost)
  decision = CLIENT.check(
    user_id: user_id,
    resource: 'ai:spend-usd',
    cost: estimated_cost
  )
  raise BudgetExhausted unless decision.allowed

  result = call_llm(prompt)
  CLIENT.record(user_id: user_id,
                resource: 'ai:spend-usd', cost: result.actual_cost)
  result
end