Session API
The Session is the main interface for text generation with language models. It supports both on-device (WebGPU/WASM) and cloud-based (OpenAI, Anthropic) inference providers.
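Quick Start
A typical flow is to create a session, generate a response with one of the configured models, and dispose of the session when finished. The sketch below is a minimal illustration using an on-device model; the model ID and options are just examples and are covered in detail in the sections that follow.
import { createSession } from 'agentary-js';
// Minimal sketch: one on-device provider, one generation, then cleanup.
const session = await createSession({
  models: [{
    runtime: 'transformers-js',
    model: 'onnx-community/Qwen3-0.6B-ONNX',
    quantization: 'q4',
    engine: 'webgpu'
  }]
});
const response = await session.createResponse('onnx-community/Qwen3-0.6B-ONNX', {
  messages: [{ role: 'user', content: 'Hello!' }]
});
if (response.type === 'streaming') {
  for await (const chunk of response.stream) {
    console.log(chunk.token);
  }
} else {
  console.log(response.content);
}
await session.dispose();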
createSession
Creates a new inference session with one or more providers.
function createSession(args?: CreateSessionArgs): Promise<Session>
Parameters
import { DataType, DeviceType } from '@huggingface/transformers';
interface CreateSessionArgs {
models?: InferenceProviderConfig[];
}
type InferenceProviderConfig = DeviceProviderConfig | CloudProviderConfig;
interface DeviceProviderConfig {
runtime: 'transformers-js';
model: string;
quantization: DataType; // 'q4' | 'q8' | 'fp16' | 'fp32' | 'int8' | 'uint8' | 'bnb4' | 'q4f16'
engine?: DeviceType; // 'webgpu' | 'wasm' | 'auto'
hfToken?: string; // Optional: for private Hugging Face models
}
interface CloudProviderConfig {
runtime: 'openai' | 'anthropic' | 'custom';
model: string;
proxyUrl: string;
modelProvider?: 'anthropic' | 'openai'; // Optional: enables message format transformation
timeout?: number; // Default: 60000 (60 seconds)
maxRetries?: number; // Default: 3
headers?: Record<string, string>;
}
Examples
Device Provider (On-Device Inference)
import { createSession } from 'agentary-js';
const session = await createSession({
models: [{
runtime: 'transformers-js',
model: 'onnx-community/Qwen3-0.6B-ONNX',
quantization: 'q4', // DataType value
engine: 'webgpu' // DeviceType value
}]
});
Cloud Provider (Anthropic Claude)
const session = await createSession({
models: [{
runtime: 'anthropic',
model: 'claude-3-5-sonnet-20241022',
proxyUrl: 'https://your-backend.com/api/anthropic',
modelProvider: 'anthropic',
timeout: 30000,
maxRetries: 3
}]
});
Cloud Provider (OpenAI)
const session = await createSession({
models: [{
runtime: 'openai',
model: 'gpt-4o',
proxyUrl: 'https://your-backend.com/api/openai',
modelProvider: 'openai'
}]
});
Multi-Provider Setup
const session = await createSession({
models: [
// On-device model
{
runtime: 'transformers-js',
model: 'onnx-community/Qwen3-0.6B-ONNX',
quantization: 'q4',
engine: 'webgpu'
},
// Cloud model
{
runtime: 'anthropic',
model: 'claude-3-5-sonnet-20241022',
proxyUrl: 'https://your-backend.com/api/anthropic',
modelProvider: 'anthropic'
}
]
});
Session Methods
createResponse
Generate text with the specified model.
createResponse(
modelId: string,
args: GenerateArgs
): Promise<ModelResponse>
Parameters
interface GenerateArgs {
messages: Message[];
max_new_tokens?: number;
tools?: ToolDefinition[]; // Tool definitions only (implementations handled separately)
stream?: boolean; // Request streaming response (device providers only)
temperature?: number;
enable_thinking?: boolean; // Enable thinking mode where supported
top_p?: number;
top_k?: number;
repetition_penalty?: number;
stop?: string[];
seed?: number;
deterministic?: boolean;
}
Returns
type ModelResponse = StreamingResponse | NonStreamingResponse;
interface StreamingResponse {
type: 'streaming';
stream: AsyncIterable<TokenStreamChunk>;
}
interface TokenStreamChunk {
token: string;
tokenId: number;
isFirst: boolean;
isLast: boolean;
ttfbMs?: number;
tokensPerSecond?: number;
}
interface NonStreamingResponse {
type: 'complete';
content: string;
usage?: {
promptTokens: number;
completionTokens: number;
totalTokens: number;
};
toolCalls?: Array<{
id: string;
type: 'function';
function: {
name: string;
arguments: string;
};
}>;
finishReason?: 'stop' | 'length' | 'tool_calls' | 'content_filter';
reasoning?: string;
}
Examples
Streaming Response (Device Provider)
const response = await session.createResponse('onnx-community/Qwen3-0.6B-ONNX', {
messages: [
{ role: 'user', content: 'Hello!' }
],
temperature: 0.7,
max_new_tokens: 200
});
if (response.type === 'streaming') {
for await (const chunk of response.stream) {
console.log(chunk.token);
}
}
Non-Streaming Response (Cloud Provider)
const response = await session.createResponse('claude-3-5-sonnet-20241022', {
messages: [
{ role: 'user', content: 'Explain quantum computing' }
]
});
if (response.type === 'complete') {
console.log(response.content);
console.log('Tokens used:', response.usage?.totalTokens);
}
Handling Both Response Types
const response = await session.createResponse(modelId, { messages });
if (response.type === 'streaming') {
for await (const chunk of response.stream) {
process.stdout.write(chunk.token);
}
} else {
console.log(response.content);
}
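Sampling and Thinking Options
The remaining GenerateArgs fields can be combined as needed. The sketch below is illustrative; whether a given model honors seed, deterministic, or enable_thinking depends on the provider and model.
const response = await session.createResponse('claude-3-5-sonnet-20241022', {
  messages: [{ role: 'user', content: 'Summarize the plot of Hamlet in two sentences.' }],
  max_new_tokens: 300,
  temperature: 0.2,
  top_p: 0.9,
  stop: ['\n\n'],
  seed: 42,
  deterministic: true,
  enable_thinking: true
});
if (response.type === 'complete') {
  // reasoning is only present when the model surfaces thinking output
  if (response.reasoning) {
    console.log('Reasoning:', response.reasoning);
  }
  console.log(response.content);
}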
registerModels
Register additional providers after session creation.
registerModels(models: InferenceProviderConfig[]): Promise<void>
Example
// Add a cloud provider to an existing session
await session.registerModels([{
runtime: 'openai',
model: 'gpt-4o',
proxyUrl: 'https://your-backend.com/api/openai',
modelProvider: 'openai'
}]);
dispose
Clean up session resources and terminate all providers.
dispose(): Promise<void>
Example
await session.dispose();
on
Subscribe to lifecycle events.
on(eventType: string | '*', handler: EventHandler): UnsubscribeFn
Example
const unsubscribe = session.on('generation:token', (event) => {
console.log(event.token);
});
// Later: unsubscribe()
off
Unsubscribe from events.
off(eventType: string | '*', handler: EventHandler): void
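Example
A minimal sketch mirroring the on example above; keep a reference to the handler so the same function can be passed to off.
const handler = (event) => {
  console.log(event.token);
};
session.on('generation:token', handler);
// Later, remove only this handler:
session.off('generation:token', handler);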
Types
Message
interface Message {
role: 'user' | 'assistant' | 'system';
content: string | MessageContent[];
}
type MessageContent = TextContent | ToolUseContent | ToolResultContent;
interface TextContent {
type: 'text';
text: string;
}
interface ToolUseContent {
type: 'tool_use';
id: string;
name: string;
arguments: Record<string, any>;
}
interface ToolResultContent {
type: 'tool_result';
tool_use_id: string;
result: string;
}
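A sketch of how these content blocks compose into a conversation; the tool name, IDs, and which role carries the tool_result block are illustrative assumptions.
const messages: Message[] = [
  { role: 'system', content: 'You are a helpful assistant.' },
  { role: 'user', content: 'What is the weather in Paris?' },
  {
    role: 'assistant',
    content: [
      { type: 'text', text: 'Let me check the weather.' },
      { type: 'tool_use', id: 'call_1', name: 'get_weather', arguments: { city: 'Paris' } }
    ]
  },
  {
    // Illustrative: the role that carries tool results may vary by provider
    role: 'user',
    content: [
      { type: 'tool_result', tool_use_id: 'call_1', result: '18°C and sunny' }
    ]
  }
];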
Tool
Tools are defined separately from their implementations:
interface Tool {
definition: ToolDefinition;
implementation?: (...args: any[]) => any;
}
interface ToolDefinition {
name: string;
description: string;
parameters: {
type: 'object';
properties: Record<string, any>;
required: string[];
};
}
Note: When passing tools to createResponse(), you only pass ToolDefinition[] (the definitions). The implementations are handled separately in your application code or registered with agent sessions.
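A sketch of that split: the definition is passed to createResponse(), and your own code runs the matching implementation when a tool call comes back. The tool name and handling logic here are illustrative.
const weatherTool: ToolDefinition = {
  name: 'get_weather',
  description: 'Look up the current weather for a city',
  parameters: {
    type: 'object',
    properties: {
      city: { type: 'string', description: 'City name' }
    },
    required: ['city']
  }
};
const response = await session.createResponse(modelId, {
  messages: [{ role: 'user', content: 'What is the weather in Paris?' }],
  tools: [weatherTool]
});
if (response.type === 'complete' && response.toolCalls) {
  for (const call of response.toolCalls) {
    // function.arguments is a JSON string per NonStreamingResponse
    const args = JSON.parse(call.function.arguments);
    if (call.function.name === 'get_weather') {
      // Run your own implementation here (illustrative)
      console.log('Would fetch weather for', args.city);
    }
  }
}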
Provider Errors
class ProviderError extends Error {
code: string;
statusCode?: number;
}
class ProviderConfigurationError extends ProviderError {}
class ProviderNetworkError extends ProviderError {}
class ProviderTimeoutError extends ProviderError {}
class ProviderAPIError extends ProviderError {
statusCode: number;
}
Example: Error Handling
import {
ProviderConfigurationError,
ProviderTimeoutError,
ProviderNetworkError,
ProviderAPIError
} from 'agentary-js';
try {
const response = await session.createResponse(modelId, args);
} catch (error) {
if (error instanceof ProviderConfigurationError) {
console.error('Invalid configuration:', error.message);
} else if (error instanceof ProviderTimeoutError) {
console.error('Request timeout');
} else if (error instanceof ProviderNetworkError) {
console.error('Network error:', error.message);
} else if (error instanceof ProviderAPIError) {
console.error('API error:', error.statusCode);
}
}