import fs from 'fs';
import crypto from 'crypto';
import { LlamaTokenizer } from 'llama-tokenizer-js';

import { ChatMsg, ChatOptions, ChatStream, ModelInfo } from './api-defs.js';
import * as api from './api-connection.js';
import * as notes from './notes.js';
import * as tools from './tools.js';
import * as u from './utils.js';

export type Msg = ChatMsg;
export type Stream = ChatStream;
export type Options = ChatOptions;

let CONTEXT_WINDOW = 12000;
let CHAT_DIR = `../data/chats`;

/** Base URL of the local inference server used by `run` and `generate`. */
const OLLAMA_URL = 'http://127.0.0.1:11434';

/**
 * Tokens subtracted from the context window before budgeting messages.
 * NOTE(review): presumably headroom for the model's reply - confirm.
 */
const CONTEXT_HEADROOM_TOKENS = 768;
/** Safety margin (in tokens) applied when a message has to be cut to fit. */
const TRUNCATION_MARGIN_TOKENS = 10;
/** Per-message token overhead added on top of the encoded content length. */
const PER_MSG_OVERHEAD_TOKENS = 2;

// Layout of an encrypted chat file: hex( salt ) + hex( iv ) + hex( authTag ) + hex( ciphertext ).
const SALT_BYTES = 16;
const IV_BYTES = 12;
const AUTH_TAG_BYTES = 16;
const SALT_HEX_END = SALT_BYTES * 2;
const IV_HEX_END = SALT_HEX_END + IV_BYTES * 2;
const AUTH_TAG_HEX_END = IV_HEX_END + AUTH_TAG_BYTES * 2;

const tokenizer = new LlamaTokenizer();

/**
 * Overrides module settings from a config object.
 * Only truthy `contextWindow` and `chatDir` values are applied.
 */
export async function setConfig( config: any ) {
  if( config.contextWindow )
    CONTEXT_WINDOW = config.contextWindow;
  if( config.chatDir )
    CHAT_DIR = config.chatDir;
}

const TITLE_GEN_MODEL = "qwen25-custom-1b";

/**
 * Asks the title model for a short chat title based on the first exchange.
 *
 * @param firstMsg first user message of the conversation
 * @param response assistant reply to that message
 * @returns the generated title with known wrapper strings removed and
 *          surrounding whitespace trimmed
 */
async function generateTitle( firstMsg: Msg, response: string ) : Promise<string> {
  // Built by concatenation so the embedded newline and trailing spaces are explicit.
  const prompt =
    'Your purpose is to generate a title for a chat given the beginning of a conversation between a user and a chatbot. The title should be a short summary of the topic, no longer than 50 characters. Output the title between tags. \n'
    + `user: ${firstMsg.content} chatbot: ${response} `;

  const model = { modelname: TITLE_GEN_MODEL } as ModelInfo;
  let res = await generate( prompt, '', { model } as Options );

  // NOTE(review): the empty entries are no-ops as written; they look like tag
  // names that were lost somewhere (the prompt says "between tags") - confirm.
  const eraselist = [ "", "", "", "", "title: " ];
  for( const key of eraselist )
    res = res.replace( key, "" );

  return res.trim();
}

/**
 * Sends the conversation to the chat endpoint and streams the reply.
 *
 * @param msgs         conversation so far, oldest first
 * @param options      chat options (model, system prompt, title generation)
 * @param ignoreOutput when true, chunks are neither echoed to stdout nor
 *                     passed to `onChunk`
 * @param notelist     notes made available to the system prompt
 * @param onChunk      called with each text chunk as it arrives
 * @returns the assistant message, with `toolCall` set when the reply parsed
 *          as a tool call and `title` set when a title was generated
 * @throws Error when the HTTP request fails or the response has no body
 */
export async function run( msgs: Msg[], options: Options, ignoreOutput: boolean, notelist: notes.Note[], onChunk: Function ) : Promise<Msg> {
  const ctx = parseMsgs( msgs, options, notelist );
  const body = { model: options.model.modelname, messages: ctx };

  const res = await fetch( `${OLLAMA_URL}/api/chat`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify( body )
  } );

  if( !res.ok ) {
    console.error( res );
    throw new Error( "failed to receive response " + res.status );
  }

  const reader = res.body?.getReader();
  if( !reader )
    throw new Error( 'cannot get reader' );

  api.serverNotify( { loadedModel: options.model.name } );

  let content = '';
  let toolBuffer = '';
  let toolCall: tools.Call | undefined = undefined;

  // Emits plain text to the accumulated content and, unless muted, to the listeners.
  const emit = ( text: string ) => {
    content += text;
    if( !ignoreOutput ) {
      process.stdout.write( text );
      onChunk( text );
    }
  };

  // Handles one decoded JSON object from the stream.
  // NOTE(review): returning true presumably tells parseChunkedJson to stop - confirm.
  const onJson = ( json: any ) : boolean | void => {
    if( !json.message )
      return true;

    toolBuffer += json.message.content ?? '';

    // While the buffered text looks like a tool call, hold it back until it
    // parses as complete JSON instead of streaming it to the user.
    if( tools.isToolStr( toolBuffer ) ) {
      try {
        toolCall = JSON.parse( toolBuffer );
        if( toolCall ) {
          content += toolBuffer;
          return true;
        }
      } catch( e ) {
        // Incomplete JSON so far: keep buffering.
      }
      return;
    }

    const text = toolBuffer;
    toolBuffer = '';
    emit( text );
  };

  while( true ) {
    const read = await reader.read();
    if( read.done )
      break;
    if( !u.parseChunkedJson( read, onJson ) )
      break;
  }

  // The stream ended while text was still held back as a possible tool call
  // that never parsed: treat it as ordinary output rather than dropping it.
  if( !toolCall && toolBuffer ) {
    const leftover = toolBuffer;
    toolBuffer = '';
    emit( leftover );
  }

  console.log();

  const ret: Msg = {
    timestamp: u.getTimestamp(),
    role: 'assistant',
    content,
    toolCall
  };

  if( options.generateTitle && msgs.length === 1 ) {
    // The "not busy" notification is sent by generate() in this path.
    ret.title = await generateTitle( msgs[0], content );
  }
  else {
    api.serverNotify( { loadedModel: options.model.name, isBusy: false } );
  }

  return ret;
}

/**
 * Runs a single prompt/suffix completion against the generate endpoint.
 *
 * @param prompt  text before the completion
 * @param suffix  text after the completion
 * @param options chat options; `options.system` enables the system prompt
 * @param onChunk called with each text chunk as it arrives
 * @returns the concatenated completion text
 * @throws Error when the HTTP request fails or the response has no body
 */
export async function generate( prompt: string, suffix: string, options: Options, onChunk: Function = () => {} ) : Promise<string> {
  const body = {
    model: options.model.modelname,
    system: options.system ? getFullSystem( options, [] ) : '',
    prompt,
    suffix
  };

  const res = await fetch( `${OLLAMA_URL}/api/generate`, {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify( body )
  } );

  if( !res.ok ) {
    console.error( res );
    if( res.body )
      console.log( await res.text() );
    throw new Error( "failed to receive response " + res.status );
  }

  const reader = res.body?.getReader();
  if( !reader )
    throw new Error( 'cannot get reader' );

  api.serverNotify( { loadedModel: options.model.name, isBusy: true } );

  let content = '';
  const onJson = ( json: any ) : boolean | void => {
    const msg = json.response;
    if( !msg )
      return true;

    content += msg;
    process.stdout.write( msg );
    onChunk( msg );
  };

  try {
    while( true ) {
      const read = await reader.read();
      if( read.done )
        break;
      if( !u.parseChunkedJson( read, onJson ) )
        break;
    }
  }
  finally {
    // Always clear the busy flag, even when reading the stream throws.
    api.serverNotify( { loadedModel: options.model.name, isBusy: false } );
  }

  console.log();
  return content;
}

/** Derives the AES-256 key for a chat file from the app secret and the file's salt. */
function deriveChatKey( salt: Buffer ) : Buffer {
  return crypto.pbkdf2Sync( u.jwt_secret(), salt, 100000, 32, 'sha512' );
}

/**
 * Encrypts a message log with AES-256-GCM and writes it to
 * `<CHAT_DIR>/<uuid>.json` as hex( salt ) + hex( iv ) + hex( authTag ) + hex( ciphertext ).
 * The chat directory is created if it does not exist yet.
 */
export function save( msglog: Msg[], uuid: string ) : void {
  const chatfile = `${CHAT_DIR}/${uuid}.json`;
  const chatJson = JSON.stringify( msglog );

  const salt = crypto.randomBytes( SALT_BYTES );
  const iv = crypto.randomBytes( IV_BYTES );
  const cipher = crypto.createCipheriv( 'aes-256-gcm', deriveChatKey( salt ), iv );

  const encrypted = cipher.update( chatJson, 'utf8', 'hex' ) + cipher.final( 'hex' );
  const authTag = cipher.getAuthTag().toString( 'hex' );

  const fullStr = salt.toString( 'hex' ) + iv.toString( 'hex' ) + authTag + encrypted;

  fs.mkdirSync( CHAT_DIR, { recursive: true } );
  fs.writeFileSync( chatfile, fullStr );
  console.log( "== [ chat saved ] ==" );
}

/**
 * Reads and decrypts a chat file written by `save`.
 *
 * @param filename chat id, without directory or `.json` extension
 * @returns the stored messages, or an empty list when the file is missing,
 *          cannot be decrypted, or does not contain a JSON array
 */
export function load( filename: string ) : Msg[] {
  const chatfile = `${CHAT_DIR}/${filename}.json`;

  try {
    const contents = fs.readFileSync( chatfile, 'utf8' );

    const salt = Buffer.from( contents.slice( 0, SALT_HEX_END ), 'hex' );
    const iv = Buffer.from( contents.slice( SALT_HEX_END, IV_HEX_END ), 'hex' );
    const authTag = Buffer.from( contents.slice( IV_HEX_END, AUTH_TAG_HEX_END ), 'hex' );
    const data = contents.slice( AUTH_TAG_HEX_END );

    const decipher = crypto.createDecipheriv( 'aes-256-gcm', deriveChatKey( salt ), iv );
    decipher.setAuthTag( authTag );

    const decrypted = decipher.update( data, 'hex', 'utf8' ) + decipher.final( 'utf8' );
    const parsed: unknown = JSON.parse( decrypted );
    if( !Array.isArray( parsed ) ) {
      console.error( `chat file ${chatfile} does not contain a message list` );
      return [];
    }

    return parsed as Msg[];
  }
  catch( e ) {
    // A missing file is not reported; anything else (corrupt file, wrong key,
    // bad JSON) is logged so it does not fail silently.
    const code = ( e as { code?: unknown } | null )?.code;
    if( code !== 'ENOENT' )
      console.error( `failed to load chat ${chatfile}:`, e );
    return [];
  }
}

/**
 * Builds the message list to send to the model: the conversation truncated
 * to the context budget, preceded by the system prompt when there is one.
 * The system prompt's tokens are reserved before the conversation is budgeted.
 */
export function parseMsgs( msgs: Msg[], options: Options, notelist: notes.Note[] ) : Msg[] {
  const fullSystem = options.system ? getFullSystem( options, notelist ) : '';
  const hasSystem = fullSystem.length > 1;

  const reservedTokens = hasSystem
    ? tokenizer.encode( fullSystem ).length + PER_MSG_OVERHEAD_TOKENS
    : 0;

  const res = truncateMsgs( msgs, reservedTokens );
  if( hasSystem ) {
    res.unshift( {
      timestamp: u.getTimestamp(),
      role: 'system',
      content: fullSystem
    } );
  }

  return res;
}

/**
 * Assembles the system prompt from `options.system.model` and
 * `options.system.user` and fills in the first occurrence of each of the
 * `<|system_time|>`, `<|tools_list|>` and `<|notes_str|>` placeholders.
 *
 * @returns the expanded prompt, or '' when `options.system` is not set
 */
export function getFullSystem( options: Options, notelist: notes.Note[] ) : string {
  if( !options.system )
    return '';

  const notesPrompt = notes.getPromptStr( notelist );
  const timePrompt = `the current system time is ${u.getTimestamp()}.`;
  const toolsPrompt = tools.getPromptStr( options );

  let fullSystem = '';
  if( options.system.model )
    fullSystem += options.system.model;
  if( options.system.user )
    fullSystem += options.system.user;

  // Replacer functions insert the values literally; a plain string replacement
  // would interpret sequences such as "$&" or "$$" inside notes or tool text.
  return fullSystem
    .replace( '<|system_time|>', () => timePrompt )
    .replace( '<|tools_list|>', () => toolsPrompt )
    .replace( '<|notes_str|>', () => notesPrompt );
}

/**
 * Renders the text attachments of a message as a block to append to its
 * content. Attachments whose `type` is not 'text' are skipped.
 *
 * @returns '' when the message has no text attachments; otherwise a block
 *          starting with a blank line so it is separated from the message text
 */
function parseMsgFiles( msg: Msg ) : string {
  if( !msg.files || !msg.files.length )
    return '';

  let attachments = '';
  for( const f of Object.values( msg.files ) ) {
    if( f.type != 'text' )
      continue;

    attachments += `\n\n[file: ${f.name}]\n`;
    attachments += f.content;
  }

  if( !attachments )
    return '';

  return '\n\nattached files:' + attachments;
}

/**
 * Selects the most recent messages that fit into the token budget
 * (`CONTEXT_WINDOW` minus headroom minus `reservedTokens`).
 *
 * Messages are walked newest to oldest. The first message that does not fit
 * is cut to its tail (prefixed with "...") so it fills the remaining budget,
 * and everything older is dropped. Returned messages are copies with their
 * text attachments folded into `content` and `files` removed.
 */
function truncateMsgs( msgs: Msg[], reservedTokens: number ) : Msg[] {
  const maxTokens = CONTEXT_WINDOW - CONTEXT_HEADROOM_TOKENS - reservedTokens;
  const start = Date.now();

  const ret: Msg[] = [];
  let totalTokens = 0;

  for( let i = msgs.length - 1; i >= 0; --i ) {
    // Deep copy so the caller's message objects are left untouched.
    const msg = JSON.parse( JSON.stringify( msgs[i] ) );
    let content: string = msg.content + parseMsgFiles( msg );
    const tokenCount = tokenizer.encode( content ).length;

    const overflows = totalTokens + tokenCount > maxTokens;
    if( overflows ) {
      // Very short messages are not worth cutting.
      if( content.length <= 20 )
        break;

      const remainingTokens = maxTokens - totalTokens - TRUNCATION_MARGIN_TOKENS;
      if( remainingTokens <= 0 )
        break;

      // Approximate the cut in characters from the share of tokens that still fits.
      const keepLen = Math.floor( content.length * ( remainingTokens / tokenCount ) );
      if( keepLen <= 0 )
        break;

      content = "..." + content.slice( content.length - keepLen );
    }

    delete msg.files;
    msg.content = content;
    ret.unshift( msg );

    if( overflows )
      break;

    totalTokens += tokenCount + PER_MSG_OVERHEAD_TOKENS;
  }

  const end = Date.now();
  console.log( `tokenized ${ret.length} messages in ${end - start}ms` );

  return ret;
}