Streaming OpenAI responses in React
April 2024
TL;DR
Iterators and generators make interacting with streaming data more straightforward.
Introduction
I recently deployed some AI functionality and wanted to help others trying to stream responses from generative AI APIs such as OpenAI's. Others have done a great job explaining the general concept of streaming responses, so if you want to learn more about the subject I recommend starting with MDN's Using readable streams.
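For reference, the lower-level approach from that guide looks roughly like this (a sketch; /streaming-response is the endpoint we'll build below):
// manually pull chunks off the response body until the stream is exhausted
const response = await fetch('/streaming-response', { method: 'POST' });
const reader = response.body.getReader();
const decoder = new TextDecoder();

while (true) {
  const { done, value } = await reader.read();
  if (done) break;
  // value is a Uint8Array; decode it as it arrives
  console.log(decoder.decode(value, { stream: true }));
}
It works, but the reader bookkeeping is exactly the sort of thing generators can hide, which is where we'll end up below.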
Although my example app has an Express backend and React frontend, this functionality can be achieved with other backend and frontend frameworks. I will cover how to stream responses from OpenAI and will also touch on interacting with Anthropic's Claude models through AWS Bedrock.
Setup
Here are the important aspects of our backend, starting with server/server.js.
/* eslint-disable no-undef */
import { config } from 'dotenv';
import express from 'express';
import prompt from './prompt.js';

config();

const { PORT = 5000, HOST = 'localhost' } = process.env;

const app = express();

// parse JSON request bodies; `strict: false` also accepts a bare
// JSON-encoded string, which is what our client sends
app.use(express.json({ strict: false }));

app.post('/streaming-response', async (req, res) => {
  const { body } = req;
  // 'Transfer-Encoding: chunked' must be set to stream our response
  res.set({
    'Content-Type': 'text/plain',
    'Transfer-Encoding': 'chunked',
  });
  try {
    // `prompt` resolves to an async-iterable stream of completion chunks
    const stream = await prompt(body);
    for await (const chunk of stream) {
      // write a chunk of the delta content
      res.write(`${chunk.choices[0]?.delta?.content || ''}`);
    }
  } catch (e) {
    // the response may already be partially written, so just log and end
    console.error(e);
  }
  res.end();
});

app.listen(PORT, HOST, () => {
  console.log(`listening on http://${HOST}:${PORT}`);
});
The prompt module (server/prompt.js) wraps the OpenAI SDK; with stream: true, the SDK returns an async-iterable stream of chunks instead of a single completion.
/* eslint-disable no-undef */
import OpenAI from 'openai';
import { config } from 'dotenv';

config();

const systemPrompt = 'You are a helpful assistant...';

// create the client once at module scope rather than on every request
const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
});

export default function prompt(params) {
  return openai.chat.completions.create({
    model: 'gpt-4-turbo-preview',
    stream: true,
    messages: [
      {
        role: 'system',
        content: systemPrompt,
      },
      {
        role: 'user',
        content: params,
      },
    ],
    temperature: 1,
    max_tokens: 512,
    top_p: 1,
    frequency_penalty: 0,
    presence_penalty: 0,
  });
}
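As promised, the same pattern extends to Anthropic's Claude models through AWS Bedrock. Below is a minimal sketch using @aws-sdk/client-bedrock-runtime; the region, model ID, and event handling reflect my assumptions, so check them against what your account has enabled. Because this version yields plain strings, the server's write loop would call res.write(chunk) directly instead of reading chunk.choices[0]?.delta?.content.
import {
  BedrockRuntimeClient,
  InvokeModelWithResponseStreamCommand,
} from '@aws-sdk/client-bedrock-runtime';

// region and model ID are assumptions; use whatever your account has enabled
const client = new BedrockRuntimeClient({ region: 'us-east-1' });

export default async function* prompt(params) {
  const response = await client.send(
    new InvokeModelWithResponseStreamCommand({
      modelId: 'anthropic.claude-3-sonnet-20240229-v1:0',
      contentType: 'application/json',
      accept: 'application/json',
      body: JSON.stringify({
        anthropic_version: 'bedrock-2023-05-31',
        max_tokens: 512,
        messages: [{ role: 'user', content: params }],
      }),
    })
  );
  // response.body is an async iterable of events whose payloads are JSON bytes
  for await (const event of response.body) {
    if (!event.chunk?.bytes) continue;
    const parsed = JSON.parse(new TextDecoder().decode(event.chunk.bytes));
    // Claude streams several event types; only content_block_delta carries text
    if (parsed.type === 'content_block_delta') {
      yield parsed.delta.text;
    }
  }
}
Since calling an async generator returns its iterator immediately, the server's const stream = await prompt(body) line keeps working unchanged.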
The relevant parts of package.json:
{
  "name": "streaming-ai-response",
  "type": "module",
  "proxy": "http://localhost:5000",
  "license": "MIT",
  "author": "Rob McVey <mcvey.rob@gmail.com>",
  "scripts": {
    "start:react": "vite",
    "start:server": "node server/server.js",
    "dev": "run-p start:*",
    "build": "vite build",
    "lint": "eslint . --ext js,jsx --report-unused-disable-directives --max-warnings 0",
    "preview": "vite preview"
  },
  "dependencies": {
    "dotenv": "^8.2.0",
    "express": "^4.17.1",
    "npm-run-all": "4.1.5",
    "openai": "^4.29.1",
    "react": "^18.2.0",
    "react-dom": "^18.2.0"
  },
  "devDependencies": {
    "@types/react": "^18.2.66",
    "@types/react-dom": "^18.2.22",
    "@vitejs/plugin-react": "^4.2.1",
    "eslint": "^8.57.0",
    "eslint-plugin-react": "^7.34.1",
    "eslint-plugin-react-hooks": "^4.6.0",
    "eslint-plugin-react-refresh": "^0.4.6",
    "vite": "^5.2.0"
  }
}
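One gotcha: unlike create-react-app, Vite does not read the proxy field from package.json, so the dev-server proxy that forwards /streaming-response to Express lives in vite.config.js:
// vite.config.js
import { defineConfig } from 'vite';
import react from '@vitejs/plugin-react';

export default defineConfig({
  plugins: [react()],
  server: {
    proxy: {
      // forward API calls from the Vite dev server to Express
      '/streaming-response': 'http://localhost:5000',
    },
  },
});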
Finally, a minimal .env:
OPENAI_API_KEY=YOUR_API_KEY
# PORT=5000
# HOST=localhost
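With all of that in place, you can smoke-test the endpoint from Node (18+, which ships a global fetch). This is a throwaway script, not part of the app; save it as smoke-test.mjs and run it while the server is up. If everything is wired correctly, the chunks print incrementally rather than all at once.
// smoke-test.mjs
const res = await fetch('http://localhost:5000/streaming-response', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  // a bare JSON-encoded string, matching what the React client will send
  body: JSON.stringify('Write a haiku about streams'),
});

const decoder = new TextDecoder();
for await (const chunk of res.body) {
  // write each chunk as soon as it arrives
  process.stdout.write(decoder.decode(chunk, { stream: true }));
}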
A job for generators
Generators are like normal functions, except generators can "return" multiple times via yield. Calling and working with generator functions directly (via next()) is clunky compared to normal synchronous and async function calls, but iterating them with for...of or for await...of makes interacting with streaming responses feel natural.
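A toy example (nothing streaming yet) shows the difference:
// a generator "returns" a value at each yield
function* count() {
  yield 1;
  yield 2;
  yield 3;
}

// clunky: drive the iterator by hand
const it = count();
it.next(); // { value: 1, done: false }
it.next(); // { value: 2, done: false }

// natural: let for...of run the iterator protocol for us
for (const n of count()) {
  console.log(n); // 1, then 2, then 3
}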
For now, let's define an async generator function (notice the asterisk * following the function keyword):
export default async function* stream(path, data) {
  const response = await fetch(path, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Accept: 'text/plain',
    },
    body: JSON.stringify(data),
  });
  if (!response.ok) {
    throw new Error(`request failed with status ${response.status}`);
  }
  // reuse one decoder so multi-byte characters split across chunks decode correctly
  const decoder = new TextDecoder();
  for await (const chunk of response.body) {
    // decode bytes as they become available
    yield decoder.decode(chunk, { stream: true });
  }
}
(One caveat: for await over response.body requires the browser to support async iteration of ReadableStream; where it's missing you can fall back to a manual reader.read() loop like the one in the intro.)
Putting it all together
import { useState } from 'react';
import chatGPTLogo from './assets/ChatGPT-Logo.png';
import stream from './client';
import './App.css';

function App() {
  const [streaming, setStreaming] = useState(false);
  const [aiError, setAiError] = useState();
  const [response, setResponse] = useState('');
  const [question, setQuestion] = useState('');

  const generate = async () => {
    // store the aggregate response in a temp variable
    let res = '';
    setStreaming(true);
    setAiError(undefined);
    try {
      for await (const chunk of stream('/streaming-response', question)) {
        // DO NOT concat to state directly, e.g. setResponse(`${response}${chunk}`);
        // state updates are asynchronous, so `response` would be stale and you
        // would consistently drop chunks
        res += chunk;
        setResponse(res);
      }
    } catch (e) {
      setAiError(e);
    } finally {
      setStreaming(false);
    }
  };

  if (aiError) {
    return <aside>Something went wrong :( ({aiError.message})</aside>;
  }

  return (
    <main>
      <h1>Streaming ChatGPT Data to React</h1>
      <form
        onSubmit={(e) => {
          // prevent the default full-page form submission
          e.preventDefault();
          generate();
        }}
      >
        <input
          type="text"
          placeholder="What is your question?"
          value={question}
          onChange={(e) => setQuestion(e.target.value)}
        />
        <button type="submit">Submit</button>
      </form>
      <h2>
        {streaming ? 'Streaming ' : ''}Response{' '}
        {streaming ? <img className="logo" src={chatGPTLogo} alt="ChatGPT logo" /> : null}
      </h2>
      <hr />
      <section className="response">{response}</section>
    </main>
  );
}

export default App;
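One refinement worth knowing: React's functional updater form always receives the latest state, so it sidesteps the staleness problem without the temp variable:
for await (const chunk of stream('/streaming-response', question)) {
  // React passes the freshest state to the updater, so no chunks are dropped
  setResponse((prev) => prev + chunk);
}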