Streaming OpenAI responses in React
April 2024
TL;DR
Iterators and generators make interacting with streaming data more straightforward.
Introduction
I recently deployed some AI functionality and wanted to help others trying to stream responses from generative AI APIs such as OpenAI's. Others have done a great job explaining the general concept of streaming responses, so if you want to learn more about the subject I recommend starting with MDN's Using readable streams.
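For reference, the lower-level approach from that guide looks roughly like this (a sketch; /streaming-response is the endpoint we'll build below):
// manually pull chunks off the response body until the stream is exhausted
const response = await fetch('/streaming-response', { method: 'POST' });
const reader = response.body.getReader();
const decoder = new TextDecoder();

while (true) {
  const { done, value } = await reader.read();
  if (done) break;
  // value is a Uint8Array; decode it as it arrives
  console.log(decoder.decode(value, { stream: true }));
}
It works, but the reader bookkeeping is exactly the sort of thing generators can hide, which is where we'll end up below.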
Although my example app has an Express backend and React frontend, this functionality can be achieved with other backend and frontend frameworks. I will cover how to stream responses from OpenAI and will also touch on interacting with Anthropic's Claude models through AWS Bedrock.
Setup
Here are the important aspects of our backend, starting with server/server.js.
/* eslint-disable no-undef */
import { config } from 'dotenv';
import express from 'express';
import prompt from './prompt.js';

config();

const { PORT = 5000, HOST = 'localhost' } = process.env;

const app = express();

// parse JSON request bodies; `strict: false` also accepts a bare
// JSON-encoded string, which is what our client sends
app.use(express.json({ strict: false }));

app.post('/streaming-response', async (req, res) => {
  const { body } = req;
  // 'Transfer-Encoding: chunked' must be set to stream our response
  res.set({
    'Content-Type': 'text/plain',
    'Transfer-Encoding': 'chunked',
  });
  try {
    // `prompt` resolves to an async-iterable stream of completion chunks
    const stream = await prompt(body);
    for await (const chunk of stream) {
      // write a chunk of the delta content
      res.write(`${chunk.choices[0]?.delta?.content || ''}`);
    }
  } catch (e) {
    // the response may already be partially written, so just log and end
    console.error(e);
  }
  res.end();
});

app.listen(PORT, HOST, () => {
  console.log(`listening on http://${HOST}:${PORT}`);
});
The prompt module (server/prompt.js) wraps the OpenAI SDK; with stream: true, the SDK returns an async-iterable stream of chunks instead of a single completion.
/* eslint-disable no-undef */
import OpenAI from 'openai';
import { config } from 'dotenv';

config();

const systemPrompt = 'You are a helpful assistant...';

// create the client once at module scope rather than on every request
const openai = new OpenAI({
  apiKey: process.env.OPENAI_API_KEY,
});

export default function prompt(params) {
  return openai.chat.completions.create({
    model: 'gpt-4-turbo-preview',
    stream: true,
    messages: [
      {
        role: 'system',
        content: systemPrompt,
      },
      {
        role: 'user',
        content: params,
      },
    ],
    temperature: 1,
    max_tokens: 512,
    top_p: 1,
    frequency_penalty: 0,
    presence_penalty: 0,
  });
}
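As promised, the same pattern extends to Anthropic's Claude models through AWS Bedrock. Below is a minimal sketch using @aws-sdk/client-bedrock-runtime; the region, model ID, and event handling reflect my assumptions, so check them against what your account has enabled. Because this version yields plain strings, the server's write loop would call res.write(chunk) directly instead of reading chunk.choices[0]?.delta?.content.
import {
  BedrockRuntimeClient,
  InvokeModelWithResponseStreamCommand,
} from '@aws-sdk/client-bedrock-runtime';

// region and model ID are assumptions; use whatever your account has enabled
const client = new BedrockRuntimeClient({ region: 'us-east-1' });

export default async function* prompt(params) {
  const response = await client.send(
    new InvokeModelWithResponseStreamCommand({
      modelId: 'anthropic.claude-3-sonnet-20240229-v1:0',
      contentType: 'application/json',
      accept: 'application/json',
      body: JSON.stringify({
        anthropic_version: 'bedrock-2023-05-31',
        max_tokens: 512,
        messages: [{ role: 'user', content: params }],
      }),
    })
  );
  // response.body is an async iterable of events whose payloads are JSON bytes
  for await (const event of response.body) {
    if (!event.chunk?.bytes) continue;
    const parsed = JSON.parse(new TextDecoder().decode(event.chunk.bytes));
    // Claude streams several event types; only content_block_delta carries text
    if (parsed.type === 'content_block_delta') {
      yield parsed.delta.text;
    }
  }
}
Since calling an async generator returns its iterator immediately, the server's const stream = await prompt(body) line keeps working unchanged.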
The relevant parts of package.json:
{
  "name": "streaming-ai-response",
  "type": "module",
  "proxy": "http://localhost:5000",
  "license": "MIT",
  "author": "Rob McVey <mcvey.rob@gmail.com>",
  "scripts": {
    "start:react": "vite",
    "start:server": "node server/server.js",
    "dev": "run-p start:*",
    "build": "vite build",
    "lint": "eslint . --ext js,jsx --report-unused-disable-directives --max-warnings 0",
    "preview": "vite preview"
  },
  "dependencies": {
    "dotenv": "^8.2.0",
    "express": "^4.17.1",
    "npm-run-all": "4.1.5",
    "openai": "^4.29.1",
    "react": "^18.2.0",
    "react-dom": "^18.2.0"
  },
  "devDependencies": {
    "@types/react": "^18.2.66",
    "@types/react-dom": "^18.2.22",
    "@vitejs/plugin-react": "^4.2.1",
    "eslint": "^8.57.0",
    "eslint-plugin-react": "^7.34.1",
    "eslint-plugin-react-hooks": "^4.6.0",
    "eslint-plugin-react-refresh": "^0.4.6",
    "vite": "^5.2.0"
  }
}
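One gotcha: unlike create-react-app, Vite does not read the proxy field from package.json, so the dev-server proxy that forwards /streaming-response to Express lives in vite.config.js:
// vite.config.js
import { defineConfig } from 'vite';
import react from '@vitejs/plugin-react';

export default defineConfig({
  plugins: [react()],
  server: {
    proxy: {
      // forward API calls from the Vite dev server to Express
      '/streaming-response': 'http://localhost:5000',
    },
  },
});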
Finally, a minimal .env:
OPENAI_API_KEY=YOUR_API_KEY
# PORT=5000
# HOST=localhost
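With all of that in place, you can smoke-test the endpoint from Node (18+, which ships a global fetch). This is a throwaway script, not part of the app; save it as smoke-test.mjs and run it while the server is up. If everything is wired correctly, the chunks print incrementally rather than all at once.
// smoke-test.mjs
const res = await fetch('http://localhost:5000/streaming-response', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  // a bare JSON-encoded string, matching what the React client will send
  body: JSON.stringify('Write a haiku about streams'),
});

const decoder = new TextDecoder();
for await (const chunk of res.body) {
  // write each chunk as soon as it arrives
  process.stdout.write(decoder.decode(chunk, { stream: true }));
}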
A job for generators
Generators are like normal functions, except generators can "return" multiple times via yield. Calling and working with generator functions directly (via next()) is clunky compared to normal synchronous and async function calls, but iterating them with for...of or for await...of makes interacting with streaming responses feel natural.
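A toy example (nothing streaming yet) shows the difference:
// a generator "returns" a value at each yield
function* count() {
  yield 1;
  yield 2;
  yield 3;
}

// clunky: drive the iterator by hand
const it = count();
it.next(); // { value: 1, done: false }
it.next(); // { value: 2, done: false }

// natural: let for...of run the iterator protocol for us
for (const n of count()) {
  console.log(n); // 1, then 2, then 3
}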
For now, let's define an async generator function (notice the asterisk * following the function keyword):
export default async function* stream(path, data) {
  const response = await fetch(path, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      Accept: 'text/plain',
    },
    body: JSON.stringify(data),
  });
  if (!response.ok) {
    throw new Error(`request failed with status ${response.status}`);
  }
  // reuse one decoder so multi-byte characters split across chunks decode correctly
  const decoder = new TextDecoder();
  for await (const chunk of response.body) {
    // decode bytes as they become available
    yield decoder.decode(chunk, { stream: true });
  }
}
(One caveat: for await over response.body requires the browser to support async iteration of ReadableStream; where it's missing you can fall back to a manual reader.read() loop like the one in the intro.)
Putting it all together
import { useState } from 'react';
import chatGPTLogo from './assets/ChatGPT-Logo.png';
import stream from './client';
import './App.css';

function App() {
  const [streaming, setStreaming] = useState(false);
  const [aiError, setAiError] = useState();
  const [response, setResponse] = useState('');
  const [question, setQuestion] = useState('');

  const generate = async () => {
    // store the aggregate response in a temp variable
    let res = '';
    setStreaming(true);
    setAiError(undefined);
    try {
      for await (const chunk of stream('/streaming-response', question)) {
        // DO NOT concat to state directly, e.g. setResponse(`${response}${chunk}`);
        // state updates are asynchronous, so `response` would be stale and you
        // would consistently drop chunks
        res += chunk;
        setResponse(res);
      }
    } catch (e) {
      setAiError(e);
    } finally {
      setStreaming(false);
    }
  };

  if (aiError) {
    return <aside>Something went wrong :( ({aiError.message})</aside>;
  }

  return (
    <main>
      <h1>Streaming ChatGPT Data to React</h1>
      <form
        onSubmit={(e) => {
          // prevent the default full-page form submission
          e.preventDefault();
          generate();
        }}
      >
        <input
          type="text"
          placeholder="What is your question?"
          value={question}
          onChange={(e) => setQuestion(e.target.value)}
        />
        <button type="submit">Submit</button>
      </form>
      <h2>
        {streaming ? 'Streaming ' : ''}Response{' '}
        {streaming ? <img className="logo" src={chatGPTLogo} alt="ChatGPT logo" /> : null}
      </h2>
      <hr />
      <section className="response">{response}</section>
    </main>
  );
}

export default App;
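One refinement worth knowing: React's functional updater form always receives the latest state, so it sidesteps the staleness problem without the temp variable:
for await (const chunk of stream('/streaming-response', question)) {
  // React passes the freshest state to the updater, so no chunks are dropped
  setResponse((prev) => prev + chunk);
}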