Add create audio speech stream support #189

Open · wants to merge 6 commits into base: main

36 changes: 36 additions & 0 deletions README.md
@@ -565,6 +565,42 @@ let result = try await openAI.audioCreateSpeech(query: query)
```
[OpenAI Create Speech – Documentation](https://platform.openai.com/docs/api-reference/audio/createSpeech)

#### Audio Create Speech Streaming

Streaming for Audio Create Speech is available via the `audioCreateSpeechStream` function. Audio chunks are delivered one by one as they arrive.

**Closures**
```swift
openAI.audioCreateSpeechStream(query: query) { partialResult in
    switch partialResult {
    case .success(let result):
        print(result.audio)
    case .failure(let error):
        print(error) // Handle chunk error here
    }
} completion: { error in
    // Handle streaming error here
}
```

**Combine**

```swift
openAI
    .audioCreateSpeechStream(query: query)
    .sink { completion in
        // Handle completion result here
    } receiveValue: { result in
        // Handle chunk here
    }.store(in: &cancellables)
```
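
As with `chatsStream`, the publisher emits `Result<AudioSpeechResult, Error>` values, so each chunk still needs to be unwrapped inside `receiveValue`. A minimal sketch, assuming an `audioData` buffer and a `cancellables` set declared in the surrounding scope:

```swift
var audioData = Data()

openAI
    .audioCreateSpeechStream(query: query)
    .sink { completion in
        // Handle completion result here
    } receiveValue: { result in
        if case .success(let chunk) = result {
            audioData.append(chunk.audio) // accumulate the streamed chunk
        }
    }.store(in: &cancellables)
```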

**Structured concurrency**
```swift
for try await result in openAI.audioCreateSpeechStream(query: query) {
    // Handle result here
}
```
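
Each `AudioSpeechResult` carries one chunk of the encoded audio in its `audio` property. A common pattern is to append the chunks as they arrive and persist or play the assembled data once the stream finishes. A minimal sketch using the structured-concurrency variant (the output file name and `.mp3` extension are illustrative and should match the query's `responseFormat`):

```swift
var audioData = Data()
for try await result in openAI.audioCreateSpeechStream(query: query) {
    audioData.append(result.audio) // collect each streamed chunk
}
// The buffer now holds the complete encoded audio.
let fileURL = FileManager.default.temporaryDirectory.appendingPathComponent("speech.mp3")
try audioData.write(to: fileURL)
```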

#### Audio Transcriptions

36 changes: 33 additions & 3 deletions Sources/OpenAI/OpenAI.swift
@@ -119,7 +119,14 @@ final public class OpenAI: OpenAIProtocol {
public func audioCreateSpeech(query: AudioSpeechQuery, completion: @escaping (Result<AudioSpeechResult, Error>) -> Void) {
performSpeechRequest(request: JSONRequest<AudioSpeechResult>(body: query, url: buildURL(path: .audioSpeech)), completion: completion)
}


public func audioCreateSpeechStream(query: AudioSpeechQuery, onResult: @escaping (Result<AudioSpeechResult, Error>) -> Void, completion: ((Error?) -> Void)?) {
performSpeechStreamingRequest(
request: JSONRequest<AudioSpeechResult>(body: query, url: buildURL(path: .audioSpeech)),
onResult: onResult,
completion: completion
)
}
}

extension OpenAI {
@@ -155,10 +162,10 @@ extension OpenAI {
organizationIdentifier: configuration.organizationIdentifier,
timeoutInterval: configuration.timeoutInterval)
let session = StreamingSession<ResultType>(urlRequest: request)
session.onReceiveContent = { _, object in
onResult(.success(object))
}
session.onProcessingError = { _, error in
onResult(.failure(error))
}
session.onComplete = { [weak self] object, error in
@@ -193,6 +200,29 @@ extension OpenAI {
completion(.failure(error))
}
}

func performSpeechStreamingRequest(request: any URLRequestBuildable, onResult: @escaping (Result<AudioSpeechResult, Error>) -> Void, completion: ((Error?) -> Void)?) {
do {
let request = try request.build(token: configuration.token,
organizationIdentifier: configuration.organizationIdentifier,
timeoutInterval: configuration.timeoutInterval)
let session = StreamingSession<AudioSpeechResult>(urlRequest: request)
session.onReceiveContent = { _, object in
onResult(.success(object))
}
session.onProcessingError = { _, error in
onResult(.failure(error))
}
session.onComplete = { [weak self] object, error in
self?.streamingSessions.removeAll(where: { $0 == object })
completion?(error)
}
session.perform()
streamingSessions.append(session)
} catch {
completion?(error)
}
}
}

extension OpenAI {
4 changes: 4 additions & 0 deletions Sources/OpenAI/Private/StreamingSession.swift
@@ -45,6 +45,10 @@ final class StreamingSession<ResultType: Codable>: NSObject, Identifiable, URLSessionDataDelegate {
}

func urlSession(_ session: URLSession, dataTask: URLSessionDataTask, didReceive data: Data) {
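        // Speech responses stream raw audio bytes rather than UTF-8 text events,
        // so hand the chunk straight to the caller instead of decoding it below.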
if ResultType.self == AudioSpeechResult.self, let result = AudioSpeechResult(audio: data) as? ResultType {
onReceiveContent?(self, result)
return
}
guard let stringContent = String(data: data, encoding: .utf8) else {
onProcessingError?(self, StreamingError.unknownContent)
return
16 changes: 14 additions & 2 deletions Sources/OpenAI/Public/Protocols/OpenAIProtocol+Async.swift
@@ -118,7 +118,7 @@ public extension OpenAIProtocol {
query: ChatQuery
) -> AsyncThrowingStream<ChatStreamResult, Error> {
return AsyncThrowingStream { continuation in
return chatsStream(query: query) { result in
continuation.yield(with: result)
} completion: { error in
continuation.finish(throwing: error)
@@ -198,7 +198,19 @@
}
}
}


func audioCreateSpeechStream(
query: AudioSpeechQuery
) -> AsyncThrowingStream<AudioSpeechResult, Error> {
return AsyncThrowingStream { continuation in
return audioCreateSpeechStream(query: query) { result in
continuation.yield(with: result)
} completion: { error in
continuation.finish(throwing: error)
}
}
}

func audioTranscriptions(
query: AudioTranscriptionQuery
) async throws -> AudioTranscriptionResult {
18 changes: 16 additions & 2 deletions Sources/OpenAI/Public/Protocols/OpenAIProtocol+Combine.swift
@@ -70,7 +70,7 @@ public extension OpenAIProtocol {
}
.eraseToAnyPublisher()
}

func chatsStream(query: ChatQuery) -> AnyPublisher<Result<ChatStreamResult, Error>, Error> {
let progress = PassthroughSubject<Result<ChatStreamResult, Error>, Error>()
chatsStream(query: query) { result in
@@ -84,7 +84,7 @@
}
return progress.eraseToAnyPublisher()
}

func edits(query: EditsQuery) -> AnyPublisher<EditsResult, Error> {
Future<EditsResult, Error> {
edits(query: query, completion: $0)
@@ -120,6 +120,20 @@
.eraseToAnyPublisher()
}

func audioCreateSpeechStream(query: AudioSpeechQuery) -> AnyPublisher<Result<AudioSpeechResult, Error>, Error> {
let progress = PassthroughSubject<Result<AudioSpeechResult, Error>, Error>()
audioCreateSpeechStream(query: query) { result in
progress.send(result)
} completion: { error in
if let error {
progress.send(completion: .failure(error))
} else {
progress.send(completion: .finished)
}
}
return progress.eraseToAnyPublisher()
}

func audioTranscriptions(query: AudioTranscriptionQuery) -> AnyPublisher<AudioTranscriptionResult, Error> {
Future<AudioTranscriptionResult, Error> {
audioTranscriptions(query: query, completion: $0)
15 changes: 15 additions & 0 deletions Sources/OpenAI/Public/Protocols/OpenAIProtocol.swift
@@ -231,6 +231,21 @@
*/
func audioCreateSpeech(query: AudioSpeechQuery, completion: @escaping (Result<AudioSpeechResult, Error>) -> Void)

/** This function sends an `AudioSpeechQuery` to the OpenAI API to create audio speech from text using a specific voice and format, streaming the generated audio back chunk by chunk.
Example:
```
let query = AudioSpeechQuery(model: .tts_1, input: "Hello, world!", voice: .alloy, responseFormat: .mp3, speed: 1.0)
openAI.audioCreateSpeechStream(query: query) { result in
    // Handle chunk here
} completion: { error in
    // Handle stream completion here
}
```
- Parameters:
- query: An `AudioSpeechQuery` object containing the parameters for the API request. This includes the Text-to-Speech model to be used, input text, voice to be used for generating the audio, the desired audio format, and the speed of the generated audio.
- onResult: A closure which receives a result each time a new chunk is delivered. The closure's parameter, `Result<AudioSpeechResult, Error>`, will contain either an `AudioSpeechResult` object with the generated audio chunk, or an error if the request failed.
- completion: A closure that is called when all chunks have been delivered or an unrecoverable error has occurred.
*/
func audioCreateSpeechStream(query: AudioSpeechQuery, onResult: @escaping (Result<AudioSpeechResult, Error>) -> Void, completion: ((Error?) -> Void)?)

/**
Transcribes audio data using OpenAI's audio transcription API and completes the operation asynchronously.
