diff --git a/swagger/docs.go b/swagger/docs.go index 29e04af6..f48b9661 100644 --- a/swagger/docs.go +++ b/swagger/docs.go @@ -22,6 +22,36 @@ const docTemplate = `{ "host": "{{.Host}}", "basePath": "{{.BasePath}}", "paths": { + "/tts": { + "post": { + "consumes": [ + "application/json" + ], + "produces": [ + "audio/x-wav" + ], + "summary": "Generates audio from the input text.", + "parameters": [ + { + "description": "query params", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.TTSRequest" + } + } + ], + "responses": { + "200": { + "description": "generated audio/wav file", + "schema": { + "type": "string" + } + } + } + } + }, "/v1/assistants": { "post": { "summary": "Create an assistant with a model and instructions.", @@ -48,6 +78,12 @@ const docTemplate = `{ }, "/v1/audio/speech": { "post": { + "consumes": [ + "application/json" + ], + "produces": [ + "audio/x-wav" + ], "summary": "Generates audio from the input text.", "parameters": [ { @@ -62,7 +98,7 @@ const docTemplate = `{ ], "responses": { "200": { - "description": "Response", + "description": "generated audio/wav file", "schema": { "type": "string" } @@ -771,18 +807,26 @@ const docTemplate = `{ } }, "schema.TTSRequest": { + "description": "TTS request body", "type": "object", "properties": { "backend": { "type": "string" }, "input": { + "description": "text input", + "type": "string" + }, + "language": { + "description": "(optional) language to use with TTS model", "type": "string" }, "model": { + "description": "model name or full path", "type": "string" }, "voice": { + "description": "voice audio file or speaker id", "type": "string" } } diff --git a/swagger/swagger.json b/swagger/swagger.json index 1933da3a..1eba0ff3 100644 --- a/swagger/swagger.json +++ b/swagger/swagger.json @@ -15,6 +15,36 @@ }, "basePath": "/", "paths": { + "/tts": { + "post": { + "consumes": [ + "application/json" + ], + "produces": [ + "audio/x-wav" + ], + "summary": "Generates audio from the input text.", + "parameters": [ + { + "description": "query params", + "name": "request", + "in": "body", + "required": true, + "schema": { + "$ref": "#/definitions/schema.TTSRequest" + } + } + ], + "responses": { + "200": { + "description": "generated audio/wav file", + "schema": { + "type": "string" + } + } + } + } + }, "/v1/assistants": { "post": { "summary": "Create an assistant with a model and instructions.", @@ -41,6 +71,12 @@ }, "/v1/audio/speech": { "post": { + "consumes": [ + "application/json" + ], + "produces": [ + "audio/x-wav" + ], "summary": "Generates audio from the input text.", "parameters": [ { @@ -55,7 +91,7 @@ ], "responses": { "200": { - "description": "Response", + "description": "generated audio/wav file", "schema": { "type": "string" } @@ -764,18 +800,26 @@ } }, "schema.TTSRequest": { + "description": "TTS request body", "type": "object", "properties": { "backend": { "type": "string" }, "input": { + "description": "text input", + "type": "string" + }, + "language": { + "description": "(optional) language to use with TTS model", "type": "string" }, "model": { + "description": "model name or full path", "type": "string" }, "voice": { + "description": "voice audio file or speaker id", "type": "string" } } diff --git a/swagger/swagger.yaml b/swagger/swagger.yaml index 33ce0b78..db4ef52f 100644 --- a/swagger/swagger.yaml +++ b/swagger/swagger.yaml @@ -367,14 +367,21 @@ definitions: type: integer type: object schema.TTSRequest: + description: TTS request body properties: backend: type: string input: + description: text input + type: string + language: + description: (optional) language to use with TTS model type: string model: + description: model name or full path type: string voice: + description: voice audio file or speaker id type: string type: object schema.ToolCall: @@ -399,6 +406,25 @@ info: title: LocalAI API version: 2.0.0 paths: + /tts: + post: + consumes: + - application/json + parameters: + - description: query params + in: body + name: request + required: true + schema: + $ref: '#/definitions/schema.TTSRequest' + produces: + - audio/x-wav + responses: + "200": + description: generated audio/wav file + schema: + type: string + summary: Generates audio from the input text. /v1/assistants: post: parameters: @@ -416,6 +442,8 @@ paths: summary: Create an assistant with a model and instructions. /v1/audio/speech: post: + consumes: + - application/json parameters: - description: query params in: body @@ -423,9 +451,11 @@ paths: required: true schema: $ref: '#/definitions/schema.TTSRequest' + produces: + - audio/x-wav responses: "200": - description: Response + description: generated audio/wav file schema: type: string summary: Generates audio from the input text.