Speechmatics ASR REST API (2.0.0)
Download OpenAPI specification:Download
The Speechmatics Automatic Speech Recognition REST API is used to submit ASR jobs and receive the results.
Create a new job.
header Parameters
Authorization required | string Customer API token |
X-SM-EAR-Tag | string Early Access Release Tag |
Request Body schema: multipart/form-data
config required | string JSON containing a JobConfig model indicating the type and parameters for the job. |
data_file | string <binary> The data file to be processed. Alternatively, the data file can be fetched from a URL specified in the fetch_data section of the JobConfig. |
text_file | string <binary> For alignment jobs, the text file that the data file should be aligned to. |
Responses
Response Schema:
id required | string The unique ID assigned to the job. Keep a record of this for later retrieval of your completed job. |
Response samples
- 201
- 400
- 401
- 403
- 410
- 500
{- "id": "a1b2c3d4e5"
}
List all jobs.
query Parameters
created_before | string <date-time> UTC Timestamp cursor for paginating request response. Filters jobs based on creation time to the nearest millisecond. Accepts up to nanosecond precision, truncating to millisecond precision. By default, the response will start with the most recent job. |
limit | integer [ 1 .. 100 ] Limit for paginating the request response. Defaults to 100. |
include_deleted | boolean Specifies whether deleted jobs should be included in the response. Defaults to false. |
header Parameters
Authorization required | string Customer API token |
X-SM-EAR-Tag | string Early Access Release Tag |
Responses
Response Schema:
jobs required | Array of objects (JobDetails) |
Response samples
- 200
- 401
- 422
- 500
{- "jobs": [
- {
- "created_at": "2018-01-09T12:29:01.853047Z",
- "data_name": "recording.mp3",
- "duration": 244,
- "id": "a1b2c3d4e5",
- "status": "transcribing",
- "type": "transcription",
- "tracking": {
- "title": "ACME Q12018 Statement",
- "reference": "/data/clients/ACME/statements/segs/2018Q1-seg8",
- "tags": [
- "quick-review",
- "segment"
], - "details": {
- "client": "ACME Corp",
- "segment": 8,
- "seg_start": 963.201,
- "seg_end": 1091.481
}
}, - "transcription_config": {
- "language": "en",
- "additional_vocab": [
- {
- "content": "Speechmatics",
- "sounds_like": [
- "speechmatics"
]
}, - {
- "content": "gnocchi",
- "sounds_like": [
- "nyohki",
- "nokey",
- "nochi"
]
}, - {
- "content": "CEO",
- "sounds_like": [
- "C.E.O."
]
}, - {
- "content": "financial crisis"
}
], - "diarization": "channel",
- "channel_diarization_labels": [
- "Agent",
- "Caller"
]
}, - "notification_config": [
- {
- "contents": [
- "transcript",
- "data"
], - "auth_headers": [
- "Authorization: Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VySWQiOiJiMDhmODZhZi0zNWRhLTQ4ZjItOGZhYi1jZWYzOTA0NjYwYmQifQ.-xN_h82PHVTCMA9vdoHrcZxH-x5mb11y1537t3rGzcM"
]
}
]
}, - {
- "created_at": "2018-01-09T11:23:42.984612Z",
- "data_name": "hello.wav",
- "duration": 130,
- "id": "084d1f86-9fe9-11e8-9c91-00155d019c0b",
- "status": "aligning",
- "type": "alignment",
- "text_name": "hello.txt",
- "alignment_config": {
- "language": "en"
}, - "tracking": {
- "title": "Project X Intro",
- "reference": "/data/projects/X/overview/audio/hello.wav"
}
}
]
}
Get job details, including progress and any error reports.
path Parameters
jobid required | string Example: a1b2c3d4e5 ID of the job. |
header Parameters
Authorization required | string Customer API token |
X-SM-EAR-Tag | string Early Access Release Tag |
Responses
Response Schema:
job required | object (JobDetails) Document describing a job, including the status and config used. This model will be returned when you get job details or list all jobs. |
Response samples
- 200
- 401
- 404
- 410
- 500
{- "job": {
- "created_at": "2018-01-09T12:29:01.853047Z",
- "data_name": "recording.mp3",
- "duration": 244,
- "id": "a1b2c3d4e5",
- "status": "transcribing",
- "type": "transcription",
- "transcription_config": {
- "language": "en",
- "additional_vocab": [
- {
- "content": "Speechmatics",
- "sounds_like": [
- "speechmatics"
]
}, - {
- "content": "gnocchi",
- "sounds_like": [
- "nyohki",
- "nokey",
- "nochi"
]
}, - {
- "content": "CEO",
- "sounds_like": [
- "C.E.O."
]
}, - {
- "content": "financial crisis"
}
], - "diarization": "channel",
- "channel_diarization_labels": [
- "Agent",
- "Caller"
]
}, - "notification_config": [
- {
- "contents": [
- "transcript",
- "data"
], - "auth_headers": [
- "Authorization: Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VySWQiOiJiMDhmODZhZi0zNWRhLTQ4ZjItOGZhYi1jZWYzOTA0NjYwYmQifQ.-xN_h82PHVTCMA9vdoHrcZxH-x5mb11y1537t3rGzcM"
]
}
], - "tracking": {
- "title": "ACME Q12018 Statement",
- "reference": "/data/clients/ACME/statements/segs/2018Q1-seg8",
- "tags": [
- "quick-review",
- "segment"
], - "details": {
- "client": "ACME Corp",
- "segment": 8,
- "seg_start": 963.201,
- "seg_end": 1091.481
}
}
}
}
Delete a job and remove all associated resources.
path Parameters
jobid required | string Example: a1b2c3d4e5 ID of the job to delete. |
query Parameters
force | boolean When set, a running job will be force terminated. When unset (default), a running job will not be terminated and the request will return HTTP 423 Locked. |
header Parameters
Authorization required | string Customer API token |
X-SM-EAR-Tag | string Early Access Release Tag |
Responses
Response Schema:
job required | object (JobDetails) Document describing a job, including the status and config used. This model will be returned when you get job details, list all jobs or delete a job. |
Response samples
- 200
- 401
- 404
- 410
- 423
- 500
{- "job": {
- "created_at": "2018-01-09T12:29:01.853047Z",
- "data_name": "recording.mp3",
- "duration": 244,
- "id": "a1b2c3d4e5",
- "status": "deleted",
- "type": "transcription",
- "transcription_config": {
- "language": "en",
- "additional_vocab": [
- {
- "content": "Speechmatics",
- "sounds_like": [
- "speechmatics"
]
}, - {
- "content": "gnocchi",
- "sounds_like": [
- "nyohki",
- "nokey",
- "nochi"
]
}, - {
- "content": "CEO",
- "sounds_like": [
- "C.E.O."
]
}, - {
- "content": "financial crisis"
}
], - "diarization": "channel",
- "channel_diarization_labels": [
- "Agent",
- "Caller"
]
}, - "notification_config": [
- {
- "contents": [
- "transcript",
- "data"
], - "auth_headers": [
- "Authorization: Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VySWQiOiJiMDhmODZhZi0zNWRhLTQ4ZjItOGZhYi1jZWYzOTA0NjYwYmQifQ.-xN_h82PHVTCMA9vdoHrcZxH-x5mb11y1537t3rGzcM"
]
}
], - "tracking": {
- "title": "ACME Q12018 Statement",
- "reference": "/data/clients/ACME/statements/segs/2018Q1-seg8",
- "tags": [
- "quick-review",
- "segment"
], - "details": {
- "client": "ACME Corp",
- "segment": 8,
- "seg_start": 963.201,
- "seg_end": 1091.481
}
}
}
}
Get the transcript for a transcription job.
path Parameters
jobid required | string Example: a1b2c3d4e5 ID of the job. |
query Parameters
format | string Enum: "json-v2" "txt" "srt" The transcription format (by default the json-v2 format is returned). |
header Parameters
Authorization required | string Customer API token |
X-SM-EAR-Tag | string Early Access Release Tag |
Responses
Response Schema:
format required | string Example: "2.1" Speechmatics JSON transcript format version number. |
job required | object (JobInfo) Summary information about an ASR job, to support identification and tracking. |
metadata required | object (RecognitionMetadata) Summary information about the output from an ASR job, comprising the job type and configuration parameters used when generating the output. |
results required | Array of objects (RecognitionResult) Example: [[{"channel":"channel_1","start_time":0.55,"end_time":1.2,"type":"word","alternatives":[{"confidence":0.95,"content":"Hello","language":"en","speaker":"S1","display":{"direction":"ltr"}}]}]] |
translations | object Example: {"de":[{"start_time":0.5,"end_time":1.3,"content":"Guten Tag, wie geht es dir?","speaker":"UU"}],"fr":[{"start_time":0.5,"end_time":1.3,"content":"Bonjour, comment ça va?","speaker":"UU"}]} Translations of the transcript into other languages. It is a map of ISO language codes to arrays of translated sentences. |
Response samples
- 200
- 401
- 404
- 410
- 500
{- "format": "2.1",
- "job": {
- "created_at": "2018-01-09T12:29:01.853047Z",
- "data_name": "string",
- "duration": 0,
- "id": "a1b2c3d4e5",
- "text_name": "string",
- "tracking": {
- "title": "ACME Q12018 Earnings Call",
- "reference": "/data/clients/ACME/statements/segs/2018Q1-seg8",
- "tags": [
- "quick-review",
- "segment"
], - "details": {
- "client": "ACME Corp",
- "segment": 8,
- "seg_start": 963.201,
- "seg_end": 1091.481
}
}
}, - "metadata": {
- "created_at": "2018-01-09T12:29:01.853047Z",
- "type": "alignment",
- "transcription_config": {
- "language": "en",
- "output_locale": "en-GB",
- "additional_vocab": [
- {
- "content": "Speechmatics",
- "sounds_like": [
- "speechmatics"
]
}, - {
- "content": "gnocchi",
- "sounds_like": [
- "nyohki",
- "nokey",
- "nochi"
]
}, - {
- "content": "CEO",
- "sounds_like": [
- "C.E.O."
]
}, - {
- "content": "financial crisis"
}
], - "diarization": "channel",
- "channel_diarization_labels": [
- "Caller",
- "Agent"
]
}, - "alignment_config": {
- "language": "en"
}, - "output_config": {
- "srt_overrides": {
- "max_line_length": 0,
- "max_lines": 0
}
}
}, - "results": [
- [
- {
- "channel": "channel_1",
- "start_time": 0.55,
- "end_time": 1.2,
- "type": "word",
- "alternatives": [
- {
- "confidence": 0.95,
- "content": "Hello",
- "language": "en",
- "speaker": "S1",
- "display": {
- "direction": "ltr"
}
}
]
}
]
], - "translations": {
- "de": [
- {
- "start_time": 0.5,
- "end_time": 1.3,
- "content": "Guten Tag, wie geht es dir?",
- "speaker": "UU"
}
], - "fr": [
- {
- "start_time": 0.5,
- "end_time": 1.3,
- "content": "Bonjour, comment ça va?",
- "speaker": "UU"
}
]
}
}
Get the aligned text file for an alignment job.
path Parameters
jobid required | string Example: a1b2c3d4e5 ID of the job. |
query Parameters
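tags | string Enum: "word_start_and_end" "one_per_line" Control how timing information is added to the text file provided as input to the alignment job. If set to word_start_and_end, timing tags are inserted at the start and end of each word. If set to one_per_line, a timing tag is inserted at the start of each line. |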
header Parameters
Authorization required | string Customer API token |
X-SM-EAR-Tag | string Early Access Release Tag |
Responses
Response Schema:
Response samples
- 200
- 401
- 404
- 410
- 500
Get the usage statistics.
query Parameters
since | string <date> Include usage after the given date (inclusive). This is an ISO-8601 calendar date format: YYYY-MM-DD. |
until | string <date> Include usage before the given date (inclusive). This is an ISO-8601 calendar date format: YYYY-MM-DD. |
header Parameters
Authorization required | string Customer API token |
X-SM-EAR-Tag | string Early Access Release Tag |
Responses
Response Schema: application/json
since required | string <date-time> Example: "2021-10-14T00:55:00Z" |
until required | string <date-time> Example: "2022-12-01T00:00:00Z" |
summary required | Array of objects (UsageDetails) |
details required | Array of objects (UsageDetails) |
Response samples
- 200
- 401
- 500
{- "since": "2021-09-12T00:00:00Z",
- "until": "2022-01-01T23:59:59Z",
- "summary": [
- {
- "mode": "batch",
- "type": "transcription",
- "count": 5,
- "duration_hrs": 1.53
}, - {
- "mode": "batch",
- "type": "alignment",
- "count": 1,
- "duration_hrs": 0.1
}
], - "details": [
- {
- "mode": "batch",
- "type": "transcription",
- "language": "sv",
- "operating_point": "standard",
- "count": 4,
- "duration_hrs": 1.33
}, - {
- "mode": "batch",
- "type": "transcription",
- "language": "de",
- "operating_point": "enhanced",
- "count": 1,
- "duration_hrs": 0.2
}, - {
- "mode": "batch",
- "type": "alignment",
- "language": "en",
- "count": 1,
- "duration_hrs": 0.1
}
]
}
This model should be used when you create a new job. It will also be returned as part of the response to a number of requests, including when you get job details or get the transcript for a transcription job.
Based on the value of type, a type-specific object such as transcription_config is required to be present to specify all configuration settings or parameters needed to process the job inputs as expected.
If the results of the job are to be forwarded on completion, notification_config can be provided with a list of callbacks to be made; no assumptions should be made about the order in which they will occur. For more details, please refer to Notifications in the documentation.
Customer-specific job details or metadata can be supplied in tracking, and this information will be available where possible in the job results and in callbacks.
type required | string (JobType) Enum: "alignment" "transcription" |
fetch_data | object (DataFetchConfig) |
fetch_text | object (DataFetchConfig) |
alignment_config | object (AlignmentConfig) Example: {"language":"en"} |
transcription_config | object (TranscriptionConfig) Example: {"language":"en","output_locale":"en-GB","additional_vocab":[{"content":"Speechmatics","sounds_like":["speechmatics"]},{"content":"gnocchi","sounds_like":["nyohki","nokey","nochi"]},{"content":"CEO","sounds_like":["C.E.O."]},{"content":"financial crisis"}],"diarization":"channel","channel_diarization_labels":["Caller","Agent"]} |
notification_config | Array of objects (NotificationConfig) Example: [[{"url":"https://collector.example.org/callback","contents":["transcript:json-v2"],"auth_headers":["Authorization: Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VySWQiOiJiMDhmODZhZi0zNWRhLTQ4ZjItOGZhYi1jZWYzOTA0NjYwYmQifQ.-xN_h82PHVTCMA9vdoHrcZxH-x5mb11y1537t3rGzcM"]}]] |
tracking | object (TrackingData) Example: {"title":"ACME Q12018 Earnings Call","reference":"/data/clients/ACME/statements/segs/2018Q1-seg8","tags":["quick-review","segment"],"details":{"client":"ACME Corp","segment":8,"seg_start":963.201,"seg_end":1091.481}} |
output_config | object (OutputConfig) |
translation_config | object (TranslationConfig) |
{- "type": "alignment",
- "fetch_data": {
- "url": "string",
- "auth_headers": [
- "string"
]
}, - "fetch_text": {
- "url": "string",
- "auth_headers": [
- "string"
]
}, - "alignment_config": {
- "language": "en"
}, - "transcription_config": {
- "language": "en",
- "output_locale": "en-GB",
- "additional_vocab": [
- {
- "content": "Speechmatics",
- "sounds_like": [
- "speechmatics"
]
}, - {
- "content": "gnocchi",
- "sounds_like": [
- "nyohki",
- "nokey",
- "nochi"
]
}, - {
- "content": "CEO",
- "sounds_like": [
- "C.E.O."
]
}, - {
- "content": "financial crisis"
}
], - "diarization": "channel",
- "channel_diarization_labels": [
- "Caller",
- "Agent"
]
}, - "notification_config": [
- [
- {
- "contents": [
- "transcript:json-v2"
], - "auth_headers": [
- "Authorization: Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VySWQiOiJiMDhmODZhZi0zNWRhLTQ4ZjItOGZhYi1jZWYzOTA0NjYwYmQifQ.-xN_h82PHVTCMA9vdoHrcZxH-x5mb11y1537t3rGzcM"
]
}
]
], - "tracking": {
- "title": "ACME Q12018 Earnings Call",
- "reference": "/data/clients/ACME/statements/segs/2018Q1-seg8",
- "tags": [
- "quick-review",
- "segment"
], - "details": {
- "client": "ACME Corp",
- "segment": 8,
- "seg_start": 963.201,
- "seg_end": 1091.481
}
}, - "output_config": {
- "srt_overrides": {
- "max_line_length": 0,
- "max_lines": 0
}
}, - "translation_config": {
- "target_languages": [
- "es",
- "de"
]
}
}
Returned when you get job details, list all jobs or delete a job. This model includes the status and config that was used.
created_at required | string <date-time> Example: "2018-01-09T12:29:01.853047Z" The UTC date time the job was created. |
data_name required | string Name of the data file submitted for job. |
text_name | string Name of the text file submitted to be aligned to audio. |
duration | integer >= 0 The file duration (in seconds). May be missing for fetch URL jobs. |
id required | string Example: "a1b2c3d4e5" The unique id assigned to the job. |
status required | string Enum: "running" "done" "rejected" "deleted" "expired" The status of the job.
|
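config | object (JobConfig) JSON object that contains various groups of job configuration parameters. Based on the value of type, a type-specific object such as transcription_config is required to be present to specify all configuration settings or parameters needed to process the job inputs as expected. If the results of the job are to be forwarded on completion, notification_config can be provided with a list of callbacks to be made; no assumptions should be made about the order in which they will occur. Customer-specific job details or metadata can be supplied in tracking, and this information will be available where possible in the job results and in callbacks. |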
lang | string Optional parameter used for backwards compatibility with the v1 API. |
errors | Array of objects (JobDetailError) Optional list of errors that have occurred in user interaction, for example: audio could not be fetched or notification could not be sent. |
{- "created_at": "2018-01-09T12:29:01.853047Z",
- "data_name": "string",
- "text_name": "string",
- "duration": 0,
- "id": "a1b2c3d4e5",
- "status": "running",
- "config": {
- "type": "alignment",
- "fetch_data": {
- "url": "string",
- "auth_headers": [
- "string"
]
}, - "fetch_text": {
- "url": "string",
- "auth_headers": [
- "string"
]
}, - "alignment_config": {
- "language": "en"
}, - "transcription_config": {
- "language": "en",
- "output_locale": "en-GB",
- "additional_vocab": [
- {
- "content": "Speechmatics",
- "sounds_like": [
- "speechmatics"
]
}, - {
- "content": "gnocchi",
- "sounds_like": [
- "nyohki",
- "nokey",
- "nochi"
]
}, - {
- "content": "CEO",
- "sounds_like": [
- "C.E.O."
]
}, - {
- "content": "financial crisis"
}
], - "diarization": "channel",
- "channel_diarization_labels": [
- "Caller",
- "Agent"
]
}, - "notification_config": [
- [
- {
- "contents": [
- "transcript:json-v2"
], - "auth_headers": [
- "Authorization: Bearer eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJ1c2VySWQiOiJiMDhmODZhZi0zNWRhLTQ4ZjItOGZhYi1jZWYzOTA0NjYwYmQifQ.-xN_h82PHVTCMA9vdoHrcZxH-x5mb11y1537t3rGzcM"
]
}
]
], - "tracking": {
- "title": "ACME Q12018 Earnings Call",
- "reference": "/data/clients/ACME/statements/segs/2018Q1-seg8",
- "tags": [
- "quick-review",
- "segment"
], - "details": {
- "client": "ACME Corp",
- "segment": 8,
- "seg_start": 963.201,
- "seg_end": 1091.481
}
}, - "output_config": {
- "srt_overrides": {
- "max_line_length": 0,
- "max_lines": 0
}
}, - "translation_config": {
- "target_languages": [
- "es",
- "de"
]
}
}, - "lang": "string",
- "errors": [
- {
- "timestamp": "2021-07-14T11:53:49.242Z",
- "message": "Audio fetch error, http status 418"
}
]
}
Returned when you get the transcript for a transcription job. It includes metadata about the job, such as the transcription config that was used.
format required | string Example: "2.1" Speechmatics JSON transcript format version number. |
job required | object (JobInfo) Summary information about an ASR job, to support identification and tracking. |
metadata required | object (RecognitionMetadata) Summary information about the output from an ASR job, comprising the job type and configuration parameters used when generating the output. |
results required | Array of objects (RecognitionResult) Example: [[{"channel":"channel_1","start_time":0.55,"end_time":1.2,"type":"word","alternatives":[{"confidence":0.95,"content":"Hello","language":"en","speaker":"S1","display":{"direction":"ltr"}}]}]] |
translations | object Example: {"de":[{"start_time":0.5,"end_time":1.3,"content":"Guten Tag, wie geht es dir?","speaker":"UU"}],"fr":[{"start_time":0.5,"end_time":1.3,"content":"Bonjour, comment ça va?","speaker":"UU"}]} Translations of the transcript into other languages. It is a map of ISO language codes to arrays of translated sentences. |
{- "format": "2.1",
- "job": {
- "created_at": "2018-01-09T12:29:01.853047Z",
- "data_name": "string",
- "duration": 0,
- "id": "a1b2c3d4e5",
- "text_name": "string",
- "tracking": {
- "title": "ACME Q12018 Earnings Call",
- "reference": "/data/clients/ACME/statements/segs/2018Q1-seg8",
- "tags": [
- "quick-review",
- "segment"
], - "details": {
- "client": "ACME Corp",
- "segment": 8,
- "seg_start": 963.201,
- "seg_end": 1091.481
}
}
}, - "metadata": {
- "created_at": "2018-01-09T12:29:01.853047Z",
- "type": "alignment",
- "transcription_config": {
- "language": "en",
- "output_locale": "en-GB",
- "additional_vocab": [
- {
- "content": "Speechmatics",
- "sounds_like": [
- "speechmatics"
]
}, - {
- "content": "gnocchi",
- "sounds_like": [
- "nyohki",
- "nokey",
- "nochi"
]
}, - {
- "content": "CEO",
- "sounds_like": [
- "C.E.O."
]
}, - {
- "content": "financial crisis"
}
], - "diarization": "channel",
- "channel_diarization_labels": [
- "Caller",
- "Agent"
]
}, - "alignment_config": {
- "language": "en"
}, - "output_config": {
- "srt_overrides": {
- "max_line_length": 0,
- "max_lines": 0
}
}
}, - "results": [
- [
- {
- "channel": "channel_1",
- "start_time": 0.55,
- "end_time": 1.2,
- "type": "word",
- "alternatives": [
- {
- "confidence": 0.95,
- "content": "Hello",
- "language": "en",
- "speaker": "S1",
- "display": {
- "direction": "ltr"
}
}
]
}
]
], - "translations": {
- "de": [
- {
- "start_time": 0.5,
- "end_time": 1.3,
- "content": "Guten Tag, wie geht es dir?",
- "speaker": "UU"
}
], - "fr": [
- {
- "start_time": 0.5,
- "end_time": 1.3,
- "content": "Bonjour, comment ça va?",
- "speaker": "UU"
}
]
}
}