xref: /third_party/libsnd/src/htk.c (revision b815c7f3)
1/*
2** Copyright (C) 2002-2017 Erik de Castro Lopo <erikd@mega-nerd.com>
3**
4** This program is free software; you can redistribute it and/or modify
5** it under the terms of the GNU Lesser General Public License as published by
6** the Free Software Foundation; either version 2.1 of the License, or
7** (at your option) any later version.
8**
9** This program is distributed in the hope that it will be useful,
10** but WITHOUT ANY WARRANTY; without even the implied warranty of
11** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12** GNU Lesser General Public License for more details.
13**
14** You should have received a copy of the GNU Lesser General Public License
15** along with this program; if not, write to the Free Software
16** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17*/
18
19#include	"sfconfig.h"
20
21#include	<stdio.h>
22#include	<fcntl.h>
23#include	<string.h>
24#include	<ctype.h>
25
26#include	"sndfile.h"
27#include	"sfendian.h"
28#include	"common.h"
29
/*------------------------------------------------------------------------------
** Error codes private to this file, returned by htk_read_header ().
*/

/* The sample count in the header does not agree with the length of the file. */
#define SFE_HTK_BAD_FILE_LEN	1666

/* The sample size / parameter kind word does not describe a 16 bit waveform. */
#define SFE_HTK_NOT_WAVEFORM	1667
36
37/*------------------------------------------------------------------------------
38** Private static functions.
39*/
40
41static	int		htk_close		(SF_PRIVATE *psf) ;
42
43static int		htk_write_header (SF_PRIVATE *psf, int calc_length) ;
44static int		htk_read_header (SF_PRIVATE *psf) ;
45
46/*------------------------------------------------------------------------------
47** Public function.
48*/
49
50int
51htk_open	(SF_PRIVATE *psf)
52{	int		subformat ;
53	int		error = 0 ;
54
55	if (psf->is_pipe)
56		return SFE_HTK_NO_PIPE ;
57
58	if (psf->file.mode == SFM_READ || (psf->file.mode == SFM_RDWR && psf->filelength > 0))
59	{	if ((error = htk_read_header (psf)))
60			return error ;
61		} ;
62
63	subformat = SF_CODEC (psf->sf.format) ;
64
65	if (psf->file.mode == SFM_WRITE || psf->file.mode == SFM_RDWR)
66	{	if ((SF_CONTAINER (psf->sf.format)) != SF_FORMAT_HTK)
67			return	SFE_BAD_OPEN_FORMAT ;
68
69		psf->endian = SF_ENDIAN_BIG ;
70
71		if (htk_write_header (psf, SF_FALSE))
72			return psf->error ;
73
74		psf->write_header = htk_write_header ;
75		} ;
76
77	psf->container_close = htk_close ;
78
79	psf->blockwidth = psf->bytewidth * psf->sf.channels ;
80
81	switch (subformat)
82	{	case SF_FORMAT_PCM_16 :	/* 16-bit linear PCM. */
83				error = pcm_init (psf) ;
84				break ;
85
86		default : break ;
87		} ;
88
89	return error ;
90} /* htk_open */
91
92/*------------------------------------------------------------------------------
93*/
94
95static int
96htk_close	(SF_PRIVATE *psf)
97{
98	if (psf->file.mode == SFM_WRITE || psf->file.mode == SFM_RDWR)
99		htk_write_header (psf, SF_TRUE) ;
100
101	return 0 ;
102} /* htk_close */
103
104static int
105htk_write_header (SF_PRIVATE *psf, int calc_length)
106{	sf_count_t	current ;
107	int			sample_count, sample_period ;
108
109	current = psf_ftell (psf) ;
110
111	if (calc_length)
112		psf->filelength = psf_get_filelen (psf) ;
113
114	/* Reset the current header length to zero. */
115	psf->header.ptr [0] = 0 ;
116	psf->header.indx = 0 ;
117	psf_fseek (psf, 0, SEEK_SET) ;
118
119	if (psf->filelength > 12)
120		sample_count = (psf->filelength - 12) / 2 ;
121	else
122		sample_count = 0 ;
123
124	sample_period = 10000000 / psf->sf.samplerate ;
125
126	psf_binheader_writef (psf, "E444", BHW4 (sample_count), BHW4 (sample_period), BHW4 (0x20000)) ;
127
128	/* Header construction complete so write it out. */
129	psf_fwrite (psf->header.ptr, psf->header.indx, 1, psf) ;
130
131	if (psf->error)
132		return psf->error ;
133
134	psf->dataoffset = psf->header.indx ;
135
136	if (current > 0)
137		psf_fseek (psf, current, SEEK_SET) ;
138
139	return psf->error ;
140} /* htk_write_header */
141
142/*
143** Found the following info in a comment block within Bill Schottstaedt's
144** sndlib library.
145**
146** HTK format files consist of a contiguous sequence of samples preceded by a
147** header. Each sample is a vector of either 2-byte integers or 4-byte floats.
148** 2-byte integers are used for compressed forms as described below and for
149** vector quantised data as described later in section 5.11. HTK format data
150** files can also be used to store speech waveforms as described in section 5.8.
151**
152** The HTK file format header is 12 bytes long and contains the following data
153**   nSamples   -- number of samples in file (4-byte integer)
154**   sampPeriod -- sample period in 100ns units (4-byte integer)
155**   sampSize   -- number of bytes per sample (2-byte integer)
156**   parmKind   -- a code indicating the sample kind (2-byte integer)
157**
158** The parameter kind  consists of a 6 bit code representing the basic
159** parameter kind plus additional bits for each of the possible qualifiers.
160** The basic parameter kind codes are
161**
162**  0    WAVEFORM    sampled waveform
163**  1    LPC         linear prediction filter coefficients
164**  2    LPREFC      linear prediction reflection coefficients
165**  3    LPCEPSTRA   LPC cepstral coefficients
166**  4    LPDELCEP    LPC cepstra plus delta coefficients
167**  5    IREFC       LPC reflection coef in 16 bit integer format
168**  6    MFCC        mel-frequency cepstral coefficients
169**  7    FBANK       log mel-filter bank channel outputs
170**  8    MELSPEC     linear mel-filter bank channel outputs
171**  9    USER        user defined sample kind
172**  10   DISCRETE    vector quantised data
173**
174** and the bit-encoding for the qualifiers (in octal) is
175**   _E   000100      has energy
176**   _N   000200      absolute energy suppressed
177**   _D   000400      has delta coefficients
178**   _A   001000      has acceleration coefficients
179**   _C   002000      is compressed
180**   _Z   004000      has zero mean static coef.
181**   _K   010000      has CRC checksum
182**   _O   020000      has 0'th cepstral coef.
183*/
184
185static int
186htk_read_header (SF_PRIVATE *psf)
187{	int		sample_count, sample_period, marker ;
188
189	psf_binheader_readf (psf, "pE444", 0, &sample_count, &sample_period, &marker) ;
190
191	if (2 * sample_count + 12 != psf->filelength)
192		return SFE_HTK_BAD_FILE_LEN ;
193
194	if (marker != 0x20000)
195		return SFE_HTK_NOT_WAVEFORM ;
196
197	psf->sf.channels = 1 ;
198
199	if (sample_period > 0)
200	{	psf->sf.samplerate = 10000000 / sample_period ;
201		psf_log_printf (psf, "HTK Waveform file\n  Sample Count  : %d\n  Sample Period : %d => %d Hz\n",
202					sample_count, sample_period, psf->sf.samplerate) ;
203		}
204	else
205	{	psf->sf.samplerate = 16000 ;
206		psf_log_printf (psf, "HTK Waveform file\n  Sample Count  : %d\n  Sample Period : %d (should be > 0) => Guessed sample rate %d Hz\n",
207					sample_count, sample_period, psf->sf.samplerate) ;
208		} ;
209
210	psf->sf.format = SF_FORMAT_HTK | SF_FORMAT_PCM_16 ;
211	psf->bytewidth = 2 ;
212
213	/* HTK always has a 12 byte header. */
214	psf->dataoffset = 12 ;
215	psf->endian = SF_ENDIAN_BIG ;
216
217	psf->datalength = psf->filelength - psf->dataoffset ;
218
219	psf->blockwidth = psf->sf.channels * psf->bytewidth ;
220
221	if (! psf->sf.frames && psf->blockwidth)
222		psf->sf.frames = (psf->filelength - psf->dataoffset) / psf->blockwidth ;
223
224	return 0 ;
225} /* htk_read_header */
226
227