1b815c7f3Sopenharmony_ci/*
2b815c7f3Sopenharmony_ci** Copyright (C) 2002-2017 Erik de Castro Lopo <erikd@mega-nerd.com>
3b815c7f3Sopenharmony_ci**
4b815c7f3Sopenharmony_ci** This program is free software; you can redistribute it and/or modify
5b815c7f3Sopenharmony_ci** it under the terms of the GNU Lesser General Public License as published by
6b815c7f3Sopenharmony_ci** the Free Software Foundation; either version 2.1 of the License, or
7b815c7f3Sopenharmony_ci** (at your option) any later version.
8b815c7f3Sopenharmony_ci**
9b815c7f3Sopenharmony_ci** This program is distributed in the hope that it will be useful,
10b815c7f3Sopenharmony_ci** but WITHOUT ANY WARRANTY; without even the implied warranty of
11b815c7f3Sopenharmony_ci** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12b815c7f3Sopenharmony_ci** GNU Lesser General Public License for more details.
13b815c7f3Sopenharmony_ci**
14b815c7f3Sopenharmony_ci** You should have received a copy of the GNU Lesser General Public License
15b815c7f3Sopenharmony_ci** along with this program; if not, write to the Free Software
16b815c7f3Sopenharmony_ci** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17b815c7f3Sopenharmony_ci*/
18b815c7f3Sopenharmony_ci
19b815c7f3Sopenharmony_ci#include	"sfconfig.h"
20b815c7f3Sopenharmony_ci
21b815c7f3Sopenharmony_ci#include	<stdio.h>
22b815c7f3Sopenharmony_ci#include	<fcntl.h>
23b815c7f3Sopenharmony_ci#include	<string.h>
24b815c7f3Sopenharmony_ci#include	<ctype.h>
25b815c7f3Sopenharmony_ci
26b815c7f3Sopenharmony_ci#include	"sndfile.h"
27b815c7f3Sopenharmony_ci#include	"sfendian.h"
28b815c7f3Sopenharmony_ci#include	"common.h"
29b815c7f3Sopenharmony_ci
/*------------------------------------------------------------------------------
** Error codes private to the HTK file handler.
*/
33b815c7f3Sopenharmony_ci
34b815c7f3Sopenharmony_ci#define	SFE_HTK_BAD_FILE_LEN 	1666
35b815c7f3Sopenharmony_ci#define	SFE_HTK_NOT_WAVEFORM	1667
36b815c7f3Sopenharmony_ci
37b815c7f3Sopenharmony_ci/*------------------------------------------------------------------------------
38b815c7f3Sopenharmony_ci** Private static functions.
39b815c7f3Sopenharmony_ci*/
40b815c7f3Sopenharmony_ci
41b815c7f3Sopenharmony_cistatic	int		htk_close		(SF_PRIVATE *psf) ;
42b815c7f3Sopenharmony_ci
43b815c7f3Sopenharmony_cistatic int		htk_write_header (SF_PRIVATE *psf, int calc_length) ;
44b815c7f3Sopenharmony_cistatic int		htk_read_header (SF_PRIVATE *psf) ;
45b815c7f3Sopenharmony_ci
46b815c7f3Sopenharmony_ci/*------------------------------------------------------------------------------
47b815c7f3Sopenharmony_ci** Public function.
48b815c7f3Sopenharmony_ci*/
49b815c7f3Sopenharmony_ci
50b815c7f3Sopenharmony_ciint
51b815c7f3Sopenharmony_cihtk_open	(SF_PRIVATE *psf)
52b815c7f3Sopenharmony_ci{	int		subformat ;
53b815c7f3Sopenharmony_ci	int		error = 0 ;
54b815c7f3Sopenharmony_ci
55b815c7f3Sopenharmony_ci	if (psf->is_pipe)
56b815c7f3Sopenharmony_ci		return SFE_HTK_NO_PIPE ;
57b815c7f3Sopenharmony_ci
58b815c7f3Sopenharmony_ci	if (psf->file.mode == SFM_READ || (psf->file.mode == SFM_RDWR && psf->filelength > 0))
59b815c7f3Sopenharmony_ci	{	if ((error = htk_read_header (psf)))
60b815c7f3Sopenharmony_ci			return error ;
61b815c7f3Sopenharmony_ci		} ;
62b815c7f3Sopenharmony_ci
63b815c7f3Sopenharmony_ci	subformat = SF_CODEC (psf->sf.format) ;
64b815c7f3Sopenharmony_ci
65b815c7f3Sopenharmony_ci	if (psf->file.mode == SFM_WRITE || psf->file.mode == SFM_RDWR)
66b815c7f3Sopenharmony_ci	{	if ((SF_CONTAINER (psf->sf.format)) != SF_FORMAT_HTK)
67b815c7f3Sopenharmony_ci			return	SFE_BAD_OPEN_FORMAT ;
68b815c7f3Sopenharmony_ci
69b815c7f3Sopenharmony_ci		psf->endian = SF_ENDIAN_BIG ;
70b815c7f3Sopenharmony_ci
71b815c7f3Sopenharmony_ci		if (htk_write_header (psf, SF_FALSE))
72b815c7f3Sopenharmony_ci			return psf->error ;
73b815c7f3Sopenharmony_ci
74b815c7f3Sopenharmony_ci		psf->write_header = htk_write_header ;
75b815c7f3Sopenharmony_ci		} ;
76b815c7f3Sopenharmony_ci
77b815c7f3Sopenharmony_ci	psf->container_close = htk_close ;
78b815c7f3Sopenharmony_ci
79b815c7f3Sopenharmony_ci	psf->blockwidth = psf->bytewidth * psf->sf.channels ;
80b815c7f3Sopenharmony_ci
81b815c7f3Sopenharmony_ci	switch (subformat)
82b815c7f3Sopenharmony_ci	{	case SF_FORMAT_PCM_16 :	/* 16-bit linear PCM. */
83b815c7f3Sopenharmony_ci				error = pcm_init (psf) ;
84b815c7f3Sopenharmony_ci				break ;
85b815c7f3Sopenharmony_ci
86b815c7f3Sopenharmony_ci		default : break ;
87b815c7f3Sopenharmony_ci		} ;
88b815c7f3Sopenharmony_ci
89b815c7f3Sopenharmony_ci	return error ;
90b815c7f3Sopenharmony_ci} /* htk_open */
91b815c7f3Sopenharmony_ci
92b815c7f3Sopenharmony_ci/*------------------------------------------------------------------------------
93b815c7f3Sopenharmony_ci*/
94b815c7f3Sopenharmony_ci
95b815c7f3Sopenharmony_cistatic int
96b815c7f3Sopenharmony_cihtk_close	(SF_PRIVATE *psf)
97b815c7f3Sopenharmony_ci{
98b815c7f3Sopenharmony_ci	if (psf->file.mode == SFM_WRITE || psf->file.mode == SFM_RDWR)
99b815c7f3Sopenharmony_ci		htk_write_header (psf, SF_TRUE) ;
100b815c7f3Sopenharmony_ci
101b815c7f3Sopenharmony_ci	return 0 ;
102b815c7f3Sopenharmony_ci} /* htk_close */
103b815c7f3Sopenharmony_ci
104b815c7f3Sopenharmony_cistatic int
105b815c7f3Sopenharmony_cihtk_write_header (SF_PRIVATE *psf, int calc_length)
106b815c7f3Sopenharmony_ci{	sf_count_t	current ;
107b815c7f3Sopenharmony_ci	int			sample_count, sample_period ;
108b815c7f3Sopenharmony_ci
109b815c7f3Sopenharmony_ci	current = psf_ftell (psf) ;
110b815c7f3Sopenharmony_ci
111b815c7f3Sopenharmony_ci	if (calc_length)
112b815c7f3Sopenharmony_ci		psf->filelength = psf_get_filelen (psf) ;
113b815c7f3Sopenharmony_ci
114b815c7f3Sopenharmony_ci	/* Reset the current header length to zero. */
115b815c7f3Sopenharmony_ci	psf->header.ptr [0] = 0 ;
116b815c7f3Sopenharmony_ci	psf->header.indx = 0 ;
117b815c7f3Sopenharmony_ci	psf_fseek (psf, 0, SEEK_SET) ;
118b815c7f3Sopenharmony_ci
119b815c7f3Sopenharmony_ci	if (psf->filelength > 12)
120b815c7f3Sopenharmony_ci		sample_count = (psf->filelength - 12) / 2 ;
121b815c7f3Sopenharmony_ci	else
122b815c7f3Sopenharmony_ci		sample_count = 0 ;
123b815c7f3Sopenharmony_ci
124b815c7f3Sopenharmony_ci	sample_period = 10000000 / psf->sf.samplerate ;
125b815c7f3Sopenharmony_ci
126b815c7f3Sopenharmony_ci	psf_binheader_writef (psf, "E444", BHW4 (sample_count), BHW4 (sample_period), BHW4 (0x20000)) ;
127b815c7f3Sopenharmony_ci
128b815c7f3Sopenharmony_ci	/* Header construction complete so write it out. */
129b815c7f3Sopenharmony_ci	psf_fwrite (psf->header.ptr, psf->header.indx, 1, psf) ;
130b815c7f3Sopenharmony_ci
131b815c7f3Sopenharmony_ci	if (psf->error)
132b815c7f3Sopenharmony_ci		return psf->error ;
133b815c7f3Sopenharmony_ci
134b815c7f3Sopenharmony_ci	psf->dataoffset = psf->header.indx ;
135b815c7f3Sopenharmony_ci
136b815c7f3Sopenharmony_ci	if (current > 0)
137b815c7f3Sopenharmony_ci		psf_fseek (psf, current, SEEK_SET) ;
138b815c7f3Sopenharmony_ci
139b815c7f3Sopenharmony_ci	return psf->error ;
140b815c7f3Sopenharmony_ci} /* htk_write_header */
141b815c7f3Sopenharmony_ci
142b815c7f3Sopenharmony_ci/*
143b815c7f3Sopenharmony_ci** Found the following info in a comment block within Bill Schottstaedt's
144b815c7f3Sopenharmony_ci** sndlib library.
145b815c7f3Sopenharmony_ci**
146b815c7f3Sopenharmony_ci** HTK format files consist of a contiguous sequence of samples preceded by a
147b815c7f3Sopenharmony_ci** header. Each sample is a vector of either 2-byte integers or 4-byte floats.
148b815c7f3Sopenharmony_ci** 2-byte integers are used for compressed forms as described below and for
149b815c7f3Sopenharmony_ci** vector quantised data as described later in section 5.11. HTK format data
150b815c7f3Sopenharmony_ci** files can also be used to store speech waveforms as described in section 5.8.
151b815c7f3Sopenharmony_ci**
152b815c7f3Sopenharmony_ci** The HTK file format header is 12 bytes long and contains the following data
153b815c7f3Sopenharmony_ci**   nSamples   -- number of samples in file (4-byte integer)
154b815c7f3Sopenharmony_ci**   sampPeriod -- sample period in 100ns units (4-byte integer)
155b815c7f3Sopenharmony_ci**   sampSize   -- number of bytes per sample (2-byte integer)
156b815c7f3Sopenharmony_ci**   parmKind   -- a code indicating the sample kind (2-byte integer)
157b815c7f3Sopenharmony_ci**
158b815c7f3Sopenharmony_ci** The parameter kind  consists of a 6 bit code representing the basic
159b815c7f3Sopenharmony_ci** parameter kind plus additional bits for each of the possible qualifiers.
160b815c7f3Sopenharmony_ci** The basic parameter kind codes are
161b815c7f3Sopenharmony_ci**
162b815c7f3Sopenharmony_ci**  0    WAVEFORM    sampled waveform
163b815c7f3Sopenharmony_ci**  1    LPC         linear prediction filter coefficients
164b815c7f3Sopenharmony_ci**  2    LPREFC      linear prediction reflection coefficients
165b815c7f3Sopenharmony_ci**  3    LPCEPSTRA   LPC cepstral coefficients
166b815c7f3Sopenharmony_ci**  4    LPDELCEP    LPC cepstra plus delta coefficients
167b815c7f3Sopenharmony_ci**  5    IREFC       LPC reflection coef in 16 bit integer format
168b815c7f3Sopenharmony_ci**  6    MFCC        mel-frequency cepstral coefficients
169b815c7f3Sopenharmony_ci**  7    FBANK       log mel-filter bank channel outputs
170b815c7f3Sopenharmony_ci**  8    MELSPEC     linear mel-filter bank channel outputs
171b815c7f3Sopenharmony_ci**  9    USER        user defined sample kind
172b815c7f3Sopenharmony_ci**  10   DISCRETE    vector quantised data
173b815c7f3Sopenharmony_ci**
174b815c7f3Sopenharmony_ci** and the bit-encoding for the qualifiers (in octal) is
175b815c7f3Sopenharmony_ci**   _E   000100      has energy
176b815c7f3Sopenharmony_ci**   _N   000200      absolute energy suppressed
177b815c7f3Sopenharmony_ci**   _D   000400      has delta coefficients
178b815c7f3Sopenharmony_ci**   _A   001000      has acceleration coefficients
179b815c7f3Sopenharmony_ci**   _C   002000      is compressed
180b815c7f3Sopenharmony_ci**   _Z   004000      has zero mean static coef.
181b815c7f3Sopenharmony_ci**   _K   010000      has CRC checksum
182b815c7f3Sopenharmony_ci**   _O   020000      has 0'th cepstral coef.
183b815c7f3Sopenharmony_ci*/
184b815c7f3Sopenharmony_ci
185b815c7f3Sopenharmony_cistatic int
186b815c7f3Sopenharmony_cihtk_read_header (SF_PRIVATE *psf)
187b815c7f3Sopenharmony_ci{	int		sample_count, sample_period, marker ;
188b815c7f3Sopenharmony_ci
189b815c7f3Sopenharmony_ci	psf_binheader_readf (psf, "pE444", 0, &sample_count, &sample_period, &marker) ;
190b815c7f3Sopenharmony_ci
191b815c7f3Sopenharmony_ci	if (2 * sample_count + 12 != psf->filelength)
192b815c7f3Sopenharmony_ci		return SFE_HTK_BAD_FILE_LEN ;
193b815c7f3Sopenharmony_ci
194b815c7f3Sopenharmony_ci	if (marker != 0x20000)
195b815c7f3Sopenharmony_ci		return SFE_HTK_NOT_WAVEFORM ;
196b815c7f3Sopenharmony_ci
197b815c7f3Sopenharmony_ci	psf->sf.channels = 1 ;
198b815c7f3Sopenharmony_ci
199b815c7f3Sopenharmony_ci	if (sample_period > 0)
200b815c7f3Sopenharmony_ci	{	psf->sf.samplerate = 10000000 / sample_period ;
201b815c7f3Sopenharmony_ci		psf_log_printf (psf, "HTK Waveform file\n  Sample Count  : %d\n  Sample Period : %d => %d Hz\n",
202b815c7f3Sopenharmony_ci					sample_count, sample_period, psf->sf.samplerate) ;
203b815c7f3Sopenharmony_ci		}
204b815c7f3Sopenharmony_ci	else
205b815c7f3Sopenharmony_ci	{	psf->sf.samplerate = 16000 ;
206b815c7f3Sopenharmony_ci		psf_log_printf (psf, "HTK Waveform file\n  Sample Count  : %d\n  Sample Period : %d (should be > 0) => Guessed sample rate %d Hz\n",
207b815c7f3Sopenharmony_ci					sample_count, sample_period, psf->sf.samplerate) ;
208b815c7f3Sopenharmony_ci		} ;
209b815c7f3Sopenharmony_ci
210b815c7f3Sopenharmony_ci	psf->sf.format = SF_FORMAT_HTK | SF_FORMAT_PCM_16 ;
211b815c7f3Sopenharmony_ci	psf->bytewidth = 2 ;
212b815c7f3Sopenharmony_ci
213b815c7f3Sopenharmony_ci	/* HTK always has a 12 byte header. */
214b815c7f3Sopenharmony_ci	psf->dataoffset = 12 ;
215b815c7f3Sopenharmony_ci	psf->endian = SF_ENDIAN_BIG ;
216b815c7f3Sopenharmony_ci
217b815c7f3Sopenharmony_ci	psf->datalength = psf->filelength - psf->dataoffset ;
218b815c7f3Sopenharmony_ci
219b815c7f3Sopenharmony_ci	psf->blockwidth = psf->sf.channels * psf->bytewidth ;
220b815c7f3Sopenharmony_ci
221b815c7f3Sopenharmony_ci	if (! psf->sf.frames && psf->blockwidth)
222b815c7f3Sopenharmony_ci		psf->sf.frames = (psf->filelength - psf->dataoffset) / psf->blockwidth ;
223b815c7f3Sopenharmony_ci
224b815c7f3Sopenharmony_ci	return 0 ;
225b815c7f3Sopenharmony_ci} /* htk_read_header */
226b815c7f3Sopenharmony_ci
227