1b815c7f3Sopenharmony_ci/* 2b815c7f3Sopenharmony_ci** Copyright (C) 2002-2017 Erik de Castro Lopo <erikd@mega-nerd.com> 3b815c7f3Sopenharmony_ci** 4b815c7f3Sopenharmony_ci** This program is free software; you can redistribute it and/or modify 5b815c7f3Sopenharmony_ci** it under the terms of the GNU Lesser General Public License as published by 6b815c7f3Sopenharmony_ci** the Free Software Foundation; either version 2.1 of the License, or 7b815c7f3Sopenharmony_ci** (at your option) any later version. 8b815c7f3Sopenharmony_ci** 9b815c7f3Sopenharmony_ci** This program is distributed in the hope that it will be useful, 10b815c7f3Sopenharmony_ci** but WITHOUT ANY WARRANTY; without even the implied warranty of 11b815c7f3Sopenharmony_ci** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12b815c7f3Sopenharmony_ci** GNU Lesser General Public License for more details. 13b815c7f3Sopenharmony_ci** 14b815c7f3Sopenharmony_ci** You should have received a copy of the GNU Lesser General Public License 15b815c7f3Sopenharmony_ci** along with this program; if not, write to the Free Software 16b815c7f3Sopenharmony_ci** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17b815c7f3Sopenharmony_ci*/ 18b815c7f3Sopenharmony_ci 19b815c7f3Sopenharmony_ci#include "sfconfig.h" 20b815c7f3Sopenharmony_ci 21b815c7f3Sopenharmony_ci#include <stdio.h> 22b815c7f3Sopenharmony_ci#include <fcntl.h> 23b815c7f3Sopenharmony_ci#include <string.h> 24b815c7f3Sopenharmony_ci#include <ctype.h> 25b815c7f3Sopenharmony_ci 26b815c7f3Sopenharmony_ci#include "sndfile.h" 27b815c7f3Sopenharmony_ci#include "sfendian.h" 28b815c7f3Sopenharmony_ci#include "common.h" 29b815c7f3Sopenharmony_ci 30b815c7f3Sopenharmony_ci/*------------------------------------------------------------------------------ 31b815c7f3Sopenharmony_ci** Macros to handle big/little endian issues. 32b815c7f3Sopenharmony_ci*/ 33b815c7f3Sopenharmony_ci 34b815c7f3Sopenharmony_ci#define SFE_HTK_BAD_FILE_LEN 1666 35b815c7f3Sopenharmony_ci#define SFE_HTK_NOT_WAVEFORM 1667 36b815c7f3Sopenharmony_ci 37b815c7f3Sopenharmony_ci/*------------------------------------------------------------------------------ 38b815c7f3Sopenharmony_ci** Private static functions. 39b815c7f3Sopenharmony_ci*/ 40b815c7f3Sopenharmony_ci 41b815c7f3Sopenharmony_cistatic int htk_close (SF_PRIVATE *psf) ; 42b815c7f3Sopenharmony_ci 43b815c7f3Sopenharmony_cistatic int htk_write_header (SF_PRIVATE *psf, int calc_length) ; 44b815c7f3Sopenharmony_cistatic int htk_read_header (SF_PRIVATE *psf) ; 45b815c7f3Sopenharmony_ci 46b815c7f3Sopenharmony_ci/*------------------------------------------------------------------------------ 47b815c7f3Sopenharmony_ci** Public function. 48b815c7f3Sopenharmony_ci*/ 49b815c7f3Sopenharmony_ci 50b815c7f3Sopenharmony_ciint 51b815c7f3Sopenharmony_cihtk_open (SF_PRIVATE *psf) 52b815c7f3Sopenharmony_ci{ int subformat ; 53b815c7f3Sopenharmony_ci int error = 0 ; 54b815c7f3Sopenharmony_ci 55b815c7f3Sopenharmony_ci if (psf->is_pipe) 56b815c7f3Sopenharmony_ci return SFE_HTK_NO_PIPE ; 57b815c7f3Sopenharmony_ci 58b815c7f3Sopenharmony_ci if (psf->file.mode == SFM_READ || (psf->file.mode == SFM_RDWR && psf->filelength > 0)) 59b815c7f3Sopenharmony_ci { if ((error = htk_read_header (psf))) 60b815c7f3Sopenharmony_ci return error ; 61b815c7f3Sopenharmony_ci } ; 62b815c7f3Sopenharmony_ci 63b815c7f3Sopenharmony_ci subformat = SF_CODEC (psf->sf.format) ; 64b815c7f3Sopenharmony_ci 65b815c7f3Sopenharmony_ci if (psf->file.mode == SFM_WRITE || psf->file.mode == SFM_RDWR) 66b815c7f3Sopenharmony_ci { if ((SF_CONTAINER (psf->sf.format)) != SF_FORMAT_HTK) 67b815c7f3Sopenharmony_ci return SFE_BAD_OPEN_FORMAT ; 68b815c7f3Sopenharmony_ci 69b815c7f3Sopenharmony_ci psf->endian = SF_ENDIAN_BIG ; 70b815c7f3Sopenharmony_ci 71b815c7f3Sopenharmony_ci if (htk_write_header (psf, SF_FALSE)) 72b815c7f3Sopenharmony_ci return psf->error ; 73b815c7f3Sopenharmony_ci 74b815c7f3Sopenharmony_ci psf->write_header = htk_write_header ; 75b815c7f3Sopenharmony_ci } ; 76b815c7f3Sopenharmony_ci 77b815c7f3Sopenharmony_ci psf->container_close = htk_close ; 78b815c7f3Sopenharmony_ci 79b815c7f3Sopenharmony_ci psf->blockwidth = psf->bytewidth * psf->sf.channels ; 80b815c7f3Sopenharmony_ci 81b815c7f3Sopenharmony_ci switch (subformat) 82b815c7f3Sopenharmony_ci { case SF_FORMAT_PCM_16 : /* 16-bit linear PCM. */ 83b815c7f3Sopenharmony_ci error = pcm_init (psf) ; 84b815c7f3Sopenharmony_ci break ; 85b815c7f3Sopenharmony_ci 86b815c7f3Sopenharmony_ci default : break ; 87b815c7f3Sopenharmony_ci } ; 88b815c7f3Sopenharmony_ci 89b815c7f3Sopenharmony_ci return error ; 90b815c7f3Sopenharmony_ci} /* htk_open */ 91b815c7f3Sopenharmony_ci 92b815c7f3Sopenharmony_ci/*------------------------------------------------------------------------------ 93b815c7f3Sopenharmony_ci*/ 94b815c7f3Sopenharmony_ci 95b815c7f3Sopenharmony_cistatic int 96b815c7f3Sopenharmony_cihtk_close (SF_PRIVATE *psf) 97b815c7f3Sopenharmony_ci{ 98b815c7f3Sopenharmony_ci if (psf->file.mode == SFM_WRITE || psf->file.mode == SFM_RDWR) 99b815c7f3Sopenharmony_ci htk_write_header (psf, SF_TRUE) ; 100b815c7f3Sopenharmony_ci 101b815c7f3Sopenharmony_ci return 0 ; 102b815c7f3Sopenharmony_ci} /* htk_close */ 103b815c7f3Sopenharmony_ci 104b815c7f3Sopenharmony_cistatic int 105b815c7f3Sopenharmony_cihtk_write_header (SF_PRIVATE *psf, int calc_length) 106b815c7f3Sopenharmony_ci{ sf_count_t current ; 107b815c7f3Sopenharmony_ci int sample_count, sample_period ; 108b815c7f3Sopenharmony_ci 109b815c7f3Sopenharmony_ci current = psf_ftell (psf) ; 110b815c7f3Sopenharmony_ci 111b815c7f3Sopenharmony_ci if (calc_length) 112b815c7f3Sopenharmony_ci psf->filelength = psf_get_filelen (psf) ; 113b815c7f3Sopenharmony_ci 114b815c7f3Sopenharmony_ci /* Reset the current header length to zero. */ 115b815c7f3Sopenharmony_ci psf->header.ptr [0] = 0 ; 116b815c7f3Sopenharmony_ci psf->header.indx = 0 ; 117b815c7f3Sopenharmony_ci psf_fseek (psf, 0, SEEK_SET) ; 118b815c7f3Sopenharmony_ci 119b815c7f3Sopenharmony_ci if (psf->filelength > 12) 120b815c7f3Sopenharmony_ci sample_count = (psf->filelength - 12) / 2 ; 121b815c7f3Sopenharmony_ci else 122b815c7f3Sopenharmony_ci sample_count = 0 ; 123b815c7f3Sopenharmony_ci 124b815c7f3Sopenharmony_ci sample_period = 10000000 / psf->sf.samplerate ; 125b815c7f3Sopenharmony_ci 126b815c7f3Sopenharmony_ci psf_binheader_writef (psf, "E444", BHW4 (sample_count), BHW4 (sample_period), BHW4 (0x20000)) ; 127b815c7f3Sopenharmony_ci 128b815c7f3Sopenharmony_ci /* Header construction complete so write it out. */ 129b815c7f3Sopenharmony_ci psf_fwrite (psf->header.ptr, psf->header.indx, 1, psf) ; 130b815c7f3Sopenharmony_ci 131b815c7f3Sopenharmony_ci if (psf->error) 132b815c7f3Sopenharmony_ci return psf->error ; 133b815c7f3Sopenharmony_ci 134b815c7f3Sopenharmony_ci psf->dataoffset = psf->header.indx ; 135b815c7f3Sopenharmony_ci 136b815c7f3Sopenharmony_ci if (current > 0) 137b815c7f3Sopenharmony_ci psf_fseek (psf, current, SEEK_SET) ; 138b815c7f3Sopenharmony_ci 139b815c7f3Sopenharmony_ci return psf->error ; 140b815c7f3Sopenharmony_ci} /* htk_write_header */ 141b815c7f3Sopenharmony_ci 142b815c7f3Sopenharmony_ci/* 143b815c7f3Sopenharmony_ci** Found the following info in a comment block within Bill Schottstaedt's 144b815c7f3Sopenharmony_ci** sndlib library. 145b815c7f3Sopenharmony_ci** 146b815c7f3Sopenharmony_ci** HTK format files consist of a contiguous sequence of samples preceded by a 147b815c7f3Sopenharmony_ci** header. Each sample is a vector of either 2-byte integers or 4-byte floats. 148b815c7f3Sopenharmony_ci** 2-byte integers are used for compressed forms as described below and for 149b815c7f3Sopenharmony_ci** vector quantised data as described later in section 5.11. HTK format data 150b815c7f3Sopenharmony_ci** files can also be used to store speech waveforms as described in section 5.8. 151b815c7f3Sopenharmony_ci** 152b815c7f3Sopenharmony_ci** The HTK file format header is 12 bytes long and contains the following data 153b815c7f3Sopenharmony_ci** nSamples -- number of samples in file (4-byte integer) 154b815c7f3Sopenharmony_ci** sampPeriod -- sample period in 100ns units (4-byte integer) 155b815c7f3Sopenharmony_ci** sampSize -- number of bytes per sample (2-byte integer) 156b815c7f3Sopenharmony_ci** parmKind -- a code indicating the sample kind (2-byte integer) 157b815c7f3Sopenharmony_ci** 158b815c7f3Sopenharmony_ci** The parameter kind consists of a 6 bit code representing the basic 159b815c7f3Sopenharmony_ci** parameter kind plus additional bits for each of the possible qualifiers. 160b815c7f3Sopenharmony_ci** The basic parameter kind codes are 161b815c7f3Sopenharmony_ci** 162b815c7f3Sopenharmony_ci** 0 WAVEFORM sampled waveform 163b815c7f3Sopenharmony_ci** 1 LPC linear prediction filter coefficients 164b815c7f3Sopenharmony_ci** 2 LPREFC linear prediction reflection coefficients 165b815c7f3Sopenharmony_ci** 3 LPCEPSTRA LPC cepstral coefficients 166b815c7f3Sopenharmony_ci** 4 LPDELCEP LPC cepstra plus delta coefficients 167b815c7f3Sopenharmony_ci** 5 IREFC LPC reflection coef in 16 bit integer format 168b815c7f3Sopenharmony_ci** 6 MFCC mel-frequency cepstral coefficients 169b815c7f3Sopenharmony_ci** 7 FBANK log mel-filter bank channel outputs 170b815c7f3Sopenharmony_ci** 8 MELSPEC linear mel-filter bank channel outputs 171b815c7f3Sopenharmony_ci** 9 USER user defined sample kind 172b815c7f3Sopenharmony_ci** 10 DISCRETE vector quantised data 173b815c7f3Sopenharmony_ci** 174b815c7f3Sopenharmony_ci** and the bit-encoding for the qualifiers (in octal) is 175b815c7f3Sopenharmony_ci** _E 000100 has energy 176b815c7f3Sopenharmony_ci** _N 000200 absolute energy suppressed 177b815c7f3Sopenharmony_ci** _D 000400 has delta coefficients 178b815c7f3Sopenharmony_ci** _A 001000 has acceleration coefficients 179b815c7f3Sopenharmony_ci** _C 002000 is compressed 180b815c7f3Sopenharmony_ci** _Z 004000 has zero mean static coef. 181b815c7f3Sopenharmony_ci** _K 010000 has CRC checksum 182b815c7f3Sopenharmony_ci** _O 020000 has 0'th cepstral coef. 183b815c7f3Sopenharmony_ci*/ 184b815c7f3Sopenharmony_ci 185b815c7f3Sopenharmony_cistatic int 186b815c7f3Sopenharmony_cihtk_read_header (SF_PRIVATE *psf) 187b815c7f3Sopenharmony_ci{ int sample_count, sample_period, marker ; 188b815c7f3Sopenharmony_ci 189b815c7f3Sopenharmony_ci psf_binheader_readf (psf, "pE444", 0, &sample_count, &sample_period, &marker) ; 190b815c7f3Sopenharmony_ci 191b815c7f3Sopenharmony_ci if (2 * sample_count + 12 != psf->filelength) 192b815c7f3Sopenharmony_ci return SFE_HTK_BAD_FILE_LEN ; 193b815c7f3Sopenharmony_ci 194b815c7f3Sopenharmony_ci if (marker != 0x20000) 195b815c7f3Sopenharmony_ci return SFE_HTK_NOT_WAVEFORM ; 196b815c7f3Sopenharmony_ci 197b815c7f3Sopenharmony_ci psf->sf.channels = 1 ; 198b815c7f3Sopenharmony_ci 199b815c7f3Sopenharmony_ci if (sample_period > 0) 200b815c7f3Sopenharmony_ci { psf->sf.samplerate = 10000000 / sample_period ; 201b815c7f3Sopenharmony_ci psf_log_printf (psf, "HTK Waveform file\n Sample Count : %d\n Sample Period : %d => %d Hz\n", 202b815c7f3Sopenharmony_ci sample_count, sample_period, psf->sf.samplerate) ; 203b815c7f3Sopenharmony_ci } 204b815c7f3Sopenharmony_ci else 205b815c7f3Sopenharmony_ci { psf->sf.samplerate = 16000 ; 206b815c7f3Sopenharmony_ci psf_log_printf (psf, "HTK Waveform file\n Sample Count : %d\n Sample Period : %d (should be > 0) => Guessed sample rate %d Hz\n", 207b815c7f3Sopenharmony_ci sample_count, sample_period, psf->sf.samplerate) ; 208b815c7f3Sopenharmony_ci } ; 209b815c7f3Sopenharmony_ci 210b815c7f3Sopenharmony_ci psf->sf.format = SF_FORMAT_HTK | SF_FORMAT_PCM_16 ; 211b815c7f3Sopenharmony_ci psf->bytewidth = 2 ; 212b815c7f3Sopenharmony_ci 213b815c7f3Sopenharmony_ci /* HTK always has a 12 byte header. */ 214b815c7f3Sopenharmony_ci psf->dataoffset = 12 ; 215b815c7f3Sopenharmony_ci psf->endian = SF_ENDIAN_BIG ; 216b815c7f3Sopenharmony_ci 217b815c7f3Sopenharmony_ci psf->datalength = psf->filelength - psf->dataoffset ; 218b815c7f3Sopenharmony_ci 219b815c7f3Sopenharmony_ci psf->blockwidth = psf->sf.channels * psf->bytewidth ; 220b815c7f3Sopenharmony_ci 221b815c7f3Sopenharmony_ci if (! psf->sf.frames && psf->blockwidth) 222b815c7f3Sopenharmony_ci psf->sf.frames = (psf->filelength - psf->dataoffset) / psf->blockwidth ; 223b815c7f3Sopenharmony_ci 224b815c7f3Sopenharmony_ci return 0 ; 225b815c7f3Sopenharmony_ci} /* htk_read_header */ 226b815c7f3Sopenharmony_ci 227