Attachment 'JSpeexCompressionUtilites.java'

Download

   1 package au.com.foundationstone.model.audio.jspeex;
   2 
   3 
   4 import java.io.ByteArrayInputStream;
   5 import java.io.ByteArrayOutputStream;
   6 import java.io.DataInputStream;
   7 import java.io.EOFException;
   8 import java.io.IOException;
   9 import java.io.InputStream;
  10 import org.xiph.speex.OggCrc;
  11 import org.xiph.speex.SpeexDecoder;
  12 import org.xiph.speex.SpeexEncoder;
  13 import au.com.foundationstone.model.ModelUtilities;
  14 import au.com.foundationstone.model.standalone.ModelConstants;
  15 
  16 
  17 /**
  18  * Facade for streams into JSpeex 0.9.7
  19  */
  20 public class JSpeexCompressionUtilites extends Object {
  21 	private static final int SIXTY_FOUR = 64;
  22 	private static final int FORTY_EIGHT = 48;
  23 	private static final int THIRTY_SIX = 36;
  24 	private static final int FORTY = 40;
  25 	private static final String SPEEX_HEADER_STRING = "Speex   ";
  26 	private static final int EIGHTY = 80;
  27 	private static final int TWO_FIVE_FIVE = 255;
  28 	private static final int TWENTY_TWO = 22;
  29 	private static final int TWENTY_THREE = 23;
  30 	private static final int TWENTY_FOUR = 24;
  31 	private static final int TWENTY_FIVE = 25;
  32 	private static final int DECDAT_BYTES = 44100;
  33 	private static final int PAYLOAD_BYTES = 65536;
  34 	private static final int HEADER_BYTES = 2048;
  35 	private static final boolean DEFAULT_PERCEPTUAL_ENHANCEMENT = false;
  36 	private static final float DEFAULT_QUALITY = 6.0f;
  37 	private static final int DEFAULT_COMPLEXITY = 9;
  38 	private static final int THREE = 3;
  39 	private static final int EIGHT = 8;
  40 	private static final int OXFF = 0xff;
  41 	private static final int EIGHTEEN = 18;
  42 	private static final int SIXTEEN = 16;
  43 	private static final int FOURTEEN = 14;
  44 	private static final int FOUR = 4;
  45 	private static final int UWB_STEREO_BUFFER_SIZE = 2560;
  46 	private static final int HEADERSIZE = EIGHT;
  47 	private static final String FORMAT = "fmt ";
  48 	private static final String DATA = "data";
  49 	private static final int WAVE_FORMAT_PCM = 0x0001;
  50 	public static final int FILE_FORMAT_OGG = 1;
  51 	public static final int FILE_FORMAT_WAVE = 2;
  52 	public static final String VERSION = "Java Speex Command Line Encoder v0.9.7 (Revision: 1.1)";
  53 	// decode
  54 	public static final int OGG_HEADERSIZE = 27;
  55 	public static final int OGG_SEGOFFSET = 26;
  56 	public static final String OGGID = "OggS";
  57 	//
  58 	private byte[] temp = new byte[UWB_STEREO_BUFFER_SIZE]; // stereo UWB requires one to read 2560b
  59 
  60 
  61 	/**
  62 	 *	see the spreadsheet. Complexity doesn't affect anything, therefore we use 9.
  63 	 *	quality	time to encode 2 seconds		size of 1hr (MB)
  64 	 *	0	0.7291		5.897426605		<- audio poor
  65 	 * 	1	0.7996		6.503391266
  66 	 *	2	0.8756		7.246170044		<- 16 kbps (7.6MB/hr)
  67 	 *	3	0.9241		8.316993713		<- tolerable
  68 	 * 	4	1.0071		9.928722382
  69 	 * 	5	1.125		11.27660751
  70 	 * 	6	1.1967		12.31704712		<----------- good (we use this)
  71 	 * 	7	1.3176		14.2020607
  72 	 * 	8	1.3988		17.41882324
  73 	 * 	9	1.6928		21.8466568
  74 	 * running on a G4 1GHz powerbook 512M, 10.4.5, Java 1.5
  75 	 * false enhance is better on the ears
  76 	 */
  77 	public JSpeexCompressionUtilites() {
  78 	}
  79 
  80 
  81 	public byte[] encodeWaveToSpeex(final byte[] inBytes) {
  82 		byte[] returnValue = new byte[0];
  83 		try {
  84 			returnValue = encodeWaveToSpeex(inBytes, DEFAULT_COMPLEXITY, DEFAULT_QUALITY);
  85 		} catch (IOException e) {
  86 			ModelUtilities.logger(ModelConstants.DEBUG_LEVEL_0, "cannot encode to speex!: " + e.toString());
  87 		}
  88 		return returnValue;
  89 	}
  90 
  91 
  92 	public byte[] decodeSpeexToWave(final byte[] inBytes) {
  93 		byte[] returnValue = new byte[0];
  94 		try {
  95 			returnValue = decodeSpeexToWave(inBytes, DEFAULT_PERCEPTUAL_ENHANCEMENT);
  96 		} catch (IOException e) {
  97 			ModelUtilities.logger(ModelConstants.DEBUG_LEVEL_0, "cannot decode from speex!: " + e.toString());
  98 		}
  99 		return returnValue;
 100 	}
 101 
 102 
 103 	/**
 104 	 * @param complexity - integer ranging from 1 to 10.
 105 	 * @param vbrQuality - float ranging from 0 to 10.
 106 	 *
 107 	 * For normal use, the noise level at complexity 1 is between 1 and 2 dB higher than at complexity
 108 	 * 10, but the CPU requirements for complexity 10 is about 5 times higher than for complexity 1.
 109 	 * In practice, the best trade-off is between complexity 2 and 4, though higher settings are
 110 	 * often useful when encoding non-speech sounds like DTMF tones.
 111 	 */
 112 	public byte[] encodeWaveToSpeex(final byte[] inBytes, final int complexity, final float vbrQuality)
 113 		throws IOException {
 114 		int sampleRate = -1; // sampling rate of the audio input
 115 		int channels = -1; // (1=mono, 2=stereo)
 116 		int mode = 2; // UWB
 117 		int quality = 1; // constant bit rate quality, not used. 0-10
 118 		int nframes = 1; // number of frames per speex packet
 119 		// read the WAVE header
 120 		DataInputStream dis = new DataInputStream(new ByteArrayInputStream(inBytes));
 121 		dis.readFully(temp, 0, HEADERSIZE + FOUR);
 122 		// Read other header chunks
 123 		dis.readFully(temp, 0, HEADERSIZE);
 124 		String chunk = new String(temp, 0, FOUR);
 125 		int size = readInt(temp, FOUR);
 126 		while (!chunk.equals(DATA)) {
 127 			dis.readFully(temp, 0, size);
 128 			if (chunk.equals(FORMAT)) {
 129 				if (readShort(temp, 0) != WAVE_FORMAT_PCM) throw new RuntimeException("Not a PCM file");
 130 				channels = readShort(temp, 2);
 131 				sampleRate = readInt(temp, FOUR);
 132 				if (readShort(temp, FOURTEEN) != SIXTEEN)
 133 					throw new RuntimeException("Not a 16 bit file " + readShort(temp, EIGHTEEN));
 134 				dis.readFully(temp, 0, HEADERSIZE);
 135 				chunk = new String(temp, 0, FOUR);
 136 				size = readInt(temp, FOUR);
 137 			}
 138 		}
 139 		SpeexEncoder speexEncoder = new SpeexEncoder();
 140 		speexEncoder.init(mode, quality, sampleRate, channels);
 141 		speexEncoder.getEncoder().setComplexity(complexity);
 142 		speexEncoder.getEncoder().setVbr(true);
 143 		speexEncoder.getEncoder().setVbrQuality(vbrQuality);
 144 		speexEncoder.getEncoder().setVad(true);
 145 		speexEncoder.getEncoder().setDtx(true);
 146 		// Open the file writer
 147 		OggSpeexStreamWriter writer = new OggSpeexStreamWriter(mode, sampleRate, channels, nframes, true);
 148 		ByteArrayOutputStream baos = new ByteArrayOutputStream();
 149 		writer.open(baos);
 150 		writer.writeHeader("Encoded with: " + VERSION);
 151 		int pcmPacketSize = 2 * channels * speexEncoder.getFrameSize();
 152 		try {
 153 			while (true) {
 154 				dis.readFully(temp, 0, nframes * pcmPacketSize);
 155 				for (int i = 0; i < nframes; i++)
 156 					speexEncoder.processData(temp, i * pcmPacketSize, pcmPacketSize);
 157 				int encsize = speexEncoder.getProcessedData(temp, 0);
 158 				if (encsize > 0) {
 159 					writer.writePacket(temp, 0, encsize);
 160 				}
 161 			}
 162 		} catch (EOFException e) {
 163 		}
 164 		writer.close();
 165 		dis.close();
 166 		return baos.toByteArray();
 167 	}
 168 
 169 
 170 	/**
 171 	 * Perceptual enhancement tries to reduce (the perception of) the noise produced by the coding/decoding process.
 172 	 * In most cases, perceptual enhancement make the sound further from the original objectively
 173 	 * but in the end it still sounds better (subjective improvement).
 174 	 */
 175 	public byte[] decodeSpeexToWave(final byte[] inBytes, final boolean perceptualEnhancement) throws IOException {
 176 		byte[] header = new byte[HEADER_BYTES];
 177 		byte[] payload = new byte[PAYLOAD_BYTES];
 178 		byte[] decdat = new byte[DECDAT_BYTES * 2 * 2];
 179 		int segments = 0;
 180 		int curseg = 0;
 181 		int bodybytes = 0;
 182 		int decsize = 0;
 183 		int packetNo = 0;
 184 		int mode = 2;
 185 		int sampleRate = -1;
 186 		int channels = -1;
 187 		int nframes = -1;
 188 		SpeexDecoder speexDecoder = new SpeexDecoder();
 189 		DataInputStream dis = new DataInputStream(new ByteArrayInputStream(inBytes));
 190 		PcmWaveByteArrayWriter writer = null;
 191 		int origchksum;
 192 		int chksum;
 193 		try {
 194 			while (true) {
 195 				dis.readFully(header, 0, OGG_HEADERSIZE);
 196 				origchksum = readInt(header, TWENTY_TWO);
 197 				header[TWENTY_TWO] = 0;
 198 				header[TWENTY_THREE] = 0;
 199 				header[TWENTY_FOUR] = 0;
 200 				header[TWENTY_FIVE] = 0;
 201 				chksum = OggCrc.checksum(0, header, 0, OGG_HEADERSIZE);
 202 				if (!OGGID.equals(new String(header, 0, FOUR))) throw new RuntimeException("missing ogg id!");
 203 				/* how many segments are there? */
 204 				segments = header[OGG_SEGOFFSET] & OXFF;
 205 				dis.readFully(header, OGG_HEADERSIZE, segments);
 206 				chksum = OggCrc.checksum(chksum, header, OGG_HEADERSIZE, segments);
 207 				/* decode each segment, writing output to wav */
 208 				for (curseg = 0; curseg < segments; curseg++) {
 209 					/* get the number of bytes in the segment */
 210 					bodybytes = header[OGG_HEADERSIZE + curseg] & OXFF;
 211 					if (bodybytes == TWO_FIVE_FIVE) throw new RuntimeException("sorry, don't handle 255 sizes!");
 212 					dis.readFully(payload, 0, bodybytes);
 213 					chksum = OggCrc.checksum(chksum, payload, 0, bodybytes);
 214 					/* decode the segment */
 215 					/* if first packet, read the Speex header */
 216 					if (packetNo == 0) {
 217 						boolean readSpeexHeader = true;
 218 						if (bodybytes != EIGHTY) {
 219 							throw new RuntimeException("bad bodybytes!");
 220 						} else if (!SPEEX_HEADER_STRING.equals(new String(payload, 0, EIGHT))) {
 221 							readSpeexHeader = false;
 222 						}
 223 						if (readSpeexHeader) {
 224 							mode = payload[FORTY] & OXFF;
 225 							sampleRate = readInt(payload, THIRTY_SIX);
 226 							channels = readInt(payload, FORTY_EIGHT);
 227 							nframes = readInt(payload, SIXTY_FOUR);
 228 							readSpeexHeader = speexDecoder.init(mode, sampleRate, channels, perceptualEnhancement);
 229 							/* once Speex header read, initialize the wave writer with output format */
 230 							writer = new PcmWaveByteArrayWriter(speexDecoder.getSampleRate(), speexDecoder
 231 								.getChannels());
 232 							writer.open();
 233 							writer.writeHeader(null);
 234 							packetNo++;
 235 						} else {
 236 							packetNo = 0;
 237 						}
 238 					} else if (packetNo == 1) { // Ogg Comment packet
 239 						packetNo++;
 240 					} else {
 241 						speexDecoder.processData(payload, 0, bodybytes);
 242 						for (int i = 1; i < nframes; i++) {
 243 							speexDecoder.processData(false);
 244 						}
 245 						/* get the amount of decoded data */
 246 						if ((decsize = speexDecoder.getProcessedData(decdat, 0)) > 0) {
 247 							writer.writePacket(decdat, 0, decsize);
 248 						}
 249 						packetNo++;
 250 					}
 251 				}
 252 				if (chksum != origchksum) {
 253 					throw new IOException("Ogg CheckSums do not match");
 254 				}
 255 			}
 256 		} catch (EOFException eof) {
 257 		}
 258 		writer.close();
 259 		return writer.getBytes();
 260 	}
 261 
 262 
 263 	/**
 264 	 * Converts Little Endian (Windows) bytes to an int (Java uses Big Endian).
 265 	 */
 266 	private static int readInt(final byte[] data, final int offset) {
 267 		return (data[offset] & OXFF) | ((data[offset + 1] & OXFF) << EIGHT) | ((data[offset + 2] & OXFF) << SIXTEEN)
 268 			| (data[offset + THREE] << TWENTY_FOUR); // no 0xff on the last one to keep the sign
 269 	}
 270 
 271 
 272 	/**
 273 	 * Converts Little Endian (Windows) bytes to an short (Java uses Big Endian).
 274 	 */
 275 	private static int readShort(final byte[] data, final int offset) {
 276 		return (data[offset] & OXFF) | (data[offset + 1] << EIGHT); // no 0xff on the last one to keep the sign
 277 	}
 278 }

Attached Files

To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily.

You are not allowed to attach a file to this page.