Package timeside :: Package analyzer :: Module irit_speech_entropy
[hide private]
[frames | no frames]

Source Code for Module timeside.analyzer.irit_speech_entropy

  1  # -*- coding: utf-8 -*- 
  2  # 
  3  # Copyright (c) 2013 Maxime Le Coz <lecoz@irit.fr> 
  4   
  5  # This file is part of TimeSide. 
  6   
  7  # TimeSide is free software: you can redistribute it and/or modify 
  8  # it under the terms of the GNU General Public License as published by 
  9  # the Free Software Foundation, either version 2 of the License, or 
 10  # (at your option) any later version. 
 11   
 12  # TimeSide is distributed in the hope that it will be useful, 
 13  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 14  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 15  # GNU General Public License for more details. 
 16   
 17  # You should have received a copy of the GNU General Public License 
 18  # along with TimeSide.  If not, see <http://www.gnu.org/licenses/>. 
 19   
 20  # Author: Maxime Le Coz <lecoz@irit.fr> 
 21   
 22  from timeside.core import Processor, implements, interfacedoc 
 23  from timeside.analyzer.core import Analyzer 
 24  from timeside.analyzer.utils import entropy, computeModulation 
 25  from timeside.analyzer.utils import segmentFromValues 
 26  from timeside.api import IAnalyzer 
 27  from numpy import array 
 28  from scipy.ndimage.morphology import binary_opening 
class IRITSpeechEntropy(Analyzer):
    """Speech / non-speech detector based on the modulation of block entropy.

    ``process`` stores one entropy value per incoming block of frames.
    ``post_process`` turns that sequence into two results: a framewise
    confidence curve and a list of labelled segments.
    """

    implements(IAnalyzer)

    # NOTE(review): the methods decorated with @interfacedoc carry ``#``
    # comments instead of docstrings, on the assumption that the decorator
    # supplies the docstring from the implemented interface -- confirm
    # against timeside.core.interfacedoc before adding docstrings to them.

    @interfacedoc
    def setup(self, channels=None, samplerate=None, blocksize=None,
              totalframes=None):
        super(IRITSpeechEntropy, self).setup(
            channels, samplerate, blocksize, totalframes)
        # One entropy value per processed block; filled by process().
        self.entropyValue = []
        # Threshold applied to the entropy modulation in post_process().
        self.threshold = 0.4
        # Half the length of the binary-opening structuring element
        # (post_process() uses ``smoothLen * 2`` ones).
        self.smoothLen = 5
        # Modulation window; scaled by samplerate / blocksize in
        # post_process() (presumably a duration in seconds -- TODO confirm).
        self.modulLen = 2

    @staticmethod
    @interfacedoc
    def id():
        return "irit_speech_entropy"

    @staticmethod
    @interfacedoc
    def name():
        return "IRIT Speech entropy"

    @staticmethod
    @interfacedoc
    def unit():
        return ""

    def __str__(self):
        """Return a fixed, human-readable description of the analyzer."""
        return "Speech confidences indexes"

    def process(self, frames, eod=False):
        """Record the entropy of ``frames`` and pass the block through.

        ``frames`` and ``eod`` are returned unchanged.
        """
        self.entropyValue.append(entropy(frames))
        return frames, eod

    def post_process(self):
        """Build the confidence and segment results from the stored entropy.

        Two results are added to ``self._results``:

        * id suffixed with ``.confidence`` -- framewise values
          ``(modulation - threshold) / threshold`` capped at 1 (there is no
          lower bound).
        * id suffixed with ``.segments`` -- one entry per segment returned
          by ``segmentFromValues``: label 1 where the smoothed modulation
          exceeds the threshold and 0 elsewhere, plus start time and
          duration obtained by scaling block indices by
          ``blocksize / samplerate``.
        """
        # Evaluate the stream parameters once instead of once per segment.
        samplerate = self.samplerate()
        blocksize = self.blocksize()

        entropy_values = array(self.entropyValue)
        window = self.modulLen * samplerate / blocksize
        modulation = computeModulation(entropy_values, window, False)

        confidence = array(modulation - self.threshold) / self.threshold
        confidence[confidence > 1] = 1

        conf = self.new_result(data_mode='value', time_mode='framewise')
        conf.id_metadata.id += '.' + 'confidence'
        conf.id_metadata.name += ' ' + 'Confidence'
        conf.data_object.value = confidence
        self._results.add(conf)

        # Binary decision, smoothed by a morphological opening so that
        # "speech" runs shorter than the structuring element are dropped.
        is_speech = modulation > self.threshold
        is_speech = binary_opening(is_speech, [1] * (self.smoothLen * 2))

        # Segment entries are indexed as (start, stop, value).
        segments = segmentFromValues(is_speech)
        convert = {False: 0, True: 1}

        segs = self.new_result(data_mode='label', time_mode='segment')
        segs.id_metadata.id += '.' + 'segments'
        segs.id_metadata.name += ' ' + 'Segments'

        # NOTE(review): label codes are 0 = 'NonSpeech', 1 = 'Speech'. The
        # original code built that mapping but never attached it to the
        # result; it presumably belongs in the result's label metadata --
        # confirm against the result container API.
        segs.data_object.label = [convert[s[2]] for s in segments]
        segs.data_object.time = [float(s[0]) * blocksize / samplerate
                                 for s in segments]
        segs.data_object.duration = [float(s[1] - s[0]) * blocksize /
                                     samplerate
                                     for s in segments]

        self._results.add(segs)