| Home | Trees | Indices | Help |
|
|---|
|
|
1 # -*- coding: utf-8 -*-
2 #
3 # Copyright (c) 2007-2013 Parisson SARL
4
5 # This file is part of TimeSide.
6
7 # TimeSide is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 2 of the License, or
10 # (at your option) any later version.
11
12 # TimeSide is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16
17 # You should have received a copy of the GNU General Public License
18 # along with TimeSide. If not, see <http://www.gnu.org/licenses/>.
19
20 # Authors:
21 # Guillaume Pellerin <yomguy at parisson.com>
22 # Paul Brossier <piem@piem.org>
23 # Thomas Fillon <thomas at parisson.com>
24 from __future__ import division
25
26 from timeside.core import Processor
27 from timeside.__init__ import __version__
28 import numpy
29 from collections import OrderedDict
30 import h5py
31 import h5tools
32 import os
33
# Pick the doctest option used by the module doctests: skip the
# display-dependent ones when a DISPLAY is available, otherwise run them
# with ellipsis matching.
# NOTE: dict.has_key() was removed in Python 3; the `in` operator is the
# portable spelling and behaves identically here.
if 'DISPLAY' in os.environ:
    doctest_option = '+SKIP'
else:
    doctest_option = '+ELLIPSIS'
38
39
# Numpy data types accepted for result storage (must be serializable by
# h5py).  Commented-out names are types h5py (or older h5py versions)
# cannot handle.
numpy_data_types = [
    # 'float128',
    'float64',
    'float32',
    # 'float16',  # Not supported by h5py for version < 2.2
    'int64',
    'int16',
    'int32',
    'int8',
    'uint64',
    'uint32',
    'uint16',
    'uint8',
    'object_',
    'string_',
    'longlong',
    # 'timedelta64',
    # 'datetime64',
    # 'complex128',
    # 'complex64',
]
# Resolve the names to the actual numpy type objects.  A list
# comprehension (not map()) keeps this a real list on Python 3, where
# map() returns a one-shot iterator that would be exhausted after the
# first `dtype in numpy_data_types` membership test.  hasattr() guards
# against names removed in newer numpy releases (e.g. 'string_').
numpy_data_types = [getattr(numpy, name)
                    for name in numpy_data_types if hasattr(numpy, name)]
66
67 """
68 Object that contains a metadata structure
69 stucture inspired by [1]
70 [1] : http://www.saltycrane.com/blog/2012/08/python-data-object-motivated-desire-mutable-namedtuple-default-values/
71
72 Metadata
73 ----------
74
75
76 Methods
77 -------
78 as_dict()
79 Return a dictionnary representation of the MetadataObject
80 """
81
82 # Define default values as an OrderDict
83 # in order to keep the order of the keys for display
84 _default_value = OrderedDict()
85
87 '''
88 Construct an Metadata object
89 Abstract Class _default_value must be specified by
90
91 Metadata()
92
93 Parameters
94 ----------
95
96 Returns
97 -------
98 Metadata
99 '''
100 # Set Default values
101 for key, value in self._default_value.items():
102 setattr(self, key, value)
103
104 # Set metadata passed in as arguments
105 # for k, v in zip(self._default_value.keys(), args):
106 # setattr(self, k, v)
107 # print 'args'
108 for key, value in kwargs.items():
109 setattr(self, key, value)
110
112 if name not in self._default_value.keys():
113 raise AttributeError("%s is not a valid attribute in %s" %
114 (name, self.__class__.__name__))
115 super(MetadataObject, self).__setattr__(name, value)
116
118 if name in self._default_value.keys():
119 new_default_value = self._default_value.copy()
120 del new_default_value[name]
121 super(MetadataObject, self).__setattr__('_default_value',
122 new_default_value)
123 super(MetadataObject, self).__delattr__(name)
124
128
131
133 return [self[attr] for attr in self.keys()]
134
136 return [(attr, self[attr]) for attr in self.keys()]
137
143
146
def __repr__(self):
    # Render as ClassName(attr1=..., attr2=...), mirroring the
    # constructor call that would rebuild this object.
    parts = []
    for attr in self.keys():
        parts.append('{}={}'.format(attr, repr(getattr(self, attr))))
    return '{}({})'.format(self.__class__.__name__, ', '.join(parts))
153
156
def __eq__(self, other):
    # Equal iff `other` is the same class and every tracked attribute
    # compares equal.
    if not isinstance(other, self.__class__):
        return False
    return all([self[key] == other[key] for key in self.keys()])
160
164
def to_xml(self):
    # Serialize to an XML string: a <Metadata> root holding one child
    # element per attribute, with the value stored as its repr() so
    # from_xml() can rebuild it via ast.literal_eval().
    import xml.etree.ElementTree as ET

    root = ET.Element('Metadata')
    for key in self.keys():
        node = ET.SubElement(root, key)
        node.text = repr(getattr(self, key))

    return ET.tostring(root, encoding="utf-8", method="xml")
174
def from_xml(self, xml_string):
    # Restore attributes from XML produced by to_xml(); repr()'d values
    # are parsed back with ast.literal_eval (safe: literals only).
    import xml.etree.ElementTree as ET
    import ast

    for child in ET.fromstring(xml_string):
        if child.text:
            self[child.tag] = ast.literal_eval(child.text)
183
186
189
192
193 '''
194 Metadata object to handle Audio related Metadata
195
196 Attributes
197 ----------
198 id : str
199 name : str
200 unit : str
201 description : str
202 date : str
203 date and time in ISO 8601 format YYYY-MM-DDTHH:MM:SS
204 version : str
205 author : str
206 uuid : str
207 '''
208 # TODO :
209 # - (long) description --> à mettre dans l'API Processor
210
211 # Define default values
212 _default_value = OrderedDict([('id', None),
213 ('name', None),
214 ('unit', None),
215 ('description', None),
216 ('date', None),
217 ('version', None),
218 ('author', None),
219 ('uuid', None)])
220
226
229
230 '''
231 Metadata object to handle Identification Metadata
232
233 Attributes
234 ----------
235 uri : str
236 start : float
237 Start time of the segment in seconds
238 duration : float
239 Duration of the segment in seconds
240 channels : int
241 Number of channels
242 channelsManagement : str
243 A string that indicates how the channels are manage
244 Examples :
245 channelsManagement = '(L+R)/2'
246 channelsManagement = 'R' keep only right channel
247 channelsManagement = 'L' keep only left channel
248 channelsManagement = 'stereo' keep both stereo channels
249 '''
250
251 # Define default values
252 _default_value = OrderedDict([('uri', ''),
253 ('start', 0),
254 ('duration', None),
255 ('is_segment', None),
256 ('channels', None),
257 ('channelsManagement', '')])
258
261
262 '''
263 Metadata object to handle Label Metadata
264
265 Attributes
266 ----------
267 label : dict
268 A dictionnary that contains :
269 - label id has keys and
270 - label names has values
271
272 description : dict
273 A dictionnary that contains :
274 - label id has keys and
275 - label descriptions has values
276
277 label_type : str
278 = 'mono' or 'multi'
279 'mono' or 'multi' enable to specify the label mode :
280 - 'mono' : mono-label (only 1 label at a time)
281 - 'multi' : multi-label (several labels can be observe
282 at the same time)
283
284
285 '''
286
287 # Define default values
288 _default_value = OrderedDict([('label', {}),
289 ('description', {}),
290 ('label_type', 'mono')])
291
293 """
294 Save a dictionnary-like object inside a h5 file group
295 """
296 # Write attributes
297 name = 'label_type'
298 if self.__getattribute__(name) is not None:
299 h5group.attrs[name] = self.__getattribute__(name)
300
301 for name in ['label', 'description']:
302 subgroup = h5group.create_group(name)
303 h5tools.dict_to_hdf5(self.__getattribute__(name), subgroup)
304
307
308 '''
309 Metadata object to handle Frame related Metadata
310
311 Attributes
312 ----------
313 samplerate : int (or float?)
314 blocksize : int
315 stepsize : int
316 '''
317 # TODO : check is samplerate can support float
318
319 # Define default values
320 _default_value = OrderedDict([('samplerate', None),
321 ('blocksize', None),
322 ('stepsize', None)])
323
326
327 '''
328 Metadata object to handle data related Metadata
329
330 Attributes
331 ----------
332 value : numpy array
333 label : numpy array of int
334 time : numpy array of float
335 duration : numpy array of float
336
337 '''
338
339 # Define default values
340 _default_value = OrderedDict([('value', None),
341 ('label', None),
342 ('time', None),
343 ('duration', None)])
344
def __setattr__(self, name, value):
    """Coerce and validate *value* before storing it as attribute *name*.

    - 'value' is converted to a numpy array whose dtype must belong to
      numpy_data_types; 0-d (scalar) arrays are promoted to shape (1,).
    - 'label' is converted to an int numpy array.
    - 'time' / 'duration' are converted to float numpy arrays.
    - 'dataType' is silently ignored (legacy key).

    Raises
    ------
    TypeError
        When the conversion fails or the dtype is not supported.
    """
    if value is None:
        value = []

    # Set Data with the proper type
    if name == 'value':
        value = numpy.asarray(value)
        if value.dtype.type not in numpy_data_types:
            raise TypeError(
                'Result Data can not accept type %s for %s' %
                (value.dtype.type, name))
        if value.shape == ():
            # Promote scalars to one-element arrays
            value.resize((1,))

    elif name == 'label':
        try:
            value = numpy.asarray(value, dtype='int')
        except ValueError:
            # BUG FIX: on conversion failure `value` is still the raw
            # input and has no .dtype attribute; report its Python type
            # instead of crashing (AttributeError) while building the
            # error message.
            raise TypeError(
                'Result Data can not accept type %s for %s' %
                (type(value), name))

    elif name in ['time', 'duration']:
        try:
            value = numpy.asfarray(value)
        except ValueError:
            # Same fix as above: the raw input has no .dtype here.
            raise TypeError(
                'Result Data can not accept type %s for %s' %
                (type(value), name))
    elif name == 'dataType':
        return

    super(DataObject, self).__setattr__(name, value)
378
380 try:
381 return (isinstance(other, self.__class__) and
382 all([numpy.array_equal(self[key], other[key])
383 for key in self.keys()]))
384 except AttributeError:
385 # print self
386 # print [self[key] == other[key] for key in self.keys()]
387 return (isinstance(other, self.__class__) and
388 all([bool(numpy.logical_and.reduce((self[key] == other[key]).ravel()))
389 for key in self.keys()]))
390
def __ne__(self, other):
    """Return the logical negation of __eq__.

    BUG FIX: the previous implementation computed
    ``not (isinstance(other, cls) or any(array_equal(...)))``, which is
    not the negation of __eq__: for two same-class objects with
    *different* data the isinstance test alone made the ``or`` true, so
    ``!=`` wrongly returned False.  Delegating to __eq__ restores the
    invariant ``(a != b) == (not a == b)``.
    """
    return not self.__eq__(other)
395
397 import xml.etree.ElementTree as ET
398 root = ET.Element('Metadata')
399
400 for key in self.keys():
401 child = ET.SubElement(root, key)
402 value = getattr(self, key)
403 if value not in [None, []]:
404 child.text = repr(value.tolist())
405 child.set('dtype', value.dtype.__str__())
406
407 return ET.tostring(root, encoding="utf-8", method="xml")
408
410 import xml.etree.ElementTree as ET
411 import ast
412 root = ET.fromstring(xml_string)
413 for child in root:
414 key = child.tag
415 if child.text:
416 self[key] = numpy.asarray(ast.literal_eval(child.text),
417 dtype=child.get('dtype'))
418
420 # Write Datasets
421 for key in self.keys():
422 if self.__getattribute__(key) is None:
423 continue
424 if self.__getattribute__(key).dtype == 'object':
425 # Handle numpy type = object as vlen string
426 h5group.create_dataset(key,
427 data=self.__getattribute__(
428 key).tolist().__repr__(),
429 dtype=h5py.special_dtype(vlen=str))
430 else:
431 h5group.create_dataset(key, data=self.__getattribute__(key))
432
434 for key, dataset in h5group.items():
435 # Load value from the hdf5 dataset and store in data
436 # FIXME : the following conditional statement is to prevent
437 # reading an empty dataset.
438 # see : https://github.com/h5py/h5py/issues/281
439 # It should be fixed by the next h5py version
440 if dataset.shape != (0,):
441 if h5py.check_dtype(vlen=dataset.dtype):
442 # to deal with VLEN data used for list of
443 # list
444 self.__setattr__(key, eval(dataset[...].tolist()))
445 else:
446 self.__setattr__(key, dataset[...])
447 else:
448 self.__setattr__(key, [])
449
452
455
457 import xml.etree.ElementTree as ET
458 root = ET.Element('Metadata')
459
460 for key, value in self.items():
461 child = ET.SubElement(root, key)
462 child.text = repr(self.get(key))
463
464 return ET.tostring(root, encoding="utf-8", method="xml")
465
467 import xml.etree.ElementTree as ET
468 import ast
469 root = ET.fromstring(xml_string)
470 for child in root.iter():
471 if child.text:
472 self.set(child.tag, ast.literal_eval(child.text))
473
476
479
482
483 """
484 Object that contains the metadata and parameters of an analyzer process
485
486 Parameters
487 ----------
488 data_mode : str
489 data_mode describes the type of data :
490 - 'value' for values
491 - 'label' for label data see LabelMetadata
492 time_mode : str
493 time_mode describes the correspondance between data values and time
494 - 'framewise'
495 - 'global'
496 - 'segment'
497 - 'event'
498
499 Returns
500 -------
501 A new MetadataObject with the following attributes :
502 - data_object : :class:`DataObject`
503 - id_metadata : :class:`IdMetadata`
504 - audio_metadata : :class:`AudioMetadata`
505 - frame_metadata : :class:`FrameMetadata`
506 - label_metadata : :class:`LabelMetadata`
507 - parameters : :class:`AnalyzerParameters` Object
508
509 """
510
511 # Define default values
512 _default_value = OrderedDict([('id_metadata', None),
513 ('data_object', None),
514 ('audio_metadata', None),
515 ('frame_metadata', None),
516 ('label_metadata', None),
517 ('parameters', None)
518 ])
519
521 super(AnalyzerResult, self).__init__()
522
523 self.id_metadata = IdMetadata()
524 self.data_object = DataObject()
525 self.audio_metadata = AudioMetadata()
526 self.frame_metadata = FrameMetadata()
527 self.label_metadata = LabelMetadata()
528 self.parameters = AnalyzerParameters()
529
@staticmethod
def factory(data_mode='value', time_mode='framewise'):
    """
    Factory function for Analyzer result.

    Return an instance of the AnalyzerResult subclass whose
    ``_data_mode`` / ``_time_mode`` class attributes match the
    requested modes.

    Raises
    ------
    ValueError
        When no registered subclass matches the requested modes.
    """
    for result_cls in AnalyzerResult.__subclasses__():
        if (hasattr(result_cls, '_time_mode') and
                hasattr(result_cls, '_data_mode') and
                (result_cls._data_mode,
                 result_cls._time_mode) == (data_mode, time_mode)):
            return result_cls()
    # BUG FIX: replaced a leftover Python 2 debug `print` statement
    # with an informative exception message.
    raise ValueError('Wrong arguments: data_mode=%r, time_mode=%r' %
                     (data_mode, time_mode))
543
545 if name in ['_data_mode', '_time_mode']:
546 super(MetadataObject, self).__setattr__(name, value)
547 return
548
549 elif name in self.keys():
550 if isinstance(value, dict) and value:
551 for (sub_name, sub_value) in value.items():
552 self[name][sub_name] = sub_value
553 return
554
555 super(AnalyzerResult, self).__setattr__(name, value)
556
558 if self.data_mode == 'value':
559 return len(self.data_object.value)
560 else:
561 return len(self.data_object.label)
562
564 return dict([(key, self[key].as_dict())
565 for key in self.keys() if hasattr(self[key], 'as_dict')] +
566 [('data_mode', self.data_mode), ('time_mode', self.time_mode)])
567 # TODO : check if it can be simplified now
568
570 import xml.etree.ElementTree as ET
571 root = ET.Element('result')
572 root.metadata = {'name': self.id_metadata.name,
573 'id': self.id_metadata.id}
574
575 for name in ['data_mode', 'time_mode']:
576 child = ET.SubElement(root, name)
577 child.text = str(self.__getattribute__(name))
578 child.tag = name
579 root.append(child)
580
581 for key in self.keys():
582 child = ET.fromstring(self[key].to_xml())
583 child.tag = key
584 root.append(child)
585
586 return ET.tostring(root, encoding="utf-8", method="xml")
587
588 @staticmethod
590 import xml.etree.ElementTree as ET
591 root = ET.fromstring(xml_string)
592
593 data_mode_child = root.find('data_mode')
594 time_mode_child = root.find('time_mode')
595 result = AnalyzerResult.factory(data_mode=data_mode_child.text,
596 time_mode=time_mode_child.text)
597 for child in root:
598 key = child.tag
599 if key not in ['data_mode', 'time_mode']:
600 child_string = ET.tostring(child)
601 result[key].from_xml(child_string)
602
603 return result
604
606 # Save results in HDF5 Dataset
607 group = h5_file.create_group(self.id_metadata.id)
608 group.attrs['data_mode'] = self.__getattribute__('data_mode')
609 group.attrs['time_mode'] = self.__getattribute__('time_mode')
610 for key in self.keys():
611 if key in ['data_mode', 'time_mode']:
612 continue
613 subgroup = group.create_group(key)
614 self.__getattribute__(key).to_hdf5(subgroup)
615
616 @staticmethod
618 # Read Sub-Group
619 result = AnalyzerResult.factory(
620 data_mode=h5group.attrs['data_mode'],
621 time_mode=h5group.attrs['time_mode'])
622 for subgroup_name, h5subgroup in h5group.items():
623 result[subgroup_name].from_hdf5(h5subgroup)
624 return result
625
626 @property
628 return self._data_mode
629
630 @property
632 return self._time_mode
633
634 @property
637
638 @property
641
642 @property
645
646 @property
648 return self.id_metadata.id
649
650 @property
652 return self.id_metadata.name
653
654 @property
656 return self.id_metadata.unit
657
680
692
695 _time_mode = 'global'
696
698 super(GlobalObject, self).__init__()
699 del self.frame_metadata
700 del self.data_object.time
701 del self.data_object.duration
702
703 @property
706
707 @property
709 return self.audio_metadata.duration
710
713 _time_mode = 'framewise'
714
716 super(FramewiseObject, self).__init__()
717 del self.data_object.time
718 del self.data_object.duration
719
720 @property
722 return (self.audio_metadata.start +
723 self.frame_metadata.stepsize /
724 self.frame_metadata.samplerate *
725 numpy.arange(0, len(self)))
726
727 @property
729 return (self.frame_metadata.blocksize / self.frame_metadata.samplerate
730 * numpy.ones(len(self)))
731
734 _time_mode = 'event'
735
737 super(EventObject, self).__init__()
738 del self.frame_metadata
739 del self.data_object.duration
740
741 @property
743 return self.audio_metadata.start + self.data_object.time
744
745 @property
748
760
764
768
772
776
780
784
788
792
795
796 '''
797 >>> import timeside
798 >>> wavFile = 'http://github.com/yomguy/timeside-samples/raw/master/samples/sweep.mp3'
799 >>> d = timeside.decoder.FileDecoder(wavFile, start=1)
800
801 >>> a = timeside.analyzer.Analyzer()
802 >>> (d|a).run() #doctest: %s
803 >>> a.new_result() #doctest: %s
804 FrameValueResult(id_metadata=IdMetadata(id='analyzer', name='Generic analyzer', unit='', description='', date='...', version='...', author='TimeSide', uuid='...'), data_object=DataObject(value=array([], dtype=float64)), audio_metadata=AudioMetadata(uri='http://...', start=1.0, duration=7..., is_segment=True, channels=None, channelsManagement=''), frame_metadata=FrameMetadata(samplerate=44100, blocksize=8192, stepsize=8192), parameters={})
805 >>> resContainer = timeside.analyzer.core.AnalyzerResultContainer()
806
807 ''' % (doctest_option, doctest_option)
808
810 super(AnalyzerResultContainer, self).__init__()
811 if analyzer_results is not None:
812 self.add(analyzer_results)
813
815 if isinstance(analyzer_result, list):
816 for res in analyzer_result:
817 self.add(res)
818 return
819 # Check result
820 if not isinstance(analyzer_result, AnalyzerResult):
821 raise TypeError('only AnalyzerResult can be added')
822
823 self.__setitem__(analyzer_result.id_metadata.id,
824 analyzer_result)
825 #self.results += [analyzer_result]
826
828
829 import xml.etree.ElementTree as ET
830 # TODO : cf. telemeta util
831 root = ET.Element('timeside')
832
833 for result in self.values():
834 if result is not None:
835 root.append(ET.fromstring(result.to_xml()))
836
837 return ET.tostring(root, encoding="utf-8", method="xml")
838
839 @staticmethod
841 import xml.etree.ElementTree as ET
842
843 results = AnalyzerResultContainer()
844 # TODO : from file
845 #tree = ET.parse(xml_file)
846 #root = tree.getroot()
847 root = ET.fromstring(xml_string)
848 for child in root.iter('result'):
849 results.add(AnalyzerResult.from_xml(ET.tostring(child)))
850
851 return results
852
854 #if data_list == None: data_list = self.results
855 import simplejson as json
856
857 # Define Specialize JSON encoder for numpy array
858 def NumpyArrayEncoder(obj):
859 if isinstance(obj, numpy.ndarray):
860 return {'numpyArray': obj.tolist(),
861 'dtype': obj.dtype.__str__()}
862 raise TypeError(repr(obj) + " is not JSON serializable")
863
864 return json.dumps([res.as_dict() for res in self.values()],
865 default=NumpyArrayEncoder)
866
867 @staticmethod
869 import simplejson as json
870
871 # Define Specialize JSON decoder for numpy array
872 def NumpyArrayDecoder(obj):
873 if isinstance(obj, dict) and 'numpyArray' in obj:
874 numpy_obj = numpy.asarray(obj['numpyArray'],
875 dtype=obj['dtype'])
876 return numpy_obj
877 else:
878 return obj
879
880 results_json = json.loads(json_str, object_hook=NumpyArrayDecoder)
881 results = AnalyzerResultContainer()
882 for res_json in results_json:
883
884 res = AnalyzerResult.factory(data_mode=res_json['data_mode'],
885 time_mode=res_json['time_mode'])
886 for key in res_json.keys():
887 if key not in ['data_mode', 'time_mode']:
888 res[key] = res_json[key]
889
890 results.add(res)
891 return results
892
894 #if data_list == None: data_list = self.results
895 import yaml
896
897 # Define Specialize Yaml encoder for numpy array
898 def numpyArray_representer(dumper, obj):
899 return dumper.represent_mapping(u'!numpyArray',
900 {'dtype': obj.dtype.__str__(),
901 'array': obj.tolist()})
902
903 yaml.add_representer(numpy.ndarray, numpyArray_representer)
904
905 return yaml.dump([res.as_dict() for res in self.values()])
906
@staticmethod
def from_yaml(yaml_str):
    """Deserialize an AnalyzerResultContainer from a YAML string
    produced by to_yaml().

    Arrays serialized under the ``!numpyArray`` tag are rebuilt with a
    custom constructor.
    """
    import yaml

    # Custom constructor turning !numpyArray mappings back into arrays.
    def numpyArray_constructor(loader, node):
        mapping = loader.construct_mapping(node, deep=True)
        return numpy.asarray(mapping['array'], dtype=mapping['dtype'])

    yaml.add_constructor(u'!numpyArray', numpyArray_constructor)

    # SECURITY NOTE: yaml.load() with the default Loader can trigger
    # arbitrary constructors — only feed it trusted input.  safe_load
    # cannot be used as-is because the !numpyArray constructor above is
    # registered on the default Loader.
    results_yaml = yaml.load(yaml_str)
    results = AnalyzerResultContainer()
    for res_yaml in results_yaml:
        res = AnalyzerResult.factory(data_mode=res_yaml['data_mode'],
                                     time_mode=res_yaml['time_mode'])
        for key in res_yaml.keys():
            if key not in ['data_mode', 'time_mode']:
                res[key] = res_yaml[key]
        results.add(res)
    return results
928
931
932 @staticmethod
935
937 # Open HDF5 file and save dataset (overwrite any existing file)
938 with h5py.File(output_file, 'w') as h5_file:
939 for res in self.values():
940 res.to_hdf5(h5_file)
941
942 @staticmethod
944 import h5py
945 # TODO : enable import for yaafe hdf5 format
946
947 # Open HDF5 file for reading and get results
948 h5_file = h5py.File(input_file, 'r')
949 results = AnalyzerResultContainer()
950 try:
951 for group in h5_file.values():
952 result = AnalyzerResult.from_hdf5(group)
953 results.add(result)
954 except TypeError:
955 print('TypeError for HDF5 serialization')
956 finally:
957 h5_file.close() # Close the HDF5 file
958
959 return results
960
963
964 '''
965 Generic class for the analyzers
966 '''
967
970
973 super(Analyzer, self).setup(channels, samplerate,
974 blocksize, totalframes)
975
976 # Set default values for result_* attributes
977 # may be overwritten by the analyzer
978 self.result_channels = self.input_channels
979 self.result_samplerate = self.input_samplerate
980 self.result_blocksize = self.input_blocksize
981 self.result_stepsize = self.input_stepsize
982
983 @property
985
986 return AnalyzerResultContainer(
987 [self.process_pipe.results[key] for key in self.process_pipe.results.keys()
988 if key.split('.')[0] == self.id()])
989
990 @staticmethod
993
994 @staticmethod
997
998 @staticmethod
1001
1003 '''
1004 Create a new result
1005
1006 Attributes
1007 ----------
1008 data_object : MetadataObject
1009 id_metadata : MetadataObject
1010 audio_metadata : MetadataObject
1011 frame_metadata : MetadataObject
1012 label_metadata : MetadataObject
1013 parameters : dict
1014
1015 '''
1016
1017 from datetime import datetime
1018
1019 result = AnalyzerResult.factory(data_mode=data_mode,
1020 time_mode=time_mode)
1021
1022 # Automatically write known metadata
1023 result.id_metadata.date = datetime.now().replace(
1024 microsecond=0).isoformat(' ')
1025 result.id_metadata.version = __version__
1026 result.id_metadata.author = 'TimeSide'
1027 result.id_metadata.id = self.id()
1028 result.id_metadata.name = self.name()
1029 result.id_metadata.unit = self.unit()
1030 result.id_metadata.uuid = self.uuid()
1031
1032 result.audio_metadata.uri = self.mediainfo()['uri']
1033 result.audio_metadata.start = self.mediainfo()['start']
1034 result.audio_metadata.duration = self.mediainfo()['duration']
1035 result.audio_metadata.is_segment = self.mediainfo()['is_segment']
1036
1037 if time_mode == 'framewise':
1038 result.frame_metadata.samplerate = self.result_samplerate
1039 result.frame_metadata.blocksize = self.result_blocksize
1040 result.frame_metadata.stepsize = self.result_stepsize
1041
1042 return result
1043
1044
if __name__ == "__main__":
    # Run this module's doctests when executed as a script.
    import doctest
    doctest.testmod()
1048
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Sun Dec 15 00:09:38 2013 | http://epydoc.sourceforge.net |