1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 from __future__ import division
25
26 from timeside.core import Processor
27 from timeside.__init__ import __version__
28 import numpy
29 from collections import OrderedDict
30 import h5py
31 import h5tools
32 import os
33
# Choose the doctest directive depending on whether a display is available:
# with a DISPLAY the graphical doctests are skipped, otherwise they run
# with ELLIPSIS matching.
# NOTE: dict.has_key() was removed in Python 3 -- use the ``in`` operator.
if 'DISPLAY' in os.environ:
    doctest_option = '+SKIP'
else:
    doctest_option = '+ELLIPSIS'
38
39
# Names of the numpy scalar types accepted as analyzer result data.
_NUMPY_DATA_TYPE_NAMES = [
    'float64',
    'float32',
    'int64',
    'int16',
    'int32',
    'int8',
    'uint64',
    'uint32',
    'uint16',
    'uint8',
    'object_',
    'string_',
    'longlong',
]

# Resolve the names to actual numpy types once, at import time.
# A list comprehension (rather than map()) keeps this a real, reusable
# list under Python 3, where map() returns a one-shot iterator and would
# break the repeated ``dtype.type in numpy_data_types`` membership tests.
# The hasattr() guard skips aliases removed in newer numpy releases
# (e.g. numpy.string_ in numpy >= 2.0).
numpy_data_types = [getattr(numpy, name)
                    for name in _NUMPY_DATA_TYPE_NAMES
                    if hasattr(numpy, name)]
189
226
258
304
323
326
327 '''
328 Metadata object to handle data related Metadata
329
330 Attributes
331 ----------
332 value : numpy array
333 label : numpy array of int
334 time : numpy array of float
335 duration : numpy array of float
336
337 '''
338
339
340 _default_value = OrderedDict([('value', None),
341 ('label', None),
342 ('time', None),
343 ('duration', None)])
344
346 if value is None:
347 value = []
348
349
350 if name == 'value':
351 value = numpy.asarray(value)
352 if value.dtype.type not in numpy_data_types:
353 raise TypeError(
354 'Result Data can not accept type %s for %s' %
355 (value.dtype.type, name))
356 if value.shape == ():
357 value.resize((1,))
358
359 elif name == 'label':
360 try:
361 value = numpy.asarray(value, dtype='int')
362 except ValueError:
363 raise TypeError(
364 'Result Data can not accept type %s for %s' %
365 (value.dtype.type, name))
366
367 elif name in ['time', 'duration']:
368 try:
369 value = numpy.asfarray(value)
370 except ValueError:
371 raise TypeError(
372 'Result Data can not accept type %s for %s' %
373 (value.dtype.type, name))
374 elif name == 'dataType':
375 return
376
377 super(DataObject, self).__setattr__(name, value)
378
380 try:
381 return (isinstance(other, self.__class__) and
382 all([numpy.array_equal(self[key], other[key])
383 for key in self.keys()]))
384 except AttributeError:
385
386
387 return (isinstance(other, self.__class__) and
388 all([bool(numpy.logical_and.reduce((self[key] == other[key]).ravel()))
389 for key in self.keys()]))
390
392 return not(isinstance(other, self.__class__) or
393 any([numpy.array_equal(self[key], other[key])
394 for key in self.keys()]))
395
397 import xml.etree.ElementTree as ET
398 root = ET.Element('Metadata')
399
400 for key in self.keys():
401 child = ET.SubElement(root, key)
402 value = getattr(self, key)
403 if value not in [None, []]:
404 child.text = repr(value.tolist())
405 child.set('dtype', value.dtype.__str__())
406
407 return ET.tostring(root, encoding="utf-8", method="xml")
408
410 import xml.etree.ElementTree as ET
411 import ast
412 root = ET.fromstring(xml_string)
413 for child in root:
414 key = child.tag
415 if child.text:
416 self[key] = numpy.asarray(ast.literal_eval(child.text),
417 dtype=child.get('dtype'))
418
420
421 for key in self.keys():
422 if self.__getattribute__(key) is None:
423 continue
424 if self.__getattribute__(key).dtype == 'object':
425
426 h5group.create_dataset(key,
427 data=self.__getattribute__(
428 key).tolist().__repr__(),
429 dtype=h5py.special_dtype(vlen=str))
430 else:
431 h5group.create_dataset(key, data=self.__getattribute__(key))
432
434 for key, dataset in h5group.items():
435
436
437
438
439
440 if dataset.shape != (0,):
441 if h5py.check_dtype(vlen=dataset.dtype):
442
443
444 self.__setattr__(key, eval(dataset[...].tolist()))
445 else:
446 self.__setattr__(key, dataset[...])
447 else:
448 self.__setattr__(key, [])
449
452
455
457 import xml.etree.ElementTree as ET
458 root = ET.Element('Metadata')
459
460 for key, value in self.items():
461 child = ET.SubElement(root, key)
462 child.text = repr(self.get(key))
463
464 return ET.tostring(root, encoding="utf-8", method="xml")
465
467 import xml.etree.ElementTree as ET
468 import ast
469 root = ET.fromstring(xml_string)
470 for child in root.iter():
471 if child.text:
472 self.set(child.tag, ast.literal_eval(child.text))
473
476
479
482
483 """
484 Object that contains the metadata and parameters of an analyzer process
485
486 Parameters
487 ----------
488 data_mode : str
489 data_mode describes the type of data :
490 - 'value' for values
491 - 'label' for label data see LabelMetadata
492 time_mode : str
493 time_mode describes the correspondance between data values and time
494 - 'framewise'
495 - 'global'
496 - 'segment'
497 - 'event'
498
499 Returns
500 -------
501 A new MetadataObject with the following attributes :
502 - data_object : :class:`DataObject`
503 - id_metadata : :class:`IdMetadata`
504 - audio_metadata : :class:`AudioMetadata`
505 - frame_metadata : :class:`FrameMetadata`
506 - label_metadata : :class:`LabelMetadata`
507 - parameters : :class:`AnalyzerParameters` Object
508
509 """
510
511
512 _default_value = OrderedDict([('id_metadata', None),
513 ('data_object', None),
514 ('audio_metadata', None),
515 ('frame_metadata', None),
516 ('label_metadata', None),
517 ('parameters', None)
518 ])
519
520 - def __init__(self, data_mode=None, time_mode=None):
529
530 @staticmethod
531 - def factory(data_mode='value', time_mode='framewise'):
532 """
533 Factory function for Analyzer result
534 """
535 for result_cls in AnalyzerResult.__subclasses__():
536 if (hasattr(result_cls, '_time_mode') and
537 hasattr(result_cls, '_data_mode') and
538 (result_cls._data_mode, result_cls._time_mode) == (data_mode,
539 time_mode)):
540 return result_cls()
541 print data_mode, time_mode
542 raise ValueError('Wrong arguments')
543
556
558 if self.data_mode == 'value':
559 return len(self.data_object.value)
560 else:
561 return len(self.data_object.label)
562
564 return dict([(key, self[key].as_dict())
565 for key in self.keys() if hasattr(self[key], 'as_dict')] +
566 [('data_mode', self.data_mode), ('time_mode', self.time_mode)])
567
568
570 import xml.etree.ElementTree as ET
571 root = ET.Element('result')
572 root.metadata = {'name': self.id_metadata.name,
573 'id': self.id_metadata.id}
574
575 for name in ['data_mode', 'time_mode']:
576 child = ET.SubElement(root, name)
577 child.text = str(self.__getattribute__(name))
578 child.tag = name
579 root.append(child)
580
581 for key in self.keys():
582 child = ET.fromstring(self[key].to_xml())
583 child.tag = key
584 root.append(child)
585
586 return ET.tostring(root, encoding="utf-8", method="xml")
587
588 @staticmethod
590 import xml.etree.ElementTree as ET
591 root = ET.fromstring(xml_string)
592
593 data_mode_child = root.find('data_mode')
594 time_mode_child = root.find('time_mode')
595 result = AnalyzerResult.factory(data_mode=data_mode_child.text,
596 time_mode=time_mode_child.text)
597 for child in root:
598 key = child.tag
599 if key not in ['data_mode', 'time_mode']:
600 child_string = ET.tostring(child)
601 result[key].from_xml(child_string)
602
603 return result
604
606
607 group = h5_file.create_group(self.id_metadata.id)
608 group.attrs['data_mode'] = self.__getattribute__('data_mode')
609 group.attrs['time_mode'] = self.__getattribute__('time_mode')
610 for key in self.keys():
611 if key in ['data_mode', 'time_mode']:
612 continue
613 subgroup = group.create_group(key)
614 self.__getattribute__(key).to_hdf5(subgroup)
615
616 @staticmethod
625
626 @property
629
630 @property
633
634 @property
636 raise NotImplementedError
637
638 @property
640 raise NotImplementedError
641
642 @property
644 raise NotImplementedError
645
646 @property
648 return self.id_metadata.id
649
650 @property
652 return self.id_metadata.name
653
654 @property
656 return self.id_metadata.unit
657
660 _data_mode = 'value'
661
663 super(ValueObject, self).__init__()
664 del self.data_object.label
665 del self.label_metadata
666
667 @property
669 return self.data_object.value
670
671 @property
673 return dict(mean=numpy.mean(self.data, axis=0),
674 std=numpy.std(self.data, axis=0, ddof=1),
675 median=numpy.median(self.data, axis=0),
676 max=numpy.max(self.data, axis=0),
677 min=numpy.min(self.data, axis=0),
678 shape=self.data.shape,
679 )
680
683 _data_mode = 'label'
684
688
689 @property
691 return self.data_object.label
692
695 _time_mode = 'global'
696
702
703 @property
705 return self.audio_metadata.start
706
707 @property
710
713 _time_mode = 'framewise'
714
719
720 @property
722 return (self.audio_metadata.start +
723 self.frame_metadata.stepsize /
724 self.frame_metadata.samplerate *
725 numpy.arange(0, len(self)))
726
727 @property
729 return (self.frame_metadata.blocksize / self.frame_metadata.samplerate
730 * numpy.ones(len(self)))
731
734 _time_mode = 'event'
735
740
741 @property
743 return self.audio_metadata.start + self.data_object.time
744
745 @property
747 return numpy.zeros(len(self))
748
760
764
768
772
776
780
784
788
792
795
796 '''
797 >>> import timeside
798 >>> wavFile = 'http://github.com/yomguy/timeside-samples/raw/master/samples/sweep.mp3'
799 >>> d = timeside.decoder.FileDecoder(wavFile, start=1)
800
801 >>> a = timeside.analyzer.Analyzer()
802 >>> (d|a).run() #doctest: %s
803 >>> a.new_result() #doctest: %s
804 FrameValueResult(id_metadata=IdMetadata(id='analyzer', name='Generic analyzer', unit='', description='', date='...', version='...', author='TimeSide', uuid='...'), data_object=DataObject(value=array([], dtype=float64)), audio_metadata=AudioMetadata(uri='http://...', start=1.0, duration=7..., is_segment=True, channels=None, channelsManagement=''), frame_metadata=FrameMetadata(samplerate=44100, blocksize=8192, stepsize=8192), parameters={})
805 >>> resContainer = timeside.analyzer.core.AnalyzerResultContainer()
806
807 ''' % (doctest_option, doctest_option)
808
809 - def __init__(self, analyzer_results=None):
813
814 - def add(self, analyzer_result):
815 if isinstance(analyzer_result, list):
816 for res in analyzer_result:
817 self.add(res)
818 return
819
820 if not isinstance(analyzer_result, AnalyzerResult):
821 raise TypeError('only AnalyzerResult can be added')
822
823 self.__setitem__(analyzer_result.id_metadata.id,
824 analyzer_result)
825
826
828
829 import xml.etree.ElementTree as ET
830
831 root = ET.Element('timeside')
832
833 for result in self.values():
834 if result is not None:
835 root.append(ET.fromstring(result.to_xml()))
836
837 return ET.tostring(root, encoding="utf-8", method="xml")
838
839 @staticmethod
852
854
855 import simplejson as json
856
857
858 def NumpyArrayEncoder(obj):
859 if isinstance(obj, numpy.ndarray):
860 return {'numpyArray': obj.tolist(),
861 'dtype': obj.dtype.__str__()}
862 raise TypeError(repr(obj) + " is not JSON serializable")
863
864 return json.dumps([res.as_dict() for res in self.values()],
865 default=NumpyArrayEncoder)
866
867 @staticmethod
869 import simplejson as json
870
871
872 def NumpyArrayDecoder(obj):
873 if isinstance(obj, dict) and 'numpyArray' in obj:
874 numpy_obj = numpy.asarray(obj['numpyArray'],
875 dtype=obj['dtype'])
876 return numpy_obj
877 else:
878 return obj
879
880 results_json = json.loads(json_str, object_hook=NumpyArrayDecoder)
881 results = AnalyzerResultContainer()
882 for res_json in results_json:
883
884 res = AnalyzerResult.factory(data_mode=res_json['data_mode'],
885 time_mode=res_json['time_mode'])
886 for key in res_json.keys():
887 if key not in ['data_mode', 'time_mode']:
888 res[key] = res_json[key]
889
890 results.add(res)
891 return results
892
894
895 import yaml
896
897
898 def numpyArray_representer(dumper, obj):
899 return dumper.represent_mapping(u'!numpyArray',
900 {'dtype': obj.dtype.__str__(),
901 'array': obj.tolist()})
902
903 yaml.add_representer(numpy.ndarray, numpyArray_representer)
904
905 return yaml.dump([res.as_dict() for res in self.values()])
906
907 @staticmethod
909 import yaml
910
911
912 def numpyArray_constructor(loader, node):
913 mapping = loader.construct_mapping(node, deep=True)
914 return numpy.asarray(mapping['array'], dtype=mapping['dtype'])
915
916 yaml.add_constructor(u'!numpyArray', numpyArray_constructor)
917
918 results_yaml = yaml.load(yaml_str)
919 results = AnalyzerResultContainer()
920 for res_yaml in results_yaml:
921 res = AnalyzerResult.factory(data_mode=res_yaml['data_mode'],
922 time_mode=res_yaml['time_mode'])
923 for key in res_yaml.keys():
924 if key not in ['data_mode', 'time_mode']:
925 res[key] = res_yaml[key]
926 results.add(res)
927 return results
928
930 numpy.save(output_file, self)
931
932 @staticmethod
934 return numpy.load(input_file)
935
937
938 with h5py.File(output_file, 'w') as h5_file:
939 for res in self.values():
940 res.to_hdf5(h5_file)
941
942 @staticmethod
960
963
964 '''
965 Generic class for the analyzers
966 '''
967
970
971 - def setup(self, channels=None, samplerate=None,
972 blocksize=None, totalframes=None):
973 super(Analyzer, self).setup(channels, samplerate,
974 blocksize, totalframes)
975
976
977
978 self.result_channels = self.input_channels
979 self.result_samplerate = self.input_samplerate
980 self.result_blocksize = self.input_blocksize
981 self.result_stepsize = self.input_stepsize
982
983 @property
989
990 @staticmethod
993
994 @staticmethod
996 return "Generic analyzer"
997
998 @staticmethod
1001
1002 - def new_result(self, data_mode='value', time_mode='framewise'):
1003 '''
1004 Create a new result
1005
1006 Attributes
1007 ----------
1008 data_object : MetadataObject
1009 id_metadata : MetadataObject
1010 audio_metadata : MetadataObject
1011 frame_metadata : MetadataObject
1012 label_metadata : MetadataObject
1013 parameters : dict
1014
1015 '''
1016
1017 from datetime import datetime
1018
1019 result = AnalyzerResult.factory(data_mode=data_mode,
1020 time_mode=time_mode)
1021
1022
1023 result.id_metadata.date = datetime.now().replace(
1024 microsecond=0).isoformat(' ')
1025 result.id_metadata.version = __version__
1026 result.id_metadata.author = 'TimeSide'
1027 result.id_metadata.id = self.id()
1028 result.id_metadata.name = self.name()
1029 result.id_metadata.unit = self.unit()
1030 result.id_metadata.uuid = self.uuid()
1031
1032 result.audio_metadata.uri = self.mediainfo()['uri']
1033 result.audio_metadata.start = self.mediainfo()['start']
1034 result.audio_metadata.duration = self.mediainfo()['duration']
1035 result.audio_metadata.is_segment = self.mediainfo()['is_segment']
1036
1037 if time_mode == 'framewise':
1038 result.frame_metadata.samplerate = self.result_samplerate
1039 result.frame_metadata.blocksize = self.result_blocksize
1040 result.frame_metadata.stepsize = self.result_stepsize
1041
1042 return result
1043
1044
if __name__ == "__main__":
    # Run this module's embedded doctests when executed as a script.
    import doctest
    doctest.testmod()
1048